Other LLM Connectors #

Overview #

Semantic Kernel supports a variety of LLM connectors, including Hugging Face, local models, and other third-party services, giving you flexibility in model selection.

Hugging Face Connector #

Basic Configuration #

csharp
using Microsoft.SemanticKernel;

var builder = Kernel.CreateBuilder();

builder.AddHuggingFaceChatCompletion(
    model: "microsoft/DialoGPT-medium",
    apiKey: Environment.GetEnvironmentVariable("HUGGINGFACE_API_KEY")
);

var kernel = builder.Build();
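
With the connector registered, you can resolve the chat completion service from the built kernel and send messages. A minimal usage sketch (the prompt text is just an example):

csharp
using Microsoft.SemanticKernel.ChatCompletion;

// Resolve the chat service registered above
var chatService = kernel.GetRequiredService<IChatCompletionService>();

var history = new ChatHistory();
history.AddUserMessage("Tell me a joke");

var response = await chatService.GetChatMessageContentAsync(history);
Console.WriteLine(response.Content);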

Text Generation #

csharp
builder.AddHuggingFaceTextGeneration(
    model: "gpt2",
    apiKey: "api-key"
);

var kernel = builder.Build();

var result = await kernel.InvokePromptAsync("Hello, world!");
Console.WriteLine(result);

Embedding Generation #

csharp
builder.AddHuggingFaceTextEmbeddingGeneration(
    model: "sentence-transformers/all-MiniLM-L6-v2",
    apiKey: "api-key"
);
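
Once the kernel is built, the embedding service can be resolved and used to generate vectors. A minimal sketch (note that Semantic Kernel's embedding abstractions are still marked experimental, so you may need to suppress the corresponding SKEXP warnings):

csharp
using Microsoft.SemanticKernel.Embeddings;

var kernel = builder.Build();
var embeddingService = kernel.GetRequiredService<ITextEmbeddingGenerationService>();

// Each input string produces one embedding vector
var embeddings = await embeddingService.GenerateEmbeddingsAsync(
    new[] { "Hello, world!", "Semantic Kernel" });

Console.WriteLine($"Dimensions: {embeddings[0].Length}");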

Ollama Connector #

Installing Ollama #

bash
# macOS
brew install ollama

# Linux
curl -fsSL https://ollama.com/install.sh | sh

# Start the service
ollama serve

# Pull models
ollama pull llama2
ollama pull mistral

Configuring Ollama #

csharp
using Microsoft.SemanticKernel;

var builder = Kernel.CreateBuilder();

builder.AddOllamaChatCompletion(
    modelId: "llama2",
    endpoint: new Uri("http://localhost:11434")
);

var kernel = builder.Build();

Using a Local Model #

csharp
var result = await kernel.InvokePromptAsync("Hello, please introduce yourself");
Console.WriteLine(result);
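
Local models served through Ollama also support streaming, which noticeably improves perceived latency. A minimal sketch using the chat completion service registered above:

csharp
using Microsoft.SemanticKernel.ChatCompletion;

var chat = kernel.GetRequiredService<IChatCompletionService>();

var history = new ChatHistory();
history.AddUserMessage("Hello, please introduce yourself");

// Print tokens as they arrive instead of waiting for the full response
await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
{
    Console.Write(chunk.Content);
}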

Available Models #

| Model | Description | Size |
| --- | --- | --- |
| llama2 | Meta Llama 2 | 4GB+ |
| mistral | Mistral AI | 4GB+ |
| codellama | Code generation | 4GB+ |
| phi | Microsoft Phi | 2GB+ |
| gemma | Google Gemma | 5GB+ |

Custom Connectors #

Creating a Custom Connector #

csharp
using System.Net.Http.Json;
using System.Runtime.CompilerServices;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

// Minimal response DTO for the custom API (the exact shape depends on your service)
public record ChatResponse(string? Content);

public class CustomChatCompletionService : IChatCompletionService
{
    private readonly HttpClient _httpClient;
    private readonly string _modelId;

    public CustomChatCompletionService(string modelId, string endpoint, string apiKey)
    {
        _modelId = modelId;
        _httpClient = new HttpClient
        {
            BaseAddress = new Uri(endpoint)
        };
        _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
    }

    public IReadOnlyDictionary<string, object?> Attributes => new Dictionary<string, object?>
    {
        ["ModelId"] = _modelId
    };

    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(
        ChatHistory chatHistory,
        PromptExecutionSettings? executionSettings = null,
        Kernel? kernel = null,
        CancellationToken cancellationToken = default)
    {
        // Implement the custom API call logic
        var request = new
        {
            model = _modelId,
            messages = chatHistory.Select(m => new { role = m.Role.ToString(), content = m.Content })
        };

        var response = await _httpClient.PostAsJsonAsync("/chat", request, cancellationToken);
        response.EnsureSuccessStatusCode();
        var result = await response.Content.ReadFromJsonAsync<ChatResponse>(cancellationToken);

        return new List<ChatMessageContent>
        {
            new(AuthorRole.Assistant, result?.Content ?? "")
        };
    }

    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(
        ChatHistory chatHistory,
        PromptExecutionSettings? executionSettings = null,
        Kernel? kernel = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        // Implement streaming output here; this placeholder yields a single chunk
        yield return new StreamingChatMessageContent(AuthorRole.Assistant, "streaming content");
    }
}

Registering the Custom Service #

csharp
var builder = Kernel.CreateBuilder();

builder.Services.AddKeyedSingleton<IChatCompletionService>(
    "custom",
    new CustomChatCompletionService(
        modelId: "custom-model",
        endpoint: "https://api.custom-llm.com",
        apiKey: "api-key"
    )
);

var kernel = builder.Build();
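
Once registered, the custom service can be resolved from the kernel by its key and called directly. A minimal usage sketch:

csharp
using Microsoft.SemanticKernel.ChatCompletion;

// Resolve the keyed service and send a message
var chat = kernel.GetRequiredService<IChatCompletionService>("custom");

var history = new ChatHistory();
history.AddUserMessage("Hello!");

var reply = await chat.GetChatMessageContentAsync(history);
Console.WriteLine(reply.Content);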

OpenAI-Compatible APIs #

Using a Compatible Service #

csharp
// Many services expose an OpenAI-compatible API
builder.AddOpenAIChatCompletion(
    modelId: "local-model",
    apiKey: "not-needed",  // 本地服务可能不需要
    endpoint: new Uri("http://localhost:8000/v1")
);

Common Compatible Services #

| Service | Endpoint | Notes |
| --- | --- | --- |
| vLLM | http://localhost:8000/v1 | High-performance inference |
| LocalAI | http://localhost:8080/v1 | Local OpenAI alternative |
| LM Studio | http://localhost:1234/v1 | Desktop app |
| Text Generation WebUI | http://localhost:5000/v1 | Gradio-based UI |

Multi-Model Management #

Configuring Multiple Models #

csharp
var builder = Kernel.CreateBuilder();

// OpenAI
builder.AddOpenAIChatCompletion(
    serviceId: "openai-gpt4",
    modelId: "gpt-4",
    apiKey: "openai-key"
);

// Azure OpenAI
builder.AddAzureOpenAIChatCompletion(
    serviceId: "azure-gpt4",
    deploymentName: "gpt-4-deployment",
    endpoint: "https://your-resource.openai.azure.com/",
    apiKey: "azure-key"
);

// Ollama
builder.AddOllamaChatCompletion(
    serviceId: "ollama-llama2",
    modelId: "llama2",
    endpoint: new Uri("http://localhost:11434")
);

// Hugging Face
builder.AddHuggingFaceChatCompletion(
    serviceId: "hf-model",
    model: "microsoft/DialoGPT-medium",
    apiKey: "hf-key"
);

var kernel = builder.Build();

Selecting Models Dynamically #

csharp
public class ModelRouter
{
    private readonly Kernel _kernel;

    public ModelRouter(Kernel kernel) => _kernel = kernel;

    public async Task<string> GetCompletionAsync(
        string prompt,
        ModelRequirements requirements)
    {
        var serviceId = requirements switch
        {
            { NeedsHighQuality: true } => "openai-gpt4",
            { NeedsSpeed: true } => "azure-gpt4",
            { NeedsPrivacy: true } => "ollama-llama2",
            { NeedsLowCost: true } => "hf-model",
            _ => "azure-gpt4"
        };

        // Route the request to the selected service via execution settings
        var settings = new PromptExecutionSettings { ServiceId = serviceId };
        var result = await _kernel.InvokePromptAsync(prompt, new KernelArguments(settings));
        return result.ToString();
    }
}
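
ModelRequirements is not a Semantic Kernel type; a hypothetical record matching the patterns above might look like this:

csharp
// Hypothetical requirements record used by ModelRouter above
public record ModelRequirements(
    bool NeedsHighQuality = false,
    bool NeedsSpeed = false,
    bool NeedsPrivacy = false,
    bool NeedsLowCost = false);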

Adapting to Model Capabilities #

Detecting Model Capabilities #

csharp
public class ModelCapabilities
{
    public bool SupportsFunctionCalling { get; set; }
    public bool SupportsVision { get; set; }
    public int MaxContextTokens { get; set; }
    public bool SupportsStreaming { get; set; }
}

public class ModelCapabilityService
{
    private readonly Dictionary<string, ModelCapabilities> _capabilities = new()
    {
        ["gpt-4"] = new ModelCapabilities
        {
            SupportsFunctionCalling = true,
            SupportsVision = true,
            MaxContextTokens = 128000,
            SupportsStreaming = true
        },
        ["llama2"] = new ModelCapabilities
        {
            SupportsFunctionCalling = false,
            SupportsVision = false,
            MaxContextTokens = 4096,
            SupportsStreaming = true
        }
    };

    public ModelCapabilities GetCapabilities(string modelId)
    {
        return _capabilities.TryGetValue(modelId, out var caps)
            ? caps
            : new ModelCapabilities();
    }
}

Adjusting Behavior Based on Capabilities #

csharp
using Microsoft.SemanticKernel.Connectors.OpenAI;

public async Task<string> GetCompletionAsync(
    Kernel kernel,
    string prompt,
    string serviceId)
{
    var capabilities = _capabilityService.GetCapabilities(serviceId);

    // Target the selected service and enable function calling only when supported
    var settings = new OpenAIPromptExecutionSettings { ServiceId = serviceId };

    if (capabilities.SupportsFunctionCalling)
    {
        settings.FunctionChoiceBehavior = FunctionChoiceBehavior.Auto();
    }

    var result = await kernel.InvokePromptAsync(
        prompt,
        new KernelArguments(settings));
    return result.ToString();
}

Local Model Deployment #

Using vLLM #

bash
# Install vLLM
pip install vllm

# Start the server
python -m vllm.entrypoints.openai.api_server \
    --model meta-llama/Llama-2-7b-chat-hf \
    --host 0.0.0.0 \
    --port 8000

Then point the OpenAI connector at the local server:

csharp
builder.AddOpenAIChatCompletion(
    modelId: "Llama-2-7b-chat-hf",
    apiKey: "not-needed",
    endpoint: new Uri("http://localhost:8000/v1")
);

Using Text Generation WebUI #

bash
# Install
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui
pip install -r requirements.txt

# Start
python server.py --api --extensions openai

Then connect to it like any other OpenAI-compatible endpoint:

csharp
builder.AddOpenAIChatCompletion(
    modelId: "local-model",
    apiKey: "not-needed",
    endpoint: new Uri("http://localhost:5000/v1")
);

Best Practices #

1. Model Selection Strategy #

csharp
public string SelectModel(TaskType taskType, PrivacyLevel privacy)
{
    return (taskType, privacy) switch
    {
        (TaskType.Complex, PrivacyLevel.Public) => "openai-gpt4",
        (TaskType.Simple, PrivacyLevel.Public) => "azure-gpt35",
        (_, PrivacyLevel.Sensitive) => "ollama-llama2",
        _ => "azure-gpt4"
    };
}
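
TaskType and PrivacyLevel are not Semantic Kernel types; they are small application-level enums assumed to look something like this:

csharp
// Hypothetical application-level enums used by SelectModel above
public enum TaskType { Simple, Complex }
public enum PrivacyLevel { Public, Sensitive }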

2. Error Handling #

csharp
public async Task<string> SafeGetCompletionAsync(
    Kernel kernel,
    string prompt,
    string primaryService,
    string fallbackService)
{
    try
    {
        var result = await kernel.InvokePromptAsync(
            prompt,
            new KernelArguments(new PromptExecutionSettings { ServiceId = primaryService }));
        return result.ToString();
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Primary service failed, falling back to the secondary service");
        var result = await kernel.InvokePromptAsync(
            prompt,
            new KernelArguments(new PromptExecutionSettings { ServiceId = fallbackService }));
        return result.ToString();
    }
}

3. Performance Monitoring #

csharp
public class ModelPerformanceTracker
{
    private readonly Dictionary<string, List<TimeSpan>> _responseTimes = new();

    public void RecordResponse(string serviceName, TimeSpan duration)
    {
        if (!_responseTimes.ContainsKey(serviceName))
        {
            _responseTimes[serviceName] = new List<TimeSpan>();
        }
        _responseTimes[serviceName].Add(duration);
    }

    public TimeSpan GetAverageResponseTime(string serviceName)
    {
        if (!_responseTimes.TryGetValue(serviceName, out var times))
            return TimeSpan.Zero;

        return TimeSpan.FromTicks((long)times.Average(t => t.Ticks));
    }
}
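
A typical way to feed the tracker is to wrap each call in a Stopwatch. A minimal sketch (assuming the "azure-gpt4" service registered earlier; note the tracker's dictionary is not thread-safe as written):

csharp
using System.Diagnostics;

var tracker = new ModelPerformanceTracker();

// Time a single call and record it against the service that handled it
var stopwatch = Stopwatch.StartNew();
await kernel.InvokePromptAsync(
    "Summarize this text...",
    new KernelArguments(new PromptExecutionSettings { ServiceId = "azure-gpt4" }));
stopwatch.Stop();

tracker.RecordResponse("azure-gpt4", stopwatch.Elapsed);
Console.WriteLine($"Average: {tracker.GetAverageResponseTime("azure-gpt4").TotalMilliseconds} ms");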

Next Steps #

Now that you have a working knowledge of the various LLM connectors, continue with Memory Systems to learn how to manage conversation history and knowledge storage!

Last updated: 2026-04-04