其他 LLM 连接器 #
概述 #
Semantic Kernel 支持多种 LLM 连接器,包括 Hugging Face、本地模型和其他第三方服务,提供灵活的模型选择。
Hugging Face 连接器 #
基本配置 #
csharp
using Microsoft.SemanticKernel;
var builder = Kernel.CreateBuilder();
builder.AddHuggingFaceChatCompletion(
model: "microsoft/DialoGPT-medium",
apiKey: Environment.GetEnvironmentVariable("HUGGINGFACE_API_KEY")
);
var kernel = builder.Build();
文本生成 #
csharp
builder.AddHuggingFaceTextGeneration(
model: "gpt2",
apiKey: "api-key"
);
var result = await kernel.InvokePromptAsync("Hello, world!");
嵌入生成 #
csharp
builder.AddHuggingFaceTextEmbeddingGeneration(
model: "sentence-transformers/all-MiniLM-L6-v2",
apiKey: "api-key"
);
Ollama 连接器 #
安装 Ollama #
bash
# macOS
brew install ollama
# Linux
curl -fsSL https://ollama.com/install.sh | sh
# 启动服务
ollama serve
# 拉取模型
ollama pull llama2
ollama pull mistral
配置 Ollama #
csharp
using Microsoft.SemanticKernel;
var builder = Kernel.CreateBuilder();
builder.AddOllamaChatCompletion(
modelId: "llama2",
endpoint: new Uri("http://localhost:11434")
);
var kernel = builder.Build();
使用本地模型 #
csharp
var result = await kernel.InvokePromptAsync("你好,请介绍一下自己");
Console.WriteLine(result);
可用模型 #
| 模型 | 描述 | 大小 |
|---|---|---|
| llama2 | Meta Llama 2 | 4GB+ |
| mistral | Mistral AI | 4GB+ |
| codellama | 代码生成 | 4GB+ |
| phi | Microsoft Phi | 2GB+ |
| gemma | Google Gemma | 5GB+ |
自定义连接器 #
创建自定义连接器 #
csharp
using Microsoft.SemanticKernel.Services;
using Microsoft.SemanticKernel.ChatCompletion;
/// <summary>
/// Chat-completion service backed by a custom HTTP API exposing a "/chat" route.
/// Sends the model id plus the chat history as JSON and maps the reply to a
/// single assistant <see cref="ChatMessageContent"/>.
/// </summary>
public class CustomChatCompletionService : IChatCompletionService
{
private readonly HttpClient _httpClient;
private readonly string _modelId;
// Cached so repeated reads of Attributes do not allocate a new dictionary each time.
private readonly IReadOnlyDictionary<string, object?> _attributes;
/// <param name="modelId">Model identifier sent with every request.</param>
/// <param name="endpoint">Base URL of the custom LLM API.</param>
/// <param name="apiKey">Bearer token attached to every request.</param>
public CustomChatCompletionService(string modelId, string endpoint, string apiKey)
{
_modelId = modelId;
_httpClient = new HttpClient
{
BaseAddress = new Uri(endpoint)
};
_httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
_attributes = new Dictionary<string, object?>
{
["ModelId"] = _modelId
};
}
public IReadOnlyDictionary<string, object?> Attributes => _attributes;
public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(
ChatHistory chatHistory,
PromptExecutionSettings? executionSettings = null,
Kernel? kernel = null,
CancellationToken cancellationToken = default)
{
// Custom API call: project each history entry to the wire format { role, content }.
var request = new
{
model = _modelId,
messages = chatHistory.Select(m => new { role = m.Role.ToString(), content = m.Content })
};
var response = await _httpClient.PostAsJsonAsync("/chat", request, cancellationToken);
// Fail fast on HTTP errors instead of trying to deserialize an error payload.
response.EnsureSuccessStatusCode();
var result = await response.Content.ReadFromJsonAsync<ChatResponse>(cancellationToken);
return new List<ChatMessageContent>
{
new(AuthorRole.Assistant, result?.Content ?? "")
};
}
public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(
ChatHistory chatHistory,
PromptExecutionSettings? executionSettings = null,
Kernel? kernel = null,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
// Placeholder streaming implementation — replace with real server-sent-event handling.
cancellationToken.ThrowIfCancellationRequested();
yield return new StreamingChatMessageContent(AuthorRole.Assistant, "流式内容");
}
}
注册自定义服务 #
csharp
var builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<IChatCompletionService>(
"custom",
new CustomChatCompletionService(
modelId: "custom-model",
endpoint: "https://api.custom-llm.com",
apiKey: "api-key"
)
);
var kernel = builder.Build();
OpenAI 兼容 API #
使用兼容服务 #
csharp
// 许多服务提供 OpenAI 兼容 API
builder.AddOpenAIChatCompletion(
modelId: "local-model",
apiKey: "not-needed", // 本地服务可能不需要
endpoint: new Uri("http://localhost:8000/v1")
);
常见兼容服务 #
| 服务 | 端点 | 说明 |
|---|---|---|
| vLLM | http://localhost:8000/v1 | 高性能推理 |
| LocalAI | http://localhost:8080/v1 | 本地 OpenAI 替代 |
| LM Studio | http://localhost:1234/v1 | 桌面应用 |
| Text Generation WebUI | http://localhost:5000/v1 | Gradio 界面 |
多模型管理 #
配置多个模型 #
csharp
var builder = Kernel.CreateBuilder();
// OpenAI
builder.AddOpenAIChatCompletion(
serviceId: "openai-gpt4",
modelId: "gpt-4",
apiKey: "openai-key"
);
// Azure OpenAI
builder.AddAzureOpenAIChatCompletion(
serviceId: "azure-gpt4",
deploymentName: "gpt-4-deployment",
endpoint: "https://your-resource.openai.azure.com/",
apiKey: "azure-key"
);
// Ollama
builder.AddOllamaChatCompletion(
serviceId: "ollama-llama2",
modelId: "llama2",
endpoint: new Uri("http://localhost:11434")
);
// Hugging Face
builder.AddHuggingFaceChatCompletion(
serviceId: "hf-model",
model: "microsoft/DialoGPT-medium",
apiKey: "hf-key"
);
var kernel = builder.Build();
动态选择模型 #
csharp
/// <summary>
/// Routes a prompt to the registered connector that best matches the caller's
/// stated requirements (quality &gt; speed &gt; privacy &gt; cost).
/// </summary>
public class ModelRouter
{
private readonly Kernel _kernel;
// Original had no constructor, so the readonly _kernel field was always null.
public ModelRouter(Kernel kernel)
{
_kernel = kernel;
}
/// <summary>Selects a service id from the requirements and invokes the prompt on it.</summary>
public async Task<string> GetCompletionAsync(
string prompt,
ModelRequirements requirements)
{
var serviceId = requirements switch
{
{ NeedsHighQuality: true } => "openai-gpt4",
{ NeedsSpeed: true } => "azure-gpt4",
{ NeedsPrivacy: true } => "ollama-llama2",
{ NeedsLowCost: true } => "hf-model",
_ => "azure-gpt4"
};
// InvokePromptAsync has no serviceName parameter; the target connector is
// selected via PromptExecutionSettings.ServiceId, and the FunctionResult
// must be unwrapped to a string.
var settings = new PromptExecutionSettings { ServiceId = serviceId };
var result = await _kernel.InvokePromptAsync(prompt, new KernelArguments(settings));
return result.GetValue<string>() ?? string.Empty;
}
}
模型能力适配 #
检测模型能力 #
csharp
/// <summary>
/// Describes what a given chat model can do, so callers can adapt
/// prompts and execution settings to the selected connector.
/// </summary>
public class ModelCapabilities
{
    /// <summary>True when the model can invoke tools / functions.</summary>
    public bool SupportsFunctionCalling { get; set; }

    /// <summary>True when the model accepts image input.</summary>
    public bool SupportsVision { get; set; }

    /// <summary>Upper bound of the model's context window, in tokens.</summary>
    public int MaxContextTokens { get; set; }

    /// <summary>True when the model can stream partial responses.</summary>
    public bool SupportsStreaming { get; set; }
}
/// <summary>
/// In-memory registry of capability profiles keyed by model id.
/// Unknown models get a conservative all-false / zero-token profile.
/// </summary>
public class ModelCapabilityService
{
    // Known model capability profiles; extend as new connectors are added.
    private readonly Dictionary<string, ModelCapabilities> _capabilities = new()
    {
        ["gpt-4"] = new ModelCapabilities
        {
            SupportsFunctionCalling = true,
            SupportsVision = true,
            MaxContextTokens = 128000,
            SupportsStreaming = true
        },
        ["llama2"] = new ModelCapabilities
        {
            SupportsFunctionCalling = false,
            SupportsVision = false,
            MaxContextTokens = 4096,
            SupportsStreaming = true
        }
    };

    /// <summary>Returns the profile for <paramref name="modelId"/>, or a default profile when unknown.</summary>
    public ModelCapabilities GetCapabilities(string modelId) =>
        _capabilities.TryGetValue(modelId, out var known) ? known : new ModelCapabilities();
}
根据能力调整行为 #
csharp
/// <summary>
/// Invokes the prompt on the given service, enabling automatic function
/// calling only when the target model's capability profile supports it.
/// </summary>
/// <param name="kernel">Kernel holding the registered connectors.</param>
/// <param name="prompt">Prompt text to execute.</param>
/// <param name="serviceName">Service id of the target connector (also used as the capability key).</param>
public async Task<string> GetCompletionAsync(
Kernel kernel,
string prompt,
string serviceName)
{
var capabilities = _capabilityService.GetCapabilities(serviceName);
// ServiceId routes the call to the named connector; InvokePromptAsync
// itself has no serviceName parameter.
var settings = new OpenAIPromptExecutionSettings { ServiceId = serviceName };
if (capabilities.SupportsFunctionCalling)
{
settings.FunctionChoiceBehavior = FunctionChoiceBehavior.Auto();
}
var result = await kernel.InvokePromptAsync(prompt, new KernelArguments(settings));
return result.GetValue<string>() ?? string.Empty;
}
本地模型部署 #
使用 vLLM #
bash
# 安装 vLLM
pip install vllm
# 启动服务
python -m vllm.entrypoints.openai.api_server \
--model meta-llama/Llama-2-7b-chat-hf \
--host 0.0.0.0 \
--port 8000
csharp
builder.AddOpenAIChatCompletion(
modelId: "Llama-2-7b-chat-hf",
apiKey: "not-needed",
endpoint: new Uri("http://localhost:8000/v1")
);
使用 Text Generation WebUI #
bash
# 安装
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui
pip install -r requirements.txt
# 启动
python server.py --api --extensions openai
csharp
builder.AddOpenAIChatCompletion(
modelId: "local-model",
apiKey: "not-needed",
endpoint: new Uri("http://localhost:5000/v1")
);
最佳实践 #
1. 模型选择策略 #
csharp
/// <summary>
/// Picks a registered service id for the given task type and privacy level.
/// Sensitive data is always routed to the local Ollama model; everything
/// else defaults to the Azure GPT-4 deployment.
/// </summary>
public string SelectModel(TaskType taskType, PrivacyLevel privacy)
{
    if (privacy == PrivacyLevel.Public)
    {
        if (taskType == TaskType.Complex)
        {
            return "openai-gpt4";
        }
        if (taskType == TaskType.Simple)
        {
            return "azure-gpt35";
        }
    }
    else if (privacy == PrivacyLevel.Sensitive)
    {
        return "ollama-llama2";
    }
    return "azure-gpt4";
}
2. 错误处理 #
csharp
/// <summary>
/// Tries the primary service first and falls back to a secondary one when it
/// throws. Exceptions from the fallback service propagate to the caller.
/// </summary>
/// <param name="kernel">Kernel holding the registered connectors.</param>
/// <param name="prompt">Prompt text to execute.</param>
/// <param name="primaryService">Service id tried first.</param>
/// <param name="fallbackService">Service id used when the primary fails.</param>
public async Task<string> SafeGetCompletionAsync(
Kernel kernel,
string prompt,
string primaryService,
string fallbackService)
{
try
{
return await InvokeOnServiceAsync(kernel, prompt, primaryService);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "主服务失败,使用备用服务");
return await InvokeOnServiceAsync(kernel, prompt, fallbackService);
}
}
// Routes the prompt to one named connector via PromptExecutionSettings.ServiceId;
// InvokePromptAsync has no serviceName parameter and returns a FunctionResult.
private static async Task<string> InvokeOnServiceAsync(Kernel kernel, string prompt, string serviceId)
{
var settings = new PromptExecutionSettings { ServiceId = serviceId };
var result = await kernel.InvokePromptAsync(prompt, new KernelArguments(settings));
return result.GetValue<string>() ?? string.Empty;
}
3. 性能监控 #
csharp
/// <summary>
/// Collects response durations per service id and reports the running average.
/// Not thread-safe; intended for single-threaded instrumentation.
/// </summary>
public class ModelPerformanceTracker
{
// Raw observed durations, keyed by service id.
private readonly Dictionary<string, List<TimeSpan>> _responseTimes = new();
/// <summary>Records one observed response duration for a service.</summary>
public void RecordResponse(string serviceName, TimeSpan duration)
{
// Single TryGetValue lookup instead of ContainsKey + indexer (double lookup).
if (!_responseTimes.TryGetValue(serviceName, out var times))
{
times = new List<TimeSpan>();
_responseTimes[serviceName] = times;
}
times.Add(duration);
}
/// <summary>
/// Average response time for a service; <see cref="TimeSpan.Zero"/> when the
/// service is unknown or has no samples (Average throws on an empty sequence).
/// </summary>
public TimeSpan GetAverageResponseTime(string serviceName)
{
if (!_responseTimes.TryGetValue(serviceName, out var times) || times.Count == 0)
return TimeSpan.Zero;
return TimeSpan.FromTicks((long)times.Average(t => t.Ticks));
}
}
下一步 #
现在你已经掌握了多种 LLM 连接器,接下来学习 记忆系统,了解如何管理对话历史和知识存储!
最后更新:2026-04-04