RAG 应用 #
概述 #
RAG(Retrieval-Augmented Generation,检索增强生成)是一种结合检索和生成的技术,通过检索相关文档来增强 LLM 的回答能力。
RAG 架构 #
```text
┌─────────────────────────────────────────────────────────────┐
│                          RAG 架构                           │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│     用户问题                                                │
│         │                                                   │
│         ▼                                                   │
│  ┌─────────────┐                                            │
│  │ 向量化查询  │                                            │
│  └──────┬──────┘                                            │
│         │                                                   │
│         ▼                                                   │
│  ┌─────────────┐     ┌─────────────┐                        │
│  │  向量检索   │────▶│  相关文档   │                        │
│  └──────┬──────┘     └─────────────┘                        │
│         │                                                   │
│         ▼                                                   │
│  ┌─────────────┐                                            │
│  │ 构建提示词  │                                            │
│  │ 问题 + 文档 │                                            │
│  └──────┬──────┘                                            │
│         │                                                   │
│         ▼                                                   │
│  ┌─────────────┐                                            │
│  │  LLM 生成   │                                            │
│  └──────┬──────┘                                            │
│         │                                                   │
│         ▼                                                   │
│  ┌─────────────┐                                            │
│  │  最终回答   │                                            │
│  └─────────────┘                                            │
│                                                             │
└─────────────────────────────────────────────────────────────┘
```
文档处理管道 #
文档加载 #
csharp
/// <summary>
/// Contract for components that asynchronously load something identified by a
/// path and hand it back as a single string.
/// </summary>
public interface IDocumentLoader
{
/// <summary>
/// Asynchronously loads the item at <paramref name="path"/>.
/// </summary>
/// <param name="path">Location of the item to load.</param>
/// <returns>A task producing the loaded content as a string (presumably the document text — confirm with implementers).</returns>
Task<string> LoadAsync(string path);
}
/// <summary>
/// <see cref="IDocumentLoader"/> implementation that chooses how to read a file
/// from its (case-insensitive) extension.
/// </summary>
public class DocumentLoader : IDocumentLoader
{
    /// <summary>
    /// Loads the file at <paramref name="path"/> and returns its text.
    /// </summary>
    /// <param name="path">Path of the file to load; the extension selects the reader.</param>
    /// <returns>The file content. PDF and DOCX currently yield an empty string (placeholder readers).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    /// <exception cref="NotSupportedException">The extension is not one of .txt, .md, .pdf, .docx.</exception>
    public async Task<string> LoadAsync(string path)
    {
        // Fail with a descriptive argument error instead of a NullReferenceException
        // from calling ToLowerInvariant on a null extension.
        ArgumentNullException.ThrowIfNull(path);

        var extension = Path.GetExtension(path).ToLowerInvariant();
        return extension switch
        {
            // Plain-text formats share the same reader.
            ".txt" or ".md" => await File.ReadAllTextAsync(path),
            ".pdf" => await LoadPdfAsync(path),
            ".docx" => await LoadDocxAsync(path),
            _ => throw new NotSupportedException($"不支持的文件格式: {extension}")
        };
    }

    /// <summary>
    /// Placeholder PDF reader: the real implementation (a PDF parsing library) is omitted,
    /// so this always produces an empty string.
    /// </summary>
    private Task<string> LoadPdfAsync(string path)
    {
        // No awaitable work yet, so return a completed task rather than an
        // 'async' method without 'await' (compiler warning CS1998).
        return Task.FromResult(string.Empty);
    }

    /// <summary>
    /// Placeholder DOCX reader: the real implementation (a Word parsing library) is omitted,
    /// so this always produces an empty string.
    /// </summary>
    private Task<string> LoadDocxAsync(string path)
    {
        return Task.FromResult(string.Empty);
    }
}
文本分块 #
csharp
/// <summary>
/// Splits text into overlapping chunks along sentence boundaries.
/// </summary>
/// <remarks>
/// Sentences keep their terminators and chunks are cut directly out of the input, so
/// <c>chunk.Content == text.Substring(chunk.StartIndex, chunk.EndIndex - chunk.StartIndex)</c>
/// holds for every chunk produced.
/// </remarks>
public class TextChunker
{
    // Both full-width (CJK) and ASCII sentence terminators, plus line breaks.
    private static readonly char[] SentenceDelimiters = { '。', '!', '?', '.', '!', '?', '\n' };

    private readonly int _chunkSize;
    private readonly int _overlap;

    /// <summary>
    /// Creates a chunker.
    /// </summary>
    /// <param name="chunkSize">Target maximum chunk length in UTF-16 chars; must be positive.</param>
    /// <param name="overlap">
    /// Number of trailing chars of a chunk repeated at the start of the next one; must not be negative.
    /// Values of <paramref name="chunkSize"/> or more are clamped to <c>chunkSize - 1</c>, because an
    /// overlap that large would make every chunk contain all the previous ones.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="chunkSize"/> is not positive or <paramref name="overlap"/> is negative.
    /// </exception>
    public TextChunker(int chunkSize = 500, int overlap = 50)
    {
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "chunkSize 必须大于 0");
        }

        if (overlap < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(overlap), overlap, "overlap 不能为负数");
        }

        _chunkSize = chunkSize;
        _overlap = Math.Min(overlap, chunkSize - 1);
    }

    /// <summary>
    /// Splits <paramref name="text"/> into chunks whose ids are <c>{documentId}_{n}</c>.
    /// </summary>
    /// <param name="text">Text to split; null, empty or whitespace-only input yields no chunks.</param>
    /// <param name="documentId">Identifier copied to every chunk and used as the id prefix.</param>
    /// <returns>
    /// Chunks in document order. A chunk is closed when adding the next sentence would exceed the
    /// chunk size; a single sentence longer than the chunk size is kept whole, so chunks may exceed it.
    /// </returns>
    public List<TextChunk> Chunk(string text, string documentId)
    {
        var chunks = new List<TextChunk>();
        if (string.IsNullOrWhiteSpace(text))
        {
            return chunks;
        }

        var chunkStart = 0; // offset in text where the pending chunk begins
        var position = 0;   // offset just past the last sentence added to the pending chunk

        foreach (var sentenceEnd in FindSentenceEnds(text))
        {
            var pendingLength = position - chunkStart;
            var sentenceLength = sentenceEnd - position;

            if (pendingLength > 0 && pendingLength + sentenceLength > _chunkSize)
            {
                AddChunk(chunks, text, documentId, chunkStart, position);

                // The next chunk starts with the tail of the one just closed.
                chunkStart = position - Math.Min(_overlap, pendingLength);

                // Do not start a chunk in the middle of a surrogate pair.
                if (chunkStart > 0
                    && chunkStart < position
                    && char.IsLowSurrogate(text[chunkStart])
                    && char.IsHighSurrogate(text[chunkStart - 1]))
                {
                    chunkStart--;
                }
            }

            position = sentenceEnd;
        }

        AddChunk(chunks, text, documentId, chunkStart, position);
        return chunks;
    }

    /// <summary>
    /// Yields the exclusive end offset of each sentence. Sentences tile the text without gaps:
    /// each one runs up to and including its terminator plus any directly following
    /// terminators and whitespace.
    /// </summary>
    private static IEnumerable<int> FindSentenceEnds(string text)
    {
        var start = 0;
        while (start < text.Length)
        {
            var end = text.IndexOfAny(SentenceDelimiters, start);
            if (end < 0)
            {
                end = text.Length;
            }
            else
            {
                end++;
                while (end < text.Length
                       && (char.IsWhiteSpace(text[end]) || Array.IndexOf(SentenceDelimiters, text[end]) >= 0))
                {
                    end++;
                }
            }

            yield return end;
            start = end;
        }
    }

    /// <summary>
    /// Appends the span [start, end) of <paramref name="text"/> as a chunk after trimming
    /// surrounding whitespace; spans that are entirely whitespace are skipped.
    /// </summary>
    private static void AddChunk(List<TextChunk> chunks, string text, string documentId, int start, int end)
    {
        while (start < end && char.IsWhiteSpace(text[start]))
        {
            start++;
        }

        while (end > start && char.IsWhiteSpace(text[end - 1]))
        {
            end--;
        }

        if (start == end)
        {
            return;
        }

        chunks.Add(new TextChunk
        {
            Id = $"{documentId}_{chunks.Count}",
            DocumentId = documentId,
            Content = text.Substring(start, end - start),
            StartIndex = start,
            EndIndex = end
        });
    }
}

/// <summary>
/// One chunk of a document produced by <see cref="TextChunker"/>.
/// </summary>
public class TextChunk
{
    /// <summary>Chunk identifier, <c>{DocumentId}_{sequence number}</c>.</summary>
    public string Id { get; set; } = string.Empty;

    /// <summary>Identifier of the document the chunk was cut from.</summary>
    public string DocumentId { get; set; } = string.Empty;

    /// <summary>Chunk text.</summary>
    public string Content { get; set; } = string.Empty;

    /// <summary>Inclusive start offset of the chunk in the source text.</summary>
    public int StartIndex { get; set; }

    /// <summary>Exclusive end offset of the chunk in the source text.</summary>
    public int EndIndex { get; set; }
}
向量存储 #
索引文档 #
csharp
/// <summary>
/// Chunks documents, embeds every chunk and stores the result in the memory store.
/// </summary>
public class DocumentIndexer
{
    // Name of the memory collection that holds all document chunks.
    private const string CollectionName = "documents";

    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;
    private readonly TextChunker _chunker;

    /// <summary>
    /// Creates an indexer.
    /// </summary>
    /// <param name="embeddingService">Service used to embed chunk text.</param>
    /// <param name="memoryStore">Store that receives one record per chunk.</param>
    /// <param name="chunker">Splitter applied to each document's content.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public DocumentIndexer(
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore,
        TextChunker chunker)
    {
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
        _chunker = chunker ?? throw new ArgumentNullException(nameof(chunker));
    }

    /// <summary>
    /// Indexes a single document, creating the collection first when it does not exist yet.
    /// </summary>
    /// <param name="document">Document to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    public async Task IndexDocumentAsync(Document document)
    {
        ArgumentNullException.ThrowIfNull(document);

        // Upserting into a collection that was never created fails on stores that
        // do not create collections implicitly, so make sure it is there.
        await EnsureCollectionAsync();
        await IndexChunksAsync(document);
    }

    /// <summary>
    /// Indexes several documents one after another.
    /// </summary>
    /// <param name="documents">Documents to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
    public async Task IndexDocumentsAsync(IEnumerable<Document> documents)
    {
        ArgumentNullException.ThrowIfNull(documents);

        await EnsureCollectionAsync();
        foreach (var doc in documents)
        {
            await IndexChunksAsync(doc);
        }
    }

    /// <summary>Creates the chunk collection unless the store already has it.</summary>
    private async Task EnsureCollectionAsync()
    {
        if (!await _memoryStore.DoesCollectionExistAsync(CollectionName))
        {
            await _memoryStore.CreateCollectionAsync(CollectionName);
        }
    }

    /// <summary>Embeds and upserts every chunk of <paramref name="document"/>.</summary>
    private async Task IndexChunksAsync(Document document)
    {
        var chunks = _chunker.Chunk(document.Content, document.Id);
        foreach (var chunk in chunks)
        {
            var embedding = await _embeddingService.GenerateEmbeddingAsync(chunk.Content);

            // Serialized with default options, so the JSON keys are "Id", "Title",
            // "StartIndex" and "EndIndex"; the two offsets are JSON numbers.
            var additionalMetadata = JsonSerializer.Serialize(new
            {
                document.Id,
                document.Title,
                chunk.StartIndex,
                chunk.EndIndex
            });

            var record = new MemoryRecord(
                new MemoryRecordMetadata(
                    isReference: false,
                    id: chunk.Id,
                    text: chunk.Content,
                    description: document.Title,
                    externalSourceName: document.Source,
                    additionalMetadata: additionalMetadata
                ),
                embedding,
                null
            );

            await _memoryStore.UpsertAsync(CollectionName, record);
        }
    }
}
RAG 查询 #
基本查询 #
csharp
/// <summary>
/// Basic retrieval-augmented question answering: embed the question, fetch the nearest
/// document chunks and let the model answer from them.
/// </summary>
public class RAGService
{
    private const string CollectionName = "documents";

    // The retrieved text and the question are passed as template variables instead of being
    // spliced into the prompt: the prompt is parsed as a template, so any "{{ ... }}" inside a
    // document or question would otherwise be interpreted as template syntax.
    // NOTE(review): depending on the Semantic Kernel version, variable values may be
    // HTML-encoded when rendered — confirm this is acceptable for the indexed content.
    private const string PromptTemplate = """
        请根据以下参考资料回答问题。如果资料中没有相关信息,请说明。
        参考资料:
        {{$context}}
        问题:{{$question}}
        回答:
        """;

    private readonly Kernel _kernel;
    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="kernel">Kernel used to run the answer prompt.</param>
    /// <param name="embeddingService">Service used to embed the question.</param>
    /// <param name="memoryStore">Store searched for relevant chunks.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public RAGService(
        Kernel kernel,
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
    }

    /// <summary>
    /// Answers <paramref name="question"/> from the indexed documents.
    /// </summary>
    /// <param name="question">The user's question.</param>
    /// <param name="topK">Maximum number of chunks to retrieve.</param>
    /// <returns>The model's answer as text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="question"/> is null.</exception>
    public async Task<string> QueryAsync(string question, int topK = 5)
    {
        ArgumentNullException.ThrowIfNull(question);

        // 1. Embed the question.
        var queryEmbedding = await _embeddingService.GenerateEmbeddingAsync(question);

        // 2. Retrieve the most similar chunks (relevance threshold 0.7).
        var relevantDocs = new List<string>();
        var results = _memoryStore.GetNearestMatchesAsync(
            CollectionName,
            queryEmbedding,
            limit: topK,
            minRelevanceScore: 0.7f
        );
        await foreach (var (record, _) in results)
        {
            relevantDocs.Add(record.Metadata.Text);
        }

        // 3. Build the prompt arguments.
        var arguments = new KernelArguments
        {
            ["context"] = string.Join("\n\n---\n\n", relevantDocs),
            ["question"] = question
        };

        // 4. Generate the answer; the invocation result is converted to text explicitly.
        var answer = await _kernel.InvokePromptAsync(PromptTemplate, arguments);
        return answer.ToString();
    }
}
高级查询 #
csharp
/// <summary>
/// Retrieval-augmented question answering that also reports which chunks were used.
/// </summary>
public class AdvancedRAGService
{
    private const string CollectionName = "documents";

    // Sources and question are template variables rather than interpolated text: the prompt is
    // parsed as a template, so "{{ ... }}" inside retrieved content must not reach the parser.
    // NOTE(review): depending on the Semantic Kernel version, variable values may be
    // HTML-encoded when rendered — confirm this is acceptable for the indexed content.
    private const string PromptTemplate = """
        基于以下参考资料回答问题。
        要求:
        1. 回答要准确,基于参考资料
        2. 如果资料不足,明确说明
        3. 引用来源时使用 [来源编号] 格式
        参考资料:
        {{$sources}}
        问题:{{$question}}
        回答:
        """;

    private readonly Kernel _kernel;
    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="kernel">Kernel used to run the answer prompt.</param>
    /// <param name="embeddingService">Service used to embed the question.</param>
    /// <param name="memoryStore">Store searched for relevant chunks.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public AdvancedRAGService(
        Kernel kernel,
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
    }

    /// <summary>
    /// Answers <paramref name="question"/> and returns the chunks the answer was based on.
    /// </summary>
    /// <param name="question">The user's question.</param>
    /// <param name="topK">Maximum number of chunks to retrieve.</param>
    /// <param name="minScore">Minimum relevance score a chunk needs to be used.</param>
    /// <returns>The answer text together with the retrieved sources, in retrieval order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="question"/> is null.</exception>
    public async Task<RAGResponse> QueryWithSourcesAsync(
        string question,
        int topK = 5,
        float minScore = 0.7f)
    {
        ArgumentNullException.ThrowIfNull(question);

        var queryEmbedding = await _embeddingService.GenerateEmbeddingAsync(question);

        var sources = new List<SourceInfo>();
        var results = _memoryStore.GetNearestMatchesAsync(
            CollectionName,
            queryEmbedding,
            limit: topK,
            minRelevanceScore: minScore
        );
        await foreach (var (record, score) in results)
        {
            sources.Add(new SourceInfo
            {
                Content = record.Metadata.Text,
                DocumentId = ExtractDocumentId(record.Metadata.AdditionalMetadata),
                Title = record.Metadata.Description,
                Score = (float)score
            });
        }

        var arguments = new KernelArguments
        {
            ["sources"] = FormatSources(sources),
            ["question"] = question
        };
        var answer = await _kernel.InvokePromptAsync(PromptTemplate, arguments);

        return new RAGResponse
        {
            Answer = answer.ToString(),
            Sources = sources
        };
    }

    /// <summary>
    /// Reads the "Id" property from a record's additional-metadata JSON.
    /// </summary>
    /// <returns>The id, or an empty string when the metadata is missing, malformed or has no "Id".</returns>
    private static string ExtractDocumentId(string additionalMetadata)
    {
        if (string.IsNullOrWhiteSpace(additionalMetadata))
        {
            return string.Empty;
        }

        try
        {
            // Parsed as a DOM because the metadata mixes string and numeric values;
            // binding it to Dictionary<string, string> fails on the numeric ones.
            using var json = JsonDocument.Parse(additionalMetadata);
            var root = json.RootElement;
            if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("Id", out var id))
            {
                return id.ValueKind == JsonValueKind.String
                    ? id.GetString() ?? string.Empty
                    : id.ToString();
            }
        }
        catch (JsonException)
        {
            // A record with unreadable metadata still contributes its text;
            // only the document id is unavailable.
        }

        return string.Empty;
    }

    /// <summary>Renders the sources as numbered lines: "[1] …", "[2] …".</summary>
    private string FormatSources(List<SourceInfo> sources)
    {
        var sb = new StringBuilder();
        for (int i = 0; i < sources.Count; i++)
        {
            sb.AppendLine($"[{i + 1}] {sources[i].Content}");
        }
        return sb.ToString();
    }
}
/// <summary>
/// Result of a retrieval-augmented query: the generated answer and the sources behind it.
/// </summary>
public class RAGResponse
{
    /// <summary>Generated answer text; empty until set.</summary>
    public string Answer { get; set; } = string.Empty;

    /// <summary>Sources the answer was based on; an empty list (never null) until set.</summary>
    public List<SourceInfo> Sources { get; set; } = new();
}
/// <summary>
/// A retrieved piece of content together with where it came from and how well it matched.
/// </summary>
public class SourceInfo
{
    /// <summary>Retrieved text; empty until set.</summary>
    public string Content { get; set; } = string.Empty;

    /// <summary>Identifier of the originating document; empty until set.</summary>
    public string DocumentId { get; set; } = string.Empty;

    /// <summary>Title of the originating document; empty until set.</summary>
    public string Title { get; set; } = string.Empty;

    /// <summary>Relevance score of the match.</summary>
    public float Score { get; set; }
}
完整示例 #
文档问答系统 #
csharp
/// <summary>
/// Document question answering facade: imports files into the index and answers
/// questions with source references.
/// </summary>
public class DocumentQAService
{
    // Maximum number of chars of a source shown as a snippet.
    private const int SnippetLength = 200;

    private readonly IDocumentLoader _loader;
    private readonly DocumentIndexer _indexer;

    // Must be the service that exposes QueryWithSourcesAsync.
    private readonly AdvancedRAGService _ragService;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="loader">Reads file content.</param>
    /// <param name="indexer">Indexes imported documents.</param>
    /// <param name="ragService">Answers questions and reports the sources used.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public DocumentQAService(
        IDocumentLoader loader,
        DocumentIndexer indexer,
        AdvancedRAGService ragService)
    {
        _loader = loader ?? throw new ArgumentNullException(nameof(loader));
        _indexer = indexer ?? throw new ArgumentNullException(nameof(indexer));
        _ragService = ragService ?? throw new ArgumentNullException(nameof(ragService));
    }

    /// <summary>
    /// Loads the file at <paramref name="filePath"/> and indexes it as a new document.
    /// </summary>
    /// <param name="filePath">Path of the file to import; also recorded as the document source.</param>
    /// <remarks>
    /// Every call assigns a fresh GUID as the document id, so importing the same file twice
    /// indexes it twice.
    /// </remarks>
    public async Task ImportDocumentAsync(string filePath)
    {
        var content = await _loader.LoadAsync(filePath);
        var document = new Document
        {
            Id = Guid.NewGuid().ToString(),
            Title = Path.GetFileNameWithoutExtension(filePath),
            Content = content,
            Source = filePath
        };
        await _indexer.IndexDocumentAsync(document);
    }

    /// <summary>
    /// Answers <paramref name="question"/> from the indexed documents.
    /// </summary>
    /// <param name="question">The user's question.</param>
    /// <returns>The question, the answer and a shortened reference for every source used.</returns>
    public async Task<QAResponse> AskAsync(string question)
    {
        var response = await _ragService.QueryWithSourcesAsync(question);
        return new QAResponse
        {
            Question = question,
            Answer = response.Answer,
            Sources = response.Sources.Select(s => new SourceReference
            {
                Title = s.Title,
                Snippet = ToSnippet(s.Content),
                Relevance = s.Score
            }).ToList()
        };
    }

    /// <summary>
    /// Shortens <paramref name="content"/> to at most <see cref="SnippetLength"/> chars followed by "...".
    /// </summary>
    private static string ToSnippet(string content)
    {
        if (content.Length <= SnippetLength)
        {
            return content;
        }

        // Back off by one char when the cut would separate a surrogate pair.
        var cut = char.IsHighSurrogate(content[SnippetLength - 1]) ? SnippetLength - 1 : SnippetLength;
        return content.Substring(0, cut) + "...";
    }
}
使用示例 #
csharp
// Construct the service; the constructor arguments are elided in this example.
var qaService = new DocumentQAService(...);
// Import documents
await qaService.ImportDocumentAsync("docs/product-manual.pdf");
await qaService.ImportDocumentAsync("docs/faq.md");
// Ask a question
var response = await qaService.AskAsync("如何重置密码?");
Console.WriteLine($"问题:{response.Question}");
Console.WriteLine($"回答:{response.Answer}");
Console.WriteLine("\n参考来源:");
// List each source with its relevance formatted to two decimals.
foreach (var source in response.Sources)
{
Console.WriteLine($"- {source.Title} (相关性: {source.Relevance:F2})");
}
优化技巧 #
混合检索 #
csharp
/// <summary>
/// Combines vector search and keyword search into one ranked result list.
/// </summary>
public class HybridSearchService
{
    /// <summary>
    /// Runs the vector search, then the keyword search, and merges both result sets.
    /// </summary>
    /// <param name="query">Search text handed to both searches.</param>
    /// <returns>Merged results ordered by combined score, highest first.</returns>
    public async Task<List<SearchResult>> SearchAsync(string query)
    {
        var fromVectors = await VectorSearchAsync(query);
        var fromKeywords = await KeywordSearchAsync(query);
        return MergeResults(fromVectors, fromKeywords);
    }

    /// <summary>
    /// Merges the two result sets by id. Vector scores are scaled by 0.7 and keyword scores
    /// by 0.3; an id found by both searches gets the sum of its two scaled scores.
    /// The scores of the passed-in result objects are modified in place.
    /// </summary>
    private List<SearchResult> MergeResults(
        List<SearchResult> vectorResults,
        List<SearchResult> keywordResults)
    {
        var byId = new Dictionary<string, SearchResult>();

        foreach (var vectorHit in vectorResults)
        {
            // Weight of the vector search.
            vectorHit.Score *= 0.7f;
            byId[vectorHit.Id] = vectorHit;
        }

        foreach (var keywordHit in keywordResults)
        {
            if (!byId.TryGetValue(keywordHit.Id, out var alreadyFound))
            {
                // Only the keyword search found this id: keep it with the keyword weight.
                keywordHit.Score *= 0.3f;
                byId[keywordHit.Id] = keywordHit;
                continue;
            }

            // Found by both searches: add the weighted keyword score.
            alreadyFound.Score += keywordHit.Score * 0.3f;
        }

        var ranked =
            from hit in byId.Values
            orderby hit.Score descending
            select hit;
        return ranked.ToList();
    }
}
重排序 #
csharp
/// <summary>
/// Re-orders search results by asking the model which ones are most relevant to the query.
/// </summary>
public class Reranker
{
    // Query and results are template variables rather than interpolated text: the prompt is
    // parsed as a template, so "{{ ... }}" inside result content must not reach the parser.
    // NOTE(review): depending on the Semantic Kernel version, variable values may be
    // HTML-encoded when rendered — confirm this is acceptable for the result content.
    private const string RerankPromptTemplate = """
        对以下搜索结果按与问题的相关性重新排序。
        问题:{{$query}}
        搜索结果:
        {{$results}}
        请返回最相关的{{$topN}}个结果的编号,用逗号分隔(如:2,0,5)
        """;

    // Separators accepted between indices in the model's reply (ASCII and full-width).
    private static readonly char[] IndexSeparators = { ',', ',', '、', ';', ' ', '\t', '\r', '\n' };

    private readonly Kernel _kernel;

    /// <summary>
    /// Creates the reranker.
    /// </summary>
    /// <param name="kernel">Kernel used to run the rerank prompt.</param>
    /// <exception cref="ArgumentNullException"><paramref name="kernel"/> is null.</exception>
    public Reranker(Kernel kernel)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
    }

    /// <summary>
    /// Returns the results the model considers most relevant to <paramref name="query"/>.
    /// </summary>
    /// <param name="query">The question the results should be relevant to.</param>
    /// <param name="results">Candidate results; not modified.</param>
    /// <param name="topN">Number of results to ask for and the maximum number returned.</param>
    /// <returns>
    /// Up to <paramref name="topN"/> results in the model's order. Indices in the reply that are
    /// not numbers, are out of range or are repeated are ignored; when no usable index is left,
    /// the first <paramref name="topN"/> results are returned in their original order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="results"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="topN"/> is not positive.</exception>
    public async Task<List<SearchResult>> RerankAsync(
        string query,
        List<SearchResult> results,
        int topN = 3)
    {
        ArgumentNullException.ThrowIfNull(results);
        if (topN <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(topN), topN, "topN 必须大于 0");
        }

        if (results.Count == 0)
        {
            // Nothing to rank, so do not spend a model call.
            return new List<SearchResult>();
        }

        var arguments = new KernelArguments
        {
            ["query"] = query,
            ["results"] = string.Join("\n", results.Select((r, i) => $"[{i}] {r.Content}")),
            ["topN"] = topN
        };
        var response = await _kernel.InvokePromptAsync(RerankPromptTemplate, arguments);

        var indices = ParseIndices(response.ToString(), results.Count, topN);
        return indices.Count > 0
            ? indices.Select(i => results[i]).ToList()
            : results.Take(topN).ToList();
    }

    /// <summary>
    /// Extracts at most <paramref name="limit"/> distinct indices in [0, <paramref name="count"/>)
    /// from a model reply, keeping the order in which they appear.
    /// </summary>
    private static List<int> ParseIndices(string reply, int count, int limit)
    {
        var picked = new List<int>();
        foreach (var token in reply.Split(IndexSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Tolerate decorations such as "[2]" or a trailing full stop.
            var digits = token.Trim('[', ']', '(', ')', '.', '。');

            // NumberStyles.None accepts digits only, so negative numbers are rejected here.
            if (int.TryParse(
                    digits,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var index)
                && index < count
                && !picked.Contains(index))
            {
                picked.Add(index);
                if (picked.Count == limit)
                {
                    break;
                }
            }
        }

        return picked;
    }
}
最佳实践 #
1. 合理的分块大小 #
csharp
// 根据文档类型选择
// Choose the chunk size from the kind of document being indexed.
var chunkSizeForType = documentType switch
{
    "technical" => 1000, // technical documentation
    "faq" => 200,        // FAQ entries
    "article" => 500,    // articles
    _ => 500             // anything else
};
var chunker = new TextChunker(chunkSize: chunkSizeForType, overlap: 50);
2. 元数据管理 #
csharp
// Anonymous object bundling the metadata fields kept for a chunk. Each member takes its
// name and value from the identically named variable, which must be in scope here.
var metadata = new
{
documentId,
title,
source,
author,
createdAt,
tags,
chunkIndex,
totalChunks
};
3. 增量更新 #
csharp
/// <summary>
/// Replaces a document's index entries: its existing chunks are deleted, then the
/// document is indexed again.
/// </summary>
/// <param name="document">The new version of the document.</param>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
/// <remarks>
/// The two steps are not atomic: if re-indexing fails after the delete succeeded, the
/// document stays absent from the index until the update is retried.
/// </remarks>
public async Task UpdateDocumentAsync(Document document)
{
    // Reject null before anything is deleted.
    ArgumentNullException.ThrowIfNull(document);

    // Remove the old index entries.
    await DeleteDocumentChunksAsync(document.Id);

    // Index the new content.
    await IndexDocumentAsync(document);
}
下一步 #
现在你已经掌握了 RAG 应用,接下来学习流式输出,了解如何实现实时响应!
最后更新:2026-04-04