RAG 应用 #

概述 #

RAG（Retrieval-Augmented Generation，检索增强生成）是一种将检索与生成相结合的技术：先从知识库中检索与问题相关的文档片段，再将这些片段连同问题一起交给 LLM 生成回答，从而增强 LLM 的回答能力。

RAG 架构 #

text

┌─────────────────────────────────────────────────────────────┐
│                    RAG 架构                                  │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  用户问题                                                   │
│      │                                                      │
│      ▼                                                      │
│  ┌─────────────┐                                            │
│  │  向量化查询  │                                            │
│  └──────┬──────┘                                            │
│         │                                                    │
│         ▼                                                    │
│  ┌─────────────┐     ┌─────────────┐                        │
│  │  向量检索    │────▶│  相关文档    │                        │
│  └──────┬──────┘     └─────────────┘                        │
│         │                                                    │
│         ▼                                                    │
│  ┌─────────────┐                                            │
│  │  构建提示词  │                                            │
│  │  问题 + 文档 │                                            │
│  └──────┬──────┘                                            │
│         │                                                    │
│         ▼                                                    │
│  ┌─────────────┐                                            │
│  │   LLM 生成   │                                            │
│  └──────┬──────┘                                            │
│         │                                                    │
│         ▼                                                    │
│  ┌─────────────┐                                            │
│  │   最终回答   │                                            │
│  └─────────────┘                                            │
│                                                             │
└─────────────────────────────────────────────────────────────┘

文档处理管道 #

文档加载 #

csharp

/// <summary>
/// Contract for turning a file on disk into plain text.
/// </summary>
public interface IDocumentLoader
{
    /// <summary>Reads the file at <paramref name="path"/> and returns its text content.</summary>
    /// <param name="path">Path of the file to load.</param>
    Task<string> LoadAsync(string path);
}

/// <summary>
/// <see cref="IDocumentLoader"/> that picks a reading strategy from the file extension.
/// </summary>
public class DocumentLoader : IDocumentLoader
{
    /// <summary>
    /// Loads <paramref name="path"/> as text. <c>.txt</c> and <c>.md</c> files are read
    /// verbatim; <c>.pdf</c> and <c>.docx</c> are delegated to the placeholder extractors below.
    /// </summary>
    /// <param name="path">Path of the file to load; the extension selects the strategy.</param>
    /// <returns>The text of the document.</returns>
    /// <exception cref="NotSupportedException">The extension is not one of the four handled ones.</exception>
    public async Task<string> LoadAsync(string path)
    {
        // Lower-cased so that ".TXT" and ".txt" take the same branch.
        var extension = Path.GetExtension(path).ToLowerInvariant();

        switch (extension)
        {
            case ".txt":
            case ".md":
                return await File.ReadAllTextAsync(path);

            case ".pdf":
                return await LoadPdfAsync(path);

            case ".docx":
                return await LoadDocxAsync(path);

            default:
                throw new NotSupportedException($"不支持的文件格式: {extension}");
        }
    }

    // Placeholder for a PDF parsing library; the implementation is omitted and
    // an empty string is returned for every input.
    private Task<string> LoadPdfAsync(string path) => Task.FromResult("");

    // Placeholder for a Word parsing library; the implementation is omitted and
    // an empty string is returned for every input.
    private Task<string> LoadDocxAsync(string path) => Task.FromResult("");
}

文本分块 #

csharp

/// <summary>
/// Splits a document into sentence-aligned chunks with a character overlap
/// between consecutive chunks.
/// </summary>
/// <remarks>
/// Every chunk is a slice of the input text, so
/// <c>text.Substring(chunk.StartIndex, chunk.EndIndex - chunk.StartIndex)</c>
/// equals <c>chunk.Content</c>. Sentence punctuation and inner whitespace are kept;
/// only whitespace at the two ends of a chunk is dropped.
/// </remarks>
public class TextChunker
{
    // Characters that end a sentence: CJK and ASCII terminators plus line feed.
    private static readonly char[] SentenceDelimiters = { '。', '！', '？', '.', '!', '?', '\n' };

    private readonly int _chunkSize;
    private readonly int _overlap;

    /// <summary>Creates a chunker.</summary>
    /// <param name="chunkSize">Target maximum chunk length in characters; must be positive.</param>
    /// <param name="overlap">
    /// Number of trailing characters of a chunk that are repeated at the start of the next
    /// one; must be non-negative and smaller than <paramref name="chunkSize"/>.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">An argument is outside the range above.</exception>
    public TextChunker(int chunkSize = 500, int overlap = 50)
    {
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be positive.");
        }

        if (overlap < 0 || overlap >= chunkSize)
        {
            throw new ArgumentOutOfRangeException(
                nameof(overlap), overlap, "Overlap must be non-negative and smaller than the chunk size.");
        }

        _chunkSize = chunkSize;
        _overlap = overlap;
    }

    /// <summary>
    /// Cuts <paramref name="text"/> into chunks. Sentences are never split, so a single
    /// sentence longer than the chunk size yields a chunk that exceeds it.
    /// </summary>
    /// <param name="text">Full document text.</param>
    /// <param name="documentId">Identifier copied to every chunk and used as the chunk id prefix.</param>
    /// <returns>Chunks in document order; empty when the text contains no sentence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public List<TextChunk> Chunk(string text, string documentId)
    {
        ArgumentNullException.ThrowIfNull(text);

        var chunks = new List<TextChunk>();
        var hasOpenChunk = false;
        var chunkStart = 0;
        var chunkEnd = 0;

        foreach (var (sentenceStart, sentenceEnd) in FindSentenceSpans(text))
        {
            if (!hasOpenChunk)
            {
                hasOpenChunk = true;
                chunkStart = sentenceStart;
            }
            else if (sentenceEnd - chunkStart > _chunkSize)
            {
                // Appending this sentence would overflow: close the current chunk first.
                var finished = CreateChunk(text, documentId, chunks.Count, chunkStart, chunkEnd);
                chunks.Add(finished);

                // The next chunk re-reads the tail of the one just closed.
                var carried = Math.Min(_overlap, finished.EndIndex - finished.StartIndex);
                chunkStart = carried == 0 ? sentenceStart : finished.EndIndex - carried;

                // Do not begin a chunk in the middle of a surrogate pair.
                if (chunkStart < sentenceStart && char.IsLowSurrogate(text[chunkStart]))
                {
                    chunkStart++;
                }
            }

            chunkEnd = sentenceEnd;
        }

        if (hasOpenChunk)
        {
            chunks.Add(CreateChunk(text, documentId, chunks.Count, chunkStart, chunkEnd));
        }

        return chunks;
    }

    // Returns [start, end) offsets of every sentence that has real content. A span
    // includes its terminating delimiter(s); runs such as "?!" or "\n\n" stay together.
    private static List<(int Start, int End)> FindSentenceSpans(string text)
    {
        var spans = new List<(int Start, int End)>();
        var start = 0;

        while (start < text.Length)
        {
            var hit = text.IndexOfAny(SentenceDelimiters, start);
            var end = hit < 0 ? text.Length : hit + 1;

            while (end < text.Length && IsDelimiter(text[end]))
            {
                end++;
            }

            if (HasContent(text, start, end))
            {
                spans.Add((start, end));
            }

            start = end;
        }

        return spans;
    }

    // True when [start, end) holds at least one character that is neither whitespace nor a delimiter.
    private static bool HasContent(string text, int start, int end)
    {
        for (var i = start; i < end; i++)
        {
            if (!char.IsWhiteSpace(text[i]) && !IsDelimiter(text[i]))
            {
                return true;
            }
        }

        return false;
    }

    private static bool IsDelimiter(char c) => Array.IndexOf(SentenceDelimiters, c) >= 0;

    // Builds the chunk for [start, end) after trimming whitespace from both ends,
    // so the stored offsets always describe exactly the stored content.
    private static TextChunk CreateChunk(string text, string documentId, int ordinal, int start, int end)
    {
        while (start < end && char.IsWhiteSpace(text[start]))
        {
            start++;
        }

        while (end > start && char.IsWhiteSpace(text[end - 1]))
        {
            end--;
        }

        return new TextChunk
        {
            Id = $"{documentId}_{ordinal}",
            DocumentId = documentId,
            Content = text.Substring(start, end - start),
            StartIndex = start,
            EndIndex = end
        };
    }
}

/// <summary>
/// One chunk produced by <see cref="TextChunker"/>.
/// </summary>
public class TextChunk
{
    /// <summary>Chunk identifier in the form <c>{DocumentId}_{ordinal}</c>.</summary>
    public string Id { get; set; }

    /// <summary>Identifier of the document the chunk was cut from.</summary>
    public string DocumentId { get; set; }

    /// <summary>Text of the chunk.</summary>
    public string Content { get; set; }

    /// <summary>Offset of the first character of <see cref="Content"/> in the source text.</summary>
    public int StartIndex { get; set; }

    /// <summary>Offset just past the last character of <see cref="Content"/> in the source text.</summary>
    public int EndIndex { get; set; }
}

向量存储 #

索引文档 #

csharp

/// <summary>
/// Chunks documents, embeds every chunk and upserts the records into the
/// <c>documents</c> memory collection.
/// </summary>
public class DocumentIndexer
{
    // Collection every store call in this class targets.
    private const string CollectionName = "documents";

    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;
    private readonly TextChunker _chunker;

    /// <summary>Creates an indexer over the given embedding service, store and chunker.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public DocumentIndexer(
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore,
        TextChunker chunker)
    {
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
        _chunker = chunker ?? throw new ArgumentNullException(nameof(chunker));
    }

    /// <summary>
    /// Indexes one document, creating the collection first when it does not exist yet,
    /// so the method can be called on its own against an empty store.
    /// </summary>
    /// <param name="document">Document to chunk, embed and store.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    public async Task IndexDocumentAsync(Document document)
    {
        ArgumentNullException.ThrowIfNull(document);

        await EnsureCollectionAsync();
        await IndexChunksAsync(document);
    }

    /// <summary>
    /// Indexes several documents one after another. The collection is checked once
    /// up front, also when <paramref name="documents"/> is empty.
    /// </summary>
    /// <param name="documents">Documents to index.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="documents"/> or one of its elements is null.
    /// </exception>
    public async Task IndexDocumentsAsync(IEnumerable<Document> documents)
    {
        ArgumentNullException.ThrowIfNull(documents);

        await EnsureCollectionAsync();

        foreach (var doc in documents)
        {
            await IndexChunksAsync(doc);
        }
    }

    // Creates the collection only when the store reports that it is missing.
    private async Task EnsureCollectionAsync()
    {
        if (!await _memoryStore.DoesCollectionExistAsync(CollectionName))
        {
            await _memoryStore.CreateCollectionAsync(CollectionName);
        }
    }

    // Embeds and upserts every chunk of one document; expects the collection to exist.
    private async Task IndexChunksAsync(Document document)
    {
        ArgumentNullException.ThrowIfNull(document);

        var chunks = _chunker.Chunk(document.Content, document.Id);

        foreach (var chunk in chunks)
        {
            var embedding = await _embeddingService.GenerateEmbeddingAsync(chunk.Content);

            // Serialized with the property names Id, Title, StartIndex and EndIndex;
            // the two indices are written as JSON numbers.
            var additionalMetadata = JsonSerializer.Serialize(new
            {
                document.Id,
                document.Title,
                chunk.StartIndex,
                chunk.EndIndex
            });

            var metadata = new MemoryRecordMetadata(
                isReference: false,
                id: chunk.Id,
                text: chunk.Content,
                description: document.Title,
                externalSourceName: document.Source,
                additionalMetadata: additionalMetadata);

            var record = new MemoryRecord(metadata, embedding, null);

            await _memoryStore.UpsertAsync(CollectionName, record);
        }
    }
}

RAG 查询 #

基本查询 #

csharp

/// <summary>
/// Minimal retrieval-augmented query: embed the question, fetch the nearest chunks
/// from the <c>documents</c> collection and let the kernel answer from them.
/// </summary>
public class RAGService
{
    private const string CollectionName = "documents";

    // The retrieved text and the question are supplied as template variables instead of
    // being spliced into the template, so "{{" / "}}" inside them is not parsed as
    // template syntax.
    private const string PromptTemplate = """
        请根据以下参考资料回答问题。如果资料中没有相关信息，请说明。

        参考资料：
        {{$context}}

        问题：{{$question}}

        回答：
        """;

    private readonly Kernel _kernel;
    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;

    /// <summary>Creates the service over the given kernel, embedding service and store.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public RAGService(
        Kernel kernel,
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
    }

    /// <summary>Answers <paramref name="question"/> from the indexed documents.</summary>
    /// <param name="question">User question; must not be null or blank.</param>
    /// <param name="topK">Maximum number of chunks to retrieve.</param>
    /// <param name="minScore">Minimum relevance score a chunk needs to be used.</param>
    /// <returns>The text of the model's answer.</returns>
    /// <exception cref="ArgumentException"><paramref name="question"/> is null or blank.</exception>
    public async Task<string> QueryAsync(string question, int topK = 5, float minScore = 0.7f)
    {
        if (string.IsNullOrWhiteSpace(question))
        {
            throw new ArgumentException("Question must not be empty.", nameof(question));
        }

        // 1. Embed the question.
        var queryEmbedding = await _embeddingService.GenerateEmbeddingAsync(question);

        // 2. Retrieve the most relevant chunks.
        var relevantDocs = new List<string>();
        var results = _memoryStore.GetNearestMatchesAsync(
            CollectionName,
            queryEmbedding,
            limit: topK,
            minRelevanceScore: minScore);

        await foreach (var (record, _) in results)
        {
            relevantDocs.Add(record.Metadata.Text);
        }

        // 3. Bind the retrieved text and the question to the template variables.
        var arguments = new KernelArguments
        {
            ["context"] = string.Join("\n\n---\n\n", relevantDocs),
            ["question"] = question
        };

        // 4. Generate the answer; the function result is converted to its text form.
        var result = await _kernel.InvokePromptAsync(PromptTemplate, arguments);
        return result.ToString();
    }
}

高级查询 #

csharp

/// <summary>
/// Retrieval-augmented query that also returns the chunks the answer was built from,
/// numbered so the model can cite them as <c>[n]</c>.
/// </summary>
public class AdvancedRAGService
{
    private const string CollectionName = "documents";

    // Sources and question are template variables rather than interpolated text, so
    // "{{" / "}}" inside retrieved documents is not parsed as template syntax.
    private const string PromptTemplate = """
        基于以下参考资料回答问题。

        要求：
        1. 回答要准确，基于参考资料
        2. 如果资料不足，明确说明
        3. 引用来源时使用 [来源编号] 格式

        参考资料：
        {{$sources}}

        问题：{{$question}}

        回答：
        """;

    private readonly Kernel _kernel;
    private readonly ITextEmbeddingGenerationService _embeddingService;
    private readonly IMemoryStore _memoryStore;

    /// <summary>Creates the service over the given kernel, embedding service and store.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public AdvancedRAGService(
        Kernel kernel,
        ITextEmbeddingGenerationService embeddingService,
        IMemoryStore memoryStore)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
        _embeddingService = embeddingService ?? throw new ArgumentNullException(nameof(embeddingService));
        _memoryStore = memoryStore ?? throw new ArgumentNullException(nameof(memoryStore));
    }

    /// <summary>Answers <paramref name="question"/> and reports the retrieved sources.</summary>
    /// <param name="question">User question; must not be null or blank.</param>
    /// <param name="topK">Maximum number of chunks to retrieve.</param>
    /// <param name="minScore">Minimum relevance score a chunk needs to be used.</param>
    /// <returns>The answer text together with the sources in retrieval order.</returns>
    /// <exception cref="ArgumentException"><paramref name="question"/> is null or blank.</exception>
    public async Task<RAGResponse> QueryWithSourcesAsync(
        string question,
        int topK = 5,
        float minScore = 0.7f)
    {
        if (string.IsNullOrWhiteSpace(question))
        {
            throw new ArgumentException("Question must not be empty.", nameof(question));
        }

        var queryEmbedding = await _embeddingService.GenerateEmbeddingAsync(question);

        var sources = new List<SourceInfo>();
        var results = _memoryStore.GetNearestMatchesAsync(
            CollectionName,
            queryEmbedding,
            limit: topK,
            minRelevanceScore: minScore);

        await foreach (var (record, score) in results)
        {
            sources.Add(new SourceInfo
            {
                Content = record.Metadata.Text,
                DocumentId = ReadDocumentId(record.Metadata.AdditionalMetadata),
                Title = record.Metadata.Description,
                // Explicit narrowing: the property is a float.
                Score = (float)score
            });
        }

        var arguments = new KernelArguments
        {
            ["sources"] = FormatSources(sources),
            ["question"] = question
        };

        var answer = await _kernel.InvokePromptAsync(PromptTemplate, arguments);

        return new RAGResponse
        {
            Answer = answer.ToString(),
            Sources = sources
        };
    }

    // Extracts the string property "Id" from the metadata JSON. The payload may mix
    // string and numeric values, so it is read as a JSON document instead of a
    // string-to-string dictionary. Missing or malformed metadata yields "".
    private static string ReadDocumentId(string additionalMetadata)
    {
        if (string.IsNullOrWhiteSpace(additionalMetadata))
        {
            return "";
        }

        try
        {
            using var json = JsonDocument.Parse(additionalMetadata);
            var root = json.RootElement;

            if (root.ValueKind == JsonValueKind.Object &&
                root.TryGetProperty("Id", out var id) &&
                id.ValueKind == JsonValueKind.String)
            {
                return id.GetString() ?? "";
            }
        }
        catch (JsonException)
        {
            // Unreadable metadata is treated like absent metadata: the source is
            // still returned, only without a document id.
        }

        return "";
    }

    // Renders the sources as "[1] text", "[2] text", ... one per line (1-based).
    private string FormatSources(List<SourceInfo> sources)
    {
        var sb = new StringBuilder();
        for (int i = 0; i < sources.Count; i++)
        {
            sb.AppendLine($"[{i + 1}] {sources[i].Content}");
        }
        return sb.ToString();
    }
}

/// <summary>
/// Result of a query that reports its sources: the generated answer plus the
/// retrieved chunks it was prompted with.
/// </summary>
public class RAGResponse
{
    /// <summary>Text of the generated answer.</summary>
    public string Answer { get; set; }
    /// <summary>Retrieved chunks that were supplied to the model as reference material.</summary>
    public List<SourceInfo> Sources { get; set; }
}

/// <summary>
/// One retrieved chunk together with where it came from and how well it matched.
/// </summary>
public class SourceInfo
{
    /// <summary>Text of the retrieved chunk.</summary>
    public string Content { get; set; }
    /// <summary>Identifier of the document the chunk belongs to; empty when unknown.</summary>
    public string DocumentId { get; set; }
    /// <summary>Title taken from the stored record's description.</summary>
    public string Title { get; set; }
    /// <summary>Relevance score reported by the memory store for this match.</summary>
    public float Score { get; set; }
}

完整示例 #

文档问答系统 #

csharp

/// <summary>
/// Document question-answering facade: imports files into the index and answers
/// questions with source references.
/// </summary>
public class DocumentQAService
{
    // Longest snippet shown per source before it is cut and suffixed with "...".
    private const int SnippetLength = 200;

    private readonly IDocumentLoader _loader;
    private readonly DocumentIndexer _indexer;
    // Must be the service that exposes QueryWithSourcesAsync, which AskAsync relies on.
    private readonly AdvancedRAGService _ragService;

    /// <summary>Creates the service over the given loader, indexer and query service.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public DocumentQAService(
        IDocumentLoader loader,
        DocumentIndexer indexer,
        AdvancedRAGService ragService)
    {
        _loader = loader ?? throw new ArgumentNullException(nameof(loader));
        _indexer = indexer ?? throw new ArgumentNullException(nameof(indexer));
        _ragService = ragService ?? throw new ArgumentNullException(nameof(ragService));
    }

    /// <summary>
    /// Loads the file at <paramref name="filePath"/> and indexes it under a new GUID,
    /// with the file name (without extension) as the title and the path as the source.
    /// </summary>
    /// <param name="filePath">Path of the file to import.</param>
    public async Task ImportDocumentAsync(string filePath)
    {
        var content = await _loader.LoadAsync(filePath);
        var document = new Document
        {
            Id = Guid.NewGuid().ToString(),
            Title = Path.GetFileNameWithoutExtension(filePath),
            Content = content,
            Source = filePath
        };

        await _indexer.IndexDocumentAsync(document);
    }

    /// <summary>
    /// Answers <paramref name="question"/> and returns, for each source used, its title,
    /// a snippet of at most 200 characters and its relevance score.
    /// </summary>
    /// <param name="question">User question.</param>
    public async Task<QAResponse> AskAsync(string question)
    {
        var response = await _ragService.QueryWithSourcesAsync(question);

        return new QAResponse
        {
            Question = question,
            Answer = response.Answer,
            Sources = response.Sources.Select(s => new SourceReference
            {
                Title = s.Title,
                Snippet = s.Content.Length > SnippetLength
                    ? s.Content.Substring(0, SnippetLength) + "..."
                    : s.Content,
                Relevance = s.Score
            }).ToList()
        };
    }
}

使用示例 #

csharp

// Construct the service; "..." stands for its dependencies, which are not shown here.
var qaService = new DocumentQAService(...);

// Import documents into the index.
await qaService.ImportDocumentAsync("docs/product-manual.pdf");
await qaService.ImportDocumentAsync("docs/faq.md");

// Ask a question against the imported documents.
var response = await qaService.AskAsync("如何重置密码？");

// Print the question, the answer, and each source title with its relevance (two decimals).
Console.WriteLine($"问题：{response.Question}");
Console.WriteLine($"回答：{response.Answer}");
Console.WriteLine("\n参考来源：");
foreach (var source in response.Sources)
{
    Console.WriteLine($"- {source.Title} (相关性: {source.Relevance:F2})");
}

优化技巧 #

混合检索 #

csharp

/// <summary>
/// Combines vector search and keyword search into a single ranked list using a
/// fixed 0.7 / 0.3 weighting.
/// </summary>
public class HybridSearchService
{
    // Share of the final score contributed by each retrieval method.
    private const float VectorWeight = 0.7f;
    private const float KeywordWeight = 0.3f;

    /// <summary>Runs both searches for <paramref name="query"/> and merges their results.</summary>
    /// <returns>Merged results, highest combined score first.</returns>
    public async Task<List<SearchResult>> SearchAsync(string query)
    {
        var semanticHits = await VectorSearchAsync(query);
        var lexicalHits = await KeywordSearchAsync(query);

        return MergeResults(semanticHits, lexicalHits);
    }

    // Merges the two lists by result id. Scores are rescaled in place on the
    // objects that were passed in, so the input lists are modified by this call.
    private List<SearchResult> MergeResults(
        List<SearchResult> vectorResults,
        List<SearchResult> keywordResults)
    {
        var byId = new Dictionary<string, SearchResult>();

        foreach (var hit in vectorResults)
        {
            hit.Score *= VectorWeight;
            byId[hit.Id] = hit;
        }

        foreach (var hit in keywordResults)
        {
            if (!byId.TryGetValue(hit.Id, out var existing))
            {
                // Found by keyword search only: it carries just the keyword share.
                hit.Score *= KeywordWeight;
                byId[hit.Id] = hit;
                continue;
            }

            // Found by both searches: add the keyword share to the vector share.
            existing.Score += hit.Score * KeywordWeight;
        }

        return byId.Values.OrderByDescending(r => r.Score).ToList();
    }
}

重排序 #

csharp

/// <summary>
/// Asks the language model to pick the search results that are most relevant to a query.
/// </summary>
public class Reranker
{
    // Query, result list and count are template variables rather than interpolated
    // text, so "{{" / "}}" inside result contents is not parsed as template syntax.
    private const string RerankPromptTemplate = """
        对以下搜索结果按与问题的相关性重新排序。

        问题：{{$query}}

        搜索结果：
        {{$results}}

        请返回最相关的{{$topN}}个结果的编号，用逗号分隔（如：2,0,5）
        """;

    private readonly Kernel _kernel;

    /// <summary>Creates a reranker that prompts through <paramref name="kernel"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="kernel"/> is null.</exception>
    public Reranker(Kernel kernel)
    {
        _kernel = kernel ?? throw new ArgumentNullException(nameof(kernel));
    }

    /// <summary>
    /// Returns up to <paramref name="topN"/> of <paramref name="results"/> in the order
    /// chosen by the model.
    /// </summary>
    /// <param name="query">Question the results are ranked against.</param>
    /// <param name="results">Candidates; they are presented to the model numbered from 0.</param>
    /// <param name="topN">How many results to ask for and return at most.</param>
    /// <returns>
    /// The selected results. Indices in the reply that are not numbers, are out of range
    /// or repeat an earlier one are ignored; when no usable index remains, the first
    /// <paramref name="topN"/> candidates are returned in their original order. An empty
    /// candidate list is returned as an empty list without prompting the model.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="results"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="topN"/> is not positive.</exception>
    public async Task<List<SearchResult>> RerankAsync(
        string query,
        List<SearchResult> results,
        int topN = 3)
    {
        ArgumentNullException.ThrowIfNull(results);

        if (topN <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(topN), topN, "topN must be positive.");
        }

        if (results.Count == 0)
        {
            return new List<SearchResult>();
        }

        var arguments = new KernelArguments
        {
            ["query"] = query,
            ["results"] = string.Join("\n", results.Select((r, i) => $"[{i}] {r.Content}")),
            ["topN"] = topN
        };

        var response = await _kernel.InvokePromptAsync(RerankPromptTemplate, arguments);
        var indices = ParseIndices(response.ToString() ?? string.Empty, results.Count, topN);

        return indices.Count > 0
            ? indices.Select(i => results[i]).ToList()
            : results.Take(topN).ToList();
    }

    // Reads a comma-separated index list from the model reply, keeping only distinct
    // indices inside [0, resultCount) and stopping after topN of them.
    private static List<int> ParseIndices(string response, int resultCount, int topN)
    {
        var picked = new List<int>();
        var seen = new HashSet<int>();

        // ASCII comma, full-width comma and line breaks are all accepted as separators.
        var tokens = response.Split(new[] { ',', '，', '\n' }, StringSplitOptions.RemoveEmptyEntries);

        foreach (var token in tokens)
        {
            // Tolerate replies that echo the "[n]" form used in the prompt.
            var digits = token.Trim().Trim('[', ']').Trim();

            // NumberStyles.None accepts digits only, so a parsed value is never negative.
            var parsed = int.TryParse(
                digits,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out var index);

            if (parsed && index < resultCount && seen.Add(index))
            {
                picked.Add(index);

                if (picked.Count == topN)
                {
                    break;
                }
            }
        }

        return picked;
    }
}

最佳实践 #

1. 合理的分块大小 #

csharp

// Choose the chunk size from the document type; unrecognized types use 500.
var chunkSize = documentType switch
{
    "technical" => 1000,   // technical documentation
    "faq" => 200,          // FAQ entries
    "article" => 500,      // articles
    _ => 500
};

var chunker = new TextChunker(chunkSize, overlap: 50);

2. 元数据管理 #

csharp

// Anonymous object bundling the metadata kept per chunk. Each property takes its
// name and value from the local variable of the same name.
// NOTE(review): chunkIndex / totalChunks presumably give the chunk's position within
// its document and the document's chunk count — confirm against the code that sets them.
var metadata = new
{
    documentId,
    title,
    source,
    author,
    createdAt,
    tags,
    chunkIndex,
    totalChunks
};

3. 增量更新 #

csharp

/// <summary>
/// Refreshes the index entry of a document: the chunks stored for
/// <c>document.Id</c> are deleted first, then the document is indexed again.
/// </summary>
/// <param name="document">Document whose current content should replace what is indexed.</param>
/// <remarks>
/// The two steps are awaited one after the other with no rollback in this method, so if
/// re-indexing fails the delete step has already run.
/// </remarks>
public async Task UpdateDocumentAsync(Document document)
{
    // Delete the old index entries.
    await DeleteDocumentChunksAsync(document.Id);
    
    // Index the document again.
    await IndexDocumentAsync(document);
}

下一步 #

现在你已经掌握了 RAG 应用，接下来学习流式输出，了解如何实现实时响应！