Advanced RAG Techniques #
Overview #
A basic RAG application can suffer from inaccurate retrieval and low answer quality. This chapter introduces a range of advanced techniques for improving the performance of a RAG application.
text
┌─────────────────────────────────────────────────────────────┐
│ Overview of RAG Optimization Techniques                      │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│ Retrieval optimization                                       │
│ ├── Hybrid retrieval (vector + keyword)                      │
│ ├── Reranking                                                │
│ ├── Query expansion                                          │
│ └── HyDE (Hypothetical Document Embeddings)                  │
│                                                              │
│ Index optimization                                           │
│ ├── Hierarchical indexing                                    │
│ ├── Auto-merging retrieval                                   │
│ └── Sentence-window retrieval                                │
│                                                              │
│ Query optimization                                           │
│ ├── Query rewriting                                          │
│ ├── Multi-query                                              │
│ └── Sub-question decomposition                               │
│                                                              │
│ Generation optimization                                      │
│ ├── Long-context reordering                                  │
│ ├── Citation generation                                      │
│ └── Fact checking                                            │
│                                                              │
└─────────────────────────────────────────────────────────────┘
Hybrid Retrieval #
Vector + BM25 Hybrid #
bash
pip install llama-index-retrievers-bm25 rank-bm25
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.retrievers import VectorIndexRetriever, QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.retrievers.bm25 import BM25Retriever

documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Dense (embedding-based) retriever
vector_retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5,
)

# Sparse (keyword-based) BM25 retriever over the same nodes
bm25_retriever = BM25Retriever.from_defaults(
    nodes=list(index.docstore.docs.values()),
    similarity_top_k=5,
)

# Fuse both result lists with reciprocal rank fusion
fusion_retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    similarity_top_k=5,
    mode="reciprocal_rerank",
)

query_engine = RetrieverQueryEngine.from_args(fusion_retriever)
response = query_engine.query("your question")
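As a quick sanity check, the fusion retriever can also be called directly to inspect the fused candidates before wiring it into a query engine. A minimal sketch (the query string is just a placeholder):
python
# Retrieve directly and print each fused score plus a snippet of the node
nodes = fusion_retriever.retrieve("your question")
for node_with_score in nodes:
    print(node_with_score.score, node_with_score.node.get_content()[:80])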
Fusion Strategies #
python
# Reciprocal Rank Fusion: combine result lists by rank position, which is
# robust when the retrievers' scores are on different scales
fusion_retriever = QueryFusionRetriever(
    retrievers=[retriever1, retriever2],
    mode="reciprocal_rerank",
    similarity_top_k=10,
)

# Simple fusion: deduplicate and re-rank by the retrievers' original scores
fusion_retriever = QueryFusionRetriever(
    retrievers=[retriever1, retriever2],
    mode="simple",
    similarity_top_k=10,
)

# Distribution-based fusion: normalize each retriever's scores
# (mean / standard deviation) before combining
fusion_retriever = QueryFusionRetriever(
    retrievers=[retriever1, retriever2],
    mode="dist_based",
    similarity_top_k=10,
)
Reranking #
Cohere Rerank #
bash
pip install llama-index-postprocessor-cohere-rerank
python
from llama_index.core import VectorStoreIndex
from llama_index.postprocessor.cohere_rerank import CohereRerank

index = VectorStoreIndex.from_documents(documents)

cohere_rerank = CohereRerank(
    api_key="your-cohere-key",
    top_n=5,
)

# Retrieve a larger candidate set, then keep the top 5 after reranking
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[cohere_rerank],
)
response = query_engine.query("your question")
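Rerankers are ordinary node postprocessors, so they can also be applied to an already-retrieved candidate list outside a query engine. A minimal sketch reusing `index` and `cohere_rerank` from above (the query string is a placeholder):
python
from llama_index.core.schema import QueryBundle

# Pull a coarse candidate set, then let the reranker keep the top_n best
candidates = index.as_retriever(similarity_top_k=10).retrieve("your question")
reranked = cohere_rerank.postprocess_nodes(
    candidates,
    query_bundle=QueryBundle("your question"),
)
for node in reranked:
    print(node.score, node.node.get_content()[:80])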
SentenceTransformer Rerank #
bash
pip install sentence-transformers
python
from llama_index.core.postprocessor import SentenceTransformerRerank

# Local cross-encoder reranker (no external API required)
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_n=5,
)

query_engine = index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
)
FlashRank Rerank #
bash
pip install llama-index-postprocessor-flashrank-rerank
python
from llama_index.postprocessor.flashrank_rerank import FlashRankRerank
reranker = FlashRankRerank(
    model="ms-marco-MiniLM-L-12-v2",
    top_n=5,
)

query_engine = index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
)
Jina Reranker #
bash
pip install llama-index-postprocessor-jinaai-rerank
python
from llama_index.postprocessor.jinaai_rerank import JinaRerank

reranker = JinaRerank(
    api_key="your-jina-key",
    top_n=5,
)

query_engine = index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
)
Query Expansion #
HyDE (Hypothetical Document Embeddings) #
HyDE is built into LlamaIndex core as HyDEQueryTransform, so no extra package is required: the LLM first writes a hypothetical answer document, and that document's embedding is used for retrieval instead of (or alongside) the raw query. Wiring the transform into a query engine is shown under "Query Rewriting" below.
python
from llama_index.core.indices.query.query_transform import HyDEQueryTransform

hyde_transform = HyDEQueryTransform(include_original=True)

# Inspect the transform: custom_embedding_strs holds the hypothetical
# document (plus the original query when include_original=True)
query_bundle = hyde_transform.run("your question")
print(query_bundle.custom_embedding_strs)
Multi-Query Expansion #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

index = VectorStoreIndex.from_documents(documents)
base_retriever = index.as_retriever(similarity_top_k=5)

fusion_retriever = QueryFusionRetriever(
    retrievers=[base_retriever],
    similarity_top_k=5,
    num_queries=3,  # total queries: the original plus LLM-generated variants; 1 disables generation
    mode="reciprocal_rerank",
    use_async=True,
)

query_engine = RetrieverQueryEngine.from_args(fusion_retriever)
Query Rewriting #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.core.indices.query.query_transform import HyDEQueryTransform

index = VectorStoreIndex.from_documents(documents)

# Rewrite the query into a hypothetical answer document before retrieval
hyde_transform = HyDEQueryTransform(include_original=True)
query_engine = TransformQueryEngine(
    query_engine=index.as_query_engine(),
    query_transform=hyde_transform,
)
response = query_engine.query("your question")
Hierarchical Indexing #
Auto-Merging Retrieval #
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.storage.docstore import SimpleDocumentStore

documents = SimpleDirectoryReader("./data").load_data()

# Parse into a hierarchy of chunks (2048 -> 512 -> 128 characters)
parser = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048, 512, 128]
)
nodes = parser.get_nodes_from_documents(documents)
leaf_nodes = get_leaf_nodes(nodes)

# Store all nodes (parents included) so merged parents can be looked up,
# but only index the leaf nodes for vector search
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)
storage_context = StorageContext.from_defaults(docstore=docstore)
index = VectorStoreIndex(leaf_nodes, storage_context=storage_context)

# When enough sibling leaves are retrieved, replace them with their parent chunk
retriever = AutoMergingRetriever(
    index.as_retriever(similarity_top_k=10),
    storage_context=storage_context,
)
query_engine = RetrieverQueryEngine.from_args(retriever)
Sentence-Window Retrieval #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# Index single sentences, but store a window of surrounding sentences in metadata
parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
nodes = parser.get_nodes_from_documents(documents)
index = VectorStoreIndex(nodes)

# At query time, swap each retrieved sentence for its surrounding window
postprocessor = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)
query_engine = index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[postprocessor],
)
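To see the replacement in action, a short sketch (the query string is a placeholder) that prints what a retrieved node actually carries into the prompt after the postprocessor has run:
python
response = query_engine.query("your question")
source = response.source_nodes[0]
# The node text is now the multi-sentence window; the originally matched
# sentence is still available under "original_text"
print(source.node.text)
print(source.node.metadata["original_text"])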
Sub-Question Decomposition #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata

index = VectorStoreIndex.from_documents(documents)

query_engine_tools = [
    QueryEngineTool(
        query_engine=index.as_query_engine(),
        metadata=ToolMetadata(
            name="knowledge_base",
            description="Query tool for the knowledge base",
        ),
    ),
]

# Decompose a complex question into sub-questions, answer each against the
# tools, then synthesize a final answer
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    use_async=True,
)
response = query_engine.query("Compare the main differences between document A and document B")
Long-Context Reordering #
python
from llama_index.core.postprocessor import LongContextReorder

# Mitigate "lost in the middle": place the most relevant nodes at the start
# and end of the context window
reorder_processor = LongContextReorder()

query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[reorder_processor],
)
response = query_engine.query("your question")
Metadata Filtering #
Static Filtering #
python
from llama_index.core.vector_stores import MetadataFilters, MetadataFilter

# Only retrieve nodes whose metadata matches both conditions
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="category", value="tech"),
        MetadataFilter(key="year", value=2024),
    ]
)
query_engine = index.as_query_engine(filters=filters)
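Filters only match nodes that actually carry the corresponding metadata, so it has to be attached at ingestion time. A minimal sketch (variable names and field values are illustrative):
python
from llama_index.core import Document, VectorStoreIndex

docs_with_meta = [
    Document(
        text="LlamaIndex release notes ...",
        metadata={"category": "tech", "year": 2024},
    ),
    Document(
        text="Quarterly finance report ...",
        metadata={"category": "finance", "year": 2023},
    ),
]
meta_index = VectorStoreIndex.from_documents(docs_with_meta)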
Dynamic Filtering #
python
from llama_index.core.vector_stores import MetadataFilters, MetadataFilter
from llama_index.core.retrievers import VectorIndexRetriever


def extract_category_from_query(query: str) -> str:
    """Naive rule-based mapping from the query text to a metadata category."""
    if "python" in query.lower():
        return "programming"
    elif "machine learning" in query.lower():
        return "ai"
    return "general"


class DynamicFilterRetriever(VectorIndexRetriever):
    """Retriever that derives a metadata filter from the query itself."""

    def __init__(self, index, **kwargs):
        super().__init__(index=index, **kwargs)
        self.index = index

    def _retrieve(self, query_bundle):
        category = extract_category_from_query(query_bundle.query_str)
        filters = MetadataFilters(
            filters=[
                MetadataFilter(key="category", value=category),
            ]
        )
        retriever = self.index.as_retriever(filters=filters)
        return retriever.retrieve(query_bundle)
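The custom retriever plugs in like any other retriever. A minimal usage sketch (the query string is just an example that maps to the "ai" category):
python
from llama_index.core.query_engine import RetrieverQueryEngine

retriever = DynamicFilterRetriever(index=index, similarity_top_k=5)
query_engine = RetrieverQueryEngine.from_args(retriever)
response = query_engine.query("Why do machine learning models overfit?")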
Citation Generation #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import CitationQueryEngine

index = VectorStoreIndex.from_documents(documents)

# Answers cite numbered source chunks, e.g. "... [1] ... [2]"
query_engine = CitationQueryEngine.from_args(
    index,
    similarity_top_k=5,
    citation_chunk_size=512,
)
response = query_engine.query("your question")
print(response.response)

# List the cited source chunks (SimpleDirectoryReader stores the file name in metadata)
for i, node in enumerate(response.source_nodes):
    print(f"\n[{i+1}] Source: {node.node.metadata.get('file_name')}")
    print(f"Text: {node.node.text[:100]}...")
Complete Optimization Example #
python
import os

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Settings,
)
from llama_index.core.postprocessor import LongContextReorder, SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever, VectorIndexRetriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.retrievers.bm25 import BM25Retriever

os.environ["OPENAI_API_KEY"] = "sk-your-key"

Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# 1. Load and index documents
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)

# 2. Hybrid retrieval: dense vectors + BM25, fused with reciprocal rank fusion
vector_retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)
bm25_retriever = BM25Retriever.from_defaults(
    nodes=list(index.docstore.docs.values()),
    similarity_top_k=10,
)
fusion_retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    similarity_top_k=15,
    mode="reciprocal_rerank",
)

# 3. Rerank the fused candidates, then reorder for long contexts
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_n=5,
)
reorder = LongContextReorder()

# 4. Assemble the query engine
query_engine = RetrieverQueryEngine.from_args(
    fusion_retriever,
    node_postprocessors=[reranker, reorder],
)

response = query_engine.query("your question")
print(response)
Performance Comparison #
text
┌────────────────────────────────────────────────────────────────────────┐
│ Comparison of RAG Technique Effectiveness                              │
├────────────────────────────────────────────────────────────────────────┤
│                                                                        │
│ Technique                   Accuracy gain  Added latency  Best for     │
│ ──────────────────────────────────────────────────────────────────    │
│ Basic vector retrieval      baseline       baseline   simple scenarios │
│ Hybrid retrieval            +10-15%        +20%       general use      │
│ Reranking                   +15-25%        +50%       quality-critical │
│ HyDE                        +5-10%         +30%       conceptual questions │
│ Hierarchical indexing       +10-20%        +10%       large documents  │
│ Sub-question decomposition  +20-30%        +100%      complex questions │
│                                                                        │
└────────────────────────────────────────────────────────────────────────┘
Next Steps #
With advanced RAG techniques under your belt, continue to Agents to build AI systems that make decisions autonomously!
Last updated: 2026-03-30