查询引擎 #
概述 #
查询引擎(Query Engine)是 LlamaIndex 中端到端的查询接口,它整合了检索器和响应合成器,负责从索引中检索相关信息并生成最终回答。
text
┌─────────────────────────────────────────────────────────────┐
│ 查询引擎架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 用户问题 │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Query Engine │ │
│ │ │ │
│ │ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ Retriever │───→│ Synthesizer │ │ │
│ │ └─────────────┘ └─────────────┘ │ │
│ │ │ │ │ │
│ │ ▼ ▼ │ │
│ │ ┌───────────┐ ┌───────────┐ │ │
│ │ │ Nodes │ │ Response │ │ │
│ │ └───────────┘ └───────────┘ │ │
│ │ │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
基本用法 #
创建查询引擎 #
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("你的问题")
print(response)
配置选项 #
python
query_engine = index.as_query_engine(
similarity_top_k=3,
response_mode="compact",
streaming=True,
verbose=True,
)
获取来源信息 #
python
response = query_engine.query("你的问题")
print(f"回答: {response}")
print("\n来源:")
for i, node in enumerate(response.source_nodes):
print(f"[{i+1}] {node.node.metadata.get('file_name', 'unknown')}")
print(f" 相似度: {node.score:.4f}")
print(f" 内容: {node.node.text[:100]}...")
流式输出 #
启用流式输出 #
python
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("你的问题")
for text in response.response_gen:
print(text, end="", flush=True)
print()
异步流式输出 #
python
import asyncio
async def stream_query():
query_engine = index.as_query_engine(streaming=True)
response = await query_engine.aquery("你的问题")
async for text in response.async_response_gen():
print(text, end="", flush=True)
print()
asyncio.run(stream_query())
流式输出回调 #
python
from llama_index.core import VectorStoreIndex
query_engine = index.as_query_engine(streaming=True)
def on_token(token: str):
print(token, end="", flush=True)
response = query_engine.query("你的问题")
for token in response.response_gen:
on_token(token)
查询引擎类型 #
RetrieverQueryEngine #
最基础的查询引擎,执行检索后合成响应:
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import get_response_synthesizer
index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(similarity_top_k=3)
response_synthesizer = get_response_synthesizer(response_mode="compact")
query_engine = RetrieverQueryEngine(
retriever=retriever,
response_synthesizer=response_synthesizer,
)
response = query_engine.query("你的问题")
RouterQueryEngine #
根据问题自动选择合适的查询引擎:
python
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.tools import QueryEngineTool
vector_index = VectorStoreIndex.from_documents(documents)
summary_index = SummaryIndex.from_documents(documents)
vector_tool = QueryEngineTool.from_defaults(
query_engine=vector_index.as_query_engine(),
name="vector_search",
description="适合语义搜索和具体问题查询",
)
summary_tool = QueryEngineTool.from_defaults(
query_engine=summary_index.as_query_engine(response_mode="tree_summarize"),
name="summary",
description="适合文档总结和概览",
)
query_engine = RouterQueryEngine.from_defaults(
query_engine_tools=[vector_tool, summary_tool],
)
response = query_engine.query("总结文档的主要内容")
print(response)
MultiStepQueryEngine #
多步迭代查询,逐步深入:
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import MultiStepQueryEngine
from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform
index = VectorStoreIndex.from_documents(documents)
query_engine = MultiStepQueryEngine(
query_engine=index.as_query_engine(),
query_transform=StepDecomposeQueryTransform(verbose=True),
num_steps=3,
)
response = query_engine.query("复杂问题")
print(response)
SubQuestionQueryEngine #
将复杂问题分解为子问题:
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
index = VectorStoreIndex.from_documents(documents)
query_engine_tools = [
QueryEngineTool(
query_engine=index.as_query_engine(),
metadata=ToolMetadata(
name="knowledge_base",
description="知识库查询",
),
),
]
query_engine = SubQuestionQueryEngine.from_defaults(
query_engine_tools=query_engine_tools,
)
response = query_engine.query("比较文档 A 和文档 B 的异同")
print(response)
RetryQueryEngine #
检测低质量回答并重试:
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetryQueryEngine
from llama_index.core.evaluation import RelevancyEvaluator
index = VectorStoreIndex.from_documents(documents)
base_engine = index.as_query_engine()
query_engine = RetryQueryEngine(
query_engine=base_engine,
evaluator=RelevancyEvaluator(),
max_retries=3,
)
response = query_engine.query("你的问题")
CitationQueryEngine #
带引用的查询引擎:
python
from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import CitationQueryEngine
index = VectorStoreIndex.from_documents(documents)
query_engine = CitationQueryEngine.from_args(
index,
similarity_top_k=3,
citation_chunk_size=512,
)
response = query_engine.query("你的问题")
print(response)
for node in response.source_nodes:
print(f"引用 [{node.node.metadata.get('source')}]: {node.node.text[:100]}")
自定义查询引擎 #
继承 BaseQueryEngine #
python
from llama_index.core.query_engine import BaseQueryEngine
from llama_index.core.schema import QueryBundle
from llama_index.core.base.response.schema import Response
from typing import Any
class CustomQueryEngine(BaseQueryEngine):
def __init__(self, index):
super().__init__(callback_manager=None)
self.index = index
def _query(self, query_bundle: QueryBundle) -> Response:
query_engine = self.index.as_query_engine()
return query_engine.query(query_bundle.query_str)
async def _aquery(self, query_bundle: QueryBundle) -> Response:
query_engine = self.index.as_query_engine()
return await query_engine.aquery(query_bundle.query_str)
def _get_prompt_modules(self) -> dict:
return {}
custom_engine = CustomQueryEngine(index)
response = custom_engine.query("你的问题")
查询参数 #
相似度阈值 #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.vector_stores import MetadataFilters
query_engine = index.as_query_engine(
similarity_top_k=10,
)
from llama_index.core.postprocessor import SimilarityPostprocessor
query_engine = index.as_query_engine(
similarity_top_k=10,
node_postprocessors=[
SimilarityPostprocessor(similarity_cutoff=0.7),
],
)
元数据过滤 #
python
from llama_index.core.vector_stores import MetadataFilters, MetadataFilter
filters = MetadataFilters(
filters=[
MetadataFilter(key="category", value="tech"),
MetadataFilter(key="year", value=2024),
]
)
query_engine = index.as_query_engine(filters=filters)
response = query_engine.query("你的问题")
响应模式 #
python
from llama_index.core import get_response_synthesizer
compact_engine = index.as_query_engine(
response_mode="compact"
)
refine_engine = index.as_query_engine(
response_mode="refine"
)
tree_summarize_engine = index.as_query_engine(
response_mode="tree_summarize"
)
simple_engine = index.as_query_engine(
response_mode="simple_summarize"
)
no_text_engine = index.as_query_engine(
response_mode="no_text"
)
节点后处理 #
重排序 #
python
from llama_index.core import VectorStoreIndex
from llama_index.postprocessor.cohere_rerank import CohereRerank
index = VectorStoreIndex.from_documents(documents)
cohere_rerank = CohereRerank(
api_key="your-cohere-key",
top_n=3,
)
query_engine = index.as_query_engine(
similarity_top_k=10,
node_postprocessors=[cohere_rerank],
)
response = query_engine.query("你的问题")
本地重排序 #
python
from llama_index.core.postprocessor import SentenceTransformerRerank
rerank_processor = SentenceTransformerRerank(
model="cross-encoder/ms-marco-MiniLM-L-6-v2",
top_n=3,
)
query_engine = index.as_query_engine(
similarity_top_k=10,
node_postprocessors=[rerank_processor],
)
长上下文重排序 #
python
from llama_index.core.postprocessor import LongContextReorder
reorder_processor = LongContextReorder()
query_engine = index.as_query_engine(
similarity_top_k=10,
node_postprocessors=[reorder_processor],
)
组合后处理器 #
python
from llama_index.core.postprocessor import (
SimilarityPostprocessor,
SentenceTransformerRerank,
LongContextReorder,
)
processors = [
SimilarityPostprocessor(similarity_cutoff=0.5),
SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-6-v2", top_n=5),
LongContextReorder(),
]
query_engine = index.as_query_engine(
similarity_top_k=20,
node_postprocessors=processors,
)
异步查询 #
python
import asyncio
from llama_index.core import VectorStoreIndex
async def async_query():
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = await query_engine.aquery("你的问题")
print(response)
questions = ["问题1", "问题2", "问题3"]
tasks = [query_engine.aquery(q) for q in questions]
responses = await asyncio.gather(*tasks)
for q, r in zip(questions, responses):
print(f"问题: {q}")
print(f"回答: {r}\n")
asyncio.run(async_query())
完整示例 #
python
import os
from llama_index.core import (
VectorStoreIndex,
SummaryIndex,
SimpleDirectoryReader,
Settings,
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.tools import QueryEngineTool
os.environ["OPENAI_API_KEY"] = "sk-your-key"
Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
documents = SimpleDirectoryReader("./data").load_data()
print(f"加载了 {len(documents)} 个文档")
vector_index = VectorStoreIndex.from_documents(documents)
summary_index = SummaryIndex.from_documents(documents)
vector_tool = QueryEngineTool.from_defaults(
query_engine=vector_index.as_query_engine(
similarity_top_k=3,
streaming=True,
),
name="vector_search",
description="适合语义搜索和具体问题",
)
summary_tool = QueryEngineTool.from_defaults(
query_engine=summary_index.as_query_engine(
response_mode="tree_summarize"
),
name="summary",
description="适合文档总结",
)
query_engine = RouterQueryEngine.from_defaults(
query_engine_tools=[vector_tool, summary_tool],
)
print("\n=== 智能问答系统 ===")
print("输入 'quit' 退出\n")
while True:
question = input("问题: ").strip()
if question.lower() == "quit":
break
if not question:
continue
print("\n回答: ", end="")
response = query_engine.query(question)
if hasattr(response, "response_gen"):
for text in response.response_gen:
print(text, end="", flush=True)
print()
else:
print(response)
print()
下一步 #
掌握查询引擎后,接下来学习 检索器 深入理解检索机制!
最后更新:2026-03-30