RAG 应用实战 #

本章介绍如何使用 Qdrant 构建检索增强生成(RAG)系统。

RAG 概述 #

text
RAG 架构流程:

┌─────────────────────────────────────────────────────────────┐
│                      RAG 系统                                │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│  用户问题                                                    │
│      │                                                       │
│      ↓                                                       │
│  ┌─────────────┐                                             │
│  │  Embedding  │                                             │
│  └─────────────┘                                             │
│      │                                                       │
│      ↓                                                       │
│  ┌─────────────┐      ┌─────────────┐                       │
│  │   Qdrant    │  →   │  相关文档   │                       │
│  │  向量搜索   │      │  检索结果   │                       │
│  └─────────────┘      └─────────────┘                       │
│                              │                               │
│                              ↓                               │
│  ┌─────────────────────────────────────────────────────┐   │
│  │                    LLM                               │   │
│  │  问题 + 相关文档 → 生成回答                          │   │
│  └─────────────────────────────────────────────────────┘   │
│                              │                               │
│                              ↓                               │
│                         最终回答                             │
│                                                              │
└─────────────────────────────────────────────────────────────┘

环境准备 #

安装依赖 #

bash
pip install qdrant-client sentence-transformers langchain langchain-openai

导入库 #

python
import os
from typing import Dict, List, Optional

import numpy as np

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

知识库构建 #

文档数据 #

python
# In-memory sample corpus for the RAG demo. Each entry carries:
#   "id"       - stable string identifier (referenced by the eval test cases),
#   "title"    - short heading shown in prompts/source lists,
#   "content"  - the passage text that gets embedded and searched,
#   "category" - label stored in the Qdrant payload (usable for filtering).
knowledge_base = [
    {
        "id": "kb_001",
        "title": "Qdrant 简介",
        "content": "Qdrant 是一个高性能的开源向量数据库,使用 Rust 编写。它支持高维向量的存储、索引和相似性搜索,是构建 AI 应用的核心基础设施。Qdrant 提供 REST API 和 gRPC 接口,支持 Python、JavaScript、Go 等多种语言的 SDK。",
        "category": "产品介绍"
    },
    {
        "id": "kb_002",
        "title": "Qdrant 安装方式",
        "content": "Qdrant 支持多种安装方式:1. Docker 安装最简单,使用 docker run 命令即可启动;2. Kubernetes 部署适合生产环境;3. Qdrant Cloud 是官方托管的云服务,无需运维;4. Python 内存模式适合快速测试。",
        "category": "安装部署"
    },
    {
        "id": "kb_003",
        "title": "Collection 管理",
        "content": "Collection 是 Qdrant 中最顶层的容器,类似于关系数据库中的表。创建 Collection 时需要指定向量维度和距离度量方式。Qdrant 支持余弦相似度、欧几里得距离和点积三种距离度量。",
        "category": "核心概念"
    },
    {
        "id": "kb_004",
        "title": "向量搜索原理",
        "content": "Qdrant 使用 HNSW(Hierarchical Navigable Small World)算法进行向量索引。HNSW 是一种高效的近似最近邻搜索算法,通过构建多层图结构实现快速的向量检索。搜索时从顶层开始,逐层向下细化,最终返回最相似的结果。",
        "category": "技术原理"
    },
    {
        "id": "kb_005",
        "title": "Payload 过滤",
        "content": "Qdrant 支持丰富的 Payload 过滤功能。可以为向量附加元数据,如文本、数值、标签等,并在搜索时根据这些元数据进行过滤。支持的过滤条件包括精确匹配、范围查询、地理位置过滤等。",
        "category": "核心功能"
    },
    {
        "id": "kb_006",
        "title": "分布式部署",
        "content": "Qdrant 支持分布式集群部署,通过分片和复制实现水平扩展和高可用。分片将数据分布到多个节点,复制为每个分片创建多个副本。集群使用 Raft 协议保证一致性。",
        "category": "部署架构"
    },
    {
        "id": "kb_007",
        "title": "性能优化建议",
        "content": "Qdrant 性能优化建议:1. 调整 HNSW 参数 m 和 ef_construct;2. 使用向量量化减少内存占用;3. 为 Payload 字段创建索引;4. 使用批量操作提高吞吐量;5. 合理配置搜索参数 hnsw_ef。",
        "category": "性能优化"
    },
    {
        "id": "kb_008",
        "title": "RAG 应用场景",
        "content": "RAG(检索增强生成)是 Qdrant 的典型应用场景。RAG 系统首先从知识库中检索相关文档,然后将文档作为上下文提供给大语言模型,生成更准确、更有依据的回答。这种方式解决了 LLM 的知识时效性和幻觉问题。",
        "category": "应用场景"
    }
]

创建知识库索引 #

python
class KnowledgeBase:
    """Vector-backed knowledge base.

    Embeds document contents with a SentenceTransformer model and stores
    them in a Qdrant collection for cosine-similarity retrieval.
    """

    # Embedding dimension of 'all-MiniLM-L6-v2'; must match VectorParams.size.
    EMBEDDING_DIM = 384

    def __init__(self, collection_name: str = "rag_knowledge"):
        # ":memory:" runs Qdrant fully in-process — good for demos and
        # tests, but nothing is persisted across runs.
        self.client = QdrantClient(":memory:")
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.collection_name = collection_name

    def build_index(self, documents: List[Dict]) -> None:
        """(Re)build the vector index from `documents`.

        Each document dict must provide "id", "title", "content" and
        "category"; only "content" is embedded, the rest is stored as payload.

        Fix: the original called create_collection unconditionally, which
        raises if the collection already exists (e.g. build_index invoked a
        second time). We now drop any existing collection first so the
        method is safely re-runnable.
        """
        if self.client.collection_exists(self.collection_name):
            self.client.delete_collection(self.collection_name)

        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.EMBEDDING_DIM, distance=Distance.COSINE
            ),
        )

        # Batch-encode every content string in one call (much faster than
        # encoding per document).
        texts = [doc["content"] for doc in documents]
        embeddings = self.model.encode(texts)

        points = [
            PointStruct(
                # Sequential numeric point id; the original string id is
                # preserved inside the payload.
                id=i,
                vector=embeddings[i].tolist(),
                payload={
                    "id": documents[i]["id"],
                    "title": documents[i]["title"],
                    "content": documents[i]["content"],
                    "category": documents[i]["category"],
                },
            )
            for i in range(len(documents))
        ]

        self.client.upsert(self.collection_name, points)
        print(f"知识库索引完成,共 {len(points)} 条文档")

    def retrieve(self, query: str, top_k: int = 3) -> List[Dict]:
        """Return the `top_k` documents most similar to `query`.

        Each result dict contains the stored payload fields plus the
        similarity "score" reported by Qdrant.
        """
        query_embedding = self.model.encode(query)

        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_embedding.tolist(),
            limit=top_k,
        )

        return [
            {
                "id": r.payload["id"],
                "title": r.payload["title"],
                "content": r.payload["content"],
                "category": r.payload["category"],
                "score": r.score,
            }
            for r in results
        ]

# Build the demo index once at module load; later sections reuse `kb`.
kb = KnowledgeBase()
kb.build_index(knowledge_base)

RAG 系统实现 #

简单 RAG 实现 #

python
class SimpleRAG:
    """Minimal retrieve-then-prompt RAG.

    Fetches the top-k passages for a question and assembles them into a
    single grounded prompt; no LLM call happens here.
    """

    def __init__(self, knowledge_base: KnowledgeBase):
        # Any object exposing .retrieve(query, top_k) -> List[Dict] works.
        self.kb = knowledge_base

    def build_prompt(self, query: str, contexts: List[Dict]) -> str:
        """Render the retrieved passages plus the question as one prompt."""
        context_text = "\n\n".join(
            f"【{ctx['title']}】\n{ctx['content']}" for ctx in contexts
        )

        return f"""基于以下知识库内容回答问题。如果知识库中没有相关信息,请说明。

知识库内容:
{context_text}

问题:{query}

请基于知识库内容给出准确、详细的回答:"""

    def answer(self, query: str, top_k: int = 3) -> Dict:
        """Retrieve context for `query` and return query/contexts/prompt."""
        retrieved = self.kb.retrieve(query, top_k)
        return {
            "query": query,
            "contexts": retrieved,
            "prompt": self.build_prompt(query, retrieved),
        }

# Demo: build a grounded prompt for an installation question and show which
# documents were retrieved (the prompt would normally be sent to an LLM).
rag = SimpleRAG(kb)
result = rag.answer("如何安装 Qdrant?")

print("检索到的相关文档:")
for ctx in result["contexts"]:
    print(f"  - {ctx['title']} (相关度: {ctx['score']:.4f})")

print("\n生成的提示词:")
print(result["prompt"][:500] + "...")

LangChain 集成 #

python
from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_core.language_models import BaseLLM
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

class QdrantVectorStore:
    """Adapter exposing a LangChain-style vector-store interface on top of
    a KnowledgeBase (similarity_search + as_retriever)."""

    def __init__(self, knowledge_base: KnowledgeBase):
        self.kb = knowledge_base

    def similarity_search(self, query: str, k: int = 3) -> List[Document]:
        """Retrieve k passages and wrap each one as a LangChain Document."""
        wrapped = []
        for hit in self.kb.retrieve(query, k):
            meta = {
                "id": hit["id"],
                "title": hit["title"],
                "category": hit["category"],
                "score": hit["score"],
            }
            wrapped.append(Document(page_content=hit["content"], metadata=meta))
        return wrapped

    def as_retriever(self, search_kwargs: Optional[Dict] = None):
        """Return a minimal retriever object exposing .invoke(query)."""
        if search_kwargs is None:
            search_kwargs = {"k": 3}

        class Retriever:
            def __init__(self, store, kwargs):
                self.store = store
                self.kwargs = kwargs

            def invoke(self, query: str) -> List[Document]:
                # Fall back to k=3 when the caller supplied no "k".
                k = self.kwargs.get("k", 3)
                return self.store.similarity_search(query, k=k)

        return Retriever(self, search_kwargs)

# Demo: use the knowledge base through the LangChain-style retriever API.
vector_store = QdrantVectorStore(kb)
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

docs = retriever.invoke("Qdrant 是什么?")
print(f"检索到 {len(docs)} 个文档")
for doc in docs:
    print(f"  - {doc.metadata['title']}")

RAG 提示词模板 #

python
# Prompt template for the RAG chain; the {context} and {question}
# placeholders are filled via str.format in build_rag_chain below.
RAG_PROMPT_TEMPLATE = """你是一个专业的问答助手。请基于提供的上下文信息回答用户问题。

要求:
1. 只使用上下文中的信息回答
2. 如果上下文中没有相关信息,请明确说明
3. 回答要准确、详细、有条理
4. 可以引用具体的上下文来源

上下文信息:
{context}

用户问题:{question}

请给出回答:"""

def format_docs(docs: List[Document]) -> str:
    """Render Documents as 【title】 + content blocks joined by blank lines.

    Documents without a "title" metadata entry fall back to '未知'.
    """
    parts = []
    for doc in docs:
        title = doc.metadata.get('title', '未知')
        parts.append(f"【{title}】\n{doc.page_content}")
    return "\n\n".join(parts)

def build_rag_chain(retriever, llm=None):
    """Build a minimal RAG pipeline: question -> retrieval -> filled prompt.

    `llm` is accepted for API symmetry but unused — the chain stops at
    prompt construction so the example runs without an API key.
    """
    def rag_pipeline(question: str) -> str:
        retrieved = retriever.invoke(question)
        return RAG_PROMPT_TEMPLATE.format(
            context=format_docs(retrieved),
            question=question,
        )

    return rag_pipeline

# Demo: the chain returns the filled prompt (it would be piped into an LLM
# in a real deployment).
rag_chain = build_rag_chain(retriever)

prompt = rag_chain("Qdrant 支持哪些距离度量方式?")
print(prompt)

高级 RAG 功能 #

多轮对话支持 #

python
class ConversationalRAG:
    """RAG with short-term conversation memory.

    The last few turns are injected into the prompt so follow-up questions
    ("它支持哪些…?") keep their referent.
    """

    def __init__(self, knowledge_base: "KnowledgeBase"):
        self.kb = knowledge_base
        # Each turn dict: {"query": ..., "contexts": ..., "answer": ...}.
        self.conversation_history: List[Dict] = []

    def build_conversational_prompt(self, query: str, contexts: List[Dict]) -> str:
        """Compose contexts + up to 3 recent turns + the new question."""
        history_text = ""
        if self.conversation_history:
            history_text = "\n\n历史对话:\n"
            for turn in self.conversation_history[-3:]:
                # Fix: turns stored by chat() carried no "answer" key (no LLM
                # is wired up in this demo), so turn["answer"] raised KeyError
                # on every second chat() call. Use .get with a default.
                answer = turn.get("answer", "")
                history_text += f"问:{turn['query']}\n答:{answer[:200]}...\n"

        context_text = "\n\n".join(
            f"【{c['title']}】\n{c['content']}" for c in contexts
        )

        prompt = f"""你是一个专业的问答助手。请基于提供的上下文信息和对话历史回答用户问题。

上下文信息:
{context_text}
{history_text}
当前问题:{query}

请给出回答:"""

        return prompt

    def chat(self, query: str) -> Dict:
        """Retrieve, build a history-aware prompt, and record the turn."""
        contexts = self.kb.retrieve(query)

        prompt = self.build_conversational_prompt(query, contexts)

        self.conversation_history.append({
            "query": query,
            "contexts": contexts,
            # Placeholder until an LLM response is attached by the caller.
            "answer": "",
        })

        return {
            "query": query,
            "contexts": contexts,
            "prompt": prompt,
        }

# Demo: two-turn conversation; the second prompt is meant to embed the first
# turn as history so "它" can be resolved to Qdrant.
conv_rag = ConversationalRAG(kb)

result1 = conv_rag.chat("Qdrant 是什么?")
print("第一轮对话提示词已生成")

result2 = conv_rag.chat("它支持哪些安装方式?")
print("第二轮对话提示词已生成(包含历史上下文)")

混合检索 #

python
class HybridRAG:
    """Hybrid retrieval: blends semantic (vector) scores with a simple
    keyword-overlap ranking, weighted by `alpha`."""

    def __init__(self, knowledge_base: "KnowledgeBase"):
        self.kb = knowledge_base

    def keyword_search(self, query: str, documents: List[Dict]) -> List[Dict]:
        """Rank `documents` by word overlap with `query`; return the top 3.

        NOTE(review): str.split() is whitespace-based, so for Chinese text
        (no spaces between words) the overlap signal is very coarse —
        confirm this is acceptable or plug in a segmenter.
        """
        query_words = set(query.lower().split())

        scored = []
        for doc in documents:
            content_words = set(doc["content"].lower().split())
            overlap = len(query_words & content_words)
            scored.append((doc, overlap))

        scored.sort(key=lambda item: item[1], reverse=True)
        return [doc for doc, _ in scored[:3]]

    def hybrid_retrieve(
        self,
        query: str,
        alpha: float = 0.5,
        documents: Optional[List[Dict]] = None,
    ) -> List[Dict]:
        """Fuse semantic and keyword results; higher alpha favors semantic.

        Fix/generalization: the original reached for the module-level
        `knowledge_base` global inside the method (hidden coupling). The
        corpus for keyword search is now an explicit `documents` parameter
        that defaults to that global for backward compatibility.

        Returns the top-3 fused results, each annotated with
        "semantic_score", "keyword_score" and "final_score".
        """
        if documents is None:
            documents = knowledge_base

        semantic_results = self.kb.retrieve(query, top_k=5)
        keyword_results = self.keyword_search(query, documents)

        combined: Dict[str, Dict] = {}

        # Semantic side: weight the vector similarity by alpha.
        for result in semantic_results:
            combined[result["id"]] = {
                **result,
                "semantic_score": result["score"],
                "keyword_score": 0,
                "final_score": result["score"] * alpha,
            }

        # Keyword side: rank-based score in (0, 1], weighted by (1 - alpha).
        for rank, result in enumerate(keyword_results):
            doc_id = result["id"]
            keyword_score = (len(keyword_results) - rank) / len(keyword_results) * (1 - alpha)

            if doc_id in combined:
                combined[doc_id]["keyword_score"] = keyword_score
                combined[doc_id]["final_score"] += keyword_score
            else:
                combined[doc_id] = {
                    **result,
                    "semantic_score": 0,
                    "keyword_score": keyword_score,
                    "final_score": keyword_score,
                }

        ranked = sorted(combined.values(), key=lambda x: x["final_score"], reverse=True)
        return ranked[:3]

# Demo: fuse semantic and keyword rankings for an installation query and
# print the per-channel and combined scores.
hybrid_rag = HybridRAG(kb)
results = hybrid_rag.hybrid_retrieve("Qdrant 安装部署")

print("混合检索结果:")
for r in results:
    print(f"  - {r['title']}")
    print(f"    语义分数: {r['semantic_score']:.4f}, 关键词分数: {r['keyword_score']:.4f}")
    print(f"    最终分数: {r['final_score']:.4f}")

重排序 #

python
class RerankedRAG:
    """Two-stage retrieval: fetch a larger candidate set from the vector
    store, then re-score candidates by exact cosine similarity."""

    def __init__(self, knowledge_base: "KnowledgeBase"):
        self.kb = knowledge_base

    def rerank(self, query: str, documents: List[Dict]) -> List[Dict]:
        """Return `documents` sorted by cosine similarity to `query` (desc),
        each augmented with a float "rerank_score" field.

        Fix: the original referenced `np` but numpy was never imported in
        this file, so rerank() raised NameError at runtime (`import numpy
        as np` is now part of the module imports). Candidate contents are
        also encoded in one batch call instead of one encode() per document.
        """
        query_embedding = np.asarray(self.kb.model.encode(query), dtype=float)

        doc_embeddings = np.asarray(
            self.kb.model.encode([doc["content"] for doc in documents]),
            dtype=float,
        )

        reranked = []
        for doc, doc_embedding in zip(documents, doc_embeddings):
            # Cosine similarity between query and document embeddings.
            similarity = np.dot(query_embedding, doc_embedding) / (
                np.linalg.norm(query_embedding) * np.linalg.norm(doc_embedding)
            )
            reranked.append({**doc, "rerank_score": float(similarity)})

        reranked.sort(key=lambda d: d["rerank_score"], reverse=True)
        return reranked

    def retrieve_with_rerank(self, query: str, top_k: int = 5, final_k: int = 3) -> List[Dict]:
        """Retrieve `top_k` candidates, rerank them, return the best `final_k`."""
        initial_results = self.kb.retrieve(query, top_k=top_k)
        return self.rerank(query, initial_results)[:final_k]

# Demo: rerank the vector-store candidates with exact cosine similarity.
reranked_rag = RerankedRAG(kb)
results = reranked_rag.retrieve_with_rerank("Qdrant 性能优化")

print("重排序后的结果:")
for r in results:
    print(f"  - {r['title']} (重排序分数: {r['rerank_score']:.4f})")

评估与优化 #

检索质量评估 #

python
def evaluate_retrieval(kb: "KnowledgeBase", test_cases: List[Dict]) -> Dict:
    """Evaluate retrieval quality with Hit Rate@5 and MRR.

    Each test case is {"query": str, "relevant_docs": [doc_id, ...]}.
    Hit rate counts queries whose top-5 results contain any relevant doc;
    MRR averages 1/rank of the first relevant result (0 when none found).

    Fix: guard against an empty test set — the original divided by zero.
    The annotation is a string forward reference so the function can be
    imported without the KnowledgeBase class in scope.
    """
    total = len(test_cases)
    if total == 0:
        # No cases: report zeros instead of raising ZeroDivisionError.
        return {"hit_rate": 0.0, "mrr": 0.0}

    hits = 0
    mrr_sum = 0.0

    for case in test_cases:
        query = case["query"]
        expected_ids = set(case["relevant_docs"])

        results = kb.retrieve(query, top_k=5)
        retrieved_ids = {r["id"] for r in results}

        if retrieved_ids & expected_ids:
            hits += 1

        # Reciprocal rank of the first relevant hit (ranks are 1-based).
        for rank, result in enumerate(results, start=1):
            if result["id"] in expected_ids:
                mrr_sum += 1 / rank
                break

    return {
        "hit_rate": hits / total,
        "mrr": mrr_sum / total,
    }

# Small labeled eval set: each query is paired with the ids of the
# knowledge_base entries that should be retrieved for it.
test_cases = [
    {
        "query": "Qdrant 如何安装",
        "relevant_docs": ["kb_002"]
    },
    {
        "query": "向量搜索的原理",
        "relevant_docs": ["kb_004"]
    },
    {
        "query": "Qdrant 性能优化",
        "relevant_docs": ["kb_007"]
    }
]

# Run the evaluation and report hit rate / mean reciprocal rank.
eval_results = evaluate_retrieval(kb, test_cases)
print(f"命中率: {eval_results['hit_rate']:.2%}")
print(f"MRR: {eval_results['mrr']:.4f}")

完整 RAG 示例 #

python
class ProductionRAG:
    """End-to-end RAG facade: owns its KnowledgeBase, keeps short
    conversation history, and produces grounded prompts with sources."""

    def __init__(self):
        self.kb = KnowledgeBase("production_rag")
        self.kb.build_index(knowledge_base)
        # Each turn: {"query": ..., "answer": ...}; only the last 2 turns
        # are injected into prompts.
        self.conversation_history = []

    def retrieve(self, query: str, top_k: int = 3) -> List[Dict]:
        """Proxy to the underlying knowledge-base retrieval."""
        return self.kb.retrieve(query, top_k)

    def build_prompt(self, query: str, contexts: List[Dict]) -> str:
        """Compose retrieved contexts + recent history + the new question."""
        context_text = "\n\n".join(
            f"【{c['title']}】\n{c['content']}" for c in contexts
        )

        history_text = ""
        if self.conversation_history:
            history_text = "\n\n历史对话:\n"
            for turn in self.conversation_history[-2:]:
                # Fix: use .get — turns may have an empty or missing answer
                # (no LLM is wired up here), and turn["answer"] would raise
                # KeyError otherwise.
                answer = turn.get("answer", "")
                history_text += f"问:{turn['query']}\n答:{answer[:100]}...\n"

        return f"""基于以下知识库内容回答问题。

知识库内容:
{context_text}
{history_text}
当前问题:{query}

请给出准确、详细的回答:"""

    def query(self, question: str) -> Dict:
        """Full pipeline: retrieve -> prompt -> record turn -> result dict."""
        contexts = self.retrieve(question)

        prompt = self.build_prompt(question, contexts)

        # Fix: the original never appended to conversation_history, so the
        # multi-turn section of build_prompt was dead code. Record the turn
        # with an empty answer placeholder.
        self.conversation_history.append({"query": question, "answer": ""})

        return {
            "question": question,
            "contexts": contexts,
            "prompt": prompt,
            "sources": [c["title"] for c in contexts],
        }

# Demo: run the full pipeline for a deployment question and show the
# retrieved sources plus a preview of the generated prompt.
prod_rag = ProductionRAG()

result = prod_rag.query("如何在生产环境部署 Qdrant?")

print("问题:", result["question"])
print("\n相关文档:")
for ctx in result["contexts"]:
    print(f"  - {ctx['title']} (相关度: {ctx['score']:.4f})")

print("\n生成的提示词(前 500 字符):")
print(result["prompt"][:500] + "...")

小结 #

本章实现了完整的 RAG 系统:

  • 知识库构建和索引
  • 基础 RAG 实现
  • LangChain 集成
  • 多轮对话支持
  • 混合检索和重排序
  • 检索质量评估

下一步 #

继续学习 推荐系统,了解如何构建个性化推荐应用!

最后更新:2026-04-04