Chatbot #
Project Overview #
In this chapter we build an intelligent chatbot that supports multi-turn conversation, memory management, and contextual understanding.
text
┌─────────────────────────────────────────────────────────────┐
│                    Chatbot Architecture                      │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│   User input                                                 │
│       │                                                      │
│       ▼                                                      │
│   ┌───────────────────────────────────────────────────┐     │
│   │                    Chat Engine                    │     │
│   │                                                   │     │
│   │   ┌─────────────┐    ┌─────────────┐              │     │
│   │   │   Memory    │───→│   Context   │              │     │
│   │   └─────────────┘    └─────────────┘              │     │
│   │          │                  │                     │     │
│   │          ▼                  ▼                     │     │
│   │   ┌─────────────┐    ┌─────────────┐              │     │
│   │   │  Retriever  │    │     LLM     │              │     │
│   │   └─────────────┘    └─────────────┘              │     │
│   │                                                   │     │
│   └───────────────────────────────────────────────────┘     │
│                              │                               │
│                              ▼                               │
│                         Reply output                         │
│                                                              │
└─────────────────────────────────────────────────────────────┘
Chat Engine Basics #
Simple Chat Engine #
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.chat_engine import SimpleChatEngine

# Load documents and build an index (reused by the later examples;
# SimpleChatEngine itself does not retrieve from it)
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Plain conversational engine with no retrieval
chat_engine = SimpleChatEngine.from_defaults()

response = chat_engine.chat("Hello!")
print(response)

# Conversation history is kept across turns
response = chat_engine.chat("What did I just say?")
print(response)
Index Chat Engine #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.chat_engine import ContextChatEngine

index = VectorStoreIndex.from_documents(documents)

# chat_mode="context" builds a ContextChatEngine on top of the index
chat_engine = index.as_chat_engine(
    chat_mode="context",
    verbose=True,
)

response = chat_engine.chat("What are the main topics of the documents?")
print(response)
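When `as_chat_engine` does not give you enough control, the same kind of engine can be assembled by hand from a retriever and a memory buffer. A minimal sketch (the `similarity_top_k` value and the system prompt here are illustrative assumptions):
python
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer

# Explicit construction: choose the retriever and memory yourself
retriever = index.as_retriever(similarity_top_k=3)
memory = ChatMemoryBuffer.from_defaults(token_limit=4096)

chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    memory=memory,
    system_prompt="Answer using the retrieved context whenever possible.",
)

response = chat_engine.chat("What are the main topics of the documents?")
print(response)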
ReAct Chat Engine #
python
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents)

# chat_mode="react" wraps the index's query engine in a ReAct agent
chat_engine = index.as_chat_engine(
    chat_mode="react",
    verbose=True,
)

response = chat_engine.chat("Find information about Python for me and summarize it")
print(response)
Chat Modes #
text
┌─────────────────────────────────────────────────────────────┐
│                     Chat Mode Comparison                     │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│   simple                                                     │
│   ├── Plain conversation, no retrieval                       │
│   └── Good for general chat                                  │
│                                                              │
│   context                                                    │
│   ├── Conversation grounded in retrieved context             │
│   ├── Retrieves relevant documents on every turn             │
│   └── Good for knowledge Q&A                                 │
│                                                              │
│   condense_question                                          │
│   ├── Condenses the question with the chat history           │
│   ├── Generates a standalone question, then retrieves        │
│   └── Good for multi-turn Q&A                                │
│                                                              │
│   react                                                      │
│   ├── ReAct agent mode                                       │
│   ├── Supports tool calling                                  │
│   └── Good for complex tasks                                 │
│                                                              │
│   openai                                                     │
│   ├── OpenAI-native mode                                     │
│   ├── Requires function calling                              │
│   └── Good for GPT models                                    │
│                                                              │
└─────────────────────────────────────────────────────────────┘
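The `openai` mode is the only one in the table without a dedicated example below; the sketch here shows how it would be selected, assuming a function-calling OpenAI model is configured as the LLM:
python
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# chat_mode="openai" relies on the LLM's native function calling
Settings.llm = OpenAI(model="gpt-4o-mini")

chat_engine = index.as_chat_engine(
    chat_mode="openai",
    verbose=True,
)

response = chat_engine.chat("What are the main topics of the documents?")
print(response)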
Condense Question Mode #
python
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents)

chat_engine = index.as_chat_engine(
    chat_mode="condense_question",
    verbose=True,
)

response = chat_engine.chat("What is LlamaIndex?")
print(response)

# The follow-up is condensed with the history into a standalone question
response = chat_engine.chat("What are its main features?")
print(response)
Condense Plus Context Mode #
python
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    verbose=True,
)

response = chat_engine.chat("Which technologies are mentioned in the documents?")
print(response)
Memory Management #
Basic Memory #
python
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=4096)

chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
)

response1 = chat_engine.chat("My name is Zhang San")
response2 = chat_engine.chat("What is my name?")
print(response2)
Custom Memory Limits #
python
from llama_index.core.memory import ChatMemoryBuffer

# Smaller buffer: keeps less history and saves tokens
memory = ChatMemoryBuffer.from_defaults(
    token_limit=2048,
)

# Larger buffer: keeps more history for long conversations
memory = ChatMemoryBuffer.from_defaults(
    token_limit=8192,
)
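To see what `token_limit` actually does, the buffer can be exercised directly, without going through a chat engine. A small sketch, assuming `ChatMessage` and `MessageRole` from `llama_index.core.llms` (the limit of 50 tokens is deliberately tiny):
python
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=50)

# Add more messages than the small limit can hold
for i in range(20):
    memory.put(ChatMessage(role=MessageRole.USER, content=f"this is test message number {i}"))

# get() returns only the most recent messages that fit within token_limit,
# while get_all() returns the full, untrimmed history
print(len(memory.get()), len(memory.get_all()))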
Getting the Conversation History #
python
chat_engine = index.as_chat_engine()

chat_engine.chat("Hello")
chat_engine.chat("How is the weather today?")

history = chat_engine.chat_history
for message in history:
    print(f"{message.role}: {message.content}")
Resetting Memory #
python
# reset() clears the engine's conversation memory
chat_engine.reset()
Streaming Output #
Basic Streaming #
python
chat_engine = index.as_chat_engine(
    chat_mode="context",
    streaming=True,
)

response = chat_engine.stream_chat("Your question")
for token in response.response_gen:
    print(token, end="", flush=True)
print()
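If all you want is to print the stream, the streaming response object also offers a convenience method for that (assuming the `StreamingAgentChatResponse` returned by `stream_chat`):
python
# Shortcut: write the streamed tokens straight to stdout
response = chat_engine.stream_chat("Your question")
response.print_response_stream()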
Async Streaming #
python
import asyncio

async def async_chat():
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        streaming=True,
    )
    response = await chat_engine.astream_chat("Your question")
    async for token in response.async_response_gen():
        print(token, end="", flush=True)
    print()

asyncio.run(async_chat())
Custom Prompts #
System Prompt #
python
system_prompt = """You are a professional technical support assistant.
Your responsibilities are:
1. Answer users' technical questions about the product
2. Provide clear, accurate solutions
3. If you are unsure, say so honestly
Always stay professional and friendly.
"""

chat_engine = index.as_chat_engine(
    chat_mode="context",
    system_prompt=system_prompt,
)
Custom Chat Templates #
python
from llama_index.core import PromptTemplate

condense_prompt_tmpl = """Given the following conversation and a follow-up question, rewrite the follow-up question as a standalone question.

Chat history:
{chat_history}

Follow-up question: {question}

Standalone question:"""

condense_prompt = PromptTemplate(condense_prompt_tmpl)

chat_engine = index.as_chat_engine(
    chat_mode="condense_question",
    condense_question_prompt=condense_prompt,
)
Full Chatbot Implementation #
Chatbot Class #
python
from typing import Optional, List
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import CondensePlusContextChatEngine


class Chatbot:
    def __init__(
        self,
        data_dir: str = "./data",
        model: str = "gpt-4o-mini",
        token_limit: int = 4096,
    ):
        self.data_dir = data_dir
        self.model = model
        self.token_limit = token_limit
        self.index: Optional[VectorStoreIndex] = None
        self.chat_engine: Optional[CondensePlusContextChatEngine] = None
        self.memory: Optional[ChatMemoryBuffer] = None

    def initialize(self):
        """Configure the models, load documents, and build the chat engine."""
        Settings.llm = OpenAI(model=self.model)
        Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

        documents = SimpleDirectoryReader(self.data_dir).load_data()
        print(f"Loaded {len(documents)} documents")

        self.index = VectorStoreIndex.from_documents(documents)
        self.memory = ChatMemoryBuffer.from_defaults(
            token_limit=self.token_limit
        )
        self.chat_engine = self.index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=self.memory,
            verbose=True,
        )
        print("Chatbot initialized")

    def chat(self, message: str) -> str:
        if self.chat_engine is None:
            raise ValueError("Chatbot is not initialized")
        response = self.chat_engine.chat(message)
        return str(response)

    def stream_chat(self, message: str):
        if self.chat_engine is None:
            raise ValueError("Chatbot is not initialized")
        # Separate streaming engine that shares the same memory buffer
        streaming_engine = self.index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=self.memory,
            streaming=True,
        )
        response = streaming_engine.stream_chat(message)
        for token in response.response_gen:
            yield token

    def get_history(self) -> List[dict]:
        if self.memory is None:
            return []
        history = []
        for message in self.memory.get_all():
            history.append({
                "role": message.role.value,
                "content": message.content,
            })
        return history

    def reset(self):
        if self.memory is not None:
            self.memory.reset()
        print("Conversation history has been reset")

    def set_system_prompt(self, prompt: str):
        self.chat_engine = self.index.as_chat_engine(
            chat_mode="condense_plus_context",
            memory=self.memory,
            system_prompt=prompt,
        )


def main():
    chatbot = Chatbot(data_dir="./data")
    chatbot.initialize()

    print("\n=== Intelligent Chatbot ===")
    print("Type 'quit' to exit")
    print("Type 'reset' to reset the conversation")
    print("Type 'history' to view the conversation history\n")

    while True:
        message = input("You: ").strip()
        if message.lower() == "quit":
            break
        if message.lower() == "reset":
            chatbot.reset()
            continue
        if message.lower() == "history":
            history = chatbot.get_history()
            for msg in history:
                print(f"  {msg['role']}: {msg['content'][:50]}...")
            print()
            continue
        if not message:
            continue

        print("Bot: ", end="")
        for token in chatbot.stream_chat(message):
            print(token, end="", flush=True)
        print("\n")


if __name__ == "__main__":
    main()
Multi-User Support #
User Session Management #
python
from typing import Dict
from llama_index.core.memory import ChatMemoryBuffer


class MultiUserChatbot:
    def __init__(self, index: VectorStoreIndex):
        self.index = index
        # One independent memory buffer per user id
        self.sessions: Dict[str, ChatMemoryBuffer] = {}

    def get_or_create_session(self, user_id: str) -> ChatMemoryBuffer:
        if user_id not in self.sessions:
            self.sessions[user_id] = ChatMemoryBuffer.from_defaults(
                token_limit=4096
            )
        return self.sessions[user_id]

    def chat(self, user_id: str, message: str) -> str:
        memory = self.get_or_create_session(user_id)
        chat_engine = self.index.as_chat_engine(
            chat_mode="context",
            memory=memory,
        )
        response = chat_engine.chat(message)
        return str(response)

    def reset_session(self, user_id: str):
        if user_id in self.sessions:
            self.sessions[user_id].reset()
            del self.sessions[user_id]
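A quick usage sketch, reusing the `index` built earlier in the chapter:
python
bot = MultiUserChatbot(index)

# Each user id gets its own isolated conversation memory
print(bot.chat("alice", "My name is Alice"))
print(bot.chat("bob", "My name is Bob"))
print(bot.chat("alice", "What is my name?"))  # recalls Alice, not Bob

bot.reset_session("bob")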
FastAPI Multi-User Chat #
python
from typing import Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Assumes the imports and the MultiUserChatbot class defined above
app = FastAPI(title="Chatbot API")
chatbot: Optional[MultiUserChatbot] = None


class ChatRequest(BaseModel):
    user_id: str
    message: str


class ChatResponse(BaseModel):
    response: str


@app.on_event("startup")
async def startup():
    global chatbot
    documents = SimpleDirectoryReader("./data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    chatbot = MultiUserChatbot(index)


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    if chatbot is None:
        raise HTTPException(status_code=500, detail="System not initialized")
    response = chatbot.chat(request.user_id, request.message)
    return ChatResponse(response=response)


@app.post("/reset/{user_id}")
async def reset(user_id: str):
    if chatbot is None:
        raise HTTPException(status_code=500, detail="System not initialized")
    chatbot.reset_session(user_id)
    return {"message": "Session has been reset"}
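For completeness, a client-side sketch of calling the two endpoints once the app is running; the base URL and the `requests` dependency are assumptions, not part of the chapter's code:
python
import requests

BASE_URL = "http://localhost:8000"  # assumed local development address

resp = requests.post(
    f"{BASE_URL}/chat",
    json={"user_id": "alice", "message": "Hello!"},
)
print(resp.json()["response"])

# Clear alice's session
requests.post(f"{BASE_URL}/reset/alice")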
Advanced Features #
Intent Recognition #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, FunctionTool


def get_weather(city: str) -> str:
    # Placeholder implementation; call a real weather API in production
    return f"Weather in {city}: sunny, 25°C"


# Knowledge-base lookup as a plain function (the QueryEngineTool below covers the same need)
knowledge_query_engine = index.as_query_engine()

def search_knowledge(query: str) -> str:
    return str(knowledge_query_engine.query(query))


weather_tool = FunctionTool.from_defaults(
    fn=get_weather,
    name="get_weather",
    description="Get the weather for a given city",
)

knowledge_tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(),
    name="knowledge_search",
    description="Search the knowledge base",
)

# The ReAct agent routes each message to the right tool based on its intent
agent = ReActAgent.from_tools(
    [weather_tool, knowledge_tool],
    verbose=True,
)

response = agent.chat("What's the weather like in Beijing today?")
Sentiment Analysis #
python
from llama_index.core import PromptTemplate
from llama_index.llms.openai import OpenAI

sentiment_prompt = """Analyze the sentiment of the following user message:

Message: {message}

Return exactly one of: positive, negative, neutral
Sentiment:"""


def analyze_sentiment(message: str) -> str:
    llm = OpenAI(model="gpt-4o-mini")
    prompt = PromptTemplate(sentiment_prompt)
    response = llm.complete(prompt.format(message=message))
    return response.text.strip()


def chat_with_sentiment(message: str) -> str:
    # Adjust the system prompt based on the detected sentiment
    sentiment = analyze_sentiment(message)
    if sentiment == "negative":
        system_prompt = "The user seems dissatisfied; answer with extra patience and friendliness."
    else:
        system_prompt = "Stay professional and friendly."
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        system_prompt=system_prompt,
    )
    return str(chat_engine.chat(message))
Next Steps #
With the chatbot complete, continue to Multimodal RAG to explore combined text-and-image retrieval!
Last updated: 2026-03-30