存储与持久化 #
概述 #
LlamaIndex 提供了灵活的存储抽象,支持向量存储、文档存储、索引存储等多种存储后端,可以轻松实现数据的持久化和高效检索。
text
┌─────────────────────────────────────────────────────────────┐
│ 存储架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ StorageContext │
│ ├── VectorStore 向量存储 │
│ │ ├── Chroma │
│ │ ├── Pinecone │
│ │ ├── Qdrant │
│ │ └── Weaviate │
│ │ │
│ ├── DocumentStore 文档存储 │
│ │ ├── SimpleDocumentStore │
│ │ └── MongoDBDocumentStore │
│ │ │
│ └── IndexStore 索引存储 │
│ ├── SimpleIndexStore │
│ └── RedisIndexStore │
│ │
└─────────────────────────────────────────────────────────────┘
本地持久化 #
基本持久化 #
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./storage")
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine()
response = query_engine.query("你的问题")
指定存储组件 #
python
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.vector_stores import SimpleVectorStore
storage_context = StorageContext.from_defaults(
docstore=SimpleDocumentStore(),
index_store=SimpleIndexStore(),
vector_store=SimpleVectorStore(),
)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
storage_context.persist(persist_dir="./storage")
向量存储 #
Chroma #
bash
pip install llama-index-vector-stores-chroma chromadb
python
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("my_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_collection("my_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(vector_store)
Pinecone #
bash
pip install llama-index-vector-stores-pinecone pinecone-client
python
from pinecone import Pinecone
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
pc = Pinecone(api_key="your-api-key")
pinecone_index = pc.Index("my-index")
vector_store = PineconeVectorStore(
pinecone_index=pinecone_index,
namespace="my-namespace",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
index = VectorStoreIndex.from_vector_store(vector_store)
Qdrant #
bash
pip install llama-index-vector-stores-qdrant qdrant-client
python
from qdrant_client import QdrantClient
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
client = QdrantClient(url="http://localhost:6333")
vector_store = QdrantVectorStore(
client=client,
collection_name="my_collection",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
Weaviate #
bash
pip install llama-index-vector-stores-weaviate weaviate-client
python
import weaviate
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.weaviate import WeaviateVectorStore
client = weaviate.Client("http://localhost:8080")
vector_store = WeaviateVectorStore(
weaviate_client=client,
index_name="MyIndex",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
Milvus #
bash
pip install llama-index-vector-stores-milvus pymilvus
python
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore
vector_store = MilvusVectorStore(
uri="http://localhost:19530",
collection_name="my_collection",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
Redis #
bash
pip install llama-index-vector-stores-redis redis
python
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.redis import RedisVectorStore
vector_store = RedisVectorStore(
redis_url="redis://localhost:6379",
index_name="my_index",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
文档存储 #
SimpleDocumentStore #
python
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
docstore = SimpleDocumentStore()
storage_context = StorageContext.from_defaults(docstore=docstore)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
doc = storage_context.docstore.get_document(doc_id)
all_docs = storage_context.docstore.docs
MongoDB Document Store #
bash
pip install llama-index-storage-docstore-mongodb pymongo
python
from llama_index.storage.docstore.mongodb import MongoDocumentStore
from llama_index.core import StorageContext
docstore = MongoDocumentStore.from_uri(
uri="mongodb://localhost:27017",
db_name="llamaindex",
collection_name="documents",
)
storage_context = StorageContext.from_defaults(docstore=docstore)
索引存储 #
SimpleIndexStore #
python
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core import StorageContext
index_store = SimpleIndexStore()
storage_context = StorageContext.from_defaults(index_store=index_store)
Redis Index Store #
bash
pip install llama-index-storage-index-store-redis redis
python
from llama_index.storage.index_store.redis import RedisIndexStore
from llama_index.core import StorageContext
index_store = RedisIndexStore.from_host_and_port(
host="localhost",
port=6379,
)
storage_context = StorageContext.from_defaults(index_store=index_store)
增量更新 #
添加文档 #
python
from llama_index.core import VectorStoreIndex, Document
index = VectorStoreIndex.from_documents([
Document(text="初始文档")
])
index.insert(Document(text="新文档 1"))
index.insert(Document(text="新文档 2"))
index.insert_nodes([node1, node2])
for doc in new_documents:
index.insert(doc)
删除文档 #
python
index.delete_ref_doc(doc_id)
index.delete_nodes([node_id1, node_id2])
index.delete(doc_id)  # 已弃用:请改用 delete_ref_doc(doc_id)
更新文档 #
python
index.delete_ref_doc(old_doc_id)
index.insert(new_document)
缓存 #
嵌入缓存 #
python
from llama_index.core import VectorStoreIndex
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
storage_context = StorageContext.from_defaults(
docstore=SimpleDocumentStore(),
)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
use_async=True,
)
Redis 缓存 #
python
from llama_index.storage.kvstore.redis import RedisKVStore
kvstore = RedisKVStore.from_host_and_port(
host="localhost",
port=6379,
)
kvstore.put("key", "value")
value = kvstore.get("key")
多索引管理 #
管理多个索引 #
python
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.storage.index_store import SimpleIndexStore
index_store = SimpleIndexStore()
storage_context = StorageContext.from_defaults(index_store=index_store)
index1 = VectorStoreIndex.from_documents(
    docs1,
    storage_context=storage_context,
)
index1.set_index_id("index_1")
index2 = VectorStoreIndex.from_documents(
    docs2,
    storage_context=storage_context,
)
index2.set_index_id("index_2")
storage_context.persist(persist_dir="./index_storage")
加载特定索引 #
python
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.storage.index_store import SimpleIndexStore
index_store = SimpleIndexStore.from_persist_dir("./index_storage")
index1 = load_index_from_storage(
StorageContext.from_defaults(index_store=index_store),
index_id="index_1",
)
index2 = load_index_from_storage(
StorageContext.from_defaults(index_store=index_store),
index_id="index_2",
)
完整示例 #
python
import os
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Settings,
    Document,
)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
os.environ["OPENAI_API_KEY"] = "sk-your-key"
Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("knowledge_base")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
documents = SimpleDirectoryReader("./data").load_data()
print(f"加载了 {len(documents)} 个文档")
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True,
)
print("索引构建完成")
query_engine = index.as_query_engine()
response = query_engine.query("文档的主要内容是什么?")
print(f"\n回答: {response}")
new_doc = Document(text="这是新添加的文档内容。")
index.insert(new_doc)
print("新文档已添加")
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_collection("knowledge_base")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(vector_store)
query_engine = index.as_query_engine()
response = query_engine.query("新文档的内容是什么?")
print(f"\n回答: {response}")
向量存储对比 #
text
┌─────────────────────────────────────────────────────────────┐
│ 向量存储对比 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 存储类型 特点 适用场景 │
│ ───────────────────────────────────────────────────────── │
│ Chroma 开源、易用、本地存储 开发测试 │
│ Pinecone 云托管、高性能 生产环境 │
│ Qdrant 开源、高性能 自托管生产 │
│ Weaviate 开源、功能丰富 企业级应用 │
│ Milvus 开源、分布式 大规模数据 │
│ Redis 快速、易部署 缓存场景 │
│ │
└─────────────────────────────────────────────────────────────┘
下一步 #
掌握存储与持久化后,接下来学习 文档问答系统 开始实战项目!
最后更新:2026-03-30