知识图谱实战 #
本章介绍如何使用 Weaviate 构建知识图谱。
知识图谱概述 #
text
知识图谱架构:
┌─────────────────────────────────────────────────────────────┐
│                          知识图谱                           │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│                         ┌─────────┐                         │
│                         │  公司   │                         │
│                         │ Weaviate│                         │
│                         └────┬────┘                         │
│                              │                              │
│                  ┌───────────┼───────────┐                  │
│                  │           │           │                  │
│                  ▼           ▼           ▼                  │
│             ┌─────────┐ ┌─────────┐ ┌─────────┐             │
│             │  产品   │ │  技术   │ │  人物   │             │
│             │Vector DB│ │   Go    │ │ 创始人  │             │
│             └────┬────┘ └────┬────┘ └────┬────┘             │
│                  │           │           │                  │
│                  ▼           ▼           ▼                  │
│             ┌─────────┐ ┌─────────┐ ┌─────────┐             │
│             │  功能   │ │  模块   │ │  项目   │             │
│             │语义搜索 │ │Vectorizer│ │  开源   │            │
│             └─────────┘ └─────────┘ └─────────┘             │
│                                                             │
└─────────────────────────────────────────────────────────────┘
知识图谱模型设计 #
定义实体类型 #
python
import weaviate
import weaviate.classes as wvc
client = weaviate.connect_to_local()

# Drop any earlier copies of the collections so the script can be re-run.
client.collections.delete(["Company", "Technology", "Person", "Product", "Feature"])

# Pass 1: create every collection with its scalar properties only.
# The entity types reference each other in cycles (Company <-> Product,
# Company <-> Technology, ...), and Weaviate rejects a cross-reference whose
# target collection does not exist yet, so the references cannot be declared
# while the collections are being created.
companies = client.collections.create(
    name="Company",
    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
    properties=[
        wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="description", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="founded", data_type=wvc.config.DataType.DATE),
        wvc.config.Property(name="headquarters", data_type=wvc.config.DataType.TEXT),
    ],
)
technologies = client.collections.create(
    name="Technology",
    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
    properties=[
        wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="description", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="type", data_type=wvc.config.DataType.TEXT),
    ],
)
persons = client.collections.create(
    name="Person",
    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
    properties=[
        wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="role", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="bio", data_type=wvc.config.DataType.TEXT),
    ],
)
products = client.collections.create(
    name="Product",
    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
    properties=[
        wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="description", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="type", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="license", data_type=wvc.config.DataType.TEXT),
    ],
)
features = client.collections.create(
    name="Feature",
    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
    properties=[
        wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
        wvc.config.Property(name="description", data_type=wvc.config.DataType.TEXT),
    ],
)

# Pass 2: now that every target exists, attach the cross-references.
# Each entry is (source collection, reference name, target collection name).
cross_references = [
    (companies, "develops", "Product"),
    (companies, "uses", "Technology"),
    (companies, "hasEmployee", "Person"),
    (technologies, "usedBy", "Company"),
    (technologies, "powers", "Product"),
    (persons, "worksAt", "Company"),
    (persons, "created", "Product"),
    (products, "developedBy", "Company"),
    (products, "hasFeature", "Feature"),
    (products, "builtWith", "Technology"),
    (features, "partOf", "Product"),
]
for source_collection, reference_name, target_name in cross_references:
    source_collection.config.add_reference(
        wvc.config.ReferenceProperty(name=reference_name, target_collection=target_name)
    )

print("知识图谱 Schema 创建完成")
构建知识图谱 #
添加实体数据 #
python
from sentence_transformers import SentenceTransformer
import numpy as np
# Multilingual sentence-embedding model shared by every call below.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')


def get_embedding(text):
    """Encode *text* with the shared model and return the result of ``.tolist()``."""
    encoded = model.encode(text)
    return encoded.tolist()
companies = client.collections.get("Company")
technologies = client.collections.get("Technology")
persons = client.collections.get("Person")
products = client.collections.get("Product")
features = client.collections.get("Feature")

# --- Technologies -----------------------------------------------------------
go_tech_uuid = technologies.data.insert(
    properties={"name": "Go", "description": "Go 编程语言", "type": "编程语言"},
    vector=get_embedding("Go 编程语言"),
)
rust_tech_uuid = technologies.data.insert(
    properties={"name": "Rust", "description": "Rust 编程语言", "type": "编程语言"},
    vector=get_embedding("Rust 编程语言"),
)

# --- Companies --------------------------------------------------------------
weaviate_uuid = companies.data.insert(
    properties={
        "name": "Weaviate",
        "description": "云原生向量数据库公司",
        "headquarters": "阿姆斯特丹",
    },
    vector=get_embedding("Weaviate 云原生向量数据库公司"),
    references={"uses": [go_tech_uuid]},
)
qdrant_uuid = companies.data.insert(
    properties={
        "name": "Qdrant",
        "description": "高性能向量数据库公司",
        "headquarters": "柏林",
    },
    vector=get_embedding("Qdrant 高性能向量数据库公司"),
    references={"uses": [rust_tech_uuid]},
)

# --- Products ---------------------------------------------------------------
weaviate_product_uuid = products.data.insert(
    properties={
        "name": "Weaviate",
        "description": "云原生向量数据库",
        "type": "数据库",
        "license": "BSD-3-Clause",
    },
    vector=get_embedding("Weaviate 云原生向量数据库"),
    references={
        "developedBy": [weaviate_uuid],
        "builtWith": [go_tech_uuid],
    },
)
qdrant_product_uuid = products.data.insert(
    properties={
        "name": "Qdrant",
        "description": "高性能向量搜索引擎",
        "type": "数据库",
        "license": "Apache-2.0",
    },
    vector=get_embedding("Qdrant 高性能向量搜索引擎"),
    references={
        "developedBy": [qdrant_uuid],
        "builtWith": [rust_tech_uuid],
    },
)

# --- Features ---------------------------------------------------------------
semantic_search_uuid = features.data.insert(
    properties={"name": "语义搜索", "description": "基于向量相似性的语义检索"},
    vector=get_embedding("语义搜索 向量相似性"),
    references={"partOf": [weaviate_product_uuid, qdrant_product_uuid]},
)
rag_uuid = features.data.insert(
    properties={"name": "RAG", "description": "检索增强生成"},
    vector=get_embedding("RAG 检索增强生成"),
    references={"partOf": [weaviate_product_uuid, qdrant_product_uuid]},
)
modules_uuid = features.data.insert(
    properties={"name": "模块化架构", "description": "内置向量化模块"},
    vector=get_embedding("模块化架构 内置向量化"),
    references={"partOf": [weaviate_product_uuid]},
)

# --- People -----------------------------------------------------------------
founder_uuid = persons.data.insert(
    properties={"name": "Bob van Luijt", "role": "创始人", "bio": "Weaviate 创始人兼 CEO"},
    vector=get_embedding("Bob van Luijt Weaviate 创始人"),
    references={"worksAt": [weaviate_uuid], "created": [weaviate_product_uuid]},
)

# --- Inverse links ----------------------------------------------------------
# A cross-reference is stored on the source object only; inserting
# Product.developedBy does not fill in Company.develops. The inverse
# direction of every relation is therefore written explicitly, otherwise
# queries that follow hasFeature / develops / hasEmployee / usedBy / powers
# find nothing. These links can only be added here because their targets
# were not inserted yet when the source objects were created.
inverse_links = [
    (technologies, go_tech_uuid, "usedBy", [weaviate_uuid]),
    (technologies, go_tech_uuid, "powers", [weaviate_product_uuid]),
    (technologies, rust_tech_uuid, "usedBy", [qdrant_uuid]),
    (technologies, rust_tech_uuid, "powers", [qdrant_product_uuid]),
    (companies, weaviate_uuid, "develops", [weaviate_product_uuid]),
    (companies, weaviate_uuid, "hasEmployee", [founder_uuid]),
    (companies, qdrant_uuid, "develops", [qdrant_product_uuid]),
    (products, weaviate_product_uuid, "hasFeature",
     [semantic_search_uuid, rag_uuid, modules_uuid]),
    (products, qdrant_product_uuid, "hasFeature",
     [semantic_search_uuid, rag_uuid]),
]
for source_collection, source_uuid, link_name, target_uuids in inverse_links:
    for target_uuid in target_uuids:
        source_collection.data.reference_add(
            from_uuid=source_uuid,
            from_property=link_name,
            to=target_uuid,
        )

print("知识图谱数据导入完成")
知识图谱查询 #
查询实体及其关系 #
python
products = client.collections.get("Product")

# Fetch products together with the objects they link to.
response = products.query.fetch_objects(
    limit=10,
    return_properties=["name", "description", "type"],
    return_references=[
        wvc.query.QueryReference(
            link_on="developedBy",
            return_properties=["name", "headquarters"],
        ),
        wvc.query.QueryReference(
            link_on="builtWith",
            return_properties=["name", "type"],
        ),
        wvc.query.QueryReference(
            link_on="hasFeature",
            return_properties=["name", "description"],
        ),
    ],
)

print("产品及其关系:")
for obj in response.objects:
    print(f"\n产品: {obj.properties['name']}")
    print(f"描述: {obj.properties['description']}")

    # NOTE(review): the client appears to leave out a reference entry when an
    # object has no links of that kind (and `references` may be None), so look
    # the links up defensively instead of indexing `obj.references[...]`.
    linked = {name: ref.objects for name, ref in (obj.references or {}).items()}

    if linked.get("developedBy"):
        company = linked["developedBy"][0]
        print(f"开发公司: {company.properties['name']} ({company.properties['headquarters']})")
    if linked.get("builtWith"):
        techs = [t.properties['name'] for t in linked["builtWith"]]
        print(f"使用技术: {', '.join(techs)}")
    if linked.get("hasFeature"):
        # Named feature_names so the `features` collection handle is not clobbered.
        feature_names = [f.properties['name'] for f in linked["hasFeature"]]
        print(f"功能特性: {', '.join(feature_names)}")
反向关系查询 #
python
technologies = client.collections.get("Technology")

# Walk the relations from the Technology side: who uses it, what it powers.
response = technologies.query.fetch_objects(
    limit=10,
    return_properties=["name", "type"],
    return_references=[
        wvc.query.QueryReference(
            link_on="usedBy",
            return_properties=["name", "headquarters"],
        ),
        wvc.query.QueryReference(
            link_on="powers",
            return_properties=["name", "type"],
        ),
    ],
)

print("技术及其使用者:")
for obj in response.objects:
    print(f"\n技术: {obj.properties['name']} ({obj.properties['type']})")

    # NOTE(review): reference entries seem to be absent for objects without
    # links, so avoid indexing `obj.references[...]` directly.
    linked = {name: ref.objects for name, ref in (obj.references or {}).items()}

    # Local names end in _names so the `companies` / `products` collection
    # handles are not overwritten with lists of strings.
    if linked.get("usedBy"):
        company_names = [c.properties['name'] for c in linked["usedBy"]]
        print(f"使用公司: {', '.join(company_names)}")
    if linked.get("powers"):
        product_names = [p.properties['name'] for p in linked["powers"]]
        print(f"驱动的产品: {', '.join(product_names)}")
多层关系查询 #
python
def get_product_ecosystem(product_name: str):
    """Fetch products named *product_name* with two levels of linked objects.

    Args:
        product_name: Exact value of the product's ``name`` property.

    Returns:
        The query response. Each returned product carries its ``developedBy``
        companies (each with their ``hasEmployee`` people) and its
        ``hasFeature`` features.
    """
    products = client.collections.get("Product")
    response = products.query.fetch_objects(
        # `Filter` is reached through `wvc.query` because no bare `Filter`
        # name has been imported at this point of the walkthrough.
        filters=wvc.query.Filter.by_property("name").equal(product_name),
        return_properties=["name", "description"],
        return_references=[
            wvc.query.QueryReference(
                link_on="developedBy",
                return_properties=["name"],
                # Second hop: from the company on to its employees.
                return_references=[
                    wvc.query.QueryReference(
                        link_on="hasEmployee",
                        return_properties=["name", "role"],
                    )
                ],
            ),
            wvc.query.QueryReference(
                link_on="hasFeature",
                return_properties=["name", "description"],
            ),
        ],
    )
    return response
result = get_product_ecosystem("Weaviate")
if result.objects:
    product = result.objects[0]
    print(f"产品: {product.properties['name']}")

    # NOTE(review): `references` may be None or lack a key when an object has
    # no links of that kind, so every hop is looked up defensively.
    product_links = {
        name: ref.objects for name, ref in (product.references or {}).items()
    }

    if product_links.get("developedBy"):
        company = product_links["developedBy"][0]
        print(f"公司: {company.properties['name']}")

        company_links = {
            name: ref.objects for name, ref in (company.references or {}).items()
        }
        if company_links.get("hasEmployee"):
            employees = [e.properties['name'] for e in company_links["hasEmployee"]]
            print(f"员工: {', '.join(employees)}")

    if product_links.get("hasFeature"):
        feature_names = [f.properties['name'] for f in product_links["hasFeature"]]
        print(f"功能: {', '.join(feature_names)}")
语义关联查询 #
向量搜索与关系结合 #
python
from weaviate.classes.query import Filter
def semantic_search_with_relations(query: str, limit: int = 5):
    """Run a near-vector search over Product and include linked objects.

    Args:
        query: Free text that is embedded with ``get_embedding``.
        limit: Maximum number of products to return.

    Returns:
        The query response; each product includes the ``name`` of its
        ``developedBy`` and ``hasFeature`` targets.
    """
    vector = get_embedding(query)
    # Both links only need the target's name, so build them the same way.
    linked = [
        wvc.query.QueryReference(link_on=link, return_properties=["name"])
        for link in ("developedBy", "hasFeature")
    ]
    return client.collections.get("Product").query.near_vector(
        near_vector=vector,
        limit=limit,
        return_properties=["name", "description"],
        return_references=linked,
    )
results = semantic_search_with_relations("高性能向量搜索")

print("语义搜索结果(含关系):")
for obj in results.objects:
    print(f"\n产品: {obj.properties['name']}")
    print(f"描述: {obj.properties['description']}")

    # NOTE(review): reference entries seem to be absent for objects without
    # links, so avoid indexing `obj.references[...]` directly.
    linked = {name: ref.objects for name, ref in (obj.references or {}).items()}

    if linked.get("developedBy"):
        company = linked["developedBy"][0]
        print(f"公司: {company.properties['name']}")
    if linked.get("hasFeature"):
        feature_names = [f.properties['name'] for f in linked["hasFeature"]]
        print(f"功能: {', '.join(feature_names)}")
关联推理查询 #
python
def find_related_technologies(tech_name: str):
    """Fetch technologies named *tech_name* with their users and those users' products.

    Args:
        tech_name: Exact value of the technology's ``name`` property.

    Returns:
        The query response; each technology carries its ``usedBy`` companies,
        and each company carries its ``develops`` products.
    """
    # Inner hop: company -> products it develops.
    company_products = wvc.query.QueryReference(
        link_on="develops",
        return_properties=["name", "type"],
    )
    # Outer hop: technology -> companies using it.
    tech_users = wvc.query.QueryReference(
        link_on="usedBy",
        return_properties=["name"],
        return_references=[company_products],
    )
    name_matches = Filter.by_property("name").equal(tech_name)
    return client.collections.get("Technology").query.fetch_objects(
        filters=name_matches,
        return_properties=["name"],
        return_references=[tech_users],
    )
result = find_related_technologies("Go")
if result.objects:
    tech = result.objects[0]
    print(f"技术: {tech.properties['name']}")

    # NOTE(review): `references` may be None or lack a key when an object has
    # no links of that kind, so both hops are looked up defensively.
    tech_links = {name: ref.objects for name, ref in (tech.references or {}).items()}

    related_products = []
    for company in tech_links.get("usedBy", []):
        company_links = {
            name: ref.objects for name, ref in (company.references or {}).items()
        }
        for product in company_links.get("develops", []):
            related_products.append({
                "company": company.properties["name"],
                "product": product.properties["name"],
                "type": product.properties["type"],
            })

    print("\n相关产品:")
    for p in related_products:
        print(f"- {p['product']} ({p['type']}) by {p['company']}")
知识图谱可视化 #
导出图数据 #
python
def export_graph_data():
    """Export the graph as plain dicts suitable for a visualisation tool.

    Nodes come from all five entity collections; edges are taken from the
    outgoing links of Product objects only (``developedBy``, ``builtWith``,
    ``hasFeature``). Each fetch is capped at 100 objects.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` where a node is
        ``{"id", "label", "type"}`` and an edge is
        ``{"source", "target", "relationship"}``.
    """
    node_collections = ["Company", "Technology", "Person", "Product", "Feature"]
    # Single source of truth for the links that are requested and then read.
    edge_links = ["developedBy", "builtWith", "hasFeature"]

    nodes = []
    for collection_name in node_collections:
        collection = client.collections.get(collection_name)
        response = collection.query.fetch_objects(limit=100)
        for obj in response.objects:
            nodes.append({
                "id": str(obj.uuid),
                "label": obj.properties.get("name", "Unknown"),
                "type": collection_name,
            })

    products = client.collections.get("Product")
    response = products.query.fetch_objects(
        limit=100,
        return_references=[
            wvc.query.QueryReference(link_on=link) for link in edge_links
        ],
    )

    edges = []
    for obj in response.objects:
        source_id = str(obj.uuid)
        # NOTE(review): reference entries seem to be absent for objects
        # without links, so avoid indexing `obj.references[...]` directly.
        refs = obj.references or {}
        for ref_name in edge_links:
            ref = refs.get(ref_name)
            if ref is None:
                continue
            for target in ref.objects:
                edges.append({
                    "source": source_id,
                    "target": str(target.uuid),
                    "relationship": ref_name,
                })

    return {"nodes": nodes, "edges": edges}
# Export once, then report how large the resulting graph is.
graph_data = export_graph_data()
node_count = len(graph_data["nodes"])
edge_count = len(graph_data["edges"])
print(f"节点数: {node_count}")
print(f"边数: {edge_count}")
知识图谱更新 #
添加新关系 #
python
def add_relationship(
    from_collection: str,
    from_uuid: str,
    relation: str,
    to_uuid: str
):
    """Add one cross-reference from an existing object to another object.

    Args:
        from_collection: Name of the collection holding the source object.
        from_uuid: UUID of the source object.
        relation: Name of the reference property on the source collection.
        to_uuid: UUID of the object to link to.
    """
    collection = client.collections.get(from_collection)
    # The client's keyword for the target is `to`, not `to_uuid`.
    collection.data.reference_add(
        from_uuid=from_uuid,
        from_property=relation,
        to=to_uuid,
    )
# Example call: link the Weaviate company to the Qdrant product via "develops".
add_relationship(
    from_collection="Company",
    from_uuid=str(weaviate_uuid),
    relation="develops",
    to_uuid=str(qdrant_product_uuid),
)
删除关系 #
python
def remove_relationship(
    from_collection: str,
    from_uuid: str,
    relation: str,
    to_uuid: str
):
    """Remove one cross-reference between two existing objects.

    Args:
        from_collection: Name of the collection holding the source object.
        from_uuid: UUID of the source object.
        relation: Name of the reference property on the source collection.
        to_uuid: UUID of the linked object to detach.
    """
    collection = client.collections.get(from_collection)
    # The client's keyword for the target is `to`, not `to_uuid`.
    collection.data.reference_delete(
        from_uuid=from_uuid,
        from_property=relation,
        to=to_uuid,
    )
完整示例 #
python
import weaviate
import weaviate.classes as wvc
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')


def get_embedding(text):
    """Encode *text* with the shared model and return the result of ``.tolist()``."""
    return model.encode(text).tolist()


client = weaviate.connect_to_local()
try:
    # Start clean. No try/except is needed: deleting a collection that does
    # not exist is not expected to raise, and real failures (e.g. no
    # connection) should surface here rather than be swallowed.
    client.collections.delete(["Company", "Product", "Feature"])

    # Company and Product reference each other, and a cross-reference target
    # must already exist when the reference is defined. Company is therefore
    # created without its reference, which is attached once Product exists.
    companies = client.collections.create(
        name="Company",
        vectorizer_config=wvc.config.Configure.Vectorizer.none(),
        properties=[wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT)],
    )
    features = client.collections.create(
        name="Feature",
        vectorizer_config=wvc.config.Configure.Vectorizer.none(),
        properties=[wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT)],
    )
    products = client.collections.create(
        name="Product",
        vectorizer_config=wvc.config.Configure.Vectorizer.none(),
        properties=[
            wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
            wvc.config.Property(name="type", data_type=wvc.config.DataType.TEXT),
        ],
        references=[
            wvc.config.ReferenceProperty(name="developedBy", target_collection="Company"),
            wvc.config.ReferenceProperty(name="hasFeature", target_collection="Feature"),
        ],
    )
    companies.config.add_reference(
        wvc.config.ReferenceProperty(name="develops", target_collection="Product")
    )

    # Insert one object of each kind.
    company_uuid = companies.data.insert(
        properties={"name": "Weaviate"},
        vector=get_embedding("Weaviate"),
    )
    product_uuid = products.data.insert(
        properties={"name": "Weaviate DB", "type": "向量数据库"},
        vector=get_embedding("Weaviate 向量数据库"),
        references={"developedBy": [company_uuid]},
    )
    feature_uuid = features.data.insert(
        properties={"name": "语义搜索"},
        vector=get_embedding("语义搜索"),
    )

    # Link the product to its feature after both exist.
    # The client's keyword for the target is `to`, not `to_uuid`.
    products.data.reference_add(
        from_uuid=product_uuid,
        from_property="hasFeature",
        to=feature_uuid,
    )
    print("知识图谱构建完成\n")

    response = products.query.fetch_objects(
        return_properties=["name", "type"],
        return_references=[
            wvc.query.QueryReference(link_on="developedBy", return_properties=["name"]),
            wvc.query.QueryReference(link_on="hasFeature", return_properties=["name"]),
        ],
    )
    for obj in response.objects:
        print(f"产品: {obj.properties['name']}")
        # NOTE(review): reference entries seem to be absent for objects
        # without links, so avoid indexing `obj.references[...]` directly.
        linked = {name: ref.objects for name, ref in (obj.references or {}).items()}
        if linked.get("developedBy"):
            print(f" 公司: {linked['developedBy'][0].properties['name']}")
        if linked.get("hasFeature"):
            print(f" 功能: {linked['hasFeature'][0].properties['name']}")
finally:
    # Always release the connection, even when a step above fails.
    client.close()
小结 #
本章介绍了使用 Weaviate 构建知识图谱:
- 知识图谱模型设计
- 实体和关系定义
- 知识图谱构建
- 关系查询
- 语义关联查询
- 知识图谱可视化
- 知识图谱更新
总结 #
恭喜你完成了 Weaviate 文档的学习!你现在应该掌握了:
- Weaviate 的核心概念和架构
- 数据建模和 Schema 设计
- 向量操作和搜索查询
- 模块化架构和向量化
- 分布式部署和高可用
- 监控运维和性能优化
- 语义搜索、RAG、知识图谱等实战应用
继续探索 Weaviate,构建你的 AI 应用!
最后更新:2026-04-04