对象管理 #
本章详细介绍 Weaviate 的对象管理操作。
对象操作概览 #
text
Weaviate 对象操作:
┌─────────────────────────────────────────────────────────────┐
│ 对象操作 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 创建 (Create) │
│ ├── insert: 插入单个对象 │
│ └── batch: 批量插入对象 │
│ │
│ 读取 (Read) │
│ ├── fetch_object_by_id: 根据 ID 获取 │
│ └── fetch_objects: 批量获取 │
│ │
│ 更新 (Update) │
│ ├── update: 更新属性 │
│ ├── replace: 替换整个对象 │
│ └── update(vector=...): 更新向量 │
│ │
│ 删除 (Delete) │
│ ├── delete_by_id: 删除单个对象 │
│ └── delete_many: 批量删除 │
│ │
│ 引用 (Reference) │
│ ├── reference_add: 添加引用 │
│ ├── reference_delete: 删除引用 │
│ └── reference_replace: 替换引用 │
│ │
└─────────────────────────────────────────────────────────────┘
创建对象 #
插入单个对象 #
python
import weaviate.classes as wvc

# `client` is assumed to be an already-connected Weaviate client.
articles = client.collections.get("Article")

# Build the property payload first so the insert call stays short.
article_properties = {
    "title": "Weaviate 入门指南",
    "content": "Weaviate 是一个云原生向量数据库...",
    "category": "技术",
    "views": 1000,
}

# The value returned by insert() is kept as the new object's identifier.
uuid = articles.data.insert(article_properties)
print(f"Created object: {uuid}")
插入带 UUID #
python
from uuid import uuid4

# Generate the identifier on the client side and hand it over as a string.
custom_uuid = str(uuid4())

custom_uuid_properties = {
    "title": "自定义 UUID 文章",
    "content": "使用自定义 UUID 创建对象...",
}
uuid = articles.data.insert(properties=custom_uuid_properties, uuid=custom_uuid)
print(f"Created with custom UUID: {uuid}")
插入带向量 #
python
import numpy as np

# Random 1536-element placeholder standing in for a real embedding.
vector = np.random.rand(1536).tolist()

vector_article_properties = {
    "title": "自定义向量文章",
    "content": "这篇文章使用自定义向量...",
}
uuid = articles.data.insert(properties=vector_article_properties, vector=vector)
插入带引用 #
python
authors = client.collections.get("Author")
articles = client.collections.get("Article")

# Create the referenced author first; its identifier is the link target.
author_properties = {
    "name": "张三",
    "email": "zhangsan@example.com",
}
author_uuid = authors.data.insert(author_properties)

# Create the article and link it to the author through "writtenBy".
linked_article_properties = {
    "title": "引用作者的文章",
    "content": "这篇文章关联了作者...",
}
article_uuid = articles.data.insert(
    properties=linked_article_properties,
    references={"writtenBy": author_uuid},
)
批量操作 #
批量插入 #
python
articles = client.collections.get("Article")

objects = [
    {"title": "文章1", "content": "内容1", "views": 100},
    {"title": "文章2", "content": "内容2", "views": 200},
    {"title": "文章3", "content": "内容3", "views": 300},
]

# Queue every payload inside the batch context manager.
with articles.batch.dynamic() as batch:
    for article_properties in objects:
        batch.add_object(properties=article_properties)

print("Batch insert completed")
批量插入配置 #
python
# Fixed-size batching: objects are sent in groups of `batch_size`.
# (The previously imported `get_valid_uuid` was never used and is dropped.)
with articles.batch.fixed_size(batch_size=100) as batch:
    for i in range(1000):
        batch.add_object(
            properties={
                "title": f"文章 {i}",
                "content": f"这是第 {i} 篇文章",
                "views": i * 10,
            }
        )

# Failures are read from the collection once the batch context has exited.
print(f"Total errors: {len(articles.batch.failed_objects)}")
批量插入错误处理 #
python
# Queue 100 objects through the dynamic batcher.
with articles.batch.dynamic() as batch:
    for i in range(100):
        article_properties = {
            "title": f"文章 {i}",
            "content": f"内容 {i}",
        }
        batch.add_object(properties=article_properties)

# After the context exits, report each object that failed to import.
failed_objects = articles.batch.failed_objects
for error in failed_objects:
    print(f"Error: {error.message}")
    print(f"Object: {error.object_}")
批量引用 #
python
authors = client.collections.get("Author")
articles = client.collections.get("Article")

# Create ten authors and keep their identifiers. A comprehension is used so
# the shared `uuid` name (an Article id that later snippets read) is not
# overwritten with an Author id.
author_uuids = [
    authors.data.insert({"name": f"作者 {i}"})
    for i in range(10)
]

# Create one article per author, linked through the "writtenBy" reference.
with articles.batch.dynamic() as batch:
    for i, author_uuid in enumerate(author_uuids):
        batch.add_object(
            properties={"title": f"文章 {i}"},
            references={"writtenBy": author_uuid},
        )
读取对象 #
根据 ID 获取 #
python
articles = client.collections.get("Article")

# fetch_object_by_id returns None when no object has the given id, so guard
# before touching its attributes.
article = articles.query.fetch_object_by_id(uuid)
if article is None:
    print("Object not found")
else:
    print(f"Title: {article.properties['title']}")
    print(f"Content: {article.properties['content']}")
    print(f"UUID: {article.uuid}")
获取带向量 #
python
# Ask for the stored vector together with the object.
article = articles.query.fetch_object_by_id(uuid=uuid, include_vector=True)

# The vector mapping is read under the 'default' key.
default_vector = article.vector['default']
print(f"Vector dimensions: {len(default_vector)}")
批量获取 #
python
# Fetch one page of objects.
response = articles.query.fetch_objects(
    limit=10,
    offset=0,
)

# The query result only carries the returned objects; it has no total_count.
# The collection-wide count comes from the aggregate API instead.
total = articles.aggregate.over_all(total_count=True).total_count
print(f"Total: {total}")

for obj in response.objects:
    print(f"- {obj.properties['title']}")
获取带属性选择 #
python
# Only the listed properties are requested from the server.
selected_properties = ["title", "views"]
response = articles.query.fetch_objects(
    limit=10,
    return_properties=selected_properties,
)

for obj in response.objects:
    props = obj.properties
    print(f"- {props['title']} (views: {props['views']})")
获取带引用 #
python
# Resolve the "writtenBy" cross-reference and return the author's name/email.
response = articles.query.fetch_objects(
    limit=10,
    return_references=[
        wvc.query.QueryReference(
            link_on="writtenBy",
            return_properties=["name", "email"],
        )
    ],
)

for obj in response.objects:
    # Articles without an author have no "writtenBy" entry (or an empty one),
    # so look it up defensively instead of indexing straight into it.
    written_by = (obj.references or {}).get("writtenBy")
    if written_by is None or not written_by.objects:
        print(f"- {obj.properties['title']} (no author)")
        continue
    author = written_by.objects[0]
    print(f"- {obj.properties['title']} by {author.properties['name']}")
更新对象 #
更新属性 #
python
articles = client.collections.get("Article")

# Take the first object returned by the collection as the one to modify.
article = articles.query.fetch_objects(limit=1).objects[0]
current = article.properties

# Re-send title and content unchanged and bump the view counter by 100.
updated_properties = {
    "title": current["title"],
    "content": current["content"],
    "views": current["views"] + 100,
}
articles.data.update(uuid=article.uuid, properties=updated_properties)
print("Object updated")
部分更新 #
python
# Only "views" is sent in this update call.
views_only = {"views": 2000}
articles.data.update(uuid=article.uuid, properties=views_only)
替换对象 #
python
# Full set of properties passed to replace() for this object.
replacement_properties = {
    "title": "全新的标题",
    "content": "全新的内容",
    "category": "更新",
    "views": 0,
}
articles.data.replace(uuid=article.uuid, properties=replacement_properties)
更新向量 #
python
import numpy as np

# Fresh random 1536-element vector to store on the existing object.
new_vector = np.random.rand(1536).tolist()

# Only the vector is passed to update(); no properties are sent.
articles.data.update(uuid=article.uuid, vector=new_vector)
更新引用 #
python
# Create the new author that the article should point to.
new_author_properties = {"name": "李四"}
new_author_uuid = authors.data.insert(new_author_properties)

# Pass the new link target for "writtenBy" through update().
new_references = {"writtenBy": new_author_uuid}
articles.data.update(uuid=article.uuid, references=new_references)
删除对象 #
删除单个对象 #
python
articles = client.collections.get("Article")

# Pick the first object the collection returns and delete it by id.
first_page = articles.query.fetch_objects(limit=1)
article = first_page.objects[0]

result = articles.data.delete_by_id(article.uuid)
print(f"Deleted: {result}")
批量删除 #
python
from weaviate.classes.query import Filter

# Match every object whose "views" property is below 100.
low_views_filter = Filter.by_property("views").less_than(100)

result = articles.data.delete_many(where=low_views_filter)
print(f"Deleted {result.successful} objects")
print(f"Failed: {result.failed}")
删除所有对象 #
python
# A single delete_many call is capped by the server's maximum query result
# size, so repeat the call until a pass deletes nothing more.
match_all_titles = Filter.by_property("title").like("*")

total_deleted = 0
while True:
    result = articles.data.delete_many(where=match_all_titles)
    total_deleted += result.successful
    # Stop when nothing was removed; this also avoids looping forever if the
    # remaining matches keep failing to delete.
    if result.successful == 0:
        break

print(f"Deleted {total_deleted} objects")
删除带确认 #
python
# Consistency is chosen on the collection handle, not per call:
# delete_by_id() has no consistency_level parameter.
articles_all = articles.with_consistency_level(wvc.config.ConsistencyLevel.ALL)

result = articles_all.data.delete_by_id(article.uuid)
print(f"Deleted with consistency: {result}")
引用管理 #
添加引用 #
python
articles = client.collections.get("Article")
categories = client.collections.get("Category")

# Source of the link: the first article returned by the collection.
article_uuid = articles.query.fetch_objects(limit=1).objects[0].uuid
# Target of the link: a freshly created category.
category_uuid = categories.data.insert({"name": "新技术"})

# The target is passed with the `to` keyword (there is no `to_uuid` parameter).
articles.data.reference_add(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=category_uuid,
)
批量添加引用 #
python
category_uuids = [
    categories.data.insert({"name": "AI"}),
    categories.data.insert({"name": "数据库"}),
    categories.data.insert({"name": "教程"}),
]

# reference_add_many takes a list of DataReference objects, one per link;
# each carries its own from_property / from_uuid / to_uuid.
category_links = [
    wvc.data.DataReference(
        from_property="hasCategory",
        from_uuid=article_uuid,
        to_uuid=target_uuid,
    )
    for target_uuid in category_uuids
]
articles.data.reference_add_many(category_links)
删除引用 #
python
# Remove the link to the first category; the target is passed with `to`
# (there is no `to_uuid` parameter).
articles.data.reference_delete(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=category_uuids[0],
)
替换引用 #
python
new_category_uuid = categories.data.insert({"name": "向量数据库"})

# Replace all existing "hasCategory" links with the new target. The targets
# are passed with `to` (there is no `to_uuids` parameter).
articles.data.reference_replace(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=[new_category_uuid],
)
存在性检查 #
检查对象是否存在 #
python
# Ask the collection whether an object with this id is present.
exists = articles.data.exists(uuid)

# Print one of two messages depending on the answer.
print("Object exists" if exists else "Object not found")
检查引用是否存在 #
python
# Fetch the article with its "hasCategory" links resolved.
article = articles.query.fetch_object_by_id(
    uuid=article_uuid,
    return_references=[
        wvc.query.QueryReference(link_on="hasCategory"),
    ],
)

# An article with no links has no "hasCategory" entry at all, and a missing
# article comes back as None. Look the entry up defensively so the
# "No references" branch is actually reachable.
category_refs = None
if article is not None:
    category_refs = (article.references or {}).get("hasCategory")

if category_refs is not None and category_refs.objects:
    print("Has references")
else:
    print("No references")
一致性级别 #
一致性级别说明 #
text
一致性级别:
ONE:
├── 只需一个节点确认
├── 最快,但可能不一致
└── 适合高吞吐量场景
QUORUM (默认):
├── 需要多数节点确认
├── 平衡性能和一致性
└── 推荐大多数场景
ALL:
├── 需要所有节点确认
├── 最强一致性,最慢
└── 适合关键数据
配置一致性级别 #
python
import weaviate.classes as wvc

# insert() has no consistency_level parameter; the level is set on a
# collection handle, and calls made through that handle use it.
articles_all = articles.with_consistency_level(wvc.config.ConsistencyLevel.ALL)

uuid = articles_all.data.insert(
    properties={
        "title": "高一致性文章",
        "content": "使用 ALL 一致性级别...",
    },
)
读取一致性 #
python
# Reads take their consistency level from the collection handle as well;
# fetch_object_by_id() has no consistency_level parameter.
article = articles.with_consistency_level(
    wvc.config.ConsistencyLevel.ALL
).query.fetch_object_by_id(uuid=uuid)
租户数据操作 #
租户对象操作 #
python
articles = client.collections.get("Article")

# Handle scoped to a single tenant; the calls below go through it.
tenant_a = articles.with_tenant("tenant_a")

tenant_article_properties = {
    "title": "租户A的文章",
    "content": "这是租户A的数据...",
}
tenant_a.data.insert(tenant_article_properties)

# List up to ten objects through the same tenant-scoped handle.
response = tenant_a.query.fetch_objects(limit=10)
for obj in response.objects:
    print(f"- {obj.properties['title']}")
租户批量操作 #
python
# Batch import through the tenant-scoped handle.
with tenant_a.batch.dynamic() as batch:
    for i in range(100):
        tenant_article_properties = {
            "title": f"租户A文章 {i}",
            "content": f"内容 {i}",
        }
        batch.add_object(properties=tenant_article_properties)
完整示例 #
python
import weaviate
import weaviate.classes as wvc
from weaviate.classes.query import Filter
import numpy as np

# End-to-end walkthrough: create a collection, batch-insert, query, update,
# delete and count. The work is wrapped in try/finally so the client
# connection is closed even when one of the steps raises.
client = weaviate.connect_to_local()
try:
    # Start from a clean slate, then create the collection with no vectorizer
    # (vectors are supplied by hand below).
    client.collections.delete("Product")
    products = client.collections.create(
        name="Product",
        vectorizer_config=wvc.config.Configure.Vectorizer.none(),
        properties=[
            wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
            wvc.config.Property(name="price", data_type=wvc.config.DataType.NUMBER),
            wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT),
            wvc.config.Property(name="tags", data_type=wvc.config.DataType.TEXT_ARRAY),
        ],
    )

    sample_products = [
        {"name": "iPhone 15", "price": 7999, "category": "手机", "tags": ["苹果", "智能手机"]},
        {"name": "MacBook Pro", "price": 14999, "category": "电脑", "tags": ["苹果", "笔记本"]},
        {"name": "AirPods Pro", "price": 1899, "category": "配件", "tags": ["苹果", "耳机"]},
        {"name": "Galaxy S24", "price": 6999, "category": "手机", "tags": ["三星", "智能手机"]},
        {"name": "ThinkPad X1", "price": 9999, "category": "电脑", "tags": ["联想", "笔记本"]},
    ]

    # Batch-insert each product with a random 64-element placeholder vector.
    with products.batch.dynamic() as batch:
        for product in sample_products:
            vector = np.random.rand(64).tolist()
            batch.add_object(properties=product, vector=vector)

    # Report what was really stored: failed objects are listed on the
    # collection after the batch context exits.
    failed_objects = products.batch.failed_objects
    for error in failed_objects:
        print(f"Error: {error.message}")
    print(f"Inserted {len(sample_products) - len(failed_objects)} products\n")

    # List everything.
    response = products.query.fetch_objects(limit=10)
    print("所有产品:")
    for obj in response.objects:
        print(f"- {obj.properties['name']}: ¥{obj.properties['price']}")

    # List only the products in the "手机" category.
    response = products.query.fetch_objects(
        filters=Filter.by_property("category").equal("手机"),
        limit=10,
    )
    print("\n手机类别产品:")
    for obj in response.objects:
        print(f"- {obj.properties['name']}")

    # Apply a 10% discount to the first product returned.
    first_product = products.query.fetch_objects(limit=1).objects[0]
    products.data.update(
        uuid=first_product.uuid,
        properties={"price": first_product.properties["price"] * 0.9},
    )
    print(f"\n更新了 {first_product.properties['name']} 的价格")

    # Delete every product priced below 2000.
    result = products.data.delete_many(
        where=Filter.by_property("price").less_than(2000)
    )
    print(f"\n删除了 {result.successful} 个低价产品")

    # Count what is left.
    response = products.aggregate.over_all(total_count=True)
    print(f"\n剩余产品数量: {response.total_count}")
finally:
    client.close()
小结 #
本章介绍了 Weaviate 的对象管理:
- 创建单个和批量对象
- 读取对象
- 更新和替换对象
- 删除单个和批量对象
- 引用管理
- 一致性级别
- 租户数据操作
下一步 #
继续学习 模块与向量化,了解 Weaviate 的模块化架构!
最后更新:2026-04-04