对象管理 #

本章详细介绍 Weaviate 的对象管理操作。

对象操作概览 #

text
Weaviate 对象操作:

┌─────────────────────────────────────────────────────────────┐
│                      对象操作                                │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│  创建 (Create)                                               │
│  ├── insert: 插入单个对象                                    │
│  └── batch: 批量插入对象                                     │
│                                                              │
│  读取 (Read)                                                 │
│  ├── fetch_object_by_id: 根据 ID 获取                        │
│  └── fetch_objects: 批量获取                                 │
│                                                              │
│  更新 (Update)                                               │
│  ├── update: 更新属性                                        │
│  ├── replace: 替换整个对象                                   │
│  └── update(vector=...): 更新向量                            │
│                                                              │
│  删除 (Delete)                                               │
│  ├── delete_by_id: 删除单个对象                              │
│  └── delete_many: 批量删除                                   │
│                                                              │
│  引用 (Reference)                                            │
│  ├── reference_add: 添加引用                                 │
│  ├── reference_delete: 删除引用                              │
│  └── reference_replace: 替换引用                             │
│                                                              │
└─────────────────────────────────────────────────────────────┘

创建对象 #

插入单个对象 #

python
import weaviate.classes as wvc

# Look up the "Article" collection; `client` is assumed to be an
# already-connected Weaviate client created elsewhere.
articles = client.collections.get("Article")

# Properties of the new article, kept in a named dict for readability.
new_article = {
    "title": "Weaviate 入门指南",
    "content": "Weaviate 是一个云原生向量数据库...",
    "category": "技术",
    "views": 1000,
}

# insert() hands back the identifier of the stored object.
uuid = articles.data.insert(properties=new_article)

print(f"Created object: {uuid}")

插入带 UUID #

python
from uuid import uuid4

# Generate the identifier on the client side and keep it as a string.
custom_uuid = str(uuid4())

custom_article = {
    "title": "自定义 UUID 文章",
    "content": "使用自定义 UUID 创建对象...",
}

# Pass the pre-generated identifier explicitly through `uuid=`.
uuid = articles.data.insert(properties=custom_article, uuid=custom_uuid)

print(f"Created with custom UUID: {uuid}")

插入带向量 #

python
import numpy as np

# Random 1536-element placeholder vector, converted to a plain Python list.
vector = np.random.rand(1536).tolist()

vector_article = {
    "title": "自定义向量文章",
    "content": "这篇文章使用自定义向量...",
}

# Supply the vector alongside the properties.
uuid = articles.data.insert(properties=vector_article, vector=vector)

插入带引用 #

python
# Collection handles for both sides of the reference.
authors = client.collections.get("Author")
articles = client.collections.get("Article")

# Create the author first so its identifier can be referenced below.
author_properties = {"name": "张三", "email": "zhangsan@example.com"}
author_uuid = authors.data.insert(properties=author_properties)

# Insert the article and point its "writtenBy" reference at that author.
referencing_article = {
    "title": "引用作者的文章",
    "content": "这篇文章关联了作者...",
}
article_uuid = articles.data.insert(
    properties=referencing_article,
    references={"writtenBy": author_uuid},
)

批量操作 #

批量插入 #

python
articles = client.collections.get("Article")

# Three sample articles numbered 1..3, with views of 100, 200 and 300.
objects = [
    {"title": f"文章{n}", "content": f"内容{n}", "views": n * 100}
    for n in range(1, 4)
]

# Queue every sample object on the dynamic batch context.
with articles.batch.dynamic() as batch:
    for properties in objects:
        batch.add_object(properties=properties)

print("Batch insert completed")

批量插入配置 #

python
from weaviate.util import get_valid_uuid

# Fixed-size batching with the batch size pinned to 100.
with articles.batch.fixed_size(batch_size=100) as batch:
    for index in range(1000):
        props = {
            "title": f"文章 {index}",
            "content": f"这是第 {index} 篇文章",
            "views": index * 10,
        }
        batch.add_object(properties=props)

# Report how many objects the batch recorded as failed.
failed = articles.batch.failed_objects
print(f"Total errors: {len(failed)}")

批量插入错误处理 #

python
# Queue 100 sample articles on the dynamic batch context.
with articles.batch.dynamic() as batch:
    for index in range(100):
        props = {"title": f"文章 {index}", "content": f"内容 {index}"}
        batch.add_object(properties=props)

# Walk the failures recorded on the collection's batch handle and print the
# message and the offending object for each one.
failures = articles.batch.failed_objects
for error in failures:
    print(f"Error: {error.message}")
    print(f"Object: {error.object_}")

批量引用 #

python
authors = client.collections.get("Author")
articles = client.collections.get("Article")

# Build the list with a comprehension so the module-level `uuid` variable
# (used by other snippets on this page to address an Article) is not
# overwritten with an Author identifier.
author_uuids = [
    authors.data.insert({"name": f"作者 {i}"}) for i in range(10)
]

# Create one article per author and link it through "writtenBy".
with articles.batch.dynamic() as batch:
    for i, author_uuid in enumerate(author_uuids):
        batch.add_object(
            properties={"title": f"文章 {i}"},
            references={"writtenBy": author_uuid},
        )

读取对象 #

根据 ID 获取 #

python
articles = client.collections.get("Article")

article = articles.query.fetch_object_by_id(uuid)

# fetch_object_by_id yields None when no object has this ID, so guard before
# touching its attributes.
if article is None:
    print(f"Object not found: {uuid}")
else:
    print(f"Title: {article.properties['title']}")
    print(f"Content: {article.properties['content']}")
    print(f"UUID: {article.uuid}")

获取带向量 #

python
# Ask for the stored vector together with the object.
article = articles.query.fetch_object_by_id(uuid=uuid, include_vector=True)

# The vector attribute is indexed by name here; "default" is the entry read.
default_vector = article.vector["default"]
print(f"Vector dimensions: {len(default_vector)}")

批量获取 #

python
# Fetch the first page of up to 10 objects.
response = articles.query.fetch_objects(
    limit=10,
    offset=0
)

# The fetch_objects result only carries the returned page (`.objects`) and has
# no `total_count`, so the overall count comes from an aggregate query.
total = articles.aggregate.over_all(total_count=True).total_count

print(f"Total: {total}")
for obj in response.objects:
    print(f"- {obj.properties['title']}")

获取带属性选择 #

python
# Restrict the returned properties to the two that are printed below.
wanted_properties = ["title", "views"]
response = articles.query.fetch_objects(
    limit=10, return_properties=wanted_properties
)

for obj in response.objects:
    props = obj.properties
    print(f"- {props['title']} (views: {props['views']})")

获取带引用 #

python
# Request each article together with the name/email of its linked author.
response = articles.query.fetch_objects(
    limit=10,
    return_references=[
        wvc.query.QueryReference(
            link_on="writtenBy",
            return_properties=["name", "email"],
        )
    ],
)

for obj in response.objects:
    # An article without an author has no "writtenBy" entry (or no references
    # mapping at all), so look it up defensively instead of indexing into it.
    written_by = (obj.references or {}).get("writtenBy")
    linked_authors = written_by.objects if written_by is not None else []
    if linked_authors:
        author = linked_authors[0]
        print(f"- {obj.properties['title']} by {author.properties['name']}")
    else:
        print(f"- {obj.properties['title']} (no author)")

更新对象 #

更新属性 #

python
articles = client.collections.get("Article")

# Use the first object returned by the query as the one to modify.
article = articles.query.fetch_objects(limit=1).objects[0]

# Re-send title and content unchanged and bump the view counter by 100.
current = article.properties
updated_properties = {
    "title": current["title"],
    "content": current["content"],
    "views": current["views"] + 100,
}
articles.data.update(uuid=article.uuid, properties=updated_properties)

print("Object updated")

部分更新 #

python
# Only "views" is sent; no other property is part of this call.
views_only = {"views": 2000}
articles.data.update(uuid=article.uuid, properties=views_only)

替换对象 #

python
# Complete new property set handed to replace() for the existing object.
replacement = {
    "title": "全新的标题",
    "content": "全新的内容",
    "category": "更新",
    "views": 0,
}
articles.data.replace(uuid=article.uuid, properties=replacement)

更新向量 #

python
import numpy as np

# Fresh random 1536-element placeholder vector as a plain list.
new_vector = np.random.rand(1536).tolist()

# Vectors are changed through the regular update() call via `vector=`.
articles.data.update(uuid=article.uuid, vector=new_vector)

更新引用 #

python
# Create the new author, then point the article's "writtenBy" at it.
new_author_uuid = authors.data.insert(properties={"name": "李四"})

new_references = {"writtenBy": new_author_uuid}
articles.data.update(uuid=article.uuid, references=new_references)

删除对象 #

删除单个对象 #

python
articles = client.collections.get("Article")

# Pick the first object returned by the query as the deletion target.
first_page = articles.query.fetch_objects(limit=1)
article = first_page.objects[0]

# The call's return value is printed as the deletion outcome.
result = articles.data.delete_by_id(uuid=article.uuid)

print(f"Deleted: {result}")

批量删除 #

python
from weaviate.classes.query import Filter

# Match every object whose "views" value is below 100.
low_views = Filter.by_property("views").less_than(100)
result = articles.data.delete_many(where=low_views)

# The result exposes the counts of successful and failed deletions.
print(f"Deleted {result.successful} objects")
print(f"Failed: {result.failed}")

删除所有对象 #

python
# A "*" wildcard LIKE filter on "title" serves as the match-everything
# condition here; `Filter` is assumed to be imported already.
any_title = Filter.by_property("title").like("*")
result = articles.data.delete_many(where=any_title)

print(f"Deleted {result.successful} objects")

删除带确认 #

python
# delete_by_id() takes no consistency argument; the level is chosen by
# deriving a collection handle with with_consistency_level() and deleting
# through that handle.
strict_articles = articles.with_consistency_level(
    wvc.config.ConsistencyLevel.ALL
)
result = strict_articles.data.delete_by_id(article.uuid)

print(f"Deleted with consistency: {result}")

引用管理 #

添加引用 #

python
articles = client.collections.get("Article")
categories = client.collections.get("Category")

# Source article (first one returned) and a freshly created target category.
article_uuid = articles.query.fetch_objects(limit=1).objects[0].uuid
category_uuid = categories.data.insert({"name": "新技术"})

# The target of reference_add() is passed through the `to` keyword.
articles.data.reference_add(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=category_uuid,
)

批量添加引用 #

python
# Create the three target categories and keep their identifiers.
category_uuids = [
    categories.data.insert({"name": "AI"}),
    categories.data.insert({"name": "数据库"}),
    categories.data.insert({"name": "教程"}),
]

# reference_add_many() takes a list of DataReference entries, each naming the
# source object, the reference property and one target; build one entry per
# category.
articles.data.reference_add_many(
    [
        wvc.data.DataReference(
            from_uuid=article_uuid,
            from_property="hasCategory",
            to_uuid=category_uuid,
        )
        for category_uuid in category_uuids
    ]
)

删除引用 #

python
# Remove the link from the article to the first category; the target of
# reference_delete() is passed through the `to` keyword.
articles.data.reference_delete(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=category_uuids[0],
)

替换引用 #

python
new_category_uuid = categories.data.insert({"name": "向量数据库"})

# Swap the article's "hasCategory" references for the new category; the
# replacement target of reference_replace() is passed through `to`.
articles.data.reference_replace(
    from_uuid=article_uuid,
    from_property="hasCategory",
    to=new_category_uuid,
)

存在性检查 #

检查对象是否存在 #

python
# Ask the collection whether an object with this identifier is stored.
exists = articles.data.exists(uuid)

print("Object exists" if exists else "Object not found")

检查引用是否存在 #

python
# Fetch the article together with its "hasCategory" references.
article = articles.query.fetch_object_by_id(
    uuid=article_uuid,
    return_references=[
        wvc.query.QueryReference(link_on="hasCategory")
    ],
)

# With nothing linked, the "hasCategory" key can be missing from the
# references mapping, so use .get() rather than indexing; otherwise the
# "No references" branch could never be reached.
has_category = (article.references or {}).get("hasCategory")

if has_category is not None and has_category.objects:
    print("Has references")
else:
    print("No references")

一致性级别 #

一致性级别说明 #

text
一致性级别:

ONE:
├── 只需一个节点确认
├── 最快,但可能不一致
└── 适合高吞吐量场景

QUORUM (默认):
├── 需要多数节点确认
├── 平衡性能和一致性
└── 推荐大多数场景

ALL:
├── 需要所有节点确认
├── 最强一致性,最慢
└── 适合关键数据

配置一致性级别 #

python
import weaviate.classes as wvc

# insert() has no consistency argument; derive a collection handle bound to
# the ALL level and write through it.
strict_articles = articles.with_consistency_level(
    wvc.config.ConsistencyLevel.ALL
)

uuid = strict_articles.data.insert(
    properties={
        "title": "高一致性文章",
        "content": "使用 ALL 一致性级别...",
    }
)

读取一致性 #

python
# fetch_object_by_id() has no consistency argument; read through a collection
# handle bound to the ALL level instead.
strict_articles = articles.with_consistency_level(
    wvc.config.ConsistencyLevel.ALL
)
article = strict_articles.query.fetch_object_by_id(uuid=uuid)

租户数据操作 #

租户对象操作 #

python
articles = client.collections.get("Article")

# Handle scoped to the tenant named "tenant_a"; calls below go through it.
tenant_a = articles.with_tenant("tenant_a")

tenant_article = {
    "title": "租户A的文章",
    "content": "这是租户A的数据...",
}
tenant_a.data.insert(properties=tenant_article)

# Read back up to 10 objects through the same tenant handle.
response = tenant_a.query.fetch_objects(limit=10)

for obj in response.objects:
    title = obj.properties["title"]
    print(f"- {title}")

租户批量操作 #

python
# Batch-insert 100 sample articles through the tenant-scoped handle.
with tenant_a.batch.dynamic() as batch:
    for index in range(100):
        props = {"title": f"租户A文章 {index}", "content": f"内容 {index}"}
        batch.add_object(properties=props)

完整示例 #

python
import weaviate
import weaviate.classes as wvc
from weaviate.classes.query import Filter
import numpy as np

client = weaviate.connect_to_local()

# Everything after connecting runs inside try/finally so the connection is
# closed even when one of the steps below raises.
try:
    # Drop any previous "Product" collection, then create it with no
    # vectorizer; vectors are supplied by hand further down.
    client.collections.delete("Product")

    products = client.collections.create(
        name="Product",
        vectorizer_config=wvc.config.Configure.Vectorizer.none(),
        properties=[
            wvc.config.Property(name="name", data_type=wvc.config.DataType.TEXT),
            wvc.config.Property(name="price", data_type=wvc.config.DataType.NUMBER),
            wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT),
            wvc.config.Property(name="tags", data_type=wvc.config.DataType.TEXT_ARRAY),
        ],
    )

    sample_products = [
        {"name": "iPhone 15", "price": 7999, "category": "手机", "tags": ["苹果", "智能手机"]},
        {"name": "MacBook Pro", "price": 14999, "category": "电脑", "tags": ["苹果", "笔记本"]},
        {"name": "AirPods Pro", "price": 1899, "category": "配件", "tags": ["苹果", "耳机"]},
        {"name": "Galaxy S24", "price": 6999, "category": "手机", "tags": ["三星", "智能手机"]},
        {"name": "ThinkPad X1", "price": 9999, "category": "电脑", "tags": ["联想", "笔记本"]},
    ]

    # Batch-insert the samples, each with a random 64-element placeholder vector.
    with products.batch.dynamic() as batch:
        for product in sample_products:
            vector = np.random.rand(64).tolist()
            batch.add_object(properties=product, vector=vector)

    print(f"Inserted {len(sample_products)} products\n")

    # List every product.
    response = products.query.fetch_objects(limit=10)
    print("所有产品:")
    for obj in response.objects:
        print(f"- {obj.properties['name']}: ¥{obj.properties['price']}")

    # List only the products whose category equals "手机".
    response = products.query.fetch_objects(
        filters=Filter.by_property("category").equal("手机"),
        limit=10,
    )
    print("\n手机类别产品:")
    for obj in response.objects:
        print(f"- {obj.properties['name']}")

    # Apply a 10% discount to the first product returned.
    first_product = products.query.fetch_objects(limit=1).objects[0]

    products.data.update(
        uuid=first_product.uuid,
        properties={"price": first_product.properties["price"] * 0.9},
    )
    print(f"\n更新了 {first_product.properties['name']} 的价格")

    # Delete every product priced below 2000.
    result = products.data.delete_many(
        where=Filter.by_property("price").less_than(2000)
    )
    print(f"\n删除了 {result.successful} 个低价产品")

    # Count what is left through an aggregate query.
    response = products.aggregate.over_all(total_count=True)
    print(f"\n剩余产品数量: {response.total_count}")
finally:
    client.close()

小结 #

本章介绍了 Weaviate 的对象管理:

  • 创建单个和批量对象
  • 读取对象
  • 更新和替换对象
  • 删除单个和批量对象
  • 引用管理
  • 一致性级别
  • 租户数据操作

下一步 #

继续学习 模块与向量化,了解 Weaviate 的模块化架构!

最后更新:2026-04-04