Payload 管理 #
Payload 是 Qdrant 中存储元数据的关键机制,本章详细介绍 Payload 的管理。
Payload 概述 #
text
Payload 结构:
┌─────────────────────────────────────────────────────────────┐
│ Point │
├─────────────────────────────────────────────────────────────┤
│ id: 12345 │
│ vector: [0.1, 0.2, 0.3, ...] │
├─────────────────────────────────────────────────────────────┤
│ payload: { │
│ ├── 字符串: "title": "文档标题" │
│ ├── 数值: "price": 99.99 │
│ ├── 布尔: "is_active": true │
│ ├── 数组: "tags": ["AI", "ML"] │
│ ├── 对象: "metadata": {...} │
│ └── 地理: "location": {"lat": 39.9, "lon": 116.4} │
│ } │
└─────────────────────────────────────────────────────────────┘
Payload 数据类型 #
基本类型 #
python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
# Client created with the ":memory:" location, so the demo does not need a
# separately running server (presumably nothing is persisted — confirm).
client = QdrantClient(":memory:")

# Demo collection: 4-dimensional vectors compared with cosine distance.
client.create_collection(
    collection_name="payload_demo",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE)
)

# A single point whose payload mixes string, float, int, bool and list values.
point = PointStruct(
    id=1,
    vector=[0.1, 0.2, 0.3, 0.4],
    payload={
        "title": "示例文档",
        "content": "这是一段文档内容...",
        "price": 99.99,
        "count": 100,
        "is_active": True,
        "tags": ["AI", "ML", "Python"],
        "score": 0.95,
        "created_at": 1704067200  # integer timestamp (presumably Unix seconds)
    }
)

# Insert (or replace) the point in the demo collection.
client.upsert("payload_demo", [point])
支持的数据类型 #
| 类型 | Python 类型 | 示例 |
|---|---|---|
| keyword | str | "hello" |
| integer | int | 42 |
| float | float | 3.14 |
| bool | bool | True |
| geo | dict | {"lat": 39.9, "lon": 116.4} |
| datetime | str (RFC 3339) | "2024-01-01T00:00:00Z" |
| array | list | ["a", "b", "c"] |
嵌套对象 #
python
# Payload values may themselves be dicts; here "author" nests a second-level
# "profile" object and "metadata" is a flat sub-object.
point = PointStruct(
    id=2,
    vector=[0.2, 0.3, 0.4, 0.5],
    payload={
        "title": "嵌套示例",
        "author": {
            "name": "张三",
            "email": "zhangsan@example.com",
            "profile": {
                "age": 30,
                "location": "Beijing"
            }
        },
        "metadata": {
            "version": "1.0",
            "created": "2024-01-01"
        }
    }
)
client.upsert("payload_demo", [point])
地理位置 #
python
# A geo value is stored as a dict with "lat" and "lon" keys.
point = PointStruct(
    id=3,
    vector=[0.3, 0.4, 0.5, 0.6],
    payload={
        "name": "北京天安门",
        "location": {
            "lat": 39.9087,
            "lon": 116.3975
        }
    }
)
client.upsert("payload_demo", [point])
Payload 索引 #
创建索引 #
python
from qdrant_client.models import PayloadSchemaType
# One index per filterable field; the schema type matches the kind of value
# stored under that key in the demo points.

# "title" holds strings -> keyword index.
client.create_payload_index(
    collection_name="payload_demo",
    field_name="title",
    field_schema=PayloadSchemaType.KEYWORD
)

# "price" holds floats -> float index.
client.create_payload_index(
    collection_name="payload_demo",
    field_name="price",
    field_schema=PayloadSchemaType.FLOAT
)

# "created_at" holds integer timestamps -> integer index.
client.create_payload_index(
    collection_name="payload_demo",
    field_name="created_at",
    field_schema=PayloadSchemaType.INTEGER
)

# "location" holds {"lat", "lon"} dicts -> geo index.
client.create_payload_index(
    collection_name="payload_demo",
    field_name="location",
    field_schema=PayloadSchemaType.GEO
)
print("Payload 索引创建成功")
索引类型 #
python
from qdrant_client.models import PayloadSchemaType, TextIndexParams, TokenizerType
# Simple schema types that can be passed directly as `field_schema`.
keyword_index = PayloadSchemaType.KEYWORD
integer_index = PayloadSchemaType.INTEGER
float_index = PayloadSchemaType.FLOAT
geo_index = PayloadSchemaType.GEO

# Full-text index parameters: word tokenizer, token length limited to 2-20,
# lowercasing enabled.
# NOTE(review): the demo "content" values are Chinese text without spaces; a
# word tokenizer may not split them into searchable terms — confirm the
# tokenizer choice for this data.
text_index = TextIndexParams(
    type="text",
    tokenizer=TokenizerType.WORD,
    min_token_len=2,
    max_token_len=20,
    lowercase=True
)

# A parameter object is passed as `field_schema` in place of a schema type.
client.create_payload_index(
    collection_name="payload_demo",
    field_name="content",
    field_schema=text_index
)
索引类型说明 #
| 类型 | 适用场景 | 支持操作 |
|---|---|---|
| keyword | 精确匹配 | Match, MatchAny, MatchExcept |
| integer | 数值范围 | Range, Match |
| float | 数值范围 | Range |
| geo | 地理位置 | GeoBoundingBox, GeoRadius |
| text | 全文搜索 | MatchText |
| bool | 布尔值 | Match |
| datetime | 时间范围 | DatetimeRange |
删除索引 #
python
# Drop the payload index on "temp_field".
# NOTE(review): no snippet in this section creates an index on "temp_field";
# the name appears to be a placeholder.
client.delete_payload_index(
    collection_name="payload_demo",
    field_name="temp_field"
)
print("索引已删除")
Payload 操作 #
设置 Payload #
python
# Set the "status" and "updated_at" keys on points 1, 2 and 3.
client.set_payload(
    collection_name="payload_demo",
    payload={
        "status": "active",
        "updated_at": 1704153600
    },
    points=[1, 2, 3]
)
print("Payload 已设置")
覆盖 Payload #
python
# Overwrite the payload of point 1 with a payload holding only "new_field".
# NOTE(review): presumably this discards the keys set earlier on point 1
# (e.g. "title"), which later filter examples rely on — confirm.
client.overwrite_payload(
    collection_name="payload_demo",
    payload={
        "new_field": "new_value"
    },
    points=[1]
)
print("Payload 已覆盖")
删除 Payload 字段 #
python
# Remove only the listed keys from the payload of points 1, 2 and 3.
client.delete_payload(
    collection_name="payload_demo",
    keys=["temp_field", "old_field"],
    points=[1, 2, 3]
)
print("指定字段已删除")
清空 Payload #
python
# Clear the payload of points 1, 2 and 3. Note that this call takes
# `points_selector`, whereas the set/overwrite/delete calls take `points`.
client.clear_payload(
    collection_name="payload_demo",
    points_selector=[1, 2, 3]
)
print("Payload 已清空")
批量设置 Payload #
python
from qdrant_client.models import PointIdsList
# Tag points 0..999 in chunks of `batch_size`, one request per chunk.
# NOTE(review): assumes points with these IDs already exist in the
# collection — confirm how set_payload treats unknown IDs.
batch_size = 100
for start in range(0, 1000, batch_size):
    ids = list(range(start, start + batch_size))
    client.set_payload(
        collection_name="payload_demo",
        # "batch" records the zero-based chunk number the point was handled in.
        payload={"processed": True, "batch": start // batch_size},
        points=PointIdsList(points=ids),
    )
print("批量设置 Payload 完成")
Payload 过滤 #
精确匹配 #
python
from qdrant_client.models import Filter, FieldCondition, MatchValue
# Vector search restricted to points whose "title" equals the given string.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="title",
                match=MatchValue(value="示例文档"),
            )
        ]
    ),
    limit=10,
).points
文本匹配 #
python
from qdrant_client.models import MatchText
# Vector search restricted to points whose "content" matches the text query.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="content",
                match=MatchText(text="文档"),
            )
        ]
    ),
    limit=10,
).points
多值匹配 #
python
from qdrant_client.models import MatchAny
# Vector search restricted to points whose "tags" contain any listed value.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="tags",
                match=MatchAny(any=["AI", "ML", "Python"]),
            )
        ]
    ),
    limit=10,
).points
排除匹配 #
python
from qdrant_client.models import MatchExcept
# Vector search excluding points whose "status" is one of the listed values.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="status",
                # "except" is a Python keyword, so it is passed via dict unpacking.
                match=MatchExcept(**{"except": ["deleted", "archived"]}),
            )
        ]
    ),
    limit=10,
).points
范围过滤 #
python
from qdrant_client.models import Range
# Vector search restricted to points with 50 <= "price" <= 200.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="price",
                range=Range(gte=50, lte=200),
            )
        ]
    ),
    limit=10,
).points
地理位置过滤 #
python
from qdrant_client.models import GeoRadius, GeoBoundingBox
# Points whose "location" lies within `radius` of the center point
# (radius is presumably in meters, i.e. 10 km here — confirm).
geo_radius_filter = Filter(
    must=[
        FieldCondition(
            key="location",
            geo_radius=GeoRadius(
                center={"lat": 39.9, "lon": 116.4},
                radius=10000
            )
        )
    ]
)

# Points whose "location" lies inside the rectangle spanned by the
# top-left and bottom-right corners.
geo_box_filter = Filter(
    must=[
        FieldCondition(
            key="location",
            geo_bounding_box=GeoBoundingBox(
                top_left={"lat": 40.0, "lon": 116.0},
                bottom_right={"lat": 39.5, "lon": 117.0}
            )
        )
    ]
)
数组条件 #
python
from qdrant_client.models import ValuesCount
# Points whose "tags" field holds at least two values.
has_multiple_tags = Filter(
    must=[
        FieldCondition(
            key="tags",
            values_count=ValuesCount(gte=2)
        )
    ]
)
空值检查 #
python
from qdrant_client.models import IsEmptyCondition, PayloadField

# An "is empty" check is its own condition type rather than a FieldCondition
# argument; it names the field through PayloadField.

# Points that HAVE a description: negate the is-empty condition via must_not.
has_description = Filter(
    must_not=[
        IsEmptyCondition(is_empty=PayloadField(key="description"))
    ]
)

# Points whose description is empty.
no_description = Filter(
    must=[
        IsEmptyCondition(is_empty=PayloadField(key="description"))
    ]
)
嵌套字段过滤 #
python
# Nested payload fields are addressed with dot-separated keys.
# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="author.name",
                match=MatchValue(value="张三"),
            ),
            FieldCondition(
                key="author.profile.age",
                range=Range(gte=25, lte=35),
            ),
        ]
    ),
    limit=10,
).points
复合过滤 #
python
# Combine mandatory (`must`), alternative (`should`) and excluded
# (`must_not`) conditions in a single filter.
complex_filter = Filter(
    must=[
        FieldCondition(key="status", match=MatchValue(value="active")),
        FieldCondition(key="price", range=Range(lte=500)),
    ],
    should=[
        FieldCondition(key="category", match=MatchValue(value="electronics")),
        FieldCondition(key="category", match=MatchValue(value="books")),
    ],
    must_not=[
        FieldCondition(key="tags", match=MatchValue(value="discontinued")),
    ],
)

# `query_points` is the replacement for the deprecated `search`; `.points`
# is the list of scored hits.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=complex_filter,
    limit=10,
).points
Payload 查询 #
滚动查询 Payload #
python
from qdrant_client.models import ScrollResult
# Page through the whole collection, 100 points per request, collecting
# every payload. Vectors are not fetched because only payloads are needed.
offset = None
all_payloads = []
while True:
    # scroll() returns a (points, next_page_offset) tuple, so it is unpacked
    # directly rather than annotated as a ScrollResult model.
    points, next_offset = client.scroll(
        collection_name="payload_demo",
        limit=100,
        offset=offset,
        with_payload=True,
        with_vectors=False,
    )
    all_payloads.extend(record.payload for record in points)
    if next_offset is None:  # no further page
        break
    offset = next_offset
print(f"获取了 {len(all_payloads)} 个 Payload")
按条件滚动 #
python
# Page through only the points whose "status" is "active", 100 per request.
active_only = Filter(
    must=[
        FieldCondition(key="status", match=MatchValue(value="active")),
    ]
)
offset = None
filtered_payloads = []
while True:
    points, next_offset = client.scroll(
        collection_name="payload_demo",
        scroll_filter=active_only,
        limit=100,
        offset=offset,
    )
    filtered_payloads.extend(record.payload for record in points)
    if next_offset is None:  # no further page
        break
    offset = next_offset
print(f"过滤后获取了 {len(filtered_payloads)} 个 Payload")
Payload 最佳实践 #
选择合适的数据类型 #
text
数据类型选择建议:
字符串:
├── 精确匹配 → keyword
├── 全文搜索 → text
└── 长文本 → 不索引,仅存储
数值:
├── 整数 → integer
├── 浮点数 → float
└── 金额 → float(注意精度)
时间:
├── 存储为 Unix 时间戳
└── 使用 integer 类型
地理位置:
├── 使用 geo 类型
└── 格式:{"lat": float, "lon": float}
索引策略 #
python
def setup_payload_indexes(collection_name):
    """Create the commonly filtered payload indexes on a collection.

    Each index is attempted independently: a failure is reported on stdout
    and the remaining indexes are still attempted.

    Args:
        collection_name: Name of the collection to create the indexes on.
    """
    indexes = [
        ("category", PayloadSchemaType.KEYWORD),
        ("status", PayloadSchemaType.KEYWORD),
        ("price", PayloadSchemaType.FLOAT),
        ("created_at", PayloadSchemaType.INTEGER),
        ("location", PayloadSchemaType.GEO),
    ]
    for field_name, field_schema in indexes:
        try:
            client.create_payload_index(
                collection_name=collection_name,
                field_name=field_name,
                field_schema=field_schema,
            )
        except Exception as e:
            # Deliberately broad: this is best-effort setup, so one failing
            # field must not stop the others from being indexed.
            print(f"索引创建失败 {field_name}: {e}")
        else:
            print(f"索引创建成功: {field_name}")


setup_payload_indexes("payload_demo")
Payload 大小优化 #
python
def optimize_payload(payload, max_length=1000):
    """Return a copy of ``payload`` with over-long strings truncated.

    Strings longer than ``max_length`` are cut to ``max_length`` characters
    and suffixed with ``"..."``. Nested dicts and lists are processed
    recursively; all other values are kept as they are. The input is not
    modified.

    Args:
        payload: Mapping of payload keys to values.
        max_length: Maximum number of characters kept from a string before
            the ``"..."`` marker is appended.

    Returns:
        A new dict with the same keys and truncated string values.
    """

    def _shrink(value):
        """Truncate one value, descending into dicts and lists."""
        if isinstance(value, str):
            if len(value) > max_length:
                return value[:max_length] + "..."
            return value
        if isinstance(value, dict):
            return {key: _shrink(item) for key, item in value.items()}
        if isinstance(value, list):
            # Lists may hold long strings or nested objects as well.
            return [_shrink(item) for item in value]
        return value

    return {key: _shrink(value) for key, value in payload.items()}
# Sample payload: the "content" string is repeated 100 times so that it
# exceeds the 1000-character threshold applied by optimize_payload.
large_payload = {
    "title": "文档标题",
    "content": "这是一段非常长的内容..." * 100
}
optimized = optimize_payload(large_payload)
批量操作优化 #
python
def batch_set_payload(collection_name, points_payloads, batch_size=100):
    """Set a different payload on many points, one request per batch.

    The point IDs are split into chunks of ``batch_size``; each chunk is sent
    as a single ``batch_update_points`` request holding one set-payload
    operation per point, instead of one request per point.

    Args:
        collection_name: Name of the collection holding the points.
        points_payloads: Mapping of point ID to the payload dict to set on
            that point.
        batch_size: Maximum number of points updated per request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Imported locally so the function carries its own dependencies.
    from qdrant_client.models import SetPayload, SetPayloadOperation

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    points_ids = list(points_payloads)
    total = len(points_ids)
    for i in range(0, total, batch_size):
        batch_ids = points_ids[i:i + batch_size]
        client.batch_update_points(
            collection_name=collection_name,
            update_operations=[
                SetPayloadOperation(
                    set_payload=SetPayload(
                        payload=points_payloads[point_id],
                        points=[point_id],
                    )
                )
                for point_id in batch_ids
            ],
        )
        print(f"已处理 {min(i + batch_size, total)}/{total}")
# Maps each point ID to the payload that should be set on that point.
payloads = {
    1: {"status": "active"},
    2: {"status": "inactive"},
    3: {"status": "active"}
}
batch_set_payload("payload_demo", payloads)
小结 #
本章详细介绍了 Payload 管理:
- Payload 数据类型
- Payload 索引创建和管理
- Payload 增删改操作
- 各种过滤条件
- 最佳实践
下一步 #
掌握 Payload 管理后,继续学习“索引与性能”,深入了解性能优化技巧!
最后更新:2026-04-04