Payload 管理 #

Payload 是 Qdrant 中存储元数据的关键机制,本章详细介绍 Payload 的管理方法。

Payload 概述 #

text
Payload 结构:

┌─────────────────────────────────────────────────────────────┐
│                          Point                               │
├─────────────────────────────────────────────────────────────┤
│  id: 12345                                                   │
│  vector: [0.1, 0.2, 0.3, ...]                               │
├─────────────────────────────────────────────────────────────┤
│  payload: {                                                  │
│    ├── 字符串: "title": "文档标题"                           │
│    ├── 数值: "price": 99.99                                  │
│    ├── 布尔: "is_active": true                               │
│    ├── 数组: "tags": ["AI", "ML"]                           │
│    ├── 对象: "metadata": {...}                              │
│    └── 地理: "location": {"lat": 39.9, "lon": 116.4}        │
│  }                                                           │
└─────────────────────────────────────────────────────────────┘

Payload 数据类型 #

基本类型 #

python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

# Client created from the ":memory:" location string; every later snippet
# in this chapter reuses this `client` object.
client = QdrantClient(location=":memory:")

# Demo collection: 4-dimensional vectors compared with cosine distance.
client.create_collection(
    collection_name="payload_demo",
    vectors_config=VectorParams(distance=Distance.COSINE, size=4),
)

# A single point whose payload mixes strings, numbers, a boolean and a list.
point = PointStruct(
    id=1,
    vector=[0.1, 0.2, 0.3, 0.4],
    payload=dict(
        title="示例文档",
        content="这是一段文档内容...",
        price=99.99,
        count=100,
        is_active=True,
        tags=["AI", "ML", "Python"],
        score=0.95,
        created_at=1704067200,
    ),
)

client.upsert(collection_name="payload_demo", points=[point])

支持的数据类型 #

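| 类型 | Python 类型 | 示例 |
| --- | --- | --- |
| keyword | str | "hello" |
| integer | int | 42 |
| float | float | 3.14 |
| bool | bool | True |
| geo | dict | {"lat": 39.9, "lon": 116.4} |
| datetime | str(RFC 3339 格式;Unix 时间戳请按 integer 存储) | "2024-01-01T00:00:00Z" |
| array | list | ["a", "b", "c"] |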

嵌套对象 #

python
# Payload values can be dictionaries: "author" holds a nested "profile"
# dictionary, and "metadata" is a second, flat dictionary.
point = PointStruct(
    id=2,
    vector=[0.2, 0.3, 0.4, 0.5],
    payload=dict(
        title="嵌套示例",
        author=dict(
            name="张三",
            email="zhangsan@example.com",
            profile=dict(age=30, location="Beijing"),
        ),
        metadata=dict(version="1.0", created="2024-01-01"),
    ),
)

client.upsert(collection_name="payload_demo", points=[point])

地理位置 #

python
# A geographic position is stored as a dictionary with "lat" and "lon" keys.
point = PointStruct(
    id=3,
    vector=[0.3, 0.4, 0.5, 0.6],
    payload=dict(
        name="北京天安门",
        location=dict(lat=39.9087, lon=116.3975),
    ),
)

client.upsert(collection_name="payload_demo", points=[point])

Payload 索引 #

创建索引 #

python
from qdrant_client.models import PayloadSchemaType

# (field name, index type) pairs; the indexes are created in this order.
for field_name, field_schema in (
    ("title", PayloadSchemaType.KEYWORD),
    ("price", PayloadSchemaType.FLOAT),
    ("created_at", PayloadSchemaType.INTEGER),
    ("location", PayloadSchemaType.GEO),
):
    client.create_payload_index(
        collection_name="payload_demo",
        field_name=field_name,
        field_schema=field_schema,
    )

print("Payload 索引创建成功")

索引类型 #

python
from qdrant_client.models import PayloadSchemaType, TextIndexParams, TokenizerType

# Simple index types are plain PayloadSchemaType members.
keyword_index = PayloadSchemaType.KEYWORD
integer_index = PayloadSchemaType.INTEGER
float_index = PayloadSchemaType.FLOAT
geo_index = PayloadSchemaType.GEO

# A full-text index is described by a TextIndexParams object instead.
# NOTE(review): the demo "content" value is Chinese text without spaces;
# confirm that TokenizerType.WORD produces the tokens the later MatchText
# example needs, or consider TokenizerType.MULTILINGUAL.
text_index = TextIndexParams(
    type="text",
    tokenizer=TokenizerType.WORD,
    lowercase=True,
    min_token_len=2,
    max_token_len=20,
)

client.create_payload_index("payload_demo", "content", field_schema=text_index)

索引类型说明 #

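| 类型 | 适用场景 | 支持操作 |
| --- | --- | --- |
| keyword | 精确匹配 | Match, MatchAny, MatchExcept |
| integer | 数值范围 | Range, Match |
| float | 数值范围 | Range |
| geo | 地理位置 | GeoBoundingBox, GeoRadius |
| text | 全文搜索 | MatchText |
| bool | 布尔值 | Match |
| datetime | 时间范围 | Range(使用 DatetimeRange) |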

删除索引 #

python
# Remove the payload index registered for the "temp_field" field.
client.delete_payload_index("payload_demo", field_name="temp_field")

print("索引已删除")

Payload 操作 #

设置 Payload #

python
# Apply the same two keys to points 1, 2 and 3 in a single call.
client.set_payload(
    collection_name="payload_demo",
    points=[1, 2, 3],
    payload=dict(status="active", updated_at=1704153600),
)

print("Payload 已设置")

覆盖 Payload #

python
# Overwrite (rather than merge into) the payload of point 1.
# NOTE(review): presumably any key not listed here is dropped — confirm
# against the client documentation.
client.overwrite_payload(
    collection_name="payload_demo",
    points=[1],
    payload=dict(new_field="new_value"),
)

print("Payload 已覆盖")

删除 Payload 字段 #

python
# Delete just the two named keys from the payloads of points 1, 2 and 3.
client.delete_payload(
    collection_name="payload_demo",
    points=[1, 2, 3],
    keys=["temp_field", "old_field"],
)

print("指定字段已删除")

清空 Payload #

python
# Clear the payload of points 1, 2 and 3; note the argument is named
# `points_selector` here, not `points`.
client.clear_payload("payload_demo", points_selector=[1, 2, 3])

print("Payload 已清空")

批量设置 Payload #

python
from qdrant_client.models import PointIdsList

batch_size = 100

# Walk ids 0..999 in consecutive chunks of `batch_size`; because every chunk
# start is a multiple of `batch_size`, the enumerate index equals
# start // batch_size.
for batch_no, start in enumerate(range(0, 1000, batch_size)):
    ids = [*range(start, start + batch_size)]

    client.set_payload(
        collection_name="payload_demo",
        points=PointIdsList(points=ids),
        payload={"processed": True, "batch": batch_no},
    )

print("批量设置 Payload 完成")

Payload 过滤 #

精确匹配 #

python
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Vector search restricted to points whose "title" equals the given value.
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="title",
                match=MatchValue(value="示例文档"),
            ),
        ],
    ),
    limit=10,
).points

文本匹配 #

python
from qdrant_client.models import MatchText

# Vector search restricted to points whose "content" matches the text query.
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="content",
                match=MatchText(text="文档"),
            ),
        ],
    ),
    limit=10,
).points

多值匹配 #

python
from qdrant_client.models import MatchAny

# Vector search restricted to points whose "tags" contain any listed value.
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="tags",
                match=MatchAny(any=["AI", "ML", "Python"]),
            ),
        ],
    ),
    limit=10,
).points

排除匹配 #

python
from qdrant_client.models import MatchExcept

# Vector search that excludes points whose "status" is one of the listed
# values. "except" is a Python keyword, so it cannot be written as a normal
# keyword argument and is passed through dictionary unpacking instead.
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="status",
                match=MatchExcept(**{"except": ["deleted", "archived"]}),
            ),
        ],
    ),
    limit=10,
).points

范围过滤 #

python
from qdrant_client.models import Range

# Vector search restricted to points with 50 <= "price" <= 200
# (gte/lte are the inclusive bounds).
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="price",
                range=Range(gte=50, lte=200),
            ),
        ],
    ),
    limit=10,
).points

地理位置过滤 #

python
from qdrant_client.models import GeoRadius, GeoBoundingBox

# Points whose "location" lies within `radius` of the centre point.
# NOTE(review): radius is presumably expressed in metres (10 km here) —
# confirm against the Qdrant documentation.
geo_radius_filter = Filter(
    must=[
        FieldCondition(
            key="location",
            geo_radius=GeoRadius(
                radius=10000,
                center={"lat": 39.9, "lon": 116.4},
            ),
        ),
    ],
)

# Points whose "location" falls inside the rectangle spanned by its
# top-left and bottom-right corners.
geo_box_filter = Filter(
    must=[
        FieldCondition(
            key="location",
            geo_bounding_box=GeoBoundingBox(
                bottom_right={"lat": 39.5, "lon": 117.0},
                top_left={"lat": 40.0, "lon": 116.0},
            ),
        ),
    ],
)

数组条件 #

python
from qdrant_client.models import ValuesCount

# Matches points whose "tags" field holds at least two values.
has_multiple_tags = Filter(
    must=[FieldCondition(key="tags", values_count=ValuesCount(gte=2))],
)

空值检查 #

python
from qdrant_client.models import IsEmptyCondition, PayloadField

# The "is empty" check is its own condition type that wraps the field name
# in a PayloadField; it is not a keyword of FieldCondition-style matching.

# Points that DO have a description: negate the emptiness check.
has_description = Filter(
    must_not=[
        IsEmptyCondition(is_empty=PayloadField(key="description")),
    ]
)

# Points whose description is empty.
no_description = Filter(
    must=[
        IsEmptyCondition(is_empty=PayloadField(key="description")),
    ]
)

嵌套字段过滤 #

python
# Nested payload fields are addressed with dot-separated keys.
# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=Filter(
        must=[
            FieldCondition(
                key="author.name",
                match=MatchValue(value="张三"),
            ),
            FieldCondition(
                key="author.profile.age",
                range=Range(gte=25, lte=35),
            ),
        ],
    ),
    limit=10,
).points

复合过滤 #

python
# Three clause groups combined in one filter:
#   must     - both conditions listed here,
#   should   - the two alternative "category" values,
#   must_not - points tagged "discontinued" are excluded.
complex_filter = Filter(
    must=[
        FieldCondition(key="status", match=MatchValue(value="active")),
        FieldCondition(key="price", range=Range(lte=500)),
    ],
    should=[
        FieldCondition(key="category", match=MatchValue(value="electronics")),
        FieldCondition(key="category", match=MatchValue(value="books")),
    ],
    must_not=[
        FieldCondition(key="tags", match=MatchValue(value="discontinued")),
    ],
)

# query_points() replaces the deprecated search() method; its response
# object exposes the scored hits as `.points`, so `results` stays a list.
results = client.query_points(
    collection_name="payload_demo",
    query=[0.1, 0.2, 0.3, 0.4],
    query_filter=complex_filter,
    limit=10,
).points

Payload 查询 #

滚动查询 Payload #

python
from qdrant_client.models import ScrollResult

offset = None
all_payloads = []

while True:
    # client.scroll() returns a (points, next_offset) tuple, so it is
    # unpacked directly rather than annotated as a ScrollResult model.
    points, next_offset = client.scroll(
        collection_name="payload_demo",
        limit=100,
        offset=offset,
        with_payload=True,
        with_vectors=False,
    )

    all_payloads.extend(point.payload for point in points)

    # A missing next offset marks the last page.
    if next_offset is None:
        break

    offset = next_offset

print(f"获取了 {len(all_payloads)} 个 Payload")

按条件滚动 #

python
offset = None
filtered_payloads = []

# Only points whose "status" equals "active" are requested from the scroll.
active_only = Filter(
    must=[FieldCondition(key="status", match=MatchValue(value="active"))],
)

while True:
    points, next_offset = client.scroll(
        collection_name="payload_demo",
        scroll_filter=active_only,
        limit=100,
        offset=offset,
    )

    filtered_payloads.extend(point.payload for point in points)

    # A missing next offset marks the last page.
    if next_offset is None:
        break

    offset = next_offset

print(f"过滤后获取了 {len(filtered_payloads)} 个 Payload")

Payload 最佳实践 #

选择合适的数据类型 #

text
数据类型选择建议:

字符串:
├── 精确匹配 → keyword
├── 全文搜索 → text
└── 长文本 → 不索引,仅存储

数值:
├── 整数 → integer
├── 浮点数 → float
└── 金额 → float(注意精度)

时间:
├── 存储为 Unix 时间戳
└── 使用 integer 类型

地理位置:
├── 使用 geo 类型
└── 格式:{"lat": float, "lon": float}

索引策略 #

python
def setup_payload_indexes(collection_name):
    """Create a fixed set of payload indexes on ``collection_name``.

    Every index is attempted independently: when one creation raises, the
    error is printed and the loop moves on to the next field. Nothing is
    returned.
    """
    schema_by_field = {
        "category": PayloadSchemaType.KEYWORD,
        "status": PayloadSchemaType.KEYWORD,
        "price": PayloadSchemaType.FLOAT,
        "created_at": PayloadSchemaType.INTEGER,
        "location": PayloadSchemaType.GEO,
    }

    for field_name in schema_by_field:
        try:
            client.create_payload_index(
                collection_name=collection_name,
                field_name=field_name,
                field_schema=schema_by_field[field_name],
            )
            print(f"索引创建成功: {field_name}")
        except Exception as e:
            # Best effort: report the failure and keep going.
            print(f"索引创建失败 {field_name}: {e}")


setup_payload_indexes(collection_name="payload_demo")

Payload 大小优化 #

python
def _truncate_value(value, max_length):
    """Return ``value`` with every over-long string inside it shortened."""
    if isinstance(value, str):
        if len(value) > max_length:
            return value[:max_length] + "..."
        return value
    if isinstance(value, dict):
        return {key: _truncate_value(item, max_length) for key, item in value.items()}
    if isinstance(value, list):
        # Lists were previously passed through untouched, so long strings
        # (or dicts holding them) inside arrays escaped truncation.
        return [_truncate_value(item, max_length) for item in value]
    return value


def optimize_payload(payload, max_length=1000):
    """Return a copy of ``payload`` with over-long strings truncated.

    Strings longer than ``max_length`` characters are cut to ``max_length``
    characters and suffixed with "...". Nested dictionaries and lists are
    processed recursively; all other values are kept as they are. The input
    mapping is not modified.

    Args:
        payload: Mapping of payload keys to values.
        max_length: Longest string kept untouched (default 1000).

    Returns:
        A new dict with the same keys and shortened string values.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")

    return {key: _truncate_value(value, max_length) for key, value in payload.items()}

# Example payload whose "content" string is longer than 1000 characters.
large_payload = dict(
    title="文档标题",
    content="这是一段非常长的内容..." * 100,
)

optimized = optimize_payload(payload=large_payload)

批量操作优化 #

python
def batch_set_payload(collection_name, points_payloads, batch_size=100):
    """Set an individual payload on each point, one request per batch.

    Args:
        collection_name: Collection whose points are updated.
        points_payloads: Mapping of point id -> payload dict to set on it.
        batch_size: Maximum number of points sent per request; must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Imported locally so the function brings its own dependencies.
    from qdrant_client.models import SetPayload, SetPayloadOperation

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    points_ids = list(points_payloads)

    for i in range(0, len(points_ids), batch_size):
        batch_ids = points_ids[i:i + batch_size]

        # One batch_update_points() request carries every set-payload
        # operation of the chunk, instead of one round trip per point.
        client.batch_update_points(
            collection_name=collection_name,
            update_operations=[
                SetPayloadOperation(
                    set_payload=SetPayload(
                        payload=points_payloads[point_id],
                        points=[point_id],
                    )
                )
                for point_id in batch_ids
            ],
        )

        print(f"已处理 {min(i + batch_size, len(points_ids))}/{len(points_ids)}")

# Point id -> payload to apply to that point.
payloads = {
    point_id: {"status": status}
    for point_id, status in ((1, "active"), (2, "inactive"), (3, "active"))
}

batch_set_payload(collection_name="payload_demo", points_payloads=payloads)

小结 #

本章详细介绍了 Payload 管理:

  • Payload 数据类型
  • Payload 索引创建和管理
  • Payload 增删改操作
  • 各种过滤条件
  • 最佳实践

下一步 #

掌握 Payload 管理后,继续学习「索引与性能」,深入了解性能优化技巧!

最后更新:2026-04-04