搜索查询 #
搜索是 Qdrant 最核心的功能,本章详细介绍各种搜索方式和技巧。
搜索类型概览 #
text
Qdrant 搜索类型:
┌─────────────────────────────────────────────────────────────┐
│ 搜索功能 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 基础搜索 高级搜索 特殊搜索 │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 向量搜索 │ │ 过滤搜索 │ │ 推荐 │ │
│ │ 相似性查询 │ │ 混合搜索 │ │ 发现 │ │
│ │ 批量搜索 │ │ 重排序 │ │ 滚动 │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
基础向量搜索 #
简单搜索 #
python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
# Use the client's ":memory:" mode for this demo.
client = QdrantClient(":memory:")

# A 4-dimensional collection compared with cosine distance.
client.create_collection(
    collection_name="search_demo",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)

# 100 synthetic points; each vector is a short ramp starting at i * 0.1.
points = [
    PointStruct(
        id=i,
        vector=[i*0.1, i*0.1+0.1, i*0.1+0.2, i*0.1+0.3],
        payload={"index": i},
    )
    for i in range(100)
]
client.upsert("search_demo", points)

# Ask for the five best matches of the query vector.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    limit=5,
)
for result in results:
    print(f"ID: {result.id}, Score: {result.score:.4f}, Payload: {result.payload}")
搜索参数详解 #
python
# Same query as before, now spelling out the optional search arguments.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    limit=10,             # maximum number of hits to return
    offset=0,             # number of leading hits to skip
    with_payload=True,    # include each point's payload
    with_vectors=False,   # do not return the stored vectors
    score_threshold=0.8,  # score cut-off applied to the hits
)
print(f"返回 {len(results)} 个结果(score >= 0.8)")
| 参数 | 说明 |
|---|---|
| query_vector | 查询向量 |
| limit | 返回结果数量 |
| offset | 偏移量(分页) |
| with_payload | 是否返回 Payload |
| with_vectors | 是否返回向量 |
| score_threshold | 分数阈值:相似度类度量(如 Cosine、Dot)只返回得分不低于该值的结果;距离类度量(如 Euclid)只返回距离不高于该值的结果 |
多向量搜索 #
python
# A collection whose points carry two named vectors with different metrics.
client.create_collection(
    collection_name="multi_search",
    vectors_config={
        "text": VectorParams(size=4, distance=Distance.COSINE),
        "image": VectorParams(size=4, distance=Distance.EUCLID),
    },
)

# Ten points, each providing a value for both named vectors.
points = [
    PointStruct(
        id=i,
        vector={
            "text": [i*0.1, i*0.1+0.1, i*0.1+0.2, i*0.1+0.3],
            "image": [i*0.2, i*0.2+0.1, i*0.2+0.2, i*0.2+0.3],
        },
        payload={"index": i},
    )
    for i in range(10)
]
client.upsert("multi_search", points)

# A (name, vector) tuple selects which named vector the query runs against.
results = client.search(
    collection_name="multi_search",
    query_vector=("text", [0.5, 0.6, 0.7, 0.8]),
    limit=5,
)
for result in results:
    print(f"ID: {result.id}, Score: {result.score:.4f}")
批量搜索 #
python
# SearchRequest is the typed request model expected by search_batch; plain
# dicts are not reliably accepted (the in-memory client reads request fields
# as attributes).
from qdrant_client.models import SearchRequest

query_vectors = [
    [0.1, 0.2, 0.3, 0.4],
    [0.5, 0.6, 0.7, 0.8],
    [0.9, 1.0, 1.1, 1.2],
]

# One request per query vector, all sent in a single batch call.
results = client.search_batch(
    collection_name="search_demo",
    requests=[SearchRequest(vector=vec, limit=3) for vec in query_vectors],
)

# The batch result holds one list of hits per request, in request order.
for i, batch_results in enumerate(results):
    print(f"\n查询 {i+1} 结果:")
    for result in batch_results:
        print(f" ID: {result.id}, Score: {result.score:.4f}")
过滤搜索 #
基础过滤 #
python
from qdrant_client.models import Filter, FieldCondition, MatchValue
# Only points whose "category" payload equals "technology" may be returned.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    query_filter=Filter(
        must=[
            FieldCondition(key="category", match=MatchValue(value="technology")),
        ],
    ),
    limit=5,
)
匹配条件 #
python
from qdrant_client.models import MatchText, MatchAny, MatchExcept
# Exact value match on "category".
exact_match = FieldCondition(
    key="category",
    match=MatchValue(value="technology"),
)

# Text match on "title".
text_match = FieldCondition(
    key="title",
    match=MatchText(text="python"),
)

# Matches when "tags" holds any of the listed values.
any_match = FieldCondition(
    key="tags",
    match=MatchAny(any=["AI", "ML", "Python"]),
)

# "except" is a Python keyword, so the argument is passed via dict unpacking.
except_match = FieldCondition(
    key="status",
    match=MatchExcept(**{"except": ["deleted", "archived"]}),
)
范围条件 #
python
from qdrant_client.models import Range
# Closed interval: 100 <= price <= 500.
price_range = FieldCondition(
    key="price",
    range=Range(gte=100, lte=500),
)

# Half-open interval on a numeric timestamp. If read as Unix seconds, the
# bounds are 2024-01-01T00:00:00Z (inclusive) and 2025-01-01T00:00:00Z
# (exclusive).
date_range = FieldCondition(
    key="created_at",
    range=Range(gte=1704067200, lt=1735689600),
)
地理位置过滤 #
python
from qdrant_client.models import GeoBoundingBox, GeoRadius, GeoPolygon
# Rectangle described by its top-left and bottom-right corners.
geo_box = FieldCondition(
    key="location",
    geo_bounding_box=GeoBoundingBox(
        top_left={"lat": 40.0, "lon": 116.0},
        bottom_right={"lat": 39.0, "lon": 117.0},
    ),
)

# Circle around a centre point.
# NOTE(review): radius is presumably in meters - confirm against the Qdrant docs.
geo_radius = FieldCondition(
    key="location",
    geo_radius=GeoRadius(
        center={"lat": 39.9, "lon": 116.4},
        radius=10000,
    ),
)
数组过滤 #
python
from qdrant_client.models import ValuesCount
# Matches points whose "tags" field holds between 2 and 5 values (inclusive).
has_tags = FieldCondition(
    key="tags",
    values_count=ValuesCount(gte=2, lte=5),
)
空值过滤 #
python
from qdrant_client.models import Filter, IsEmptyCondition, PayloadField

# Condition that is true when "description" is missing or empty.
description_is_empty = IsEmptyCondition(is_empty=PayloadField(key="description"))

# Negate it with must_not to keep only points that do have a description.
has_description = Filter(must_not=[description_is_empty])
复合过滤 #
python
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range
# must: every condition has to hold; should: at least one has to hold;
# must_not: none may hold.
complex_filter = Filter(
    must=[
        FieldCondition(key="category", match=MatchValue(value="technology")),
        FieldCondition(key="price", range=Range(lte=1000)),
    ],
    should=[
        FieldCondition(key="brand", match=MatchValue(value="Apple")),
        FieldCondition(key="brand", match=MatchValue(value="Samsung")),
    ],
    must_not=[
        FieldCondition(key="status", match=MatchValue(value="discontinued")),
    ],
)

# Run the vector search restricted by the combined filter.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    query_filter=complex_filter,
    limit=10,
)
嵌套过滤 #
python
# Reads as: category == "electronics"
#           AND (brand == "Apple" OR (brand == "Samsung" AND price <= 500))
nested_filter = Filter(
    must=[
        FieldCondition(key="category", match=MatchValue(value="electronics")),
        Filter(
            should=[
                FieldCondition(key="brand", match=MatchValue(value="Apple")),
                Filter(
                    must=[
                        FieldCondition(key="brand", match=MatchValue(value="Samsung")),
                        FieldCondition(key="price", range=Range(lte=500)),
                    ],
                ),
            ],
        ),
    ],
)
搜索参数优化 #
HNSW 搜索参数 #
python
from qdrant_client.models import SearchParams
# Per-request tuning: HNSW ef, exact flag, and quantization options.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    search_params=SearchParams(
        hnsw_ef=128,
        exact=False,
        quantization={
            "ignore": False,
            "rescore": True,
            "oversampling": 2.0,
        },
    ),
    limit=10,
)
| 参数 | 说明 |
|---|---|
| hnsw_ef | 搜索时的 ef 值,越大越精确,但搜索越慢 |
| exact | 是否使用精确搜索 |
| quantization | 量化相关参数 |
量化重排序 #
python
# Only the quantization options are set here: rescoring on, oversampling 2.0.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    search_params=SearchParams(
        quantization={
            "ignore": False,
            "rescore": True,
            "oversampling": 2.0,
        },
    ),
    limit=10,
)
分页 #
Offset 分页 #
python
# Pages are 1-based: page N skips the first (N - 1) * page_size hits.
page = 2
page_size = 10

results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    limit=page_size,
    offset=(page - 1) * page_size,
)
print(f"第 {page} 页,共 {len(results)} 条结果")
基于游标的分页 #
python
def paginated_search(collection_name, query_vector, page_size=10, last_score=None, last_id=None):
    """Return one page of search hits, optionally resuming after a cursor.

    The cursor is the ``(score, id)`` pair of the last hit of the previous
    page. A hit belongs to a later page when its ``(score, id)`` tuple
    compares lower than the cursor.

    Args:
        collection_name: Collection to search.
        query_vector: Query vector passed to ``client.search``.
        page_size: Maximum number of hits to return.
        last_score: Score of the last hit on the previous page, or None.
        last_id: ID of the last hit on the previous page, or None.

    Returns:
        A list of at most ``page_size`` hits.
    """
    if last_score is None or last_id is None:
        # No complete cursor: the first page is simply the top hits.
        return client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=page_size,
        )

    cursor = (last_score, last_id)
    # The search API has no cursor, so widen the fetch window until it
    # reaches far enough past the cursor or the collection runs out.
    fetch_limit = max(page_size, 1) * 2
    while True:
        hits = client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=fetch_limit,
        )
        # NOTE(review): ties on score are assumed to be ordered by descending
        # id, as in the original comparison - confirm for your data.
        after_cursor = [hit for hit in hits if (hit.score, hit.id) < cursor]
        exhausted = len(hits) < fetch_limit
        if exhausted or len(after_cursor) >= page_size:
            return after_cursor[:page_size]
        fetch_limit *= 2
# First page: no cursor arguments are supplied.
page1 = paginated_search(
    "search_demo",
    [0.5, 0.6, 0.7, 0.8],
    page_size=5,
)
print("第一页:")
for r in page1:
    print(f" ID: {r.id}, Score: {r.score:.4f}")
推荐搜索 #
推荐搜索基于已有的点来发现相似内容。
基础推荐 #
python
# Recommend points using stored points 1, 2 and 3 as positive examples.
results = client.recommend(
    collection_name="search_demo",
    positive=[1, 2, 3],
    limit=5,
)
print("推荐结果:")
for result in results:
    print(f" ID: {result.id}, Score: {result.score:.4f}")
正负样本推荐 #
python
# Positive examples are points 1 and 2; negative examples are points 5 and 6.
results = client.recommend(
    collection_name="search_demo",
    positive=[1, 2],
    negative=[5, 6],
    limit=10,
)
使用向量推荐 #
python
from qdrant_client.models import RecommendExample
# RecommendExample is a type alias (point ID or raw vector), not a class that
# can be instantiated, so the example vector is passed directly.
results = client.recommend(
    collection_name="search_demo",
    positive=[
        [0.5, 0.6, 0.7, 0.8],
    ],
    limit=5,
)
带过滤的推荐 #
python
# Recommendation restricted to points whose "category" is "technology".
results = client.recommend(
    collection_name="search_demo",
    positive=[1, 2],
    negative=[5],
    query_filter=Filter(
        must=[
            FieldCondition(key="category", match=MatchValue(value="technology")),
        ],
    ),
    limit=10,
)
发现搜索 #
发现搜索用于发现与已知点相似但又有差异的内容。
python
from qdrant_client.models import ContextExamplePair

# Search towards target point 1; each context pair names a positive and a
# negative example point by ID.
results = client.discover(
    collection_name="search_demo",
    target=1,
    context=[
        ContextExamplePair(positive=2, negative=3),
        ContextExamplePair(positive=4, negative=5),
    ],
    limit=10,
)
混合搜索 #
结合稠密向量和稀疏向量进行搜索。
创建混合索引 #
python
from qdrant_client.models import SparseVectorParams, SparseIndexParams
# One unnamed dense vector plus a named sparse vector, "text-sparse".
client.create_collection(
    collection_name="hybrid_search",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
    sparse_vectors_config={
        "text-sparse": SparseVectorParams(
            index=SparseIndexParams(on_disk=False),
        ),
    },
)
插入混合向量 #
python
from qdrant_client.models import SparseVector
# PointStruct has no separate sparse-vector field: dense and sparse vectors
# share the `vector` mapping. The collection's unnamed dense vector is
# addressed by the empty-string name.
points = [
    PointStruct(
        id=i,
        vector={
            "": [i*0.1, i*0.1+0.1, i*0.1+0.2, i*0.1+0.3],
            "text-sparse": SparseVector(
                indices=[1, 5, 10],
                values=[0.5, 0.8, 0.3],
            ),
        },
        payload={"text": f"文档 {i}"},
    )
    for i in range(10)
]
client.upsert("hybrid_search", points)
混合搜索查询 #
python
from qdrant_client.models import Fusion, FusionQuery, Prefetch, SparseVector

# Each Prefetch runs one sub-search; FusionQuery then merges the candidate
# lists with Reciprocal Rank Fusion.
results = client.query_points(
    collection_name="hybrid_search",
    prefetch=[
        # Dense sub-search on the collection's default (unnamed) vector.
        Prefetch(
            query=[0.5, 0.6, 0.7, 0.8],
            limit=20,
        ),
        # Sparse sub-search on the "text-sparse" vector.
        Prefetch(
            query=SparseVector(
                indices=[1, 5, 10],
                values=[0.5, 0.8, 0.3],
            ),
            using="text-sparse",
            limit=20,
        ),
    ],
    query=FusionQuery(fusion=Fusion.RRF),
    limit=10,
)
for result in results.points:
    print(f"ID: {result.id}, Score: {result.score:.4f}")
分组搜索 #
按字段值分组返回结果。
python
# No extra model import is needed: query_points_groups takes plain arguments.
# Up to 5 groups keyed by the "category" payload field, 3 hits per group.
results = client.query_points_groups(
    collection_name="search_demo",
    query=[0.5, 0.6, 0.7, 0.8],
    group_by="category",
    limit=5,
    group_size=3,
)
for group in results.groups:
    # The group's label is read from the payload of its first hit.
    print(f"\n分组: {group.hits[0].payload.get('category', 'N/A')}")
    for hit in group.hits:
        print(f" ID: {hit.id}, Score: {hit.score:.4f}")
搜索结果处理 #
获取向量 #
python
# with_vectors=True asks for the stored vector alongside every hit.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    with_vectors=True,
    limit=5,
)
for result in results:
    print(f"ID: {result.id}")
    print(f"Vector: {result.vector}")
    print(f"Score: {result.score:.4f}")
    print("---")
选择性 Payload #
python
# A list of field names is passed as the payload selector.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    with_payload=["title", "category"],
    limit=5,
)
Payload 投影 #
python
from qdrant_client.models import PayloadSelectorInclude

# Include and exclude are two separate, mutually exclusive selectors.
# Including only "title" and "category" already leaves out every other field
# (such as "large_field"); use PayloadSelectorExclude(exclude=[...]) instead
# when everything except a few fields should be returned.
results = client.search(
    collection_name="search_demo",
    query_vector=[0.5, 0.6, 0.7, 0.8],
    with_payload=PayloadSelectorInclude(include=["title", "category"]),
    limit=5,
)
最佳实践 #
搜索性能优化 #
text
性能优化建议:
1. 调整 hnsw_ef
├── 精度优先:ef = limit * 4
└── 速度优先:ef = limit * 2
2. 使用量化
├── 减少内存占用
└── 启用重排序
3. 合理设置 limit
├── 不要设置过大
└── 配合分页使用
4. Payload 索引
├── 为过滤字段创建索引
└── 避免全扫描
过滤优化 #
python
# Keyword index on "category".
client.create_payload_index(
    collection_name="search_demo",
    field_name="category",
    field_schema="keyword",
)

# Float index on "price".
client.create_payload_index(
    collection_name="search_demo",
    field_name="price",
    field_schema="float",
)
搜索监控 #
python
import time
def monitored_search(collection_name, query_vector, limit=10):
    """Run a vector search and print its latency and hit count.

    Args:
        collection_name: Collection to search.
        query_vector: Query vector passed to ``client.search``.
        limit: Maximum number of hits to request.

    Returns:
        The hits returned by ``client.search``.
    """
    # perf_counter is monotonic, so the measurement is not distorted by
    # wall-clock adjustments the way time.time() can be.
    started = time.perf_counter()
    results = client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit,
    )
    elapsed = (time.perf_counter() - started) * 1000  # seconds -> milliseconds
    print(f"搜索耗时: {elapsed:.2f}ms")
    print(f"返回结果: {len(results)} 条")
    return results
小结 #
本章详细介绍了 Qdrant 的搜索功能:
- 基础向量搜索
- 过滤搜索
- 批量搜索
- 推荐搜索
- 混合搜索
- 分组和分页
下一步 #
掌握搜索功能后,继续学习 Payload 管理,了解如何高效管理元数据!
最后更新:2026-04-04