向量搜索 #

一、搜索概述 #

1.1 搜索流程 #

text
向量搜索流程:

┌──────────┐     ┌──────────┐     ┌──────────┐
│ 查询向量  │────▶│ 索引搜索  │────▶│ 结果排序  │
└──────────┘     └──────────┘     └──────────┘
                       │
                       ▼
                 ┌──────────┐
                 │ 过滤/重排 │
                 └──────────┘

1.2 搜索类型 #

text
搜索类型:

┌─────────────────────────────────────────┐
│           ANN (近似最近邻)               │
├─────────────────────────────────────────┤
│  ├── 向量相似度搜索                      │
│  ├── 混合搜索(向量+标量)               │
│  └── 多向量搜索                          │
└─────────────────────────────────────────┘

二、基本搜索 #

2.1 简单搜索 #

python
from pymilvus import Collection

# Open the target collection and load it into memory;
# a collection must be loaded before it can be searched.
collection = Collection("documents")
collection.load()

# Index-level search parameters: L2 (Euclidean) distance,
# probing 10 IVF clusters per query (recall/latency trade-off).
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10}
}

# Single-query ANN search over the "embedding" vector field,
# returning the 10 nearest neighbours.
results = collection.search(
    data=[[0.1]*768],
    anns_field="embedding",
    param=search_params,
    limit=10
)

# `results` contains one Hits object per query vector (one here).
for hits in results:
    for hit in hits:
        print(f"ID: {hit.id}, Distance: {hit.distance}")

2.2 搜索参数 #

python
# Extended search parameters:
#   nprobe       - number of IVF clusters probed (higher = better recall, slower)
#   offset       - number of leading results to skip (pagination)
#   radius       - outer distance bound for range search
#   range_filter - inner distance bound; for L2 metric, returned hits
#                  satisfy range_filter <= distance < radius
search_params = {
    "metric_type": "L2",
    "params": {
        "nprobe": 16,
        "offset": 0,
        "radius": 1.0,
        "range_filter": 0.5
    }
}

2.3 参数说明 #

参数 说明 默认值
metric_type 距离度量类型 L2
nprobe 搜索的聚类数量 16
offset 结果偏移量 0
radius 搜索半径 -
range_filter 范围过滤 -

三、距离度量 #

3.1 度量类型 #

类型 公式 适用场景
L2 √Σ(ai-bi)² 图像搜索
IP Σai*bi 推荐系统
COSINE Σai*bi/(‖a‖·‖b‖) 文本语义搜索

3.2 L2距离 #

python
# L2 (Euclidean) distance: smaller distance = more similar.
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 16}
}

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10
)

for hit in results[0]:
    print(f"ID: {hit.id}, L2距离: {hit.distance}")

3.3 内积 (IP) #

python
# Inner product (IP): larger value = more similar.
# NOTE(review): IP ranks like cosine similarity only when the stored
# embeddings are unit-normalized — confirm before relying on it.
search_params = {
    "metric_type": "IP",
    "params": {"nprobe": 16}
}

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10
)

for hit in results[0]:
    print(f"ID: {hit.id}, 内积: {hit.distance}")

3.4 余弦相似度 (COSINE) #

python
# Cosine similarity: value in [-1, 1]; larger = more similar.
search_params = {
    "metric_type": "COSINE",
    "params": {"nprobe": 16}
}

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10
)

for hit in results[0]:
    print(f"ID: {hit.id}, 余弦相似度: {hit.distance}")

四、过滤搜索 #

4.1 标量过滤 #

python
# Hybrid search: ANN search combined with a boolean scalar filter (`expr`).
# `output_fields` asks the server to return these scalar fields with each hit.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='category == "electronics"',
    output_fields=["title", "category"]
)

4.2 复杂过滤 #

python
# Numeric range filter: both conditions must hold.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='price > 100 and price < 1000',
    output_fields=["title", "price"]
)

# Set-membership filter using the `in` operator.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='category in ["electronics", "books"]',
    output_fields=["title", "category"]
)

4.3 JSON字段过滤 #

python
# Filter on a key inside a JSON field (bracket syntax in `expr`).
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='metadata["author"] == "张三"',
    output_fields=["title", "metadata"]
)

# json_contains: match rows whose JSON array contains the given element.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='json_contains(metadata["tags"], "AI")',
    output_fields=["title", "metadata"]
)

4.4 数组字段过滤 #

python
# array_contains: match rows whose ARRAY field contains the given element.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    expr='array_contains(tags, "AI")',
    output_fields=["title", "tags"]
)

五、批量搜索 #

5.1 多向量搜索 #

python
# Batch several query vectors into one request; the server returns one
# Hits object per query vector, in the same order as `data`.
query_vectors = [
    [0.1]*768,
    [0.2]*768,
    [0.3]*768
]

results = collection.search(
    data=query_vectors,
    anns_field="embedding",
    param=search_params,
    limit=10
)

# results[i] corresponds to query_vectors[i].
for i, hits in enumerate(results):
    print(f"查询 {i+1} 结果:")
    for hit in hits:
        print(f"  ID: {hit.id}, Distance: {hit.distance}")

5.2 批量搜索优化 #

python
import numpy as np

def batch_search(collection, query_vectors, batch_size=100,
                 search_params=None, anns_field="embedding", limit=10):
    """Search many query vectors in fixed-size batches.

    Batching bounds the per-request payload size instead of sending all
    queries in a single oversized request.

    Args:
        collection: a loaded pymilvus Collection.
        query_vectors: list of query vectors.
        batch_size: number of query vectors sent per search request.
        search_params: index search parameters; defaults to L2 / nprobe=16.
            (The original relied on a module-global `search_params`; an
            explicit parameter makes the function self-contained.)
        anns_field: name of the vector field to search.
        limit: top-k results requested per query vector.

    Returns:
        A flat list with one result (Hits) per query vector, in input order.
    """
    if search_params is None:
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}

    all_results = []
    # Slice the queries into batches; the final batch may be shorter.
    for start in range(0, len(query_vectors), batch_size):
        batch = query_vectors[start:start + batch_size]
        results = collection.search(
            data=batch,
            anns_field=anns_field,
            param=search_params,
            limit=limit
        )
        all_results.extend(results)

    return all_results

# Generate 1,000 random 768-dim query vectors and search them in batches.
query_vectors = np.random.rand(1000, 768).tolist()
results = batch_search(collection, query_vectors)

六、分区搜索 #

6.1 指定分区搜索 #

python
# Restrict the search to a single partition — less data scanned per query.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    partition_names=["2024_01"]
)

6.2 多分区搜索 #

python
# Search several named partitions in one request.
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    partition_names=["2024_01", "2024_02"]
)

七、结果处理 #

7.1 获取结果 #

python
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=10,
    output_fields=["title", "category"]
)

# One query vector was sent, so results holds exactly one Hits object.
hits = results[0]

for hit in hits:
    print(f"ID: {hit.id}")
    print(f"Distance: {hit.distance}")
    # Requested output fields are accessed through hit.entity.
    print(f"Title: {hit.entity.get('title')}")
    print(f"Category: {hit.entity.get('category')}")

7.2 结果转换 #

python
def search_results_to_dict(results):
    """Flatten nested search results into a list of plain dicts.

    Each output dict carries the hit's id, distance, and its entity's
    field mapping; hits from all query vectors are concatenated in order.
    """
    return [
        {
            "id": hit.id,
            "distance": hit.distance,
            "entity": hit.entity.fields,
        }
        for hits in results
        for hit in hits
    ]

results_dict = search_results_to_dict(results)

7.3 结果分页 #

python
# Offset-based pagination: page N skips (N - 1) * page_size results.
# NOTE(review): deep offsets get progressively slower — the server must
# compute and discard all skipped results.
page_size = 10
page = 1

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=page_size,
    offset=(page - 1) * page_size
)

八、范围搜索 #

8.1 半径搜索 #

python
# Range search with an outer bound only: for the L2 metric, only hits
# with distance below `radius` are returned; `limit` still caps the count.
search_params = {
    "metric_type": "L2",
    "params": {
        "nprobe": 16,
        "radius": 0.5
    }
}

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=100
)

8.2 范围过滤 #

python
# Range search with both bounds: for the L2 metric, returned hits satisfy
# range_filter <= distance < radius (here: 0.3 <= d < 1.0).
search_params = {
    "metric_type": "L2",
    "params": {
        "nprobe": 16,
        "radius": 1.0,
        "range_filter": 0.3
    }
}

results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param=search_params,
    limit=100
)

# The server already enforces the bounds; this client-side check merely
# illustrates the distance range of the returned hits.
for hit in results[0]:
    if 0.3 < hit.distance < 1.0:
        print(f"ID: {hit.id}, Distance: {hit.distance}")

九、搜索优化 #

9.1 参数调优 #

text
搜索参数调优指南:

┌─────────────────────────────────────────┐
│           nprobe 调优                    │
├─────────────────────────────────────────┤
│                                         │
│  高精度需求 → nprobe = nlist * 0.3      │
│  平衡模式   → nprobe = nlist * 0.1      │
│  高速模式   → nprobe = nlist * 0.05     │
│                                         │
│  nprobe 越大:                          │
│  - 精度越高                             │
│  - 速度越慢                             │
│                                         │
└─────────────────────────────────────────┘

9.2 索引选择 #

text
索引选择建议:

数据量              推荐索引
──────────────────────────────
< 10万              FLAT
10万-100万          IVF_FLAT
100万-1000万        IVF_PQ, HNSW
> 1000万            DISKANN, IVF_PQ

9.3 搜索性能优化 #

python
import time

def benchmark_search(collection, query_vectors, search_params, rounds=10):
    """Measure average search latency and throughput over several rounds.

    Args:
        collection: a loaded pymilvus Collection.
        query_vectors: query vectors sent together in each round.
        search_params: index search parameters passed to `search`.
        rounds: number of timed repetitions to average over.

    Returns:
        dict with "avg_latency_s" (mean seconds per request) and "qps"
        (query vectors processed per second). Also prints both figures.
    """
    times = []

    for _ in range(rounds):
        # perf_counter() is monotonic and high-resolution — preferred over
        # time.time() (wall clock, subject to adjustments) for intervals.
        start = time.perf_counter()
        collection.search(
            data=query_vectors,
            anns_field="embedding",
            param=search_params,
            limit=10
        )
        times.append(time.perf_counter() - start)

    avg_time = sum(times) / len(times)
    qps = len(query_vectors) / avg_time

    print(f"平均延迟: {avg_time*1000:.2f}ms")
    print(f"QPS: {qps:.2f}")
    return {"avg_latency_s": avg_time, "qps": qps}

十、完整示例 #

10.1 RAG搜索示例 #

python
from pymilvus import Collection
import numpy as np

# Open and load the collection so it is searchable.
collection = Collection("documents")
collection.load()

def rag_search(query_text, embedding_model, top_k=5):
    """Embed `query_text` and retrieve the top_k most similar documents.

    Uses cosine similarity against the module-level `collection`; returns
    a list of dicts with id, score, title and content, most similar first.
    """
    # Encode the query with the caller-supplied embedding model.
    query_embedding = embedding_model.encode(query_text)

    search_params = {
        "metric_type": "COSINE",
        "params": {"nprobe": 16}
    }

    hits = collection.search(
        data=[query_embedding.tolist()],
        anns_field="embedding",
        param=search_params,
        limit=top_k,
        output_fields=["title", "content"]
    )[0]

    # Repackage each hit as a plain dict for downstream RAG prompting.
    return [
        {
            "id": hit.id,
            "score": hit.distance,
            "title": hit.entity.get("title"),
            "content": hit.entity.get("content")
        }
        for hit in hits
    ]

# Example: retrieve supporting documents for a question.
query = "什么是机器学习?"
results = rag_search(query, embedding_model)
for doc in results:
    print(f"Score: {doc['score']:.4f}")
    print(f"Title: {doc['title']}")
    # Truncate content to the first 100 characters for display.
    print(f"Content: {doc['content'][:100]}...")
    print("-" * 50)

十一、总结 #

搜索操作速查表:

操作 方法
基本搜索 collection.search()
过滤搜索 expr参数
分区搜索 partition_names参数
批量搜索 多个查询向量
范围搜索 radius参数

下一步,让我们学习标量查询!

最后更新:2026-04-04