Supabase全文搜索 #

一、全文搜索概述 #

1.1 什么是全文搜索 #

text
全文搜索特点
├── 支持自然语言搜索
├── 支持模糊匹配
├── 支持相关性排序
├── 支持多语言
└── 高性能索引

1.2 核心概念 #

概念 说明
tsvector 文本向量,分词后的结果
tsquery 搜索查询
to_tsvector 转换为向量
to_tsquery 转换为查询

二、基础搜索 #

2.1 创建搜索向量 #

sql
-- 为表添加搜索向量列
ALTER TABLE posts ADD COLUMN search_vector tsvector;

-- 更新搜索向量
UPDATE posts
SET search_vector = 
    setweight(to_tsvector('english', title), 'A') ||
    setweight(to_tsvector('english', content), 'B');

-- 创建索引
CREATE INDEX idx_posts_search ON posts USING GIN (search_vector);

2.2 基础搜索 #

sql
-- 搜索包含关键词的文章
SELECT title, content
FROM posts
WHERE search_vector @@ to_tsquery('english', 'javascript');

-- 多词搜索(AND)
SELECT * FROM posts
WHERE search_vector @@ to_tsquery('english', 'javascript & react');

-- 多词搜索(OR)
SELECT * FROM posts
WHERE search_vector @@ to_tsquery('english', 'javascript | typescript');

2.3 相关性排序 #

sql
SELECT 
    title,
    ts_rank(search_vector, to_tsquery('english', 'javascript')) as rank
FROM posts
WHERE search_vector @@ to_tsquery('english', 'javascript')
ORDER BY rank DESC;

三、自动更新搜索向量 #

3.1 使用触发器 #

sql
CREATE OR REPLACE FUNCTION posts_search_trigger()
RETURNS TRIGGER AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(NEW.tags::text, '')), 'C');
    
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER posts_search_update
    BEFORE INSERT OR UPDATE ON posts
    FOR EACH ROW
    EXECUTE FUNCTION posts_search_trigger();

四、搜索函数 #

4.1 创建搜索函数 #

sql
CREATE OR REPLACE FUNCTION search_posts(search_query TEXT)
RETURNS TABLE(
    id BIGINT,
    title TEXT,
    content TEXT,
    rank REAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT 
        posts.id,
        posts.title,
        posts.content,
        ts_rank(posts.search_vector, websearch_to_tsquery('english', search_query)) as rank
    FROM posts
    WHERE posts.search_vector @@ websearch_to_tsquery('english', search_query)
    ORDER BY rank DESC
    LIMIT 20;
END;
$$ LANGUAGE plpgsql;

4.2 客户端调用 #

typescript
const { data, error } = await supabase.rpc('search_posts', {
  search_query: 'javascript react'
})

console.log(data)

五、高级搜索 #

5.1 搜索高亮 #

sql
SELECT 
    title,
    ts_headline('english', content, to_tsquery('english', 'javascript')) as highlighted_content
FROM posts
WHERE search_vector @@ to_tsquery('english', 'javascript');

5.2 搜索建议 #

sql
-- 获取搜索建议
CREATE OR REPLACE FUNCTION search_suggestions(prefix TEXT)
RETURNS TABLE(word TEXT, ndoc INTEGER) AS $$
BEGIN
    RETURN QUERY
    SELECT word, ndoc
    FROM ts_stat($$SELECT search_vector FROM posts$$)
    WHERE word LIKE prefix || '%'
    ORDER BY ndoc DESC
    LIMIT 10;
END;
$$ LANGUAGE plpgsql;

5.3 分面搜索 #

sql
CREATE OR REPLACE FUNCTION faceted_search(
    search_query TEXT,
    category_filter TEXT DEFAULT NULL
)
RETURNS TABLE(
    id BIGINT,
    title TEXT,
    category TEXT,
    rank REAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT 
        posts.id,
        posts.title,
        posts.category,
        ts_rank(posts.search_vector, websearch_to_tsquery('english', search_query)) as rank
    FROM posts
    WHERE 
        posts.search_vector @@ websearch_to_tsquery('english', search_query)
        AND (category_filter IS NULL OR posts.category = category_filter)
    ORDER BY rank DESC;
END;
$$ LANGUAGE plpgsql;

六、中文搜索 #

6.1 安装中文分词扩展 #

sql
-- 使用zhparser扩展(需要自托管)
CREATE EXTENSION IF NOT EXISTS zhparser;

-- 创建中文配置
CREATE TEXT SEARCH CONFIGURATION chinese (PARSER = zhparser);
ALTER TEXT SEARCH CONFIGURATION chinese ADD MAPPING FOR n,v,a,i,e,l WITH simple;

6.2 中文搜索示例 #

sql
-- 创建中文搜索向量
ALTER TABLE articles ADD COLUMN search_vector_cn tsvector;

UPDATE articles
SET search_vector_cn = to_tsvector('chinese', title || ' ' || content);

-- 中文搜索
SELECT * FROM articles
WHERE search_vector_cn @@ to_tsquery('chinese', '数据库');

6.3 使用简单分词 #

sql
-- 不安装扩展的简单中文搜索
CREATE OR REPLACE FUNCTION simple_chinese_search(
    table_name TEXT,
    search_columns TEXT[],
    search_query TEXT
)
RETURNS SETOF RECORD AS $$
DECLARE
    where_clause TEXT;
BEGIN
    -- 构建简单的LIKE查询
    where_clause := string_agg(
        format('%I ILIKE ''%%%s%%''', column_name, search_query),
        ' OR '
    ) FROM unnest(search_columns) AS column_name;
    
    RETURN QUERY EXECUTE format(
        'SELECT * FROM %I WHERE %s',
        table_name,
        where_clause
    );
END;
$$ LANGUAGE plpgsql;

七、搜索优化 #

7.1 索引优化 #

sql
-- GIN索引(默认)
CREATE INDEX idx_posts_search ON posts USING GIN (search_vector);

-- GIN索引优化(减少索引大小)
CREATE INDEX idx_posts_search_fast ON posts USING GIN (search_vector gin_fast_update_ops);

7.2 查询优化 #

sql
-- 使用websearch_to_tsquery处理用户输入
SELECT * FROM posts
WHERE search_vector @@ websearch_to_tsquery('english', 'javascript -react');

-- 支持引号精确匹配
SELECT * FROM posts
WHERE search_vector @@ websearch_to_tsquery('english', '"javascript tutorial"');

八、完整示例 #

8.1 博客搜索 #

sql
-- 添加搜索向量
ALTER TABLE posts ADD COLUMN IF NOT EXISTS search_vector tsvector;

-- 创建触发器自动更新
CREATE OR REPLACE FUNCTION update_posts_search_vector()
RETURNS TRIGGER AS $$
BEGIN
    NEW.search_vector :=
        setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'C');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER posts_search_vector_update
    BEFORE INSERT OR UPDATE ON posts
    FOR EACH ROW
    EXECUTE FUNCTION update_posts_search_vector();

-- 创建索引
CREATE INDEX idx_posts_search ON posts USING GIN (search_vector);

-- 搜索函数
CREATE OR REPLACE FUNCTION search_posts(
    query TEXT,
    limit_count INTEGER DEFAULT 20,
    offset_count INTEGER DEFAULT 0
)
RETURNS TABLE(
    id BIGINT,
    title TEXT,
    excerpt TEXT,
    author_name TEXT,
    created_at TIMESTAMPTZ,
    rank REAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT 
        posts.id,
        posts.title,
        LEFT(posts.content, 200) as excerpt,
        profiles.full_name as author_name,
        posts.created_at,
        ts_rank(posts.search_vector, websearch_to_tsquery('english', query)) as rank
    FROM posts
    LEFT JOIN profiles ON posts.author_id = profiles.id
    WHERE 
        posts.published = true
        AND posts.search_vector @@ websearch_to_tsquery('english', query)
    ORDER BY rank DESC
    LIMIT limit_count
    OFFSET offset_count;
END;
$$ LANGUAGE plpgsql;

8.2 客户端使用 #

typescript
async function searchPosts(query: string, page = 1, pageSize = 20) {
  const { data, error } = await supabase.rpc('search_posts', {
    query,
    limit_count: pageSize,
    offset_count: (page - 1) * pageSize,
  })

  return { data, error }
}

九、总结 #

全文搜索要点:

操作 说明
向量 tsvector
查询 to_tsquery, websearch_to_tsquery
索引 GIN
排序 ts_rank
高亮 ts_headline

下一步,让我们学习向量搜索!

最后更新:2026-03-28