PostgreSQL 索引 #

索引概述 #

索引是数据库中用于加速查询的数据结构，类似于书籍的目录。

text

┌─────────────────────────────────────────────────────────────┐
│                    索引的作用                                │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   无索引：全表扫描                                          │
│   ┌─────────────────────────────────────────────────────┐  │
│   │ Row 1 → Row 2 → Row 3 → ... → Row 1000000          │  │
│   └─────────────────────────────────────────────────────┘  │
│   时间复杂度：O(n)                                          │
│                                                             │
│   有索引：索引查找                                          │
│   ┌─────────────────────────────────────────────────────┐  │
│   │ 索引结构 → 快速定位 → 返回结果                      │  │
│   └─────────────────────────────────────────────────────┘  │
│   时间复杂度：O(log n)                                      │
│                                                             │
└─────────────────────────────────────────────────────────────┘

索引类型 #

B-tree 索引（默认） #

sql

-- B-tree 是默认索引类型，适用于：
-- - 等值查询（=）
-- - 范围查询（<, <=, >, >=, BETWEEN）
-- - 排序（ORDER BY）
-- - 前缀匹配（LIKE 'xxx%'）

-- 创建 B-tree 索引
CREATE INDEX idx_users_email ON users(email);

-- 显式指定 B-tree
CREATE INDEX idx_users_name ON users USING btree(name);

-- 示例表
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    email VARCHAR(100),
    age INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO users (name, email, age)
SELECT 
    'User ' || i,
    'user' || i || '@example.com',
    (random() * 50 + 18)::INTEGER
FROM generate_series(1, 100000) AS i;

-- 创建索引
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_age ON users(age);

-- 查看执行计划
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user50000@example.com';
-- Index Scan using idx_users_email on users  (cost=0.42..8.44 rows=1 width=...)

-- 范围查询
EXPLAIN ANALYZE SELECT * FROM users WHERE age BETWEEN 25 AND 35;
-- Bitmap Heap Scan on users
--   Recheck Cond: ((age >= 25) AND (age <= 35))
--   ->  Bitmap Index Scan on idx_users_age

Hash 索引 #

sql

-- Hash 索引只支持等值查询
-- 比 B-tree 快，但功能有限

CREATE INDEX idx_users_email_hash ON users USING hash(email);

-- 只能用于等值查询
SELECT * FROM users WHERE email = 'user50000@example.com';

-- 不能用于范围查询
-- SELECT * FROM users WHERE email > 'user';  -- 不能使用 Hash 索引

GiST 索引 #

sql

-- GiST (Generalized Search Tree) 适用于：
-- - 几何数据（点、线、多边形）
-- - 范围类型
-- - 全文搜索

-- 创建几何数据表
CREATE TABLE locations (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    point POINT
);

INSERT INTO locations (name, point)
SELECT 
    'Location ' || i,
    POINT(random() * 100, random() * 100)
FROM generate_series(1, 10000) AS i;

-- 创建 GiST 索引
CREATE INDEX idx_locations_point ON locations USING gist(point);

-- 范围查询
SELECT * FROM locations 
WHERE point <@ box(POINT(0, 0), POINT(50, 50));

-- 范围类型索引
CREATE TABLE reservations (
    id SERIAL PRIMARY KEY,
    room_id INTEGER,
    during TSRANGE
);

CREATE INDEX idx_reservations_during ON reservations USING gist(during);

GIN 索引 #

sql

-- GIN (Generalized Inverted Index) 适用于：
-- - 数组类型
-- - JSONB
-- - 全文搜索

-- 数组索引
CREATE TABLE articles (
    id SERIAL PRIMARY KEY,
    title VARCHAR(200),
    tags VARCHAR(50)[]
);

INSERT INTO articles (title, tags)
SELECT 
    'Article ' || i,
    ARRAY['tag' || (random() * 10)::INTEGER, 'tag' || (random() * 10)::INTEGER]
FROM generate_series(1, 10000) AS i;

-- 创建 GIN 索引
CREATE INDEX idx_articles_tags ON articles USING gin(tags);

-- 数组包含查询
SELECT * FROM articles WHERE tags @> ARRAY['tag1', 'tag2'];

-- JSONB 索引
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    attributes JSONB
);

CREATE INDEX idx_products_attributes ON products USING gin(attributes);

-- JSONB 包含查询
SELECT * FROM products WHERE attributes @> '{"color": "red"}';

-- JSONB 路径索引
CREATE INDEX idx_products_attributes_path ON products USING gin(attributes jsonb_path_ops);

BRIN 索引 #

sql

-- BRIN (Block Range Index) 适用于：
-- - 大表
-- - 数据按索引列有序存储
-- - 占用空间小

-- 创建大表
CREATE TABLE logs (
    id SERIAL,
    message TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO logs (message)
SELECT 'Log message ' || i
FROM generate_series(1, 1000000) AS i;

-- 创建 BRIN 索引（非常小）
CREATE INDEX idx_logs_created_at ON logs USING brin(created_at);

-- BRIN 索引大小对比
SELECT 
    pg_size_pretty(pg_relation_size('idx_logs_created_at')) AS brin_size;

-- B-tree 索引会大很多
-- CREATE INDEX idx_logs_created_at_btree ON logs(created_at);
-- SELECT pg_size_pretty(pg_relation_size('idx_logs_created_at_btree'));

创建和管理索引 #

创建索引 #

sql

-- 基本语法
CREATE INDEX index_name ON table_name (column_name);

-- 多列索引
CREATE INDEX idx_users_name_email ON users(name, email);

-- 唯一索引
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- 并发创建（不锁表）
CREATE INDEX CONCURRENTLY idx_users_name ON users(name);

-- 表达式索引
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- 部分索引
CREATE INDEX idx_users_active ON users(email) WHERE is_active = TRUE;

-- 函数索引
CREATE INDEX idx_users_created_date ON users((created_at::DATE));

查看索引 #

sql

-- 查看表的所有索引
\di

-- 查看特定表的索引
SELECT 
    indexname,
    indexdef
FROM pg_indexes 
WHERE tablename = 'users';

-- 查看索引大小
SELECT 
    indexrelname AS index_name,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes 
WHERE relname = 'users';

-- 查看索引使用情况
SELECT 
    indexrelname AS index_name,
    idx_scan AS times_used,
    idx_tup_read AS tuples_read,
    idx_tup_fetch AS tuples_fetched
FROM pg_stat_user_indexes 
WHERE relname = 'users';

删除索引 #

sql

-- 删除索引
DROP INDEX idx_users_email;

-- 如果存在则删除
DROP INDEX IF EXISTS idx_users_email;

-- 并发删除
DROP INDEX CONCURRENTLY idx_users_email;

重建索引 #

sql

-- 重建索引
REINDEX INDEX idx_users_email;

-- 重建表的所有索引
REINDEX TABLE users;

-- 重建数据库的所有索引
REINDEX DATABASE mydb;

特殊索引 #

部分索引 #

sql

-- 只索引满足条件的行
CREATE INDEX idx_orders_pending ON orders(created_at) 
WHERE status = 'pending';

-- 只索引活跃用户
CREATE INDEX idx_users_active_email ON users(email) 
WHERE is_active = TRUE;

-- 优点：
-- 1. 索引更小
-- 2. 维护成本更低
-- 3. 查询更快

表达式索引 #

sql

-- 对表达式结果建立索引
CREATE INDEX idx_users_lower_name ON users(LOWER(name));

-- 查询时使用相同表达式才能利用索引
SELECT * FROM users WHERE LOWER(name) = 'alice';

-- 计算列索引
CREATE INDEX idx_orders_year ON orders(EXTRACT(YEAR FROM created_at));

SELECT * FROM orders WHERE EXTRACT(YEAR FROM created_at) = 2026;

多列索引 #

sql

-- 多列索引的列顺序很重要
CREATE INDEX idx_users_name_age ON users(name, age);

-- 可以使用索引的查询
SELECT * FROM users WHERE name = 'Alice';              -- 使用索引
SELECT * FROM users WHERE name = 'Alice' AND age = 25; -- 使用索引
SELECT * FROM users WHERE name = 'Alice' AND age > 20; -- 使用索引

-- 不能使用索引的查询
SELECT * FROM users WHERE age = 25;  -- 不使用索引（缺少第一列）

-- 最左前缀原则
-- 索引 (a, b, c) 可以用于：
-- WHERE a = ?
-- WHERE a = ? AND b = ?
-- WHERE a = ? AND b = ? AND c = ?
-- 不能用于：
-- WHERE b = ?
-- WHERE c = ?

覆盖索引 #

sql

-- 创建包含查询所需所有列的索引
CREATE INDEX idx_users_email_name_age ON users(email, name, age);

-- 查询只需访问索引，不需要访问表
SELECT name, age FROM users WHERE email = 'alice@example.com';

-- 使用 INCLUDE 子句（PostgreSQL 11+）
CREATE INDEX idx_users_email_include ON users(email) INCLUDE (name, age);

SELECT name, age FROM users WHERE email = 'alice@example.com';
-- Index Only Scan

索引优化 #

选择合适的索引类型 #

text

┌─────────────────┬─────────────────────────────────────────────┐
│ 场景            │ 推荐索引类型                                │
├─────────────────┼─────────────────────────────────────────────┤
│ 等值、范围查询  │ B-tree                                      │
│ 仅等值查询      │ Hash                                        │
│ 数组、JSONB     │ GIN                                         │
│ 几何、范围类型  │ GiST                                        │
│ 大表有序数据    │ BRIN                                        │
│ 全文搜索        │ GIN 或 GiST                                 │
└─────────────────┴─────────────────────────────────────────────┘

索引使用建议 #

sql

-- 1. 为 WHERE 条件列创建索引
SELECT * FROM users WHERE email = ?;
CREATE INDEX idx_users_email ON users(email);

-- 2. 为 JOIN 条件列创建索引
SELECT * FROM orders o JOIN users u ON o.user_id = u.id;
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- 3. 为 ORDER BY 列创建索引
SELECT * FROM users ORDER BY created_at DESC;
CREATE INDEX idx_users_created_at ON users(created_at DESC);

-- 4. 避免过度索引
-- 索引会占用存储空间
-- 索引会降低写入性能

-- 5. 定期分析索引使用情况
SELECT 
    schemaname,
    relname AS table_name,
    indexrelname AS index_name,
    idx_scan AS times_used,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC, pg_relation_size(indexrelid) DESC;

-- 查找未使用的索引
SELECT 
    schemaname || '.' || relname AS table,
    indexrelname AS index,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
    idx_scan AS index_scans
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;

使用 EXPLAIN 分析 #

sql

-- 查看查询计划
EXPLAIN SELECT * FROM users WHERE email = 'alice@example.com';

-- 查看详细执行信息
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'alice@example.com';

-- 查看更详细的信息
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM users WHERE email = 'alice@example.com';

-- 常见扫描类型：
-- Seq Scan        - 顺序扫描（全表扫描）
-- Index Scan      - 索引扫描
-- Index Only Scan - 仅索引扫描
-- Bitmap Scan     - 位图扫描

索引维护 #

sql

-- 更新统计信息
ANALYZE users;

-- 查看索引膨胀
SELECT 
    current_database(),
    schemaname,
    tablename,
    indexname,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

-- 重建索引（解决膨胀）
REINDEX INDEX CONCURRENTLY idx_users_email;

学习路径 #

text

高级阶段
├── 索引（本文）
├── 视图
├── 存储过程
├── 触发器
└── 事务与锁

下一步 #

掌握了索引后，接下来学习视图，了解如何创建和使用视图！