PostgreSQL 性能优化 #
性能优化概述 #
性能优化是一个系统工程,需要从多个层面入手。
text
┌─────────────────────────────────────────────────────────────┐
│ 优化层面 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 应用层 │
│ ├── SQL 语句优化 │
│ ├── 连接池管理 │
│ └── 缓存策略 │
│ │
│ 数据库层 │
│ ├── 索引设计 │
│ ├── 查询计划分析 │
│ ├── 表结构优化 │
│ └── 统计信息维护 │
│ │
│ 系统层 │
│ ├── 内存配置 │
│ ├── 磁盘 I/O │
│ └── CPU 利用 │
│ │
└─────────────────────────────────────────────────────────────┘
EXPLAIN 分析 #
基本用法 #
sql
-- 查看执行计划
EXPLAIN SELECT * FROM employees WHERE department_id = 1;
-- 查看实际执行结果
EXPLAIN ANALYZE SELECT * FROM employees WHERE department_id = 1;
-- 查看更详细信息
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT * FROM employees WHERE department_id = 1;
-- JSON 格式输出
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM employees WHERE department_id = 1;
执行计划解读 #
sql
EXPLAIN ANALYZE SELECT * FROM employees WHERE salary > 80000;
-- 输出示例:
-- Index Scan using idx_employees_salary on employees (cost=0.29..8.31 rows=1 width=72) (actual time=0.025..0.026 rows=2 loops=1)
-- Index Cond: (salary > '80000'::numeric)
-- Planning Time: 0.100 ms
-- Execution Time: 0.050 ms
-- 关键指标:
-- cost=启动成本..总成本
-- rows=预计返回行数
-- width=每行平均字节数
-- actual time=实际执行时间
-- rows=实际返回行数
-- loops=执行次数
扫描类型 #
text
┌─────────────────────┬─────────────────────────────────────────┐
│ 扫描类型 │ 说明 │
├─────────────────────┼─────────────────────────────────────────┤
│ Seq Scan │ 顺序扫描(全表扫描) │
│ Index Scan │ 索引扫描 │
│ Index Only Scan │ 仅索引扫描 │
│ Bitmap Index Scan │ 位图索引扫描 │
│ Bitmap Heap Scan │ 位图堆扫描 │
│ Tid Scan │ TID 扫描 │
│ Foreign Scan │ 外部表扫描 │
│ Function Scan │ 函数扫描 │
└─────────────────────┴─────────────────────────────────────────┘
连接类型 #
text
┌─────────────────────┬─────────────────────────────────────────┐
│ 连接类型 │ 说明 │
├─────────────────────┼─────────────────────────────────────────┤
│ Nested Loop │ 嵌套循环连接(适合小表驱动大表) │
│ Hash Join │ 哈希连接(适合等值连接大表) │
│ Merge Join │ 合并连接(适合有序数据) │
└─────────────────────┴─────────────────────────────────────────┘
查询优化 #
避免 SELECT * #
sql
-- 不推荐
SELECT * FROM employees;
-- 推荐:只查询需要的列
SELECT id, name, salary FROM employees;
使用索引 #
sql
-- 确保查询条件使用索引
-- 创建索引
CREATE INDEX idx_employees_department_id ON employees(department_id);
-- 使用索引的查询
SELECT * FROM employees WHERE department_id = 1;
-- 不使用索引的情况
-- 函数操作
SELECT * FROM employees WHERE UPPER(name) = 'ALICE'; -- 不使用索引
-- 解决方案:创建函数索引
CREATE INDEX idx_employees_upper_name ON employees(UPPER(name));
-- 或使用相同函数查询
SELECT * FROM employees WHERE name = 'Alice';
避免隐式类型转换 #
sql
-- 不推荐:字符串与数字比较
SELECT * FROM employees WHERE id = '1'; -- 隐式转换
-- 推荐:使用正确类型
SELECT * FROM employees WHERE id = 1;
优化 LIKE 查询 #
sql
-- 不使用索引(前缀通配符)
SELECT * FROM employees WHERE name LIKE '%lice';
-- 使用索引(后缀通配符)
SELECT * FROM employees WHERE name LIKE 'Ali%';
-- 使用 pg_trgm 扩展支持前缀通配符
CREATE EXTENSION pg_trgm;
CREATE INDEX idx_employees_name_trgm ON employees USING gin(name gin_trgm_ops);
SELECT * FROM employees WHERE name LIKE '%lice';
使用 LIMIT #
sql
-- 分页查询
SELECT * FROM employees ORDER BY id LIMIT 10 OFFSET 0;
-- 优化大 OFFSET
-- 不推荐
SELECT * FROM employees ORDER BY id LIMIT 10 OFFSET 100000;
-- 推荐:使用 WHERE 条件
SELECT * FROM employees WHERE id > 100000 ORDER BY id LIMIT 10;
使用 EXISTS 替代 IN #
sql
-- 不推荐:IN 子查询
SELECT * FROM employees
WHERE department_id IN (SELECT id FROM departments WHERE name = 'Engineering');
-- 推荐:EXISTS
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.id = e.department_id AND d.name = 'Engineering'
);
-- 或使用 JOIN
SELECT e.* FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE d.name = 'Engineering';
索引优化 #
选择合适的索引类型 #
sql
-- B-tree:默认,适合等值和范围查询
CREATE INDEX idx_employees_salary ON employees(salary);
-- Hash:仅适合等值查询
CREATE INDEX idx_employees_name_hash ON employees USING hash(name);
-- GIN:适合数组和 JSONB
CREATE INDEX idx_employees_tags ON employees USING gin(tags);
-- GiST:适合几何和范围类型
CREATE INDEX idx_locations_point ON locations USING gist(point);
-- BRIN:适合大表有序数据
CREATE INDEX idx_logs_created_at ON logs USING brin(created_at);
部分索引 #
sql
-- 只索引活跃用户
CREATE INDEX idx_active_users_email ON users(email) WHERE is_active = TRUE;
-- 只索引大额订单
CREATE INDEX idx_large_orders ON orders(created_at) WHERE amount > 10000;
表达式索引 #
sql
-- 对表达式结果创建索引
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- 对日期部分创建索引
CREATE INDEX idx_orders_year ON orders(EXTRACT(YEAR FROM created_at));
覆盖索引 #
sql
-- 包含额外列的索引
CREATE INDEX idx_users_email_include ON users(email) INCLUDE (name, created_at);
-- 查询只需访问索引
SELECT name, created_at FROM users WHERE email = 'alice@example.com';
-- Index Only Scan
查看索引使用情况 #
sql
-- 查看未使用的索引
SELECT
schemaname || '.' || relname AS table,
indexrelname AS index,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
idx_scan AS index_scans
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- 查看索引大小
SELECT
indexrelname AS index_name,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE relname = 'employees';
配置优化 #
内存配置 #
ini
# postgresql.conf
# 共享缓冲区(建议设为系统内存的 25%)
shared_buffers = 4GB
# 工作内存(每个排序/哈希操作)
work_mem = 64MB
# 维护工作内存(VACUUM, CREATE INDEX)
maintenance_work_mem = 512MB
# 有效缓存大小(系统可用缓存预估)
effective_cache_size = 12GB
# 巨大页(大内存系统)
huge_pages = try
WAL 配置 #
ini
# WAL 缓冲区
wal_buffers = 64MB
# 检查点设置
checkpoint_completion_target = 0.9
max_wal_size = 2GB
min_wal_size = 1GB
# WAL 压缩
wal_compression = on
查询规划器 #
ini
# 随机页成本(SSD 可降低)
random_page_cost = 1.1
# 有效 I/O 并发
effective_io_concurrency = 200
# 并行查询
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
连接配置 #
ini
# 最大连接数
max_connections = 200
# 连接池(使用 PgBouncer 更好)
# 每个连接的内存
统计信息 #
更新统计信息 #
sql
-- 分析表
ANALYZE employees;
-- 分析特定列
ANALYZE employees(name, salary);
-- 分析整个数据库
ANALYZE;
-- 更详细的统计信息
ALTER TABLE employees ALTER COLUMN name SET STATISTICS 500;
ANALYZE employees(name);
查看统计信息 #
sql
-- 查看表统计信息
SELECT
relname,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables;
-- 查看列统计信息
SELECT
attname,
n_distinct,
null_frac,
avg_width
FROM pg_stats
WHERE tablename = 'employees';
VACUUM 优化 #
手动 VACUUM #
sql
-- 清理死元组
VACUUM employees;
-- 完整清理(会锁表)
VACUUM FULL employees;
-- 清理并分析
VACUUM ANALYZE employees;
-- 查看表膨胀
SELECT
schemaname,
tablename,
n_live_tup,
n_dead_tup,
n_dead_tup::float / NULLIF(n_live_tup, 0) AS dead_ratio
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;
自动 VACUUM 配置 #
ini
# postgresql.conf
# 启用自动 VACUUM
autovacuum = on
# 自动 VACUUM 工作进程数
autovacuum_max_workers = 3
-- 触发阈值
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.1
-- 分析阈值
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.05
-- 成本限制
autovacuum_vacuum_cost_limit = 200
autovacuum_vacuum_cost_delay = 2ms
监控工具 #
内置视图 #
sql
-- 活动查询
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes';
-- 锁等待
SELECT
blocked_locks.pid AS blocked_pid,
blocking_locks.pid AS blocking_pid,
blocked_activity.query AS blocked_query
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.relation = blocked_locks.relation
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
-- 数据库大小
SELECT
datname,
pg_size_pretty(pg_database_size(datname)) AS size
FROM pg_database;
-- 表大小
SELECT
relname,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 10;
pg_stat_statements #
sql
-- 启用扩展
CREATE EXTENSION pg_stat_statements;
-- 查看最慢的查询
SELECT
calls,
round(total_exec_time::numeric, 2) AS total_time_ms,
round(mean_exec_time::numeric, 2) AS avg_time_ms,
round((100 * total_exec_time / sum(total_exec_time) over())::numeric, 2) AS percent,
query
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;
-- 重置统计
SELECT pg_stat_statements_reset();
学习路径 #
text
运维阶段
├── 用户权限管理
├── 备份与恢复
└── 性能优化(本文)
扩展阶段
├── JSON操作
├── 全文搜索
└── 常用扩展
下一步 #
掌握了性能优化后,接下来学习 JSON操作,了解 PostgreSQL 的 JSON 功能!
最后更新:2026-03-29