性能优化 #
一、性能优化概述 #
1.1 优化方向 #
text
性能优化方向:
查询优化
├── SQL语句优化
├── 执行计划分析
├── 索引使用优化
└── 分区裁剪
存储优化
├── 数据压缩
├── 索引策略
├── 表空间管理
└── 数据生命周期
配置优化
├── 内存配置
├── 并行配置
├── WAL配置
└── 连接配置
架构优化
├── 超表设计
├── 分区策略
├── 连续聚合
└── 多节点部署
1.2 性能瓶颈识别 #
sql
-- 查看慢查询
SELECT
query,
calls,
total_time,
mean_time,
max_time
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 10;
-- 查看表统计
SELECT
schemaname,
tablename,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch
FROM pg_stat_user_tables
ORDER BY seq_tup_read DESC;
-- 查看索引使用
SELECT
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan;
二、查询优化 #
2.1 执行计划分析 #
sql
-- 查看执行计划
EXPLAIN
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看详细执行计划
EXPLAIN ANALYZE
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看完整执行计划
EXPLAIN (ANALYZE, BUFFERS, COSTS, TIMING)
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看JSON格式
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
2.2 时间条件优化 #
sql
-- 推荐:使用时间范围
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 推荐:使用BETWEEN
SELECT * FROM sensor_data
WHERE time BETWEEN '2024-01-01' AND '2024-01-31';
-- 不推荐:使用函数阻止分区裁剪
SELECT * FROM sensor_data
WHERE EXTRACT(MONTH FROM time) = 1;
-- 推荐:使用时间范围替代
SELECT * FROM sensor_data
WHERE time >= '2024-01-01' AND time < '2024-02-01';
2.3 聚合查询优化 #
sql
-- 使用time_bucket优化聚合
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
AVG(temperature) AS avg_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY hour, sensor_id;
-- 使用连续聚合预计算
CREATE MATERIALIZED VIEW hourly_stats
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
AVG(temperature) AS avg_temp
FROM sensor_data
GROUP BY hour, sensor_id;
-- 查询预计算结果
SELECT * FROM hourly_stats
WHERE hour > NOW() - INTERVAL '24 hours';
2.4 JOIN优化 #
sql
-- 确保JOIN条件使用索引
SELECT
s.name,
sd.time,
sd.temperature
FROM sensor_data sd
JOIN sensors s ON sd.sensor_id = s.sensor_id
WHERE sd.time > NOW() - INTERVAL '24 hours';
-- 使用EXISTS替代IN
SELECT * FROM sensors s
WHERE EXISTS (
SELECT 1 FROM sensor_data sd
WHERE sd.sensor_id = s.sensor_id
AND sd.time > NOW() - INTERVAL '24 hours'
);
三、索引优化 #
3.1 索引类型选择 #
sql
-- B-tree索引(默认)
CREATE INDEX idx_time ON sensor_data (time DESC);
-- 复合索引
CREATE INDEX idx_sensor_time ON sensor_data (sensor_id, time DESC);
-- 部分索引
CREATE INDEX idx_recent ON sensor_data (time DESC)
WHERE time > NOW() - INTERVAL '30 days';
-- 表达式索引
CREATE INDEX idx_date ON sensor_data ((time::date));
-- GIN索引(JSON/数组)
CREATE INDEX idx_data_gin ON events USING GIN (data);
3.2 索引策略 #
text
索引策略建议:
时间列
├── 自动创建
├── 默认B-tree
└── 降序排列
设备ID列
├── 创建复合索引
├── (device_id, time DESC)
└── 覆盖常用查询
查询模式
├── 分析查询模式
├── 创建匹配索引
└── 避免过多索引
索引数量
├── 不超过5个索引
├── 监控索引使用
└── 删除无用索引
3.3 索引维护 #
sql
-- 查看索引使用情况
SELECT
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) as size
FROM pg_stat_user_indexes
WHERE relname = 'sensor_data'
ORDER BY idx_scan;
-- 查找未使用的索引
SELECT
indexrelname,
pg_size_pretty(pg_relation_size(indexrelid)) as size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND relname = 'sensor_data';
-- 重建索引
REINDEX INDEX idx_sensor_time;
-- 并发重建索引(不锁表)
REINDEX INDEX CONCURRENTLY idx_sensor_time;
-- 更新统计信息
ANALYZE sensor_data;
四、配置优化 #
4.1 内存配置 #
ini
# postgresql.conf
# 共享缓冲区
shared_buffers = 4GB # 建议:系统内存的25%
# 工作内存
work_mem = 64MB # 每个操作的工作内存
# 维护工作内存
maintenance_work_mem = 1GB # 维护操作内存
# 有效缓存大小
effective_cache_size = 12GB # 系统可用缓存
# TimescaleDB特定配置
timescaledb.max_background_workers = 8
4.2 并行配置 #
ini
# 并行查询
max_worker_processes = 8
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
max_parallel_maintenance_workers = 4
# 并行阈值
min_parallel_table_scan_size = 8MB
min_parallel_index_scan_size = 512kB
4.3 WAL配置 #
ini
# WAL配置
wal_buffers = 64MB
checkpoint_completion_target = 0.9
max_wal_size = 2GB
min_wal_size = 1GB
# WAL压缩
wal_compression = on
4.4 连接配置 #
ini
# 连接配置
max_connections = 200
listen_addresses = '*'
port = 5432
# 连接保持
tcp_keepalives_idle = 60
tcp_keepalives_interval = 10
tcp_keepalives_count = 6
4.5 使用timescaledb-tune #
bash
# 自动配置优化
timescaledb-tune
# 交互式配置
timescaledb-tune --interactive
# 静默模式
timescaledb-tune --quiet --yes
# 指定配置文件
timescaledb-tune --conf-path=/etc/postgresql/15/main/postgresql.conf
五、超表设计优化 #
5.1 分区间隔优化 #
sql
-- 查看当前分区间隔
SELECT
hypertable_name,
column_name,
time_interval
FROM timescaledb_information.dimensions;
-- 根据数据量调整
-- 高频数据:1小时-1天
SELECT set_chunk_time_interval('high_freq_data', INTERVAL '1 hour');
-- 中频数据:1天-7天
SELECT set_chunk_time_interval('medium_freq_data', INTERVAL '1 day');
-- 低频数据:7天-30天
SELECT set_chunk_time_interval('low_freq_data', INTERVAL '7 days');
5.2 空间分区优化 #
sql
-- 创建空间分区
SELECT create_hypertable(
'sensor_data',
'time',
partitioning_column => 'sensor_id',
number_partitions => 4
);
-- 分区数量建议
-- 2-4倍CPU核心数
-- 不超过设备数量
5.3 压缩优化 #
sql
-- 启用压缩
ALTER TABLE sensor_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id',
timescaledb.compress_orderby = 'time DESC'
);
-- 添加压缩策略
SELECT add_compression_policy(
'sensor_data',
INTERVAL '7 days'
);
-- 查看压缩效果
SELECT
chunk_name,
before_compression_total_bytes,
after_compression_total_bytes,
round(100.0 * (before_compression_total_bytes - after_compression_total_bytes)
/ before_compression_total_bytes, 2) as compression_ratio
FROM chunk_compression_stats('sensor_data');
六、写入性能优化 #
6.1 批量写入 #
sql
-- 推荐:批量插入
INSERT INTO sensor_data (time, sensor_id, temperature) VALUES
(NOW(), 1, 25.5),
(NOW(), 2, 26.0),
(NOW(), 3, 24.8);
-- 不推荐:单行插入
INSERT INTO sensor_data VALUES (NOW(), 1, 25.5);
INSERT INTO sensor_data VALUES (NOW(), 2, 26.0);
INSERT INTO sensor_data VALUES (NOW(), 3, 24.8);
-- 使用COPY
COPY sensor_data FROM '/path/to/data.csv' WITH (FORMAT csv);
6.2 索引优化 #
sql
-- 大批量导入前临时删除索引
DROP INDEX idx_sensor_id;
-- 导入数据
COPY sensor_data FROM '/path/to/data.csv' WITH (FORMAT csv);
-- 重新创建索引
CREATE INDEX idx_sensor_id ON sensor_data (sensor_id);
-- 更新统计信息
ANALYZE sensor_data;
6.3 禁用触发器 #
sql
-- 大批量导入时临时禁用触发器
ALTER TABLE sensor_data DISABLE TRIGGER ALL;
-- 导入数据
COPY sensor_data FROM '/path/to/data.csv' WITH (FORMAT csv);
-- 重新启用触发器
ALTER TABLE sensor_data ENABLE TRIGGER ALL;
七、监控与诊断 #
7.1 性能监控 #
sql
-- 查看活跃查询
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes';
-- 查看锁等待
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS blocking_statement
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.relation = blocked_locks.relation
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid;
-- 查看缓存命中率
SELECT
sum(heap_blks_read) as heap_read,
sum(heap_blks_hit) as heap_hit,
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM pg_statio_user_tables;
7.2 慢查询日志 #
ini
# postgresql.conf
# 启用慢查询日志
log_min_duration_statement = 1000 # 记录超过1秒的查询
# 日志配置
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
7.3 性能基准测试 #
sql
-- 测试查询性能
EXPLAIN ANALYZE
SELECT
time_bucket('1 hour', time) AS hour,
AVG(temperature)
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY hour;
-- 测试写入性能
EXPLAIN ANALYZE
INSERT INTO sensor_data (time, sensor_id, temperature)
SELECT
NOW() - (random() * INTERVAL '30 days'),
(random() * 100)::INTEGER + 1,
20 + random() * 10
FROM generate_series(1, 10000);
八、常见问题解决 #
8.1 查询慢 #
sql
-- 问题:查询慢
-- 解决步骤
-- 1. 查看执行计划
EXPLAIN ANALYZE SELECT ...;
-- 2. 检查索引使用
SELECT * FROM pg_stat_user_indexes WHERE relname = 'sensor_data';
-- 3. 创建合适的索引
CREATE INDEX idx_sensor_time ON sensor_data (sensor_id, time DESC);
-- 4. 更新统计信息
ANALYZE sensor_data;
-- 5. 使用连续聚合
CREATE MATERIALIZED VIEW ... WITH (timescaledb.continuous) AS ...;
8.2 写入慢 #
sql
-- 问题:写入慢
-- 解决步骤
-- 1. 检查索引数量
SELECT count(*) FROM pg_indexes WHERE tablename = 'sensor_data';
-- 2. 使用批量插入
INSERT INTO ... VALUES (...), (...), ...;
-- 3. 使用COPY
COPY sensor_data FROM ...;
-- 4. 调整配置
SET work_mem = '256MB';
8.3 磁盘空间不足 #
sql
-- 问题:磁盘空间不足
-- 解决步骤
-- 1. 查看表大小
SELECT pg_size_pretty(pg_total_relation_size('sensor_data'));
-- 2. 启用压缩
ALTER TABLE sensor_data SET (timescaledb.compress);
SELECT add_compression_policy('sensor_data', INTERVAL '7 days');
-- 3. 添加保留策略
SELECT add_retention_policy('sensor_data', INTERVAL '90 days');
-- 4. 清理旧数据
SELECT drop_chunks('sensor_data', older_than => INTERVAL '90 days');
-- 5. 执行VACUUM
VACUUM sensor_data;
九、总结 #
性能优化要点:
| 优化方向 | 关键点 | 效果 |
|---|---|---|
| 查询优化 | 时间条件、索引使用 | 高 |
| 索引优化 | 合适索引、定期维护 | 高 |
| 配置优化 | 内存、并行、WAL | 中 |
| 架构优化 | 超表设计、连续聚合 | 高 |
最佳实践:
- 查询优化:使用时间条件,触发分区裁剪
- 索引策略:创建合适的复合索引
- 配置调优:根据硬件资源调整配置
- 监控维护:定期监控性能指标
下一步,让我们学习监控运维!
最后更新:2026-03-27