数据查询 #
一、查询概述 #
1.1 TimescaleDB查询特点 #
text
TimescaleDB查询优势:
分区裁剪
├── 自动过滤无关分片
├── 大幅提升查询性能
└── 透明化优化
时序函数
├── time_bucket - 时间分桶
├── 超函数 - 时序分析
└── 窗口函数 - 时序计算
并行查询
├── 多分片并行
├── 多进程处理
└── 自动优化
1.2 查询优化原则 #
text
查询优化原则:
时间条件
├── 始终包含时间范围
├── 使用时间列过滤
└── 触发分区裁剪
索引使用
├── 使用合适的索引
├── 避免全表扫描
└── 监控查询计划
聚合优化
├── 使用连续聚合
├── 预聚合数据
└── 减少实时计算
二、基本查询 #
2.1 简单查询 #
sql
-- 查询所有数据
SELECT * FROM sensor_data;
-- 查询特定列
SELECT time, sensor_id, temperature
FROM sensor_data;
-- 条件查询
SELECT * FROM sensor_data
WHERE sensor_id = 1;
-- 排序
SELECT * FROM sensor_data
ORDER BY time DESC;
-- 限制结果
SELECT * FROM sensor_data
ORDER BY time DESC
LIMIT 100;
2.2 时间范围查询 #
sql
-- 查询最近一小时
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '1 hour';
-- 查询最近24小时
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours';
-- 查询最近7天
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查询特定时间范围
SELECT * FROM sensor_data
WHERE time BETWEEN '2024-01-01' AND '2024-01-31';
-- 查询今天的数据
SELECT * FROM sensor_data
WHERE time >= CURRENT_DATE;
-- 查询昨天的数据
SELECT * FROM sensor_data
WHERE time >= CURRENT_DATE - INTERVAL '1 day'
AND time < CURRENT_DATE;
-- 查询本月数据
SELECT * FROM sensor_data
WHERE time >= DATE_TRUNC('month', CURRENT_DATE);
-- 查询上月数据
SELECT * FROM sensor_data
WHERE time >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
AND time < DATE_TRUNC('month', CURRENT_DATE);
2.3 多条件查询 #
sql
-- AND条件
SELECT * FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '24 hours';
-- OR条件
SELECT * FROM sensor_data
WHERE sensor_id = 1 OR sensor_id = 2;
-- IN条件
SELECT * FROM sensor_data
WHERE sensor_id IN (1, 2, 3, 4, 5);
-- NOT条件
SELECT * FROM sensor_data
WHERE sensor_id NOT IN (1, 2, 3);
-- 范围条件
SELECT * FROM sensor_data
WHERE temperature BETWEEN 20 AND 30;
-- NULL条件
SELECT * FROM sensor_data
WHERE humidity IS NULL;
SELECT * FROM sensor_data
WHERE humidity IS NOT NULL;
三、聚合查询 #
3.1 基本聚合 #
sql
-- 计数
SELECT COUNT(*) FROM sensor_data;
-- 分组计数
SELECT sensor_id, COUNT(*) as reading_count
FROM sensor_data
GROUP BY sensor_id
ORDER BY reading_count DESC;
-- 平均值
SELECT sensor_id, AVG(temperature) as avg_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY sensor_id;
-- 最大值和最小值
SELECT
sensor_id,
MAX(temperature) as max_temp,
MIN(temperature) as min_temp,
MAX(temperature) - MIN(temperature) as temp_range
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY sensor_id;
-- 求和
SELECT
sensor_id,
SUM(temperature) as total_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY sensor_id;
3.2 时间桶聚合 #
sql
-- 按小时聚合
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
AVG(temperature) as avg_temp,
MAX(temperature) as max_temp,
MIN(temperature) as min_temp,
COUNT(*) as reading_count
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY hour, sensor_id
ORDER BY hour DESC, sensor_id;
-- 按天聚合
SELECT
time_bucket('1 day', time) AS day,
AVG(temperature) as avg_temp,
MAX(temperature) as max_temp,
MIN(temperature) as min_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '30 days'
GROUP BY day
ORDER BY day DESC;
-- 按周聚合
SELECT
time_bucket('7 days', time) AS week,
AVG(temperature) as avg_temp
FROM sensor_data
GROUP BY week
ORDER BY week DESC;
-- 自定义时间桶
SELECT
time_bucket('15 minutes', time) AS bucket,
AVG(temperature) as avg_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '6 hours'
GROUP BY bucket
ORDER BY bucket;
3.3 time_bucket函数 #
sql
-- time_bucket语法
-- time_bucket(bucket_width, timestamp, [offset], [origin])
-- 基本用法
SELECT time_bucket('1 hour', NOW());
-- 带偏移量(从整点开始)
SELECT time_bucket('1 hour', time, '30 minutes'::interval) AS bucket
FROM sensor_data
GROUP BY bucket;
-- 带起始时间
SELECT time_bucket('1 day', time, NULL, '2024-01-01'::timestamptz) AS day
FROM sensor_data
GROUP BY day;
-- 时间桶对齐
SELECT
time_bucket('1 hour', time) AS hour_start,
time_bucket('1 hour', time) + INTERVAL '1 hour' AS hour_end,
AVG(temperature) as avg_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY hour_start
ORDER BY hour_start;
3.4 HAVING子句 #
sql
-- 过滤聚合结果
SELECT
sensor_id,
AVG(temperature) as avg_temp,
COUNT(*) as reading_count
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY sensor_id
HAVING AVG(temperature) > 25
ORDER BY avg_temp DESC;
-- 多条件HAVING
SELECT
sensor_id,
AVG(temperature) as avg_temp,
COUNT(*) as reading_count
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY sensor_id
HAVING AVG(temperature) > 25 AND COUNT(*) > 100
ORDER BY avg_temp DESC;
四、窗口函数 #
4.1 排名函数 #
sql
-- ROW_NUMBER
SELECT
time,
sensor_id,
temperature,
ROW_NUMBER() OVER (PARTITION BY sensor_id ORDER BY time DESC) as row_num
FROM sensor_data
WHERE time > NOW() - INTERVAL '1 hour';
-- RANK
SELECT
time,
sensor_id,
temperature,
RANK() OVER (ORDER BY temperature DESC) as temp_rank
FROM sensor_data
WHERE time > NOW() - INTERVAL '1 hour';
-- DENSE_RANK
SELECT
time,
sensor_id,
temperature,
DENSE_RANK() OVER (PARTITION BY sensor_id ORDER BY temperature DESC) as temp_rank
FROM sensor_data
WHERE time > NOW() - INTERVAL '1 hour';
-- 每个传感器的最高温度
SELECT DISTINCT ON (sensor_id)
time,
sensor_id,
temperature
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
ORDER BY sensor_id, temperature DESC;
4.2 聚合窗口函数 #
sql
-- 移动平均
SELECT
time,
sensor_id,
temperature,
AVG(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
) as moving_avg
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
-- 累计和
SELECT
time,
sensor_id,
temperature,
SUM(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
) as cumulative_sum
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
-- 累计平均
SELECT
time,
sensor_id,
temperature,
AVG(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
) as cumulative_avg
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
4.3 偏移函数 #
sql
-- LAG - 前一行
SELECT
time,
sensor_id,
temperature,
LAG(temperature) OVER (PARTITION BY sensor_id ORDER BY time) as prev_temp,
temperature - LAG(temperature) OVER (PARTITION BY sensor_id ORDER BY time) as temp_diff
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
-- LEAD - 后一行
SELECT
time,
sensor_id,
temperature,
LEAD(temperature) OVER (PARTITION BY sensor_id ORDER BY time) as next_temp
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
-- FIRST_VALUE和LAST_VALUE
SELECT
time,
sensor_id,
temperature,
FIRST_VALUE(temperature) OVER (PARTITION BY sensor_id ORDER BY time) as first_temp,
LAST_VALUE(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as last_temp
FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time;
五、子查询 #
5.1 标量子查询 #
sql
-- 查询高于平均温度的记录
SELECT * FROM sensor_data
WHERE temperature > (
SELECT AVG(temperature) FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
);
-- 查询每个传感器的最新记录
SELECT * FROM sensor_data sd
WHERE time = (
SELECT MAX(time) FROM sensor_data
WHERE sensor_id = sd.sensor_id
);
5.2 IN子查询 #
sql
-- 查询温度最高的传感器
SELECT * FROM sensor_data
WHERE sensor_id IN (
SELECT sensor_id FROM sensor_data
WHERE temperature > 30
GROUP BY sensor_id
HAVING COUNT(*) > 10
);
5.3 EXISTS子查询 #
sql
-- 查询有异常温度的传感器
SELECT DISTINCT sensor_id FROM sensor_data sd
WHERE EXISTS (
SELECT 1 FROM sensor_data
WHERE sensor_id = sd.sensor_id
AND temperature > 35
AND time > NOW() - INTERVAL '24 hours'
);
六、连接查询 #
6.1 内连接 #
sql
-- 创建传感器信息表
CREATE TABLE sensors (
sensor_id INTEGER PRIMARY KEY,
name VARCHAR(100),
location VARCHAR(100),
type VARCHAR(50)
);
-- 内连接查询
SELECT
s.name,
s.location,
sd.time,
sd.temperature
FROM sensor_data sd
INNER JOIN sensors s ON sd.sensor_id = s.sensor_id
WHERE sd.time > NOW() - INTERVAL '24 hours'
ORDER BY sd.time DESC;
-- 多表连接
SELECT
s.name,
sd.time,
sd.temperature,
st.status
FROM sensor_data sd
INNER JOIN sensors s ON sd.sensor_id = s.sensor_id
INNER JOIN sensor_status st ON sd.sensor_id = st.sensor_id
WHERE sd.time > NOW() - INTERVAL '24 hours';
6.2 左连接 #
sql
-- 左连接(包含所有传感器)
SELECT
s.sensor_id,
s.name,
COUNT(sd.time) as reading_count
FROM sensors s
LEFT JOIN sensor_data sd ON s.sensor_id = sd.sensor_id
AND sd.time > NOW() - INTERVAL '24 hours'
GROUP BY s.sensor_id, s.name
ORDER BY reading_count DESC;
6.3 自连接 #
sql
-- 自连接比较相邻记录
SELECT
a.time,
a.sensor_id,
a.temperature as temp1,
b.temperature as temp2,
a.temperature - b.temperature as diff
FROM sensor_data a
INNER JOIN sensor_data b ON a.sensor_id = b.sensor_id
AND b.time = (
SELECT MAX(time) FROM sensor_data
WHERE sensor_id = a.sensor_id AND time < a.time
)
WHERE a.time > NOW() - INTERVAL '1 hour';
七、CTE查询 #
7.1 基本CTE #
sql
-- 使用CTE计算统计数据
WITH hourly_stats AS (
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
AVG(temperature) as avg_temp,
MAX(temperature) as max_temp,
MIN(temperature) as min_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY hour, sensor_id
)
SELECT
hour,
sensor_id,
avg_temp,
max_temp - min_temp as temp_range
FROM hourly_stats
WHERE avg_temp > 25
ORDER BY hour DESC, avg_temp DESC;
7.2 多CTE #
sql
-- 多个CTE
WITH
daily_stats AS (
SELECT
time_bucket('1 day', time) AS day,
sensor_id,
AVG(temperature) as avg_temp
FROM sensor_data
WHERE time > NOW() - INTERVAL '30 days'
GROUP BY day, sensor_id
),
sensor_avg AS (
SELECT
sensor_id,
AVG(avg_temp) as overall_avg
FROM daily_stats
GROUP BY sensor_id
)
SELECT
d.day,
d.sensor_id,
d.avg_temp,
s.overall_avg,
d.avg_temp - s.overall_avg as temp_diff
FROM daily_stats d
JOIN sensor_avg s ON d.sensor_id = s.sensor_id
ORDER BY d.day DESC, d.sensor_id;
八、查询优化 #
8.1 查看执行计划 #
sql
-- 查看执行计划
EXPLAIN
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看详细执行计划
EXPLAIN ANALYZE
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看执行计划+成本
EXPLAIN (ANALYZE, BUFFERS, COSTS)
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
8.2 索引使用 #
sql
-- 检查索引使用
EXPLAIN ANALYZE
SELECT * FROM sensor_data
WHERE sensor_id = 1
AND time > NOW() - INTERVAL '24 hours';
-- 创建合适的索引
CREATE INDEX idx_sensor_time ON sensor_data (sensor_id, time DESC);
-- 使用部分索引
CREATE INDEX idx_recent ON sensor_data (time DESC)
WHERE time > NOW() - INTERVAL '30 days';
8.3 分区裁剪验证 #
sql
-- 验证分区裁剪
EXPLAIN ANALYZE
SELECT * FROM sensor_data
WHERE time > NOW() - INTERVAL '7 days';
-- 查看扫描的分片
-- Custom Scan (ChunkAppend)
-- -> Index Scan on _hyper_1_10_chunk
-- -> Index Scan on _hyper_1_11_chunk
-- 只扫描相关分片
九、查询监控 #
9.1 慢查询监控 #
sql
-- 查看慢查询
SELECT
query,
calls,
total_time,
mean_time,
max_time
FROM pg_stat_statements
WHERE query LIKE '%sensor_data%'
ORDER BY mean_time DESC
LIMIT 10;
-- 查看当前运行的查询
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes';
9.2 查询统计 #
sql
-- 表查询统计
SELECT
schemaname,
tablename,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch
FROM pg_stat_user_tables
WHERE tablename = 'sensor_data';
-- 索引使用统计
SELECT
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE relname = 'sensor_data';
十、总结 #
查询操作要点:
| 查询类型 | 关键字 | 用途 |
|---|---|---|
| 基本查询 | SELECT/WHERE | 数据过滤 |
| 聚合查询 | GROUP BY/HAVING | 数据统计 |
| 时间桶 | time_bucket | 时间分桶 |
| 窗口函数 | OVER/PARTITION BY | 时序计算 |
| 连接查询 | JOIN | 多表关联 |
最佳实践:
- 时间条件:始终包含时间范围,触发分区裁剪
- 索引使用:创建合适的复合索引
- 聚合优化:使用连续聚合预计算
- 监控调优:监控慢查询,持续优化
下一步,让我们学习数据更新操作!
最后更新:2026-03-27