WHERE子句 #
一、WHERE概述 #
1.1 WHERE特点 #
Cassandra的WHERE子句有以下特点:
text
WHERE特点:
✓ 必须包含分区键
✓ 支持聚簇列范围查询
✓ 支持IN操作符
✓ 支持集合查询
⚠ 不支持OR
⚠ 非主键列需要ALLOW FILTERING
1.2 基本语法 #
sql
SELECT ... FROM table_name
WHERE condition [AND condition ...]
[ALLOW FILTERING];
二、分区键条件 #
2.1 单列分区键 #
sql
-- 精确匹配
SELECT * FROM users
WHERE user_id = 550e8400-e29b-41d4-a716-446655440000;
-- IN查询
SELECT * FROM users
WHERE user_id IN (
550e8400-e29b-41d4-a716-446655440000,
660e8400-e29b-41d4-a716-446655440001,
770e8400-e29b-41d4-a716-446655440002
);
2.2 复合分区键 #
sql
-- 完整分区键
SELECT * FROM sensor_data
WHERE sensor_id = 'sensor-001'
AND date = '2024-01-15'
AND hour = 10;
-- 部分分区键(不支持)
-- SELECT * FROM sensor_data WHERE sensor_id = 'sensor-001';
-- 错误:必须提供完整的分区键
-- IN查询
SELECT * FROM sensor_data
WHERE sensor_id = 'sensor-001'
AND date IN ('2024-01-15', '2024-01-16')
AND hour = 10;
2.3 Token查询 #
sql
-- 使用Token函数
SELECT * FROM users
WHERE token(user_id) = token(550e8400-e29b-41d4-a716-446655440000);
-- Token范围查询
SELECT * FROM users
WHERE token(user_id) > token(min_uuid)
AND token(user_id) < token(max_uuid);
-- 分页查询
SELECT * FROM users
WHERE token(user_id) > token(last_uuid)
LIMIT 100;
三、聚簇列条件 #
3.1 精确匹配 #
sql
-- 单个聚簇列
SELECT * FROM orders
WHERE user_id = ?
AND order_id = ?;
-- 多个聚簇列
SELECT * FROM events
WHERE device_id = ?
AND event_date = ?
AND event_time = ?;
3.2 范围查询 #
sql
-- 大于
SELECT * FROM orders
WHERE user_id = ?
AND order_date > '2024-01-01';
-- 小于
SELECT * FROM orders
WHERE user_id = ?
AND order_date < '2024-02-01';
-- 范围
SELECT * FROM orders
WHERE user_id = ?
AND order_date >= '2024-01-01'
AND order_date <= '2024-01-31';
3.3 聚簇列规则 #
text
聚簇列查询规则:
顺序使用
├── 必须按定义顺序使用
└── 不能跳过前面的聚簇列
范围限制
├── 范围查询后不能有其他条件
└── 只能有一个范围条件
示例(PRIMARY KEY (pk, ck1, ck2)):
✓ WHERE pk = ? AND ck1 = ? AND ck2 = ?
✓ WHERE pk = ? AND ck1 = ? AND ck2 > ?
✓ WHERE pk = ? AND ck1 > ?
✗ WHERE pk = ? AND ck2 = ?(跳过ck1)
✗ WHERE pk = ? AND ck1 > ? AND ck2 = ?(范围后有条件)
四、操作符 #
4.1 比较操作符 #
sql
-- 等于
SELECT * FROM users WHERE user_id = ?;
-- 大于
SELECT * FROM orders
WHERE user_id = ? AND order_date > ?;
-- 大于等于
SELECT * FROM orders
WHERE user_id = ? AND order_date >= ?;
-- 小于
SELECT * FROM orders
WHERE user_id = ? AND order_date < ?;
-- 小于等于
SELECT * FROM orders
WHERE user_id = ? AND order_date <= ?;
4.2 IN操作符 #
sql
-- 分区键IN查询
SELECT * FROM users
WHERE user_id IN (?, ?, ?);
-- 聚簇列IN查询
SELECT * FROM orders
WHERE user_id = ?
AND order_date IN ('2024-01-01', '2024-01-02', '2024-01-03');
-- 多列IN查询
SELECT * FROM events
WHERE device_id IN ('dev1', 'dev2')
AND event_date IN ('2024-01-01', '2024-01-02');
4.3 集合操作符 #
sql
-- CONTAINS(List/Set)
SELECT * FROM users
WHERE tags CONTAINS 'vip';
-- CONTAINS KEY(Map)
SELECT * FROM users
WHERE preferences CONTAINS KEY 'theme';
-- CONTAINS(Map值)
SELECT * FROM users
WHERE preferences CONTAINS 'dark';
-- CONTAINS ENTRY(Map键值对)
SELECT * FROM users
WHERE preferences CONTAINS ENTRY('theme', 'dark');
4.4 LIKE操作符(SASI索引) #
sql
-- LIKE需要SASI索引支持
SELECT * FROM users
WHERE name LIKE '张%';
SELECT * FROM users
WHERE name LIKE '%三';
SELECT * FROM users
WHERE name LIKE '%张%';
五、ALLOW FILTERING #
5.1 使用场景 #
sql
-- 非主键列过滤
SELECT * FROM users
WHERE email = 'zhang@example.com'
ALLOW FILTERING;
-- 非主键列范围
SELECT * FROM products
WHERE price > 100 AND price < 500
ALLOW FILTERING;
-- 组合条件
SELECT * FROM users
WHERE name = '张三' AND age > 25
ALLOW FILTERING;
5.2 性能影响 #
text
ALLOW FILTERING性能影响:
执行过程
├── 扫描所有分区
├── 检查每行是否满足条件
└── 返回匹配的行
影响
├── 全表扫描
├── 性能极差
├── 可能超时
└── 资源消耗大
建议
├── 数据量小时可用
├── 开发测试时可用
├── 生产环境避免使用
└── 考虑创建索引
5.3 替代方案 #
sql
-- 不推荐:ALLOW FILTERING
SELECT * FROM users WHERE email = ?
ALLOW FILTERING;
-- 推荐:创建二级索引
CREATE INDEX users_email_idx ON users (email);
SELECT * FROM users WHERE email = ?;
-- 推荐:创建物化视图
CREATE MATERIALIZED VIEW users_by_email AS
SELECT * FROM users
WHERE email IS NOT NULL
PRIMARY KEY (email, user_id);
SELECT * FROM users_by_email WHERE email = ?;
六、条件组合 #
6.1 AND组合 #
sql
-- 多条件AND
SELECT * FROM orders
WHERE user_id = ?
AND order_date >= '2024-01-01'
AND order_date <= '2024-01-31'
AND status = 'completed';
6.2 不支持OR #
sql
-- 不支持OR
-- SELECT * FROM users WHERE name = '张三' OR name = '李四';
-- 替代方案:使用IN
SELECT * FROM users
WHERE user_id IN (
(SELECT user_id FROM users_by_name WHERE name = '张三'),
(SELECT user_id FROM users_by_name WHERE name = '李四')
);
-- 或者分开查询
SELECT * FROM users WHERE user_id = uuid1;
SELECT * FROM users WHERE user_id = uuid2;
七、NULL处理 #
7.1 NULL查询 #
sql
-- 查询NULL值(需要ALLOW FILTERING)
SELECT * FROM users
WHERE phone IS NULL
ALLOW FILTERING;
-- 查询非NULL值
SELECT * FROM users
WHERE phone IS NOT NULL
ALLOW FILTERING;
八、查询优化 #
8.1 使用主键 #
sql
-- 最优:使用完整主键
SELECT * FROM orders
WHERE user_id = ?
AND order_id = ?;
-- 次优:分区键+聚簇列范围
SELECT * FROM orders
WHERE user_id = ?
AND order_date > ?;
-- 避免:非主键列
SELECT * FROM orders
WHERE amount > 100
ALLOW FILTERING;
8.2 查询追踪 #
sql
-- 启用追踪分析查询
TRACING ON;
SELECT * FROM users WHERE user_id = ?;
-- 查看执行计划和耗时
九、总结 #
WHERE子句要点:
- 分区键必须:必须包含完整分区键
- 聚簇列顺序:按定义顺序使用
- 范围限制:范围查询后不能有其他条件
- IN操作符:支持分区键和聚簇列
- 集合查询:CONTAINS操作符
- 避免过滤:慎用ALLOW FILTERING
下一步,让我们学习聚合函数!
最后更新:2026-03-27