RocksDB列族 #
一、列族基础 #
1.1 什么是列族 #
列族(Column Family)是RocksDB提供的逻辑数据分区机制,允许在同一个数据库实例中存储多组独立的数据。
text
数据库结构:
┌─────────────────────────────────────┐
│ RocksDB 数据库 │
├─────────────────────────────────────┤
│ 列族: default │
│ ├── MemTable │
│ ├── SST Files │
│ └── 独立配置 │
├─────────────────────────────────────┤
│ 列族: users │
│ ├── MemTable │
│ ├── SST Files │
│ └── 独立配置 │
├─────────────────────────────────────┤
│ 列族: orders │
│ ├── MemTable │
│ ├── SST Files │
│ └── 独立配置 │
└─────────────────────────────────────┘
↑
共享WAL
1.2 列族特点 #
| 特点 | 说明 |
|---|---|
| 数据隔离 | 每个列族数据独立存储 |
| 配置独立 | 每个列族可独立配置压缩、缓存等 |
| 共享WAL | 所有列族共享WAL,保证原子性 |
| 原子写入 | 跨列族写入支持原子性 |
| 独立Flush | 每个列族独立Flush到磁盘 |
1.3 基本使用 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>
#include <vector>
int main() {
rocksdb::DB* db;
rocksdb::Options options;
options.create_if_missing = true;
// 打开数据库,获取所有列族
std::vector<std::string> column_families;
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(
options, "/tmp/testdb", &column_families);
// 创建列族句柄
std::vector<rocksdb::ColumnFamilyHandle*> handles;
// 打开数据库
status = rocksdb::DB::Open(options, "/tmp/testdb", &db);
if (!status.ok()) {
std::cerr << "Open failed: " << status.ToString() << std::endl;
return 1;
}
// 创建新列族
rocksdb::ColumnFamilyHandle* users_cf;
status = db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "users", &users_cf);
if (status.ok()) {
std::cout << "Column family 'users' created!" << std::endl;
}
// 写入数据到指定列族
db->Put(rocksdb::WriteOptions(), users_cf, "user:1", "Alice");
db->Put(rocksdb::WriteOptions(), users_cf, "user:2", "Bob");
// 写入数据到默认列族
db->Put(rocksdb::WriteOptions(), "default_key", "default_value");
// 从指定列族读取
std::string value;
status = db->Get(rocksdb::ReadOptions(), users_cf, "user:1", &value);
if (status.ok()) {
std::cout << "user:1 = " << value << std::endl;
}
// 清理
delete users_cf;
delete db;
return 0;
}
二、列族操作 #
2.1 创建列族 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>
// Creates the "users", "orders" and "products" column families on an open
// database, reports the outcome of each creation, and releases the handles
// again before returning.
//
// db: open database; a null pointer is reported and ignored.
void CreateColumnFamilies(rocksdb::DB* db) {
    if (db == nullptr) {
        std::cerr << "CreateColumnFamilies: db is null" << std::endl;
        return;
    }

    constexpr int kCount = 3;
    const char* const names[kCount] = {"users", "orders", "products"};
    rocksdb::ColumnFamilyHandle* handles[kCount] = {nullptr, nullptr, nullptr};

    for (int i = 0; i < kCount; ++i) {
        const rocksdb::Status status = db->CreateColumnFamily(
            rocksdb::ColumnFamilyOptions(), names[i], &handles[i]);
        if (status.ok()) {
            std::cout << "Created column family: " << names[i] << std::endl;
        } else {
            std::cerr << "Failed to create column family " << names[i] << ": "
                      << status.ToString() << std::endl;
            handles[i] = nullptr;  // never release a handle that was not created
        }
    }

    // Use the column families here...

    // Release the handles through the DB that issued them.
    for (auto* handle : handles) {
        if (handle != nullptr) {
            db->DestroyColumnFamilyHandle(handle);
        }
    }
}
2.2 删除列族 #
cpp
#include <rocksdb/db.h>
#include <iostream>
// Drops the column family referred to by `handle` and then releases the handle.
//
// db:     open database that issued `handle`.
// handle: column family to drop; it is released and must not be used by the
//         caller afterwards, whether or not the drop succeeded.
void DropColumnFamily(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* handle) {
    if (db == nullptr || handle == nullptr) {
        std::cerr << "Failed to drop column family: null database or handle"
                  << std::endl;
        return;
    }

    // Copy the name up front; the handle is released below.
    const std::string cf_name = handle->GetName();

    rocksdb::Status status = db->DropColumnFamily(handle);
    if (status.ok()) {
        std::cout << "Dropped column family: " << cf_name << std::endl;
    } else {
        std::cerr << "Failed to drop column family: " << status.ToString() << std::endl;
    }

    // Dropping does not free the handle; it still has to be released via the DB.
    status = db->DestroyColumnFamilyHandle(handle);
    if (!status.ok()) {
        std::cerr << "Failed to release column family handle: "
                  << status.ToString() << std::endl;
    }
}
2.3 列出所有列族 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <iostream>
#include <vector>
void ListColumnFamilies(const std::string& db_path) {
std::vector<std::string> column_families;
rocksdb::Options options;
rocksdb::Status status = rocksdb::DB::ListColumnFamilies(
options, db_path, &column_families);
if (status.ok()) {
std::cout << "Column families in database:" << std::endl;
for (const auto& cf : column_families) {
std::cout << " - " << cf << std::endl;
}
} else {
std::cerr << "Failed to list column families: " << status.ToString() << std::endl;
}
}
三、打开带列族的数据库 #
3.1 打开现有列族 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>
#include <vector>
int main() {
rocksdb::DB* db;
rocksdb::Options options;
options.create_if_missing = true;
// 获取现有列族列表
std::vector<std::string> column_families;
rocksdb::DB::ListColumnFamilies(options, "/tmp/testdb", &column_families);
// 准备列族描述符
std::vector<rocksdb::ColumnFamilyDescriptor> column_family_descriptors;
for (const auto& cf_name : column_families) {
column_family_descriptors.push_back(
rocksdb::ColumnFamilyDescriptor(cf_name, rocksdb::ColumnFamilyOptions())
);
}
// 打开数据库和所有列族
std::vector<rocksdb::ColumnFamilyHandle*> handles;
rocksdb::Status status = rocksdb::DB::Open(
options,
"/tmp/testdb",
column_family_descriptors,
&handles,
&db
);
if (!status.ok()) {
std::cerr << "Open failed: " << status.ToString() << std::endl;
return 1;
}
// 使用列族
for (size_t i = 0; i < handles.size(); i++) {
std::cout << "Column family: " << handles[i]->GetName() << std::endl;
}
// 清理
for (auto handle : handles) {
delete handle;
}
delete db;
return 0;
}
3.2 打开特定列族 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <algorithm>
#include <string>
#include <vector>
// Opens (creating it if necessary) the database at `db_path` with every column
// family it already contains plus the ones named in `cf_names`.
//
// db_path:  filesystem path of the database.
// cf_names: column families the caller needs; any that do not exist yet are
//           created during the open. Existing families are always opened as
//           well, because a read-write open must name all of them.
// handles:  receives one handle per opened column family; use GetName() on a
//           handle to identify it. Must not be null.
//
// Returns the opened database, or nullptr on failure. The caller owns the
// database and the handles and must release the handles with
// DestroyColumnFamilyHandle() before deleting the database.
rocksdb::DB* OpenWithColumnFamilies(
    const std::string& db_path,
    const std::vector<std::string>& cf_names,
    std::vector<rocksdb::ColumnFamilyHandle*>* handles) {
    if (handles == nullptr) {
        return nullptr;
    }

    rocksdb::Options options;
    options.create_if_missing = true;
    options.create_missing_column_families = true;  // honour names in cf_names

    // Existing column families; the list stays empty for a brand-new database.
    std::vector<std::string> all_cfs;
    const rocksdb::Status list_status =
        rocksdb::DB::ListColumnFamilies(options, db_path, &all_cfs);
    if (!list_status.ok()) {
        all_cfs.clear();
    }

    const auto contains = [&all_cfs](const std::string& name) {
        return std::find(all_cfs.begin(), all_cfs.end(), name) != all_cfs.end();
    };

    // The default column family must always be part of the open call.
    if (!contains(rocksdb::kDefaultColumnFamilyName)) {
        all_cfs.insert(all_cfs.begin(), rocksdb::kDefaultColumnFamilyName);
    }
    // Add the requested families, skipping duplicates.
    for (const auto& name : cf_names) {
        if (!contains(name)) {
            all_cfs.push_back(name);
        }
    }

    std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
    descriptors.reserve(all_cfs.size());
    for (const auto& cf : all_cfs) {
        descriptors.emplace_back(cf, rocksdb::ColumnFamilyOptions());
    }

    rocksdb::DB* db = nullptr;
    const rocksdb::Status status =
        rocksdb::DB::Open(options, db_path, descriptors, handles, &db);
    return status.ok() ? db : nullptr;
}
四、列族配置 #
4.1 独立配置选项 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>
#include <rocksdb/filter_policy.h>
// Returns the column family options this tutorial uses for user data:
// LZ4 compression, 64 MiB write buffers (up to 4 of them), a 256 MiB level
// base size and a block-based table with a Bloom filter.
rocksdb::ColumnFamilyOptions GetUsersCFOptions() {
    constexpr int kMiB = 1024 * 1024;

    // Table format: block-based with a Bloom filter built with parameter 10.
    rocksdb::BlockBasedTableOptions block_opts;
    block_opts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));

    rocksdb::ColumnFamilyOptions cf_opts;
    cf_opts.table_factory.reset(rocksdb::NewBlockBasedTableFactory(block_opts));

    // Compression.
    cf_opts.compression = rocksdb::CompressionType::kLZ4Compression;

    // Write buffers.
    cf_opts.max_write_buffer_number = 4;
    cf_opts.write_buffer_size = 64 * kMiB;

    // Compaction.
    cf_opts.max_bytes_for_level_base = 256 * kMiB;

    return cf_opts;
}
// Returns the column family options this tutorial uses for order data:
// ZSTD compression (chosen for a higher compression ratio) and a 128 MiB
// write buffer.
rocksdb::ColumnFamilyOptions GetOrdersCFOptions() {
    constexpr int kMiB = 1024 * 1024;

    rocksdb::ColumnFamilyOptions cf_opts;
    cf_opts.write_buffer_size = 128 * kMiB;                   // larger write buffer
    cf_opts.compression = rocksdb::CompressionType::kZSTD;    // higher ratio
    return cf_opts;
}
4.2 不同列族不同配置 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <iostream>
int main() {
rocksdb::DB* db;
rocksdb::Options options;
options.create_if_missing = true;
rocksdb::DB::Open(options, "/tmp/testdb", &db);
// 为不同列族创建不同配置
rocksdb::ColumnFamilyOptions users_options;
users_options.compression = rocksdb::CompressionType::kLZ4Compression;
users_options.write_buffer_size = 64 * 1024 * 1024;
rocksdb::ColumnFamilyOptions logs_options;
logs_options.compression = rocksdb::CompressionType::kZSTD;
logs_options.write_buffer_size = 256 * 1024 * 1024;
logs_options.ttl = 7 * 24 * 3600; // 7天TTL
// 创建列族
rocksdb::ColumnFamilyHandle* users_cf;
rocksdb::ColumnFamilyHandle* logs_cf;
db->CreateColumnFamily(users_options, "users", &users_cf);
db->CreateColumnFamily(logs_options, "logs", &logs_cf);
std::cout << "Created column families with different configurations" << std::endl;
delete users_cf;
delete logs_cf;
delete db;
return 0;
}
五、跨列族操作 #
5.1 跨列族原子写入 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/write_batch.h>
#include <iostream>
// Stages two user records and two order records in a single WriteBatch and
// submits the batch with one Write() call, then reports whether it succeeded.
//
// db:        open database.
// users_cf:  handle of the column family receiving the user records.
// orders_cf: handle of the column family receiving the order records.
void AtomicCrossCFWrite(
    rocksdb::DB* db,
    rocksdb::ColumnFamilyHandle* users_cf,
    rocksdb::ColumnFamilyHandle* orders_cf) {
    rocksdb::WriteBatch pending;

    // User records.
    pending.Put(users_cf, "user:1", "Alice");
    pending.Put(users_cf, "user:1:email", "alice@example.com");

    // Order records.
    pending.Put(orders_cf, "order:1", "user:1|product:100|qty:2");
    pending.Put(orders_cf, "order:2", "user:1|product:200|qty:1");

    // Submit everything staged above in one call.
    rocksdb::WriteOptions write_opts;
    const rocksdb::Status outcome = db->Write(write_opts, &pending);

    if (!outcome.ok()) {
        std::cerr << "Write failed: " << outcome.ToString() << std::endl;
        return;
    }
    std::cout << "Atomic cross-column-family write successful!" << std::endl;
}
5.2 跨列族迭代 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <iostream>
// Prints every key/value pair stored in one column family, in iterator order.
//
// db: open database.
// cf: handle of the column family to scan.
void IterateColumnFamily(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cf) {
    std::cout << "Iterating column family: " << cf->GetName() << std::endl;

    rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions(), cf);
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
        std::cout << " " << it->key().ToString()
                  << " => " << it->value().ToString() << std::endl;
    }

    // Valid() turns false both at the end of the data and on an error, so the
    // iterator status has to be inspected to tell the two apart.
    const rocksdb::Status status = it->status();
    if (!status.ok()) {
        std::cerr << "Iteration failed: " << status.ToString() << std::endl;
    }

    delete it;
}
六、列族高级用法 #
6.1 列族封装类 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <string>
#include <memory>
#include <unordered_map>
class ColumnFamilyManager {
public:
ColumnFamilyManager(const std::string& db_path) : db_path_(db_path) {}
bool Open() {
rocksdb::Options options;
options.create_if_missing = true;
// 获取现有列族
std::vector<std::string> cf_names;
rocksdb::DB::ListColumnFamilies(options, db_path_, &cf_names);
if (cf_names.empty()) {
cf_names.push_back("default");
}
// 创建描述符
std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
for (const auto& name : cf_names) {
descriptors.push_back({name, rocksdb::ColumnFamilyOptions()});
}
// 打开数据库
rocksdb::Status status = rocksdb::DB::Open(
options, db_path_, descriptors, &handles_, &db_);
if (!status.ok()) {
return false;
}
// 建立名称到句柄的映射
for (auto* handle : handles_) {
cf_map_[handle->GetName()] = handle;
}
return true;
}
~ColumnFamilyManager() {
for (auto* handle : handles_) {
delete handle;
}
if (db_) {
delete db_;
}
}
rocksdb::ColumnFamilyHandle* GetCF(const std::string& name) {
auto it = cf_map_.find(name);
return it != cf_map_.end() ? it->second : nullptr;
}
rocksdb::ColumnFamilyHandle* CreateCF(const std::string& name) {
if (cf_map_.count(name)) {
return cf_map_[name];
}
rocksdb::ColumnFamilyHandle* handle;
rocksdb::Status status = db_->CreateColumnFamily(
rocksdb::ColumnFamilyOptions(), name, &handle);
if (status.ok()) {
handles_.push_back(handle);
cf_map_[name] = handle;
return handle;
}
return nullptr;
}
bool DropCF(const std::string& name) {
auto it = cf_map_.find(name);
if (it == cf_map_.end() || name == "default") {
return false;
}
rocksdb::Status status = db_->DropColumnFamily(it->second);
if (status.ok()) {
handles_.erase(std::remove(handles_.begin(), handles_.end(), it->second));
cf_map_.erase(it);
return true;
}
return false;
}
rocksdb::DB* GetDB() { return db_; }
private:
std::string db_path_;
rocksdb::DB* db_ = nullptr;
std::vector<rocksdb::ColumnFamilyHandle*> handles_;
std::unordered_map<std::string, rocksdb::ColumnFamilyHandle*> cf_map_;
};
6.2 按列族分片数据 #
cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <string>
class ShardedStorage {
public:
ShardedStorage(rocksdb::DB* db, int num_shards) : db_(db), num_shards_(num_shards) {
// 创建分片列族
for (int i = 0; i < num_shards; i++) {
std::string cf_name = "shard_" + std::to_string(i);
rocksdb::ColumnFamilyHandle* handle;
db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), cf_name, &handle);
shards_.push_back(handle);
}
}
~ShardedStorage() {
for (auto* handle : shards_) {
delete handle;
}
}
void Put(const std::string& key, const std::string& value) {
int shard = GetShard(key);
db_->Put(rocksdb::WriteOptions(), shards_[shard], key, value);
}
bool Get(const std::string& key, std::string* value) {
int shard = GetShard(key);
return db_->Get(rocksdb::ReadOptions(), shards_[shard], key, value).ok();
}
void Delete(const std::string& key) {
int shard = GetShard(key);
db_->Delete(rocksdb::WriteOptions(), shards_[shard], key);
}
private:
int GetShard(const std::string& key) {
// 简单哈希分片
size_t hash = std::hash<std::string>{}(key);
return hash % num_shards_;
}
rocksdb::DB* db_;
int num_shards_;
std::vector<rocksdb::ColumnFamilyHandle*> shards_;
};
七、最佳实践 #
7.1 列族使用建议 #
| 建议 | 说明 |
|---|---|
| 合理分区 | 按数据类型或访问模式分区 |
| 独立配置 | 根据数据特点配置压缩、缓存 |
| 避免过多 | 列族数量不宜过多(建议<100) |
| 及时清理 | 不再使用的列族及时删除 |
| 监控大小 | 关注各列族的数据量 |
7.2 列族vs独立数据库 #
| 对比项 | 列族 | 独立数据库 |
|---|---|---|
| 原子写入 | 支持跨列族 | 不支持 |
| 资源共享 | 共享WAL、缓存 | 独立资源 |
| 配置灵活性 | 独立配置 | 完全独立 |
| 管理复杂度 | 较低 | 较高 |
| 适用场景 | 相关数据 | 完全独立数据 |
八、总结 #
8.1 列族API速查 #
| 操作 | 方法 | 说明 |
|---|---|---|
| 创建列族 | `CreateColumnFamily(options, name, &handle)` | 创建新列族 |
| 删除列族 | `DropColumnFamily(handle)` | 删除列族 |
| 列出列族 | `ListColumnFamilies(options, path, &names)` | 获取列族列表 |
| 写入列族 | `Put(WriteOptions(), handle, key, value)` | 写入指定列族 |
| 读取列族 | `Get(ReadOptions(), handle, key, &value)` | 读取指定列族 |
| 迭代列族 | `NewIterator(ReadOptions(), handle)` | 创建列族迭代器 |
8.2 关键要点 #
- 数据隔离:每个列族数据独立存储
- 配置独立:根据数据特点配置选项
- 原子写入:支持跨列族原子操作
- 共享WAL:保证写入原子性
- 合理使用:避免创建过多列族
下一步,让我们学习压缩策略!
最后更新:2026-03-27