RocksDB列族 #

一、列族基础 #

1.1 什么是列族 #

列族(Column Family)是RocksDB提供的逻辑数据分区机制,允许在同一个数据库实例中存储多组独立的数据。

text
数据库结构:
┌─────────────────────────────────────┐
│           RocksDB 数据库             │
├─────────────────────────────────────┤
│  列族: default                       │
│  ├── MemTable                       │
│  ├── SST Files                      │
│  └── 独立配置                        │
├─────────────────────────────────────┤
│  列族: users                        │
│  ├── MemTable                       │
│  ├── SST Files                      │
│  └── 独立配置                        │
├─────────────────────────────────────┤
│  列族: orders                       │
│  ├── MemTable                       │
│  ├── SST Files                      │
│  └── 独立配置                        │
└─────────────────────────────────────┘
           ↑
        共享WAL

1.2 列族特点 #

特点 说明
数据隔离 每个列族数据独立存储
配置独立 每个列族可独立配置压缩、缓存等
共享WAL 所有列族共享WAL,保证原子性
原子写入 跨列族写入支持原子性
独立Flush 每个列族独立Flush到磁盘

1.3 基本使用 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>
#include <vector>

int main() {
    rocksdb::DB* db;
    rocksdb::Options options;
    options.create_if_missing = true;
    
    // 打开数据库,获取所有列族
    std::vector<std::string> column_families;
    rocksdb::Status status = rocksdb::DB::ListColumnFamilies(
        options, "/tmp/testdb", &column_families);
    
    // 创建列族句柄
    std::vector<rocksdb::ColumnFamilyHandle*> handles;
    
    // 打开数据库
    status = rocksdb::DB::Open(options, "/tmp/testdb", &db);
    if (!status.ok()) {
        std::cerr << "Open failed: " << status.ToString() << std::endl;
        return 1;
    }
    
    // 创建新列族
    rocksdb::ColumnFamilyHandle* users_cf;
    status = db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "users", &users_cf);
    
    if (status.ok()) {
        std::cout << "Column family 'users' created!" << std::endl;
    }
    
    // 写入数据到指定列族
    db->Put(rocksdb::WriteOptions(), users_cf, "user:1", "Alice");
    db->Put(rocksdb::WriteOptions(), users_cf, "user:2", "Bob");
    
    // 写入数据到默认列族
    db->Put(rocksdb::WriteOptions(), "default_key", "default_value");
    
    // 从指定列族读取
    std::string value;
    status = db->Get(rocksdb::ReadOptions(), users_cf, "user:1", &value);
    if (status.ok()) {
        std::cout << "user:1 = " << value << std::endl;
    }
    
    // 清理
    delete users_cf;
    delete db;
    return 0;
}

二、列族操作 #

2.1 创建列族 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>

// Creates the "users", "orders" and "products" column families on `db`,
// reports the outcome of each creation, and finally releases every handle
// that was obtained. `db` must point to an open database.
void CreateColumnFamilies(rocksdb::DB* db) {
    constexpr int kCount = 3;
    const char* const kNames[kCount] = {"users", "orders", "products"};
    rocksdb::ColumnFamilyHandle* handles[kCount] = {nullptr, nullptr, nullptr};

    // Create the column families one by one; a failure (for example because
    // the column family already exists) is reported and does not stop the
    // remaining creations.
    for (int i = 0; i < kCount; ++i) {
        const rocksdb::Status status = db->CreateColumnFamily(
            rocksdb::ColumnFamilyOptions(), kNames[i], &handles[i]);
        if (status.ok()) {
            std::cout << "Created column family: " << kNames[i] << std::endl;
        } else {
            std::cerr << "Failed to create column family " << kNames[i]
                      << ": " << status.ToString() << std::endl;
            handles[i] = nullptr;  // never release a handle we did not get
        }
    }

    // Use the column families here...

    // Release the handles through the DB rather than with a raw delete.
    for (rocksdb::ColumnFamilyHandle* handle : handles) {
        if (handle != nullptr) {
            db->DestroyColumnFamilyHandle(handle);
        }
    }
}

2.2 删除列族 #

cpp
#include <rocksdb/db.h>
#include <iostream>

// Drops the column family referred to by `handle` and then releases the
// handle. The function takes ownership of `handle`: it must not be used by the
// caller afterwards, whether or not the drop succeeded.
void DropColumnFamily(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* handle) {
    if (handle == nullptr) {
        std::cerr << "Failed to drop column family: null handle" << std::endl;
        return;
    }

    // Copy the name first; the handle is released at the end of this function.
    const std::string cf_name = handle->GetName();

    rocksdb::Status status = db->DropColumnFamily(handle);

    if (status.ok()) {
        std::cout << "Dropped column family: " << cf_name << std::endl;
    } else {
        std::cerr << "Failed to drop column family: " << status.ToString() << std::endl;
    }

    // Note: the handle still has to be released after the drop. Going through
    // the DB (instead of a raw delete) lets RocksDB refuse to destroy the
    // handle it owns itself, i.e. the one returned by DefaultColumnFamily().
    status = db->DestroyColumnFamilyHandle(handle);
    if (!status.ok()) {
        std::cerr << "Failed to release column family handle: " << status.ToString() << std::endl;
    }
}

2.3 列出所有列族 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <iostream>
#include <vector>

void ListColumnFamilies(const std::string& db_path) {
    std::vector<std::string> column_families;
    
    rocksdb::Options options;
    rocksdb::Status status = rocksdb::DB::ListColumnFamilies(
        options, db_path, &column_families);
    
    if (status.ok()) {
        std::cout << "Column families in database:" << std::endl;
        for (const auto& cf : column_families) {
            std::cout << "  - " << cf << std::endl;
        }
    } else {
        std::cerr << "Failed to list column families: " << status.ToString() << std::endl;
    }
}

三、打开带列族的数据库 #

3.1 打开现有列族 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <iostream>
#include <vector>

int main() {
    rocksdb::DB* db;
    rocksdb::Options options;
    options.create_if_missing = true;
    
    // 获取现有列族列表
    std::vector<std::string> column_families;
    rocksdb::DB::ListColumnFamilies(options, "/tmp/testdb", &column_families);
    
    // 准备列族描述符
    std::vector<rocksdb::ColumnFamilyDescriptor> column_family_descriptors;
    for (const auto& cf_name : column_families) {
        column_family_descriptors.push_back(
            rocksdb::ColumnFamilyDescriptor(cf_name, rocksdb::ColumnFamilyOptions())
        );
    }
    
    // 打开数据库和所有列族
    std::vector<rocksdb::ColumnFamilyHandle*> handles;
    rocksdb::Status status = rocksdb::DB::Open(
        options, 
        "/tmp/testdb", 
        column_family_descriptors, 
        &handles, 
        &db
    );
    
    if (!status.ok()) {
        std::cerr << "Open failed: " << status.ToString() << std::endl;
        return 1;
    }
    
    // 使用列族
    for (size_t i = 0; i < handles.size(); i++) {
        std::cout << "Column family: " << handles[i]->GetName() << std::endl;
    }
    
    // 清理
    for (auto handle : handles) {
        delete handle;
    }
    delete db;
    
    return 0;
}

3.2 打开特定列族 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/column_family.h>
#include <vector>

// Opens (creating it if necessary) the database at `db_path` together with
// every column family it currently contains, each with default
// ColumnFamilyOptions.
//
// @param db_path   Filesystem path of the database.
// @param cf_names  NOTE(review): currently not consulted. A read-write open
//                  has to list every existing column family, so the function
//                  always opens all of them; confirm whether callers expect
//                  the requested names to be created or filtered.
// @param handles   Output: receives one handle per opened column family. The
//                  caller must release each one with
//                  DB::DestroyColumnFamilyHandle before deleting the DB.
// @return The opened database (owned by the caller), or nullptr on failure or
//         when `handles` is null.
rocksdb::DB* OpenWithColumnFamilies(
    const std::string& db_path,
    const std::vector<std::string>& cf_names,
    std::vector<rocksdb::ColumnFamilyHandle*>* handles) {

    (void)cf_names;  // see the parameter note above

    if (handles == nullptr) {
        return nullptr;
    }

    rocksdb::Options options;
    options.create_if_missing = true;

    // Fetch all column families. The status is intentionally not treated as
    // fatal: listing fails for a database that does not exist yet, which
    // create_if_missing is meant to handle.
    std::vector<std::string> all_cfs;
    rocksdb::DB::ListColumnFamilies(options, db_path, &all_cfs);

    // Make sure the default column family is part of the list. A plain loop is
    // used so this snippet does not depend on <algorithm>.
    bool has_default = false;
    for (const auto& cf : all_cfs) {
        if (cf == "default") {
            has_default = true;
            break;
        }
    }
    if (!has_default) {
        all_cfs.insert(all_cfs.begin(), "default");
    }

    // Build the descriptors.
    std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
    descriptors.reserve(all_cfs.size());
    for (const auto& cf : all_cfs) {
        descriptors.emplace_back(cf, rocksdb::ColumnFamilyOptions());
    }

    rocksdb::DB* db = nullptr;
    const rocksdb::Status status =
        rocksdb::DB::Open(options, db_path, descriptors, handles, &db);

    return status.ok() ? db : nullptr;
}

四、列族配置 #

4.1 独立配置选项 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>
#include <rocksdb/filter_policy.h>

// Builds the options used for the "users" column family: LZ4 compression,
// 64 MiB write buffers (at most 4 of them), a 256 MiB level base size and a
// block-based table whose filter policy is a Bloom filter created with the
// argument 10.
rocksdb::ColumnFamilyOptions GetUsersCFOptions() {
    constexpr int kMiB = 1024 * 1024;

    // Table format: block-based table with a Bloom filter.
    rocksdb::BlockBasedTableOptions bloom_table;
    bloom_table.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));

    rocksdb::ColumnFamilyOptions users_opts;
    users_opts.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bloom_table));

    // Compaction sizing.
    users_opts.max_bytes_for_level_base = 256 * kMiB;

    // Memtable (write buffer) sizing.
    users_opts.max_write_buffer_number = 4;
    users_opts.write_buffer_size = 64 * kMiB;

    // Compression.
    users_opts.compression = rocksdb::kLZ4Compression;

    return users_opts;
}

// Builds the options used for the "orders" column family: ZSTD compression
// (chosen for a higher compression ratio) and a 128 MiB write buffer.
rocksdb::ColumnFamilyOptions GetOrdersCFOptions() {
    constexpr int kMiB = 1024 * 1024;

    rocksdb::ColumnFamilyOptions orders_opts;
    orders_opts.write_buffer_size = 128 * kMiB;    // larger write buffer
    orders_opts.compression = rocksdb::kZSTD;      // stronger compression for order data
    return orders_opts;
}

4.2 不同列族不同配置 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <iostream>

int main() {
    rocksdb::DB* db;
    rocksdb::Options options;
    options.create_if_missing = true;
    
    rocksdb::DB::Open(options, "/tmp/testdb", &db);
    
    // 为不同列族创建不同配置
    rocksdb::ColumnFamilyOptions users_options;
    users_options.compression = rocksdb::CompressionType::kLZ4Compression;
    users_options.write_buffer_size = 64 * 1024 * 1024;
    
    rocksdb::ColumnFamilyOptions logs_options;
    logs_options.compression = rocksdb::CompressionType::kZSTD;
    logs_options.write_buffer_size = 256 * 1024 * 1024;
    logs_options.ttl = 7 * 24 * 3600;  // 7天TTL
    
    // 创建列族
    rocksdb::ColumnFamilyHandle* users_cf;
    rocksdb::ColumnFamilyHandle* logs_cf;
    
    db->CreateColumnFamily(users_options, "users", &users_cf);
    db->CreateColumnFamily(logs_options, "logs", &logs_cf);
    
    std::cout << "Created column families with different configurations" << std::endl;
    
    delete users_cf;
    delete logs_cf;
    delete db;
    
    return 0;
}

五、跨列族操作 #

5.1 跨列族原子写入 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/write_batch.h>
#include <iostream>

// Writes two user records and two order records as a single WriteBatch, so
// the updates to both column families are submitted in one DB::Write call.
// Prints a success message to stdout or the failure status to stderr.
void AtomicCrossCFWrite(
    rocksdb::DB* db,
    rocksdb::ColumnFamilyHandle* users_cf,
    rocksdb::ColumnFamilyHandle* orders_cf) {

    rocksdb::WriteBatch pending;

    // Stage the user records.
    pending.Put(users_cf, "user:1", "Alice");
    pending.Put(users_cf, "user:1:email", "alice@example.com");

    // Stage the order records.
    pending.Put(orders_cf, "order:1", "user:1|product:100|qty:2");
    pending.Put(orders_cf, "order:2", "user:1|product:200|qty:1");

    // Submit everything in one write.
    const rocksdb::Status outcome = db->Write(rocksdb::WriteOptions(), &pending);
    if (!outcome.ok()) {
        std::cerr << "Write failed: " << outcome.ToString() << std::endl;
        return;
    }

    std::cout << "Atomic cross-column-family write successful!" << std::endl;
}

5.2 列族迭代 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <iostream>

// Prints every key/value pair of column family `cf` in iteration order,
// followed by an error message on stderr if the scan ended because of an
// iterator error rather than because the data was exhausted.
void IterateColumnFamily(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cf) {
    std::cout << "Iterating column family: " << cf->GetName() << std::endl;

    rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions(), cf);

    for (it->SeekToFirst(); it->Valid(); it->Next()) {
        std::cout << "  " << it->key().ToString()
                  << " => " << it->value().ToString() << std::endl;
    }

    // Valid() also becomes false when the iterator runs into an error, so the
    // status has to be checked to tell "end of data" apart from a failed scan.
    const rocksdb::Status status = it->status();
    if (!status.ok()) {
        std::cerr << "Iteration failed: " << status.ToString() << std::endl;
    }

    delete it;
}

六、列族高级用法 #

6.1 列族封装类 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

class ColumnFamilyManager {
public:
    ColumnFamilyManager(const std::string& db_path) : db_path_(db_path) {}
    
    bool Open() {
        rocksdb::Options options;
        options.create_if_missing = true;
        
        // 获取现有列族
        std::vector<std::string> cf_names;
        rocksdb::DB::ListColumnFamilies(options, db_path_, &cf_names);
        
        if (cf_names.empty()) {
            cf_names.push_back("default");
        }
        
        // 创建描述符
        std::vector<rocksdb::ColumnFamilyDescriptor> descriptors;
        for (const auto& name : cf_names) {
            descriptors.push_back({name, rocksdb::ColumnFamilyOptions()});
        }
        
        // 打开数据库
        rocksdb::Status status = rocksdb::DB::Open(
            options, db_path_, descriptors, &handles_, &db_);
        
        if (!status.ok()) {
            return false;
        }
        
        // 建立名称到句柄的映射
        for (auto* handle : handles_) {
            cf_map_[handle->GetName()] = handle;
        }
        
        return true;
    }
    
    ~ColumnFamilyManager() {
        for (auto* handle : handles_) {
            delete handle;
        }
        if (db_) {
            delete db_;
        }
    }
    
    rocksdb::ColumnFamilyHandle* GetCF(const std::string& name) {
        auto it = cf_map_.find(name);
        return it != cf_map_.end() ? it->second : nullptr;
    }
    
    rocksdb::ColumnFamilyHandle* CreateCF(const std::string& name) {
        if (cf_map_.count(name)) {
            return cf_map_[name];
        }
        
        rocksdb::ColumnFamilyHandle* handle;
        rocksdb::Status status = db_->CreateColumnFamily(
            rocksdb::ColumnFamilyOptions(), name, &handle);
        
        if (status.ok()) {
            handles_.push_back(handle);
            cf_map_[name] = handle;
            return handle;
        }
        return nullptr;
    }
    
    bool DropCF(const std::string& name) {
        auto it = cf_map_.find(name);
        if (it == cf_map_.end() || name == "default") {
            return false;
        }
        
        rocksdb::Status status = db_->DropColumnFamily(it->second);
        if (status.ok()) {
            handles_.erase(std::remove(handles_.begin(), handles_.end(), it->second));
            cf_map_.erase(it);
            return true;
        }
        return false;
    }
    
    rocksdb::DB* GetDB() { return db_; }

private:
    std::string db_path_;
    rocksdb::DB* db_ = nullptr;
    std::vector<rocksdb::ColumnFamilyHandle*> handles_;
    std::unordered_map<std::string, rocksdb::ColumnFamilyHandle*> cf_map_;
};

6.2 按列族分片数据 #

cpp
#include <rocksdb/db.h>
#include <rocksdb/column_family.h>
#include <string>

class ShardedStorage {
public:
    ShardedStorage(rocksdb::DB* db, int num_shards) : db_(db), num_shards_(num_shards) {
        // 创建分片列族
        for (int i = 0; i < num_shards; i++) {
            std::string cf_name = "shard_" + std::to_string(i);
            rocksdb::ColumnFamilyHandle* handle;
            db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), cf_name, &handle);
            shards_.push_back(handle);
        }
    }
    
    ~ShardedStorage() {
        for (auto* handle : shards_) {
            delete handle;
        }
    }
    
    void Put(const std::string& key, const std::string& value) {
        int shard = GetShard(key);
        db_->Put(rocksdb::WriteOptions(), shards_[shard], key, value);
    }
    
    bool Get(const std::string& key, std::string* value) {
        int shard = GetShard(key);
        return db_->Get(rocksdb::ReadOptions(), shards_[shard], key, value).ok();
    }
    
    void Delete(const std::string& key) {
        int shard = GetShard(key);
        db_->Delete(rocksdb::WriteOptions(), shards_[shard], key);
    }

private:
    int GetShard(const std::string& key) {
        // 简单哈希分片
        size_t hash = std::hash<std::string>{}(key);
        return hash % num_shards_;
    }
    
    rocksdb::DB* db_;
    int num_shards_;
    std::vector<rocksdb::ColumnFamilyHandle*> shards_;
};

七、最佳实践 #

7.1 列族使用建议 #

建议 说明
合理分区 按数据类型或访问模式分区
独立配置 根据数据特点配置压缩、缓存
避免过多 列族数量不宜过多(建议<100)
及时清理 不再使用的列族及时删除
监控大小 关注各列族的数据量

7.2 列族vs独立数据库 #

对比项 列族 独立数据库
原子写入 支持跨列族 不支持
资源共享 共享WAL、缓存 独立资源
配置灵活性 独立配置 完全独立
管理复杂度 较低 较高
适用场景 相关数据 完全独立数据

八、总结 #

8.1 列族API速查 #

操作 方法 说明
创建列族 CreateColumnFamily(options, name, &handle) 创建新列族
删除列族 DropColumnFamily(handle) 删除列族(句柄仍需通过DestroyColumnFamilyHandle释放)
列出列族 ListColumnFamilies(options, path, &names) 获取列族列表
写入列族 Put(WriteOptions(), handle, key, value) 写入指定列族
读取列族 Get(ReadOptions(), handle, key, &value) 读取指定列族
迭代列族 NewIterator(ReadOptions(), handle) 创建列族迭代器

8.2 关键要点 #

  1. 数据隔离:每个列族数据独立存储
  2. 配置独立:根据数据特点配置选项
  3. 原子写入:支持跨列族原子操作
  4. 共享WAL:保证写入原子性
  5. 合理使用:避免创建过多列族

下一步,让我们学习压缩策略!

最后更新:2026-03-27