I am looking for a way to have a fixed-size prefix extractor for one specific column family in a RocksDB
database, as this column family will be used to implement a join index between two existing "full key indexing" column families.
I don't see any concrete mention in the column family docs of per-column-family specialization of the prefix extractor, but `prefix_extractor` is clearly defined in ColumnFamilyOptions, so perhaps this is intended to work.
However, my drafted attempt doesn't seem to work very well. It manages to initialize the database and add the column families with the given descriptors, but it fails the second time the test executable is run: it is unable to load the database with the column family descriptors (code below):
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
namespace rdb = rocksdb;
int main() {
rdb::Options options;
options.write_buffer_size = (1U << 28);
options.max_open_files = (1U << 10);
options.create_if_missing = true;
options.wal_bytes_per_sync = 32768;
options.bytes_per_sync = 32768;
options.recycle_log_file_num = 8;
rdb::BlockBasedTableOptions blockbased_options;
blockbased_options.block_size = 1ULL << 17;
blockbased_options.enable_index_compression = false;
options.table_factory.reset(rdb::NewBlockBasedTableFactory(blockbased_options));
options.whole_key_filtering = false;
std::vector<rdb::ColumnFamilyDescriptor> cf_descriptors;
{
rdb::ColumnFamilyOptions cf_normal_options;
cf_descriptors.emplace_back("normal_cf", cf_normal_options);
rdb::ColumnFamilyOptions cf_key_prefix_options;
blockbased_options.whole_key_filtering = false;
cf_pwo_edge_options.table_factory.reset(rdb::NewBlockBasedTableFactory(blockbased_options));
cf_pwo_edge_options.prefix_extractor.reset( rdb::NewFixedPrefixTransform( sizeof(std::uint64_t) ) );
cf_descriptors.emplace_back("key_prefix_cf", cf_key_prefix_options);
}
std::unique_ptr<rdb::DB> db;
std::vector< std::unique_ptr< rdb::ColumnFamilyHandle > > cf_safe_handles;
{
rdb::DB* db_temp;
std::vector<rdb::ColumnFamilyHandle*> cf_handles;
auto s = rdb::DB::Open(options, "/tmp/test_cf", cf_descriptors, &cf_handles, &db_temp);
if (!s.ok())
{
std::cout << "unable to open db with column family descriptors " << std::endl;
s = rdb::DB::Open(options, "/tmp/test_cf", &db_temp);
if (!s.ok())
{
std::cout << "unable to open uninitialized db " << std::endl;
std::exit(1);
}
if (nullptr == db_temp)
{
std::cout << "db_temp pointer uninitialized " << std::endl;
std::exit(1);
}
for (auto&& cf_desc : cf_descriptors)
{
rdb::ColumnFamilyHandle* cf_handle;
db_temp->CreateColumnFamily(cf_desc.options, cf_desc.name, &cf_handle);
cf_handles.push_back(cf_handle);
}
}
db.reset(db_temp);
for (auto&& cf_ptr : cf_handles)
{
cf_safe_handles.emplace_back(cf_ptr);
}
}
// Operations
std::string value;
auto s = db->Put(rdb::WriteOptions(), cf_safe_handles[0].get(), "key1", "value1");
{
std::string obj("phenomenal but will be removed shortly");
s = db->Put(rdb::WriteOptions(), cf_safe_handles[1].get(), "key2", obj);
}
s = db->Get(rdb::ReadOptions(), cf_safe_handles[0].get(), "key1", &value);
assert(s.ok());
std::cout << " key1 := " << value << std::endl;
s = db->Get(rdb::ReadOptions(), cf_safe_handles[1].get(), "key2", &value);
std::cout << " key2 := " << value << std::endl;
assert(s.ok());
};
What is the message in the status returned by rdb::DB::Open(options, "/tmp/test_cf", cf_descriptors, &cf_handles, &db_temp)
when it does not return an OK status?