From 7be3940b9c61b5b77b60a86bc8a4cc5cbcb0a059 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Wed, 14 Jul 2021 23:35:12 +0200 Subject: [PATCH] kv/RocksDBStore: Add handling of block_cache option for resharding Synchronized all situations when we initialize DB to include handling of block_cache option. Lack of it prevented ability to reshard into specification that we have as default. Conflicts: src/kv/RocksDBStore.cc Trivial conflict, related to gist of the change. No logic involved in resolving. Fixes: https://tracker.ceph.com/issues/52246 Cherry-picked from: 2d6b20f7aaaf0e6 Signed-off-by: Adam Kupczyk --- src/kv/RocksDBStore.cc | 259 ++++++++++++++++++++++------------------- src/kv/RocksDBStore.h | 8 +- 2 files changed, 148 insertions(+), 119 deletions(-) diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc index 54ba0bd2712d..7d7fee68c73d 100644 --- a/src/kv/RocksDBStore.cc +++ b/src/kv/RocksDBStore.cc @@ -774,23 +774,12 @@ int RocksDBStore::create_shards(const rocksdb::Options& opt, // copy default CF settings, block cache, merge operators as // the base for new CF rocksdb::ColumnFamilyOptions cf_opt(opt); - // user input options will override the base options - std::unordered_map column_opts_map; - std::string block_cache_opts; - int r = extract_block_cache_options(p.options, &column_opts_map, &block_cache_opts); - if (r != 0) { - derr << __func__ << " failed to parse options; column family=" << p.name << - " options=" << p.options << dendl; - return -EINVAL; - } rocksdb::Status status; - status = rocksdb::GetColumnFamilyOptionsFromMap(cf_opt, column_opts_map, &cf_opt); - if (!status.ok()) { - derr << __func__ << " invalid db options; column family=" - << p.name << " options=" << p.options << dendl; - return -EINVAL; + // apply options to column family + int r = update_column_family_options(p.name, p.options, &cf_opt); + if (r != 0) { + return r; } - install_cf_mergeop(p.name, &cf_opt); for (size_t idx = 0; idx < p.shard_cnt; idx++) { std::string 
cf_name; if (p.shard_cnt == 1) @@ -846,34 +835,153 @@ int RocksDBStore::apply_sharding(const rocksdb::Options& opt, } return 0; } + // linking to rocksdb function defined in options_helper.cc // it can parse nested params like "nested_opt={opt1=1;opt2=2}" - extern rocksdb::Status rocksdb::StringToMap(const std::string& opts_str, std::unordered_map* opts_map); -int RocksDBStore::extract_block_cache_options(const std::string& opts_str, - std::unordered_map* column_opts_map, +// Splits column family options from single string into name->value column_opts_map. +// The split is done using RocksDB parser that understands "{" and "}", so it +// properly extracts compound options. +// If non-RocksDB option "block_cache" is defined it is extracted to block_cache_opt. +int RocksDBStore::split_column_family_options(const std::string& options, + std::unordered_map* opt_map, std::string* block_cache_opt) { - dout(5) << __func__ << " opts_str=" << opts_str << dendl; - rocksdb::Status status = rocksdb::StringToMap(opts_str, column_opts_map); + dout(20) << __func__ << " options=" << options << dendl; + rocksdb::Status status = rocksdb::StringToMap(options, opt_map); if (!status.ok()) { - dout(5) << __func__ << " error '" << status.getState() << - "' while parsing options '" << opts_str << "'" << dendl; + dout(5) << __func__ << " error '" << status.getState() + << "' while parsing options '" << options << "'" << dendl; return -EINVAL; } - //extract "block_cache" option - if (auto it = column_opts_map->find("block_cache"); it != column_opts_map->end()) { + // if "block_cache" option exists, then move it to separate string + if (auto it = opt_map->find("block_cache"); it != opt_map->end()) { *block_cache_opt = it->second; - column_opts_map->erase(it); + opt_map->erase(it); } else { block_cache_opt->clear(); } return 0; } +// Updates column family options. +// Take options from more_options and apply them to cf_opt. 
+// Allowed options are exactly the same as allowed for column families in RocksDB. +// Ceph addition is "block_cache" option that is translated to block_cache and +// allows to specialize separate block cache for O column family. +// +// base_name - name of column without shard suffix: "-"+number +// more_options - additional options to apply +// cf_opt - column family options to update +int RocksDBStore::update_column_family_options(const std::string& base_name, + const std::string& more_options, + rocksdb::ColumnFamilyOptions* cf_opt) +{ + std::unordered_map options_map; + std::string block_cache_opt; + rocksdb::Status status; + int r = split_column_family_options(more_options, &options_map, &block_cache_opt); + if (r != 0) { + dout(5) << __func__ << " failed to parse options; column family=" << base_name + << " options=" << more_options << dendl; + return r; + } + status = rocksdb::GetColumnFamilyOptionsFromMap(*cf_opt, options_map, cf_opt); + if (!status.ok()) { + dout(5) << __func__ << " invalid column family optionsp; column family=" + << base_name << " options=" << more_options << dendl; + dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl; + return -EINVAL; + } + if (base_name != rocksdb::kDefaultColumnFamilyName) { + // default cf has its merge operator defined in load_rocksdb_options, should not override it + install_cf_mergeop(base_name, cf_opt); + } + if (!block_cache_opt.empty()) { + r = apply_block_cache_options(base_name, block_cache_opt, cf_opt); + if (r != 0) { + // apply_block_cache_options already does all necessary douts + return r; + } + } + return 0; +} +int RocksDBStore::apply_block_cache_options(const std::string& column_name, + const std::string& block_cache_opt, + rocksdb::ColumnFamilyOptions* cf_opt) +{ + rocksdb::Status status; + std::unordered_map cache_options_map; + status = rocksdb::StringToMap(block_cache_opt, &cache_options_map); + if (!status.ok()) { + dout(5) << __func__ << " invalid block cache options; 
column=" << column_name + << " options=" << block_cache_opt << dendl; + dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl; + return -EINVAL; + } + bool require_new_block_cache = false; + std::string cache_type = cct->_conf->rocksdb_cache_type; + if (const auto it = cache_options_map.find("type"); it != cache_options_map.end()) { + cache_type = it->second; + cache_options_map.erase(it); + require_new_block_cache = true; + } + size_t cache_size = cct->_conf->rocksdb_cache_size; + if (auto it = cache_options_map.find("size"); it != cache_options_map.end()) { + std::string error; + cache_size = strict_iecstrtoll(it->second.c_str(), &error); + if (!error.empty()) { + dout(10) << __func__ << " invalid size: '" << it->second << "'" << dendl; + return -EINVAL; + } + cache_options_map.erase(it); + require_new_block_cache = true; + } + double high_pri_pool_ratio = 0.0; + if (auto it = cache_options_map.find("high_ratio"); it != cache_options_map.end()) { + std::string error; + high_pri_pool_ratio = strict_strtod(it->second.c_str(), &error); + if (!error.empty()) { + dout(10) << __func__ << " invalid high_pri (float): '" << it->second << "'" << dendl; + return -EINVAL; + } + cache_options_map.erase(it); + require_new_block_cache = true; + } + + rocksdb::BlockBasedTableOptions column_bbt_opts; + status = GetBlockBasedTableOptionsFromMap(bbt_opts, cache_options_map, &column_bbt_opts); + if (!status.ok()) { + dout(5) << __func__ << " invalid block cache options; column=" << column_name + << " options=" << block_cache_opt << dendl; + dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl; + return -EINVAL; + } + std::shared_ptr block_cache; + if (column_bbt_opts.no_block_cache) { + // clear all settings except no_block_cache + // rocksdb does not like them + column_bbt_opts = rocksdb::BlockBasedTableOptions(); + column_bbt_opts.no_block_cache = true; + } else { + if (require_new_block_cache) { + block_cache = 
create_block_cache(cache_type, cache_size, high_pri_pool_ratio); + if (!block_cache) { + dout(5) << __func__ << " failed to create block cache for params: " << block_cache_opt << dendl; + return -EINVAL; + } + } else { + block_cache = bbt_opts.block_cache; + } + } + column_bbt_opts.block_cache = block_cache; + cf_bbt_opts[column_name] = column_bbt_opts; + cf_opt->table_factory.reset(NewBlockBasedTableFactory(cf_bbt_opts[column_name])); + return 0; +} int RocksDBStore::verify_sharding(const rocksdb::Options& opt, std::vector& existing_cfs, @@ -928,89 +1036,9 @@ int RocksDBStore::verify_sharding(const rocksdb::Options& opt, for (auto& column : stored_sharding_def) { rocksdb::ColumnFamilyOptions cf_opt(opt); - std::unordered_map options_map; - std::string block_cache_opt; - - int r = extract_block_cache_options(column.options, &options_map, &block_cache_opt); + int r = update_column_family_options(column.name, column.options, &cf_opt); if (r != 0) { - derr << __func__ << " failed to parse options; column family=" << column.name << - " options=" << column.options << dendl; - return -EINVAL; - } - status = rocksdb::GetColumnFamilyOptionsFromMap(cf_opt, options_map, &cf_opt); - if (!status.ok()) { - derr << __func__ << " invalid db column family options for CF '" - << column.name << "': " << column.options << dendl; - derr << __func__ << " error = '" << status.getState() << "'" << dendl; - return -EINVAL; - } - install_cf_mergeop(column.name, &cf_opt); - - if (!block_cache_opt.empty()) { - std::unordered_map cache_options_map; - status = rocksdb::StringToMap(block_cache_opt, &cache_options_map); - if (!status.ok()) { - derr << __func__ << " invalid block cache options; column=" << column.name << - " options=" << block_cache_opt << dendl; - derr << __func__ << " error = '" << status.getState() << "'" << dendl; - return -EINVAL; - } - bool require_new_block_cache = false; - std::string cache_type = cct->_conf->rocksdb_cache_type; - if (const auto it = 
cache_options_map.find("type"); it !=cache_options_map.end()) { - cache_type = it->second; - cache_options_map.erase(it); - require_new_block_cache = true; - } - size_t cache_size = cct->_conf->rocksdb_cache_size; - if (auto it = cache_options_map.find("size"); it !=cache_options_map.end()) { - std::string error; - cache_size = strict_iecstrtoll(it->second.c_str(), &error); - if (!error.empty()) { - derr << __func__ << " invalid size: '" << it->second << "'" << dendl; - } - cache_options_map.erase(it); - require_new_block_cache = true; - } - double high_pri_pool_ratio = 0.0; - if (auto it = cache_options_map.find("high_ratio"); it !=cache_options_map.end()) { - std::string error; - high_pri_pool_ratio = strict_strtod(it->second.c_str(), &error); - if (!error.empty()) { - derr << __func__ << " invalid high_pri (float): '" << it->second << "'" << dendl; - } - cache_options_map.erase(it); - require_new_block_cache = true; - } - - rocksdb::BlockBasedTableOptions column_bbt_opts; - status = GetBlockBasedTableOptionsFromMap(bbt_opts, cache_options_map, &column_bbt_opts); - if (!status.ok()) { - derr << __func__ << " invalid block cache options; column=" << column.name << - " options=" << block_cache_opt << dendl; - derr << __func__ << " error = '" << status.getState() << "'" << dendl; - return -EINVAL; - } - std::shared_ptr block_cache; - if (column_bbt_opts.no_block_cache) { - // clear all settings except no_block_cache - // rocksdb does not like then - column_bbt_opts = rocksdb::BlockBasedTableOptions(); - column_bbt_opts.no_block_cache = true; - } else { - if (require_new_block_cache) { - block_cache = create_block_cache(cache_type, cache_size, high_pri_pool_ratio); - if (!block_cache) { - dout(5) << __func__ << " failed to create block cache for params: " << block_cache_opt << dendl; - return -EINVAL; - } - } else { - block_cache = bbt_opts.block_cache; - } - } - column_bbt_opts.block_cache = block_cache; - cf_bbt_opts[column.name] = column_bbt_opts; - 
cf_opt.table_factory.reset(NewBlockBasedTableFactory(cf_bbt_opts[column.name])); + return r; } if (column.shard_cnt == 1) { emplace_cf(column, 0, column.name, cf_opt); @@ -3015,13 +3043,10 @@ int RocksDBStore::prepare_for_reshard(const std::string& new_sharding, break; } } - status = rocksdb::GetColumnFamilyOptionsFromString(cf_opt, options, &cf_opt); - if (!status.ok()) { - derr << __func__ << " failure parsing column options: " << options << dendl; - return -EINVAL; + int r = update_column_family_options(base_name, options, &cf_opt); + if (r != 0) { + return r; } - if (base_name != rocksdb::kDefaultColumnFamilyName) - install_cf_mergeop(base_name, &cf_opt); cfs_to_open.emplace_back(full_name, cf_opt); } @@ -3075,12 +3100,10 @@ int RocksDBStore::prepare_for_reshard(const std::string& new_sharding, break; } } - status = rocksdb::GetColumnFamilyOptionsFromString(cf_opt, options, &cf_opt); - if (!status.ok()) { - derr << __func__ << " failure parsing column options: " << options << dendl; - return -EINVAL; + int r = update_column_family_options(base_name, options, &cf_opt); + if (r != 0) { + return r; } - install_cf_mergeop(base_name, &cf_opt); rocksdb::ColumnFamilyHandle *cf; status = db->CreateColumnFamily(cf_opt, full_name, &cf); if (!status.ok()) { diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h index 0735e115c364..529b4ffa621a 100644 --- a/src/kv/RocksDBStore.h +++ b/src/kv/RocksDBStore.h @@ -151,9 +151,15 @@ private: std::vector& missing_cfs, std::vector >& missing_cfs_shard); std::shared_ptr create_block_cache(const std::string& cache_type, size_t cache_size, double cache_prio_high = 0.0); - int extract_block_cache_options(const std::string& opts_str, + int split_column_family_options(const std::string& opts_str, std::unordered_map* column_opts_map, std::string* block_cache_opt); + int apply_block_cache_options(const std::string& column_name, + const std::string& block_cache_opt, + rocksdb::ColumnFamilyOptions* cf_opt); + int 
update_column_family_options(const std::string& base_name, + const std::string& more_options, + rocksdb::ColumnFamilyOptions* cf_opt); // manage async compactions ceph::mutex compact_queue_lock = ceph::make_mutex("RocksDBStore::compact_thread_lock"); -- 2.47.3