git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kv/RocksDBStore: Add handling of block_cache option for resharding 42844/head
author Adam Kupczyk <akupczyk@redhat.com>
Wed, 14 Jul 2021 21:35:12 +0000 (23:35 +0200)
committer Adam Kupczyk <akupczyk@redhat.com>
Fri, 20 Aug 2021 09:32:37 +0000 (11:32 +0200)
Synchronized all code paths that initialize the DB so that each of them handles the block_cache option.
The lack of this handling made it impossible to reshard into the specification that we have as default.

Conflicts:
src/kv/RocksDBStore.cc
Trivial conflict, related to gist of the change. No logic involved in resolving.

Fixes: https://tracker.ceph.com/issues/52246
Cherry-picked from: 2d6b20f7aaaf0e6

Signed-off-by: Adam Kupczyk <akupczyk@redhat.com>
src/kv/RocksDBStore.cc
src/kv/RocksDBStore.h

index 54ba0bd2712d767cb8f3d2eb69ec57d2c5b4183b..7d7fee68c73d5f4f0eb53bb6ffaf4f64806f5185 100644 (file)
@@ -774,23 +774,12 @@ int RocksDBStore::create_shards(const rocksdb::Options& opt,
     // copy default CF settings, block cache, merge operators as
     // the base for new CF
     rocksdb::ColumnFamilyOptions cf_opt(opt);
-    // user input options will override the base options
-    std::unordered_map<std::string, std::string> column_opts_map;
-    std::string block_cache_opts;
-    int r = extract_block_cache_options(p.options, &column_opts_map, &block_cache_opts);
-    if (r != 0) {
-      derr << __func__ << " failed to parse options; column family=" << p.name <<
-       " options=" << p.options << dendl;
-      return -EINVAL;
-    }
     rocksdb::Status status;
-    status = rocksdb::GetColumnFamilyOptionsFromMap(cf_opt, column_opts_map, &cf_opt);
-    if (!status.ok()) {
-      derr << __func__ << " invalid db options; column family="
-          << p.name << " options=" << p.options << dendl;
-      return -EINVAL;
+    // apply options to column family
+    int r = update_column_family_options(p.name, p.options, &cf_opt);
+    if (r != 0) {
+      return r;
     }
-    install_cf_mergeop(p.name, &cf_opt);
     for (size_t idx = 0; idx < p.shard_cnt; idx++) {
       std::string cf_name;
       if (p.shard_cnt == 1)
@@ -846,34 +835,153 @@ int RocksDBStore::apply_sharding(const rocksdb::Options& opt,
   }
   return 0;
 }
+
 // linking to rocksdb function defined in options_helper.cc
 // it can parse nested params like "nested_opt={opt1=1;opt2=2}"
-
 extern rocksdb::Status rocksdb::StringToMap(const std::string& opts_str,
                                   std::unordered_map<std::string, std::string>* opts_map);
 
-int RocksDBStore::extract_block_cache_options(const std::string& opts_str,
-                                             std::unordered_map<std::string, std::string>* column_opts_map,
+// Splits column family options from a single string into the name->value map opt_map.
+// The split is done using the RocksDB parser that understands "{" and "}", so it
+// properly extracts compound options.
+// If the non-RocksDB option "block_cache" is defined, it is moved out of opt_map into block_cache_opt.
+int RocksDBStore::split_column_family_options(const std::string& options,
+                                             std::unordered_map<std::string, std::string>* opt_map,
                                              std::string* block_cache_opt)
 {
-  dout(5) << __func__ << " opts_str=" << opts_str << dendl;
-  rocksdb::Status status = rocksdb::StringToMap(opts_str, column_opts_map);
+  dout(20) << __func__ << " options=" << options << dendl;
+  rocksdb::Status status = rocksdb::StringToMap(options, opt_map);
   if (!status.ok()) {
-    dout(5) << __func__ << " error '" << status.getState() <<
-      "' while parsing options '" << opts_str << "'" << dendl;
+    dout(5) << __func__ << " error '" << status.getState()
+           << "' while parsing options '" << options << "'" << dendl;
     return -EINVAL;
   }
-  //extract "block_cache" option
-  if (auto it = column_opts_map->find("block_cache"); it != column_opts_map->end()) {
+  // if "block_cache" option exists, then move it to separate string
+  if (auto it = opt_map->find("block_cache"); it != opt_map->end()) {
     *block_cache_opt = it->second;
-    column_opts_map->erase(it);
+    opt_map->erase(it);
   } else {
     block_cache_opt->clear();
   }
   return 0;
 }
 
+// Updates column family options.
+// Takes options from more_options and applies them to cf_opt.
+// Allowed options are exactly the same as allowed for column families in RocksDB.
+// The Ceph addition is the "block_cache" option, which is translated to block_cache and
+// allows specializing a separate block cache for O column family.
+//
+// base_name - name of column without shard suffix: "-"+number
+// more_options - additional options to apply
+// cf_opt - column family options to update
+int RocksDBStore::update_column_family_options(const std::string& base_name,
+                                              const std::string& more_options,
+                                              rocksdb::ColumnFamilyOptions* cf_opt)
+{
+  std::unordered_map<std::string, std::string> options_map;
+  std::string block_cache_opt;
+  rocksdb::Status status;
+  int r = split_column_family_options(more_options, &options_map, &block_cache_opt);
+  if (r != 0) {
+    dout(5) << __func__ << " failed to parse options; column family=" << base_name
+           << " options=" << more_options << dendl;
+    return r;
+  }
+  status = rocksdb::GetColumnFamilyOptionsFromMap(*cf_opt, options_map, cf_opt);
+  if (!status.ok()) {
+    dout(5) << __func__ << " invalid column family optionsp; column family="
+           << base_name << " options=" << more_options << dendl;
+    dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl;
+    return -EINVAL;
+  }
+  if (base_name != rocksdb::kDefaultColumnFamilyName) {
+    // default cf has its merge operator defined in load_rocksdb_options, should not override it
+    install_cf_mergeop(base_name, cf_opt);
+  }
+  if (!block_cache_opt.empty()) {
+    r = apply_block_cache_options(base_name, block_cache_opt, cf_opt);
+    if (r != 0) {
+      // apply_block_cache_options already does all necessary douts
+      return r;
+    }
+  }
+  return 0;
+}
 
+int RocksDBStore::apply_block_cache_options(const std::string& column_name,
+                                           const std::string& block_cache_opt,
+                                           rocksdb::ColumnFamilyOptions* cf_opt)
+{
+  rocksdb::Status status;
+  std::unordered_map<std::string, std::string> cache_options_map;
+  status = rocksdb::StringToMap(block_cache_opt, &cache_options_map);
+  if (!status.ok()) {
+    dout(5) << __func__ << " invalid block cache options; column=" << column_name
+           << " options=" << block_cache_opt << dendl;
+    dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl;
+    return -EINVAL;
+  }
+  bool require_new_block_cache = false;
+  std::string cache_type = cct->_conf->rocksdb_cache_type;
+  if (const auto it = cache_options_map.find("type"); it != cache_options_map.end()) {
+    cache_type = it->second;
+    cache_options_map.erase(it);
+    require_new_block_cache = true;
+  }
+  size_t cache_size = cct->_conf->rocksdb_cache_size;
+  if (auto it = cache_options_map.find("size"); it != cache_options_map.end()) {
+    std::string error;
+    cache_size = strict_iecstrtoll(it->second.c_str(), &error);
+    if (!error.empty()) {
+      dout(10) << __func__ << " invalid size: '" << it->second << "'" << dendl;
+      return -EINVAL;
+    }
+    cache_options_map.erase(it);
+    require_new_block_cache = true;
+  }
+  double high_pri_pool_ratio = 0.0;
+  if (auto it = cache_options_map.find("high_ratio"); it != cache_options_map.end()) {
+    std::string error;
+    high_pri_pool_ratio = strict_strtod(it->second.c_str(), &error);
+    if (!error.empty()) {
+      dout(10) << __func__ << " invalid high_pri (float): '" << it->second << "'" << dendl;
+      return -EINVAL;
+    }
+    cache_options_map.erase(it);
+    require_new_block_cache = true;
+  }
+
+  rocksdb::BlockBasedTableOptions column_bbt_opts;
+  status = GetBlockBasedTableOptionsFromMap(bbt_opts, cache_options_map, &column_bbt_opts);
+  if (!status.ok()) {
+    dout(5) << __func__ << " invalid block cache options; column=" << column_name
+           << " options=" << block_cache_opt << dendl;
+    dout(5) << __func__ << " RocksDB error='" << status.getState() << "'" << dendl;
+    return -EINVAL;
+  }
+  std::shared_ptr<rocksdb::Cache> block_cache;
+  if (column_bbt_opts.no_block_cache) {
+    // clear all settings except no_block_cache
+    // rocksdb does not like then
+    column_bbt_opts = rocksdb::BlockBasedTableOptions();
+    column_bbt_opts.no_block_cache = true;
+  } else {
+    if (require_new_block_cache) {
+      block_cache = create_block_cache(cache_type, cache_size, high_pri_pool_ratio);
+      if (!block_cache) {
+       dout(5) << __func__ << " failed to create block cache for params: " << block_cache_opt << dendl;
+       return -EINVAL;
+      }
+    } else {
+      block_cache = bbt_opts.block_cache;
+    }
+  }
+  column_bbt_opts.block_cache = block_cache;
+  cf_bbt_opts[column_name] = column_bbt_opts;
+  cf_opt->table_factory.reset(NewBlockBasedTableFactory(cf_bbt_opts[column_name]));
+  return 0;
+}
 
 int RocksDBStore::verify_sharding(const rocksdb::Options& opt,
                                  std::vector<rocksdb::ColumnFamilyDescriptor>& existing_cfs,
@@ -928,89 +1036,9 @@ int RocksDBStore::verify_sharding(const rocksdb::Options& opt,
 
   for (auto& column : stored_sharding_def) {
     rocksdb::ColumnFamilyOptions cf_opt(opt);
-    std::unordered_map<std::string, std::string> options_map;
-    std::string block_cache_opt;
-
-    int r = extract_block_cache_options(column.options, &options_map, &block_cache_opt);
+    int r = update_column_family_options(column.name, column.options, &cf_opt);
     if (r != 0) {
-      derr << __func__ << " failed to parse options; column family=" << column.name <<
-       " options=" << column.options << dendl;
-      return -EINVAL;
-    }
-    status = rocksdb::GetColumnFamilyOptionsFromMap(cf_opt, options_map, &cf_opt);
-    if (!status.ok()) {
-      derr << __func__ << " invalid db column family options for CF '"
-          << column.name << "': " << column.options << dendl;
-      derr << __func__ << " error = '" << status.getState() << "'" << dendl;
-      return -EINVAL;
-    }
-    install_cf_mergeop(column.name, &cf_opt);
-
-    if (!block_cache_opt.empty()) {
-      std::unordered_map<std::string, std::string> cache_options_map;
-      status = rocksdb::StringToMap(block_cache_opt, &cache_options_map);
-      if (!status.ok()) {
-       derr << __func__ << " invalid block cache options; column=" << column.name <<
-         " options=" << block_cache_opt << dendl;
-       derr << __func__ << " error = '" << status.getState() << "'" << dendl;
-       return -EINVAL;
-      }
-      bool require_new_block_cache = false;
-      std::string cache_type = cct->_conf->rocksdb_cache_type;
-      if (const auto it = cache_options_map.find("type"); it !=cache_options_map.end()) {
-       cache_type = it->second;
-       cache_options_map.erase(it);
-       require_new_block_cache = true;
-      }
-      size_t cache_size = cct->_conf->rocksdb_cache_size;
-      if (auto it = cache_options_map.find("size"); it !=cache_options_map.end()) {
-       std::string error;
-       cache_size = strict_iecstrtoll(it->second.c_str(), &error);
-       if (!error.empty()) {
-         derr << __func__ << " invalid size: '" << it->second << "'" << dendl;
-       }
-       cache_options_map.erase(it);
-       require_new_block_cache = true;
-      }
-      double high_pri_pool_ratio = 0.0;
-      if (auto it = cache_options_map.find("high_ratio"); it !=cache_options_map.end()) {
-       std::string error;
-       high_pri_pool_ratio = strict_strtod(it->second.c_str(), &error);
-       if (!error.empty()) {
-         derr << __func__ << " invalid high_pri (float): '" << it->second << "'" << dendl;
-       }
-       cache_options_map.erase(it);
-       require_new_block_cache = true;
-      }
-
-      rocksdb::BlockBasedTableOptions column_bbt_opts;
-      status = GetBlockBasedTableOptionsFromMap(bbt_opts, cache_options_map, &column_bbt_opts);
-      if (!status.ok()) {
-       derr << __func__ << " invalid block cache options; column=" << column.name <<
-         " options=" << block_cache_opt << dendl;
-       derr << __func__ << " error = '" << status.getState() << "'" << dendl;
-       return -EINVAL;
-      }
-      std::shared_ptr<rocksdb::Cache> block_cache;
-      if (column_bbt_opts.no_block_cache) {
-       // clear all settings except no_block_cache
-       // rocksdb does not like then
-       column_bbt_opts = rocksdb::BlockBasedTableOptions();
-       column_bbt_opts.no_block_cache = true;
-      } else {
-       if (require_new_block_cache) {
-         block_cache = create_block_cache(cache_type, cache_size, high_pri_pool_ratio);
-         if (!block_cache) {
-           dout(5) << __func__ << " failed to create block cache for params: " << block_cache_opt << dendl;
-           return -EINVAL;
-         }
-       } else {
-         block_cache = bbt_opts.block_cache;
-       }
-      }
-      column_bbt_opts.block_cache = block_cache;
-      cf_bbt_opts[column.name] = column_bbt_opts;
-      cf_opt.table_factory.reset(NewBlockBasedTableFactory(cf_bbt_opts[column.name]));
+      return r;
     }
     if (column.shard_cnt == 1) {
       emplace_cf(column, 0, column.name, cf_opt);
@@ -3015,13 +3043,10 @@ int RocksDBStore::prepare_for_reshard(const std::string& new_sharding,
        break;
       }
     }
-    status = rocksdb::GetColumnFamilyOptionsFromString(cf_opt, options, &cf_opt);
-    if (!status.ok()) {
-      derr << __func__ << " failure parsing column options: " << options << dendl;
-      return -EINVAL;
+    int r = update_column_family_options(base_name, options, &cf_opt);
+    if (r != 0) {
+      return r;
     }
-    if (base_name != rocksdb::kDefaultColumnFamilyName)
-      install_cf_mergeop(base_name, &cf_opt);
     cfs_to_open.emplace_back(full_name, cf_opt);
   }
 
@@ -3075,12 +3100,10 @@ int RocksDBStore::prepare_for_reshard(const std::string& new_sharding,
        break;
       }
     }
-    status = rocksdb::GetColumnFamilyOptionsFromString(cf_opt, options, &cf_opt);
-    if (!status.ok()) {
-      derr << __func__ << " failure parsing column options: " << options << dendl;
-      return -EINVAL;
+    int r = update_column_family_options(base_name, options, &cf_opt);
+    if (r != 0) {
+      return r;
     }
-    install_cf_mergeop(base_name, &cf_opt);
     rocksdb::ColumnFamilyHandle *cf;
     status = db->CreateColumnFamily(cf_opt, full_name, &cf);
     if (!status.ok()) {
index 0735e115c36489d1acd0d0f1dbc60bb501774126..529b4ffa621ab26564b521b2ddef6e90d11569bb 100644 (file)
@@ -151,9 +151,15 @@ private:
                      std::vector<rocksdb::ColumnFamilyDescriptor>& missing_cfs,
                      std::vector<std::pair<size_t, RocksDBStore::ColumnFamily> >& missing_cfs_shard);
   std::shared_ptr<rocksdb::Cache> create_block_cache(const std::string& cache_type, size_t cache_size, double cache_prio_high = 0.0);
-  int extract_block_cache_options(const std::string& opts_str,
+  int split_column_family_options(const std::string& opts_str,
                                  std::unordered_map<std::string, std::string>* column_opts_map,
                                  std::string* block_cache_opt);
+  int apply_block_cache_options(const std::string& column_name,
+                               const std::string& block_cache_opt,
+                               rocksdb::ColumnFamilyOptions* cf_opt);
+  int update_column_family_options(const std::string& base_name,
+                                  const std::string& more_options,
+                                  rocksdb::ColumnFamilyOptions* cf_opt);
   // manage async compactions
   ceph::mutex compact_queue_lock =
     ceph::make_mutex("RocksDBStore::compact_thread_lock");