]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kv/RocksDBStore: Add CompactOnDeletion support
author Mark Nelson <mnelson@redhat.com>
Thu, 21 Jul 2022 21:31:07 +0000 (21:31 +0000)
committer Yuri Weinstein <yweinste@redhat.com>
Fri, 5 May 2023 19:16:35 +0000 (19:16 +0000)
This commit adds support to compact column families when a certain number
of tombstone entries have been observed within a certain sliding window
during iteration.  It only helps when iterating over entries already in
SST files and not when iterating over ranges in memtables.

Likely we will still need to provide a mechanism to flush memtables and
compact column families once a certain number of rmkey or rm_range_key
calls are made.

Signed-off-by: Mark Nelson <mnelson@redhat.com>
(cherry picked from commit fba5488728e89d9b0a1c1ab94b7024fcc81b3b15)

Conflicts:
src/common/options/global.yaml.in

Cherry-pick notes:
- Conflicts due to change in configuration options format after Pacific

(cherry picked from commit 317eb8f69834fa08dbb17b19656db2359dbdcbd0)

src/common/legacy_config_opts.h
src/common/options.cc
src/kv/RocksDBStore.cc

index d9a9872784d32527bd9b7b68438f38d1c71f2bc3..ea103d7de9ebb047527a73422ccefe5c44b1ff66 100644 (file)
@@ -806,6 +806,10 @@ OPTION(rocksdb_collect_compaction_stats, OPT_BOOL) //For rocksdb, this behavior
 OPTION(rocksdb_collect_extended_stats, OPT_BOOL) //For rocksdb, this behavior will be an overhead of 5%~10%, collected only rocksdb_perf is enabled.
 OPTION(rocksdb_collect_memory_stats, OPT_BOOL) //For rocksdb, this behavior will be an overhead of 5%~10%, collected only rocksdb_perf is enabled.
 
+OPTION(rocksdb_cf_compact_on_deletion, OPT_BOOL)
+OPTION(rocksdb_cf_compact_on_deletion_sliding_window, OPT_INT)
+OPTION(rocksdb_cf_compact_on_deletion_trigger, OPT_INT)
+
 // rocksdb options that will be used for omap(if omap_backend is rocksdb)
 OPTION(filestore_rocksdb_options, OPT_STR)
 // rocksdb options that will be used in monstore
index 3d82cfc05a5cbf7d4480acddb46c069c1baae2fa..457d0ef81216347e72dd063bcdaf57241b7ff65a 100644 (file)
@@ -3972,6 +3972,22 @@ std::vector<Option> get_global_options() {
     .set_default(4_K)
     .set_description("The block size for index partitions. (0 = rocksdb default)"),
 
+    Option("rocksdb_cf_compact_on_deletion", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Compact the column family when a certain number of tombstones are observed within a given window.")
+    .set_long_description("This setting instructs RocksDB to compact a column family when a certain number of tombstones are observed during iteration within a certain sliding window. For instance if rocksdb_cf_compact_on_deletion_sliding_window is 8192 and rocksdb_cf_compact_on_deletion_trigger is 4096,  then once 4096 tombstones are observed after iteration over 8192 entries, the column family will be compacted.")
+    .add_see_also({"rocksdb_cf_compact_on_deletion_sliding_window", "rocksdb_cf_compact_on_deletion_trigger"}),
+
+    Option("rocksdb_cf_compact_on_deletion_sliding_window", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(32768)
+    .set_description("The sliding window to use when rocksdb_cf_compact_on_deletion is enabled.")
+    .add_see_also({"rocksdb_cf_compact_on_deletion"}),
+
+    Option("rocksdb_cf_compact_on_deletion_trigger", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(16384)
+    .set_description("The trigger to use when rocksdb_cf_compact_on_deletion is enabled.")
+    .add_see_also({"rocksdb_cf_compact_on_deletion"}),
+
     Option("mon_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
     .set_default("write_buffer_size=33554432,"
                 "compression=kNoCompression,"
index 8e8983c18e5e68a3151a9f85c59b0098e1b2db16..73aef36bed94c09c086a3b11b2a79362fa5e77f1 100644 (file)
@@ -24,6 +24,7 @@ namespace fs = std::experimental::filesystem;
 #include "rocksdb/cache.h"
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/utilities/convenience.h"
+#include "rocksdb/utilities/table_properties_collectors.h"
 #include "rocksdb/merge_operator.h"
 
 #include "common/perf_counters.h"
@@ -938,6 +939,14 @@ int RocksDBStore::update_column_family_options(const std::string& base_name,
       return r;
     }
   }
+
+  // Set Compact on Deletion Factory
+  if (cct->_conf->rocksdb_cf_compact_on_deletion) {
+    size_t sliding_window = cct->_conf->rocksdb_cf_compact_on_deletion_sliding_window;
+    size_t trigger = cct->_conf->rocksdb_cf_compact_on_deletion_trigger;
+    cf_opt->table_properties_collector_factories.emplace_back(
+        rocksdb::NewCompactOnDeletionCollectorFactory(sliding_window, trigger));
+  }
   return 0;
 }