From d91dff3e3418b5dd977f026f8393328f47e7bab5 Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Thu, 21 Jul 2022 21:31:07 +0000 Subject: [PATCH] kv/RocksDBStore: Add CompactOnDeletion support This commit adds support to compact column families when a certain number of tombstone entries have been observed within a certain sliding window during iteration. It only helps when iterating over entries already in SST files and not when iterating over ranges in memtables. Likely we will still need to provide a mechanism to flush memtables and compact column families once a certain number of rmkey or rm_range_key calls are made. Signed-off-by: Mark Nelson (cherry picked from commit fba5488728e89d9b0a1c1ab94b7024fcc81b3b15) --- src/common/options/global.yaml.in | 30 ++++++++++++++++++++++++++++++ src/kv/RocksDBStore.cc | 9 +++++++++ 2 files changed, 39 insertions(+) diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index 2e8d3304bf6c0..18890f2c8492c 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -3528,6 +3528,36 @@ options: # osd_*_priority determines the ratio of available io between client and # recovery. Each option may be set between # 1..63. +- name: rocksdb_cf_compact_on_deletion + type: bool + level: dev + desc: Compact the column family when a certain number of tombstones are observed within a given window. + long_desc: 'This setting instructs RocksDB to compact a column family when a certain + number of tombstones are observed during iteration within a certain sliding window. + For instance if rocksdb_cf_compact_on_deletion_sliding_window is 8192 and + rocksdb_cf_compact_on_deletion_trigger is 4096, then once 4096 tombstones are + observed after iteration over 8192 entries, the column family will be compacted.' 
+ default: true + with_legacy: true + see_also: + - rocksdb_cf_compact_on_deletion_sliding_window + - rocksdb_cf_compact_on_deletion_trigger +- name: rocksdb_cf_compact_on_deletion_sliding_window + type: int + level: dev + desc: The sliding window to use when rocksdb_cf_compact_on_deletion is enabled. + default: 32768 + with_legacy: true + see_also: + - rocksdb_cf_compact_on_deletion +- name: rocksdb_cf_compact_on_deletion_trigger + type: int + level: dev + desc: The trigger to use when rocksdb_cf_compact_on_deletion is enabled. + default: 16384 + with_legacy: true + see_also: + - rocksdb_cf_compact_on_deletion - name: osd_client_op_priority type: uint level: advanced diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc index be032b4fa862a..a3ecf3a5a1f7d 100644 --- a/src/kv/RocksDBStore.cc +++ b/src/kv/RocksDBStore.cc @@ -18,6 +18,7 @@ #include "rocksdb/cache.h" #include "rocksdb/filter_policy.h" #include "rocksdb/utilities/convenience.h" +#include "rocksdb/utilities/table_properties_collectors.h" #include "rocksdb/merge_operator.h" #include "common/perf_counters.h" @@ -934,6 +935,14 @@ int RocksDBStore::update_column_family_options(const std::string& base_name, return r; } } + + // Set Compact on Deletion Factory + if (cct->_conf->rocksdb_cf_compact_on_deletion) { + size_t sliding_window = cct->_conf->rocksdb_cf_compact_on_deletion_sliding_window; + size_t trigger = cct->_conf->rocksdb_cf_compact_on_deletion_trigger; + cf_opt->table_properties_collector_factories.emplace_back( + rocksdb::NewCompactOnDeletionCollectorFactory(sliding_window, trigger)); + } return 0; } -- 2.39.5