From e4a584a1edac8a3ba45ab4e78ece88d9e5ea72d5 Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Thu, 28 Apr 2022 21:59:43 +0000 Subject: [PATCH] Revert "bluestore: set upper and lower bounds on rocksdb omap iterators" This reverts commit d0b03f227ca7338ec9825b5ce9e549336ef82e9f. Caused a regression https://tracker.ceph.com/issues/55444 Signed-off-by: Neha Ojha --- src/kv/KeyValueDB.h | 13 +-- src/kv/LevelDBStore.h | 2 +- src/kv/MemDB.h | 2 +- src/kv/RocksDBStore.cc | 122 ++++++------------------- src/kv/RocksDBStore.h | 40 +------- src/os/bluestore/BlueStore.cc | 25 +---- src/test/ObjectMap/KeyValueDBMemory.cc | 2 +- src/test/ObjectMap/KeyValueDBMemory.h | 2 +- 8 files changed, 41 insertions(+), 167 deletions(-) diff --git a/src/kv/KeyValueDB.h b/src/kv/KeyValueDB.h index 2ecccf850713..a9a7965117c0 100644 --- a/src/kv/KeyValueDB.h +++ b/src/kv/KeyValueDB.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include "include/encoding.h" @@ -315,17 +314,11 @@ private: public: typedef uint32_t IteratorOpts; static const uint32_t ITERATOR_NOCACHE = 1; - - struct IteratorBounds { - std::optional lower_bound; - std::optional upper_bound; - }; - - virtual WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) = 0; - virtual Iterator get_iterator(const std::string &prefix, IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) { + virtual WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) = 0; + virtual Iterator get_iterator(const std::string &prefix, IteratorOpts opts = 0) { return std::make_shared( prefix, - get_wholespace_iterator(opts, std::move(bounds))); + get_wholespace_iterator(opts)); } virtual uint64_t get_estimated_size(std::map &extra) = 0; diff --git a/src/kv/LevelDBStore.h b/src/kv/LevelDBStore.h index b48642d12c34..085193ee0332 100644 --- a/src/kv/LevelDBStore.h +++ b/src/kv/LevelDBStore.h @@ -402,7 +402,7 @@ err: } - WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) override { + WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) override { return std::make_shared( db->NewIterator(leveldb::ReadOptions())); } diff --git a/src/kv/MemDB.h b/src/kv/MemDB.h index 80fed647efbd..32d81db225ed 100644 --- a/src/kv/MemDB.h +++ b/src/kv/MemDB.h @@ -212,7 +212,7 @@ public: return 0; } - WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) override { + WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) override { return std::shared_ptr( new MDBWholeSpaceIteratorImpl(&m_map, &m_lock, &iterator_seq_no, m_using_btree)); } diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc index dcfa645dc01c..7d7fee68c73d 100644 --- a/src/kv/RocksDBStore.cc +++ b/src/kv/RocksDBStore.cc @@ -626,18 +626,6 @@ bool RocksDBStore::is_column_family(const std::string& prefix) { return cf_handles.count(prefix); } -std::string_view RocksDBStore::get_key_hash_view(const prefix_shards& shards, const char* key, const size_t keylen) { - uint32_t hash_l = std::min(shards.hash_l, keylen); - uint32_t hash_h = std::min(shards.hash_h, keylen); - return { key + hash_l, hash_h - hash_l }; -} - -rocksdb::ColumnFamilyHandle *RocksDBStore::get_key_cf(const prefix_shards& shards, const char* key, const size_t keylen) { - auto sv = get_key_hash_view(shards, key, keylen); - uint32_t hash = ceph_str_hash_rjenkins(sv.data(), sv.size()); - return shards.handles[hash % shards.handles.size()]; -} - rocksdb::ColumnFamilyHandle *RocksDBStore::get_cf_handle(const std::string& prefix, const std::string& key) { auto iter = cf_handles.find(prefix); if (iter == cf_handles.end()) { @@ -646,7 +634,10 @@ rocksdb::ColumnFamilyHandle *RocksDBStore::get_cf_handle(const std::string& pref if (iter->second.handles.size() == 1) { return iter->second.handles[0]; } else { - return get_key_cf(iter->second, key.data(), key.size()); + uint32_t hash_l = std::min(iter->second.hash_l, key.size()); + uint32_t hash_h = std::min(iter->second.hash_h, key.size()); + uint32_t hash = ceph_str_hash_rjenkins(&key[hash_l], hash_h - hash_l); + return iter->second.handles[hash % iter->second.handles.size()]; } } } @@ -659,36 +650,10 @@ rocksdb::ColumnFamilyHandle *RocksDBStore::get_cf_handle(const std::string& pref if (iter->second.handles.size() == 1) { return iter->second.handles[0]; } else { - return get_key_cf(iter->second, key, keylen); - } - } -} - -/** - * If the specified IteratorBounds arg has both an upper and a lower bound defined, and they have equal placement hash - * strings, we can be sure that the entire iteration range exists in a single CF. In that case, we return the relevant - * CF handle. In all other cases, we return a nullptr to indicate that the specified bounds cannot necessarily be mapped - * to a single CF. - */ -rocksdb::ColumnFamilyHandle *RocksDBStore::get_cf_handle(const std::string& prefix, const IteratorBounds& bounds) { - if (!bounds.lower_bound || !bounds.upper_bound) { - return nullptr; - } - auto iter = cf_handles.find(prefix); - if (iter == cf_handles.end() || iter->second.hash_l != 0) { - return nullptr; - } else { - if (iter->second.handles.size() == 1) { - return iter->second.handles[0]; - } else { - auto lower_bound_hash_str = get_key_hash_view(iter->second, bounds.lower_bound->data(), bounds.lower_bound->size()); - auto upper_bound_hash_str = get_key_hash_view(iter->second, bounds.upper_bound->data(), bounds.upper_bound->size()); - if (lower_bound_hash_str == upper_bound_hash_str) { - auto key = *bounds.lower_bound; - return get_key_cf(iter->second, key.data(), key.size()); - } else { - return nullptr; - } + uint32_t hash_l = std::min(iter->second.hash_l, keylen); + uint32_t hash_h = std::min(iter->second.hash_h, keylen); + uint32_t hash = ceph_str_hash_rjenkins(&key[hash_l], hash_h - hash_l); + return iter->second.handles[hash % iter->second.handles.size()]; } } } @@ -2237,27 +2202,10 @@ class CFIteratorImpl : public KeyValueDB::IteratorImpl { protected: string prefix; rocksdb::Iterator *dbiter; - const KeyValueDB::IteratorBounds bounds; - const rocksdb::Slice iterate_lower_bound; - const rocksdb::Slice iterate_upper_bound; public: - explicit CFIteratorImpl(const RocksDBStore* db, - const std::string& p, - rocksdb::ColumnFamilyHandle* cf, - KeyValueDB::IteratorBounds bounds_) - : prefix(p), bounds(std::move(bounds_)), - iterate_lower_bound(make_slice(bounds.lower_bound)), - iterate_upper_bound(make_slice(bounds.upper_bound)) - { - auto options = rocksdb::ReadOptions(); - if (bounds.lower_bound) { - options.iterate_lower_bound = &iterate_lower_bound; - } - if (bounds.upper_bound) { - options.iterate_upper_bound = &iterate_upper_bound; - } - dbiter = db->db->NewIterator(options, cf); - } + explicit CFIteratorImpl(const std::string& p, + rocksdb::Iterator *iter) + : prefix(p), dbiter(iter) { } ~CFIteratorImpl() { delete dbiter; } @@ -2789,29 +2737,16 @@ private: const RocksDBStore* db; KeyLess keyless; string prefix; - const KeyValueDB::IteratorBounds bounds; - const rocksdb::Slice iterate_lower_bound; - const rocksdb::Slice iterate_upper_bound; std::vector iters; public: explicit ShardMergeIteratorImpl(const RocksDBStore* db, const std::string& prefix, - const std::vector& shards, - KeyValueDB::IteratorBounds bounds_) - : db(db), keyless(db->comparator), prefix(prefix), bounds(std::move(bounds_)), - iterate_lower_bound(make_slice(bounds.lower_bound)), - iterate_upper_bound(make_slice(bounds.upper_bound)) + const std::vector& shards) + : db(db), keyless(db->comparator), prefix(prefix) { iters.reserve(shards.size()); - auto options = rocksdb::ReadOptions(); - if (bounds.lower_bound) { - options.iterate_lower_bound = &iterate_lower_bound; - } - if (bounds.upper_bound) { - options.iterate_upper_bound = &iterate_upper_bound; - } for (auto& s : shards) { - iters.push_back(db->db->NewIterator(options, s)); + iters.push_back(db->db->NewIterator(rocksdb::ReadOptions(), s)); } } ~ShardMergeIteratorImpl() { @@ -2982,31 +2917,22 @@ public: } }; -KeyValueDB::Iterator RocksDBStore::get_iterator(const std::string& prefix, IteratorOpts opts, IteratorBounds bounds) +KeyValueDB::Iterator RocksDBStore::get_iterator(const std::string& prefix, IteratorOpts opts) { auto cf_it = cf_handles.find(prefix); if (cf_it != cf_handles.end()) { - rocksdb::ColumnFamilyHandle* cf = nullptr; if (cf_it->second.handles.size() == 1) { - cf = cf_it->second.handles[0]; - } else { - cf = get_cf_handle(prefix, bounds); - } - if (cf) { return std::make_shared( - this, - prefix, - cf, - std::move(bounds)); + prefix, + db->NewIterator(rocksdb::ReadOptions(), cf_it->second.handles[0])); } else { return std::make_shared( this, prefix, - cf_it->second.handles, - std::move(bounds)); + cf_it->second.handles); } } else { - return KeyValueDB::get_iterator(prefix, opts, std::move(bounds)); + return KeyValueDB::get_iterator(prefix, opts); } } @@ -3015,11 +2941,14 @@ rocksdb::Iterator* RocksDBStore::new_shard_iterator(rocksdb::ColumnFamilyHandle* return db->NewIterator(rocksdb::ReadOptions(), cf); } -RocksDBStore::WholeSpaceIterator RocksDBStore::get_wholespace_iterator(IteratorOpts opts, IteratorBounds bounds) +RocksDBStore::WholeSpaceIterator RocksDBStore::get_wholespace_iterator(IteratorOpts opts) { if (cf_handles.size() == 0) { + rocksdb::ReadOptions opt = rocksdb::ReadOptions(); + if (opts & ITERATOR_NOCACHE) + opt.fill_cache=false; return std::make_shared( - this, default_cf, opts, std::move(bounds)); + db->NewIterator(opt, default_cf)); } else { return std::make_shared(this); } @@ -3027,7 +2956,8 @@ RocksDBStore::WholeSpaceIterator RocksDBStore::get_wholespace_iterator(IteratorO RocksDBStore::WholeSpaceIterator RocksDBStore::get_default_cf_iterator() { - return std::make_shared(this, default_cf, 0, IteratorBounds()); + return std::make_shared( + db->NewIterator(rocksdb::ReadOptions(), default_cf)); } int RocksDBStore::prepare_for_reshard(const std::string& new_sharding, diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h index 37833bf74a4d..529b4ffa621a 100644 --- a/src/kv/RocksDBStore.h +++ b/src/kv/RocksDBStore.h @@ -65,14 +65,6 @@ namespace rocksdb{ extern rocksdb::Logger *create_rocksdb_ceph_logger(); -inline rocksdb::Slice make_slice(const std::optional& bound) { - if (bound) { - return {*bound}; - } else { - return {}; - } -} - /** * Uses RocksDB to implement the KeyValueDB interface */ @@ -92,7 +84,6 @@ class RocksDBStore : public KeyValueDB { uint64_t cache_size = 0; bool set_cache_flag = false; friend class ShardMergeIteratorImpl; - friend class CFIteratorImpl; friend class WholeMergeIteratorImpl; /* * See RocksDB's definition of a column family(CF) and how to use it. @@ -129,11 +120,8 @@ private: void add_column_family(const std::string& cf_name, uint32_t hash_l, uint32_t hash_h, size_t shard_idx, rocksdb::ColumnFamilyHandle *handle); bool is_column_family(const std::string& prefix); - std::string_view get_key_hash_view(const prefix_shards& shards, const char* key, const size_t keylen); - rocksdb::ColumnFamilyHandle *get_key_cf(const prefix_shards& shards, const char* key, const size_t keylen); rocksdb::ColumnFamilyHandle *get_cf_handle(const std::string& prefix, const std::string& key); rocksdb::ColumnFamilyHandle *get_cf_handle(const std::string& prefix, const char* key, size_t keylen); - rocksdb::ColumnFamilyHandle *get_cf_handle(const std::string& prefix, const IteratorBounds& bounds); int submit_common(rocksdb::WriteOptions& woptions, KeyValueDB::Transaction t); int install_cf_mergeop(const std::string &cf_name, rocksdb::ColumnFamilyOptions *cf_opt); @@ -354,29 +342,9 @@ public: public KeyValueDB::WholeSpaceIteratorImpl { protected: rocksdb::Iterator *dbiter; - const KeyValueDB::IteratorBounds bounds; - const rocksdb::Slice iterate_lower_bound; - const rocksdb::Slice iterate_upper_bound; public: - explicit RocksDBWholeSpaceIteratorImpl(const RocksDBStore* db, - rocksdb::ColumnFamilyHandle* cf, - const KeyValueDB::IteratorOpts opts, - KeyValueDB::IteratorBounds bounds_) : - bounds(std::move(bounds_)), - iterate_lower_bound(make_slice(bounds.lower_bound)), - iterate_upper_bound(make_slice(bounds.upper_bound)) - { - rocksdb::ReadOptions options = rocksdb::ReadOptions(); - if (opts & ITERATOR_NOCACHE) - options.fill_cache=false; - if (bounds.lower_bound) { - options.iterate_lower_bound = &iterate_lower_bound; - } - if (bounds.upper_bound) { - options.iterate_upper_bound = &iterate_upper_bound; - } - dbiter = db->db->NewIterator(options, cf); - } + explicit RocksDBWholeSpaceIteratorImpl(rocksdb::Iterator *iter) : + dbiter(iter) { } //virtual ~RocksDBWholeSpaceIteratorImpl() { } ~RocksDBWholeSpaceIteratorImpl() override; @@ -399,7 +367,7 @@ public: size_t value_size() override; }; - Iterator get_iterator(const std::string& prefix, IteratorOpts opts = 0, IteratorBounds = IteratorBounds()) override; + Iterator get_iterator(const std::string& prefix, IteratorOpts opts = 0) override; private: /// this iterator spans single cf rocksdb::Iterator* new_shard_iterator(rocksdb::ColumnFamilyHandle* cf); @@ -532,7 +500,7 @@ err: return nullptr; } - WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) override; + WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) override; private: WholeSpaceIterator get_default_cf_iterator(); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 43a206e55251..6ab4922da6bb 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -11001,13 +11001,10 @@ int BlueStore::_onode_omap_get( o->flush(); { const string& prefix = o->get_omap_prefix(); + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; o->get_omap_header(&head); o->get_omap_tail(&tail); - auto bounds = KeyValueDB::IteratorBounds(); - bounds.lower_bound = head; - bounds.upper_bound = tail; - KeyValueDB::Iterator it = db->get_iterator(prefix, 0, std::move(bounds)); it->lower_bound(head); while (it->valid()) { if (it->key() == head) { @@ -11089,13 +11086,10 @@ int BlueStore::omap_get_keys( o->flush(); { const string& prefix = o->get_omap_prefix(); + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; o->get_omap_key(string(), &head); o->get_omap_tail(&tail); - auto bounds = KeyValueDB::IteratorBounds(); - bounds.lower_bound = head; - bounds.upper_bound = tail; - KeyValueDB::Iterator it = db->get_iterator(prefix, 0, std::move(bounds)); it->lower_bound(head); while (it->valid()) { if (it->key() >= tail) { @@ -11280,15 +11274,7 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator( } o->flush(); dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <onode.has_omap()) { - std::string lower_bound, upper_bound; - o->get_omap_key(string(), &lower_bound); - o->get_omap_tail(&upper_bound); - bounds.lower_bound = std::move(lower_bound); - bounds.upper_bound = std::move(upper_bound); - } - KeyValueDB::Iterator it = db->get_iterator(o->get_omap_prefix(), 0, std::move(bounds)); + KeyValueDB::Iterator it = db->get_iterator(o->get_omap_prefix()); return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o, it)); } @@ -15554,13 +15540,10 @@ int BlueStore::_clone(TransContext *txc, newo->onode.set_omap_flags(per_pool_omap == OMAP_BULK); } const string& prefix = newo->get_omap_prefix(); + KeyValueDB::Iterator it = db->get_iterator(prefix); string head, tail; oldo->get_omap_header(&head); oldo->get_omap_tail(&tail); - auto bounds = KeyValueDB::IteratorBounds(); - bounds.lower_bound = head; - bounds.upper_bound = tail; - KeyValueDB::Iterator it = db->get_iterator(prefix, 0, std::move(bounds)); it->lower_bound(head); while (it->valid()) { if (it->key() >= tail) { diff --git a/src/test/ObjectMap/KeyValueDBMemory.cc b/src/test/ObjectMap/KeyValueDBMemory.cc index b2b351baef44..234e963397e3 100644 --- a/src/test/ObjectMap/KeyValueDBMemory.cc +++ b/src/test/ObjectMap/KeyValueDBMemory.cc @@ -234,7 +234,7 @@ int KeyValueDBMemory::rm_range_keys(const string &prefix, const string &start, c return 0; } -KeyValueDB::WholeSpaceIterator KeyValueDBMemory::get_wholespace_iterator(IteratorOpts opts, IteratorBounds bounds) { +KeyValueDB::WholeSpaceIterator KeyValueDBMemory::get_wholespace_iterator(IteratorOpts opts) { return std::shared_ptr( new WholeSpaceMemIterator(this) ); diff --git a/src/test/ObjectMap/KeyValueDBMemory.h b/src/test/ObjectMap/KeyValueDBMemory.h index da386ccb908b..5e7d3f0e1675 100644 --- a/src/test/ObjectMap/KeyValueDBMemory.h +++ b/src/test/ObjectMap/KeyValueDBMemory.h @@ -186,5 +186,5 @@ private: friend class WholeSpaceMemIterator; public: - WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) override; + WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) override; }; -- 2.47.3