From df18ccbff92d915c50f23500e396fda796592d43 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 15 Oct 2024 15:49:47 +0000 Subject: [PATCH] DNB, os/bluestore: minimize size of critical section in omap_iterate() This commit takes out both seeing and iterating over onode's OMAP out of the `Collection::lock`. It bases on work by Adam Kupczyk on the behavior of RocksDB::DBIter maintaning consistent view despite ongoing writes. Kudos to Adam. This is experimental commit; I'm adding it for testing early. DNB -- Do Not Backport. Signed-off-by: Radoslaw Zarzynski --- src/os/bluestore/BlueStore.cc | 60 +++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 5f86212318274..8d22a763fc2cc 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -13934,44 +13934,54 @@ int BlueStore::omap_iterate( if (!c->exists) { return -ENOENT; } - std::shared_lock l(c->lock); - OnodeRef o = c->get_onode(oid, false); - if (!o || !o->exists) { - dout(10) << __func__ << " " << oid << "doesn't exist" <flush(); - dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <onode.has_omap()) { - // nothing to do - return 0; - } KeyValueDB::Iterator it; + std::string tail; + std::string seek_key; + std::string_view::size_type userkey_offset_in_dbkey; { - auto bounds = KeyValueDB::IteratorBounds(); - std::string lower_bound, upper_bound; - o->get_omap_key(string(), &lower_bound); - o->get_omap_tail(&upper_bound); - bounds.lower_bound = std::move(lower_bound); - bounds.upper_bound = std::move(upper_bound); - it = db->get_iterator(o->get_omap_prefix(), 0, std::move(bounds)); + std::shared_lock l(c->lock); + + OnodeRef o = c->get_onode(oid, false); + if (!o || !o->exists) { + dout(10) << __func__ << " " << oid << "doesn't exist" <flush(); + dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <onode.has_omap()) { + return 0; // nothing to do + } + + // acquire data depedencies for seek & iterate + o->get_omap_key(start_from.seek_position, &seek_key); + o->get_omap_tail(&tail); + userkey_offset_in_dbkey = o->calc_userkey_offset_in_omap_key(); + + // acquire the iterator + { + auto bounds = KeyValueDB::IteratorBounds(); + std::string lower_bound, upper_bound; + o->get_omap_key(string(), &lower_bound); + o->get_omap_tail(&upper_bound); + bounds.lower_bound = std::move(lower_bound); + bounds.upper_bound = std::move(upper_bound); + it = db->get_iterator(o->get_omap_prefix(), 0, std::move(bounds)); + } } // seek the iterator { - std::string key; - o->get_omap_key(start_from.seek_position, &key); auto start = ceph::mono_clock::now(); if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) { - it->lower_bound(key); + it->lower_bound(seek_key); c->store->log_latency( __func__, l_bluestore_omap_lower_bound_lat, ceph::mono_clock::now() - start, c->store->cct->_conf->bluestore_log_omap_iterator_age); } else { - it->upper_bound(key); + it->upper_bound(seek_key); c->store->log_latency( __func__, l_bluestore_omap_upper_bound_lat, @@ -13981,10 +13991,6 @@ int BlueStore::omap_iterate( } // iterate! - std::string tail; - o->get_omap_tail(&tail); - const std::string_view::size_type userkey_offset_in_dbkey = - o->calc_userkey_offset_in_omap_key(); ceph::timespan next_lat_acc{0}; while (it->valid()) { const auto& db_key = it->raw_key_as_sv().second; -- 2.39.5