From: Chunmei Liu
Date: Tue, 19 Aug 2025 21:49:41 +0000 (+0000)
Subject: crimson/os/seastore: optimize rm_key_range by removing one leaf node at
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6a864c660f52990cb7e13c5864cecceeff2898a9;p=ceph-ci.git

crimson/os/seastore: optimize rm_key_range by removing one leaf node at once.

Signed-off-by: Chunmei Liu
---

diff --git a/src/crimson/os/seastore/omap_manager.h b/src/crimson/os/seastore/omap_manager.h
index 33174b22e4c..e5664ae25d8 100644
--- a/src/crimson/os/seastore/omap_manager.h
+++ b/src/crimson/os/seastore/omap_manager.h
@@ -225,6 +225,25 @@ public:
    * @param string &first, range start
    * @param string &last, range end
    */
+  /**
+   * key_range_t
+   *
+   * Cursor shared by omap_rm_key_range and the btree nodes while a
+   * key range is removed one leaf at a time.
+   */
+  struct key_range_t {
+    /// next key to remove (inclusive); nodes rewrite it as removal advances
+    std::string first;
+    /// end of the range (exclusive); nodes treat an empty value as unbounded
+    std::string last;
+    /// depth of the omap root, lets a node tell whether it is the root
+    depth_t root_depth = 0;
+    /// set when a node is exhausted and its parent must move to the next child
+    bool get_next = false;
+    /// set once nothing is left to remove; ends the removal loop
+    bool total_complete = false;
+  };
+
   using omap_rm_key_range_iertr = base_iertr;
   using omap_rm_key_range_ret = omap_rm_key_range_iertr::future<>;
   virtual omap_rm_key_range_ret omap_rm_key_range(
diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc
index 7495839ec2f..a413ee84d43 100644
--- a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc
+++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc
@@ -210,12 +210,12 @@ BtreeOMapManager::omap_rm_key(
       return seastar::now();
     } else if (mresult.status == mutation_status_t::WAS_SPLIT) {
       return handle_root_split(
-        get_omap_context(t, omap_root), omap_root, mresult);
+	get_omap_context(t, omap_root), omap_root, mresult);
     } else if (mresult.status == mutation_status_t::NEED_MERGE) {
       auto root = *(mresult.need_merge);
       if (root->get_node_size() == 1 && omap_root.depth != 1) {
         return handle_root_merge(
-          get_omap_context(t, omap_root), omap_root, mresult);
+	  get_omap_context(t, omap_root), omap_root, mresult);
       } else {
         return seastar::now();
       }
@@ -223,7 +223,6 @@ BtreeOMapManager::omap_rm_key(
       return seastar::now();
     }
   });
-
 }
 
 BtreeOMapManager::omap_rm_key_range_ret
@@ -237,35 +236,25 @@ BtreeOMapManager::omap_rm_key_range(
   LOG_PREFIX(BtreeOMapManager::omap_rm_key_range);
   DEBUGT("{} ~ {}", t, first, last);
   assert(first <= last);
-  return seastar::do_with(
-    std::make_optional(first),
-    std::make_optional(last),
-    [this, &omap_root, &t, config](auto &first, auto &last) {
-    return omap_list(
-      omap_root,
-      t,
-      first,
-      last,
-      config);
-  }).si_then([this, &omap_root, &t](auto results) {
-    LOG_PREFIX(BtreeOMapManager::omap_rm_key_range);
-    auto &[complete, kvs] = results;
-    std::vector<std::string> keys;
-    for (const auto& [k, _] : kvs) {
-      keys.push_back(k);
+  assert(!last.empty());
+  // Each pass hands the range to the root; the nodes advance key_range.first
+  // and raise total_complete once nothing is left to remove.
+  key_range_t key_range{first, last, 0, false, false};
+  while (!key_range.total_complete) {
+    // Re-read the root every pass; a root split/merge below may change omap_root.
+    auto root = co_await get_omap_root(get_omap_context(t, omap_root), omap_root);
+    key_range.root_depth = omap_root.depth;
+    auto mresult = co_await root->rm_key_range(get_omap_context(t, omap_root), key_range);
+    if (mresult.status == mutation_status_t::WAS_SPLIT) {
+      co_await handle_root_split(get_omap_context(t, omap_root), omap_root, mresult);
+    } else if (mresult.status == mutation_status_t::NEED_MERGE) {
+      auto merge_root = *(mresult.need_merge);
+      if (merge_root->get_node_size() == 1 && omap_root.depth != 1) {
+        co_await handle_root_merge(get_omap_context(t, omap_root), omap_root, mresult);
+      }
     }
-    DEBUGT("total {} keys to remove", t, keys.size());
-    return seastar::do_with(
-      std::move(keys),
-      [this, &omap_root, &t](auto& keys) {
-      return trans_intr::do_for_each(
-        keys.begin(),
-        keys.end(),
-        [this, &omap_root, &t](auto& key) {
-        return omap_rm_key(omap_root, t, key);
-      });
-    });
-  });
+    // any other status needs no root fix-up; the loop condition decides
+  }
 }
 
 BtreeOMapManager::omap_iterate_ret
diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h
index b9f49104190..a6005727948 100644
---
a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h
+++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h
@@ -79,6 +79,20 @@ struct OMapNode : LogicalChildNode {
     omap_context_t oc,
     const std::string &key) = 0;
 
+  /**
+   * rm_key_range
+   *
+   * Removes the keys of [key_range.first, key_range.last) held by one leaf
+   * at or below this node and updates key_range so the caller knows whether,
+   * and from which key, another pass is required.
+   */
+  using rm_key_range_iertr = base_iertr;
+  using rm_key_range_ret = rm_key_range_iertr::future<mutation_result_t>;
+  using key_range_t = OMapManager::key_range_t;
+  virtual rm_key_range_ret rm_key_range(
+    omap_context_t oc,
+    key_range_t &key_range) = 0;
+
   using iterate_iertr = base_iertr;
   using iterate_ret = OMapManager::omap_iterate_ret;
   using omap_iterate_cb_t = OMapManager::omap_iterate_cb_t;
diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc
index 6eb054e099b..3c9c7746686 100644
--- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc
+++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc
@@ -223,6 +223,64 @@ OMapInnerNode::rm_key(omap_context_t oc, const std::string &key)
   });
 }
 
+/**
+ * Forwards the range removal to the child containing key_range.first and,
+ * when that child asked for its right sibling (get_next), moves
+ * key_range.first to the next child's key or passes the request upwards.
+ */
+OMapInnerNode::rm_key_range_ret
+OMapInnerNode::rm_key_range(omap_context_t oc, key_range_t &key_range)
+{
+  LOG_PREFIX(OMapInnerNode::rm_key_range);
+  DEBUGT("key range={}-{}, this: {}", oc.t, key_range.first, key_range.last, *this);
+  auto child_pt = get_containing_child(key_range.first);
+  auto extent = co_await get_child_node(oc, child_pt);
+  ceph_assert(!extent->is_btree_root());
+  auto mresult = co_await extent->rm_key_range(oc, key_range);
+  if (!key_range.total_complete && key_range.get_next) {
+    auto next_iter = child_pt + 1;
+    if (next_iter != iter_cend()) {
+      // resume from the right sibling, unless it starts at or after last
+      key_range.first = next_iter.get_key();
+      if (!key_range.last.empty() && key_range.first >= key_range.last) {
+        key_range.total_complete = true;
+      }
+      key_range.get_next = false;
+    } else if (get_meta().depth == key_range.root_depth) {
+      // the root has no child left to visit
+      key_range.total_complete = true;
+    } else {
+      // no sibling in this node: let the parent pick its next child
+      key_range.get_next = true;
+    }
+  }
+  DEBUGT("key range after rm_key_range: {}-{}, get_next: {}, total_complete: {}",
+    oc.t, key_range.first, key_range.last,
+    key_range.get_next, key_range.total_complete);
+  switch (mresult.status) {
+  case mutation_status_t::SUCCESS:
+  case mutation_status_t::FAIL:
+    co_return mresult;
+  case mutation_status_t::NEED_MERGE: {
+    if (get_node_size() > 1) {
+      co_return co_await merge_entry(oc, child_pt, *(mresult.need_merge));
+    } else {
+      // with a single entry there is no sibling to merge with
+      co_return mutation_result_t(mutation_status_t::SUCCESS,
+        std::nullopt, std::nullopt);
+    }
+  }
+  case mutation_status_t::WAS_SPLIT:
+    co_return co_await handle_split(oc, child_pt, mresult
+    ).handle_error_interruptible(
+      rm_key_range_iertr::pass_further{},
+      crimson::ct_error::assert_all{"unexpected error"}
+    );
+  default:
+    co_return mresult;
+  }
+}
+
 OMapInnerNode::iterate_ret
 OMapInnerNode::iterate(
   omap_context_t oc,
@@ -794,6 +852,82 @@ OMapLeafNode::rm_key(omap_context_t oc, const std::string &key)
 
 }
 
+/**
+ * Removes the keys of [key_range.first, key_range.last) stored in this leaf
+ * with one journal_leaf_remove_range() call and reports through key_range
+ * what is left: total_complete when the range is finished, get_next when it
+ * may continue in a following leaf.
+ */
+OMapLeafNode::rm_key_range_ret
+OMapLeafNode::rm_key_range(omap_context_t oc,
+  key_range_t &key_range)
+{
+  LOG_PREFIX(OMapLeafNode::rm_key_range);
+  DEBUGT("remove keys: {}--{}, this: {}", oc.t, key_range.first, key_range.last, *this);
+
+  // [fiter, liter): first is included, last is not; an empty bound is open
+  auto fiter = key_range.first.empty() ? iter_begin() : string_lower_bound(key_range.first);
+  auto liter = key_range.last.empty() ? iter_end() : string_lower_bound(key_range.last);
+
+  DEBUGT("list from {} to {}, begin off {} to end off {}, this: {}", oc.t,
+    fiter != iter_end() ? fiter->get_key() : "",
+    liter != iter_end() ? liter->get_key() : "",
+    fiter != iter_end() ? fiter->get_offset() : get_node_size(),
+    liter != iter_end() ? liter->get_offset() : get_node_size(),
+    *this);
+  if (fiter == iter_end()) {
+    // No key at or after first lives in this leaf, but a following leaf may
+    // still hold keys of the range, so only a root leaf may finish here.
+    key_range.get_next = false;
+    key_range.total_complete = (get_meta().depth == key_range.root_depth);
+    if (!key_range.total_complete) {
+      key_range.get_next = true;
+      key_range.first = "";
+    }
+    return rm_key_range_ret(
+      interruptible::ready_future_marker{},
+      mutation_result_t(mutation_status_t::FAIL, std::nullopt, std::nullopt));
+  }
+  if (fiter == liter) {
+    // the first key at or after first already reaches last: nothing to remove
+    key_range.total_complete = true;
+    return rm_key_range_ret(
+      interruptible::ready_future_marker{},
+      mutation_result_t(mutation_status_t::FAIL, std::nullopt, std::nullopt));
+  }
+  if (!is_mutable()) {
+    auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapLeafNode>();
+    return mut->rm_key_range(oc, key_range);
+  }
+
+  key_range.total_complete = false;
+  key_range.get_next = false;
+  if (liter != iter_end()) {
+    // a key at or after last lives in this leaf: the range ends here
+    key_range.total_complete = true;
+  } else if (get_meta().depth == key_range.root_depth) {
+    key_range.total_complete = true;
+  } else {
+    // the range may continue in the next leaf; the parent sets the new first
+    key_range.get_next = true;
+    key_range.first = "";
+  }
+
+  auto dist = liter - fiter;
+  (oc.t.get_omap_tree_stats().num_erases) += dist;
+  journal_leaf_remove_range(fiter, liter, maybe_get_delta_buffer());
+
+  if (extent_is_below_min()) {
+    return rm_key_range_ret(
+      interruptible::ready_future_marker{},
+      mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt, this->cast<OMapNode>()));
+  } else {
+    return rm_key_range_ret(
+      interruptible::ready_future_marker{},
+      mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+  }
+}
+
 OMapLeafNode::iterate_ret
 OMapLeafNode::iterate(
   omap_context_t oc,
diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h
index 844042d84c6..42f127b1a29 100644
--- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h
+++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h
@@ -180,6 +180,10 @@ struct OMapInnerNode
     omap_context_t oc,
     const std::string &key) final;
 
+  rm_key_range_ret rm_key_range(
+    omap_context_t oc,
+    key_range_t &key_range) final;
+
   iterate_ret iterate(
     omap_context_t oc,
     ObjectStore::omap_iter_seek_t &start_from,
@@ -435,6 +439,10 @@ struct OMapLeafNode
rm_key_ret rm_key(
     omap_context_t oc,
     const std::string &key) final;
+  rm_key_range_ret rm_key_range(
+    omap_context_t oc,
+    key_range_t &key_range) final;
+
   iterate_ret iterate(
     omap_context_t oc,
     ObjectStore::omap_iter_seek_t &start_from,
diff --git a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h
index 914f9b10226..b4791ca37b4 100644
--- a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h
+++ b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h
@@ -70,18 +70,28 @@ static void copy_from_local(
   assert(tgt->node == from_src->node);
   assert(to_src->node == from_src->node);
 
+  auto end = to_src->get_right_ptr_end();
+  auto key_end = to_src->get_node_key_ptr();
+  auto key_len = from_src->get_node_key_ptr() - tgt->get_node_key_ptr();
+
   auto to_copy = from_src->get_right_ptr_end() - to_src->get_right_ptr_end();
-  assert(to_copy > 0);
+  assert(to_copy >= 0);
   int adjust_offset = tgt > from_src? -len : len;
   memmove(to_src->get_right_ptr_end() + adjust_offset,
           to_src->get_right_ptr_end(),
           to_copy);
+  if (from_src > tgt) { // zero the vacated bytes so rm_key and rm_key_range leave identical content (otherwise replay may hit a crc error)
+    memset(end, 0, len);
+  }
 
   for ( auto ite = from_src; ite < to_src; ite++) {
       ite->update_offset(-adjust_offset);
   }
   memmove(tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
           to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+  if (from_src > tgt) {
+    memset(key_end - key_len, 0, key_len);
+  }
 }
 
 struct delta_inner_t {
@@ -1137,6 +1147,28 @@ public:
     }
     leaf_remove(iter);
   }
+  /**
+   * journal_leaf_remove_range
+   *
+   * Removes the entries in [_fiter, _liter) from this leaf. When a recorder
+   * is given, one remove delta per key is recorded before the entries are
+   * dropped.
+   */
+  void journal_leaf_remove_range(
+    const_iterator _fiter,
+    const_iterator _liter,
+    delta_leaf_buffer_t *recorder) {
+    assert(_fiter != iter_end());
+    assert(_fiter != _liter);
+    auto fiter = iterator(this, _fiter.index);
+    auto liter = iterator(this, _liter.index);
+    if (recorder) {
+      for (auto iter = fiter; iter != liter; ++iter) {
+        recorder->remove(iter->get_key());
+      }
+    }
+    leaf_remove_range(fiter, liter);
+  }
 
   StringKVLeafNodeLayout() : buf(nullptr) {}
 
@@ -1517,13 +1549,28 @@ private:
 
   void leaf_remove(iterator iter) {
     assert(iter != iter_end());
-    if ((iter + 1) != iter_end()) {
-      omap_leaf_key_t key = iter->get_node_key();
-      copy_from_local(key.key_len + key.val_len, iter, iter + 1, iter_end());
-    }
+    // no last-entry shortcut: copy_from_local() also zeroes the freed bytes
+    omap_leaf_key_t key = iter->get_node_key();
+    copy_from_local(key.key_len + key.val_len, iter, iter + 1, iter_end());
+
     set_size(get_size() - 1);
   }
 
+  /**
+   * leaf_remove_range
+   *
+   * Drops the entries in [fiter, liter) with a single copy_from_local()
+   * call; liter may be iter_end().
+   */
+  void leaf_remove_range(iterator fiter, iterator liter) {
+    assert(fiter != iter_end());
+    assert(fiter < liter);
+    // presumably the key/value bytes held by the removed entries (leaf_remove passes key_len + val_len for one)
+    auto adjust_len = fiter->get_right_ptr_end() - liter->get_right_ptr_end();
+    copy_from_local(adjust_len, fiter, liter, iter_end());
+
+    set_size(get_size() - (liter - fiter));
+  }
   /**
    * get_key_ptr
    *