From 01873f48d0650cd02a08664e4786690cad10d73d Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 12 Apr 2021 11:59:31 +0800 Subject: [PATCH] crimson/onode-staged-tree: implement staged::erase() Signed-off-by: Yingxin Cheng --- .../staged-fltree/node_layout_replayable.h | 44 ++++++++- .../stages/item_iterator_stage.cc | 13 +++ .../stages/item_iterator_stage.h | 3 + .../staged-fltree/stages/node_stage.cc | 16 +++ .../staged-fltree/stages/node_stage.h | 3 + .../staged-fltree/stages/node_stage_layout.cc | 24 +++++ .../staged-fltree/stages/node_stage_layout.h | 1 + .../staged-fltree/stages/stage.h | 99 +++++++++++++++++++ .../staged-fltree/stages/sub_items_stage.cc | 44 +++++++++ .../staged-fltree/stages/sub_items_stage.h | 6 ++ 10 files changed, 251 insertions(+), 2 deletions(-) diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h index 795c1cce23da6..df54e8f7c73a9 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h @@ -76,8 +76,18 @@ struct NodeLayoutReplayableT { NodeExtentMutable& mut, const node_stage_t& node_stage, const position_t& _erase_pos) { - // TODO - ceph_abort("not implemented"); + if (_erase_pos.is_end()) { + // must be internal node + assert(node_stage.is_level_tail()); + // return erase_stage, last_pos + return update_last_to_tail(mut, node_stage); + } + + assert(node_stage.keys() != 0); + position_t erase_pos = _erase_pos; + auto erase_stage = STAGE_T::erase(mut, node_stage, erase_pos); + // return erase_stage, next_pos + return {erase_stage, erase_pos}; } static position_t make_tail( @@ -87,6 +97,36 @@ struct NodeLayoutReplayableT { // TODO ceph_abort("not implemented"); } + + private: + static std::tuple update_last_to_tail( + NodeExtentMutable& mut, + const node_stage_t& node_stage) { + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + assert(node_stage.keys() != 0); + position_t last_pos; + laddr_t last_value; + { + const laddr_packed_t* p_last_value; + STAGE_T::template get_largest_slot( + node_stage, &last_pos, nullptr, &p_last_value); + last_value = p_last_value->value; + } + + auto erase_pos = last_pos; + auto erase_stage = STAGE_T::erase(mut, node_stage, erase_pos); + assert(erase_pos.is_end()); + + node_stage_t::update_is_level_tail(mut, node_stage, true); + auto p_last_value = const_cast( + node_stage.get_end_p_laddr()); + mut.copy_in_absolute(p_last_value, last_value); + // return erase_stage, last_pos + return {erase_stage, last_pos}; + } else { + ceph_abort("impossible path"); + } + } }; } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc index 2ca2b28e92b89..b322caa3db677 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc @@ -82,6 +82,19 @@ node_offset_t ITER_T::trim_at( return trim_size; } +template +node_offset_t ITER_T::erase( + NodeExtentMutable& mut, const ITER_T& iter, const char* p_left_bound) +{ + node_offset_t erase_size = iter.p_end() - iter.p_start(); + const char* p_shift_start = p_left_bound; + assert(p_left_bound <= iter.p_start()); + extent_len_t shift_len = iter.p_start() - p_left_bound; + int shift_off = erase_size; + mut.shift_absolute(p_shift_start, shift_len, shift_off); + return erase_size; +} + #define ITER_TEMPLATE(NT) template class ITER_INST(NT) ITER_TEMPLATE(node_type_t::LEAF); ITER_TEMPLATE(node_type_t::INTERNAL); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h index c880b3c100e61..cdfa642723996 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h @@ -138,6 +138,9 @@ class item_iterator_t { static node_offset_t trim_at( NodeExtentMutable&, const item_iterator_t&, node_offset_t trimmed); + static node_offset_t erase( + NodeExtentMutable&, const item_iterator_t&, const char*); + template class Appender; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc index 88378713b7053..b617023b83d74 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc @@ -176,6 +176,22 @@ node_offset_t NODE_T::trim_at( return 0; } +template +node_offset_t NODE_T::erase_at( + NodeExtentMutable& mut, const node_extent_t& node, + index_t index, const char* p_left_bound) +{ + if constexpr (FIELD_TYPE == field_type_t::N0 || + FIELD_TYPE == field_type_t::N1) { + assert(node.keys() > 0); + assert(index < node.keys()); + assert(p_left_bound == node.p_left_bound()); + return FieldType::erase_at(mut, node.fields(), index, p_left_bound); + } else { + ceph_abort("not implemented"); + } +} + #define NODE_TEMPLATE(FT, NT) template class NODE_INST(FT, NT) NODE_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL); NODE_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h index 1d88d0dc93782..14f022714b174 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h @@ -175,6 +175,9 @@ class node_extent_t { static node_offset_t trim_at(NodeExtentMutable&, const node_extent_t&, index_t index, node_offset_t trimmed); + static node_offset_t erase_at(NodeExtentMutable&, const node_extent_t&, + index_t index, const char* p_left_bound); + template class Appender; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc index 8fb3ff1425d87..b2c4ef71a477e 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc @@ -103,6 +103,30 @@ IA_TEMPLATE(slot_0_t, KeyT::HOBJ); IA_TEMPLATE(slot_1_t, KeyT::HOBJ); IA_TEMPLATE(slot_3_t, KeyT::HOBJ); +template +node_offset_t F013_T::erase_at( + NodeExtentMutable& mut, const me_t& node, index_t index, const char* p_left_bound) +{ + auto offset_item_start = node.get_item_start_offset(index); + auto offset_item_end = node.get_item_end_offset(index); + assert(offset_item_start < offset_item_end); + auto erase_size = offset_item_end - offset_item_start; + // fix and shift the left part + update_size_at(mut, node, index + 1, -erase_size); + const char* p_shift_start = fields_start(node) + node.get_key_start_offset(index + 1); + extent_len_t shift_len = sizeof(SlotType) * (node.num_keys - index - 1); + int shift_off = -(int)sizeof(SlotType); + mut.shift_absolute(p_shift_start, shift_len, shift_off); + // shift the right part + p_shift_start = p_left_bound; + shift_len = fields_start(node) + offset_item_start - p_left_bound; + shift_off = erase_size; + mut.shift_absolute(p_shift_start, shift_len, shift_off); + // fix num_keys + mut.copy_in_absolute((void*)&node.num_keys, num_keys_t(node.num_keys - 1)); + return erase_size; +} + #define F013_TEMPLATE(ST) template struct F013_INST(ST) F013_TEMPLATE(slot_0_t); F013_TEMPLATE(slot_1_t); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h index 14ba95bf4467d..10a6e9778e93a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h @@ -171,6 +171,7 @@ struct _node_fields_013_t { static void insert_at( NodeExtentMutable&, const full_key_t& key, const me_t& node, index_t index, node_offset_t size_right); + static node_offset_t erase_at(NodeExtentMutable&, const me_t&, index_t, const char*); static void update_size_at( NodeExtentMutable&, const me_t& node, index_t index, int change); static void append_key( diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h index 3475784ce9b22..c3f270ef744e6 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h @@ -225,6 +225,7 @@ struct staged { * (!IS_BOTTOM) update_size_at(mut, src, index, size) * trim_until(mut, container, index) -> trim_size * (!IS_BOTTOM) trim_at(mut, container, index, trimmed) -> trim_size + * erase_at(mut, container, index, p_left_bound) -> erase_size * * Appender::append(const container_t& src, from, items) */ @@ -450,6 +451,11 @@ struct staged { return container_t::trim_at(mut, container, _index, trimmed); } + node_offset_t erase(NodeExtentMutable& mut, const char* p_left_bound) { + assert(!is_end()); + return container_t::erase_at(mut, container, _index, p_left_bound); + } + void encode(const char* p_node_start, ceph::bufferlist& encoded) const { container.encode(p_node_start, encoded); ceph::encode(_index, encoded); @@ -502,6 +508,7 @@ struct staged { * update_size(mut, src, size) * trim_until(mut, container) -> trim_size * trim_at(mut, container, trimmed) -> trim_size + * erase(mut, container, p_left_bound) -> erase_size */ // currently the iterative iterator is only implemented with STAGE_STRING // for in-node space efficiency @@ -764,6 +771,11 @@ struct staged { return container_t::trim_at(mut, container, trimmed); } + node_offset_t erase(NodeExtentMutable& mut, const char* p_left_bound) { + assert(!is_end()); + return container_t::erase(mut, container, p_left_bound); + } + void encode(const char* p_node_start, ceph::bufferlist& encoded) const { container.encode(p_node_start, encoded); uint8_t is_end = _is_end; @@ -833,6 +845,8 @@ struct staged { * copy_out_until(appender, to_index) (can be end) * trim_until(mut) -> trim_size * (!IS_BOTTOM) trim_at(mut, trimmed) -> trim_size + * erase: + * erase(mut, p_left_bound) -> erase_size * denc: * encode(p_node_start, encoded) * decode(p_node_start, delta) -> iterator_t @@ -2130,6 +2144,91 @@ struct staged { } } + static std::optional> + proceed_erase_recursively( + NodeExtentMutable& mut, + const container_t& container, // IN + const char* p_left_bound, // IN + position_t& pos) { // IN&OUT + auto iter = iterator_t(container); + auto& index = pos.index; + assert(is_valid_index(index)); + iter.seek_at(index); + bool is_last = iter.is_last(); + + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + auto ret = NXT_STAGE_T::proceed_erase_recursively( + mut, nxt_container, p_left_bound, pos.nxt); + if (ret.has_value()) { + // erased at lower level + auto [r_stage, r_erase_size, r_done] = *ret; + assert(r_erase_size != 0); + iter.update_size(mut, -r_erase_size); + if (r_done) { + // done, the next_pos is calculated + return ret; + } else { + if (is_last) { + // need to find the next pos at upper stage + return ret; + } else { + // done, calculate the next pos + ++index; + pos.nxt = NXT_STAGE_T::position_t::begin(); + return {{r_stage, r_erase_size, true}}; + } + } + } + // not erased at lower level + } + + // not erased yet + if (index == 0 && is_last) { + // need to erase from the upper stage + return std::nullopt; + } else { + auto erase_size = iter.erase(mut, p_left_bound); + assert(erase_size != 0); + if (is_last) { + // need to find the next pos at upper stage + return {{STAGE, erase_size, false}}; + } else { + // done, calculate the next pos (should be correct already) + if constexpr (!IS_BOTTOM) { + assert(pos.nxt == NXT_STAGE_T::position_t::begin()); + } + return {{STAGE, erase_size, true}}; + } + } + } + + static match_stage_t erase( + NodeExtentMutable& mut, + const container_t& node_stage, // IN + position_t& erase_pos) { // IN&OUT + auto p_left_bound = node_stage.p_left_bound(); + auto ret = proceed_erase_recursively( + mut, node_stage, p_left_bound, erase_pos); + if (ret.has_value()) { + auto [r_stage, r_erase_size, r_done] = *ret; + std::ignore = r_erase_size; + if (r_done) { + assert(!erase_pos.is_end()); + return r_stage; + } else { + // erased the last kv + erase_pos = position_t::end(); + return r_stage; + } + } else { + assert(node_stage.keys() == 1); + node_stage.erase_at(mut, node_stage, 0, p_left_bound); + erase_pos = position_t::end(); + return STAGE; + } + } + static std::tuple evaluate_merge( const full_key_t& left_pivot_index, const container_t& right_container) { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc index c36776ccbdb88..84e6b49c5a215 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc @@ -44,6 +44,19 @@ node_offset_t internal_sub_items_t::trim_until( return ret; } +node_offset_t internal_sub_items_t::erase_at( + NodeExtentMutable& mut, const internal_sub_items_t& sub_items, + index_t index, const char* p_left_bound) +{ + assert(index < sub_items.keys()); + node_offset_t erase_size = sizeof(internal_sub_item_t); + const char* p_shift_start = p_left_bound; + const char* p_shift_end = reinterpret_cast( + sub_items.p_first_item - index); + mut.shift_absolute(p_shift_start, p_shift_end - p_shift_start, erase_size); + return erase_size; +} + template void internal_sub_items_t::Appender::append( const internal_sub_items_t& src, index_t from, index_t items) @@ -143,6 +156,37 @@ node_offset_t leaf_sub_items_t::trim_until( return ret; } +node_offset_t leaf_sub_items_t::erase_at( + NodeExtentMutable& mut, const leaf_sub_items_t& sub_items, + index_t index, const char* p_left_bound) +{ + assert(sub_items.keys() > 0); + assert(index < sub_items.keys()); + auto p_item_start = sub_items.get_item_start(index); + auto p_item_end = sub_items.get_item_end(index); + assert(p_item_start < p_item_end); + node_offset_t item_erase_size = p_item_end - p_item_start; + node_offset_t erase_size = item_erase_size + sizeof(node_offset_t); + auto p_offset_end = (const char*)&sub_items.get_offset(index); + + // a. compensate affected offset[n] ... offset[index+1] + for (auto i = index + 1; i < sub_items.keys(); ++i) { + const node_offset_packed_t& offset_i = sub_items.get_offset(i); + mut.copy_in_absolute((void*)&offset_i, node_offset_t(offset_i.value - item_erase_size)); + } + + // b. kv[index-1] ... kv[0] ... offset[index+1] >> sizeof(node_offset_t) + mut.shift_absolute(p_item_end, p_offset_end - p_item_end, sizeof(node_offset_t)); + + // c. ... kv[n] ... kv[index+1] >> item_erase_size + mut.shift_absolute(p_left_bound, p_item_start - p_left_bound, erase_size); + + // d. update num_keys + mut.copy_in_absolute((void*)sub_items.p_num_keys, num_keys_t(sub_items.keys() - 1)); + + return erase_size; +} + template class internal_sub_items_t::Appender; template class internal_sub_items_t::Appender; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h index b4666ede2d0b1..51c67bf35c2d7 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h @@ -110,6 +110,9 @@ class internal_sub_items_t { static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, index_t); + static node_offset_t erase_at( + NodeExtentMutable&, const internal_sub_items_t&, index_t, const char*); + template class Appender; @@ -270,6 +273,9 @@ class leaf_sub_items_t { static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, index_t index); + static node_offset_t erase_at( + NodeExtentMutable&, const leaf_sub_items_t&, index_t, const char*); + template class Appender; -- 2.39.5