From 4ff02f53fe722c20dbf0bb51d9b786d5f94d18ab Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sat, 7 Oct 2023 14:01:32 +0800 Subject: [PATCH] crimson/os/seastore/onode_manager: avoid unnecessary delta related overhead Signed-off-by: Xuehan Xu --- .../staged-fltree/fltree_onode_manager.cc | 126 ++++++++++++- .../staged-fltree/fltree_onode_manager.h | 175 ++++++++++++------ .../onode_tree/test_fltree_onode_manager.cc | 9 +- 3 files changed, 246 insertions(+), 64 deletions(-) diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc index dc6c183f538..6243252682a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc @@ -9,6 +9,125 @@ SET_SUBSYS(seastore_onode); namespace crimson::os::seastore::onode { +void FLTreeOnode::Recorder::apply_value_delta( + ceph::bufferlist::const_iterator &bliter, + NodeExtentMutable &value, + laddr_t value_addr) +{ + LOG_PREFIX(FLTreeOnode::Recorder::apply_value_delta); /* replays one op written by encode_update: the op byte, then that op's payload (if any) */ + delta_op_t op; + try { + ceph::decode(op, bliter); + auto &mlayout = *reinterpret_cast(value.get_write()); + switch (op) { + case delta_op_t::UPDATE_ONODE_SIZE: + DEBUG("update onode size"); + bliter.copy(sizeof(mlayout.size), (char *)&mlayout.size); + break; + case delta_op_t::UPDATE_OMAP_ROOT: + DEBUG("update omap root"); + bliter.copy(sizeof(mlayout.omap_root), (char *)&mlayout.omap_root); + break; + case delta_op_t::UPDATE_XATTR_ROOT: + DEBUG("update xattr root"); + bliter.copy(sizeof(mlayout.xattr_root), (char *)&mlayout.xattr_root); + break; + case delta_op_t::UPDATE_OBJECT_DATA: + DEBUG("update object data"); + bliter.copy(sizeof(mlayout.object_data), (char *)&mlayout.object_data); + break; + case delta_op_t::UPDATE_OBJECT_INFO: + DEBUG("update object info"); + bliter.copy(onode_layout_t::MAX_OI_LENGTH, (char *)&mlayout.oi[0]); + 
ceph::decode(mlayout.oi_size, bliter); + break; + case delta_op_t::UPDATE_SNAPSET: + DEBUG("update snapset"); + bliter.copy(onode_layout_t::MAX_SS_LENGTH, (char *)&mlayout.ss[0]); + ceph::decode(mlayout.ss_size, bliter); + break; + case delta_op_t::CLEAR_OBJECT_INFO: + DEBUG("clear object info"); + memset(&mlayout.oi[0], 0, mlayout.oi_size); + mlayout.oi_size = 0; + break; + case delta_op_t::CLEAR_SNAPSET: + DEBUG("clear snapset"); + memset(&mlayout.ss[0], 0, mlayout.ss_size); + mlayout.ss_size = 0; + break; + case delta_op_t::CREATE_DEFAULT: + mlayout = onode_layout_t{}; + break; + default: + ceph_abort(); + } + } catch (buffer::error& e) { + ceph_abort(); + } +} + +void FLTreeOnode::Recorder::encode_update( + NodeExtentMutable &payload_mut, delta_op_t op) +{ + LOG_PREFIX(FLTreeOnode::Recorder::encode_update); /* appends the op byte, then the bytes of the layout field that op covers */ + auto &layout = *reinterpret_cast( + payload_mut.get_read()); + auto &encoded = get_encoded(payload_mut); + ceph::encode(op, encoded); + switch(op) { + case delta_op_t::UPDATE_ONODE_SIZE: + DEBUG("update onode size"); + encoded.append( + (const char *)&layout.size, + sizeof(layout.size)); + break; + case delta_op_t::UPDATE_OMAP_ROOT: + DEBUG("update omap root"); + encoded.append( + (const char *)&layout.omap_root, + sizeof(layout.omap_root)); + break; + case delta_op_t::UPDATE_XATTR_ROOT: + DEBUG("update xattr root"); + encoded.append( + (const char *)&layout.xattr_root, + sizeof(layout.xattr_root)); + break; + case delta_op_t::UPDATE_OBJECT_DATA: + DEBUG("update object data"); + encoded.append( + (const char *)&layout.object_data, + sizeof(layout.object_data)); + break; + case delta_op_t::UPDATE_OBJECT_INFO: + DEBUG("update object info"); + encoded.append( + (const char *)&layout.oi[0], + onode_layout_t::MAX_OI_LENGTH); + ceph::encode(layout.oi_size, encoded); + break; + case delta_op_t::UPDATE_SNAPSET: + DEBUG("update snapset"); + encoded.append( + (const char *)&layout.ss[0], + onode_layout_t::MAX_SS_LENGTH); + ceph::encode(layout.ss_size, encoded); + 
break; + case delta_op_t::CREATE_DEFAULT: + DEBUG("create default layout"); + break; /* op byte only, no payload; break so only this op's message is logged */ + case delta_op_t::CLEAR_OBJECT_INFO: + DEBUG("clear object info"); + break; /* op byte only, no payload */ + case delta_op_t::CLEAR_SNAPSET: + DEBUG("clear snapset"); + break; + default: + ceph_abort(); + } +} + FLTreeOnodeManager::contains_onode_ret FLTreeOnodeManager::contains_onode( Transaction &trans, const ghobject_t &hoid) @@ -57,9 +176,7 @@ FLTreeOnodeManager::get_or_create_onode( cursor.value()); if (created) { DEBUGT("created onode for entry for {}", trans, hoid); - onode->with_mutable_layout(trans, [](onode_layout_t &mlayout) { - mlayout = onode_layout_t{}; - }); + onode->create_default_layout(trans); } return get_or_create_onode_iertr::make_ready_future(onode); }); @@ -93,9 +210,6 @@ FLTreeOnodeManager::erase_onode_ret FLTreeOnodeManager::erase_onode( { auto &flonode = static_cast(*onode); assert(flonode.is_alive()); - if (flonode.status == FLTreeOnode::status_t::MUTATED) { - flonode.populate_recorder(trans); - } flonode.mark_delete(); return tree.erase(trans, flonode); } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index d5a0dfc7bd5..86f5cea883b 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -47,6 +47,17 @@ struct FLTreeOnode final : Onode, Value { Value(std::forward(args)...) 
{} struct Recorder : public ValueDeltaRecorder { + enum class delta_op_t : uint8_t { + UPDATE_ONODE_SIZE, + UPDATE_OMAP_ROOT, + UPDATE_XATTR_ROOT, + UPDATE_OBJECT_DATA, + UPDATE_OBJECT_INFO, + UPDATE_SNAPSET, + CLEAR_OBJECT_INFO, + CLEAR_SNAPSET, + CREATE_DEFAULT + }; Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {} value_magic_t get_header_magic() const final { @@ -56,18 +67,9 @@ struct FLTreeOnode final : Onode, Value { void apply_value_delta( ceph::bufferlist::const_iterator &bliter, NodeExtentMutable &value, - laddr_t) final { - assert(value.get_length() == sizeof(onode_layout_t)); - bliter.copy(value.get_length(), value.get_write()); - } + laddr_t value_addr) final; - void record_delta(NodeExtentMutable &value) { - // TODO: probably could use versioning, etc - assert(value.get_length() == sizeof(onode_layout_t)); - ceph::buffer::ptr bptr(value.get_length()); - memcpy(bptr.c_str(), value.get_read(), value.get_length()); - get_encoded(value).append(bptr); - } + void encode_update(NodeExtentMutable &payload_mut, delta_op_t op); }; bool is_alive() const { @@ -78,86 +80,153 @@ struct FLTreeOnode final : Onode, Value { return *read_payload(); } - template - void with_mutable_layout(Transaction &t, Func&& f) { + template + void with_mutable_layout( + Transaction &t, + layout_func_t &&layout_func) { assert(status != status_t::DELETED); auto p = prepare_mutate_payload< onode_layout_t, Recorder>(t); status = status_t::MUTATED; - f(*reinterpret_cast(p.first.get_write())); - populate_recorder(t); + layout_func(p.first, p.second); /* p.second is the Recorder*; every caller in this file null-checks it before encoding */ + status = status_t::STABLE; } - void populate_recorder(Transaction &t) { - assert(status == status_t::MUTATED); - auto p = prepare_mutate_payload< - onode_layout_t, - Recorder>(t); - if (p.second) { - p.second->record_delta( - p.first); - } - status = status_t::STABLE; + void create_default_layout(Transaction &t) { + with_mutable_layout( + t, + [](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + 
payload_mut.get_write()); + mlayout = onode_layout_t{}; + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::CREATE_DEFAULT); + } + }); } void update_onode_size(Transaction &t, uint32_t size) final { - with_mutable_layout(t, [size](onode_layout_t &mlayout) { - mlayout.size = size; + with_mutable_layout( + t, + [size](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + mlayout.size = size; + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::UPDATE_ONODE_SIZE); + } }); } void update_omap_root(Transaction &t, omap_root_t &oroot) final { - with_mutable_layout(t, [&oroot](onode_layout_t &mlayout) { - mlayout.omap_root.update(oroot); + with_mutable_layout( + t, + [&oroot](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + mlayout.omap_root.update(oroot); + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::UPDATE_OMAP_ROOT); + } }); } void update_xattr_root(Transaction &t, omap_root_t &xroot) final { - with_mutable_layout(t, [&xroot](onode_layout_t &mlayout) { - mlayout.xattr_root.update(xroot); + with_mutable_layout( + t, + [&xroot](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + mlayout.xattr_root.update(xroot); + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::UPDATE_XATTR_ROOT); + } }); } void update_object_data(Transaction &t, object_data_t &odata) final { - with_mutable_layout(t, [&odata](onode_layout_t &mlayout) { - mlayout.object_data.update(odata); + with_mutable_layout( + t, + [&odata](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + mlayout.object_data.update(odata); + if (recorder) { + recorder->encode_update( + payload_mut, 
Recorder::delta_op_t::UPDATE_OBJECT_DATA); + } }); } void update_object_info(Transaction &t, ceph::bufferlist &oi_bl) final { - with_mutable_layout(t, [&oi_bl](onode_layout_t &mlayout) { - maybe_inline_memcpy( - &mlayout.oi[0], - oi_bl.c_str(), - oi_bl.length(), - onode_layout_t::MAX_OI_LENGTH); - mlayout.oi_size = oi_bl.length(); + with_mutable_layout( + t, + [&oi_bl](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + maybe_inline_memcpy( + &mlayout.oi[0], + oi_bl.c_str(), + oi_bl.length(), + onode_layout_t::MAX_OI_LENGTH); + mlayout.oi_size = oi_bl.length(); + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::UPDATE_OBJECT_INFO); + } }); } void clear_object_info(Transaction &t) final { - with_mutable_layout(t, [](onode_layout_t &mlayout) { - memset(&mlayout.oi[0], 0, mlayout.oi_size); - mlayout.oi_size = 0; + with_mutable_layout( + t, [](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + memset(&mlayout.oi[0], 0, mlayout.oi_size); + mlayout.oi_size = 0; + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::CLEAR_OBJECT_INFO); + } }); } void update_snapset(Transaction &t, ceph::bufferlist &ss_bl) final { - with_mutable_layout(t, [&ss_bl](onode_layout_t &mlayout) { - maybe_inline_memcpy( - &mlayout.ss[0], - ss_bl.c_str(), - ss_bl.length(), - onode_layout_t::MAX_OI_LENGTH); - mlayout.ss_size = ss_bl.length(); + with_mutable_layout( + t, + [&ss_bl](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + maybe_inline_memcpy( + &mlayout.ss[0], + ss_bl.c_str(), + ss_bl.length(), + onode_layout_t::MAX_SS_LENGTH); /* destination is mlayout.ss, so pair it with MAX_SS_LENGTH like the UPDATE_SNAPSET delta cases do; NOTE(review): confirm the meaning of this argument against maybe_inline_memcpy */ + mlayout.ss_size = ss_bl.length(); + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::UPDATE_SNAPSET); + } }); } void clear_snapset(Transaction &t) final { - 
with_mutable_layout(t, [](onode_layout_t &mlayout) { - memset(&mlayout.ss[0], 0, mlayout.ss_size); - mlayout.ss_size = 0; + with_mutable_layout( + t, + [](NodeExtentMutable &payload_mut, Recorder *recorder) { + auto &mlayout = *reinterpret_cast( + payload_mut.get_write()); + memset(&mlayout.ss[0], 0, mlayout.ss_size); + mlayout.ss_size = 0; + if (recorder) { + recorder->encode_update( + payload_mut, Recorder::delta_op_t::CLEAR_SNAPSET); + } }); } diff --git a/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc index cd5dd4407f8..92ab147ef89 100644 --- a/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc +++ b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc @@ -31,11 +31,10 @@ struct onode_item_t { void initialize(Transaction& t, Onode& value) const { auto &ftvalue = static_cast(value); - ftvalue.with_mutable_layout(t, [this, &value](auto &mlayout) { - mlayout.size = size; - mlayout.omap_root.update(omap_root_t(id, cnt_modify, - value.get_metadata_hint(block_size))); - }); + ftvalue.update_onode_size(t, size); + auto oroot = omap_root_t(id, cnt_modify, + value.get_metadata_hint(block_size)); + ftvalue.update_omap_root(t, oroot); validate(value); } -- 2.39.5