From a8b3206ce98479664366c889d7afb79b6d51c2f3 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Fri, 30 Oct 2020 14:35:23 +0800 Subject: [PATCH] crimson/onode-staged-tree: implement DeltaRecorder Signed-off-by: Yingxin Cheng --- src/crimson/os/seastore/CMakeLists.txt | 1 + .../onode_manager/staged-fltree/fwd.h | 2 + .../onode_manager/staged-fltree/node.cc | 11 +- .../staged-fltree/node_delta_recorder.h | 36 ++++++ .../staged-fltree/node_extent_manager.cc | 12 ++ .../staged-fltree/node_extent_manager.h | 14 ++- .../staged-fltree/node_extent_manager/dummy.h | 4 +- .../node_extent_manager/seastore.cc | 88 +++++++++++++ .../node_extent_manager/seastore.h | 36 ++---- .../staged-fltree/node_extent_visitor.h | 117 +++++++++++++++--- 10 files changed, 269 insertions(+), 52 deletions(-) create mode 100644 src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h create mode 100644 src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index a486e0fc89f..5a764f19cc3 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(crimson-seastore STATIC onode_manager/simple-fltree/onode_node.cc onode_manager/staged-fltree/node.cc onode_manager/staged-fltree/node_extent_manager.cc + onode_manager/staged-fltree/node_extent_manager/seastore.cc onode_manager/staged-fltree/node_extent_mutable.cc onode_manager/staged-fltree/node_impl.cc onode_manager/staged-fltree/stages/item_iterator_stage.cc diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h index 477c939fcdb..3f3eb092c4f 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h @@ -24,9 +24,11 @@ using crimson::os::seastore::L_ADDR_MIN; using crimson::os::seastore::L_ADDR_NULL; using crimson::os::seastore::extent_len_t; +class DeltaRecorder; class NodeExtent; class NodeExtentManager; class RootNodeTracker; +using DeltaRecorderURef = std::unique_ptr; using NodeExtentRef = crimson::os::seastore::TCachedExtentRef; using NodeExtentManagerURef = std::unique_ptr; using RootNodeTrackerURef = std::unique_ptr; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc index db729bd920e..79e23c6aeb3 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc @@ -245,17 +245,12 @@ node_future> Node::load( // option3: length is totally flexible; return c.nm.read_extent(c.t, addr, NODE_BLOCK_SIZE ).safe_then([expect_is_level_tail](auto extent) { - const auto header = reinterpret_cast(extent->get_read()); - auto node_type = header->get_node_type(); - auto field_type = header->get_field_type(); - if (!field_type.has_value()) { - throw std::runtime_error("load failed: bad field type"); - } + auto [node_type, field_type] = extent->get_types(); if (node_type == node_type_t::LEAF) { - auto impl = LeafNodeImpl::load(extent, *field_type, expect_is_level_tail); + auto impl = LeafNodeImpl::load(extent, field_type, expect_is_level_tail); return Ref(new LeafNode(impl.get(), std::move(impl))); } else if (node_type == node_type_t::INTERNAL) { - auto impl = InternalNodeImpl::load(extent, *field_type, expect_is_level_tail); + auto impl = InternalNodeImpl::load(extent, field_type, expect_is_level_tail); return Ref(new InternalNode(impl.get(), std::move(impl))); } else { ceph_abort("impossible path"); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h new file mode 100644 index 00000000000..5461747e932 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h @@ -0,0 +1,36 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "include/buffer.h" +#include "node_types.h" + +namespace crimson::os::seastore::onode { + +class DeltaRecorder { + public: + virtual ~DeltaRecorder() { + assert(is_empty()); + } + + bool is_empty() const { + return encoded.length() == 0; + } + + ceph::bufferlist get_delta() { + assert(!is_empty()); + return std::move(encoded); + } + + virtual node_type_t node_type() const = 0; + virtual field_type_t field_type() const = 0; + virtual void apply_delta(ceph::bufferlist::const_iterator&, + NodeExtentMutable&) = 0; + + protected: + DeltaRecorder() = default; + ceph::bufferlist encoded; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc index 21a08b05969..c5bd5a3fb96 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc @@ -2,11 +2,23 @@ // vim: ts=8 sw=2 smarttab #include "node_extent_manager.h" + #include "node_extent_manager/dummy.h" #include "node_extent_manager/seastore.h" +#include "stages/node_stage_layout.h" namespace crimson::os::seastore::onode { +std::pair NodeExtent::get_types() const { + const auto header = reinterpret_cast(get_read()); + auto node_type = header->get_node_type(); + auto field_type = header->get_field_type(); + if (!field_type.has_value()) { + throw std::runtime_error("load failed: bad field type"); + } + return {node_type, *field_type}; +} + NodeExtentManagerURef NodeExtentManager::create_dummy(bool is_sync) { if (is_sync) { return NodeExtentManagerURef(new DummyNodeExtentManager()); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h index 1f85c9d3cb4..a8c05d15b8a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -10,6 +10,7 @@ #include "fwd.h" #include "super.h" #include "node_extent_mutable.h" +#include "node_types.h" namespace crimson::os::seastore::onode { @@ -17,19 +18,26 @@ using crimson::os::seastore::LogicalCachedExtent; class NodeExtent : public LogicalCachedExtent { public: virtual ~NodeExtent() = default; + std::pair get_types() const; const char* get_read() const { return get_bptr().c_str(); } - auto get_mutable() { + NodeExtentMutable get_mutable() { assert(is_pending()); - return NodeExtentMutable(*this); + return do_get_mutable(); } - virtual NodeExtentRef mutate(context_t/* DeltaBuffer::Ref */) = 0; + + virtual DeltaRecorder* get_recorder() const = 0; + virtual NodeExtentRef mutate(context_t, DeltaRecorderURef&&) = 0; protected: template NodeExtent(T&&... t) : LogicalCachedExtent(std::forward(t)...) {} + NodeExtentMutable do_get_mutable() { + return NodeExtentMutable(*this); + } + /** * abstracted: * - CacheExtent::duplicate_for_write() -> CachedExtentRef diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h index f77335d31bc..bc389358943 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -38,8 +38,10 @@ class DummyNodeExtent final: public NodeExtent { } ~DummyNodeExtent() override = default; protected: - NodeExtentRef mutate(context_t) override { + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override { ceph_abort("impossible path"); } + DeltaRecorder* get_recorder() const override { + return nullptr; } CachedExtentRef duplicate_for_write() override { ceph_abort("impossible path"); } extent_types_t get_type() const override { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc new file mode 100644 index 00000000000..c51ec24740c --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "seastore.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h" + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); +} + +} + +namespace crimson::os::seastore::onode { + +static DeltaRecorderURef create_recorder( + node_type_t node_type, field_type_t field_type) { + if (node_type == node_type_t::LEAF) { + if (field_type == field_type_t::N0) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT::create(); + } else { + ceph_abort("impossible path"); + } + } else if (node_type == node_type_t::INTERNAL) { + if (field_type == field_type_t::N0) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT::create(); + } else { + ceph_abort("impossible path"); + } + } else { + ceph_abort("impossible path"); + } +} + +void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) { + logger().info("OTree::Seastore: update root {:#x} ...", addr); + root_addr = addr; + auto nm = static_cast(&c.nm); + nm->get_tm().write_onode_root(c.t, addr); +} + +NodeExtentRef SeastoreNodeExtent::mutate( + context_t c, DeltaRecorderURef&& _recorder) { + logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr()); + auto nm = static_cast(&c.nm); + auto extent = nm->get_tm().get_mutable_extent(c.t, this); + auto ret = extent->cast(); + assert(!ret->recorder || ret->recorder->is_empty()); + ret->recorder = std::move(_recorder); + return ret; +} + +void SeastoreNodeExtent::apply_delta(const ceph::bufferlist& bl) { + logger().debug("OTree::Seastore: replay {:#x} ...", get_laddr()); + if (!recorder) { + auto [node_type, field_type] = get_types(); + recorder = create_recorder(node_type, field_type); + } else { +#ifndef NDEBUG + auto [node_type, field_type] = get_types(); + assert(recorder->node_type() == node_type); + assert(recorder->field_type() == field_type); +#endif + } + assert(is_clean()); + auto node = do_get_mutable(); + auto p = bl.cbegin(); + while (p != bl.end()) { + recorder->apply_delta(p, node); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h index 1ec2e75e8fb..48968cff4ef 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -5,6 +5,7 @@ #include "crimson/common/log.h" #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h" namespace crimson::os::seastore::onode { @@ -35,7 +36,12 @@ class SeastoreNodeExtent final: public NodeExtent { : NodeExtent(other) {} ~SeastoreNodeExtent() override = default; protected: - NodeExtentRef mutate(context_t c) override; + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override; + + DeltaRecorder* get_recorder() const override { + return recorder.get(); + } + CachedExtentRef duplicate_for_write() override { return CachedExtentRef(new SeastoreNodeExtent(*this)); } @@ -43,18 +49,12 @@ class SeastoreNodeExtent final: public NodeExtent { return extent_types_t::ONODE_BLOCK_STAGED; } ceph::bufferlist get_delta() override { - //TODO - ceph_abort("not implemented"); - } - void apply_delta(const ceph::bufferlist&) override { - //TODO - ceph_abort("not implemented"); + assert(recorder); + return recorder->get_delta(); } + void apply_delta(const ceph::bufferlist&) override; private: - static seastar::logger& logger() { - return crimson::get_logger(ceph_subsys_filestore); - } - //TODO: recorder + DeltaRecorderURef recorder; }; class SeastoreNodeExtentManager final: public NodeExtentManager { @@ -109,18 +109,4 @@ class SeastoreNodeExtentManager final: public NodeExtentManager { const laddr_t addr_min; }; -inline void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) { - logger().info("OTree::Seastore: update root {:#x} ...", addr); - root_addr = addr; - auto nm = static_cast(&c.nm); - nm->get_tm().write_onode_root(c.t, addr); -} - -inline NodeExtentRef SeastoreNodeExtent::mutate(context_t c) { - logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr()); - auto nm = static_cast(&c.nm); - auto ret = nm->get_tm().get_mutable_extent(c.t, this); - return ret->cast(); -} - } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h index 2668b916655..8fa7cf76497 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h @@ -4,38 +4,111 @@ #pragma once #include "node_extent_manager.h" +#include "node_delta_recorder.h" #include "node_layout_replayable.h" namespace crimson::os::seastore::onode { +template +class DeltaRecorderT final: public DeltaRecorder { + public: + using layout_t = NodeLayoutReplayableT; + using position_t = typename layout_t::position_t; + using StagedIterator = typename layout_t::StagedIterator; + using value_t = typename layout_t::value_t; + static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE; + + ~DeltaRecorderT() override = default; + + template + void encode_insert( + const full_key_t& key, + const value_t& value, + const position_t& insert_pos, + const match_stage_t& insert_stage, + const node_offset_t& insert_size) { + // TODO encode to encoded + } + + void encode_split( + const StagedIterator& split_at, + const char* p_start) { + // TODO encode to encoded + } + + template + void encode_split_insert( + const StagedIterator& split_at, + const full_key_t& key, + const value_t& value, + const position_t& insert_pos, + const match_stage_t& insert_stage, + const node_offset_t& insert_size, + const char* p_start) { + // TODO encode to encoded + } + + void encode_update_child_addr( + const laddr_t new_addr, + const laddr_packed_t* p_addr, + const char* p_start) { + // TODO encode to encoded + } + + static DeltaRecorderURef create() { + return std::unique_ptr(new DeltaRecorderT()); + } + + private: + DeltaRecorderT() = default; + node_type_t node_type() const override { return NODE_TYPE; } + field_type_t field_type() const override { return FIELD_TYPE; } + void apply_delta(ceph::bufferlist::const_iterator& delta, + NodeExtentMutable& node) override { + assert(is_empty()); + // TODO decode and apply + assert(false && "not implemented"); + } +}; + template class NodeExtentT { public: using layout_t = NodeLayoutReplayableT; using node_stage_t = typename layout_t::node_stage_t; using position_t = typename layout_t::position_t; + using recorder_t = DeltaRecorderT; using StagedIterator = typename layout_t::StagedIterator; using value_t = typename layout_t::value_t; static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE; enum class state_t { NO_RECORDING, // extent_state_t::INITIAL_WRITE_PENDING + // can mutate, no recording RECORDING, // extent_state_t::MUTATION_PENDING + // can mutate, recording PENDING_MUTATE // extent_state_t::CLEAN/DIRTY + // cannot mutate }; NodeExtentT(state_t state, NodeExtentRef extent) : state{state}, extent{extent}, node_stage{reinterpret_cast(extent->get_read())} { if (state == state_t::NO_RECORDING) { - assert(!mut.has_value()); mut.emplace(extent->get_mutable()); - // TODO: recorder = nullptr; + assert(extent->get_recorder() == nullptr); + recorder = nullptr; } else if (state == state_t::RECORDING) { - assert(!mut.has_value()); mut.emplace(extent->get_mutable()); - // TODO: get recorder from extent + auto p_recorder = extent->get_recorder(); + assert(p_recorder != nullptr); + assert(p_recorder->node_type() == NODE_TYPE); + assert(p_recorder->field_type() == FIELD_TYPE); + recorder = static_cast(p_recorder); } else if (state == state_t::PENDING_MUTATE) { - // TODO: recorder = nullptr; + // mut is empty + assert(extent->get_recorder() == nullptr || + extent->get_recorder()->is_empty()); + recorder = nullptr; } else { ceph_abort("impossible path"); } @@ -54,17 +127,19 @@ class NodeExtentT { void prepare_mutate(context_t c) { if (state == state_t::PENDING_MUTATE) { assert(!extent->is_pending()); - // TODO: create and set recorder DeltaRecorderT - extent = extent->mutate(c/* recorder */); - assert(extent->is_mutation_pending()); + auto ref_recorder = recorder_t::create(); + recorder = static_cast(ref_recorder.get()); + extent = extent->mutate(c, std::move(ref_recorder)); + state = state_t::RECORDING; + assert(extent->is_mutation_pending()); node_stage = node_stage_t( reinterpret_cast(extent->get_read())); + assert(recorder == static_cast(extent->get_recorder())); mut.emplace(extent->get_mutable()); } } - // TODO: translate absolute modifications to relative template const value_t* insert_replayable( const full_key_t& key, @@ -73,7 +148,10 @@ class NodeExtentT { match_stage_t& insert_stage, node_offset_t& insert_size) { assert(state != state_t::PENDING_MUTATE); - // TODO: encode params to recorder as delta + if (state == state_t::RECORDING) { + recorder->template encode_insert( + key, value, insert_pos, insert_stage, insert_size); + } return layout_t::template insert( *mut, read(), key, value, insert_pos, insert_stage, insert_size); @@ -81,7 +159,9 @@ class NodeExtentT { void split_replayable(StagedIterator& split_at) { assert(state != state_t::PENDING_MUTATE); - // TODO: encode params to recorder as delta + if (state == state_t::RECORDING) { + recorder->encode_split(split_at, read().p_start()); + } layout_t::split(*mut, read(), split_at); } @@ -94,7 +174,11 @@ class NodeExtentT { match_stage_t& insert_stage, node_offset_t& insert_size) { assert(state != state_t::PENDING_MUTATE); - // TODO: encode params to recorder as delta + if (state == state_t::RECORDING) { + recorder->template encode_split_insert( + split_at, key, value, insert_pos, insert_stage, insert_size, + read().p_start()); + } return layout_t::template split_insert( *mut, read(), split_at, key, value, insert_pos, insert_stage, insert_size); @@ -103,7 +187,9 @@ class NodeExtentT { void update_child_addr_replayable( const laddr_t new_addr, laddr_packed_t* p_addr) { assert(state != state_t::PENDING_MUTATE); - // TODO: encode params to recorder as delta + if (state == state_t::RECORDING) { + recorder->encode_update_child_addr(new_addr, p_addr, read().p_start()); + } return layout_t::update_child_addr(*mut, new_addr, p_addr); } @@ -118,7 +204,7 @@ class NodeExtentT { state = state_t::NO_RECORDING; } else if (extent->is_mutation_pending()) { state = state_t::RECORDING; - } else if (!extent->is_valid()) { + } else if (extent->is_valid()) { state = state_t::PENDING_MUTATE; } else { ceph_abort("invalid extent"); @@ -140,7 +226,8 @@ class NodeExtentT { NodeExtentRef extent; node_stage_t node_stage; std::optional mut; - // TODO: DeltaRecorderT* recorder; + // owned by extent + recorder_t* recorder; }; } -- 2.39.5