git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/onode-staged-tree: implement DeltaRecorder
author Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 30 Oct 2020 06:35:23 +0000 (14:35 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Tue, 1 Dec 2020 04:50:54 +0000 (12:50 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h
src/crimson/os/seastore/onode_manager/staged-fltree/node.cc
src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h [new file with mode: 0644]
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc [new file with mode: 0644]
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h
src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h

index a486e0fc89f03bbc238cd2d484f417d504cf13c4..5a764f19cc3ba8a29d18c25e3d5b27c316e38fc7 100644 (file)
@@ -17,6 +17,7 @@ add_library(crimson-seastore STATIC
   onode_manager/simple-fltree/onode_node.cc
   onode_manager/staged-fltree/node.cc
   onode_manager/staged-fltree/node_extent_manager.cc
+  onode_manager/staged-fltree/node_extent_manager/seastore.cc
   onode_manager/staged-fltree/node_extent_mutable.cc
   onode_manager/staged-fltree/node_impl.cc
   onode_manager/staged-fltree/stages/item_iterator_stage.cc
index 477c939fcdb19ee592e86b25a94b7d810bd05f34..3f3eb092c4fde53c8f3575098326d716ee64e057 100644 (file)
@@ -24,9 +24,11 @@ using crimson::os::seastore::L_ADDR_MIN;
 using crimson::os::seastore::L_ADDR_NULL;
 using crimson::os::seastore::extent_len_t;
 
+class DeltaRecorder;
 class NodeExtent;
 class NodeExtentManager;
 class RootNodeTracker;
+using DeltaRecorderURef = std::unique_ptr<DeltaRecorder>;
 using NodeExtentRef = crimson::os::seastore::TCachedExtentRef<NodeExtent>;
 using NodeExtentManagerURef = std::unique_ptr<NodeExtentManager>;
 using RootNodeTrackerURef = std::unique_ptr<RootNodeTracker>;
index db729bd920e6c591d898fda4cf5fa943829221c2..79e23c6aeb31baea6bcb54bf12c857b905eb2d11 100644 (file)
@@ -245,17 +245,12 @@ node_future<Ref<Node>> Node::load(
   // option3: length is totally flexible;
   return c.nm.read_extent(c.t, addr, NODE_BLOCK_SIZE
   ).safe_then([expect_is_level_tail](auto extent) {
-    const auto header = reinterpret_cast<const node_header_t*>(extent->get_read());
-    auto node_type = header->get_node_type();
-    auto field_type = header->get_field_type();
-    if (!field_type.has_value()) {
-      throw std::runtime_error("load failed: bad field type");
-    }
+    auto [node_type, field_type] = extent->get_types();
     if (node_type == node_type_t::LEAF) {
-      auto impl = LeafNodeImpl::load(extent, *field_type, expect_is_level_tail);
+      auto impl = LeafNodeImpl::load(extent, field_type, expect_is_level_tail);
       return Ref<Node>(new LeafNode(impl.get(), std::move(impl)));
     } else if (node_type == node_type_t::INTERNAL) {
-      auto impl = InternalNodeImpl::load(extent, *field_type, expect_is_level_tail);
+      auto impl = InternalNodeImpl::load(extent, field_type, expect_is_level_tail);
       return Ref<Node>(new InternalNode(impl.get(), std::move(impl)));
     } else {
       ceph_abort("impossible path");
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h
new file mode 100644 (file)
index 0000000..5461747
--- /dev/null
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "include/buffer.h"
+#include "node_types.h"
+
+namespace crimson::os::seastore::onode {
+
+/**
+ * DeltaRecorder
+ *
+ * Abstract base class of the delta recorders.  It owns the `encoded`
+ * bufferlist and exposes it through is_empty()/get_delta(); subclasses
+ * report their node/field type and implement apply_delta().
+ */
+class DeltaRecorder {
+ public:
+  // The recorder is expected to hold no encoded data when it is destroyed.
+  virtual ~DeltaRecorder() {
+    assert(is_empty());
+  }
+
+  // Returns true when `encoded` has zero length.
+  bool is_empty() const {
+    return encoded.length() == 0;
+  }
+
+  // Moves the encoded buffer out to the caller.  Must not be called while
+  // the recorder is empty.
+  // NOTE(review): presumably the moved-from `encoded` is left empty so that
+  // the destructor assertion holds afterwards -- confirm against bufferlist.
+  ceph::bufferlist get_delta() {
+    assert(!is_empty());
+    return std::move(encoded);
+  }
+
+  // Node type and field type of the layout this recorder belongs to.
+  virtual node_type_t node_type() const = 0;
+  virtual field_type_t field_type() const = 0;
+  // Called by SeastoreNodeExtent::apply_delta() in a loop until the iterator
+  // reaches the end of the buffer, so an implementation is expected to
+  // advance the iterator on every call.
+  virtual void apply_delta(ceph::bufferlist::const_iterator&,
+                           NodeExtentMutable&) = 0;
+
+ protected:
+  // Only constructible through subclasses.
+  DeltaRecorder() = default;
+  // Buffer holding the encoded delta; handed out by get_delta().
+  ceph::bufferlist encoded;
+};
+
+}
index 21a08b0596929e7550e63d82f16b15c275bfccb6..c5bd5a3fb966d69facae65f4783d044fbdf553f7 100644 (file)
@@ -2,11 +2,23 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "node_extent_manager.h"
+
 #include "node_extent_manager/dummy.h"
 #include "node_extent_manager/seastore.h"
+#include "stages/node_stage_layout.h"
 
 namespace crimson::os::seastore::onode {
 
+// Reads the node type and field type from the node header located at the
+// start of the extent's read buffer.
+// Throws std::runtime_error when the header carries no valid field type.
+std::pair<node_type_t, field_type_t> NodeExtent::get_types() const {
+  const auto* p_header = reinterpret_cast<const node_header_t*>(get_read());
+  const auto type_of_node = p_header->get_node_type();
+  const auto maybe_field_type = p_header->get_field_type();
+  if (maybe_field_type.has_value()) {
+    return {type_of_node, *maybe_field_type};
+  }
+  throw std::runtime_error("load failed: bad field type");
+}
+
 NodeExtentManagerURef NodeExtentManager::create_dummy(bool is_sync) {
   if (is_sync) {
     return NodeExtentManagerURef(new DummyNodeExtentManager<true>());
index 1f85c9d3cb4255ecd6d76bc996191deb44cc6dc3..a8c05d15b8af28e2693506781c3f4b599af377ba 100644 (file)
@@ -10,6 +10,7 @@
 #include "fwd.h"
 #include "super.h"
 #include "node_extent_mutable.h"
+#include "node_types.h"
 
 namespace crimson::os::seastore::onode {
 
@@ -17,19 +18,26 @@ using crimson::os::seastore::LogicalCachedExtent;
 class NodeExtent : public LogicalCachedExtent {
  public:
   virtual ~NodeExtent() = default;
+  std::pair<node_type_t, field_type_t> get_types() const;
   const char* get_read() const {
     return get_bptr().c_str();
   }
-  auto get_mutable() {
+  NodeExtentMutable get_mutable() {
     assert(is_pending());
-    return NodeExtentMutable(*this);
+    return do_get_mutable();
   }
-  virtual NodeExtentRef mutate(context_t/* DeltaBuffer::Ref */) = 0;
+
+  virtual DeltaRecorder* get_recorder() const = 0;
+  virtual NodeExtentRef mutate(context_t, DeltaRecorderURef&&) = 0;
 
  protected:
   template <typename... T>
   NodeExtent(T&&... t) : LogicalCachedExtent(std::forward<T>(t)...) {}
 
+  NodeExtentMutable do_get_mutable() {
+    return NodeExtentMutable(*this);
+  }
+
   /**
    * abstracted:
    * - CacheExtent::duplicate_for_write() -> CachedExtentRef
index f77335d31bcb383f9854b53c432831759525a88a..bc389358943f0cf0f886e3b5791b1bac098dd121 100644 (file)
@@ -38,8 +38,10 @@ class DummyNodeExtent final: public NodeExtent {
   }
   ~DummyNodeExtent() override = default;
  protected:
-  NodeExtentRef mutate(context_t) override {
+  NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override {
     ceph_abort("impossible path"); }
+  DeltaRecorder* get_recorder() const override {
+    return nullptr; }
   CachedExtentRef duplicate_for_write() override {
     ceph_abort("impossible path"); }
   extent_types_t get_type() const override {
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc
new file mode 100644 (file)
index 0000000..c51ec24
--- /dev/null
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "seastore.h"
+
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h"
+
+namespace {
+
+// File-local accessor for the logger used by the messages in this file
+// (obtained from crimson::get_logger with ceph_subsys_filestore).
+seastar::logger& logger() {
+  seastar::logger& subsys_logger = crimson::get_logger(ceph_subsys_filestore);
+  return subsys_logger;
+}
+
+}
+
+namespace crimson::os::seastore::onode {
+
+// Builds the DeltaRecorderT instantiation matching the given node type and
+// field type.  Aborts with "impossible path" when either value is not one of
+// the handled enumerators.
+static DeltaRecorderURef create_recorder(
+    node_type_t node_type, field_type_t field_type) {
+  // validate the node type once, then dispatch on the field type
+  const bool is_leaf = (node_type == node_type_t::LEAF);
+  if (!is_leaf && node_type != node_type_t::INTERNAL) {
+    ceph_abort("impossible path");
+  }
+  switch (field_type) {
+  case field_type_t::N0:
+    return is_leaf
+      ? DeltaRecorderT<node_fields_0_t, node_type_t::LEAF>::create()
+      : DeltaRecorderT<node_fields_0_t, node_type_t::INTERNAL>::create();
+  case field_type_t::N1:
+    return is_leaf
+      ? DeltaRecorderT<node_fields_1_t, node_type_t::LEAF>::create()
+      : DeltaRecorderT<node_fields_1_t, node_type_t::INTERNAL>::create();
+  case field_type_t::N2:
+    return is_leaf
+      ? DeltaRecorderT<node_fields_2_t, node_type_t::LEAF>::create()
+      : DeltaRecorderT<node_fields_2_t, node_type_t::INTERNAL>::create();
+  case field_type_t::N3:
+    // N3 is the only field type whose layout struct differs per node type
+    return is_leaf
+      ? DeltaRecorderT<leaf_fields_3_t, node_type_t::LEAF>::create()
+      : DeltaRecorderT<internal_fields_3_t, node_type_t::INTERNAL>::create();
+  default:
+    ceph_abort("impossible path");
+  }
+}
+
+// Logs the update, remembers the new root address in `root_addr`, and
+// forwards it to the transaction manager's write_onode_root() for the
+// transaction in `c`.
+void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) {
+  logger().info("OTree::Seastore: update root {:#x} ...", addr);
+  root_addr = addr;
+  // the context's extent manager is downcast to the seastore implementation
+  auto& seastore_nm = static_cast<SeastoreNodeExtentManager&>(c.nm);
+  seastore_nm.get_tm().write_onode_root(c.t, addr);
+}
+
+// Asks the transaction manager for the mutable extent of this extent in the
+// transaction of `c`, installs `_recorder` on it, and returns it.
+// The returned extent must not already hold a non-empty recorder.
+NodeExtentRef SeastoreNodeExtent::mutate(
+    context_t c, DeltaRecorderURef&& _recorder) {
+  logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr());
+  // the context's extent manager is downcast to the seastore implementation
+  auto& seastore_nm = static_cast<SeastoreNodeExtentManager&>(c.nm);
+  auto ret = seastore_nm.get_tm().get_mutable_extent(c.t, this)
+    ->cast<SeastoreNodeExtent>();
+  assert(!ret->recorder || ret->recorder->is_empty());
+  ret->recorder = std::move(_recorder);
+  return ret;
+}
+
+// Replays the delta buffer `bl` onto this extent through its recorder.
+// A recorder is created from the node header types when none is attached
+// yet; otherwise debug builds verify that the attached one matches them.
+void SeastoreNodeExtent::apply_delta(const ceph::bufferlist& bl) {
+  logger().debug("OTree::Seastore: replay {:#x} ...", get_laddr());
+  if (recorder) {
+#ifndef NDEBUG
+    // the attached recorder must agree with the types in the node header
+    auto [node_type, field_type] = get_types();
+    assert(recorder->node_type() == node_type);
+    assert(recorder->field_type() == field_type);
+#endif
+  } else {
+    // no recorder yet: build the one matching the node header
+    auto [node_type, field_type] = get_types();
+    recorder = create_recorder(node_type, field_type);
+  }
+  assert(is_clean());
+  auto node = do_get_mutable();
+  // keep replaying until the iterator reaches the end of the buffer
+  for (auto p = bl.cbegin(); p != bl.end();) {
+    recorder->apply_delta(p, node);
+  }
+}
+
+}
index 1ec2e75e8fbc17802c4e479a8b69359afca97ee8..48968cff4ef439ef2792f9e5f97b31562de7d03d 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "crimson/common/log.h"
 #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h"
 
 namespace crimson::os::seastore::onode {
 
@@ -35,7 +36,12 @@ class SeastoreNodeExtent final: public NodeExtent {
     : NodeExtent(other) {}
   ~SeastoreNodeExtent() override = default;
  protected:
-  NodeExtentRef mutate(context_t c) override;
+  NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override;
+
+  DeltaRecorder* get_recorder() const override {
+    return recorder.get();
+  }
+
   CachedExtentRef duplicate_for_write() override {
     return CachedExtentRef(new SeastoreNodeExtent(*this));
   }
@@ -43,18 +49,12 @@ class SeastoreNodeExtent final: public NodeExtent {
     return extent_types_t::ONODE_BLOCK_STAGED;
   }
   ceph::bufferlist get_delta() override {
-    //TODO
-    ceph_abort("not implemented");
-  }
-  void apply_delta(const ceph::bufferlist&) override {
-    //TODO
-    ceph_abort("not implemented");
+    assert(recorder);
+    return recorder->get_delta();
   }
+  void apply_delta(const ceph::bufferlist&) override;
  private:
-  static seastar::logger& logger() {
-    return crimson::get_logger(ceph_subsys_filestore);
-  }
-  //TODO: recorder
+  DeltaRecorderURef recorder;
 };
 
 class SeastoreNodeExtentManager final: public NodeExtentManager {
@@ -109,18 +109,4 @@ class SeastoreNodeExtentManager final: public NodeExtentManager {
   const laddr_t addr_min;
 };
 
-inline void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) {
-  logger().info("OTree::Seastore: update root {:#x} ...", addr);
-  root_addr = addr;
-  auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);
-  nm->get_tm().write_onode_root(c.t, addr);
-}
-
-inline NodeExtentRef SeastoreNodeExtent::mutate(context_t c) {
-  logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr());
-  auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);
-  auto ret = nm->get_tm().get_mutable_extent(c.t, this);
-  return ret->cast<SeastoreNodeExtent>();
-}
-
 }
index 2668b916655e4f81af41eba303cfbb5f70cd2086..8fa7cf76497493f07c5479734b93e32853549aeb 100644 (file)
 #pragma once
 
 #include "node_extent_manager.h"
+#include "node_delta_recorder.h"
 #include "node_layout_replayable.h"
 
 namespace crimson::os::seastore::onode {
 
+/**
+ * DeltaRecorderT
+ *
+ * DeltaRecorder implementation bound to one node layout
+ * (NodeLayoutReplayableT<FieldType, NODE_TYPE>).  The encode_*() methods are
+ * still placeholders that record nothing, and apply_delta() is not
+ * implemented yet.
+ */
+template <typename FieldType, node_type_t NODE_TYPE>
+class DeltaRecorderT final: public DeltaRecorder {
+ public:
+  using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>;
+  using position_t = typename layout_t::position_t;
+  using StagedIterator = typename layout_t::StagedIterator;
+  using value_t = typename layout_t::value_t;
+  static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE;
+
+  ~DeltaRecorderT() override = default;
+
+  // Placeholder for recording an insert; currently encodes nothing.
+  template <KeyT KT>
+  void encode_insert(
+      const full_key_t<KT>& key,
+      const value_t& value,
+      const position_t& insert_pos,
+      const match_stage_t& insert_stage,
+      const node_offset_t& insert_size) {
+    // TODO encode to encoded
+  }
+
+  // Placeholder for recording a split; currently encodes nothing.
+  void encode_split(
+      const StagedIterator& split_at,
+      const char* p_start) {
+    // TODO encode to encoded
+  }
+
+  // Placeholder for recording a split followed by an insert; currently
+  // encodes nothing.
+  template <KeyT KT>
+  void encode_split_insert(
+      const StagedIterator& split_at,
+      const full_key_t<KT>& key,
+      const value_t& value,
+      const position_t& insert_pos,
+      const match_stage_t& insert_stage,
+      const node_offset_t& insert_size,
+      const char* p_start) {
+    // TODO encode to encoded
+  }
+
+  // Placeholder for recording a child address update; currently encodes
+  // nothing.
+  void encode_update_child_addr(
+      const laddr_t new_addr,
+      const laddr_packed_t* p_addr,
+      const char* p_start) {
+    // TODO encode to encoded
+  }
+
+  // Factory: the constructor is private, so instances are only created here
+  // and handed out as owning DeltaRecorderURef.
+  static DeltaRecorderURef create() {
+    return std::unique_ptr<DeltaRecorder>(new DeltaRecorderT());
+  }
+
+ private:
+  DeltaRecorderT() = default;
+  node_type_t node_type() const override { return NODE_TYPE; }
+  field_type_t field_type() const override { return FIELD_TYPE; }
+  void apply_delta(ceph::bufferlist::const_iterator& delta,
+                   NodeExtentMutable& node) override {
+    assert(is_empty());
+    // TODO decode and apply
+    // Abort unconditionally instead of assert(false): an assert is compiled
+    // out under NDEBUG, and since nothing here advances `delta`, the replay
+    // loop in SeastoreNodeExtent::apply_delta() would then never terminate.
+    ceph_abort("not implemented");
+  }
+};
+
 template <typename FieldType, node_type_t NODE_TYPE>
 class NodeExtentT {
  public:
   using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>;
   using node_stage_t = typename layout_t::node_stage_t;
   using position_t = typename layout_t::position_t;
+  using recorder_t = DeltaRecorderT<FieldType, NODE_TYPE>;
   using StagedIterator = typename layout_t::StagedIterator;
   using value_t = typename layout_t::value_t;
   static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE;
   enum class state_t {
     NO_RECORDING,  // extent_state_t::INITIAL_WRITE_PENDING
+                   //   can mutate, no recording
     RECORDING,     // extent_state_t::MUTATION_PENDING
+                   //   can mutate, recording
     PENDING_MUTATE // extent_state_t::CLEAN/DIRTY
+                   //   cannot mutate
   };
 
   NodeExtentT(state_t state, NodeExtentRef extent)
       : state{state}, extent{extent},
         node_stage{reinterpret_cast<const FieldType*>(extent->get_read())} {
     if (state == state_t::NO_RECORDING) {
-      assert(!mut.has_value());
       mut.emplace(extent->get_mutable());
-      // TODO: recorder = nullptr;
+      assert(extent->get_recorder() == nullptr);
+      recorder = nullptr;
     } else if (state == state_t::RECORDING) {
-      assert(!mut.has_value());
       mut.emplace(extent->get_mutable());
-      // TODO: get recorder from extent
+      auto p_recorder = extent->get_recorder();
+      assert(p_recorder != nullptr);
+      assert(p_recorder->node_type() == NODE_TYPE);
+      assert(p_recorder->field_type() == FIELD_TYPE);
+      recorder = static_cast<recorder_t*>(p_recorder);
     } else if (state == state_t::PENDING_MUTATE) {
-      // TODO: recorder = nullptr;
+      // mut is empty
+      assert(extent->get_recorder() == nullptr ||
+             extent->get_recorder()->is_empty());
+      recorder = nullptr;
     } else {
       ceph_abort("impossible path");
     }
@@ -54,17 +127,19 @@ class NodeExtentT {
   void prepare_mutate(context_t c) {
     if (state == state_t::PENDING_MUTATE) {
       assert(!extent->is_pending());
-      // TODO: create and set recorder DeltaRecorderT
-      extent = extent->mutate(c/* recorder */);
-      assert(extent->is_mutation_pending());
+      auto ref_recorder = recorder_t::create();
+      recorder = static_cast<recorder_t*>(ref_recorder.get());
+      extent = extent->mutate(c, std::move(ref_recorder));
+
       state = state_t::RECORDING;
+      assert(extent->is_mutation_pending());
       node_stage = node_stage_t(
           reinterpret_cast<const FieldType*>(extent->get_read()));
+      assert(recorder == static_cast<recorder_t*>(extent->get_recorder()));
       mut.emplace(extent->get_mutable());
     }
   }
 
-  // TODO: translate absolute modifications to relative
   template <KeyT KT>
   const value_t* insert_replayable(
       const full_key_t<KT>& key,
@@ -73,7 +148,10 @@ class NodeExtentT {
       match_stage_t& insert_stage,
       node_offset_t& insert_size) {
     assert(state != state_t::PENDING_MUTATE);
-    // TODO: encode params to recorder as delta
+    if (state == state_t::RECORDING) {
+      recorder->template encode_insert<KT>(
+          key, value, insert_pos, insert_stage, insert_size);
+    }
     return layout_t::template insert<KT>(
         *mut, read(), key, value,
         insert_pos, insert_stage, insert_size);
@@ -81,7 +159,9 @@ class NodeExtentT {
 
   void split_replayable(StagedIterator& split_at) {
     assert(state != state_t::PENDING_MUTATE);
-    // TODO: encode params to recorder as delta
+    if (state == state_t::RECORDING) {
+      recorder->encode_split(split_at, read().p_start());
+    }
     layout_t::split(*mut, read(), split_at);
   }
 
@@ -94,7 +174,11 @@ class NodeExtentT {
       match_stage_t& insert_stage,
       node_offset_t& insert_size) {
     assert(state != state_t::PENDING_MUTATE);
-    // TODO: encode params to recorder as delta
+    if (state == state_t::RECORDING) {
+      recorder->template encode_split_insert<KT>(
+          split_at, key, value, insert_pos, insert_stage, insert_size,
+          read().p_start());
+    }
     return layout_t::template split_insert<KT>(
         *mut, read(), split_at, key, value,
         insert_pos, insert_stage, insert_size);
@@ -103,7 +187,9 @@ class NodeExtentT {
   void update_child_addr_replayable(
       const laddr_t new_addr, laddr_packed_t* p_addr) {
     assert(state != state_t::PENDING_MUTATE);
-    // TODO: encode params to recorder as delta
+    if (state == state_t::RECORDING) {
+      recorder->encode_update_child_addr(new_addr, p_addr, read().p_start());
+    }
     return layout_t::update_child_addr(*mut, new_addr, p_addr);
   }
 
@@ -118,7 +204,7 @@ class NodeExtentT {
       state = state_t::NO_RECORDING;
     } else if (extent->is_mutation_pending()) {
       state = state_t::RECORDING;
-    } else if (!extent->is_valid()) {
+    } else if (extent->is_valid()) {
       state = state_t::PENDING_MUTATE;
     } else {
       ceph_abort("invalid extent");
@@ -140,7 +226,8 @@ class NodeExtentT {
   NodeExtentRef extent;
   node_stage_t node_stage;
   std::optional<NodeExtentMutable> mut;
-  // TODO: DeltaRecorderT* recorder;
+  // owned by extent
+  recorder_t* recorder;
 };
 
 }