onode_manager/simple-fltree/onode_node.cc
onode_manager/staged-fltree/node.cc
onode_manager/staged-fltree/node_extent_manager.cc
+ onode_manager/staged-fltree/node_extent_manager/seastore.cc
onode_manager/staged-fltree/node_extent_mutable.cc
onode_manager/staged-fltree/node_impl.cc
onode_manager/staged-fltree/stages/item_iterator_stage.cc
using crimson::os::seastore::L_ADDR_NULL;
using crimson::os::seastore::extent_len_t;
+class DeltaRecorder;
class NodeExtent;
class NodeExtentManager;
class RootNodeTracker;
+using DeltaRecorderURef = std::unique_ptr<DeltaRecorder>;
using NodeExtentRef = crimson::os::seastore::TCachedExtentRef<NodeExtent>;
using NodeExtentManagerURef = std::unique_ptr<NodeExtentManager>;
using RootNodeTrackerURef = std::unique_ptr<RootNodeTracker>;
// option3: length is totally flexible;
return c.nm.read_extent(c.t, addr, NODE_BLOCK_SIZE
).safe_then([expect_is_level_tail](auto extent) {
- const auto header = reinterpret_cast<const node_header_t*>(extent->get_read());
- auto node_type = header->get_node_type();
- auto field_type = header->get_field_type();
- if (!field_type.has_value()) {
- throw std::runtime_error("load failed: bad field type");
- }
+ auto [node_type, field_type] = extent->get_types();
if (node_type == node_type_t::LEAF) {
- auto impl = LeafNodeImpl::load(extent, *field_type, expect_is_level_tail);
+ auto impl = LeafNodeImpl::load(extent, field_type, expect_is_level_tail);
return Ref<Node>(new LeafNode(impl.get(), std::move(impl)));
} else if (node_type == node_type_t::INTERNAL) {
- auto impl = InternalNodeImpl::load(extent, *field_type, expect_is_level_tail);
+ auto impl = InternalNodeImpl::load(extent, field_type, expect_is_level_tail);
return Ref<Node>(new InternalNode(impl.get(), std::move(impl)));
} else {
ceph_abort("impossible path");
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "include/buffer.h"
+#include "node_types.h"
+
+namespace crimson::os::seastore::onode {
+
+class DeltaRecorder {  // abstract base of the delta recorders; pending delta bytes are held in `encoded`
+ public:
+ virtual ~DeltaRecorder() {
+ assert(is_empty());  // debug check: no un-retrieved delta may remain when the recorder is destroyed
+ }
+
+ bool is_empty() const {  // true when `encoded` currently holds zero bytes
+ return encoded.length() == 0;
+ }
+
+ ceph::bufferlist get_delta() {  // hands the accumulated delta to the caller; must not be called when empty
+ assert(!is_empty());
+ return std::move(encoded);  // moved out of the member; presumably leaves `encoded` empty (bufferlist move semantics -- confirm)
+ }
+
+ virtual node_type_t node_type() const = 0;  // node type the concrete recorder is bound to
+ virtual field_type_t field_type() const = 0;  // field layout type the concrete recorder is bound to
+ virtual void apply_delta(ceph::bufferlist::const_iterator&,  // replay hook: takes a delta iterator and the mutable node to apply it to
+ NodeExtentMutable&) = 0;
+
+ protected:
+ DeltaRecorder() = default;  // protected: instances are created through subclasses only
+ ceph::bufferlist encoded;  // delta buffer; drained by get_delta(), intended to be filled by subclasses
+};
+
+}
// vim: ts=8 sw=2 smarttab
#include "node_extent_manager.h"
+
#include "node_extent_manager/dummy.h"
#include "node_extent_manager/seastore.h"
+#include "stages/node_stage_layout.h"
namespace crimson::os::seastore::onode {
+std::pair<node_type_t, field_type_t> NodeExtent::get_types() const {  // returns the (node type, field type) pair read from the extent's header
+ const auto header = reinterpret_cast<const node_header_t*>(get_read());  // the start of the read buffer is viewed as a node_header_t
+ auto node_type = header->get_node_type();
+ auto field_type = header->get_field_type();
+ if (!field_type.has_value()) {  // an empty field type is treated as a load failure
+ throw std::runtime_error("load failed: bad field type");
+ }
+ return {node_type, *field_type};
+}
+
NodeExtentManagerURef NodeExtentManager::create_dummy(bool is_sync) {
if (is_sync) {
return NodeExtentManagerURef(new DummyNodeExtentManager<true>());
#include "fwd.h"
#include "super.h"
#include "node_extent_mutable.h"
+#include "node_types.h"
namespace crimson::os::seastore::onode {
class NodeExtent : public LogicalCachedExtent {
public:
virtual ~NodeExtent() = default;
+ std::pair<node_type_t, field_type_t> get_types() const;
const char* get_read() const {
return get_bptr().c_str();
}
- auto get_mutable() {
+ NodeExtentMutable get_mutable() {  // checked accessor: asserts the extent is pending before exposing a mutable view
assert(is_pending());
- return NodeExtentMutable(*this);
+ return do_get_mutable();  // construction itself is delegated to the unchecked helper
}
- virtual NodeExtentRef mutate(context_t/* DeltaBuffer::Ref */) = 0;
+
+ virtual DeltaRecorder* get_recorder() const = 0;  // raw pointer to the recorder associated with this extent; implementations may return nullptr
+ virtual NodeExtentRef mutate(context_t, DeltaRecorderURef&&) = 0;  // takes a recorder by rvalue reference and returns an extent reference
protected:
template <typename... T>
NodeExtent(T&&... t) : LogicalCachedExtent(std::forward<T>(t)...) {}
+ NodeExtentMutable do_get_mutable() {  // unchecked variant: builds the mutable view without asserting is_pending()
+ return NodeExtentMutable(*this);
+ }
+
/**
* abstracted:
* - CacheExtent::duplicate_for_write() -> CachedExtentRef
}
~DummyNodeExtent() override = default;
protected:
- NodeExtentRef mutate(context_t) override {
+ NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override {
ceph_abort("impossible path"); }
+ DeltaRecorder* get_recorder() const override {  // the dummy extent never carries a recorder
+ return nullptr; }
CachedExtentRef duplicate_for_write() override {
ceph_abort("impossible path"); }
extent_types_t get_type() const override {
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "seastore.h"
+
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_visitor.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h"
+
+namespace {
+
+seastar::logger& logger() {  // file-local accessor for the logger used in this translation unit
+ return crimson::get_logger(ceph_subsys_filestore);  // NOTE(review): logs under the filestore subsystem -- confirm this is intended here
+}
+
+}
+
+namespace crimson::os::seastore::onode {
+
+static DeltaRecorderURef create_recorder(  // maps a runtime (node_type, field_type) pair to the matching DeltaRecorderT instantiation
+ node_type_t node_type, field_type_t field_type) {
+ if (node_type == node_type_t::LEAF) {
+ if (field_type == field_type_t::N0) {
+ return DeltaRecorderT<node_fields_0_t, node_type_t::LEAF>::create();
+ } else if (field_type == field_type_t::N1) {
+ return DeltaRecorderT<node_fields_1_t, node_type_t::LEAF>::create();
+ } else if (field_type == field_type_t::N2) {
+ return DeltaRecorderT<node_fields_2_t, node_type_t::LEAF>::create();
+ } else if (field_type == field_type_t::N3) {
+ return DeltaRecorderT<leaf_fields_3_t, node_type_t::LEAF>::create();  // N3 uses a leaf-specific fields type
+ } else {
+ ceph_abort("impossible path");  // field type outside N0..N3
+ }
+ } else if (node_type == node_type_t::INTERNAL) {
+ if (field_type == field_type_t::N0) {
+ return DeltaRecorderT<node_fields_0_t, node_type_t::INTERNAL>::create();
+ } else if (field_type == field_type_t::N1) {
+ return DeltaRecorderT<node_fields_1_t, node_type_t::INTERNAL>::create();
+ } else if (field_type == field_type_t::N2) {
+ return DeltaRecorderT<node_fields_2_t, node_type_t::INTERNAL>::create();
+ } else if (field_type == field_type_t::N3) {
+ return DeltaRecorderT<internal_fields_3_t, node_type_t::INTERNAL>::create();  // N3 uses an internal-specific fields type
+ } else {
+ ceph_abort("impossible path");  // field type outside N0..N3
+ }
+ } else {
+ ceph_abort("impossible path");  // node type is neither LEAF nor INTERNAL
+ }
+}
+
+void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) {  // stores the new root address locally and forwards it to get_tm().write_onode_root()
+ logger().info("OTree::Seastore: update root {:#x} ...", addr);
+ root_addr = addr;
+ auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);  // unchecked downcast: assumes c.nm is a SeastoreNodeExtentManager
+ nm->get_tm().write_onode_root(c.t, addr);
+}
+
+NodeExtentRef SeastoreNodeExtent::mutate(  // requests the mutable extent for c.t and attaches the given recorder to it
+ context_t c, DeltaRecorderURef&& _recorder) {
+ logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr());
+ auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);  // unchecked downcast: assumes c.nm is a SeastoreNodeExtentManager
+ auto extent = nm->get_tm().get_mutable_extent(c.t, this);
+ auto ret = extent->cast<SeastoreNodeExtent>();
+ assert(!ret->recorder || ret->recorder->is_empty());  // a recorder already on the returned extent must hold no pending delta before being replaced
+ ret->recorder = std::move(_recorder);  // the returned extent now owns the recorder
+ return ret;
+}
+
+void SeastoreNodeExtent::apply_delta(const ceph::bufferlist& bl) {  // replays the deltas contained in `bl` onto this extent through its recorder
+ logger().debug("OTree::Seastore: replay {:#x} ...", get_laddr());
+ if (!recorder) {  // no recorder attached yet: create one matching the types stored in the extent header
+ auto [node_type, field_type] = get_types();
+ recorder = create_recorder(node_type, field_type);
+ } else {
+#ifndef NDEBUG
+ auto [node_type, field_type] = get_types();  // debug-only consistency check of the existing recorder against the header
+ assert(recorder->node_type() == node_type);
+ assert(recorder->field_type() == field_type);
+#endif
+ }
+ assert(is_clean());
+ auto node = do_get_mutable();  // do_get_mutable() is used because get_mutable() asserts is_pending()
+ auto p = bl.cbegin();
+ while (p != bl.end()) {  // relies on recorder->apply_delta() advancing `p`; otherwise this loop does not terminate
+ recorder->apply_delta(p, node);
+ }
+}
+
+}
#include "crimson/common/log.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h"
namespace crimson::os::seastore::onode {
: NodeExtent(other) {}
~SeastoreNodeExtent() override = default;
protected:
- NodeExtentRef mutate(context_t c) override;
+ NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override;
+
+ DeltaRecorder* get_recorder() const override {  // non-owning pointer to the attached recorder; nullptr when none is set
+ return recorder.get();
+ }
+
CachedExtentRef duplicate_for_write() override {
return CachedExtentRef(new SeastoreNodeExtent(*this));
}
return extent_types_t::ONODE_BLOCK_STAGED;
}
ceph::bufferlist get_delta() override {
- //TODO
- ceph_abort("not implemented");
- }
- void apply_delta(const ceph::bufferlist&) override {
- //TODO
- ceph_abort("not implemented");
+ assert(recorder);  // a recorder must be attached before a delta can be requested
+ return recorder->get_delta();  // DeltaRecorder::get_delta() additionally asserts that the delta is non-empty
}
+ void apply_delta(const ceph::bufferlist&) override;  // defined out of line
private:
- static seastar::logger& logger() {
- return crimson::get_logger(ceph_subsys_filestore);
- }
- //TODO: recorder
+ DeltaRecorderURef recorder;
};
class SeastoreNodeExtentManager final: public NodeExtentManager {
const laddr_t addr_min;
};
-inline void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) {
- logger().info("OTree::Seastore: update root {:#x} ...", addr);
- root_addr = addr;
- auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);
- nm->get_tm().write_onode_root(c.t, addr);
-}
-
-inline NodeExtentRef SeastoreNodeExtent::mutate(context_t c) {
- logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr());
- auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm);
- auto ret = nm->get_tm().get_mutable_extent(c.t, this);
- return ret->cast<SeastoreNodeExtent>();
-}
-
}
#pragma once
#include "node_extent_manager.h"
+#include "node_delta_recorder.h"
#include "node_layout_replayable.h"
namespace crimson::os::seastore::onode {
+// DeltaRecorderT: the DeltaRecorder bound at compile time to one node layout,
+// identified by its FieldType and NODE_TYPE. The encode_*() members are the
+// recording hooks called by NodeExtentT; they are still stubs and encode
+// nothing. Instances are created only through create().
+template <typename FieldType, node_type_t NODE_TYPE>
+class DeltaRecorderT final: public DeltaRecorder {
+ public:
+ using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>;
+ using position_t = typename layout_t::position_t;
+ using StagedIterator = typename layout_t::StagedIterator;
+ using value_t = typename layout_t::value_t;
+ static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE;
+
+ ~DeltaRecorderT() override = default;
+
+ // Recording hook for an insert operation (stub).
+ template <KeyT KT>
+ void encode_insert(
+ const full_key_t<KT>& key,
+ const value_t& value,
+ const position_t& insert_pos,
+ const match_stage_t& insert_stage,
+ const node_offset_t& insert_size) {
+ // TODO: encode the insert arguments into `encoded`
+ }
+
+ // Recording hook for a split operation (stub).
+ void encode_split(
+ const StagedIterator& split_at,
+ const char* p_start) {
+ // TODO: encode the split arguments into `encoded`
+ }
+
+ // Recording hook for a combined split + insert operation (stub).
+ template <KeyT KT>
+ void encode_split_insert(
+ const StagedIterator& split_at,
+ const full_key_t<KT>& key,
+ const value_t& value,
+ const position_t& insert_pos,
+ const match_stage_t& insert_stage,
+ const node_offset_t& insert_size,
+ const char* p_start) {
+ // TODO: encode the split-insert arguments into `encoded`
+ }
+
+ // Recording hook for a child address update (stub).
+ void encode_update_child_addr(
+ const laddr_t new_addr,
+ const laddr_packed_t* p_addr,
+ const char* p_start) {
+ // TODO: encode the update arguments into `encoded`
+ }
+
+ // Factory: the constructor is private, so the object is allocated here and
+ // returned through the base-class owning pointer.
+ static DeltaRecorderURef create() {
+ return std::unique_ptr<DeltaRecorder>(new DeltaRecorderT());
+ }
+
+ private:
+ DeltaRecorderT() = default;
+ node_type_t node_type() const override { return NODE_TYPE; }
+ field_type_t field_type() const override { return FIELD_TYPE; }
+ // Replay hook. Not implemented yet: abort unconditionally. A plain
+ // assert(false) compiles away under NDEBUG, and this function would then
+ // return without consuming `delta`, so a caller looping until the iterator
+ // reaches the end of the buffer would never terminate.
+ void apply_delta(ceph::bufferlist::const_iterator& delta,
+ NodeExtentMutable& node) override {
+ assert(is_empty());
+ // TODO: decode and apply
+ ceph_abort("not implemented");
+ }
+};
+
template <typename FieldType, node_type_t NODE_TYPE>
class NodeExtentT {
public:
using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>;
using node_stage_t = typename layout_t::node_stage_t;
using position_t = typename layout_t::position_t;
+ using recorder_t = DeltaRecorderT<FieldType, NODE_TYPE>;
using StagedIterator = typename layout_t::StagedIterator;
using value_t = typename layout_t::value_t;
static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE;
enum class state_t {
NO_RECORDING, // extent_state_t::INITIAL_WRITE_PENDING
+ // can mutate, no recording
RECORDING, // extent_state_t::MUTATION_PENDING
+ // can mutate, recording
PENDING_MUTATE // extent_state_t::CLEAN/DIRTY
+ // cannot mutate
};
NodeExtentT(state_t state, NodeExtentRef extent)
: state{state}, extent{extent},
node_stage{reinterpret_cast<const FieldType*>(extent->get_read())} {
if (state == state_t::NO_RECORDING) {
- assert(!mut.has_value());
mut.emplace(extent->get_mutable());
- // TODO: recorder = nullptr;
+ assert(extent->get_recorder() == nullptr);
+ recorder = nullptr;
} else if (state == state_t::RECORDING) {
- assert(!mut.has_value());
mut.emplace(extent->get_mutable());
- // TODO: get recorder from extent
+ auto p_recorder = extent->get_recorder();
+ assert(p_recorder != nullptr);
+ assert(p_recorder->node_type() == NODE_TYPE);
+ assert(p_recorder->field_type() == FIELD_TYPE);
+ recorder = static_cast<recorder_t*>(p_recorder);
} else if (state == state_t::PENDING_MUTATE) {
- // TODO: recorder = nullptr;
+ // mut is empty
+ assert(extent->get_recorder() == nullptr ||
+ extent->get_recorder()->is_empty());
+ recorder = nullptr;
} else {
ceph_abort("impossible path");
}
void prepare_mutate(context_t c) {
if (state == state_t::PENDING_MUTATE) {
assert(!extent->is_pending());
- // TODO: create and set recorder DeltaRecorderT
- extent = extent->mutate(c/* recorder */);
- assert(extent->is_mutation_pending());
+ auto ref_recorder = recorder_t::create();
+ recorder = static_cast<recorder_t*>(ref_recorder.get());  // keep a typed non-owning pointer before the owning pointer is moved away
+ extent = extent->mutate(c, std::move(ref_recorder));  // the recorder is handed over to mutate()
+
state = state_t::RECORDING;
+ assert(extent->is_mutation_pending());
node_stage = node_stage_t(
reinterpret_cast<const FieldType*>(extent->get_read()));
+ assert(recorder == static_cast<recorder_t*>(extent->get_recorder()));  // the new extent must expose the recorder that was just passed in
mut.emplace(extent->get_mutable());
}
}
- // TODO: translate absolute modifications to relative
template <KeyT KT>
const value_t* insert_replayable(
const full_key_t<KT>& key,
match_stage_t& insert_stage,
node_offset_t& insert_size) {
assert(state != state_t::PENDING_MUTATE);
- // TODO: encode params to recorder as delta
+ if (state == state_t::RECORDING) {
+ recorder->template encode_insert<KT>(
+ key, value, insert_pos, insert_stage, insert_size);
+ }
return layout_t::template insert<KT>(
*mut, read(), key, value,
insert_pos, insert_stage, insert_size);
void split_replayable(StagedIterator& split_at) {
assert(state != state_t::PENDING_MUTATE);
- // TODO: encode params to recorder as delta
+ if (state == state_t::RECORDING) {  // only the RECORDING state forwards the operation to the recorder
+ recorder->encode_split(split_at, read().p_start());  // called before layout_t::split() modifies the extent
+ }
layout_t::split(*mut, read(), split_at);
}
match_stage_t& insert_stage,
node_offset_t& insert_size) {
assert(state != state_t::PENDING_MUTATE);
- // TODO: encode params to recorder as delta
+ if (state == state_t::RECORDING) {
+ recorder->template encode_split_insert<KT>(
+ split_at, key, value, insert_pos, insert_stage, insert_size,
+ read().p_start());
+ }
return layout_t::template split_insert<KT>(
*mut, read(), split_at, key, value,
insert_pos, insert_stage, insert_size);
void update_child_addr_replayable(
const laddr_t new_addr, laddr_packed_t* p_addr) {
assert(state != state_t::PENDING_MUTATE);
- // TODO: encode params to recorder as delta
+ if (state == state_t::RECORDING) {  // only the RECORDING state forwards the operation to the recorder
+ recorder->encode_update_child_addr(new_addr, p_addr, read().p_start());  // called before layout_t::update_child_addr() modifies the extent
+ }
return layout_t::update_child_addr(*mut, new_addr, p_addr);
}
state = state_t::NO_RECORDING;
} else if (extent->is_mutation_pending()) {
state = state_t::RECORDING;
- } else if (!extent->is_valid()) {
+ } else if (extent->is_valid()) {
state = state_t::PENDING_MUTATE;
} else {
ceph_abort("invalid extent");
NodeExtentRef extent;
node_stage_t node_stage;
std::optional<NodeExtentMutable> mut;
- // TODO: DeltaRecorderT* recorder;
+ // owned by extent
+ recorder_t* recorder;
};
}