onode_manager/simple-fltree/onode_delta.cc
onode_manager/simple-fltree/onode_node.cc
seastore.cc
- root_block.cc
)
target_link_libraries(crimson-seastore
crimson)
namespace crimson::os::seastore {
+Cache::Cache(SegmentManager &segment_manager) :
+ segment_manager(segment_manager) {}
+
Cache::~Cache()
{
for (auto &i: extents) {
return i;
auto ret = i->duplicate_for_write();
- ret->version++;
- ret->last_committed_crc = i->last_committed_crc;
- ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
-
if (ret->get_type() == extent_types_t::ROOT) {
t.root = ret->cast<RootBlock>();
+ } else {
+ ret->last_committed_crc = i->last_committed_crc;
+ t.add_to_retired_set(i);
+ t.add_mutated_extent(ret);
}
- t.add_to_retired_set(i);
- t.add_mutated_extent(ret);
-
+ ret->version++;
+ ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
return ret;
}
i->last_committed_crc = final_crc;
}
+ if (t.root) {
+ record.deltas.push_back(
+ delta_info_t{
+ extent_types_t::ROOT,
+ paddr_t{},
+ 0,
+ 0,
+ 0,
+ t.root->get_version() - 1,
+ t.root->get_delta()
+ });
+ }
+
record.extents.reserve(t.fresh_block_list.size());
for (auto &i: t.fresh_block_list) {
logger().debug("try_construct_record: fresh block {}", *i);
i->prepare_write();
bl.append(i->get_bptr());
if (i->get_type() == extent_types_t::ROOT) {
- record.deltas.push_back(
- delta_info_t{
- extent_types_t::ROOT_LOCATION,
- i->get_paddr(),
- 0,
- 0,
- 0,
- 0,
- bufferlist()
- });
+ assert(0 == "ROOT never gets written as a fresh block");
}
record.extents.push_back(extent_t{std::move(bl)});
}
Transaction &t,
paddr_t final_block_start)
{
- if (t.root)
+ if (t.root) {
root = t.root;
+ root->on_delta_write(final_block_start);
+ root->state = CachedExtent::extent_state_t::DIRTY;
+ logger().debug("complete_commit: new root {}", *t.root);
+ }
paddr_t cur = final_block_start;
for (auto &i: t.fresh_block_list) {
}
}
+void Cache::init() {
+ root = new RootBlock();
+ root->state = CachedExtent::extent_state_t::DIRTY;
+}
+
Cache::mkfs_ertr::future<> Cache::mkfs(Transaction &t)
{
- t.root = alloc_new_extent<RootBlock>(t, RootBlock::SIZE);
+ duplicate_for_write(t, root);
return mkfs_ertr::now();
}
Cache::close_ertr::future<> Cache::close()
{
- retire_extent(root);
root.reset();
for (auto i = dirty.begin(); i != dirty.end(); ) {
auto ptr = &*i;
Cache::replay_delta_ret
Cache::replay_delta(paddr_t record_base, const delta_info_t &delta)
{
- if (delta.type == extent_types_t::ROOT_LOCATION) {
- auto root_location = delta.paddr.is_relative()
- ? record_base.add_record_relative(delta.paddr)
- : delta.paddr;
- logger().debug("replay_delta: found root addr {}", root_location);
+ if (delta.type == extent_types_t::ROOT) {
+ logger().debug("replay_delta: found root delta");
root->apply_delta_and_adjust_crc(record_base, delta.bl);
- return get_extent<RootBlock>(
- root_location,
- RootBlock::SIZE
- ).safe_then([this, root_location](auto ref) {
- logger().debug("replay_delta: finished reading root at {}", root_location);
- root = ref;
- return root->complete_load();
- }).safe_then([root_location] {
- logger().debug("replay_delta: finished loading root at {}", root_location);
- return replay_delta_ret(replay_delta_ertr::ready_future_marker{});
- });
+ return replay_delta_ertr::now();
+ } else {
+ return get_extent_by_type(
+ delta.type,
+ delta.paddr,
+ delta.length).safe_then([this, record_base, delta](auto extent) {
+ logger().debug(
+ "replay_delta: replaying {} on {}",
+ *extent,
+ delta);
+
+ assert(extent->version == delta.pversion);
+
+ assert(extent->last_committed_crc == delta.prev_crc);
+ extent->apply_delta_and_adjust_crc(record_base, delta.bl);
+ assert(extent->last_committed_crc == delta.final_crc);
+
+ extent->version++;
+ mark_dirty(extent);
+ });
}
-
- return get_extent_by_type(
- delta.type,
- delta.paddr,
- delta.length).safe_then([this, record_base, delta](auto extent) {
- /* TODO asserts about version */
- logger().debug(
- "replay_delta: replaying {} on {}",
- *extent,
- delta);
-
- assert(extent->version == delta.pversion);
-
- assert(extent->last_committed_crc == delta.prev_crc);
- extent->apply_delta_and_adjust_crc(record_base, delta.bl);
- assert(extent->last_committed_crc == delta.final_crc);
-
- extent->version++;
- mark_dirty(extent);
- });
}
Cache::get_root_ret Cache::get_root(Transaction &t)
segment_off_t length)
{
switch (type) {
- case extent_types_t::ROOT_LOCATION: {
- ceph_assert(0 == "root location deltas are handled specially");
- return get_extent_ertr::make_ready_future<CachedExtentRef>();
- }
case extent_types_t::ROOT:
- return get_extent<RootBlock>(offset, length
- ).safe_then([](auto extent) {
- return CachedExtentRef(extent.detach(), false /* add_ref */);
- });
+ assert(0 == "ROOT is never directly read");
+ return get_extent_ertr::make_ready_future<CachedExtentRef>();
case extent_types_t::LADDR_INTERNAL:
return get_extent<lba_manager::btree::LBAInternalNode>(offset, length
).safe_then([](auto extent) {
*/
class Cache {
public:
- Cache(SegmentManager &segment_manager) : segment_manager(segment_manager) {}
+ Cache(SegmentManager &segment_manager);
~Cache();
TransactionRef get_transaction() {
paddr_t final_block_start ///< [in] offset of initial block
);
+ /**
+ * init
+ */
+ void init();
+
/**
* mkfs
*
* Intended for use in Journal::delta. For each delta, should decode delta,
* read relevant block from disk or cache (using correct type), and call
* CachedExtent::apply_delta marking the extent dirty.
- *
- * TODO: currently only handles the ROOT_LOCATION delta.
*/
using replay_delta_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
Transaction &t)
{
logger().debug("BtreeLBAManager::mkfs");
- return cache.get_root(t).safe_then([this, &t](auto root) {
+ return cache.get_root(t).safe_then([this, &t](auto croot) {
auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
t,
LBA_BLOCK_SIZE);
root_leaf->set_size(0);
- root->get_lba_root() =
- btree_lba_root_t{
+ croot->get_lba_root() =
+ root_t{
0,
0,
- root_leaf->get_paddr() - root->get_paddr(),
- paddr_t{}};
+ root_leaf->get_paddr(),
+ make_record_relative_paddr(0)};
return mkfs_ertr::now();
});
}
BtreeLBAManager::get_root_ret
BtreeLBAManager::get_root(Transaction &t)
{
- return cache.get_root(t).safe_then([this, &t](auto root) {
- paddr_t root_addr = root->get_lba_root().lba_root_addr;
- root_addr = root_addr.maybe_relative_to(root->get_paddr());
+ return cache.get_root(t).safe_then([this, &t](auto croot) {
logger().debug(
"BtreeLBAManager::get_root: reading root at {} depth {}",
- root->get_lba_root().lba_root_addr,
- root->get_lba_root().lba_depth);
+ paddr_t{croot->get_lba_root().lba_root_addr},
+ unsigned(croot->get_lba_root().lba_depth));
return get_lba_btree_extent(
cache,
t,
- root->get_lba_root().lba_depth,
- root->get_lba_root().lba_root_addr,
- root->get_paddr());
+ croot->get_lba_root().lba_depth,
+ croot->get_lba_root().lba_root_addr,
+ paddr_t());
});
}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "crimson/os/seastore/root_block.h"
-
-namespace crimson::os::seastore {
-
-void RootBlock::prepare_write()
-{
- bufferlist tmp;
- ::encode(root, tmp);
- auto bpiter = tmp.begin();
- bpiter.copy(tmp.length(), get_bptr().c_str());
-}
-
-RootBlock::complete_load_ertr::future<> RootBlock::complete_load()
-{
- auto biter = get_bptr().cbegin();
- root.decode(biter);
- if (root.lba_root.lba_root_addr.is_relative()) {
- root.lba_root.lba_root_addr = get_paddr().add_block_relative(
- root.lba_root.lba_root_addr);
- }
- return complete_load_ertr::now();
-}
-
-void RootBlock::on_delta_write(paddr_t record_block_offset)
-{
- if (root.lba_root.lba_root_addr.is_relative()) {
- root.lba_root.lba_root_addr = record_block_offset.add_record_relative(
- root.lba_root.lba_root_addr);
- }
-}
-
-void RootBlock::on_initial_write()
-{
- if (root.lba_root.lba_root_addr.is_relative()) {
- root.lba_root.lba_root_addr = get_paddr().add_block_relative(
- root.lba_root.lba_root_addr);
- }
-}
-
-btree_lba_root_t &RootBlock::get_lba_root()
-{
- return root.lba_root;
-}
-
-}
using depth_t = uint32_t;
-/* Belongs in lba_manager/btree, TODO: generalize this to
- * permit more than one lba_manager implementation
+/**
+ * root_t
+ *
+ * Contains information required to find metadata roots.
+ * TODO: generalize this to permit more than one lba_manager implementation
*/
-struct btree_lba_root_t {
+struct __attribute__((aligned(8), packed)) root_t {
depth_t lba_depth = 0;
depth_t segment_depth = 0;
paddr_t lba_root_addr;
paddr_t segment_root;
- DENC(btree_lba_root_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.lba_depth, p);
- denc(v.segment_depth, p);
- denc(v.lba_root_addr, p);
- denc(v.segment_root, p);
- DENC_FINISH(p);
- }
-};
-
-struct root_block_t {
- btree_lba_root_t lba_root;
-
- DENC(root_block_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.lba_root, p);
- DENC_FINISH(p);
+ void adjust_addrs_from_base(paddr_t base) {
+ if (lba_root_addr.is_relative()) {
+ lba_root_addr = base.add_record_relative(lba_root_addr);
+ }
}
};
* In-memory values may be
* - absolute: reference to block which predates the current transaction
* - record_relative: reference to block updated in this transaction
- * if !is_initial_pending()
- * - block_relative: reference to block updated in this transaction
- * if is_initial_pending()
+ * if !pending()
*
- * Upon initial commit, on_initial_write checks physical references and updates
- * based on newly discovered address (relative ones must be block_relative).
+ * Journal replay only considers deltas and must always discover the most
+ * recent value for the RootBlock. Because the contents of root_t above are
+ * very small, it's simplest to stash the entire root_t value into the delta
+ * and never actually write the RootBlock to a physical location (safe since
+ * nothing references the location of the RootBlock).
*
- * complete_load also updates addresses in memory post load based on block addr.
+ * As a result, Cache treats the root differently in a few ways including:
+ * - state will only ever be DIRTY or MUTATION_PENDING
+ * - RootBlock's never show up in the transaction fresh or dirty lists --
+ * there's a special Transaction::root member for when the root needs to
+ * be mutated.
*
- * Upon delta commit, on_delta_write uses record addr to update in-memory values.
- * apply_delta will do the same once implemented (TODO).
+ * TODO: Journal trimming will need to be aware of the most recent RootBlock
+ * delta location, or, even easier, just always write one out with the
+ * mutation which changes the journal trim bound.
*/
struct RootBlock : CachedExtent {
constexpr static segment_off_t SIZE = 4<<10;
using Ref = TCachedExtentRef<RootBlock>;
- root_block_t root;
+ root_t root;
- template <typename... T>
- RootBlock(T&&... t) : CachedExtent(std::forward<T>(t)...) {}
+ RootBlock() : CachedExtent(0) {}
RootBlock(const RootBlock &rhs) = default;
return CachedExtentRef(new RootBlock(*this));
};
- /**
- * prepare_write
- *
- * For RootBlock, serializes RootBlock::root into the bptr.
- */
- void prepare_write() final;
-
static constexpr extent_types_t TYPE = extent_types_t::ROOT;
extent_types_t get_type() const final {
return extent_types_t::ROOT;
}
+ /// dumps root as delta
ceph::bufferlist get_delta() final {
- return ceph::bufferlist();
+ ceph::bufferlist bl;
+ ceph::buffer::ptr bptr(sizeof(root_t));
+ *reinterpret_cast<root_t*>(bptr.c_str()) = root;
+ bl.append(bptr);
+ return bl;
}
+ /// overwrites root
void apply_delta_and_adjust_crc(paddr_t base, const ceph::bufferlist &_bl) final {
- ceph_assert(0 == "TODO");
+ assert(_bl.length() == sizeof(root_t));
+ ceph::bufferlist bl = _bl;
+ bl.rebuild();
+ root = *reinterpret_cast<const root_t*>(bl.front().c_str());
+ root.adjust_addrs_from_base(base);
}
- /// Patches relative addrs in memory based on actual address
- complete_load_ertr::future<> complete_load() final;
-
/// Patches relative addrs in memory based on record commit addr
- void on_delta_write(paddr_t record_block_offset) final;
+ void on_delta_write(paddr_t record_block_offset) final {
+ root.adjust_addrs_from_base(record_block_offset);
+ }
- /// Patches relative addrs in memory based on record addr
- void on_initial_write() final;
+ complete_load_ertr::future<> complete_load() final {
+ assert(0 == "Root is only written via deltas");
+ }
- btree_lba_root_t &get_lba_root();
+ void on_initial_write() final {
+ assert(0 == "Root is only written via deltas");
+ }
+ root_t &get_lba_root() { return root; }
};
using RootBlockRef = RootBlock::Ref;
}
-
-WRITE_CLASS_DENC(crimson::os::seastore::btree_lba_root_t)
-WRITE_CLASS_DENC(crimson::os::seastore::root_block_t)
std::ostream &operator<<(std::ostream &out, extent_types_t t)
{
switch (t) {
- case extent_types_t::ROOT_LOCATION:
- return out << "ROOT_LOCATION";
case extent_types_t::ROOT:
return out << "ROOT";
case extent_types_t::LADDR_INTERNAL:
* Cache::get_extent_by_type in cache.cc
*/
enum class extent_types_t : uint8_t {
- ROOT_LOCATION = 0, // delta only
- ROOT = 1,
- LADDR_INTERNAL = 2,
- LADDR_LEAF = 3,
- ONODE_BLOCK = 4,
+ ROOT = 0,
+ LADDR_INTERNAL = 1,
+ LADDR_LEAF = 2,
+ ONODE_BLOCK = 3,
// Test Block Types
TEST_BLOCK = 0xF0,
lba_manager.create_transaction(),
[this](auto &transaction) {
logger().debug("TransactionManager::mkfs: about to cache.mkfs");
+ cache.init();
return cache.mkfs(*transaction
).safe_then([this, &transaction] {
return lba_manager.mkfs(*transaction);
TransactionManager::mount_ertr::future<> TransactionManager::mount()
{
+ cache.init();
return journal.replay([this](auto paddr, const auto &e) {
return cache.replay_delta(paddr, e);
}).safe_then([this] {
return seastar::do_with(
lba_manager->create_transaction(),
[this](auto &transaction) {
+ cache.init();
return cache.mkfs(*transaction
).safe_then([this, &transaction] {
return lba_manager->mkfs(*transaction);
return seastar::do_with(
TransactionRef(new Transaction()),
[this](auto &transaction) {
+ cache.init();
return cache.mkfs(*transaction).safe_then(
[this, &transaction] {
return submit_transaction(std::move(transaction)).then(
}
seastar::future<> tear_down_fut() final {
- return seastar::now();
+ return cache.close().handle_error(
+ Cache::close_ertr::assert_all{});
}
};