From: Samuel Just Date: Tue, 11 Aug 2020 17:47:28 +0000 (-0700) Subject: crimson/os/seastore/lba_manager: add rewrite_extent X-Git-Tag: v17.0.0~1015^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c907e2a11da8ae14f794d54b13953769a17c2df9;p=ceph.git crimson/os/seastore/lba_manager: add rewrite_extent Adds support for writing out an extent to a new location and updating lba mappings. Signed-off-by: Samuel Just --- diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 48ff8a36f07b0..649c511c1e7d7 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -115,6 +115,36 @@ void Cache::replace_extent(CachedExtentRef next, CachedExtentRef prev) } } +CachedExtentRef Cache::alloc_new_extent_by_type( + Transaction &t, ///< [in, out] current transaction + extent_types_t type, ///< [in] type tag + segment_off_t length ///< [in] length +) +{ + switch (type) { + case extent_types_t::ROOT: + assert(0 == "ROOT is never directly alloc'd"); + return CachedExtentRef(); + case extent_types_t::LADDR_INTERNAL: + return alloc_new_extent(t, length); + case extent_types_t::LADDR_LEAF: + return alloc_new_extent(t, length); + case extent_types_t::ONODE_BLOCK: + return alloc_new_extent(t, length); + case extent_types_t::TEST_BLOCK: + return alloc_new_extent(t, length); + case extent_types_t::TEST_BLOCK_PHYSICAL: + return alloc_new_extent(t, length); + case extent_types_t::NONE: { + ceph_assert(0 == "NONE is an invalid extent type"); + return CachedExtentRef(); + } + default: + ceph_assert(0 == "impossible"); + return CachedExtentRef(); + } +} + CachedExtentRef Cache::duplicate_for_write( Transaction &t, CachedExtentRef i) { diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index d73433d4833ae..72c98e65a9d50 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -244,6 +244,17 @@ public: return ret; } + /** + * alloc_new_extent + * + * Allocates a fresh extent. addr will be relative until commit. + */ + CachedExtentRef alloc_new_extent_by_type( + Transaction &t, ///< [in, out] current transaction + extent_types_t type, ///< [in] type tag + segment_off_t length ///< [in] length + ); + /** * Allocates mutable buffer from extent_set on offset~len * @@ -354,6 +365,31 @@ public: }); } + /** + * update_extent_from_transaction + * + * Updates passed extent based on t. If extent has been retired, + * a null result will be returned. + */ + CachedExtentRef update_extent_from_transaction( + Transaction &t, + CachedExtentRef extent) { + if (extent->get_type() == extent_types_t::ROOT) { + if (t.root) { + return t.root; + } else { + return extent; + } + } else { + auto result = t.get_extent(extent->get_paddr(), &extent); + if (result == Transaction::get_extent_ret::RETIRED) { + return CachedExtentRef(); + } else { + return extent; + } + } + } + /** * print * diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index d9c68ef0c392d..fa897a2777908 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -135,6 +135,20 @@ public: Transaction &t, CachedExtentRef e) = 0; + + /** + * rewrite_extent + * + * rewrite extent into passed transaction + */ + using rewrite_extent_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; + using rewrite_extent_ret = rewrite_extent_ertr::future<>; + virtual rewrite_extent_ret rewrite_extent( + Transaction &t, + CachedExtentRef extent) = 0; + + virtual void add_pin(LBAPin &pin) = 0; virtual ~LBAManager() {} diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 7d6ae234e8cc6..edc55a0c44167 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -293,6 +293,82 @@ BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent( }); } +BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent( + Transaction &t, + CachedExtentRef extent) +{ + if (extent->is_logical()) { + auto lextent = extent->cast(); + cache.retire_extent(t, extent); + auto nlextent = cache.alloc_new_extent_by_type( + t, + lextent->get_type(), + lextent->get_length())->cast(); + lextent->get_bptr().copy_out( + 0, + lextent->get_length(), + nlextent->get_bptr().c_str()); + nlextent->set_laddr(lextent->get_laddr()); + nlextent->set_pin(lextent->get_pin().duplicate()); + + logger().debug( + "{}: rewriting {} into {}", + __func__, + *lextent, + *nlextent); + + return update_mapping( + t, + lextent->get_laddr(), + [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()]( + const lba_map_val_t &in) { + lba_map_val_t ret = in; + ceph_assert(in.paddr == prev_addr); + ret.paddr = addr; + return ret; + }).safe_then([nlextent](auto e) {}).handle_error( + rewrite_extent_ertr::pass_further{}, + /* ENOENT in particular should be impossible */ + crimson::ct_error::assert_all{} + ); + } else if (is_lba_node(*extent)) { + auto lba_extent = extent->cast(); + cache.retire_extent(t, extent); + auto nlba_extent = cache.alloc_new_extent_by_type( + t, + lba_extent->get_type(), + lba_extent->get_length())->cast(); + lba_extent->get_bptr().copy_out( + 0, + lba_extent->get_length(), + nlba_extent->get_bptr().c_str()); + nlba_extent->pin.set_range(nlba_extent->get_node_meta()); + + /* This is a bit underhanded. Any relative addrs here must necessarily + * be record relative as we are rewriting a dirty extent. Thus, we + * are using resolve_relative_addrs with a (likely negative) block + * relative offset to correct them to block-relative offsets adjusted + * for our new transaction location. + * + * Upon commit, these now block relative addresses will be interpretted + * against the real final address. + */ + nlba_extent->resolve_relative_addrs( + make_record_relative_paddr(0) - nlba_extent->get_paddr()); + + return update_internal_mapping( + t, + nlba_extent->get_node_meta().depth, + nlba_extent->get_node_meta().begin, + nlba_extent->get_paddr()).safe_then( + [](auto) {}, + rewrite_extent_ertr::pass_further {}, + crimson::ct_error::assert_all{}); + } else { + return rewrite_extent_ertr::now(); + } +} + BtreeLBAManager::BtreeLBAManager( SegmentManager &segment_manager, Cache &cache) @@ -373,4 +449,48 @@ BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping( }); } +BtreeLBAManager::update_internal_mapping_ret +BtreeLBAManager::update_internal_mapping( + Transaction &t, + depth_t depth, + laddr_t laddr, + paddr_t paddr) +{ + return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) { + if (depth == croot->get_lba_root().lba_depth) { + logger().debug( + "update_internal_mapping: updating lba root to: {}->{}", + laddr, + paddr); + { + auto mut_croot = cache.duplicate_for_write(t, croot); + croot = mut_croot->cast(); + } + ceph_assert(laddr == 0); + auto old_paddr = croot->get_lba_root().lba_root_addr; + croot->get_lba_root().lba_root_addr = paddr; + return update_internal_mapping_ret( + update_internal_mapping_ertr::ready_future_marker{}, + old_paddr); + } else { + logger().debug( + "update_internal_mapping: updating lba node at depth {} to: {}->{}", + depth, + laddr, + paddr); + return get_lba_btree_extent( + get_context(t), + croot->get_lba_root().lba_depth, + croot->get_lba_root().lba_root_addr, + paddr_t()).safe_then([=, &t](LBANodeRef broot) { + return broot->mutate_internal_address( + get_context(t), + depth, + laddr, + paddr); + }); + } + }); +} + } diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index 720f3ae4e79ec..1dec91c9c15df 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -87,6 +87,10 @@ public: Transaction &t, CachedExtentRef e) final; + rewrite_extent_ret rewrite_extent( + Transaction &t, + CachedExtentRef extent); + void add_pin(LBAPin &pin) final { pin_set.add_pin(reinterpret_cast(&pin)->pin); } @@ -151,6 +155,14 @@ private: Transaction &t, laddr_t addr, update_func_t &&f); + + using update_internal_mapping_ertr = LBANode::mutate_internal_address_ertr; + using update_internal_mapping_ret = LBANode::mutate_internal_address_ret; + update_internal_mapping_ret update_internal_mapping( + Transaction &t, + depth_t depth, + laddr_t laddr, + paddr_t paddr); }; using BtreeLBAManagerRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index 86b7c8c5b619e..852e989818c2d 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -138,6 +138,25 @@ struct LBANode : CachedExtent { laddr_t laddr, mutate_func_t &&f) = 0; + /** + * mutate_internal_address + * + * Looks up internal node mapping at laddr, depth and + * updates the mapping to paddr. Returns previous paddr + * (for debugging purposes). + */ + using mutate_internal_address_ertr = crimson::errorator< + crimson::ct_error::enoent, ///< mapping does not exist + crimson::ct_error::input_output_error + >; + using mutate_internal_address_ret = mutate_internal_address_ertr::future< + paddr_t>; + virtual mutate_internal_address_ret mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) = 0; + /** * make_split_children * @@ -201,7 +220,6 @@ struct LBANode : CachedExtent { resolve_relative_addrs(get_paddr()); } -protected: virtual void resolve_relative_addrs(paddr_t base) = 0; }; using LBANodeRef = LBANode::LBANodeRef; diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc index 40ebcedd65d09..07a7b4f2e9ab8 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc @@ -39,12 +39,11 @@ LBAInternalNode::lookup_ret LBAInternalNode::lookup( } assert(meta.begin <= addr); assert(meta.end > addr); - auto [begin, end] = bound(addr, 0); - assert(begin == end + 1); + auto iter = lower_bound(addr); return get_lba_btree_extent( c, - meta.depth, - begin->get_val(), + meta.depth - 1, + iter->get_val(), get_paddr()).safe_then([c, addr, depth](auto child) { return child->lookup(c, addr, depth); }); @@ -131,6 +130,54 @@ LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping( }); } +LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) +{ + if (get_meta().depth == (depth + 1)) { + if (!is_pending()) { + return c.cache.duplicate_for_write(c.trans, this)->cast( + )->mutate_internal_address( + c, + depth, + laddr, + paddr); + } + auto iter = get_containing_child(laddr); + if (iter->get_key() != laddr) { + return crimson::ct_error::enoent::make(); + } + + auto old_paddr = iter->get_val(); + + journal_update( + iter, + maybe_generate_relative(paddr), + maybe_get_delta_buffer()); + + return mutate_internal_address_ret( + mutate_internal_address_ertr::ready_future_marker{}, + old_paddr + ); + } else { + auto iter = get_containing_child(laddr); + return get_lba_btree_extent( + c, + get_meta().depth - 1, + iter->get_val(), + get_paddr() + ).safe_then([=](auto node) { + return node->mutate_internal_address( + c, + depth, + laddr, + paddr); + }); + } +} + LBAInternalNode::find_hole_ret LBAInternalNode::find_hole( op_context_t c, laddr_t min, @@ -434,6 +481,18 @@ LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping( } } +LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) +{ + ceph_assert(0 == "Impossible"); + return mutate_internal_address_ret( + mutate_internal_address_ertr::ready_future_marker{}, + paddr); +} + LBALeafNode::find_hole_ret LBALeafNode::find_hole( op_context_t c, laddr_t min, diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h index f6167b7e58450..dbdf62e35b198 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h @@ -109,6 +109,12 @@ struct LBAInternalNode laddr_t laddr, mutate_func_t &&f) final; + mutate_internal_address_ret mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) final; + find_hole_ret find_hole( op_context_t c, laddr_t min, @@ -348,6 +354,12 @@ struct LBALeafNode laddr_t laddr, mutate_func_t &&f) final; + mutate_internal_address_ret mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) final; + find_hole_ret find_hole( op_context_t c, laddr_t min,