From f19de7c77f7d6b783802f6c18e2e9fe106c90243 Mon Sep 17 00:00:00 2001 From: Xinyu Huang Date: Sat, 10 Jun 2023 10:16:29 +0800 Subject: [PATCH] crimson/os/seastore: support no data extent in read path Signed-off-by: Xinyu Huang --- .../seastore/backref/btree_backref_manager.cc | 8 +- .../os/seastore/btree/fixed_kv_btree.h | 16 +- src/crimson/os/seastore/cache.cc | 4 + src/crimson/os/seastore/cache.h | 168 ++++++++++++------ src/crimson/os/seastore/cached_extent.h | 13 +- .../lba_manager/btree/btree_lba_manager.cc | 8 +- src/crimson/os/seastore/transaction_manager.h | 4 +- 7 files changed, 148 insertions(+), 73 deletions(-) diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc index 8be43f5ade5c5..6b75a34c981e2 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.cc +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -40,14 +40,14 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node< } else { return {false, trans_intr::make_interruptible( - seastar::make_ready_future< - CachedExtentRef>(CachedExtentRef()))}; + Cache::get_extent_ertr::make_ready_future< + CachedExtentRef>())}; } } else { return {false, trans_intr::make_interruptible( - seastar::make_ready_future< - CachedExtentRef>(CachedExtentRef()))}; + Cache::get_extent_ertr::make_ready_future< + CachedExtentRef>())}; } } diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index 197c198787f76..3ae801ea2b86f 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -28,10 +28,12 @@ bool is_valid_child_ptr(ChildableCachedExtent* child); template phy_tree_root_t& get_phy_tree_root(root_t& r); +using get_child_iertr = + ::crimson::interruptible::interruptible_errorator< + typename trans_intr::condition, + get_child_ertr>; using get_phy_tree_root_node_ret = - std::pair>; + std::pair>; template const get_phy_tree_root_node_ret get_phy_tree_root_node( @@ -1400,7 +1402,7 @@ private: }; if (found) { - return fut.then_interruptible( + return fut.si_then( [this, c, on_found_internal=std::move(on_found_internal), on_found_leaf=std::move(on_found_leaf)](auto root) { LOG_PREFIX(FixedKVBtree::lookup_root); @@ -1479,7 +1481,7 @@ private: auto v = parent->template get_child(c, node_iter); if (v.has_child()) { - return v.get_child_fut().then( + return v.get_child_fut().safe_then( [on_found=std::move(on_found), node_iter, c, parent_entry](auto child) mutable { LOG_PREFIX(FixedKVBtree::lookup_internal_level); @@ -1547,7 +1549,7 @@ private: auto v = parent->template get_child(c, node_iter); if (v.has_child()) { - return v.get_child_fut().then( + return v.get_child_fut().safe_then( [on_found=std::move(on_found), node_iter, c, parent_entry](auto child) mutable { LOG_PREFIX(FixedKVBtree::lookup_leaf); @@ -2100,7 +2102,7 @@ private: auto v = parent_pos.node->template get_child(c, donor_iter); if (v.has_child()) { - return v.get_child_fut().then( + return v.get_child_fut().safe_then( [do_merge=std::move(do_merge), &pos, donor_iter, donor_is_left, c, parent_pos](auto child) mutable { LOG_PREFIX(FixedKVBtree::merge_level); diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 7e7012226d4c1..07000dc8f76e2 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1003,6 +1003,8 @@ CachedExtentRef Cache::duplicate_for_write( Transaction &t, CachedExtentRef i) { LOG_PREFIX(Cache::duplicate_for_write); + assert(i->is_fully_loaded()); + if (i->is_mutable()) return i; @@ -1838,6 +1840,8 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents( i != dirty.end() && bytes_so_far < max_bytes; ++i) { auto dirty_from = i->get_dirty_from(); + //dirty extents must be fully loaded + assert(i->is_fully_loaded()); if (unlikely(dirty_from == JOURNAL_SEQ_NULL)) { ERRORT("got dirty extent with JOURNAL_SEQ_NULL -- {}", t, *i); ceph_abort(); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 9289dda0881b5..153d7c3c96ca3 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -332,6 +332,16 @@ public: extent_init_func(*ret); return read_extent( std::move(ret)); + } else if (!cached->is_fully_loaded()) { + auto ret = TCachedExtentRef(static_cast(cached.get())); + on_cache(*ret); + SUBDEBUG(seastore_cache, + "{} {}~{} is present without been fully loaded, reading ... -- {}", + T::TYPE, offset, length, *ret); + auto bp = alloc_cache_buf(length); + ret->set_bptr(std::move(bp)); + return read_extent( + std::move(ret)); } else { SUBTRACE(seastore_cache, "{} {}~{} is present in cache -- {}", @@ -377,31 +387,43 @@ public: auto result = t.get_extent(offset, &ret); if (result == Transaction::get_extent_ret::RETIRED) { SUBDEBUGT(seastore_cache, "{} {} is retired on t -- {}", - t, type, offset, *ret); + t, type, offset, *ret); return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(ret); } else if (result == Transaction::get_extent_ret::PRESENT) { - SUBTRACET(seastore_cache, "{} {} is present on t -- {}", - t, type, offset, *ret); - return ret->wait_io().then([ret] { - return get_extent_if_cached_iertr::make_ready_future< - CachedExtentRef>(ret); - }); + if (ret->is_fully_loaded()) { + SUBTRACET(seastore_cache, "{} {} is present on t -- {}", + t, type, offset, *ret); + return ret->wait_io().then([ret] { + return get_extent_if_cached_iertr::make_ready_future< + CachedExtentRef>(ret); + }); + } else { + SUBDEBUGT(seastore_cache, "{} {} is present on t -- {}" + " without being fully loaded", t, type, offset, *ret); + return get_extent_if_cached_iertr::make_ready_future< + CachedExtentRef>(); + } } // get_extent_ret::ABSENT from transaction auto metric_key = std::make_pair(t.get_src(), type); ret = query_cache(offset, &metric_key); - if (!ret || - // retired_placeholder is not really cached yet - ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { - SUBDEBUGT(seastore_cache, "{} {} is absent{}", - t, type, offset, !!ret ? "(placeholder)" : ""); - return get_extent_if_cached_iertr::make_ready_future< - CachedExtentRef>(); + if (!ret) { + SUBDEBUGT(seastore_cache, "{} {} is absent", t, type, offset); + return get_extent_if_cached_iertr::make_ready_future(); + } else if (ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { + // retired_placeholder is not really cached yet + SUBDEBUGT(seastore_cache, "{} {} is absent(placeholder)", + t, type, offset); + return get_extent_if_cached_iertr::make_ready_future(); + } else if (!ret->is_fully_loaded()) { + SUBDEBUGT(seastore_cache, "{} {} is present without " + "being fully loaded", t, type, offset); + return get_extent_if_cached_iertr::make_ready_future(); } - // present in cache and is not a retired_placeholder + // present in cache(fully loaded) and is not a retired_placeholder SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}", t, type, offset, *ret); t.add_to_read_set(ret); @@ -432,33 +454,41 @@ public: CachedExtentRef ret; LOG_PREFIX(Cache::get_extent); auto result = t.get_extent(offset, &ret); - if (result != Transaction::get_extent_ret::ABSENT) { - SUBTRACET(seastore_cache, "{} {}~{} is {} on t -- {}", - t, - T::TYPE, - offset, - length, - result == Transaction::get_extent_ret::PRESENT ? "present" : "retired", - *ret); - assert(result != Transaction::get_extent_ret::RETIRED); - return ret->wait_io().then([ret] { - return seastar::make_ready_future>( - ret->cast()); - }); + if (result == Transaction::get_extent_ret::RETIRED) { + SUBERRORT(seastore_cache, "{} {}~{} is retired on t -- {}", + t, T::TYPE, offset, length, *ret); + ceph_abort("impossible"); + } else if (result == Transaction::get_extent_ret::PRESENT) { + if (ret->is_fully_loaded()) { + SUBTRACET(seastore_cache, "{} {}~{} is present on t -- {}", + t, T::TYPE, offset, length, *ret); + return ret->wait_io().then([ret] { + return seastar::make_ready_future>( + ret->cast()); + }); + } else { + touch_extent(*ret); + SUBDEBUGT(seastore_cache, "{} {}~{} is present on t without been \ + fully loaded, reading ...", t, T::TYPE, offset, length); + auto bp = alloc_cache_buf(ret->get_length()); + ret->set_bptr(std::move(bp)); + return read_extent( + ret->cast()); + } + } else { + SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...", + t, T::TYPE, offset, length); + auto f = [&t, this](CachedExtent &ext) { + t.add_to_read_set(CachedExtentRef(&ext)); + touch_extent(ext); + }; + auto metric_key = std::make_pair(t.get_src(), T::TYPE); + return trans_intr::make_interruptible( + get_extent( + offset, length, &metric_key, + std::forward(extent_init_func), std::move(f)) + ); } - - SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...", - t, T::TYPE, offset, length); - auto f = [&t, this](CachedExtent &ext) { - t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext); - }; - auto metric_key = std::make_pair(t.get_src(), T::TYPE); - return trans_intr::make_interruptible( - get_extent( - offset, length, &metric_key, - std::forward(extent_init_func), std::move(f)) - ); } /* @@ -522,7 +552,7 @@ public: return get_absent_extent(t, offset, length, [](T &){}); } - seastar::future get_extent_viewable_by_trans( + get_extent_ertr::future get_extent_viewable_by_trans( Transaction &t, CachedExtentRef extent) { @@ -533,19 +563,33 @@ public: touch_extent(*p_extent); } } + // user should not see RETIRED_PLACEHOLDER extents + ceph_assert(p_extent->get_type() != extent_types_t::RETIRED_PLACEHOLDER); + if (!p_extent->is_fully_loaded()) { + touch_extent(*p_extent); + LOG_PREFIX(Cache::get_extent_viewable_by_trans); + SUBDEBUG(seastore_cache, + "{} {}~{} is present without been fully loaded, reading ... -- {}", + p_extent->get_type(), p_extent->get_paddr(),p_extent->get_length(), + *p_extent); + auto bp = alloc_cache_buf(p_extent->get_length()); + p_extent->set_bptr(std::move(bp)); + return read_extent(CachedExtentRef(p_extent)); + } return p_extent->wait_io( ).then([p_extent] { - return CachedExtentRef(p_extent); + return get_extent_ertr::make_ready_future( + CachedExtentRef(p_extent)); }); } template - seastar::future> get_extent_viewable_by_trans( + get_extent_ertr::future> get_extent_viewable_by_trans( Transaction &t, TCachedExtentRef extent) { return get_extent_viewable_by_trans(t, CachedExtentRef(extent.get()) - ).then([](auto p_extent) { + ).safe_then([](auto p_extent) { return p_extent->template cast(); }); } @@ -606,15 +650,25 @@ private: CachedExtentRef ret; auto status = t.get_extent(offset, &ret); if (status == Transaction::get_extent_ret::RETIRED) { - SUBDEBUGT(seastore_cache, "{} {}~{} {} is retired on t -- {}", + SUBERRORT(seastore_cache, "{} {}~{} {} is retired on t -- {}", t, type, offset, length, laddr, *ret); - return seastar::make_ready_future(); + ceph_abort("impossible"); } else if (status == Transaction::get_extent_ret::PRESENT) { - SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}", - t, type, offset, length, laddr, *ret); - return ret->wait_io().then([ret] { - return seastar::make_ready_future(ret); - }); + if (ret->is_fully_loaded()) { + SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}", + t, type, offset, length, laddr, *ret); + return ret->wait_io().then([ret] { + return seastar::make_ready_future(ret); + }); + } else { + touch_extent(*ret); + SUBDEBUGT(seastore_cache, "{} {}~{} {} is present on t without been \ + fully loaded, reading ...", t, type, offset, length, laddr); + auto bp = alloc_cache_buf(ret->get_length()); + ret->set_bptr(std::move(bp)); + return read_extent( + std::move(ret)); + } } else { SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...", t, type, offset, length, laddr); @@ -1515,7 +1569,9 @@ private: get_extent_ret read_extent( TCachedExtentRef&& extent ) { - assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING); + assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING || + extent->state == CachedExtent::extent_state_t::EXIST_CLEAN || + extent->state == CachedExtent::extent_state_t::CLEAN); extent->set_io_wait(); return epm.read( extent->get_paddr(), @@ -1530,7 +1586,11 @@ private: extent->last_committed_crc = extent->get_crc32c(); extent->on_clean_read(); - } else { + } else if (extent->state == CachedExtent::extent_state_t::EXIST_CLEAN || + extent->state == CachedExtent::extent_state_t::CLEAN) { + /* TODO: crc should be checked against LBA manager */ + extent->last_committed_crc = extent->get_crc32c(); + } else { ceph_assert(!extent->is_valid()); } extent->complete_io(); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 331135261ff43..25ae023b2fab8 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -616,6 +616,11 @@ private: return extent_index_hook.is_linked(); } + /// set bufferptr + void set_bptr(ceph::bufferptr &&nptr) { + ptr = nptr; + } + /// Returns true if the extent part of the open transaction bool is_pending_in_trans(transaction_id_t id) const { return is_pending() && pending_for_transaction == id; @@ -966,12 +971,14 @@ private: uint16_t pos = std::numeric_limits::max(); }; +using get_child_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; template struct get_child_ret_t { - std::variant>> ret; + std::variant>> ret; get_child_ret_t(child_pos_t pos) : ret(std::move(pos)) {} - get_child_ret_t(seastar::future> child) + get_child_ret_t(get_child_ertr::future> child) : ret(std::move(child)) {} bool has_child() const { @@ -983,7 +990,7 @@ struct get_child_ret_t { return std::get<0>(ret); } - seastar::future> &get_child_fut() { + get_child_ertr::future> &get_child_fut() { ceph_assert(ret.index() == 1); return std::get<1>(ret); } diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 296af756b756a..f1add39ba5b32 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -64,14 +64,14 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node< } else { return {false, trans_intr::make_interruptible( - seastar::make_ready_future< - CachedExtentRef>(CachedExtentRef()))}; + Cache::get_extent_ertr::make_ready_future< + CachedExtentRef>())}; } } else { return {false, trans_intr::make_interruptible( - seastar::make_ready_future< - CachedExtentRef>(CachedExtentRef()))}; + Cache::get_extent_ertr::make_ready_future< + CachedExtentRef>())}; } } diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 7a67d4efe9c4d..80b292203fedf 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -178,7 +178,7 @@ public: { auto v = pin->get_logical_extent(t); if (v.has_child()) { - return v.get_child_fut().then([](auto extent) { + return v.get_child_fut().safe_then([](auto extent) { return extent->template cast(); }); } else { @@ -635,6 +635,7 @@ private: } ).si_then([FNAME, &t](auto ref) mutable -> ret { SUBTRACET(seastore_tm, "got extent -- {}", t, *ref); + assert(ref->is_fully_loaded()); return pin_to_extent_ret( interruptible::ready_future_marker{}, std::move(ref)); @@ -675,6 +676,7 @@ private: } ).si_then([FNAME, &t](auto ref) { SUBTRACET(seastore_tm, "got extent -- {}", t, *ref); + assert(ref->is_fully_loaded()); return pin_to_extent_by_type_ret( interruptible::ready_future_marker{}, std::move(ref->template cast())); -- 2.39.5