From c84eb61f1e63e2b5d4919c509da8fac069e4b625 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 5 Jul 2021 11:28:35 +0800 Subject: [PATCH] crimson/os/seastore: leverage RetiredExtentPlaceholder to detect transaction conflicts Signed-off-by: Yingxin Cheng --- src/crimson/os/seastore/cache.cc | 67 ++++++--------- src/crimson/os/seastore/cache.h | 105 +++++++++++++++--------- src/crimson/os/seastore/cached_extent.h | 16 ++-- src/crimson/os/seastore/transaction.h | 26 ++++++ 4 files changed, 125 insertions(+), 89 deletions(-) diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 980ac230910..12d2215a398 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -43,34 +43,31 @@ Cache::retire_extent_ret Cache::retire_extent_addr( } // absent from transaction - result = query_cache_for_extent(addr, &ext); + result = query_cache_with_placeholders(addr, &ext); if (result == Transaction::get_extent_ret::PRESENT) { - t.add_to_read_set(ext); - return trans_intr::make_interruptible( - ext->wait_io() - ).then_interruptible([&t, ext=std::move(ext)]() mutable { - t.add_to_retired_set(ext); - return retire_extent_iertr::now(); - }); + if (ext->get_type() != extent_types_t::RETIRED_PLACEHOLDER) { + t.add_to_read_set(ext); + return trans_intr::make_interruptible( + ext->wait_io() + ).then_interruptible([&t, ext=std::move(ext)]() mutable { + t.add_to_retired_set(ext); + return retire_extent_iertr::now(); + }); + } + // the retired-placeholder exists } else { // result == get_extent_ret::ABSENT - // FIXME this will cause incorrect transaction invalidation because t - // will not be notified if other transactions that modify the extent at - // this addr are committed. - // - // Say that we have transaction A and B, conflicting on extent E at laddr - // L: - // A: dec_ref(L) // cause uncached retirement - // B: read(L) -> E - // B: mutate(E) - // B: submit_transaction() // assume successful - // A: about to submit... - // - // A cannot be invalidated because E is not in A's read-set - // - // TODO: leverage RetiredExtentPlaceholder to fix the issue. - t.add_to_retired_uncached(addr, length); - return retire_extent_iertr::now(); + // add a new placeholder to Cache + ext = CachedExtent::make_cached_extent_ref< + RetiredExtentPlaceholder>(length); + ext->set_paddr(addr); + ext->state = CachedExtent::extent_state_t::CLEAN; + add_extent(ext); } + + // add the retired-placeholder to transaction + t.add_to_read_set(ext); + t.add_to_retired_set(ext); + return retire_extent_iertr::now(); } void Cache::dump_contents() @@ -319,21 +316,6 @@ record_t Cache::prepare_record(Transaction &t) // Transaction is now a go, set up in-memory cache state // invalidate now invalid blocks - for (auto &&i : t.retired_uncached) { - CachedExtentRef to_retire; - if (query_cache_for_extent(i.first, &to_retire) == - Transaction::get_extent_ret::ABSENT) { - to_retire = CachedExtent::make_cached_extent_ref< - RetiredExtentPlaceholder - >(i.second); - to_retire->set_paddr(i.first); - to_retire->state = CachedExtent::extent_state_t::CLEAN; - } - - t.retired_set.insert(to_retire); - extents.insert(*to_retire); - } - for (auto &i: t.retired_set) { DEBUGT("retiring {}", t, *i); retire_extent(i); @@ -485,8 +467,11 @@ Cache::replay_delta( auto _get_extent_if_cached = [this](paddr_t addr) -> get_extent_ertr::future { CachedExtentRef ret; - auto result = query_cache_for_extent(addr, &ret); + auto result = query_cache_with_placeholders(addr, &ret); if (result == Transaction::get_extent_ret::PRESENT) { + // no retired-placeholder should be exist yet because no transaction + // has been created. + assert(ret->get_type() != extent_types_t::RETIRED_PLACEHOLDER); return ret->wait_io().then([ret] { return ret; }); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 34d7ce64b4c..87605d59b91 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -181,43 +181,42 @@ public: paddr_t offset, ///< [in] starting addr segment_off_t length ///< [in] length ) { - CachedExtentRef _ret; - auto result = query_cache_for_extent(offset, &_ret); - if (result == Transaction::get_extent_ret::PRESENT) { - auto ret = TCachedExtentRef(static_cast(_ret.get())); + CachedExtentRef cached; + auto result = query_cache_with_placeholders(offset, &cached); + if (result == Transaction::get_extent_ret::ABSENT) { + auto ret = CachedExtent::make_cached_extent_ref( + alloc_cache_buf(length)); + ret->set_paddr(offset); + ret->state = CachedExtent::extent_state_t::CLEAN; + add_extent(ret); + return read_extent(std::move(ret)); + } + + // extent PRESENT in cache + if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { + auto ret = CachedExtent::make_cached_extent_ref( + alloc_cache_buf(length)); + ret->set_paddr(offset); + ret->state = CachedExtent::extent_state_t::CLEAN; + extents.replace(*ret, *cached); + + // replace placeholder in transactions + while (!cached->transactions.empty()) { + auto t = cached->transactions.begin()->t; + t->replace_placeholder(*cached, *ret); + } + + cached->state = CachedExtent::extent_state_t::INVALID; + return read_extent(std::move(ret)); + } else { + auto ret = TCachedExtentRef(static_cast(cached.get())); return ret->wait_io( ).then([ret=std::move(ret)]() mutable -> get_extent_ret { - // ret may be invalid, caller must check - return get_extent_ret( - get_extent_ertr::ready_future_marker{}, - std::move(ret)); + // ret may be invalid, caller must check + return get_extent_ret( + get_extent_ertr::ready_future_marker{}, + std::move(ret)); }); - } else { // get_extent_ret::ABSENT - auto ref = CachedExtent::make_cached_extent_ref( - alloc_cache_buf(length)); - ref->set_io_wait(); - ref->set_paddr(offset); - ref->state = CachedExtent::extent_state_t::CLEAN; - add_extent(ref); - - return segment_manager.read( - offset, - length, - ref->get_bptr()).safe_then( - [ref=std::move(ref)]() mutable { - /* TODO: crc should be checked against LBA manager */ - ref->last_committed_crc = ref->get_crc32c(); - - ref->on_clean_read(); - ref->complete_io(); - return get_extent_ertr::make_ready_future>( - std::move(ref)); - }, - get_extent_ertr::pass_further{}, - crimson::ct_error::assert_all{ - "Cache::get_extent: invalid error" - } - ); } } @@ -240,14 +239,16 @@ public: CachedExtentRef>(ret); } - // get_extent_ret::PRESENT from transaction - result = query_cache_for_extent(offset, &ret); - if (result == Transaction::get_extent_ret::ABSENT) { + // get_extent_ret::ABSENT from transaction + result = query_cache_with_placeholders(offset, &ret); + if (result == Transaction::get_extent_ret::ABSENT || + // retired_placeholder is not really cached yet + ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(); } - // get_extent_ret::PRESENT from cache + // get_extent_ret::PRESENT from cache and is not placeholder t.add_to_read_set(ret); return ret->wait_io().then([ret] { return get_extent_if_cached_iertr::make_ready_future< @@ -590,7 +591,33 @@ private: /// Invalidate extent and mark affected transactions void invalidate(CachedExtent &extent); - Transaction::get_extent_ret query_cache_for_extent( + template + get_extent_ret read_extent( + TCachedExtentRef&& extent + ) { + extent->set_io_wait(); + return segment_manager.read( + extent->get_paddr(), + extent->get_length(), + extent->get_bptr() + ).safe_then( + [extent=std::move(extent)]() mutable { + /* TODO: crc should be checked against LBA manager */ + extent->last_committed_crc = extent->get_crc32c(); + + extent->on_clean_read(); + extent->complete_io(); + return get_extent_ertr::make_ready_future>( + std::move(extent)); + }, + get_extent_ertr::pass_further{}, + crimson::ct_error::assert_all{ + "Cache::get_extent: invalid error" + } + ); + } + + Transaction::get_extent_ret query_cache_with_placeholders( paddr_t offset, CachedExtentRef *out) { if (auto iter = extents.find_offset(offset); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index ed696491c2d..4cfc76c0168 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -694,15 +694,13 @@ inline void retired_extent_gate_t::token_t::drop_self() { /** * RetiredExtentPlaceholder * - * Cache::retire_extent(Transaction&, paddr_t, extent_len_t) can retire - * an extent not currently in cache. In that case, we need to add a - * placeholder to the cache until transactions that might reference - * the extent complete as in the case where the extent is already cached. - * Cache::complete_commit thus creates a RetiredExtentPlaceholder to - * serve that purpose. ptr is not populated, and state is set to - * RETIRED. Cache interfaces check for RETIRED and return EAGAIN if - * encountered, so references to these placeholder extents should not - * escape the Cache interface boundary. + * Cache::retire_extent_addr(Transaction&, paddr_t, extent_len_t) can retire an + * extent not currently in cache. In that case, in order to detect transaction + * invalidation, we need to add a placeholder to the cache to create the + * mapping back to the transaction. And whenever there is a transaction tries + * to read the placeholder extent out, Cache is responsible to replace the + * placeholder by the real one. Anyway, No placeholder extents should escape + * the Cache interface boundary. */ class RetiredExtentPlaceholder : public CachedExtent { extent_len_t length; diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 25b437f2770..ef77e17924a 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -42,6 +42,9 @@ public: } else if ( auto iter = read_set.find(addr); iter != read_set.end()) { + // placeholder in read-set should be in the retired-set + // at the same time. + assert(iter->ref->get_type() != extent_types_t::RETIRED_PLACEHOLDER); if (out) *out = iter->ref; return get_extent_ret::PRESENT; @@ -90,6 +93,29 @@ public: write_set.insert(*ref); } + void replace_placeholder(CachedExtent& placeholder, CachedExtent& extent) { + ceph_assert(!is_weak()); + + assert(placeholder.get_type() == extent_types_t::RETIRED_PLACEHOLDER); + assert(extent.get_type() != extent_types_t::RETIRED_PLACEHOLDER); + assert(extent.get_type() != extent_types_t::ROOT); + assert(extent.get_paddr() == placeholder.get_paddr()); + { + auto where = read_set.find(placeholder.get_paddr()); + assert(where != read_set.end()); + assert(where->ref.get() == &placeholder); + where = read_set.erase(where); + read_set.emplace_hint(where, this, &extent); + } + { + auto where = retired_set.find(&placeholder); + assert(where != retired_set.end()); + assert(where->get() == &placeholder); + where = retired_set.erase(where); + retired_set.emplace_hint(where, &extent); + } + } + void mark_segment_to_release(segment_id_t segment) { assert(to_release == NULL_SEG_ID); to_release = segment; -- 2.39.5