git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: support no data extent in read path
author Xinyu Huang <xinyu.huang@intel.com>
Sat, 10 Jun 2023 02:16:29 +0000 (10:16 +0800)
committer Xinyu Huang <xinyu.huang@intel.com>
Tue, 4 Jul 2023 09:07:08 +0000 (17:07 +0800)
Signed-off-by: Xinyu Huang <xinyu.huang@intel.com>
src/crimson/os/seastore/backref/btree_backref_manager.cc
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/transaction_manager.h

index 8be43f5ade5c5918deddd3ea0ae00888c5321d12..6b75a34c981e24319c19daae8f4821e926bca203 100644 (file)
@@ -40,14 +40,14 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node<
     } else {
       return {false,
              trans_intr::make_interruptible(
-               seastar::make_ready_future<
-                 CachedExtentRef>(CachedExtentRef()))};
+               Cache::get_extent_ertr::make_ready_future<
+                 CachedExtentRef>())};
     }
   } else {
     return {false,
            trans_intr::make_interruptible(
-             seastar::make_ready_future<
-               CachedExtentRef>(CachedExtentRef()))};
+             Cache::get_extent_ertr::make_ready_future<
+               CachedExtentRef>())};
   }
 }
 
index 197c198787f766e3773cc6b6874f3a982d725b88..3ae801ea2b86fff517026a013ae20e79a08fc316 100644 (file)
@@ -28,10 +28,12 @@ bool is_valid_child_ptr(ChildableCachedExtent* child);
 template <typename T>
 phy_tree_root_t& get_phy_tree_root(root_t& r);
 
+using get_child_iertr =
+  ::crimson::interruptible::interruptible_errorator<
+    typename trans_intr::condition,
+    get_child_ertr>;
 using get_phy_tree_root_node_ret =
-  std::pair<bool,
-            ::crimson::interruptible::interruptible_future<
-              typename trans_intr::condition, CachedExtentRef>>;
+  std::pair<bool, get_child_iertr::future<CachedExtentRef>>;
 
 template <typename T, typename key_t>
 const get_phy_tree_root_node_ret get_phy_tree_root_node(
@@ -1400,7 +1402,7 @@ private:
     };
 
     if (found) {
-      return fut.then_interruptible(
+      return fut.si_then(
         [this, c, on_found_internal=std::move(on_found_internal),
         on_found_leaf=std::move(on_found_leaf)](auto root) {
         LOG_PREFIX(FixedKVBtree::lookup_root);
@@ -1479,7 +1481,7 @@ private:
 
     auto v = parent->template get_child<internal_node_t>(c, node_iter);
     if (v.has_child()) {
-      return v.get_child_fut().then(
+      return v.get_child_fut().safe_then(
         [on_found=std::move(on_found), node_iter, c,
         parent_entry](auto child) mutable {
         LOG_PREFIX(FixedKVBtree::lookup_internal_level);
@@ -1547,7 +1549,7 @@ private:
 
     auto v = parent->template get_child<leaf_node_t>(c, node_iter);
     if (v.has_child()) {
-      return v.get_child_fut().then(
+      return v.get_child_fut().safe_then(
         [on_found=std::move(on_found), node_iter, c,
         parent_entry](auto child) mutable {
         LOG_PREFIX(FixedKVBtree::lookup_leaf);
@@ -2100,7 +2102,7 @@ private:
 
     auto v = parent_pos.node->template get_child<NodeType>(c, donor_iter);
     if (v.has_child()) {
-      return v.get_child_fut().then(
+      return v.get_child_fut().safe_then(
         [do_merge=std::move(do_merge), &pos,
         donor_iter, donor_is_left, c, parent_pos](auto child) mutable {
         LOG_PREFIX(FixedKVBtree::merge_level);
index 7e7012226d4c12e54930f7ecdc2f29a3efd39c41..07000dc8f76e22e872fdf56a44fdb03067c171fd 100644 (file)
@@ -1003,6 +1003,8 @@ CachedExtentRef Cache::duplicate_for_write(
   Transaction &t,
   CachedExtentRef i) {
   LOG_PREFIX(Cache::duplicate_for_write);
+  assert(i->is_fully_loaded());
+
   if (i->is_mutable())
     return i;
 
@@ -1838,6 +1840,8 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
        i != dirty.end() && bytes_so_far < max_bytes;
        ++i) {
     auto dirty_from = i->get_dirty_from();
+    //dirty extents must be fully loaded
+    assert(i->is_fully_loaded());
     if (unlikely(dirty_from == JOURNAL_SEQ_NULL)) {
       ERRORT("got dirty extent with JOURNAL_SEQ_NULL -- {}", t, *i);
       ceph_abort();
index 9289dda0881b579a35fffd9c5f0a10aba3591a4f..153d7c3c96ca32ec8ab6ccbe41a3c75c4fa60926 100644 (file)
@@ -332,6 +332,16 @@ public:
       extent_init_func(*ret);
       return read_extent<T>(
        std::move(ret));
+    } else if (!cached->is_fully_loaded()) {
+      auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
+      on_cache(*ret);
+      SUBDEBUG(seastore_cache,
+        "{} {}~{} is present without been fully loaded, reading ... -- {}",
+        T::TYPE, offset, length, *ret);
+      auto bp = alloc_cache_buf(length);
+      ret->set_bptr(std::move(bp));
+      return read_extent<T>(
+        std::move(ret));
     } else {
       SUBTRACE(seastore_cache,
           "{} {}~{} is present in cache -- {}",
@@ -377,31 +387,43 @@ public:
     auto result = t.get_extent(offset, &ret);
     if (result == Transaction::get_extent_ret::RETIRED) {
       SUBDEBUGT(seastore_cache, "{} {} is retired on t -- {}",
-          t, type, offset, *ret);
+                t, type, offset, *ret);
       return get_extent_if_cached_iertr::make_ready_future<
         CachedExtentRef>(ret);
     } else if (result == Transaction::get_extent_ret::PRESENT) {
-      SUBTRACET(seastore_cache, "{} {} is present on t -- {}",
-          t, type, offset, *ret);
-      return ret->wait_io().then([ret] {
-       return get_extent_if_cached_iertr::make_ready_future<
-         CachedExtentRef>(ret);
-      });
+      if (ret->is_fully_loaded()) {
+        SUBTRACET(seastore_cache, "{} {} is present on t -- {}",
+                  t, type, offset, *ret);
+        return ret->wait_io().then([ret] {
+         return get_extent_if_cached_iertr::make_ready_future<
+           CachedExtentRef>(ret);
+        });
+      } else {
+        SUBDEBUGT(seastore_cache, "{} {} is present on t -- {}"
+                  " without being fully loaded", t, type, offset, *ret);
+        return get_extent_if_cached_iertr::make_ready_future<
+          CachedExtentRef>();
+      }
     }
 
     // get_extent_ret::ABSENT from transaction
     auto metric_key = std::make_pair(t.get_src(), type);
     ret = query_cache(offset, &metric_key);
-    if (!ret ||
-        // retired_placeholder is not really cached yet
-        ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
-      SUBDEBUGT(seastore_cache, "{} {} is absent{}",
-                t, type, offset, !!ret ? "(placeholder)" : "");
-      return get_extent_if_cached_iertr::make_ready_future<
-        CachedExtentRef>();
+    if (!ret) {
+      SUBDEBUGT(seastore_cache, "{} {} is absent", t, type, offset);
+      return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
+    } else if (ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
+      // retired_placeholder is not really cached yet
+      SUBDEBUGT(seastore_cache, "{} {} is absent(placeholder)",
+                t, type, offset);
+      return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
+    } else if (!ret->is_fully_loaded()) {
+      SUBDEBUGT(seastore_cache, "{} {} is present without "
+                "being fully loaded", t, type, offset);
+      return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
     }
 
-    // present in cache and is not a retired_placeholder
+    // present in cache(fully loaded) and is not a retired_placeholder
     SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}",
               t, type, offset, *ret);
     t.add_to_read_set(ret);
@@ -432,33 +454,41 @@ public:
     CachedExtentRef ret;
     LOG_PREFIX(Cache::get_extent);
     auto result = t.get_extent(offset, &ret);
-    if (result != Transaction::get_extent_ret::ABSENT) {
-      SUBTRACET(seastore_cache, "{} {}~{} is {} on t -- {}",
-         t,
-         T::TYPE,
-         offset,
-         length,
-         result == Transaction::get_extent_ret::PRESENT ? "present" : "retired",
-         *ret);
-      assert(result != Transaction::get_extent_ret::RETIRED);
-      return ret->wait_io().then([ret] {
-       return seastar::make_ready_future<TCachedExtentRef<T>>(
-         ret->cast<T>());
-      });
+    if (result == Transaction::get_extent_ret::RETIRED) {
+      SUBERRORT(seastore_cache, "{} {}~{} is retired on t -- {}",
+                t, T::TYPE, offset, length, *ret);
+      ceph_abort("impossible");
+    } else if (result == Transaction::get_extent_ret::PRESENT) {
+      if (ret->is_fully_loaded()) {
+        SUBTRACET(seastore_cache, "{} {}~{} is present on t -- {}",
+                  t, T::TYPE, offset, length, *ret);
+        return ret->wait_io().then([ret] {
+         return seastar::make_ready_future<TCachedExtentRef<T>>(
+            ret->cast<T>());
+        });
+      } else {
+        touch_extent(*ret);
+        SUBDEBUGT(seastore_cache, "{} {}~{} is present on t without been \
+          fully loaded, reading ...", t, T::TYPE, offset, length);
+        auto bp = alloc_cache_buf(ret->get_length());
+        ret->set_bptr(std::move(bp));
+        return read_extent<T>(
+          ret->cast<T>());
+      }
+    } else {
+      SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...",
+                t, T::TYPE, offset, length);
+      auto f = [&t, this](CachedExtent &ext) {
+        t.add_to_read_set(CachedExtentRef(&ext));
+        touch_extent(ext);
+      };
+      auto metric_key = std::make_pair(t.get_src(), T::TYPE);
+      return trans_intr::make_interruptible(
+        get_extent<T>(
+         offset, length, &metric_key,
+         std::forward<Func>(extent_init_func), std::move(f))
+      );
     }
-
-    SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...",
-             t, T::TYPE, offset, length);
-    auto f = [&t, this](CachedExtent &ext) {
-      t.add_to_read_set(CachedExtentRef(&ext));
-      touch_extent(ext);
-    };
-    auto metric_key = std::make_pair(t.get_src(), T::TYPE);
-    return trans_intr::make_interruptible(
-      get_extent<T>(
-       offset, length, &metric_key,
-       std::forward<Func>(extent_init_func), std::move(f))
-    );
   }
 
   /*
@@ -522,7 +552,7 @@ public:
     return get_absent_extent<T>(t, offset, length, [](T &){});
   }
 
-  seastar::future<CachedExtentRef> get_extent_viewable_by_trans(
+  get_extent_ertr::future<CachedExtentRef> get_extent_viewable_by_trans(
     Transaction &t,
     CachedExtentRef extent)
   {
@@ -533,19 +563,33 @@ public:
        touch_extent(*p_extent);
       }
     }
+    // user should not see RETIRED_PLACEHOLDER extents
+    ceph_assert(p_extent->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
+    if (!p_extent->is_fully_loaded()) {
+      touch_extent(*p_extent);
+      LOG_PREFIX(Cache::get_extent_viewable_by_trans);
+      SUBDEBUG(seastore_cache,
+        "{} {}~{} is present without been fully loaded, reading ... -- {}",
+        p_extent->get_type(), p_extent->get_paddr(),p_extent->get_length(),
+        *p_extent);
+      auto bp = alloc_cache_buf(p_extent->get_length());
+      p_extent->set_bptr(std::move(bp));
+      return read_extent<CachedExtent>(CachedExtentRef(p_extent));
+    }
     return p_extent->wait_io(
     ).then([p_extent] {
-      return CachedExtentRef(p_extent);
+      return get_extent_ertr::make_ready_future<CachedExtentRef>(
+        CachedExtentRef(p_extent));
     });
   }
 
   template <typename T>
-  seastar::future<TCachedExtentRef<T>> get_extent_viewable_by_trans(
+  get_extent_ertr::future<TCachedExtentRef<T>> get_extent_viewable_by_trans(
     Transaction &t,
     TCachedExtentRef<T> extent)
   {
     return get_extent_viewable_by_trans(t, CachedExtentRef(extent.get())
-    ).then([](auto p_extent) {
+    ).safe_then([](auto p_extent) {
       return p_extent->template cast<T>();
     });
   }
@@ -606,15 +650,25 @@ private:
     CachedExtentRef ret;
     auto status = t.get_extent(offset, &ret);
     if (status == Transaction::get_extent_ret::RETIRED) {
-      SUBDEBUGT(seastore_cache, "{} {}~{} {} is retired on t -- {}",
+      SUBERRORT(seastore_cache, "{} {}~{} {} is retired on t -- {}",
                 t, type, offset, length, laddr, *ret);
-      return seastar::make_ready_future<CachedExtentRef>();
+      ceph_abort("impossible");
     } else if (status == Transaction::get_extent_ret::PRESENT) {
-      SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}",
-                t, type, offset, length, laddr, *ret);
-      return ret->wait_io().then([ret] {
-       return seastar::make_ready_future<CachedExtentRef>(ret);
-      });
+      if (ret->is_fully_loaded()) {
+        SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}",
+                  t, type, offset, length, laddr, *ret);
+        return ret->wait_io().then([ret] {
+         return seastar::make_ready_future<CachedExtentRef>(ret);
+        });
+      } else {
+        touch_extent(*ret);
+        SUBDEBUGT(seastore_cache, "{} {}~{} {} is present on t without been \
+                  fully loaded, reading ...", t, type, offset, length, laddr);
+        auto bp = alloc_cache_buf(ret->get_length());
+        ret->set_bptr(std::move(bp));
+        return read_extent<CachedExtent>(
+          std::move(ret));
+      }
     } else {
       SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...",
                 t, type, offset, length, laddr);
@@ -1515,7 +1569,9 @@ private:
   get_extent_ret<T> read_extent(
     TCachedExtentRef<T>&& extent
   ) {
-    assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING);
+    assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING ||
+      extent->state == CachedExtent::extent_state_t::EXIST_CLEAN ||
+      extent->state == CachedExtent::extent_state_t::CLEAN);
     extent->set_io_wait();
     return epm.read(
       extent->get_paddr(),
@@ -1530,7 +1586,11 @@ private:
          extent->last_committed_crc = extent->get_crc32c();
 
          extent->on_clean_read();
-       } else {
+       } else if (extent->state == CachedExtent::extent_state_t::EXIST_CLEAN ||
+          extent->state == CachedExtent::extent_state_t::CLEAN) {
+         /* TODO: crc should be checked against LBA manager */
+         extent->last_committed_crc = extent->get_crc32c();
+        } else {
          ceph_assert(!extent->is_valid());
        }
         extent->complete_io();
index 331135261ff43ab058542b2e2f6b408a8f275aea..25ae023b2fab8ebbe8c2f84fd2d83f0c19e75359 100644 (file)
@@ -616,6 +616,11 @@ private:
     return extent_index_hook.is_linked();
   }
 
+  /// Install nptr as this extent's buffer; nptr is moved from, not copied.
+  void set_bptr(ceph::bufferptr &&nptr) {
+    ptr = std::move(nptr);
+  }
+
   /// Returns true if the extent part of the open transaction
   bool is_pending_in_trans(transaction_id_t id) const {
     return is_pending() && pending_for_transaction == id;
@@ -966,12 +971,14 @@ private:
   uint16_t pos = std::numeric_limits<uint16_t>::max();
 };
 
+using get_child_ertr = crimson::errorator<
+  crimson::ct_error::input_output_error>;
 template <typename T>
 struct get_child_ret_t {
-  std::variant<child_pos_t, seastar::future<TCachedExtentRef<T>>> ret;
+  std::variant<child_pos_t, get_child_ertr::future<TCachedExtentRef<T>>> ret;
   get_child_ret_t(child_pos_t pos)
     : ret(std::move(pos)) {}
-  get_child_ret_t(seastar::future<TCachedExtentRef<T>> child)
+  get_child_ret_t(get_child_ertr::future<TCachedExtentRef<T>> child)
     : ret(std::move(child)) {}
 
   bool has_child() const {
@@ -983,7 +990,7 @@ struct get_child_ret_t {
     return std::get<0>(ret);
   }
 
-  seastar::future<TCachedExtentRef<T>> &get_child_fut() {
+  get_child_ertr::future<TCachedExtentRef<T>> &get_child_fut() {
     ceph_assert(ret.index() == 1);
     return std::get<1>(ret);
   }
index 296af756b756a67ecbf525bd02547fd776b20377..f1add39ba5b32c4cb08b07ee5f7d6c6335f42003 100644 (file)
@@ -64,14 +64,14 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node<
     } else {
       return {false,
              trans_intr::make_interruptible(
-               seastar::make_ready_future<
-                 CachedExtentRef>(CachedExtentRef()))};
+               Cache::get_extent_ertr::make_ready_future<
+                 CachedExtentRef>())};
     }
   } else {
     return {false,
            trans_intr::make_interruptible(
-             seastar::make_ready_future<
-               CachedExtentRef>(CachedExtentRef()))};
+             Cache::get_extent_ertr::make_ready_future<
+               CachedExtentRef>())};
   }
 }
 
index 7a67d4efe9c4dc0b5fb9a00c824db6b4fc9deb4d..80b292203fedf8eae13bf9b9223593905d031065 100644 (file)
@@ -178,7 +178,7 @@ public:
   {
     auto v = pin->get_logical_extent(t);
     if (v.has_child()) {
-      return v.get_child_fut().then([](auto extent) {
+      return v.get_child_fut().safe_then([](auto extent) {
        return extent->template cast<T>();
       });
     } else {
@@ -635,6 +635,7 @@ private:
       }
     ).si_then([FNAME, &t](auto ref) mutable -> ret {
       SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
+      assert(ref->is_fully_loaded());
       return pin_to_extent_ret<T>(
        interruptible::ready_future_marker{},
        std::move(ref));
@@ -675,6 +676,7 @@ private:
       }
     ).si_then([FNAME, &t](auto ref) {
       SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
+      assert(ref->is_fully_loaded());
       return pin_to_extent_by_type_ret(
        interruptible::ready_future_marker{},
        std::move(ref->template cast<LogicalCachedExtent>()));