git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/transaction_manager: wait for the demotion when the
author: Xuehan Xu <xuxuehan@qianxin.com>
Sat, 16 May 2026 05:40:35 +0000 (13:40 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Sat, 23 May 2026 09:12:02 +0000 (17:12 +0800)
rbm main device is not enough

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/backref/btree_backref_manager.cc
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/lba/btree_lba_manager.cc
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc

diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h
index d77d69490fc1648a249e59fe46a72cfd7c7d7942..abdea5cc6b247036eea5d3d22e87bc6ddd570c25 100644 (file)
@@ -1287,6 +1287,8 @@ public:
 
   virtual bool should_clean_space() const = 0;
 
+  virtual double get_alive_ratio() const = 0;
+
   using clean_space_ertr = base_ertr;
   using clean_space_ret = clean_space_ertr::future<>;
   virtual clean_space_ret clean_space() = 0;
@@ -1645,7 +1647,7 @@ private:
     if (segments.get_unavailable_bytes() == 0) return 0;
     return (double)get_unavailable_unused_bytes() / (double)segments.get_unavailable_bytes();
   }
-  double get_alive_ratio() const {
+  double get_alive_ratio() const final {
     return stats.used_bytes / (double)segments.get_total_bytes();
   }
 
@@ -1811,6 +1813,10 @@ public:
     return st;
   }
 
+  double get_alive_ratio() const final {
+    return stats.used_bytes / (double)get_total_bytes();
+  }
+
   void print(std::ostream &, bool is_detailed) const final;
 
   mount_ret mount() final;
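diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc
index e394758a76bc68ed719165883b069cf1ab95f9ff..cc0d657934769f26240d02c81a65d43b47d235fc 100644 (file)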
@@ -72,16 +72,16 @@ BtreeBackrefManager::mkfs(
 {
   LOG_PREFIX(BtreeBackrefManager::mkfs);
   INFOT("start", t);
-  return cache.get_root(t).si_then([this, &t](auto croot) {
-    assert(croot->is_mutation_pending());
-    croot->get_root().backref_root = BackrefBtree::mkfs(croot, get_context(t));
-    return mkfs_iertr::now();
-  }).handle_error_interruptible(
-    mkfs_iertr::pass_further{},
-    crimson::ct_error::assert_all{
-      "Invalid error in BtreeBackrefManager::mkfs"
-    }
-  );
+
+  auto croot = co_await cache.get_root(t);
+  croot->get_root().backref_root =
+    co_await BackrefBtree::mkfs(croot, get_context(t)
+    ).handle_error_interruptible(
+      mkfs_iertr::pass_further{},
+      crimson::ct_error::assert_all{
+        "Invalid error in BtreeBackrefManager::mkfs"
+      }
+    );;
 }
 
 BtreeBackrefManager::get_mapping_ret
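diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h
index 48aad788577afdcd680ed7366c7376376b1da888..8fe052412899f83819a078e5eea925574fb676e5 100644 (file)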
@@ -468,11 +468,25 @@ public:
   }
 
   /// mkfs
-  using mkfs_ret = phy_tree_root_t;
+  using mkfs_iertr = base_iertr;
+  using mkfs_ret = mkfs_iertr::future<phy_tree_root_t>;
   static mkfs_ret mkfs(RootBlockRef &root_block, op_context_t c) {
     assert(root_block->is_mutation_pending());
-    auto root_leaf = c.cache.template alloc_new_non_data_extent<leaf_node_t>(
-      c.trans, node_size, {placement_hint_t::HOT, INIT_GENERATION});
+    LeafNodeRef root_leaf;
+    while (!root_leaf) {
+      try {
+        root_leaf = c.cache.template alloc_new_non_data_extent<leaf_node_t>(
+          c.trans, node_size, {placement_hint_t::HOT, INIT_GENERATION});
+      } catch (crimson::ct_error::eagain&) {
+      } catch (crimson::ct_error::enospc&) {
+        ceph_abort("shouldn't have no space");
+      }
+      if (!root_leaf) {
+        c.cache.get_epm().maybe_wake_background();
+        co_await trans_intr::make_interruptible(
+          c.cache.get_epm().wait_background());
+      }
+    }
     root_leaf->set_size(0);
     fixed_kv_node_meta_t<node_key_t> meta{min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, 1};
     root_leaf->set_meta(meta);
@@ -480,7 +494,7 @@ public:
     get_tree_stats<self_type>(c.trans).depth = 1u;
     get_tree_stats<self_type>(c.trans).extents_num_delta++;
     TreeRootLinker<RootBlock, leaf_node_t>::link_root(root_block, root_leaf.get());
-    return phy_tree_root_t{root_leaf->get_paddr(), 1u};
+    co_return phy_tree_root_t{root_leaf->get_paddr(), 1u};
   }
 
   iterator make_partial_iter(
@@ -1217,18 +1231,29 @@ public:
     LOG_PREFIX(FixedKVBtree::rewrite_extent);
     assert(is_lba_backref_node(e->get_type()));
     
-    auto do_rewrite = [&](auto &fixed_kv_extent) {
+    auto do_rewrite = [&](auto &fixed_kv_extent) -> rewrite_extent_ret {
       auto opt = Cache::alloc_option_t {
         fixed_kv_extent.get_user_hint(),
         // get target rewrite generation
         fixed_kv_extent.get_rewrite_generation()
       };
-      auto n_fixed_kv_extent = c.cache.template alloc_new_non_data_extent<
-        std::remove_reference_t<decltype(fixed_kv_extent)>
-        >(
-        c.trans,
-        fixed_kv_extent.get_length(),
-        opt);
+      TCachedExtentRef<std::remove_reference_t<
+        decltype(fixed_kv_extent)>> n_fixed_kv_extent;
+      while (!n_fixed_kv_extent) {
+        try {
+          n_fixed_kv_extent = c.cache.template alloc_new_non_data_extent<
+            std::remove_reference_t<decltype(fixed_kv_extent)>
+            >(
+            c.trans,
+            fixed_kv_extent.get_length(),
+            opt);
+        } catch (crimson::ct_error::eagain&) {}
+        if (!n_fixed_kv_extent) {
+          c.cache.get_epm().maybe_wake_background();
+          co_await trans_intr::make_interruptible(
+            c.cache.get_epm().wait_background());
+        }
+      }
       n_fixed_kv_extent->rewrite(c.trans, fixed_kv_extent, 0);
       
       SUBTRACET(
@@ -1238,25 +1263,23 @@ public:
         fixed_kv_extent,
         *n_fixed_kv_extent);
       
-      return update_internal_mapping(
+      co_await update_internal_mapping(
         c,
         n_fixed_kv_extent->get_node_meta().depth,
         n_fixed_kv_extent->get_node_meta().begin,
         e->get_paddr(),
         n_fixed_kv_extent->get_paddr(),
-        n_fixed_kv_extent
-      ).si_then([c, e] {
-        c.cache.retire_extent(c.trans, e);
-      });
+        n_fixed_kv_extent);
+      c.cache.retire_extent(c.trans, e);
     };
     
     if (e->get_type() == internal_node_t::TYPE) {
       auto lint = e->cast<internal_node_t>();
-      return do_rewrite(*lint);
+      co_await do_rewrite(*lint);
     } else {
       assert(e->get_type() == leaf_node_t::TYPE);
       auto lleaf = e->cast<leaf_node_t>();
-      return do_rewrite(*lleaf);
+      co_await do_rewrite(*lleaf);
     }
   }
 
@@ -1999,126 +2022,150 @@ private:
   {
     LOG_PREFIX(FixedKVBtree::handle_split);
 
-    return iter.check_split(c
-    ).si_then([FNAME, this, c, &iter](auto split_from) {
-      SUBTRACET(seastore_fixedkv_tree,
-        "split_from {}, depth {}", c.trans, split_from, iter.get_depth());
-
-      if (split_from == iter.get_depth()) {
-        assert(iter.is_full());
-        auto nroot = c.cache.template alloc_new_non_data_extent<internal_node_t>(
-          c.trans, node_size, {placement_hint_t::HOT, INIT_GENERATION});
-        fixed_kv_node_meta_t<node_key_t> meta{
-          min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, iter.get_depth() + 1};
-        nroot->set_meta(meta);
-        nroot->range = meta;
-        nroot->journal_insert(
-          nroot->begin(),
-          min_max_t<node_key_t>::min,
-          get_root().get_location(),
-          nullptr);
-        iter.internal.push_back({nroot, 0});
+    auto split_from = co_await iter.check_split(c);
+    SUBTRACET(seastore_fixedkv_tree,
+      "split_from {}, depth {}", c.trans, split_from, iter.get_depth());
 
-        get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
-        get_tree_stats<self_type>(c.trans).extents_num_delta++;
+    if (split_from == iter.get_depth()) {
+      assert(iter.is_full());
+      InternalNodeRef nroot;
+      while (!nroot) {
+        try {
+          nroot = c.cache.template alloc_new_non_data_extent<internal_node_t>(
+            c.trans, node_size, {placement_hint_t::HOT, INIT_GENERATION});
+        } catch (crimson::ct_error::eagain&) {}
+        if (!nroot) {
+          c.cache.get_epm().maybe_wake_background();
+          co_await trans_intr::make_interruptible(
+            c.cache.get_epm().wait_background());
+        }
+      }
+      fixed_kv_node_meta_t<node_key_t> meta{
+        min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, iter.get_depth() + 1};
+      nroot->set_meta(meta);
+      nroot->range = meta;
+      nroot->journal_insert(
+        nroot->begin(),
+        min_max_t<node_key_t>::min,
+        get_root().get_location(),
+        nullptr);
+      iter.internal.push_back({nroot, 0});
+
+      get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
+      get_tree_stats<self_type>(c.trans).extents_num_delta++;
+
+      root_block = c.cache.duplicate_for_write(
+        c.trans, root_block)->template cast<RootBlock>();
+      get_root().set_location(nroot->get_paddr());
+      get_root().set_depth(iter.get_depth());
+      ceph_assert(get_root().get_depth() <= MAX_DEPTH);
+      set_root_node(nroot);
+    }
 
-        root_block = c.cache.duplicate_for_write(
-          c.trans, root_block)->template cast<RootBlock>();
-        get_root().set_location(nroot->get_paddr());
-        get_root().set_depth(iter.get_depth());
-        ceph_assert(get_root().get_depth() <= MAX_DEPTH);
-        set_root_node(nroot);
+    /* pos may be either node_position_t<leaf_node_t> or
+     * node_position_t<internal_node_t> */
+    auto split_level = [&, c, FNAME](auto &parent_pos, auto &pos)
+                        -> handle_split_iertr::future<std::pair<
+                          decltype(pos.node), decltype(pos.node)>> {
+      decltype(pos.node) left, right;
+      node_key_t pivot = min_max_t<node_key_t>::null;
+      while (!left) {
+        assert(!right);
+        assert(pivot == min_max_t<node_key_t>::null);
+        try {
+          auto t = pos.node->make_split_children(c);
+          left = std::get<0>(t);
+          right = std::get<1>(t);
+          pivot = std::get<2>(t);
+        } catch (crimson::ct_error::eagain&) {}
+        if (!left) {
+          c.cache.get_epm().maybe_wake_background();
+          co_await trans_intr::make_interruptible(
+            c.cache.get_epm().wait_background());
+        }
       }
 
-      /* pos may be either node_position_t<leaf_node_t> or
-       * node_position_t<internal_node_t> */
-      auto split_level = [&, c, FNAME](auto &parent_pos, auto &pos) {
-        auto [left, right, pivot] = pos.node->make_split_children(c);
+      auto parent_node = parent_pos.node;
+      auto parent_iter = parent_pos.get_iter();
 
-        auto parent_node = parent_pos.node;
-        auto parent_iter = parent_pos.get_iter();
+      parent_node->update(
+        parent_iter,
+        left->get_paddr(),
+        left.get());
+      parent_node->insert(
+        parent_iter + 1,
+        pivot,
+        right->get_paddr(),
+        right.get());
 
-        parent_node->update(
-          parent_iter,
-          left->get_paddr(),
-          left.get());
-        parent_node->insert(
-          parent_iter + 1,
-          pivot,
-          right->get_paddr(),
-          right.get());
+      SUBTRACET(
+        seastore_fixedkv_tree,
+        "splitted {} into left: {}, right: {}",
+        c.trans,
+        *pos.node,
+        *left,
+        *right);
+      c.cache.retire_extent(c.trans, pos.node);
+
+      get_tree_stats<self_type>(c.trans).extents_num_delta++;
+      co_return std::make_pair(left, right);
+    };
 
+    for (; split_from > 0; --split_from) {
+      auto &parent_pos = iter.get_internal(split_from + 1);
+      if (!parent_pos.node->is_mutable()) {
+        parent_pos.node = c.cache.duplicate_for_write(
+          c.trans, parent_pos.node
+        )->template cast<internal_node_t>();
+      }
+
+      if (split_from > 1) {
+        auto &pos = iter.get_internal(split_from);
         SUBTRACET(
           seastore_fixedkv_tree,
-          "splitted {} into left: {}, right: {}",
+          "splitting internal {} at depth {}, parent: {} at pos: {}",
           c.trans,
           *pos.node,
-          *left,
-          *right);
-        c.cache.retire_extent(c.trans, pos.node);
+          split_from,
+          *parent_pos.node,
+          parent_pos.pos);
+        auto [left, right] = co_await split_level(parent_pos, pos);
 
-        get_tree_stats<self_type>(c.trans).extents_num_delta++;
-        return std::make_pair(left, right);
-      };
+        if (pos.pos < left->get_size()) {
+          pos.node = left;
+        } else {
+          pos.node = right;
+          pos.pos -= left->get_size();
 
-      for (; split_from > 0; --split_from) {
-        auto &parent_pos = iter.get_internal(split_from + 1);
-        if (!parent_pos.node->is_mutable()) {
-          parent_pos.node = c.cache.duplicate_for_write(
-            c.trans, parent_pos.node
-          )->template cast<internal_node_t>();
+          parent_pos.pos += 1;
         }
-
-        if (split_from > 1) {
-          auto &pos = iter.get_internal(split_from);
-          SUBTRACET(
-            seastore_fixedkv_tree,
-            "splitting internal {} at depth {}, parent: {} at pos: {}",
-            c.trans,
-            *pos.node,
-            split_from,
-            *parent_pos.node,
-            parent_pos.pos);
-          auto [left, right] = split_level(parent_pos, pos);
-
-          if (pos.pos < left->get_size()) {
-            pos.node = left;
-          } else {
-            pos.node = right;
-            pos.pos -= left->get_size();
-
-            parent_pos.pos += 1;
-          }
+      } else {
+        auto &pos = iter.leaf;
+        SUBTRACET(
+          seastore_fixedkv_tree,
+          "splitting leaf {}, parent: {} at pos: {}",
+          c.trans,
+          *pos.node,
+          *parent_pos.node,
+          parent_pos.pos);
+        auto [left, right] = co_await split_level(parent_pos, pos);
+
+        /* right->get_node_meta().begin == pivot == right->begin()->get_key()
+         * Thus, if pos.pos == left->get_size(), we want iter to point to
+         * left with pos.pos at the end rather than right with pos.pos = 0
+         * since the insertion would be to the left of the first element
+         * of right and thus necessarily less than right->get_node_meta().begin.
+         */
+        if (pos.pos <= left->get_size()) {
+          pos.node = left;
         } else {
-          auto &pos = iter.leaf;
-          SUBTRACET(
-            seastore_fixedkv_tree,
-            "splitting leaf {}, parent: {} at pos: {}",
-            c.trans,
-            *pos.node,
-            *parent_pos.node,
-            parent_pos.pos);
-          auto [left, right] = split_level(parent_pos, pos);
-
-          /* right->get_node_meta().begin == pivot == right->begin()->get_key()
-           * Thus, if pos.pos == left->get_size(), we want iter to point to
-           * left with pos.pos at the end rather than right with pos.pos = 0
-           * since the insertion would be to the left of the first element
-           * of right and thus necessarily less than right->get_node_meta().begin.
-           */
-          if (pos.pos <= left->get_size()) {
-            pos.node = left;
-          } else {
-            pos.node = right;
-            pos.pos -= left->get_size();
+          pos.node = right;
+          pos.pos -= left->get_size();
 
-            parent_pos.pos += 1;
-          }
+          parent_pos.pos += 1;
         }
       }
-
-      return seastar::now();
-    });
+    }
   }
 
 
@@ -2266,7 +2313,7 @@ private:
     
     SUBTRACET(seastore_fixedkv_tree, "parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
     auto do_merge = [c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
-                typename NodeType::Ref donor) {
+                typename NodeType::Ref donor) -> handle_merge_ret {
       LOG_PREFIX(FixedKVBtree::merge_level);
       auto [l, r] = donor_is_left ?
         std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
@@ -2275,7 +2322,17 @@ private:
         std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
 
       if (donor->at_min_capacity()) {
-        auto replacement = l->make_full_merge(c, r);
+        typename NodeType::Ref replacement;
+        while (!replacement) {
+          try {
+            replacement = l->make_full_merge(c, r);
+          } catch (crimson::ct_error::eagain&) {}
+          if (!replacement) {
+            c.cache.get_epm().maybe_wake_background();
+            co_await trans_intr::make_interruptible(
+              c.cache.get_epm().wait_background());
+          }
+        }
 
         parent_pos.node->update(
           liter,
@@ -2296,11 +2353,23 @@ private:
       } else {
         auto pivot_idx = l->get_balance_pivot_idx(*l, *r);
         LOG_PREFIX(FixedKVBtree::merge_level);
-        auto [replacement_l, replacement_r, pivot] =
-          l->make_balanced(
-            c,
-            r,
-            pivot_idx);
+        typename NodeType::Ref replacement_l, replacement_r;
+        node_key_t pivot = min_max_t<node_key_t>::null;
+        while (!replacement_l) {
+          assert(!replacement_r);
+          assert(pivot == min_max_t<node_key_t>::null);
+          try {
+            auto t = l->make_balanced(c, r, pivot_idx);
+            replacement_l = std::get<0>(t);
+            replacement_r = std::get<1>(t);
+            pivot = std::get<2>(t);
+          } catch (crimson::ct_error::eagain&) {}
+          if (!replacement_l) {
+            c.cache.get_epm().maybe_wake_background();
+            co_await trans_intr::make_interruptible(
+              c.cache.get_epm().wait_background());
+          }
+        }
 
         parent_pos.node->update(
           liter,
@@ -2336,8 +2405,6 @@ private:
         c.cache.retire_extent(c.trans, l);
         c.cache.retire_extent(c.trans, r);
       }
-
-      return seastar::now();
     };
 
     auto v = parent_pos.node->template get_child<NodeType>(
@@ -2345,30 +2412,26 @@ private:
     // checking the lba child must be atomic with creating
     // and linking the absent child
     if (v.has_child()) {
-      return std::move(v.get_child_fut()
-      ).si_then([do_merge=std::move(do_merge), &pos,
-                donor_iter, donor_is_left, c, parent_pos](auto child) {
-        LOG_PREFIX(FixedKVBtree::merge_level);
-        SUBTRACET(seastore_fixedkv_tree,
-          "got child on {}, pos: {}, res: {}",
-          c.trans,
-          *parent_pos.node,
-          donor_iter.get_offset(),
-          *child);
-        std::ignore = donor_is_left;
-        std::ignore = pos;
-        [[maybe_unused]] auto &node = (typename internal_node_t::base_t&)*child;
-        assert(donor_is_left ?
-          node.get_node_meta().end == pos.node->get_node_meta().begin :
-          node.get_node_meta().begin == pos.node->get_node_meta().end);
-        assert(node.get_node_meta().begin == donor_iter.get_key());
-        assert(node.get_node_meta().end > donor_iter.get_key());
-        return do_merge(child->template cast<NodeType>());
-      });
+      auto child = co_await std::move(v.get_child_fut());
+      SUBTRACET(seastore_fixedkv_tree,
+        "got child on {}, pos: {}, res: {}",
+        c.trans,
+        *parent_pos.node,
+        donor_iter.get_offset(),
+        *child);
+      std::ignore = donor_is_left;
+      std::ignore = pos;
+      [[maybe_unused]] auto &node = (typename internal_node_t::base_t&)*child;
+      assert(donor_is_left ?
+        node.get_node_meta().end == pos.node->get_node_meta().begin :
+        node.get_node_meta().begin == pos.node->get_node_meta().end);
+      assert(node.get_node_meta().begin == donor_iter.get_key());
+      assert(node.get_node_meta().end > donor_iter.get_key());
+      co_return co_await do_merge(child->template cast<NodeType>());
     }
 
     auto child_pos = v.get_child_pos();
-    return get_node<NodeType>(
+    auto donor = co_await get_node<NodeType>(
       c,
       depth,
       donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
@@ -2376,10 +2439,8 @@ private:
       end,
       std::make_optional<node_position_t<internal_node_t>>(
         child_pos.get_parent(),
-        child_pos.get_pos())
-    ).si_then([do_merge=std::move(do_merge)](typename NodeType::Ref donor) {
-      return do_merge(donor);
-    });
+        child_pos.get_pos()));
+    co_await do_merge(donor);
   }
 };
 
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index 4694e5a8c035bde49d1ffd9bc7cc59a73b9aad1a..0b3d216ffb6308de67e7ff716d559989c9b9ae31 100644 (file)
@@ -731,6 +731,10 @@ public:
     }
   }
 
+  ExtentPlacementManager& get_epm() {
+    return epm;
+  }
+
   extent_len_t get_block_size() const {
     return epm.get_block_size();
   }
@@ -1139,8 +1143,13 @@ public:
               t, T::TYPE, length, opt.hint, rewrite_gen_printer_t{opt.gen});
     auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, opt);
     if (!result) {
-      SUBERRORT(seastore_cache, "insufficient space", t);
-      std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+      if (epm.is_full()) {
+        SUBERRORT(seastore_cache, "insufficient space", t);
+        std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+      } else {
+        SUBERRORT(seastore_cache, "insufficient space, wait for demoting", t);
+        std::rethrow_exception(crimson::ct_error::eagain::exception_ptr());
+      }
     }
     auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
     assert(is_rewrite_generation(
@@ -1176,8 +1185,13 @@ public:
               t, T::TYPE, length, opt.hint, rewrite_gen_printer_t{opt.gen});
     auto results = epm.alloc_new_data_extents(t, T::TYPE, length, opt);
     if (results.empty()) {
-      SUBERRORT(seastore_cache, "insufficient space", t);
-      std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+      if (epm.is_full()) {
+        SUBERRORT(seastore_cache, "insufficient space", t);
+        std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+      } else {
+        SUBERRORT(seastore_cache, "insufficient space, wait for demoting", t);
+        std::rethrow_exception(crimson::ct_error::eagain::exception_ptr());
+      }
     }
     std::vector<TCachedExtentRef<T>> extents;
     for (auto &result : results) {
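diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h
index 54486cf7b27e9b8440fc1e5a11e30ae47dc4f263..302eecb2ec24d98d7717f277b0fb7370c851e98c 100644 (file)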
@@ -373,6 +373,18 @@ public:
 
   void set_primary_device(Device *device);
 
+  bool is_full() const {
+    return background_process.is_full();
+  }
+
+  void maybe_wake_background() {
+    background_process.maybe_wake_background();
+  }
+
+  seastar::future<> wait_background() {
+    return background_process.wait_background();
+  }
+
   void set_extent_callback(ExtentCallbackInterface *cb) {
     background_process.set_extent_callback(cb);
   }
@@ -1067,9 +1079,11 @@ private:
       return !trimmer || !main_cleaner;
     }
 
-  protected:
-    state_t get_state() const final {
-      return state;
+    bool is_full() const {
+      if (has_cold_tier()) {
+        return cold_cleaner->get_alive_ratio() >= 0.99;
+      }
+      return main_cleaner->get_alive_ratio() >= 0.99;
     }
 
     void maybe_wake_background() final {
@@ -1081,6 +1095,18 @@ private:
       }
     }
 
+    seastar::future<> wait_background() {
+      if (!blocking_io) {
+        blocking_io = seastar::promise<>();
+      }
+      return blocking_io->get_future();
+    }
+
+  protected:
+    state_t get_state() const final {
+      return state;
+    }
+
     void maybe_wake_blocked_io() final;
 
     void maybe_wake_promote() final {
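diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc
index 2f933a4ac13b58cfc6e181cdafd48b56c1d1eb1b..fff5c5bf1646f8c151ee79954b4f6a96c593acc7 100644 (file)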
@@ -115,7 +115,12 @@ BtreeLBAManager::mkfs(
   auto croot = co_await cache.get_root(t);
   assert(croot);
   assert(croot->is_mutation_pending());
-  croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t));
+  croot->get_root().lba_root =
+    co_await LBABtree::mkfs(croot, get_context(t)
+    ).handle_error_interruptible(
+      mkfs_iertr::pass_further{},
+      crimson::ct_error::assert_all{"unexpected error"}
+    );
 }
 
 BtreeLBAManager::get_cursors_ret
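diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index c282a98b901f68da8dd7cf9663c0f293c463a459..f7b5aa8af44456cc78face79276289999caf94e4 100644 (file)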
@@ -789,15 +789,24 @@ TransactionManager::rewrite_logical_extent(
   if (get_extent_category(extent->get_type()) == data_category_t::METADATA) {
     assert(extent->is_fully_loaded());
     cache->retire_extent(t, extent);
-    auto nextent = cache->alloc_new_non_data_extent_by_type(
-      t,
-      extent->get_type(),
-      extent->get_length(),
-      extent->get_user_hint(),
-      // get target rewrite generation
-      extent->get_rewrite_generation(),
-      paddr_hint,
-      is_tracked)->cast<LogicalChildNode>();
+    LogicalChildNodeRef nextent;
+    while (!nextent) {
+      try {
+        nextent = cache->alloc_new_non_data_extent_by_type(
+          t,
+          extent->get_type(),
+          extent->get_length(),
+          extent->get_user_hint(),
+          // get target rewrite generation
+          extent->get_rewrite_generation(),
+          paddr_hint,
+          is_tracked)->cast<LogicalChildNode>();
+      } catch (crimson::ct_error::eagain&) {}
+      if (!nextent) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     nextent->rewrite(t, *extent, 0);
 
     DEBUGT("rewriting meta -- {} to {}", t, *extent, *nextent);
@@ -836,15 +845,24 @@ TransactionManager::rewrite_logical_extent(
       t, std::move(extent), 0, length);
     assert(extent->is_fully_loaded());
     cache->retire_extent(t, extent);
-    auto extents = cache->alloc_new_data_extents_by_type(
-      t,
-      extent->get_type(),
-      extent->get_length(),
-      extent->get_user_hint(),
-      // get target rewrite generation
-      extent->get_rewrite_generation(),
-      paddr_hint,
-      is_tracked);
+    std::vector<CachedExtentRef> extents;
+    while (extents.empty()) {
+      try {
+        extents = cache->alloc_new_data_extents_by_type(
+          t,
+          extent->get_type(),
+          extent->get_length(),
+          extent->get_user_hint(),
+          // get target rewrite generation
+          extent->get_rewrite_generation(),
+          paddr_hint,
+          is_tracked);
+      } catch (crimson::ct_error::eagain&) {}
+      if (extents.empty()) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     extent_len_t off = 0;
     auto left = extent->get_length();
     extent_ref_count_t refcount = 0;
@@ -1128,14 +1146,23 @@ TransactionManager::promote_extent(
   cache->retire_extent(t, extent);
 
   if (get_extent_category(extent->get_type()) == data_category_t::DATA) {
-    auto promoted_raw_extents = cache->alloc_new_data_extents_by_type(
-      t,
-      orig_ext->get_type(),
-      orig_ext->get_length(),
-      placement_hint_t::HOT,
-      INIT_GENERATION,
-      P_ADDR_NULL,
-      true);
+    std::vector<CachedExtentRef> promoted_raw_extents;
+    while (promoted_raw_extents.empty()) {
+      try {
+        promoted_raw_extents = cache->alloc_new_data_extents_by_type(
+          t,
+          orig_ext->get_type(),
+          orig_ext->get_length(),
+          placement_hint_t::HOT,
+          INIT_GENERATION,
+          P_ADDR_NULL,
+          true);
+      } catch (crimson::ct_error::eagain&) {}
+      if (promoted_raw_extents.empty()) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     t.touch_laddr_prefix(orig_ext->get_laddr().get_object_prefix());
 
     promoted_extents.reserve(promoted_raw_extents.size());
@@ -1172,14 +1199,23 @@ TransactionManager::promote_extent(
     }
     ceph_assert(offset == orig_length);
   } else {
-    auto promoted_extent = cache->alloc_new_non_data_extent_by_type(
-      t,
-      orig_ext->get_type(),
-      orig_ext->get_length(),
-      placement_hint_t::HOT,
-      INIT_GENERATION,
-      P_ADDR_NULL,
-      true);
+    CachedExtentRef promoted_extent;
+    while (!promoted_extent) {
+      try {
+        promoted_extent = cache->alloc_new_non_data_extent_by_type(
+          t,
+          orig_ext->get_type(),
+          orig_ext->get_length(),
+          placement_hint_t::HOT,
+          INIT_GENERATION,
+          P_ADDR_NULL,
+          true);
+      } catch (crimson::ct_error::eagain&) {}
+      if (!promoted_extent) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     auto lext = promoted_extent->cast<LogicalChildNode>();
     lext->set_laddr(orig_ext->get_laddr());
     lext->rewrite(t, *orig_ext, 0);
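diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index da1a3641fdbd8ae5754f3eeb817b6ffea810609b..64ac9c009dea922bfa9e9c79a143941c6ac98699 100644 (file)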
@@ -574,21 +574,27 @@ public:
     LOG_PREFIX(TransactionManager::alloc_non_data_extent);
     SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
               t, T::TYPE, laddr_hint, len, placement_hint);
-    auto ext = cache->alloc_new_non_data_extent<T>(
-      t, len, {placement_hint, INIT_GENERATION});
+    TCachedExtentRef<T> ext;
+    while (!ext) {
+      try {
+        ext = cache->alloc_new_non_data_extent<T>(
+          t, len, {placement_hint, INIT_GENERATION});
+      } catch(crimson::ct_error::eagain&) {}
+      if (!ext) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     // user must initialize the logical extent themselves.
     assert(is_user_transaction(t.get_src()));
     ext->set_seen_by_users();
-    return lba_manager->alloc_extent(
+    co_await lba_manager->alloc_extent(
       t,
       laddr_hint,
       *ext,
-      EXTENT_DEFAULT_REF_COUNT
-    ).si_then([ext=std::move(ext), &t, FNAME](auto &&) mutable {
-      SUBDEBUGT(seastore_tm, "allocated {}", t, *ext);
-      return alloc_extent_iertr::make_ready_future<TCachedExtentRef<T>>(
-       std::move(ext));
-    });
+      EXTENT_DEFAULT_REF_COUNT);
+    SUBDEBUGT(seastore_tm, "allocated {}", t, *ext);
+    co_return ext;
   }
 
   /**
@@ -612,12 +618,21 @@ public:
     LOG_PREFIX(TransactionManager::alloc_data_extents);
     SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
               t, T::TYPE, laddr_hint, len, placement_hint);
-    auto exts = cache->alloc_new_data_extents<T>(
-      t, len,
-      {
-        placement_hint, INIT_GENERATION, false, P_ADDR_NULL,
-        epm->get_write_policy(T::TYPE, len)
-      });
+    std::vector<TCachedExtentRef<T>> exts;
+    while (exts.empty()) {
+      try {
+        exts = cache->alloc_new_data_extents<T>(
+          t, len,
+          {
+            placement_hint, INIT_GENERATION, false, P_ADDR_NULL,
+            epm->get_write_policy(T::TYPE, len)
+          });
+      } catch (crimson::ct_error::eagain&) {}
+      if (exts.empty()) {
+        epm->maybe_wake_background();
+        co_await trans_intr::make_interruptible(epm->wait_background());
+      }
+    }
     // user must initialize the logical extent themselves
     assert(is_user_transaction(t.get_src()));
     for (auto& ext : exts) {
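diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc
index 178b28375a878022bc19b065e395618749a6f28a..56014f77ebd4fbe82ac8678596b0e7b415d34421 100644 (file)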
@@ -231,12 +231,12 @@ struct lba_btree_test : btree_test_base {
   LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {
     return cache->get_root(
       t
-    ).si_then([this, &t](RootBlockRef croot) {
+    ).si_then([this, &t](RootBlockRef croot) -> LBAManager::mkfs_ret {
       auto mut_croot = cache->duplicate_for_write(
        t, croot
       )->cast<RootBlock>();
       mut_croot->root.lba_root =
-       LBABtree::mkfs(mut_croot, get_op_context(t));
+       co_await LBABtree::mkfs(mut_croot, get_op_context(t));
     });
   }