git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: skip backref operations for pure rbm seastore
author Xuehan Xu <xuxuehan@qianxin.com>
Thu, 18 Dec 2025 07:00:43 +0000 (15:00 +0800)
committer Xuehan Xu <xuxuehan@qianxin.com>
Mon, 19 Jan 2026 02:45:44 +0000 (10:45 +0800)
instances

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/async_cleaner.cc
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_cbjournal.cc
src/test/crimson/seastore/test_seastore_journal.cc

index f7592810a4012107425bfd5c2ad5d5de62c82083..c52bcb75659c937608f369956ee76286e5361000 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "crimson/os/seastore/async_cleaner.h"
 #include "crimson/os/seastore/backref_manager.h"
+#include "crimson/os/seastore/lba_manager.h"
 #include "crimson/os/seastore/transaction_manager.h"
 
 SET_SUBSYS(seastore_cleaner);
@@ -413,8 +414,10 @@ JournalTrimmerImpl::JournalTrimmerImpl(
   config_t config,
   backend_type_t type,
   device_off_t roll_start,
-  device_off_t roll_size)
-  : backref_manager(backref_manager),
+  device_off_t roll_size,
+  bool tail_include_alloc)
+  : JournalTrimmer(tail_include_alloc),
+    backref_manager(backref_manager),
     config(config),
     backend_type(type),
     roll_start(roll_start),
@@ -487,7 +490,7 @@ void JournalTrimmerImpl::update_journal_tails(
     }
   }
 
-  if (alloc_tail != JOURNAL_SEQ_NULL) {
+  if (tail_include_alloc && alloc_tail != JOURNAL_SEQ_NULL) {
     ceph_assert(journal_head == JOURNAL_SEQ_NULL ||
                 journal_head >= alloc_tail);
     if (journal_alloc_tail != JOURNAL_SEQ_NULL &&
@@ -584,7 +587,8 @@ std::size_t JournalTrimmerImpl::get_dirty_journal_size() const
 
 std::size_t JournalTrimmerImpl::get_alloc_journal_size() const
 {
-  if (!background_callback->is_ready()) {
+  if (!background_callback->is_ready() ||
+      !tail_include_alloc) {
     return 0;
   }
   auto ret = journal_head.relative_to(
@@ -1567,7 +1571,7 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_no_tail_segment(
   });
 }
 
-bool SegmentCleaner::check_usage()
+bool SegmentCleaner::check_usage(bool)
 {
   SpaceTrackerIRef tracker(space_tracker->make_empty());
   extent_callback->with_transaction_weak(
@@ -1767,10 +1771,12 @@ void SegmentCleaner::print(std::ostream &os, bool is_detailed) const
 RBMCleaner::RBMCleaner(
   RBMDeviceGroupRef&& rb_group,
   BackrefManager &backref_manager,
+  LBAManager &lba_manager,
   bool detailed)
   : detailed(detailed),
     rb_group(std::move(rb_group)),
-    backref_manager(backref_manager)
+    backref_manager(backref_manager),
+    lba_manager(lba_manager)
 {}
 
 void RBMCleaner::print(std::ostream &os, bool is_detailed) const
@@ -1873,7 +1879,7 @@ RBMCleaner::mount_ret RBMCleaner::mount()
   });
 }
 
-bool RBMCleaner::check_usage()
+bool RBMCleaner::check_usage(bool has_cold_tier)
 {
   assert(detailed);
   const auto& rbms = rb_group->get_rb_managers();
@@ -1881,39 +1887,56 @@ bool RBMCleaner::check_usage()
   extent_callback->with_transaction_weak(
       "check_usage",
       CACHE_HINT_NOCACHE,
-      [this, &tracker, &rbms](auto &t) {
-    return backref_manager.scan_mapped_space(
-      t,
-      [&tracker, &rbms](
-        paddr_t paddr,
-       paddr_t backref_key,
-        extent_len_t len,
-        extent_types_t type,
-        laddr_t laddr)
-    {
-      for (auto rbm : rbms) {
-       if (rbm->get_device_id() == paddr.get_device_id()) {
-         if (is_backref_node(type)) {
-           assert(laddr == L_ADDR_NULL);
-           assert(backref_key.is_absolute_random_block()
-                  || backref_key == P_ADDR_MIN);
-           tracker.allocate(
-             paddr,
-             len);
-         } else if (laddr == L_ADDR_NULL) {
-           assert(backref_key == P_ADDR_NULL);
-           tracker.release(
-             paddr,
-             len);
-         } else {
-           assert(backref_key == P_ADDR_NULL);
-           tracker.allocate(
-             paddr,
-             len);
-         }
-       }
-      }
-    });
+      [this, &tracker, &rbms, has_cold_tier](auto &t) {
+    if (has_cold_tier) {
+      return backref_manager.scan_mapped_space(
+        t,
+        [&tracker, &rbms](
+          paddr_t paddr,
+          paddr_t backref_key,
+          extent_len_t len,
+          extent_types_t type,
+          laddr_t laddr)
+      {
+        for (auto rbm : rbms) {
+          if (rbm->get_device_id() == paddr.get_device_id()) {
+            if (is_backref_node(type)) {
+              assert(laddr == L_ADDR_NULL);
+              assert(backref_key.is_absolute_random_block()
+                     || backref_key == P_ADDR_MIN);
+              tracker.allocate(
+                paddr,
+                len);
+            } else if (laddr == L_ADDR_NULL) {
+              assert(backref_key == P_ADDR_NULL);
+              tracker.release(
+                paddr,
+                len);
+            } else {
+              assert(backref_key == P_ADDR_NULL);
+              tracker.allocate(
+                paddr,
+                len);
+            }
+          }
+        }
+      });
+    } else {
+      return lba_manager.scan_mapped_space(
+        t,
+        [&tracker, &rbms](
+          paddr_t paddr,
+          extent_len_t len,
+          extent_types_t type,
+          laddr_t laddr)
+      {
+        for (auto rbm : rbms) {
+          if (rbm->get_device_id() == paddr.get_device_id()) {
+            tracker.allocate(paddr, len);
+          }
+        }
+      });
+    }
   }).unsafe_get();
   return equals(tracker);
 }
index 07fc8a81bf73343ddbba0b3015e27bcab4088484..0f335f4edfb8db47ef11eb054e5ce6bb8047e38a 100644 (file)
@@ -431,6 +431,8 @@ struct BackgroundListener {
  */
 class JournalTrimmer {
 public:
+  JournalTrimmer(bool tail_include_alloc)
+    : tail_include_alloc(tail_include_alloc) {}
   // get the committed journal head
   virtual journal_seq_t get_journal_head() const = 0;
 
@@ -465,7 +467,11 @@ public:
   virtual ~JournalTrimmer() {}
 
   journal_seq_t get_journal_tail() const {
-    return std::min(get_alloc_tail(), get_dirty_tail());
+    if (tail_include_alloc) {
+      return std::min(get_alloc_tail(), get_dirty_tail());
+    } else {
+      return get_dirty_tail();
+    }
   }
 
   virtual std::size_t get_trim_size_per_cycle() const = 0;
@@ -473,7 +479,8 @@ public:
   bool check_is_ready() const {
     return (get_journal_head() != JOURNAL_SEQ_NULL &&
             get_dirty_tail() != JOURNAL_SEQ_NULL &&
-            get_alloc_tail() != JOURNAL_SEQ_NULL);
+            (get_alloc_tail() != JOURNAL_SEQ_NULL ||
+             !tail_include_alloc));
   }
 
   std::size_t get_num_rolls() const {
@@ -487,9 +494,12 @@ public:
     return get_journal_head_sequence() + 1 -
            get_journal_tail().segment_seq;
   }
+protected:
+  bool tail_include_alloc = true;
 };
 
 class BackrefManager;
+class LBAManager;
 class JournalTrimmerImpl;
 using JournalTrimmerImplRef = std::unique_ptr<JournalTrimmerImpl>;
 
@@ -531,7 +541,8 @@ public:
     config_t config,
     backend_type_t type,
     device_off_t roll_start,
-    device_off_t roll_size);
+    device_off_t roll_size,
+    bool tail_include_alloc);
 
   ~JournalTrimmerImpl() = default;
 
@@ -618,9 +629,11 @@ public:
       config_t config,
       backend_type_t type,
       device_off_t roll_start,
-      device_off_t roll_size) {
+      device_off_t roll_size,
+      bool tail_include_alloc) {
     return std::make_unique<JournalTrimmerImpl>(
-        backref_manager, config, type, roll_start, roll_size);
+        backref_manager, config, type, roll_start,
+        roll_size, tail_include_alloc);
   }
 
   struct stat_printer_t {
@@ -638,7 +651,14 @@ private:
     return target <= journal_dirty_tail;
   }
 
+  bool can_drop_backref() const {
+    return get_backend_type() == backend_type_t::RANDOM_BLOCK;
+  }
+
   bool should_trim_alloc() const {
+    if (can_drop_backref()) {
+      return false;
+    }
     return get_alloc_tail_target() > journal_alloc_tail;
   }
 
@@ -1228,7 +1248,7 @@ public:
 #endif
 
   // test only
-  virtual bool check_usage() = 0;
+  virtual bool check_usage(bool has_cold_tier) = 0;
 
   struct stat_printer_t {
     const AsyncCleaner &cleaner;
@@ -1429,7 +1449,7 @@ public:
 
   // Testing interfaces
 
-  bool check_usage() final;
+  bool check_usage(bool has_cold_tier) final;
 
 private:
   /*
@@ -1692,14 +1712,16 @@ public:
   RBMCleaner(
     RBMDeviceGroupRef&& rb_group,
     BackrefManager &backref_manager,
+    LBAManager &lba_manager,
     bool detailed);
 
   static RBMCleanerRef create(
       RBMDeviceGroupRef&& rb_group,
       BackrefManager &backref_manager,
+      LBAManager &lba_manager,
       bool detailed) {
     return std::make_unique<RBMCleaner>(
-      std::move(rb_group), backref_manager, detailed);
+      std::move(rb_group), backref_manager, lba_manager, detailed);
   }
 
   RBMDeviceGroup* get_rb_group() {
@@ -1829,7 +1851,7 @@ public:
 
   // Testing interfaces
 
-  bool check_usage() final;
+  bool check_usage(bool has_cold_tier) final;
 
   bool check_usage_is_empty() const final {
     // TODO
@@ -1842,6 +1864,7 @@ private:
   const bool detailed;
   RBMDeviceGroupRef rb_group;
   BackrefManager &backref_manager;
+  LBAManager &lba_manager;
 
   struct {
     /**
index 5b8371f0ab834979ed90d8b8659c3fa7c93e0b0a..864654e3488b26a0ee0afc225291f5d5738c9f98 100644 (file)
@@ -1446,11 +1446,13 @@ record_t Cache::prepare_record(
          extent->get_paddr(),
          extent->get_length(),
          extent->get_type()));
-      backref_entries.emplace_back(
-       backref_entry_t::create_retire(
-         extent->get_paddr(),
-         extent->get_length(),
-         extent->get_type()));
+      if (!can_drop_backref()) {
+        backref_entries.emplace_back(
+          backref_entry_t::create_retire(
+            extent->get_paddr(),
+            extent->get_length(),
+            extent->get_type()));
+      }
     } else if (is_backref_node(extent->get_type())) {
       // The retire alloc deltas are used to identify the invalid backref extent
       // deltas during replay when using CircularBoundedJournal, see
@@ -1460,7 +1462,9 @@ record_t Cache::prepare_record(
          extent->get_paddr(),
          extent->get_length(),
          extent->get_type()));
-      remove_backref_extent(extent->get_paddr());
+      if (!can_drop_backref()) {
+        remove_backref_extent(extent->get_paddr());
+      }
     } else {
       ERRORT("Got unexpected extent type: {}", t, *extent);
       ceph_abort_msg("imposible");
@@ -1632,6 +1636,10 @@ record_t Cache::prepare_record(
        i->get_length(),
        i->get_type()));
 
+    if (can_drop_backref()) {
+      continue;
+    }
+
     // Note: commit extents and backref allocations in the same place
     // Note: remapping is split into 2 steps, retire and alloc, they must be
     //       committed atomically together
@@ -1696,8 +1704,10 @@ record_t Cache::prepare_record(
     record.push_back(std::move(delta));
   }
 
-  apply_backref_mset(backref_entries);
-  t.set_backref_entries(std::move(backref_entries));
+  if (!can_drop_backref()) {
+    apply_backref_mset(backref_entries);
+    t.set_backref_entries(std::move(backref_entries));
+  }
 
   ceph_assert(t.get_fresh_block_stats().num ==
               t.inline_block_list.size() +
@@ -1871,6 +1881,9 @@ void Cache::complete_commit(
     i->complete_io();
     epm.commit_space_used(i->get_paddr(), i->get_length());
 
+    if (can_drop_backref()) {
+      return;
+    }
     // Note: commit extents and backref allocations in the same place
     if (is_backref_mapped_type(i->get_type())) {
       DEBUGT("backref_entry alloc {}~0x{:x}",
@@ -1944,8 +1957,10 @@ void Cache::complete_commit(
 
   last_commit = start_seq;
 
-  apply_backref_byseq(t.move_backref_entries(), start_seq);
-  commit_backref_entries(std::move(backref_entries), start_seq);
+  if (!can_drop_backref()) {
+    apply_backref_byseq(t.move_backref_entries(), start_seq);
+    commit_backref_entries(std::move(backref_entries), start_seq);
+  }
 }
 
 void Cache::init()
@@ -2017,7 +2032,9 @@ Cache::replay_delta(
 {
   LOG_PREFIX(Cache::replay_delta);
   assert(dirty_tail != JOURNAL_SEQ_NULL);
-  assert(alloc_tail != JOURNAL_SEQ_NULL);
+  if (!can_drop_backref()) {
+    assert(alloc_tail != JOURNAL_SEQ_NULL);
+  }
   ceph_assert(modify_time != NULL_TIME);
 
   // FIXME: This is specific to the segmented implementation
@@ -2055,6 +2072,11 @@ Cache::replay_delta(
 
   // replay alloc
   if (delta.type == extent_types_t::ALLOC_INFO) {
+    if (can_drop_backref()) {
+      return replay_delta_ertr::make_ready_future<
+        std::pair<bool, CachedExtentRef>>(std::make_pair(false, nullptr));
+    }
+
     if (journal_seq < alloc_tail) {
       DEBUG("journal_seq {} < alloc_tail {}, don't replay {}",
        journal_seq, alloc_tail, delta);
index 9561bd1516b884bc3448139a72bac12e049e6673..b5967d9f808b7f83d20bf45c607af3e13ae05887 100644 (file)
@@ -743,6 +743,10 @@ public:
     return query_cache(offset);
   }
 
+  bool can_drop_backref() const {
+    return epm.is_pure_rbm();
+  }
+
 private:
   using get_extent_ertr = base_ertr;
   template <typename T>
index 7d98978630e2872dbc5ab028438c1e29185dfc24..a5a6846e8565f9c5da63d13756ab796504776b6c 100644 (file)
@@ -560,6 +560,13 @@ public:
     return primary_device->get_backend_type();
   }
 
+
+  bool is_pure_rbm() const {
+    return get_main_backend_type() == backend_type_t::RANDOM_BLOCK &&
+      // as of now, cold tier can only be segmented.
+      !background_process.has_cold_tier();
+  }
+
   // Testing interfaces
 
   void test_init_no_background(Device *test_device) {
@@ -855,8 +862,8 @@ private:
     // Testing interfaces
 
     bool check_usage() {
-      return main_cleaner->check_usage() &&
-        (!has_cold_tier() || cold_cleaner->check_usage());
+      return main_cleaner->check_usage(has_cold_tier()) &&
+        (!has_cold_tier() || cold_cleaner->check_usage(true));
     }
 
     seastar::future<> run_until_halt();
index 706217ea42a92378be5267481fdc5263ad4deb6d..91197c040d7b2dab427b52f3224e3d6be6473cb4 100644 (file)
@@ -3136,6 +3136,7 @@ template <> struct fmt::formatter<crimson::os::seastore::device_id_printer_t> :
 template <> struct fmt::formatter<crimson::os::seastore::dirty_io_stats_printer_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::extent_types_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::journal_seq_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::backend_type_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::journal_tail_delta_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::laddr_t> : fmt::ostream_formatter {};
 template <> struct fmt::formatter<crimson::os::seastore::laddr_offset_t> : fmt::ostream_formatter {};
index ca1a45af9a77431ae68831c2d138406413c31414..7baea2cd82c4de6a4cfaa3fd83803e5e2dfc65f1 100644 (file)
@@ -143,31 +143,46 @@ TransactionManager::mount()
         }
       }).si_then([this, &t] {
         epm->start_scan_space();
-        return backref_manager->scan_mapped_space(
-          t,
-          [this](
-            paddr_t paddr,
-           paddr_t backref_key,
-            extent_len_t len,
-            extent_types_t type,
-            laddr_t laddr) {
-          assert(paddr.is_absolute());
-          if (is_backref_node(type)) {
-            assert(laddr == L_ADDR_NULL);
-           assert(backref_key.is_absolute() || backref_key == P_ADDR_MIN);
-            backref_manager->cache_new_backref_extent(paddr, backref_key, type);
+        if (can_drop_backref()) {
+          return lba_manager->scan_mapped_space(
+            t,
+            [this](
+              paddr_t paddr,
+              extent_len_t len,
+              extent_types_t type,
+              laddr_t laddr) {
+            assert(paddr.is_absolute());
             cache->update_tree_extents_num(type, 1);
             epm->mark_space_used(paddr, len);
-          } else if (laddr == L_ADDR_NULL) {
-           assert(backref_key == P_ADDR_NULL);
-            cache->update_tree_extents_num(type, -1);
-            epm->mark_space_free(paddr, len);
-          } else {
-           assert(backref_key == P_ADDR_NULL);
-            cache->update_tree_extents_num(type, 1);
-            epm->mark_space_used(paddr, len);
-          }
-        });
+          });
+        } else {
+          return backref_manager->scan_mapped_space(
+            t,
+            [this](
+              paddr_t paddr,
+              paddr_t backref_key,
+              extent_len_t len,
+              extent_types_t type,
+              laddr_t laddr) {
+            assert(paddr.is_absolute());
+            if (is_backref_node(type)) {
+              assert(laddr == L_ADDR_NULL);
+              assert(backref_key.is_absolute() || backref_key == P_ADDR_MIN);
+              backref_manager->cache_new_backref_extent(
+                paddr, backref_key, type);
+              cache->update_tree_extents_num(type, 1);
+              epm->mark_space_used(paddr, len);
+            } else if (laddr == L_ADDR_NULL) {
+              assert(backref_key == P_ADDR_NULL);
+              cache->update_tree_extents_num(type, -1);
+              epm->mark_space_free(paddr, len);
+            } else {
+              assert(backref_key == P_ADDR_NULL);
+              cache->update_tree_extents_num(type, 1);
+              epm->mark_space_used(paddr, len);
+            }
+          });
+        }
       });
     });
   }).safe_then([this] {
@@ -983,6 +998,7 @@ TransactionManagerRef make_transaction_manager(
     shard_stats_t& shard_stats,
     bool is_test)
 {
+  LOG_PREFIX(make_transaction_manager);
   rewrite_gen_t hot_tier_generations = crimson::common::get_conf<uint64_t>(
     "seastore_hot_tier_generations");
   rewrite_gen_t cold_tier_generations = crimson::common::get_conf<uint64_t>(
@@ -1061,9 +1077,12 @@ TransactionManagerRef make_transaction_manager(
         roll_size, backend_type);
   }
 
+  bool pure_rbm_backend =
+      (p_backend_type == backend_type_t::RANDOM_BLOCK) && !cold_sms;
   auto journal_trimmer = JournalTrimmerImpl::create(
       *backref_manager, trimmer_config,
-      backend_type, roll_start, roll_size);
+      backend_type, roll_start, roll_size,
+      !pure_rbm_backend);
 
   AsyncCleanerRef cleaner;
   JournalRef journal;
@@ -1108,6 +1127,7 @@ TransactionManagerRef make_transaction_manager(
     cleaner = RBMCleaner::create(
       std::move(rbs),
       *backref_manager,
+      *lba_manager,
       cleaner_is_detailed);
     journal = journal::make_circularbounded(
       *journal_trimmer,
@@ -1122,6 +1142,8 @@ TransactionManagerRef make_transaction_manager(
            std::move(cold_segment_cleaner));
   epm->set_primary_device(primary_device);
 
+  INFO("main backend type: {}, cold tier: {}",
+    epm->get_main_backend_type(), (bool)cold_sms);
   return std::make_unique<TransactionManager>(
     std::move(journal),
     std::move(cache),
index 8e8086c85955a5254465a4d4fdfd14cb24260d35..1ba7599051f8ea669b9cae071d72a9a86d7dc78e 100644 (file)
@@ -1172,6 +1172,10 @@ private:
 
   shard_stats_t& shard_stats;
 
+  bool can_drop_backref() const {
+    return cache->can_drop_backref();
+  }
+
   using LBALeafNode = lba::LBALeafNode;
   struct unlinked_child_t {
     LBAMapping mapping;
index 53b3d74bd067dec996b611b7b294465591898151..c91a6a2238540473230a7dd7bc120f09d2afce09 100644 (file)
@@ -45,7 +45,7 @@ struct btree_test_base :
 
   mutable segment_info_t tmp_info;
 
-  btree_test_base() = default;
+  btree_test_base() : JournalTrimmer(true) {}
 
   /*
    * JournalTrimmer interfaces
index a7c31fc92d53fee71ba70d7a7257305bbeae0524..22b26b53aba4292fa2376e3c31b22251894d4435 100644 (file)
@@ -142,7 +142,7 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer
   uint64_t block_size;
   WritePipeline pipeline;
 
-  cbjournal_test_t() = default;
+  cbjournal_test_t() : JournalTrimmer(true) {}
 
   /*
    * JournalTrimmer interfaces
index d3ad64d6a650e5d961cad2519cc8bb8392d43437..293874080966f78c4b06502a5d3166fec4035c1e 100644 (file)
@@ -85,7 +85,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer {
 
   mutable segment_info_t tmp_info;
 
-  journal_test_t() = default;
+  journal_test_t() : JournalTrimmer(true) {}
 
   /*
    * JournalTrimmer interfaces