]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/EPM/BackgroundProcess: reserve space on cold cleaner
authorZhang Song <zhangsong325@gmail.com>
Tue, 10 Jan 2023 10:52:04 +0000 (18:52 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Tue, 23 May 2023 08:09:20 +0000 (08:09 +0000)
Signed-off-by: Zhang Song <zhangsong325@gmail.com>
(cherry picked from commit 4aea0e266de43281a5d8c24fe91034bdaae45d98)

src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h

index e45ad7f63605f781edaa87473d37c21f1826a792..1a7a30be610fb3c35f9d2eed89d952a2abba28d8 100644 (file)
@@ -544,14 +544,6 @@ public:
     journal_alloc_tail = JOURNAL_SEQ_NULL;
   }
 
-  bool should_trim_dirty() const {
-    return get_dirty_tail_target() > journal_dirty_tail;
-  }
-
-  bool should_trim_alloc() const {
-    return get_alloc_tail_target() > journal_alloc_tail;
-  }
-
   bool should_trim() const {
     return should_trim_alloc() || should_trim_dirty();
   }
@@ -596,6 +588,14 @@ public:
   friend std::ostream &operator<<(std::ostream &, const stat_printer_t &);
 
 private:
+  bool should_trim_dirty() const {  // true when the dirty-tail target is ahead of journal_dirty_tail
+    return get_dirty_tail_target() > journal_dirty_tail;
+  }
+
+  bool should_trim_alloc() const {  // true when the alloc-tail target is ahead of journal_alloc_tail
+    return get_alloc_tail_target() > journal_alloc_tail;
+  }
+
   using trim_ertr = crimson::errorator<
     crimson::ct_error::input_output_error>;
   trim_ertr::future<> trim_dirty();
@@ -1152,6 +1152,8 @@ public:
 
   virtual const std::set<device_id_t>& get_device_ids() const = 0;
 
+  virtual std::size_t get_reclaim_size_per_cycle() const = 0;
+
   // test only
   virtual bool check_usage() = 0;
 
@@ -1335,6 +1337,10 @@ public:
     return sm_group->get_device_ids();
   }
 
+  std::size_t get_reclaim_size_per_cycle() const final {  // reports the configured reclaim_bytes_per_cycle
+    return config.reclaim_bytes_per_cycle;
+  }
+
   // Testing interfaces
 
   bool check_usage() final;
@@ -1656,6 +1662,10 @@ public:
     return rb_group->get_device_ids();
   }
 
+  std::size_t get_reclaim_size_per_cycle() const final {  // this implementation always reports 0
+    return 0;
+  }
+
   RandomBlockManager* get_rbm(paddr_t paddr) {
     auto rbs = rb_group->get_rb_managers();
     for (auto p : rbs) {
index 027f20d995fcbf911c2674d8ee511e3b856a357f..4ad3074c1d2916325d20517c2982c2f8a8d11001 100644 (file)
@@ -546,12 +546,71 @@ ExtentPlacementManager::BackgroundProcess::run()
   });
 }
 
+/**
+ * Reservation Process
+ *
+ * Most transactions need to reserve their space usage before performing the
+ * ool writes and committing transactions. If the space reservation is
+ * unsuccessful, the current transaction is blocked, and waits for new
+ * background transactions to finish.
+ *
+ * The following are the reservation requirements for each transaction type:
+ * 1. MUTATE transaction:
+ *      (1) inline usage on the trimmer,
+ *      (2) inline usage with OOL usage on the main cleaner,
+ *      (3) cold OOL usage to the cold cleaner(if it exists).
+ * 2. TRIM_DIRTY/TRIM_ALLOC transaction:
+ *      (1) all extents usage on the main cleaner,
+ *      (2) usage on the cold cleaner(if it exists)
+ * 3. CLEANER_MAIN:
+ *      (1) cleaned extents size on the cold cleaner(if it exists).
+ * 4. CLEANER_COLD transaction does not require space reservation.
+ *
+ * The reserve implementation should satisfy the following conditions:
+ * 1. The reservation should be atomic. If a reservation involves several reservations,
+ *    such as the MUTATE transaction that needs to reserve space on both the trimmer
+ *    and cleaner at the same time, the successful condition is that all of its
+ *    sub-reservations succeed. If one or more operations fail, the entire reservation
+ *    fails, and the successful operations should be reverted.
+ * 2. The reserve/block relationship should form a DAG to avoid deadlock. For example,
+ *    TRIM_ALLOC transaction might be blocked by cleaner due to the failure of reserving
+ *    on the cleaner. In such cases, the cleaner must not reserve space on the trimmer
+ *    since the trimmer is already blocked by itself.
+ *
+ * Finally the reserve relationship can be represented as follows:
+ *
+ *    +-------------------------+----------------+
+ *    |                         |                |
+ *    |                         v                v
+ * MUTATE ---> TRIM_* ---> CLEANER_MAIN ---> CLEANER_COLD
+ *              |                                ^
+ *              |                                |
+ *              +--------------------------------+
+ */
+bool ExtentPlacementManager::BackgroundProcess::try_reserve_cold(std::size_t usage)  // try to reserve `usage` of projected usage on the cold cleaner
+{
+  if (has_cold_tier()) {
+    return cold_cleaner->try_reserve_projected_usage(usage);  // forward the cold cleaner's verdict
+  } else {
+    assert(usage == 0);  // no cold tier: the requested cold usage is expected to be 0
+    return true;  // nothing to reserve, so report success
+  }
+}
+void ExtentPlacementManager::BackgroundProcess::abort_cold_usage(  // give back `usage` previously reserved on the cold cleaner
+  std::size_t usage, bool success)  // success: whether the matching cold reservation succeeded
+{
+  if (has_cold_tier() && success) {  // release only when a cold tier exists and the reservation was made
+    cold_cleaner->release_projected_usage(usage);
+  }
+}
+
 reserve_cleaner_result_t
 ExtentPlacementManager::BackgroundProcess::try_reserve_cleaner(
   const cleaner_usage_t &usage)
 {
   return {
-    main_cleaner->try_reserve_projected_usage(usage.main_usage)
+    main_cleaner->try_reserve_projected_usage(usage.main_usage),  // initializes reserve_main_success
+    try_reserve_cold(usage.cold_ool_usage)  // initializes reserve_cold_success; attempted even if the main reservation failed
   };
 }
 
@@ -562,6 +621,7 @@ void ExtentPlacementManager::BackgroundProcess::abort_cleaner_usage(
   if (result.reserve_main_success) {
     main_cleaner->release_projected_usage(usage.main_usage);
   }
+  abort_cold_usage(usage.cold_ool_usage, result.reserve_cold_success);
 }
 
 reserve_io_result_t
@@ -591,11 +651,22 @@ ExtentPlacementManager::BackgroundProcess::do_background_cycle()
   bool should_trim = trimmer->should_trim();
   bool proceed_trim = false;
   auto trim_size = trimmer->get_trim_size_per_cycle();
-  cleaner_usage_t trim_usage{trim_size};
+  cleaner_usage_t trim_usage{
+    trim_size,
+    // We take a cautious policy here that the trimmer also reserves
+    // the max value on cold cleaner even if no extents will be rewritten
+    // to the cold tier. Cleaner also takes the same policy.
+    // The reason is that we don't know the exact reservation size until
+    // the trimmer transaction has been constructed; if the reservation failed
+    // at that point, the trimmer transaction could be invalidated by the cleaner.
+    // Reserving the max size up front helps us avoid this trouble.
+    has_cold_tier() ? trim_size : 0
+  };
 
+  reserve_cleaner_result_t trim_reserve_res;
   if (should_trim) {
-    auto res = try_reserve_cleaner(trim_usage);
-    if (res.is_successful()) {
+    trim_reserve_res = try_reserve_cleaner(trim_usage);
+    if (trim_reserve_res.is_successful()) {
       proceed_trim = true;
     } else {
       abort_cleaner_usage(trim_usage, trim_reserve_res);
@@ -605,21 +676,65 @@ ExtentPlacementManager::BackgroundProcess::do_background_cycle()
   if (proceed_trim) {
     return trimmer->trim(
     ).finally([this, trim_usage] {
-      abort_cleaner_usage(trim_usage, {true});
+      abort_cleaner_usage(trim_usage, {true, true});
     });
-  } else if (main_cleaner->should_clean_space() ||
-             // make sure cleaner will start
-             // when the trimmer should run but
-             // failed to reserve space.
-             (should_trim && !proceed_trim)) {
-    return main_cleaner->clean_space(
-    ).handle_error(
-      crimson::ct_error::assert_all{
-       "do_background_cycle encountered invalid error in clean_space"
-      }
-    );
   } else {
-    return seastar::now();
+    bool should_clean_main =
+      main_cleaner->should_clean_space() ||
+      // make sure cleaner will start
+      // when the trimmer should run but
+      // failed to reserve space.
+      (should_trim && !proceed_trim &&
+       !trim_reserve_res.reserve_main_success);
+    bool proceed_clean_main = false;
+
+    auto main_cold_usage = main_cleaner->get_reclaim_size_per_cycle();
+    if (should_clean_main) {
+      if (has_cold_tier()) {
+        proceed_clean_main = try_reserve_cold(main_cold_usage);
+      } else {
+        proceed_clean_main = true;
+      }
+    }
+
+    bool proceed_clean_cold = false;
+    if (has_cold_tier() &&
+        (cold_cleaner->should_clean_space() ||
+         (should_trim && !proceed_trim &&
+          !trim_reserve_res.reserve_cold_success) ||
+         (should_clean_main && !proceed_clean_main))) {
+      proceed_clean_cold = true;
+    }
+
+    if (!proceed_clean_main && !proceed_clean_cold) {
+      ceph_abort("no background process will start");
+    }
+    return seastar::when_all(
+      [this, proceed_clean_main, main_cold_usage] {
+        if (!proceed_clean_main) {
+          return seastar::now();
+        }
+        return main_cleaner->clean_space(
+        ).handle_error(
+          crimson::ct_error::assert_all{
+            "do_background_cycle encountered invalid error in main clean_space"
+          }
+        ).finally([this, main_cold_usage] {
+          abort_cold_usage(main_cold_usage, true);
+        });
+      },
+      [this, proceed_clean_cold] {
+        if (!proceed_clean_cold) {
+          return seastar::now();
+        }
+        return cold_cleaner->clean_space(
+        ).handle_error(
+          crimson::ct_error::assert_all{
+            "do_background_cycle encountered invalid error in cold clean_space"
+          }
+        );
+      }
+    ).discard_result();
   }
 }
 
index 51401e78d12600ee6b047bb6a3b7e51e05d93369..bb9749f04eec3c43bdbcacd7b87d362fd68937b8 100644 (file)
@@ -138,9 +138,11 @@ struct cleaner_usage_t {
 
 struct reserve_cleaner_result_t {
   bool reserve_main_success = true;
+  bool reserve_cold_success = true;  // outcome of the cold-cleaner reservation; defaults to success

   bool is_successful() const {
-    return reserve_main_success;
+    return reserve_main_success &&  // overall success requires both the main and the cold flag
+      reserve_cold_success;
   }
 };
 
@@ -589,6 +591,9 @@ private:
       if (is_ready()) {
         trimmer->release_inline_usage(usage.inline_usage);
         main_cleaner->release_projected_usage(usage.cleaner_usage.main_usage);
+        if (has_cold_tier()) {
+          cold_cleaner->release_projected_usage(usage.cleaner_usage.cold_ool_usage);
+        }
       }
     }
 
@@ -636,6 +641,9 @@ private:
 
   private:
     // reserve helpers
+    bool try_reserve_cold(std::size_t usage);
+    void abort_cold_usage(std::size_t usage, bool success);
+
     reserve_cleaner_result_t try_reserve_cleaner(const cleaner_usage_t &usage);
     void abort_cleaner_usage(const cleaner_usage_t &usage,
                              const reserve_cleaner_result_t &result);
@@ -668,14 +676,16 @@ private:
     bool background_should_run() const {
       assert(is_ready());
       return main_cleaner->should_clean_space()
-        || trimmer->should_trim_dirty()
-        || trimmer->should_trim_alloc();
+        || (has_cold_tier() && cold_cleaner->should_clean_space())  // cold cleaner is consulted only when a cold tier exists
+        || trimmer->should_trim();  // should_trim() covers both the alloc and the dirty check
     }
 
     bool should_block_io() const {
       assert(is_ready());
       return trimmer->should_block_io_on_trim() ||
-             main_cleaner->should_block_io_on_clean();
+             main_cleaner->should_block_io_on_clean() ||
+             (has_cold_tier() &&  // cold cleaner is consulted only when a cold tier exists
+              cold_cleaner->should_block_io_on_clean());
     }
 
     seastar::future<> do_background_cycle();