]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Reduce default max backfill/recoveries for mclock to realistic limits
authorSridhar Seshasayee <sseshasa@redhat.com>
Mon, 19 Sep 2022 08:48:33 +0000 (14:18 +0530)
committerSridhar Seshasayee <sseshasa@redhat.com>
Wed, 14 Dec 2022 19:11:53 +0000 (00:41 +0530)
The earlier max number of backfill and recovery limits were overly
optimistic and caused recovery ops to overwhelm client ops regardless of
the mclock profile type.

Set the max backfill limit to 10. Set the max recovery limit to 10 for
osds having HDD backing device. For osds having SSD backing device, set
the recovery limit to 20.

Introduce and new config option, 'osd_mclock_override_recovery_settings',
that when set (false by default) allows the modification of the max
backfill and recovery options with mclock scheduler enabled.

It is important to note that with 'osd_mclock_override_recovery_settings'
disabled, the config subsystem will still allow the changes made to the
recovery and backfill settings when using the cli commands as the level at
which the change is made is always greater than CONF_DEFAULT. To counter this,
the osd will forcibly revert the recovery and backfill limits to the mClock
defaults by removing the existing setting from the config store and raises a
cluster log warning indicating the revert. This should force the user to set
the 'osd_mclock_override_recovery_settings' option first and then make changes
to the recovery and backfill limits.

Fixes: https://tracker.ceph.com/issues/57529
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
(cherry picked from commit 89e48395f8b1329066a1d7e05a4e9e083c88c1a6)

src/common/options/osd.yaml.in
src/osd/OSD.cc
src/osd/OSD.h

index d8c98b1f271eba6b57634f0bb15c06a37fa51587..fea7f15342d19997982b0fb7c3988783f18dd820 100644 (file)
@@ -1107,6 +1107,27 @@ options:
   - custom
   flags:
   - runtime
+- name: osd_mclock_override_recovery_settings
+  type: bool
+  level: advanced
+  desc: Setting this option enables the override of recovery/backfill limits
+    for the mClock scheduler.
+  long_desc: This option when set enables the override of the max recovery
+    active and the max backfills limits with mClock scheduler active. These
+    options are not modifiable when mClock scheduler is active. Any attempt
+    to modify these values without setting this option will reset the
+    recovery or backfill option back to its default value.
+  fmt_desc: Setting this option will enable the override of the
+    recovery/backfill limits for the mClock scheduler as defined by the
+    ``osd_recovery_max_active_hdd``, ``osd_recovery_max_active_ssd`` and
+    ``osd_max_backfills`` options.
+  default: false
+  see_also:
+  - osd_recovery_max_active_hdd
+  - osd_recovery_max_active_ssd
+  - osd_max_backfills
+  flags:
+  - runtime
 # Set to true for testing.  Users should NOT set this.
 # If set to true even after reading enough shards to
 # decode the object, any error will be reported.
index 323d1edace30410c910a961164694038ea51c30d..b0be86a19982a34c61e29d8b8103a9c7101a9c9d 100644 (file)
@@ -3861,6 +3861,7 @@ int OSD::init()
   start_boot();
 
   // Override a few options if mclock scheduler is enabled.
+  maybe_override_sleep_options_for_qos();
   maybe_override_options_for_qos();
   maybe_override_max_osd_capacity_for_qos();
 
@@ -9964,7 +9965,17 @@ void OSD::handle_conf_change(const ConfigProxy& conf,
   std::lock_guard l{osd_lock};
 
   if (changed.count("osd_max_backfills") ||
-      changed.count("osd_delete_sleep") ||
+      changed.count("osd_recovery_max_active") ||
+      changed.count("osd_recovery_max_active_hdd") ||
+      changed.count("osd_recovery_max_active_ssd")) {
+    if (!maybe_override_options_for_qos(&changed) &&
+        changed.count("osd_max_backfills")) {
+      // Scheduler is not "mclock". Fallback to earlier behavior
+      service.local_reserver.set_max(cct->_conf->osd_max_backfills);
+      service.remote_reserver.set_max(cct->_conf->osd_max_backfills);
+    }
+  }
+  if (changed.count("osd_delete_sleep") ||
       changed.count("osd_delete_sleep_hdd") ||
       changed.count("osd_delete_sleep_ssd") ||
       changed.count("osd_delete_sleep_hybrid") ||
@@ -9976,16 +9987,8 @@ void OSD::handle_conf_change(const ConfigProxy& conf,
       changed.count("osd_recovery_sleep") ||
       changed.count("osd_recovery_sleep_hdd") ||
       changed.count("osd_recovery_sleep_ssd") ||
-      changed.count("osd_recovery_sleep_hybrid") ||
-      changed.count("osd_recovery_max_active") ||
-      changed.count("osd_recovery_max_active_hdd") ||
-      changed.count("osd_recovery_max_active_ssd")) {
-    if (!maybe_override_options_for_qos() &&
-        changed.count("osd_max_backfills")) {
-      // Scheduler is not "mclock". Fallback to earlier behavior
-      service.local_reserver.set_max(cct->_conf->osd_max_backfills);
-      service.remote_reserver.set_max(cct->_conf->osd_max_backfills);
-    }
+      changed.count("osd_recovery_sleep_hybrid")) {
+    maybe_override_sleep_options_for_qos();
   }
   if (changed.count("osd_min_recovery_priority")) {
     service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority);
@@ -10154,30 +10157,105 @@ void OSD::maybe_override_max_osd_capacity_for_qos()
   }
 }
 
-bool OSD::maybe_override_options_for_qos()
+bool OSD::maybe_override_options_for_qos(const std::set<std::string> *changed)
 {
-  // If the scheduler enabled is mclock, override the recovery, backfill
-  // and sleep options so that mclock can meet the QoS goals.
+  // Override options only if the scheduler enabled is mclock and the
+  // underlying objectstore is supported by mclock
   if (cct->_conf.get_val<std::string>("osd_op_queue") == "mclock_scheduler" &&
       !unsupported_objstore_for_qos()) {
-    dout(1) << __func__
-            << ": Changing recovery/backfill/sleep settings for QoS" << dendl;
-
-    // Set high value for recovery max active
-    uint32_t rec_max_active = 1000;
-    cct->_conf.set_val(
-      "osd_recovery_max_active", std::to_string(rec_max_active));
-    cct->_conf.set_val(
-      "osd_recovery_max_active_hdd", std::to_string(rec_max_active));
-    cct->_conf.set_val(
-      "osd_recovery_max_active_ssd", std::to_string(rec_max_active));
-
-    // Set high value for osd_max_backfill
-    uint32_t max_backfills = 1000;
-    cct->_conf.set_val("osd_max_backfills", std::to_string(max_backfills));
-    service.local_reserver.set_max(max_backfills);
-    service.remote_reserver.set_max(max_backfills);
+    static const std::map<std::string, uint64_t> recovery_qos_defaults {
+      {"osd_recovery_max_active", 0},
+      {"osd_recovery_max_active_hdd", 10},
+      {"osd_recovery_max_active_ssd", 20},
+      {"osd_max_backfills", 10},
+    };
 
+    // Check if we were called because of a configuration change
+    if (changed != nullptr) {
+      if (cct->_conf.get_val<bool>("osd_mclock_override_recovery_settings")) {
+        if (changed->count("osd_max_backfills")) {
+          dout(1) << __func__ << " Set local and remote max backfills to "
+                   << cct->_conf->osd_max_backfills << dendl;
+          service.local_reserver.set_max(cct->_conf->osd_max_backfills);
+          service.remote_reserver.set_max(cct->_conf->osd_max_backfills);
+        }
+      } else {
+        // Recovery options change was attempted without setting
+        // the 'osd_mclock_override_recovery_settings' option.
+        // Find the key to remove from the configuration db.
+        std::string key;
+        if (changed->count("osd_max_backfills")) {
+          key = "osd_max_backfills";
+        } else if (changed->count("osd_recovery_max_active")) {
+          key = "osd_recovery_max_active";
+        } else if (changed->count("osd_recovery_max_active_hdd")) {
+          key = "osd_recovery_max_active_hdd";
+        } else if (changed->count("osd_recovery_max_active_ssd")) {
+          key = "osd_recovery_max_active_ssd";
+        } else {
+          // No key that we are interested in. Return.
+          return true;
+        }
+
+        // Remove the current entry from the configuration if
+        // different from its default value.
+        auto val = recovery_qos_defaults.find(key);
+        if (val != recovery_qos_defaults.end() &&
+            cct->_conf.get_val<uint64_t>(key) != val->second) {
+          static const std::vector<std::string> osds = {
+            "osd",
+            "osd." + std::to_string(whoami)
+          };
+
+          for (auto osd : osds) {
+            std::string cmd =
+              "{"
+                "\"prefix\": \"config rm\", "
+                "\"who\": \"" + osd + "\", "
+                "\"name\": \"" + key + "\""
+              "}";
+            vector<std::string> vcmd{cmd};
+
+            dout(1) << __func__ << " Removing Key: " << key
+                    << " for " << osd << " from Mon db" << dendl;
+            monc->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
+          }
+
+          // Raise a cluster warning indicating that the changes did not
+          // take effect and indicate the reason why.
+          clog->warn() << "Change to " << key << " on osd."
+                       << std::to_string(whoami) << " did not take effect."
+                       << " Enable osd_mclock_override_recovery_settings before"
+                       << " setting this option.";
+        }
+      }
+    } else { // if (changed != nullptr) (osd boot-up)
+      // Override the default recovery max active and max backfills to
+      // higher values based on the type of backing device (hdd/ssd).
+      // This section is executed only during osd boot-up.
+      for (auto opt : recovery_qos_defaults) {
+        cct->_conf.set_val_default(opt.first, std::to_string(opt.second));
+        if (opt.first == "osd_max_backfills") {
+          service.local_reserver.set_max(opt.second);
+          service.remote_reserver.set_max(opt.second);
+        }
+        dout(1) << __func__ << " Set default value for " << opt.first
+                << " to " << opt.second << dendl;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+void OSD::maybe_override_sleep_options_for_qos()
+{
+  // Override options only if the scheduler enabled is mclock and the
+  // underlying objectstore is supported by mclock
+  if (cct->_conf.get_val<std::string>("osd_op_queue") == "mclock_scheduler" &&
+      !unsupported_objstore_for_qos()) {
+
+    // Override the various sleep settings
     // Disable recovery sleep
     cct->_conf.set_val("osd_recovery_sleep", std::to_string(0));
     cct->_conf.set_val("osd_recovery_sleep_hdd", std::to_string(0));
@@ -10198,9 +10276,7 @@ bool OSD::maybe_override_options_for_qos()
 
     // Disable scrub sleep
     cct->_conf.set_val("osd_scrub_sleep", std::to_string(0));
-    return true;
   }
-  return false;
 }
 
 /**
index 843c021d0d38ce3c496c989eb07afcd09ba0bdc8..18946f1e967829300e17250e7231a5c10bc15945 100644 (file)
@@ -2057,7 +2057,9 @@ private:
 
   int get_recovery_max_active();
   void maybe_override_max_osd_capacity_for_qos();
-  bool maybe_override_options_for_qos();
+  void maybe_override_sleep_options_for_qos();
+  bool maybe_override_options_for_qos(
+    const std::set<std::string> *changed = nullptr);
   int run_osd_bench_test(int64_t count,
                          int64_t bsize,
                          int64_t osize,