git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Refine implementation that sets built-in profile parameters.
author Sridhar Seshasayee <sseshasa@redhat.com>
Fri, 22 Jan 2021 11:36:59 +0000 (17:06 +0530)
committer Sridhar Seshasayee <sseshasa@redhat.com>
Wed, 10 Mar 2021 09:59:38 +0000 (15:29 +0530)
mclock clients are categorized into client, background_recovery and
background_best_effort. Each client category is required to specify
the mclock parameters like reservation, weight and limit. Therefore,
a simple structure (ClientAllocs) is introduced that encapsulates the
mclock parameters and is used to store those parameters for each client
category.

Other changes:
 - The "high_client_ops" profile is now set as the default profile
    instead of the "balanced" profile.
 - Replace "Stringify" with std::to_string().

Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
(cherry picked from commit 2c78cfc30a3d6ce3250b3e2129e42466e28c224e)

src/common/options.cc
src/osd/scheduler/mClockScheduler.cc
src/osd/scheduler/mClockScheduler.h

index f9c7f5870b13155e2ac91a8f98220a309124e5c8..e67e202f76282a9cd663b8dbc80e1097d895e20d 100644 (file)
@@ -3080,7 +3080,7 @@ std::vector<Option> get_global_options() {
     .set_flag(Option::FLAG_RUNTIME),
 
     Option("osd_mclock_profile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-    .set_default("balanced")
+    .set_default("high_client_ops")
     .set_enum_allowed( { "balanced", "high_recovery_ops", "high_client_ops", "custom" } )
     .set_description("Which mclock profile to use")
     .set_long_description("This option specifies the mclock profile to enable - one among the set of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler")
index d3f7b62d41396707a2cddcea96e64e61d033a7d6..d00a8aafbaa8a517c9ad03d901723fe8de1a3513 100644 (file)
@@ -16,7 +16,6 @@
 #include <memory>
 #include <functional>
 
-#include "include/stringify.h"
 #include "osd/scheduler/mClockScheduler.h"
 #include "common/dout.h"
 
@@ -55,9 +54,8 @@ mClockScheduler::mClockScheduler(CephContext *cct,
   }
   set_max_osd_capacity();
   set_osd_mclock_cost_per_io();
-  mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
-  set_client_allocations();
-  enable_mclock_profile();
+  set_mclock_profile();
+  enable_mclock_profile_settings();
   client_registry.update_from_config(cct->_conf);
 }
 
@@ -139,45 +137,156 @@ void mClockScheduler::set_osd_mclock_cost_per_io()
   }
 }
 
-void mClockScheduler::set_client_allocations()
+void mClockScheduler::set_mclock_profile()
 {
-  // Set profile specific client capacity allocations
-  if (mclock_profile == "balanced") {
-    double capacity = std::round(0.5 * max_osd_capacity);
-    client_allocs[op_scheduler_class::client] = capacity;
-    client_allocs[op_scheduler_class::background_recovery] = capacity;
-  } else if (mclock_profile == "high_recovery_ops") {
-    client_allocs[op_scheduler_class::client] =
-      std::round(0.25 * max_osd_capacity);
-    client_allocs[op_scheduler_class::background_recovery] =
-      std::round(0.75 * max_osd_capacity);
-  } else if (mclock_profile == "high_client_ops") {
-    client_allocs[op_scheduler_class::client] =
-      std::round(0.75 * max_osd_capacity);
-    client_allocs[op_scheduler_class::background_recovery] =
-      std::round(0.25 * max_osd_capacity);
-  } else {
-    ceph_assert("Invalid mclock profile" == 0);
-    return;
-  }
+  mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
 }
 
-double mClockScheduler::get_client_allocation(op_type_t op_type)
+std::string mClockScheduler::get_mclock_profile()
 {
-  double default_allocation = 1.0;
+  return mclock_profile;
+}
 
-  switch (op_type) {
-  case op_type_t::client_op:
-    return client_allocs[op_scheduler_class::client];
-  case op_type_t::bg_recovery:
-    return client_allocs[op_scheduler_class::background_recovery];
-  default:
-    // TODO for other op types.
-    return default_allocation;
-  }
+void mClockScheduler::set_balanced_profile_allocations()
+{
+  // Client Allocation:
+  //   reservation: 40% | weight: 1 | limit: 100% |
+  // Background Recovery Allocation:
+  //   reservation: 40% | weight: 1 | limit: 150% |
+  // Background Best Effort Allocation:
+  //   reservation: 20% | weight: 2 | limit: max |
+
+  // Client
+  uint64_t client_res = static_cast<uint64_t>(
+    std::round(0.40 * max_osd_capacity));
+  uint64_t client_lim = static_cast<uint64_t>(
+    std::round(max_osd_capacity));
+  uint64_t client_wgt = default_min;
+
+  // Background Recovery
+  uint64_t rec_res = static_cast<uint64_t>(
+    std::round(0.40 * max_osd_capacity));
+  uint64_t rec_lim = static_cast<uint64_t>(
+    std::round(1.5 * max_osd_capacity));
+  uint64_t rec_wgt = default_min;
+
+  // Background Best Effort
+  uint64_t best_effort_res = static_cast<uint64_t>(
+    std::round(0.20 * max_osd_capacity));
+  uint64_t best_effort_lim = default_max;
+  uint64_t best_effort_wgt = 2;
+
+  // Set the allocations for the mclock clients
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::client)].update(
+      client_res,
+      client_wgt,
+      client_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_recovery)].update(
+      rec_res,
+      rec_wgt,
+      rec_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
+      best_effort_res,
+      best_effort_wgt,
+      best_effort_lim);
+}
+
+void mClockScheduler::set_high_recovery_ops_profile_allocations()
+{
+  // Client Allocation:
+  //   reservation: 30% | weight: 1 | limit: 80% |
+  // Background Recovery Allocation:
+  //   reservation: 60% | weight: 2 | limit: 200% |
+  // Background Best Effort Allocation:
+  //   reservation: 1 | weight: 2 | limit: max |
+
+  // Client
+  uint64_t client_res = static_cast<uint64_t>(
+    std::round(0.30 * max_osd_capacity));
+  uint64_t client_lim = static_cast<uint64_t>(
+    std::round(0.80 * max_osd_capacity));
+  uint64_t client_wgt = default_min;
+
+  // Background Recovery
+  uint64_t rec_res = static_cast<uint64_t>(
+    std::round(0.60 * max_osd_capacity));
+  uint64_t rec_lim = static_cast<uint64_t>(
+    std::round(2.0 * max_osd_capacity));
+  uint64_t rec_wgt = 2;
+
+  // Background Best Effort
+  uint64_t best_effort_res = default_min;
+  uint64_t best_effort_lim = default_max;
+  uint64_t best_effort_wgt = 2;
+
+  // Set the allocations for the mclock clients
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::client)].update(
+      client_res,
+      client_wgt,
+      client_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_recovery)].update(
+      rec_res,
+      rec_wgt,
+      rec_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
+      best_effort_res,
+      best_effort_wgt,
+      best_effort_lim);
+}
+
+void mClockScheduler::set_high_client_ops_profile_allocations()
+{
+  // Client Allocation:
+  //   reservation: 50% | weight: 2 | limit: max |
+  // Background Recovery Allocation:
+  //   reservation: 25% | weight: 1 | limit: 100% |
+  // Background Best Effort Allocation:
+  //   reservation: 25% | weight: 2 | limit: max |
+
+  // Client
+  uint64_t client_res = static_cast<uint64_t>(
+    std::round(0.50 * max_osd_capacity));
+  uint64_t client_wgt = 2;
+  uint64_t client_lim = default_max;
+
+  // Background Recovery
+  uint64_t rec_res = static_cast<uint64_t>(
+    std::round(0.25 * max_osd_capacity));
+  uint64_t rec_lim = static_cast<uint64_t>(
+    std::round(max_osd_capacity));
+  uint64_t rec_wgt = default_min;
+
+  // Background Best Effort
+  uint64_t best_effort_res = static_cast<uint64_t>(
+    std::round(0.25 * max_osd_capacity));
+  uint64_t best_effort_lim = default_max;
+  uint64_t best_effort_wgt = 2;
+
+  // Set the allocations for the mclock clients
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::client)].update(
+      client_res,
+      client_wgt,
+      client_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_recovery)].update(
+      rec_res,
+      rec_wgt,
+      rec_lim);
+  client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_best_effort)].update(
+      best_effort_res,
+      best_effort_wgt,
+      best_effort_lim);
 }
 
-void mClockScheduler::enable_mclock_profile()
+void mClockScheduler::enable_mclock_profile_settings()
 {
   // Nothing to do for "custom" profile
   if (mclock_profile == "custom") {
@@ -186,92 +295,54 @@ void mClockScheduler::enable_mclock_profile()
 
   // Set mclock and ceph config options for the chosen profile
   if (mclock_profile == "balanced") {
-    set_balanced_profile_config();
+    set_balanced_profile_allocations();
   } else if (mclock_profile == "high_recovery_ops") {
-    set_high_recovery_ops_profile_config();
+    set_high_recovery_ops_profile_allocations();
   } else if (mclock_profile == "high_client_ops") {
-    set_high_client_ops_profile_config();
+    set_high_client_ops_profile_allocations();
   } else {
     ceph_assert("Invalid choice of mclock profile" == 0);
     return;
   }
 
+  // Set the mclock config parameters
+  set_profile_config();
   // Set recovery specific Ceph options
   set_global_recovery_options();
 }
 
-std::string mClockScheduler::get_mclock_profile()
+void mClockScheduler::set_profile_config()
 {
-  return mclock_profile;
-}
-
-void mClockScheduler::set_balanced_profile_config()
-{
-  double client_lim = get_client_allocation(op_type_t::client_op);
-  double rec_lim = get_client_allocation(op_type_t::bg_recovery);
-  int client_wgt = 10;
+  ClientAllocs client = client_allocs[
+    static_cast<size_t>(op_scheduler_class::client)];
+  ClientAllocs rec = client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_recovery)];
+  ClientAllocs best_effort = client_allocs[
+    static_cast<size_t>(op_scheduler_class::background_best_effort)];
 
   // Set external client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_wgt", stringify(client_wgt));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_lim", stringify(client_lim));
+  cct->_conf.set_val("osd_mclock_scheduler_client_res",
+    std::to_string(client.res));
+  cct->_conf.set_val("osd_mclock_scheduler_client_wgt",
+    std::to_string(client.wgt));
+  cct->_conf.set_val("osd_mclock_scheduler_client_lim",
+    std::to_string(client.lim));
 
   // Set background recovery client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_wgt", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
-}
-
-void mClockScheduler::set_high_recovery_ops_profile_config()
-{
-  double client_lim = get_client_allocation(op_type_t::client_op);
-  double rec_lim = get_client_allocation(op_type_t::bg_recovery);
-  int rec_wgt = 10;
-
-  // Set external client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_wgt", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_lim", stringify(client_lim));
-
-  // Set background recovery client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_wgt", stringify(rec_wgt));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
-}
-
-void mClockScheduler::set_high_client_ops_profile_config()
-{
-  double client_lim = get_client_allocation(op_type_t::client_op);
-  double rec_lim = get_client_allocation(op_type_t::bg_recovery);
-  int client_wgt = 10;
-
-  // Set external client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_wgt", stringify(client_wgt));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_client_lim", stringify(client_lim));
-
-  // Set background recovery client params
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_res", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_wgt", stringify(default_min));
-  cct->_conf.set_val(
-    "osd_mclock_scheduler_background_recovery_lim", stringify(rec_lim));
+  cct->_conf.set_val("osd_mclock_scheduler_background_recovery_res",
+    std::to_string(rec.res));
+  cct->_conf.set_val("osd_mclock_scheduler_background_recovery_wgt",
+    std::to_string(rec.wgt));
+  cct->_conf.set_val("osd_mclock_scheduler_background_recovery_lim",
+    std::to_string(rec.lim));
+
+  // Set background best effort client params
+  cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_res",
+    std::to_string(best_effort.res));
+  cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_wgt",
+    std::to_string(best_effort.wgt));
+  cct->_conf.set_val("osd_mclock_scheduler_background_best_effort_lim",
+    std::to_string(best_effort.lim));
 }
 
 void mClockScheduler::set_global_recovery_options()
@@ -279,14 +350,14 @@ void mClockScheduler::set_global_recovery_options()
   // Set high value for recovery max active and max backfill
   int rec_max_active = 1000;
   int max_backfills = 1000;
-  cct->_conf.set_val("osd_recovery_max_active", stringify(rec_max_active));
-  cct->_conf.set_val("osd_max_backfills", stringify(max_backfills));
+  cct->_conf.set_val("osd_recovery_max_active", std::to_string(rec_max_active));
+  cct->_conf.set_val("osd_max_backfills", std::to_string(max_backfills));
 
   // Disable recovery sleep
-  cct->_conf.set_val("osd_recovery_sleep", stringify(0));
-  cct->_conf.set_val("osd_recovery_sleep_hdd", stringify(0));
-  cct->_conf.set_val("osd_recovery_sleep_ssd", stringify(0));
-  cct->_conf.set_val("osd_recovery_sleep_hybrid", stringify(0));
+  cct->_conf.set_val("osd_recovery_sleep", std::to_string(0));
+  cct->_conf.set_val("osd_recovery_sleep_hdd", std::to_string(0));
+  cct->_conf.set_val("osd_recovery_sleep_ssd", std::to_string(0));
+  cct->_conf.set_val("osd_recovery_sleep_hybrid", std::to_string(0));
 
   // Apply the changes
   cct->_conf.apply_changes(nullptr);
@@ -448,12 +519,15 @@ void mClockScheduler::handle_conf_change(
       changed.count("osd_mclock_max_capacity_iops_hdd") ||
       changed.count("osd_mclock_max_capacity_iops_ssd")) {
     set_max_osd_capacity();
-    enable_mclock_profile();
-    client_registry.update_from_config(conf);
+    if (mclock_profile != "custom") {
+      enable_mclock_profile_settings();
+      client_registry.update_from_config(conf);
+    }
   }
   if (changed.count("osd_mclock_profile")) {
-    enable_mclock_profile();
+    set_mclock_profile();
     if (mclock_profile != "custom") {
+      enable_mclock_profile_settings();
       client_registry.update_from_config(conf);
     }
   }
@@ -469,4 +543,9 @@ void mClockScheduler::handle_conf_change(
   }
 }
 
+mClockScheduler::~mClockScheduler()
+{
+  cct->_conf.remove_observer(this);
+}
+
 }
index 0517f95ffdcec46d3166ada6a63d5effba3b256e..aa228339845d3deb872daf3254f98b785e7f60e9 100644 (file)
@@ -69,8 +69,32 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
   bool is_rotational;
   double max_osd_capacity;
   uint64_t osd_mclock_cost_per_io_msec;
-  std::string mclock_profile = "balanced";
-  std::map<op_scheduler_class, double> client_allocs;
+  std::string mclock_profile = "high_client_ops";
+  struct ClientAllocs {
+    uint64_t res;
+    uint64_t wgt;
+    uint64_t lim;
+
+    ClientAllocs(uint64_t _res, uint64_t _wgt, uint64_t _lim) {
+      update(_res, _wgt, _lim);
+    }
+
+    inline void update(uint64_t _res, uint64_t _wgt, uint64_t _lim) {
+      res = _res;
+      wgt = _wgt;
+      lim = _lim;
+    }
+  };
+  std::array<
+    ClientAllocs,
+    static_cast<size_t>(op_scheduler_class::client) + 1
+  > client_allocs = {
+    // Placeholder, get replaced with configured values
+    ClientAllocs(1, 1, 1), // background_recovery
+    ClientAllocs(1, 1, 1), // background_best_effort
+    ClientAllocs(1, 1, 1), // immediate (not used)
+    ClientAllocs(1, 1, 1)  // client
+  };
   std::map<op_type_t, int> client_cost_infos;
   std::map<op_type_t, int> client_scaled_cost_infos;
   class ClientRegistry {
@@ -115,6 +139,7 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
 
 public:
   mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational);
+  ~mClockScheduler() override;
 
   // Set the max osd capacity in iops
   void set_max_osd_capacity();
@@ -122,26 +147,26 @@ public:
   // Set the cost per io for the osd
   void set_osd_mclock_cost_per_io();
 
-  // Set the mclock related config params based on the profile
-  void enable_mclock_profile();
+  // Set the mclock profile type to enable
+  void set_mclock_profile();
 
   // Get the active mclock profile
   std::string get_mclock_profile();
 
-  // Set client capacity allocations based on profile
-  void set_client_allocations();
+  // Set "balanced" profile allocations
+  void set_balanced_profile_allocations();
 
-  // Get client allocation
-  double get_client_allocation(op_type_t op_type);
+  // Set "high_recovery_ops" profile allocations
+  void set_high_recovery_ops_profile_allocations();
 
-  // Set "balanced" profile parameters
-  void set_balanced_profile_config();
+  // Set "high_client_ops" profile allocations
+  void set_high_client_ops_profile_allocations();
 
-  // Set "high_recovery_ops" profile parameters
-  void set_high_recovery_ops_profile_config();
+  // Set the mclock related config params based on the profile
+  void enable_mclock_profile_settings();
 
-  // Set "high_client_ops" profile parameters
-  void set_high_client_ops_profile_config();
+  // Set mclock config parameter based on allocations
+  void set_profile_config();
 
   // Set recovery specific Ceph settings for profiles
   void set_global_recovery_options();