common/mclock_common: don't use config keys to pass profile info to client_registry

author Samuel Just <sjust@redhat.com>

Mon, 11 Aug 2025 21:14:45 +0000 (14:14 -0700)

committer Samuel Just <sjust@redhat.com>

Fri, 5 Sep 2025 21:47:38 +0000 (14:47 -0700)
author Samuel Just <sjust@redhat.com>
Mon, 11 Aug 2025 21:14:45 +0000 (14:14 -0700)
committer Samuel Just <sjust@redhat.com>
Fri, 5 Sep 2025 21:47:38 +0000 (14:47 -0700)
diff --git a/src/common/mclock_common.cc b/src/common/mclock_common.cc

index d8cb8fa695006939e2e4951d6f3a65f5b1fc7272..cb7be4289f910564aa2a814605aff89bf1e7fdac 100644 (file)
--- a/src/common/mclock_common.cc
+++ b/src/common/mclock_common.cc
@@ -80,9 +80,10 @@ std::ostream& operator<<(std::ostream& out,
   * for the osd_mclock_scheduler_client_* parameters prior to calling
   * update_from_config -- see set_config_defaults_from_profile().
   */
-void ClientRegistry::update_from_config(const ConfigProxy &conf,
-                                        const double capacity_per_shard)
+void ClientRegistry::update_from_profile(const profile_t &current_profile,
+                                        const double capacity_per_shard)
  {
+
    auto get_res = [&](double res) {
      if (res) {
        return res * capacity_per_shard;
@@ -99,43 +100,22 @@ void ClientRegistry::update_from_config(const ConfigProxy &conf,
      }
    };
  
-  // Set external client infos
-  double res = conf.get_val<double>(
-    "osd_mclock_scheduler_client_res");
-  double lim = conf.get_val<double>(
-    "osd_mclock_scheduler_client_lim");
-  uint64_t wgt = conf.get_val<uint64_t>(
-    "osd_mclock_scheduler_client_wgt");
    default_external_client_info.update(
-    get_res(res),
-    wgt,
-    get_lim(lim));
-
-  // Set background recovery client infos
-  res = conf.get_val<double>(
-    "osd_mclock_scheduler_background_recovery_res");
-  lim = conf.get_val<double>(
-    "osd_mclock_scheduler_background_recovery_lim");
-  wgt = conf.get_val<uint64_t>(
-    "osd_mclock_scheduler_background_recovery_wgt");
+    get_res(current_profile.client.reservation),
+    current_profile.client.weight,
+    get_lim(current_profile.client.limit));
+
    internal_client_infos[
      static_cast<size_t>(SchedulerClass::background_recovery)].update(
-      get_res(res),
-      wgt,
-      get_lim(lim));
-
-  // Set background best effort client infos
-  res = conf.get_val<double>(
-    "osd_mclock_scheduler_background_best_effort_res");
-  lim = conf.get_val<double>(
-    "osd_mclock_scheduler_background_best_effort_lim");
-  wgt = conf.get_val<uint64_t>(
-    "osd_mclock_scheduler_background_best_effort_wgt");
+      get_res(current_profile.background_recovery.reservation),
+      current_profile.background_recovery.weight,
+      get_lim(current_profile.background_recovery.limit));
+
    internal_client_infos[
      static_cast<size_t>(SchedulerClass::background_best_effort)].update(
-      get_res(res),
-      wgt,
-      get_lim(lim));
+      get_res(current_profile.background_best_effort.reservation),
+      current_profile.background_best_effort.weight,
+      get_lim(current_profile.background_best_effort.limit));
  }
  
  const dmc::ClientInfo *ClientRegistry::get_external_client(
@@ -180,66 +160,7 @@ static std::ostream &operator<<(std::ostream &lhs, const profile_t &rhs)
               << "]";
  }
  
-void MclockConfig::set_config_defaults_from_profile()
-{
-  // Let only a single osd shard (id:0) set the profile configs
-  if (shard_id > 0) {
-    return;
-  }
-
-  const profile_t *profile = nullptr;
-  auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
-  if (mclock_profile == "high_client_ops") {
-    profile = &HIGH_CLIENT_OPS;
-    dout(10) << "Setting high_client_ops profile " << *profile << dendl;
-  } else if (mclock_profile == "high_recovery_ops") {
-    profile = &HIGH_RECOVERY_OPS;
-    dout(10) << "Setting high_recovery_ops profile " << *profile << dendl;
-  } else if (mclock_profile == "balanced") {
-    profile = &BALANCED;
-    dout(10) << "Setting balanced profile " << *profile << dendl;
-  } else if (mclock_profile == "custom") {
-    dout(10) << "Profile set to custom, not setting defaults" << dendl;
-    return;
-  } else {
-    derr << "Invalid mclock profile: " << mclock_profile << dendl;
-    ceph_assert("Invalid choice of mclock profile" == 0);
-    return;
-  }
-  ceph_assert(nullptr != profile);
-
-  auto set_config = [&conf = cct->_conf](const char *key, auto val) {
-    conf.set_val_default(key, std::to_string(val));
-  };
-
-  set_config("osd_mclock_scheduler_client_res", profile->client.reservation);
-  set_config("osd_mclock_scheduler_client_wgt", profile->client.weight);
-  set_config("osd_mclock_scheduler_client_lim", profile->client.limit);
-
-  set_config(
-    "osd_mclock_scheduler_background_recovery_res",
-    profile->background_recovery.reservation);
-  set_config(
-    "osd_mclock_scheduler_background_recovery_wgt",
-    profile->background_recovery.weight);
-  set_config(
-    "osd_mclock_scheduler_background_recovery_lim",
-    profile->background_recovery.limit);
-
-  set_config(
-    "osd_mclock_scheduler_background_best_effort_res",
-    profile->background_best_effort.reservation);
-  set_config(
-    "osd_mclock_scheduler_background_best_effort_wgt",
-    profile->background_best_effort.weight);
-  set_config(
-    "osd_mclock_scheduler_background_best_effort_lim",
-    profile->background_best_effort.limit);
-
-  cct->_conf.apply_changes(nullptr);
-}
-
-void MclockConfig::set_osd_capacity_params_from_config()
+void MclockConfig::set_from_config()
  {
    uint64_t osd_bandwidth_capacity;
    double osd_iop_capacity;
@@ -272,6 +193,47 @@ void MclockConfig::set_osd_capacity_params_from_config()
            << ", osd_bandwidth_capacity_per_shard "
            << osd_bandwidth_capacity_per_shard << " bytes/second"
            << dendl;
+
+  auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
+  if (mclock_profile == "high_client_ops") {
+    current_profile = HIGH_CLIENT_OPS;
+    dout(10) << "Setting high_client_ops profile " << current_profile << dendl;
+  } else if (mclock_profile == "high_recovery_ops") {
+    current_profile = HIGH_RECOVERY_OPS;
+    dout(10) << "Setting high_recovery_ops profile " << current_profile << dendl;
+  } else if (mclock_profile == "balanced") {
+    current_profile = BALANCED;
+    dout(10) << "Setting balanced profile " << current_profile << dendl;
+  } else if (mclock_profile == "custom") {
+    current_profile = {
+      {
+       cct->_conf.get_val<double>("osd_mclock_scheduler_client_res"),
+       cct->_conf.get_val<uint64_t>("osd_mclock_scheduler_client_wgt"),
+       cct->_conf.get_val<double>("osd_mclock_scheduler_client_lim")
+      }, {
+       cct->_conf.get_val<double>(
+         "osd_mclock_scheduler_background_recovery_res"),
+       cct->_conf.get_val<uint64_t>(
+         "osd_mclock_scheduler_background_recovery_wgt"),
+       cct->_conf.get_val<double>(
+         "osd_mclock_scheduler_background_recovery_lim")
+      }, {
+       cct->_conf.get_val<double>(
+         "osd_mclock_scheduler_background_best_effort_res"),
+       cct->_conf.get_val<uint64_t>(
+         "osd_mclock_scheduler_background_best_effort_wgt"),
+       cct->_conf.get_val<double>(
+         "osd_mclock_scheduler_background_best_effort_lim")
+      }
+    };
+    dout(10) << "Setting custom profile " << current_profile << dendl;
+  } else {
+    derr << "Invalid mclock profile: " << mclock_profile << dendl;
+    ceph_assert("Invalid choice of mclock profile" == 0);
+    return;
+  }
+  client_registry.update_from_profile(
+    current_profile, osd_bandwidth_capacity_per_shard);
  }
  
  void MclockConfig::init_logger()
@@ -380,80 +342,10 @@ uint32_t MclockConfig::calc_scaled_cost(int item_cost)
  void MclockConfig::handle_conf_change(const ConfigProxy& conf,
                                       const std::set<std::string> &changed)
  {
-  if (changed.count("osd_mclock_max_capacity_iops_hdd") ||
-      changed.count("osd_mclock_max_capacity_iops_ssd")) {
-   set_osd_capacity_params_from_config();
-   client_registry.update_from_config(
-      conf, osd_bandwidth_capacity_per_shard);
-  }
-  if (changed.count("osd_mclock_max_sequential_bandwidth_hdd") ||
-      changed.count("osd_mclock_max_sequential_bandwidth_ssd")) {
-    set_osd_capacity_params_from_config();
-    client_registry.update_from_config(
-      conf, osd_bandwidth_capacity_per_shard);
-  }
-  if (changed.count("osd_mclock_profile")) {
-    set_config_defaults_from_profile();
-    client_registry.update_from_config(
-      conf, osd_bandwidth_capacity_per_shard);
-  }
-
-  auto get_changed_key = [&changed]() -> std::optional<std::string> {
-    static const std::vector<std::string> qos_params = {
-      "osd_mclock_scheduler_client_res",
-      "osd_mclock_scheduler_client_wgt",
-      "osd_mclock_scheduler_client_lim",
-      "osd_mclock_scheduler_background_recovery_res",
-      "osd_mclock_scheduler_background_recovery_wgt",
-      "osd_mclock_scheduler_background_recovery_lim",
-      "osd_mclock_scheduler_background_best_effort_res",
-      "osd_mclock_scheduler_background_best_effort_wgt",
-      "osd_mclock_scheduler_background_best_effort_lim"
-    };
-
-    for (auto &qp : qos_params) {
-      if (changed.count(qp)) {
-        return qp;
-      }
-    }
-    return std::nullopt;
-  };
-  if (auto key = get_changed_key(); key.has_value()) {
-    auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
-    if (mclock_profile == "custom") {
-      client_registry.update_from_config(
-        conf, osd_bandwidth_capacity_per_shard);
-    } else {
-      // Attempt to change QoS parameter for a built-in profile. Restore the
-      // profile defaults by making one of the OSD shards remove the key from
-      // config monitor store. Note: monc is included in the check since the
-      // mock unit test currently doesn't initialize it.
-      if (shard_id == 0 && monc) {
-        static const std::vector<std::string> osds = {
-          "osd",
-          "osd." + std::to_string(whoami)
-        };
-
-        for (auto osd : osds) {
-          std::string cmd =
-            "{"
-              "\"prefix\": \"config rm\", "
-              "\"who\": \"" + osd + "\", "
-              "\"name\": \"" + *key + "\""
-            "}";
-          std::vector<std::string> vcmd{cmd};
-
-          dout(10) << __func__ << " Removing Key: " << *key
-                   << " for " << osd << " from Mon db" << dendl;
-                   monc->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
-        }
-      }
-    }
-    // Alternatively, the QoS parameter, if set ephemerally for this OSD via
-    // the 'daemon' or 'tell' interfaces must be removed.
-    if (!cct->_conf.rm_val(*key)) {
-      dout(10) << __func__ << " Restored " << *key << " to default" << dendl;
-      cct->_conf.apply_changes(nullptr);
+  for (auto &key : get_tracked_keys()) {
+    if (changed.count(key)) {
+      set_from_config();
+      return;
      }
    }
  }
diff --git a/src/common/mclock_common.h b/src/common/mclock_common.h

index 0c056996338acb237f1756acb6eed5161e2e5f42..428ffeb491a20e1c6e0e422e36e871bc0bf3432c 100644 (file)
--- a/src/common/mclock_common.h
+++ b/src/common/mclock_common.h
@@ -180,8 +180,10 @@ class ClientRegistry {
          internal_client_infos.emplace_back(1, 1, 1);
        }
      }
-    void update_from_config(const ConfigProxy &conf,
-      double capacity_per_shard);
+    void update_from_profile(
+      const profile_t &current_profile,
+      const double capacity_per_shard);
+
      const crimson::dmclock::ClientInfo *get_info(
        const scheduler_id_t &id) const;
  };
@@ -191,55 +193,32 @@ private:
    CephContext *cct;
    uint32_t num_shards;
    bool is_rotational;
-  PerfCounters *logger;
+  PerfCounters *logger = nullptr;
    int shard_id;
    int whoami;
-  double osd_bandwidth_cost_per_io;
-  double osd_bandwidth_capacity_per_shard;
+  double osd_bandwidth_cost_per_io = 0.0;
+  double osd_bandwidth_capacity_per_shard = 0.0;
    ClientRegistry& client_registry;
-  #ifndef WITH_CRIMSON
-  MonClient *monc;
-  #endif
+
+  // currently active profile, will be overridden from config on startup
+  // and upon config change
+  profile_t current_profile = BALANCED;
  public:
-  #ifdef WITH_CRIMSON
    MclockConfig(CephContext *cct, ClientRegistry& creg,
                 uint32_t num_shards, bool is_rotational, int shard_id,
                int whoami):cct(cct),
                             num_shards(num_shards),
                             is_rotational(is_rotational),
-                          logger(nullptr),shard_id(shard_id),
-                           whoami(whoami), osd_bandwidth_cost_per_io(0.0),
-                          osd_bandwidth_capacity_per_shard(0.0),
+                          shard_id(shard_id),
+                           whoami(whoami),
                            client_registry(creg)
    {
      cct->_conf.add_observer(this);
-    set_osd_capacity_params_from_config();
-    set_config_defaults_from_profile();
-    client_registry.update_from_config(
-      cct->_conf, get_capacity_per_shard());
-  }
-  #else
-  MclockConfig(CephContext *cct, ClientRegistry& creg,
-               MonClient *monc, uint32_t num_shards, bool is_rotational,
-              int shard_id, int whoami):cct(cct),
-                                         num_shards(num_shards),
-                                         is_rotational(is_rotational),
-                                        logger(nullptr),shard_id(shard_id),
-                                         whoami(whoami),
-                                        osd_bandwidth_cost_per_io(0.0),
-                                        osd_bandwidth_capacity_per_shard(0.0),
-                                        client_registry(creg), monc(monc)
-  {
-    cct->_conf.add_observer(this);
-    set_osd_capacity_params_from_config();
-    set_config_defaults_from_profile();
-    client_registry.update_from_config(
-      cct->_conf, get_capacity_per_shard());
+    set_from_config();
    }
-#endif
    ~MclockConfig() final;
-  void set_config_defaults_from_profile();
-  void set_osd_capacity_params_from_config();
+
+  void set_from_config();
    void init_logger();
    void get_mclock_counter(scheduler_id_t id);
    void put_mclock_counter(scheduler_id_t id);
diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in

index 7fda01f7c246471b9c3cdd03e87eedac50fb8316..03c052790449b24f7da10990e9d4e6dc17db5569 100644 (file)
--- a/src/common/options/osd.yaml.in
+++ b/src/common/options/osd.yaml.in
@@ -1085,6 +1085,7 @@ options:
      of 0 specifies the lowest possible reservation. Any value greater than
      0 and up to 1.0 specifies the minimum IO proportion to reserve for each
      client in terms of a fraction of the OSD's maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO proportion reserved for each client (default).
    default: 0
@@ -1092,15 +1093,18 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_client_wgt
    type: uint
    level: advanced
    desc: IO share for each client (default) over reservation
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO share for each client (default) over reservation.
    default: 1
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_client_lim
    type: float
    level: advanced
@@ -1110,6 +1114,7 @@ options:
      than 0 and up to 1.0 specifies the upper IO limit over reservation
      that each client receives in terms of a fraction of the OSD's
      maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO limit for each client (default) over reservation.
    default: 0
@@ -1117,6 +1122,7 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_recovery_res
    type: float
    level: advanced
@@ -1125,6 +1131,7 @@ options:
      greater than 0 and up to 1.0 specifies the minimum IO proportion to
      reserve for background recovery operations in terms of a fraction of
      the OSD's maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO proportion reserved for background recovery (default).
    default: 0
@@ -1132,15 +1139,18 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_recovery_wgt
    type: uint
    level: advanced
    desc: IO share for each background recovery over reservation
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO share for each background recovery over reservation.
    default: 1
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_recovery_lim
    type: float
    level: advanced
@@ -1150,6 +1160,7 @@ options:
      OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
      limit over reservation that background recovery operation receives in
      terms of a fraction of the OSD's maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO limit for background recovery over reservation.
    default: 0
@@ -1157,6 +1168,7 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_best_effort_res
    type: float
    level: advanced
@@ -1165,6 +1177,7 @@ options:
      greater than 0 and up to 1.0 specifies the minimum IO proportion to
      reserve for background best_effort operations in terms of a fraction
      of the OSD's maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO proportion reserved for background best_effort (default).
    default: 0
@@ -1172,15 +1185,18 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_best_effort_wgt
    type: uint
    level: advanced
    desc: IO share for each background best_effort over reservation
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO share for each background best_effort over reservation.
    default: 1
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_background_best_effort_lim
    type: float
    level: advanced
@@ -1190,6 +1206,7 @@ options:
      OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
      limit over reservation that background best_effort operation receives
      in terms of a fraction of the OSD's maximum IOPS capacity.
+    Ignored unless osd_mclock_profile is set to 'custom'.
    long_desc: Only considered for osd_op_queue = mclock_scheduler
    fmt_desc: IO limit for background best_effort over reservation.
    default: 0
@@ -1197,6 +1214,7 @@ options:
    max: 1.0
    see_also:
    - osd_op_queue
+  - osd_mclock_profile
  - name: osd_mclock_scheduler_anticipation_timeout
    type: float
    level: advanced
diff --git a/src/osd/scheduler/mClockScheduler.h b/src/osd/scheduler/mClockScheduler.h

index 4442ad40b7c478536c6300bcd3a53371725f0ab6..190b5745bb85d27a2f7db5f0e5cb15dde4883a4f 100644 (file)
--- a/src/osd/scheduler/mClockScheduler.h
+++ b/src/osd/scheduler/mClockScheduler.h
@@ -86,7 +86,7 @@ public:
      : cct(cct),
        cutoff_priority(cutoff_priority),
        monc(monc),
-      mclock_conf(cct, client_registry, monc, num_shards,
+      mclock_conf(cct, client_registry, num_shards,
                   is_rotational, shard_id, whoami),
        scheduler(
         std::bind(&ClientRegistry::get_info,
author	Samuel Just <sjust@redhat.com>
	Mon, 11 Aug 2025 21:14:45 +0000 (14:14 -0700)
committer	Samuel Just <sjust@redhat.com>
	Fri, 5 Sep 2025 21:47:38 +0000 (14:47 -0700)
src/common/mclock_common.cc		patch \| blob \| history
src/common/mclock_common.h		patch \| blob \| history
src/common/options/osd.yaml.in		patch \| blob \| history
src/osd/scheduler/mClockScheduler.h		patch \| blob \| history