* for the osd_mclock_scheduler_client_* parameters prior to calling
* update_from_config -- see set_config_defaults_from_profile().
*/
-void ClientRegistry::update_from_config(const ConfigProxy &conf,
- const double capacity_per_shard)
+void ClientRegistry::update_from_profile(const profile_t ¤t_profile,
+ const double capacity_per_shard)
{
+
auto get_res = [&](double res) {
if (res) {
return res * capacity_per_shard;
}
};
- // Set external client infos
- double res = conf.get_val<double>(
- "osd_mclock_scheduler_client_res");
- double lim = conf.get_val<double>(
- "osd_mclock_scheduler_client_lim");
- uint64_t wgt = conf.get_val<uint64_t>(
- "osd_mclock_scheduler_client_wgt");
default_external_client_info.update(
- get_res(res),
- wgt,
- get_lim(lim));
-
- // Set background recovery client infos
- res = conf.get_val<double>(
- "osd_mclock_scheduler_background_recovery_res");
- lim = conf.get_val<double>(
- "osd_mclock_scheduler_background_recovery_lim");
- wgt = conf.get_val<uint64_t>(
- "osd_mclock_scheduler_background_recovery_wgt");
+ get_res(current_profile.client.reservation),
+ current_profile.client.weight,
+ get_lim(current_profile.client.limit));
+
internal_client_infos[
static_cast<size_t>(SchedulerClass::background_recovery)].update(
- get_res(res),
- wgt,
- get_lim(lim));
-
- // Set background best effort client infos
- res = conf.get_val<double>(
- "osd_mclock_scheduler_background_best_effort_res");
- lim = conf.get_val<double>(
- "osd_mclock_scheduler_background_best_effort_lim");
- wgt = conf.get_val<uint64_t>(
- "osd_mclock_scheduler_background_best_effort_wgt");
+ get_res(current_profile.background_recovery.reservation),
+ current_profile.background_recovery.weight,
+ get_lim(current_profile.background_recovery.limit));
+
internal_client_infos[
static_cast<size_t>(SchedulerClass::background_best_effort)].update(
- get_res(res),
- wgt,
- get_lim(lim));
+ get_res(current_profile.background_best_effort.reservation),
+ current_profile.background_best_effort.weight,
+ get_lim(current_profile.background_best_effort.limit));
}
const dmc::ClientInfo *ClientRegistry::get_external_client(
<< "]";
}
-void MclockConfig::set_config_defaults_from_profile()
-{
- // Let only a single osd shard (id:0) set the profile configs
- if (shard_id > 0) {
- return;
- }
-
- const profile_t *profile = nullptr;
- auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
- if (mclock_profile == "high_client_ops") {
- profile = &HIGH_CLIENT_OPS;
- dout(10) << "Setting high_client_ops profile " << *profile << dendl;
- } else if (mclock_profile == "high_recovery_ops") {
- profile = &HIGH_RECOVERY_OPS;
- dout(10) << "Setting high_recovery_ops profile " << *profile << dendl;
- } else if (mclock_profile == "balanced") {
- profile = &BALANCED;
- dout(10) << "Setting balanced profile " << *profile << dendl;
- } else if (mclock_profile == "custom") {
- dout(10) << "Profile set to custom, not setting defaults" << dendl;
- return;
- } else {
- derr << "Invalid mclock profile: " << mclock_profile << dendl;
- ceph_assert("Invalid choice of mclock profile" == 0);
- return;
- }
- ceph_assert(nullptr != profile);
-
- auto set_config = [&conf = cct->_conf](const char *key, auto val) {
- conf.set_val_default(key, std::to_string(val));
- };
-
- set_config("osd_mclock_scheduler_client_res", profile->client.reservation);
- set_config("osd_mclock_scheduler_client_wgt", profile->client.weight);
- set_config("osd_mclock_scheduler_client_lim", profile->client.limit);
-
- set_config(
- "osd_mclock_scheduler_background_recovery_res",
- profile->background_recovery.reservation);
- set_config(
- "osd_mclock_scheduler_background_recovery_wgt",
- profile->background_recovery.weight);
- set_config(
- "osd_mclock_scheduler_background_recovery_lim",
- profile->background_recovery.limit);
-
- set_config(
- "osd_mclock_scheduler_background_best_effort_res",
- profile->background_best_effort.reservation);
- set_config(
- "osd_mclock_scheduler_background_best_effort_wgt",
- profile->background_best_effort.weight);
- set_config(
- "osd_mclock_scheduler_background_best_effort_lim",
- profile->background_best_effort.limit);
-
- cct->_conf.apply_changes(nullptr);
-}
-
-void MclockConfig::set_osd_capacity_params_from_config()
+void MclockConfig::set_from_config()
{
uint64_t osd_bandwidth_capacity;
double osd_iop_capacity;
<< ", osd_bandwidth_capacity_per_shard "
<< osd_bandwidth_capacity_per_shard << " bytes/second"
<< dendl;
+
+ auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
+ if (mclock_profile == "high_client_ops") {
+ current_profile = HIGH_CLIENT_OPS;
+ dout(10) << "Setting high_client_ops profile " << current_profile << dendl;
+ } else if (mclock_profile == "high_recovery_ops") {
+ current_profile = HIGH_RECOVERY_OPS;
+ dout(10) << "Setting high_recovery_ops profile " << current_profile << dendl;
+ } else if (mclock_profile == "balanced") {
+ current_profile = BALANCED;
+ dout(10) << "Setting balanced profile " << current_profile << dendl;
+ } else if (mclock_profile == "custom") {
+ current_profile = {
+ {
+ cct->_conf.get_val<double>("osd_mclock_scheduler_client_res"),
+ cct->_conf.get_val<uint64_t>("osd_mclock_scheduler_client_wgt"),
+ cct->_conf.get_val<double>("osd_mclock_scheduler_client_lim")
+ }, {
+ cct->_conf.get_val<double>(
+ "osd_mclock_scheduler_background_recovery_res"),
+ cct->_conf.get_val<uint64_t>(
+ "osd_mclock_scheduler_background_recovery_wgt"),
+ cct->_conf.get_val<double>(
+ "osd_mclock_scheduler_background_recovery_lim")
+ }, {
+ cct->_conf.get_val<double>(
+ "osd_mclock_scheduler_background_best_effort_res"),
+ cct->_conf.get_val<uint64_t>(
+ "osd_mclock_scheduler_background_best_effort_wgt"),
+ cct->_conf.get_val<double>(
+ "osd_mclock_scheduler_background_best_effort_lim")
+ }
+ };
+ dout(10) << "Setting custom profile " << current_profile << dendl;
+ } else {
+ derr << "Invalid mclock profile: " << mclock_profile << dendl;
+ ceph_assert("Invalid choice of mclock profile" == 0);
+ return;
+ }
+ client_registry.update_from_profile(
+ current_profile, osd_bandwidth_capacity_per_shard);
}
void MclockConfig::init_logger()
void MclockConfig::handle_conf_change(const ConfigProxy& conf,
const std::set<std::string> &changed)
{
- if (changed.count("osd_mclock_max_capacity_iops_hdd") ||
- changed.count("osd_mclock_max_capacity_iops_ssd")) {
- set_osd_capacity_params_from_config();
- client_registry.update_from_config(
- conf, osd_bandwidth_capacity_per_shard);
- }
- if (changed.count("osd_mclock_max_sequential_bandwidth_hdd") ||
- changed.count("osd_mclock_max_sequential_bandwidth_ssd")) {
- set_osd_capacity_params_from_config();
- client_registry.update_from_config(
- conf, osd_bandwidth_capacity_per_shard);
- }
- if (changed.count("osd_mclock_profile")) {
- set_config_defaults_from_profile();
- client_registry.update_from_config(
- conf, osd_bandwidth_capacity_per_shard);
- }
-
- auto get_changed_key = [&changed]() -> std::optional<std::string> {
- static const std::vector<std::string> qos_params = {
- "osd_mclock_scheduler_client_res",
- "osd_mclock_scheduler_client_wgt",
- "osd_mclock_scheduler_client_lim",
- "osd_mclock_scheduler_background_recovery_res",
- "osd_mclock_scheduler_background_recovery_wgt",
- "osd_mclock_scheduler_background_recovery_lim",
- "osd_mclock_scheduler_background_best_effort_res",
- "osd_mclock_scheduler_background_best_effort_wgt",
- "osd_mclock_scheduler_background_best_effort_lim"
- };
-
- for (auto &qp : qos_params) {
- if (changed.count(qp)) {
- return qp;
- }
- }
- return std::nullopt;
- };
- if (auto key = get_changed_key(); key.has_value()) {
- auto mclock_profile = cct->_conf.get_val<std::string>("osd_mclock_profile");
- if (mclock_profile == "custom") {
- client_registry.update_from_config(
- conf, osd_bandwidth_capacity_per_shard);
- } else {
- // Attempt to change QoS parameter for a built-in profile. Restore the
- // profile defaults by making one of the OSD shards remove the key from
- // config monitor store. Note: monc is included in the check since the
- // mock unit test currently doesn't initialize it.
- if (shard_id == 0 && monc) {
- static const std::vector<std::string> osds = {
- "osd",
- "osd." + std::to_string(whoami)
- };
-
- for (auto osd : osds) {
- std::string cmd =
- "{"
- "\"prefix\": \"config rm\", "
- "\"who\": \"" + osd + "\", "
- "\"name\": \"" + *key + "\""
- "}";
- std::vector<std::string> vcmd{cmd};
-
- dout(10) << __func__ << " Removing Key: " << *key
- << " for " << osd << " from Mon db" << dendl;
- monc->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
- }
- }
- }
- // Alternatively, the QoS parameter, if set ephemerally for this OSD via
- // the 'daemon' or 'tell' interfaces must be removed.
- if (!cct->_conf.rm_val(*key)) {
- dout(10) << __func__ << " Restored " << *key << " to default" << dendl;
- cct->_conf.apply_changes(nullptr);
+ for (auto &key : get_tracked_keys()) {
+ if (changed.count(key)) {
+ set_from_config();
+ return;
}
}
}
internal_client_infos.emplace_back(1, 1, 1);
}
}
- void update_from_config(const ConfigProxy &conf,
- double capacity_per_shard);
+ void update_from_profile(
+ const profile_t ¤t_profile,
+ const double capacity_per_shard);
+
const crimson::dmclock::ClientInfo *get_info(
const scheduler_id_t &id) const;
};
CephContext *cct;
uint32_t num_shards;
bool is_rotational;
- PerfCounters *logger;
+ PerfCounters *logger = nullptr;
int shard_id;
int whoami;
- double osd_bandwidth_cost_per_io;
- double osd_bandwidth_capacity_per_shard;
+ double osd_bandwidth_cost_per_io = 0.0;
+ double osd_bandwidth_capacity_per_shard = 0.0;
ClientRegistry& client_registry;
- #ifndef WITH_CRIMSON
- MonClient *monc;
- #endif
+
+ // currently active profile, will be overridden from config on startup
+ // and upon config change
+ profile_t current_profile = BALANCED;
public:
- #ifdef WITH_CRIMSON
MclockConfig(CephContext *cct, ClientRegistry& creg,
uint32_t num_shards, bool is_rotational, int shard_id,
int whoami):cct(cct),
num_shards(num_shards),
is_rotational(is_rotational),
- logger(nullptr),shard_id(shard_id),
- whoami(whoami), osd_bandwidth_cost_per_io(0.0),
- osd_bandwidth_capacity_per_shard(0.0),
+ shard_id(shard_id),
+ whoami(whoami),
client_registry(creg)
{
cct->_conf.add_observer(this);
- set_osd_capacity_params_from_config();
- set_config_defaults_from_profile();
- client_registry.update_from_config(
- cct->_conf, get_capacity_per_shard());
- }
- #else
- MclockConfig(CephContext *cct, ClientRegistry& creg,
- MonClient *monc, uint32_t num_shards, bool is_rotational,
- int shard_id, int whoami):cct(cct),
- num_shards(num_shards),
- is_rotational(is_rotational),
- logger(nullptr),shard_id(shard_id),
- whoami(whoami),
- osd_bandwidth_cost_per_io(0.0),
- osd_bandwidth_capacity_per_shard(0.0),
- client_registry(creg), monc(monc)
- {
- cct->_conf.add_observer(this);
- set_osd_capacity_params_from_config();
- set_config_defaults_from_profile();
- client_registry.update_from_config(
- cct->_conf, get_capacity_per_shard());
+ set_from_config();
}
-#endif
~MclockConfig() final;
- void set_config_defaults_from_profile();
- void set_osd_capacity_params_from_config();
+
+ void set_from_config();
void init_logger();
void get_mclock_counter(scheduler_id_t id);
void put_mclock_counter(scheduler_id_t id);
of 0 specifies the lowest possible reservation. Any value greater than
0 and up to 1.0 specifies the minimum IO proportion to reserve for each
client in terms of a fraction of the OSD's maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO proportion reserved for each client (default).
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_client_wgt
type: uint
level: advanced
desc: IO share for each client (default) over reservation
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO share for each client (default) over reservation.
default: 1
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_client_lim
type: float
level: advanced
than 0 and up to 1.0 specifies the upper IO limit over reservation
that each client receives in terms of a fraction of the OSD's
maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO limit for each client (default) over reservation.
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_recovery_res
type: float
level: advanced
greater than 0 and up to 1.0 specifies the minimum IO proportion to
reserve for background recovery operations in terms of a fraction of
the OSD's maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO proportion reserved for background recovery (default).
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_recovery_wgt
type: uint
level: advanced
desc: IO share for each background recovery over reservation
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO share for each background recovery over reservation.
default: 1
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_recovery_lim
type: float
level: advanced
OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
limit over reservation that background recovery operation receives in
terms of a fraction of the OSD's maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO limit for background recovery over reservation.
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_best_effort_res
type: float
level: advanced
greater than 0 and up to 1.0 specifies the minimum IO proportion to
reserve for background best_effort operations in terms of a fraction
of the OSD's maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO proportion reserved for background best_effort (default).
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_best_effort_wgt
type: uint
level: advanced
desc: IO share for each background best_effort over reservation
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO share for each background best_effort over reservation.
default: 1
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_background_best_effort_lim
type: float
level: advanced
OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
limit over reservation that background best_effort operation receives
in terms of a fraction of the OSD's maximum IOPS capacity.
+ Ignored unless osd_mclock_profile is set to 'custom'.
long_desc: Only considered for osd_op_queue = mclock_scheduler
fmt_desc: IO limit for background best_effort over reservation.
default: 0
max: 1.0
see_also:
- osd_op_queue
+ - osd_mclock_profile
- name: osd_mclock_scheduler_anticipation_timeout
type: float
level: advanced