From: Sridhar Seshasayee Date: Mon, 25 Jul 2022 04:30:41 +0000 (+0530) Subject: osd: Implement Context based completion for mon cmd to set a config option X-Git-Tag: v17.2.6~461^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9516094a2a0977c71558ed43ad088ec72b860f06;p=ceph.git osd: Implement Context based completion for mon cmd to set a config option The method, OSD::mon_cmd_set_config() currently sets a config option related to mClock during OSD boot-up. The method used to wait on a condition variable until the mon ack'ed the command. This was generally not a problem. But there could be scenarios where monitor could be slow to respond, or due to a flaky network, response could be delayed. The OSD could therefore be blocked from booting-up. To avoid this, the conditional wait is replaced with an async Context completion. Moreover, persisting this in the monitor store is not very critical. An existing fallback mechanism stores this value in the in-memory "values" map of the config subsystem. This can be read by the OSD at any point during its operation. The issue of the OSDs being blocked from booting-up properly was observed when running tests with failure injections during OSD boot-up. Following are the changes: The changes to mon_cmd_set_config() are generic and any osd specific option may be set in the config monitor store using this method. - Implement Context based completion tracking of the mon command using MonCmdSetConfigOnFinish which is derived from the base Context class. In case of failures, the finish() method is overriden to save the config option in the config subsystem's "values" map at CONF_DEFAULT level using set_val_default() method. This allows users to modify this at a later point using "ceph config set" cli command. - Additionally, if requested, finish() also checks if the config option needs to be applied on each op shard and calls update_scheduler_config() on each shard. This is required for some config options related to the mClock scheduler. Fixes: https://tracker.ceph.com/issues/57040 Signed-off-by: Sridhar Seshasayee (cherry picked from commit 3c0603cb87a75b7bfb1766e8b9edd1c4a8e43028) --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index a21b04a688db..323d1edace30 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3861,8 +3861,8 @@ int OSD::init() start_boot(); // Override a few options if mclock scheduler is enabled. - maybe_override_max_osd_capacity_for_qos(); maybe_override_options_for_qos(); + maybe_override_max_osd_capacity_for_qos(); return 0; @@ -10149,17 +10149,8 @@ void OSD::maybe_override_max_osd_capacity_for_qos() << " elapsed_sec: " << elapsed << dendl; - // Persist iops to the MON store - ret = mon_cmd_set_config(max_capacity_iops_config, std::to_string(iops)); - if (ret < 0) { - // Fallback to setting the config within the in-memory "values" map. - cct->_conf.set_val(max_capacity_iops_config, std::to_string(iops)); - } - - // Override the max osd capacity for all shards - for (auto& shard : shards) { - shard->update_scheduler_config(); - } + // Persist the iops value to the MON store. + mon_cmd_set_config(max_capacity_iops_config, std::to_string(iops)); } } @@ -10212,7 +10203,41 @@ bool OSD::maybe_override_options_for_qos() return false; } -int OSD::mon_cmd_set_config(const std::string &key, const std::string &val) +/** + * A context for receiving status from a background mon command to set + * a config option and optionally apply the changes on each op shard. + */ +class MonCmdSetConfigOnFinish : public Context { + OSD *osd; + CephContext *cct; + std::string key; + std::string val; + bool update_shard; +public: + explicit MonCmdSetConfigOnFinish( + OSD *o, + CephContext *cct, + const std::string &k, + const std::string &v, + const bool s) + : osd(o), cct(cct), key(k), val(v), update_shard(s) {} + void finish(int r) override { + if (r != 0) { + // Fallback to setting the config within the in-memory "values" map. + cct->_conf.set_val_default(key, val); + } + + // If requested, apply this option on the + // active scheduler of each op shard. + if (update_shard) { + for (auto& shard : osd->shards) { + shard->update_scheduler_config(); + } + } + } +}; + +void OSD::mon_cmd_set_config(const std::string &key, const std::string &val) { std::string cmd = "{" @@ -10221,21 +10246,20 @@ int OSD::mon_cmd_set_config(const std::string &key, const std::string &val) "\"name\": \"" + key + "\", " "\"value\": \"" + val + "\"" "}"; - vector vcmd{cmd}; - bufferlist inbl; - std::string outs; - C_SaferCond cond; - monc->start_mon_command(vcmd, inbl, nullptr, &outs, &cond); - int r = cond.wait(); - if (r < 0) { - derr << __func__ << " Failed to set config key " << key - << " err: " << cpp_strerror(r) - << " errstr: " << outs << dendl; - return r; - } - return 0; + // List of config options to be distributed across each op shard. + // Currently limited to a couple of mClock options. + static const std::vector shard_option = + { "osd_mclock_max_capacity_iops_hdd", "osd_mclock_max_capacity_iops_ssd" }; + const bool update_shard = std::find(shard_option.begin(), + shard_option.end(), + key) != shard_option.end(); + + auto on_finish = new MonCmdSetConfigOnFinish(this, cct, key, + val, update_shard); + dout(10) << __func__ << " Set " << key << " = " << val << dendl; + monc->start_mon_command(vcmd, {}, nullptr, nullptr, on_finish); } bool OSD::unsupported_objstore_for_qos() diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 2da5de10aa69..843c021d0d38 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2064,7 +2064,7 @@ private: int64_t onum, double *elapsed, std::ostream& ss); - int mon_cmd_set_config(const std::string &key, const std::string &val); + void mon_cmd_set_config(const std::string &key, const std::string &val); bool unsupported_objstore_for_qos(); void scrub_purged_snaps();