From e65c4bcd96f2a8198147e52a0ddc576fa3e96ab0 Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Wed, 8 Dec 2021 13:06:28 +0530 Subject: [PATCH] osd, mon/OSDMonitor: Force the scheduler type to 'wpq' for filestore OSDs The 'mclock_scheduler' is not supported for filestore OSDs. Enforce the usage of the 'wpq' scheduler for such OSDs to avoid issues. Also, in this scenario, the override of various config settings for the 'mclock_scheduler' is not performed. Signed-off-by: Sridhar Seshasayee --- PendingReleaseNotes | 5 ++++- src/mon/OSDMonitor.cc | 4 +++- src/osd/OSD.cc | 17 ++++++++++++++--- src/osd/OSD.h | 1 + src/osd/scheduler/OpScheduler.cc | 9 ++++++--- src/osd/scheduler/OpScheduler.h | 3 ++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 06b0f03de7d..64ec0c4842c 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -50,7 +50,10 @@ * RGW: `radosgw-admin realm delete` is now renamed to `radosgw-admin realm rm`. This is consistent with the help message. -* OSD: Ceph now uses mclock_scheduler as its default osd_op_queue to provide QoS. +* OSD: Ceph now uses mclock_scheduler for bluestore OSDs as its default osd_op_queue + to provide QoS. The 'mclock_scheduler' is not supported for filestore OSDs. + Therefore, the default 'osd_op_queue' is set to 'wpq' for filestore OSDs + and is enforced even if the user attempts to change it. * CephFS: Failure to replay the journal by a standby-replay daemon will now cause the rank to be marked damaged. 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index b1b244975d3..29561a1c7e0 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2174,7 +2174,9 @@ void OSDMonitor::check_for_filestore_osds(health_check_map_t *checks) ss << " [Deprecated]"; auto& d = checks->add("OSD_FILESTORE", HEALTH_WARN, ss.str(), filestore_osds.size()); - deprecated_tip << ", which has been deprecated."; + deprecated_tip << ", which has been deprecated and" + << " not been optimized for QoS" + << " (Filestore OSDs will use 'osd_op_queue = wpq' strictly)"; detail.push_back(deprecated_tip.str()); d.detail.swap(detail); } diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 8977b19c85a..8e600f06232 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -9957,7 +9957,8 @@ void OSD::maybe_override_max_osd_capacity_for_qos() // osd capacity with the value obtained from running the // osd bench test. This is later used to setup mclock. if ((cct->_conf.get_val("osd_op_queue") == "mclock_scheduler") && - (cct->_conf.get_val("osd_mclock_skip_benchmark") == false)) { + (cct->_conf.get_val("osd_mclock_skip_benchmark") == false) && + (!unsupported_objstore_for_qos())) { std::string max_capacity_iops_config; bool force_run_benchmark = cct->_conf.get_val("osd_mclock_force_run_benchmark_on_init"); @@ -10041,7 +10042,8 @@ bool OSD::maybe_override_options_for_qos() { // If the scheduler enabled is mclock, override the recovery, backfill // and sleep options so that mclock can meet the QoS goals. 
- if (cct->_conf.get_val("osd_op_queue") == "mclock_scheduler") { + if (cct->_conf.get_val("osd_op_queue") == "mclock_scheduler" && + !unsupported_objstore_for_qos()) { dout(1) << __func__ << ": Changing recovery/backfill/sleep settings for QoS" << dendl; @@ -10111,6 +10113,14 @@ int OSD::mon_cmd_set_config(const std::string &key, const std::string &val) return 0; } +bool OSD::unsupported_objstore_for_qos() +{ + static const std::vector unsupported_objstores = { "filestore" }; + return std::find(unsupported_objstores.begin(), + unsupported_objstores.end(), + store->get_type()) != unsupported_objstores.end(); +} + void OSD::update_log_config() { auto parsed_options = clog->parse_client_options(cct); @@ -10622,7 +10632,8 @@ OSDShard::OSDShard( shard_lock_name(shard_name + "::shard_lock"), shard_lock{make_mutex(shard_lock_name)}, scheduler(ceph::osd::scheduler::make_scheduler( - cct, osd->num_shards, osd->store->is_rotational())), + cct, osd->num_shards, osd->store->is_rotational(), + osd->store->get_type())), context_queue(sdata_wait_lock, sdata_cond) { dout(0) << "using op scheduler " << *scheduler << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 1914bc1b57f..ca23deb724a 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2062,6 +2062,7 @@ private: double *elapsed, std::ostream& ss); int mon_cmd_set_config(const std::string &key, const std::string &val); + bool unsupported_objstore_for_qos(); void scrub_purged_snaps(); void probe_smart(const std::string& devid, std::ostream& ss); diff --git a/src/osd/scheduler/OpScheduler.cc b/src/osd/scheduler/OpScheduler.cc index 3ce6fdb55d1..b0d14b496b5 100644 --- a/src/osd/scheduler/OpScheduler.cc +++ b/src/osd/scheduler/OpScheduler.cc @@ -22,7 +22,8 @@ namespace ceph::osd::scheduler { OpSchedulerRef make_scheduler( - CephContext *cct, uint32_t num_shards, bool is_rotational) + CephContext *cct, uint32_t num_shards, + bool is_rotational, std::string_view osd_objectstore) { const std::string *type = 
&cct->_conf->osd_op_queue; if (*type == "debug_random") { @@ -33,8 +34,9 @@ OpSchedulerRef make_scheduler( type = &index_lookup[which]; } - if (*type == "wpq" ) { - // default is 'wpq' + // Force the use of 'wpq' scheduler for filestore OSDs. + // The 'mclock_scheduler' is not supported for filestore OSDs. + if (*type == "wpq" || osd_objectstore == "filestore") { return std::make_unique< ClassedOpQueueScheduler>>( cct, @@ -42,6 +44,7 @@ OpSchedulerRef make_scheduler( cct->_conf->osd_op_pq_min_cost ); } else if (*type == "mclock_scheduler") { + // default is 'mclock_scheduler' return std::make_unique(cct, num_shards, is_rotational); } else { ceph_assert("Invalid choice of wq" == 0); diff --git a/src/osd/scheduler/OpScheduler.h b/src/osd/scheduler/OpScheduler.h index 6e2bb5abd82..dc524314f25 100644 --- a/src/osd/scheduler/OpScheduler.h +++ b/src/osd/scheduler/OpScheduler.h @@ -61,7 +61,8 @@ std::ostream &operator<<(std::ostream &lhs, const OpScheduler &); using OpSchedulerRef = std::unique_ptr; OpSchedulerRef make_scheduler( - CephContext *cct, uint32_t num_shards, bool is_rotational); + CephContext *cct, uint32_t num_shards, bool is_rotational, + std::string_view osd_objectstore); /** * Implements OpScheduler in terms of OpQueue -- 2.39.5