From: Kamoltat Sirivadhna Date: Wed, 28 Aug 2024 13:40:59 +0000 (+0000) Subject: mon [stretch mode]: support disable_stretch_mode X-Git-Tag: v19.2.3~435^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=882b506644c87eeb6da4604fdc8cee870b219f96;p=ceph.git mon [stretch mode]: support disable_stretch_mode Problem: Currently, Ceph lacks the ability to exit stretch mode and move back to normal cluster (non-stretched). Solution: Provide a command to allow the user to exit stretch mode gracefully: `ceph mon disable_stretch_mode --yes-i-really-mean-it` User can either specify a crush rule that they want all pools to move to or not specify a rule and Ceph will use a default replicated crush rule. Fixes: https://tracker.ceph.com/issues/67467 Signed-off-by: Kamoltat Sirivadhna (cherry picked from commit 78ce68de41b1d5278e14cf56dff7f15394969255) --- diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index acb88dfb89b..3c161dd3b2c 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -549,6 +549,11 @@ COMMAND("mon enable_stretch_mode " \ "as the tiebreaker and setting locations " "as the units for stretching across", "mon", "rw") +COMMAND("mon disable_stretch_mode " \ + "name=crush_rule,type=CephString,req=false, " + "name=yes_i_really_mean_it,type=CephBool,req=false, ", + "disable stretch mode, reverting to normal peering rules", + "mon", "rw") COMMAND("mon set_new_tiebreaker " \ "name=name,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index cdc6a3c5778..eb903b907b4 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -6656,6 +6656,8 @@ void Monitor::notify_new_monmap(bool can_change_external_state, bool remove_rank if (monmap->stretch_mode_enabled) { try_engage_stretch_mode(); + } else { + try_disable_stretch_mode(); } if (is_stretch_mode()) { @@ -6714,6 +6716,32 @@ void Monitor::try_engage_stretch_mode() 
disconnect_disallowed_stretch_sessions(); } } +struct CMonDisableStretchMode : public Context { + Monitor *m; + CMonDisableStretchMode(Monitor *mon) : m(mon) {} + void finish(int r) { + m->try_disable_stretch_mode(); + } +}; +void Monitor::try_disable_stretch_mode() +{ + dout(20) << __func__ << dendl; + if (!stretch_mode_engaged) return; + if (!osdmon()->is_readable()) { + dout(20) << "osdmon is not readable" << dendl; + osdmon()->wait_for_readable_ctx(new CMonDisableStretchMode(this)); + return; + } + if (!osdmon()->osdmap.stretch_mode_enabled && + !monmap->stretch_mode_enabled) { + dout(10) << "Disabling stretch mode!" << dendl; + stretch_mode_engaged = false; + stretch_bucket_divider.clear(); + degraded_stretch_mode = false; + recovering_stretch_mode = false; + } + +} void Monitor::do_stretch_mode_election_work() { @@ -6770,6 +6798,7 @@ struct CMonGoRecovery : public Context { void Monitor::go_recovery_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; dout(20) << "is_leader(): " << is_leader() << dendl; if (!is_leader()) return; dout(20) << "is_degraded_stretch_mode(): " << is_degraded_stretch_mode() << dendl; @@ -6800,6 +6829,7 @@ void Monitor::go_recovery_stretch_mode() void Monitor::set_recovery_stretch_mode() { + if (!is_stretch_mode()) return; degraded_stretch_mode = true; recovering_stretch_mode = true; osdmon()->set_recovery_stretch_mode(); @@ -6808,6 +6838,7 @@ void Monitor::set_recovery_stretch_mode() void Monitor::maybe_go_degraded_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; if (is_degraded_stretch_mode()) return; if (!is_leader()) return; if (dead_mon_buckets.empty()) return; @@ -6846,6 +6877,7 @@ void Monitor::trigger_degraded_stretch_mode(const set& dead_mons, const set& dead_buckets) { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; ceph_assert(osdmon()->is_writeable()); ceph_assert(monmon()->is_writeable()); @@ -6866,6 +6898,7 @@ void 
Monitor::trigger_degraded_stretch_mode(const set& dead_mons, void Monitor::set_degraded_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; degraded_stretch_mode = true; recovering_stretch_mode = false; osdmon()->set_degraded_stretch_mode(); @@ -6883,6 +6916,7 @@ struct CMonGoHealthy : public Context { void Monitor::trigger_healthy_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; if (!is_degraded_stretch_mode()) return; if (!is_leader()) return; if (!osdmon()->is_writeable()) { @@ -6903,6 +6937,7 @@ void Monitor::trigger_healthy_stretch_mode() void Monitor::set_healthy_stretch_mode() { + if (!is_stretch_mode()) return; degraded_stretch_mode = false; recovering_stretch_mode = false; osdmon()->set_healthy_stretch_mode(); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 2958388e83a..e932871854f 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -293,6 +293,7 @@ public: * updates across the entire cluster. */ void try_engage_stretch_mode(); + void try_disable_stretch_mode(); void maybe_go_degraded_stretch_mode(); void trigger_degraded_stretch_mode(const std::set& dead_mons, const std::set& dead_buckets); diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 1226c8a8241..732238f4358 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -1187,6 +1187,42 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) ceph_assert(okay == true); } request_proposal(mon.osdmon()); + } else if (prefix == "mon disable_stretch_mode") { + if (!mon.osdmon()->is_writeable()) { + dout(10) << __func__ + << ": waiting for osdmon writeable for stretch mode" << dendl; + mon.osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(&mon, op)); + return false; /* do not propose, yet */ + } + bool sure = false; + bool okay = false; + int errcode = 0; + if (!pending_map.stretch_mode_enabled) { + ss << "stretch mode is already disabled"; + err = -EINVAL; + goto reply_no_propose; + } 
+ cmd_getval(cmdmap, "yes_i_really_mean_it", sure); + if (!sure) { + ss << " This command will disable stretch mode, " + "which means all your pools will be reverted back " + "to the default size, min_size and crush_rule. " + "Pass --yes-i-really-mean-it to proceed."; + err = -EPERM; + goto reply_no_propose; + } + string crush_rule = cmd_getval_or(cmdmap, "crush_rule", string{}); + mon.osdmon()->try_disable_stretch_mode(ss, &okay, &errcode, crush_rule); + if (!okay) { + err = errcode; + goto reply_no_propose; + } + pending_map.stretch_mode_enabled = false; + pending_map.tiebreaker_mon = ""; + pending_map.disallowed_leaders.clear(); + pending_map.stretch_marked_down_mons.clear(); + pending_map.last_changed = ceph_clock_now(); + request_proposal(mon.osdmon()); } else { ss << "unknown command " << prefix; err = -EINVAL; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 49324811852..75300e4d549 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -983,6 +983,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) dout(20) << "Checking degraded stretch mode due to osd changes" << dendl; mon.maybe_go_degraded_stretch_mode(); } + } else { + mon.try_disable_stretch_mode(); } } @@ -15068,6 +15070,65 @@ void OSDMonitor::convert_pool_priorities(void) } } +void OSDMonitor::try_disable_stretch_mode(stringstream& ss, + bool *okay, + int *errcode, + const string& crush_rule) +{ + dout(20) << __func__ << dendl; + *okay = false; + if (!osdmap.stretch_mode_enabled) { + ss << "stretch mode is already disabled"; + *errcode = -EINVAL; + return; + } + if (osdmap.recovering_stretch_mode) { + ss << "stretch mode is currently recovering and cannot be disabled"; + *errcode = -EBUSY; + return; + } + for (const auto& pi : osdmap.get_pools()) { + pg_pool_t *pool = pending_inc.get_new_pool(pi.first, &pi.second); + pool->peering_crush_bucket_count = 0; + pool->peering_crush_bucket_target = 0; + pool->peering_crush_bucket_barrier = 0; + 
pool->peering_crush_mandatory_member = CRUSH_ITEM_NONE; + pool->size = g_conf().get_val("osd_pool_default_size"); + pool->min_size = g_conf().get_osd_pool_default_min_size(pool->size); + // if crush rule is supplied, use it if it exists in crush map + if (!crush_rule.empty()) { + int crush_rule_id = osdmap.crush->get_rule_id(crush_rule); + if (crush_rule_id < 0) { + ss << "unrecognized crush rule " << crush_rule; + *errcode = -EINVAL; + return; + } + if (!osdmap.crush->rule_valid_for_pool_type(crush_rule_id, pool->get_type())) { + ss << "crush rule " << crush_rule << " type does not match pool type"; + *errcode = -EINVAL; + return; + } + if (crush_rule_id == pool->crush_rule) { + ss << "You can't disable stretch mode with the same crush rule you are using"; + *errcode = -EINVAL; + return; + } + pool->crush_rule = crush_rule_id; + } else { + // otherwise, use the default rule + pool->crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_rule(cct); + } + } + pending_inc.change_stretch_mode = true; + pending_inc.stretch_mode_enabled = false; + pending_inc.new_stretch_bucket_count = 0; + pending_inc.new_degraded_stretch_mode = 0; + pending_inc.new_stretch_mode_bucket = 0; + pending_inc.new_recovering_stretch_mode = 0; + *okay = true; + return; +} + void OSDMonitor::try_enable_stretch_mode_pools(stringstream& ss, bool *okay, int *errcode, set* pools, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index ccd11be8a83..c82373c634d 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -844,6 +844,20 @@ public: uint32_t bucket_count, const std::set& pools, const std::string& new_crush_rule); + /** + * + * Set all stretch mode values of all pools back to pre-stretch mode values. + * Set all stretch mode values of OSDMap back to pre-stretch mode values. + * If crush_rule is not empty, set the crush rule to that value, else use + * the default replicated crush rule. 
+ * @param ss: a stringstream to write errors into + * @param errcode: filled with -errno if there's a problem + * @param crush_rule: the crush rule that will be used after disabling stretch mode + */ + void try_disable_stretch_mode(std::stringstream& ss, + bool *okay, + int *errcode, + const std::string& crush_rule); /** * Check the input dead_buckets mapping (buckets->dead monitors) to see * if the OSDs are also down. If so, fill in really_down_buckets and