]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon [stretch mode]: support disable_stretch_mode
author Kamoltat Sirivadhna <ksirivad@redhat.com>
Wed, 28 Aug 2024 13:40:59 +0000 (13:40 +0000)
committer Kamoltat Sirivadhna <ksirivad@redhat.com>
Wed, 17 Sep 2025 05:36:41 +0000 (05:36 +0000)
Problem:

Currently, Ceph lacks the ability
to exit stretch mode and return
to a normal (non-stretched) cluster.

Solution:

Provide a command to allow
the user to exit stretch mode gracefully:

`ceph mon disable_stretch_mode <crush_rule> --yes-i-really-mean-it`

The user can either specify a crush rule that
all pools should move to, or omit the rule,
in which case Ceph uses the default replicated crush rule.

Fixes: https://tracker.ceph.com/issues/67467
Signed-off-by: Kamoltat Sirivadhna <ksirivad@redhat.com>
(cherry picked from commit 78ce68de41b1d5278e14cf56dff7f15394969255)

Conflicts:
src/mon/MonmapMonitor.cc - replace `goto reply` with
`goto reply_no_propose`
src/mon/OSDMonitor.cc - replace `rule_valid_for_pool_type`
with `get_rule_type` since
`rule_valid_for_pool_type` has not been
backported.

src/mon/MonCommands.h
src/mon/Monitor.cc
src/mon/Monitor.h
src/mon/MonmapMonitor.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 371c77f8ba0fc61b46395ad99114341bb6e0b34c..8dafbd397afd4a968bbd8635d6e8cd400b111fe1 100644 (file)
@@ -550,6 +550,11 @@ COMMAND("mon enable_stretch_mode " \
        "as the tiebreaker and setting <dividing_bucket> locations "
        "as the units for stretching across",
        "mon", "rw")
+COMMAND("mon disable_stretch_mode " \
+       "name=crush_rule,type=CephString,req=false, "
+       "name=yes_i_really_mean_it,type=CephBool,req=false, ",
+       "disable stretch mode, reverting to normal peering rules",
+       "mon", "rw") /* Both arguments (crush_rule, yes_i_really_mean_it) are
+        * declared optional via req=false.
+        * NOTE(review): the two argument descriptors end in ", " whereas the
+        * neighbouring "mon set_new_tiebreaker" entry separates descriptors
+        * with a plain space and has no trailing separator; confirm the
+        * descriptor parser tolerates the extra commas. */
 COMMAND("mon set_new_tiebreaker " \
        "name=name,type=CephString "
        "name=yes_i_really_mean_it,type=CephBool,req=false",
index 11660c530b17382550a57c5f7d2d36e3ecb97f8e..8d759285d1fcab61353a30485afbe6f5a6818ab0 100644 (file)
@@ -6684,6 +6684,8 @@ void Monitor::notify_new_monmap(bool can_change_external_state, bool remove_rank
 
   if (monmap->stretch_mode_enabled) {
     try_engage_stretch_mode();
+  } else {
+    try_disable_stretch_mode();
   }
 
   if (is_stretch_mode()) {
@@ -6742,6 +6744,32 @@ void Monitor::try_engage_stretch_mode()
     disconnect_disallowed_stretch_sessions();
   }
 }
+/**
+ * Completion callback that re-runs Monitor::try_disable_stretch_mode().
+ *
+ * Monitor::try_disable_stretch_mode() queues one of these on the OSDMonitor
+ * when the OSDMonitor is not readable, so the attempt is retried later.
+ * The completion code passed to finish() is ignored.
+ */
+struct CMonDisableStretchMode : public Context {
+  Monitor *m;
+  // explicit: a Monitor* must never convert silently into a callback object
+  explicit CMonDisableStretchMode(Monitor *mon) : m(mon) {}
+  void finish(int /*r*/) override {
+    m->try_disable_stretch_mode();
+  }
+};
+void Monitor::try_disable_stretch_mode()
+{
+  dout(20) << __func__ << dendl;
+  if (!stretch_mode_engaged) return;
+  if (!osdmon()->is_readable()) {
+    dout(20) << "osdmon is not readable" << dendl;
+    osdmon()->wait_for_readable_ctx(new CMonDisableStretchMode(this));
+    return;
+  }
+  if (!osdmon()->osdmap.stretch_mode_enabled &&
+      !monmap->stretch_mode_enabled) {
+    dout(10) << "Disabling stretch mode!" << dendl;
+    stretch_mode_engaged = false;
+    stretch_bucket_divider.clear();
+    degraded_stretch_mode = false;
+    recovering_stretch_mode = false;
+  }
+
+}
 
 void Monitor::do_stretch_mode_election_work()
 {
@@ -6798,6 +6826,7 @@ struct CMonGoRecovery : public Context {
 void Monitor::go_recovery_stretch_mode()
 {
   dout(20) << __func__ << dendl;
+  if (!is_stretch_mode()) return;
   dout(20) << "is_leader(): " << is_leader() << dendl;
   if (!is_leader()) return;
   dout(20) << "is_degraded_stretch_mode(): " << is_degraded_stretch_mode() << dendl;
@@ -6828,6 +6857,7 @@ void Monitor::go_recovery_stretch_mode()
 
 void Monitor::set_recovery_stretch_mode()
 {
+  if (!is_stretch_mode()) return;
   degraded_stretch_mode = true;
   recovering_stretch_mode = true;
   osdmon()->set_recovery_stretch_mode();
@@ -6836,6 +6866,7 @@ void Monitor::set_recovery_stretch_mode()
 void Monitor::maybe_go_degraded_stretch_mode()
 {
   dout(20) << __func__ << dendl;
+  if (!is_stretch_mode()) return;
   if (is_degraded_stretch_mode()) return;
   if (!is_leader()) return;
   if (dead_mon_buckets.empty()) return;
@@ -6874,6 +6905,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons,
                                            const set<int>& dead_buckets)
 {
   dout(20) << __func__ << dendl;
+  if (!is_stretch_mode()) return;
   ceph_assert(osdmon()->is_writeable());
   ceph_assert(monmon()->is_writeable());
 
@@ -6894,6 +6926,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons,
 void Monitor::set_degraded_stretch_mode()
 {
   dout(20) << __func__ << dendl;
+  if (!is_stretch_mode()) return;
   degraded_stretch_mode = true;
   recovering_stretch_mode = false;
   osdmon()->set_degraded_stretch_mode();
@@ -6911,6 +6944,7 @@ struct CMonGoHealthy : public Context {
 void Monitor::trigger_healthy_stretch_mode()
 {
   dout(20) << __func__ << dendl;
+  if (!is_stretch_mode()) return;
   if (!is_degraded_stretch_mode()) return;
   if (!is_leader()) return;
   if (!osdmon()->is_writeable()) {
@@ -6931,6 +6965,7 @@ void Monitor::trigger_healthy_stretch_mode()
 
 void Monitor::set_healthy_stretch_mode()
 {
+  if (!is_stretch_mode()) return;
   degraded_stretch_mode = false;
   recovering_stretch_mode = false;
   osdmon()->set_healthy_stretch_mode();
index fa98d3b205ebb83f7c2df8d4902f8985de6c14df..629846ad49032f2a7a75f9bdf6f3713775e8761e 100644 (file)
@@ -293,6 +293,7 @@ public:
    * updates across the entire cluster.
    */
   void try_engage_stretch_mode();
+  void try_disable_stretch_mode();
   void maybe_go_degraded_stretch_mode();
   void trigger_degraded_stretch_mode(const std::set<std::string>& dead_mons,
                                     const std::set<int>& dead_buckets);
index 1226c8a82419089b7aacb4d1ff9af284ccd9d7dd..732238f4358d51cba93104814ca49aa4a6ca2ce7 100644 (file)
@@ -1187,6 +1187,42 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op)
       ceph_assert(okay == true);
     }
     request_proposal(mon.osdmon());
+  } else if (prefix == "mon disable_stretch_mode") {
+    if (!mon.osdmon()->is_writeable()) {
+      dout(10) << __func__
+        << ":  waiting for osdmon writeable for stretch mode" << dendl;
+      mon.osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(&mon, op));
+      return false;  /* do not propose, yet */
+    }
+    bool sure = false;
+    bool okay = false;
+    int errcode = 0;
+    if (!pending_map.stretch_mode_enabled) {
+      ss << "stretch mode is already disabled";
+      err = -EINVAL;
+      goto reply_no_propose;
+    }
+    cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
+    if (!sure) {
+      ss << " This command will disable stretch mode, "
+      "which means all your pools will be reverted back "
+      "to the default size, min_size and crush_rule. "
+      "Pass --yes-i-really-mean-it to proceed.";
+      err = -EPERM;
+      goto reply_no_propose;
+    }
+    string crush_rule = cmd_getval_or<string>(cmdmap, "crush_rule", string{});
+    mon.osdmon()->try_disable_stretch_mode(ss, &okay, &errcode, crush_rule);
+    if (!okay) {
+      err = errcode;
+      goto reply_no_propose;
+    }
+    pending_map.stretch_mode_enabled = false;
+    pending_map.tiebreaker_mon = "";
+    pending_map.disallowed_leaders.clear();
+    pending_map.stretch_marked_down_mons.clear();
+    pending_map.last_changed = ceph_clock_now();
+    request_proposal(mon.osdmon());
   } else {
     ss << "unknown command " << prefix;
     err = -EINVAL;
index 5a5d92513221c9a04bddbf4658f39cff209c87d9..6a8a737c777243be1112b4167055f662ab290b6c 100644 (file)
@@ -983,6 +983,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
       dout(20) << "Checking degraded stretch mode due to osd changes" << dendl;
       mon.maybe_go_degraded_stretch_mode();
     }
+  } else {
+    mon.try_disable_stretch_mode();
   }
 }
 
@@ -14952,6 +14954,65 @@ void OSDMonitor::convert_pool_priorities(void)
   }
 }
 
+void OSDMonitor::try_disable_stretch_mode(stringstream& ss,
+     bool *okay,
+     int *errcode,
+     const string& crush_rule)
+{
+  dout(20) << __func__ << dendl;
+  *okay = false;
+  if (!osdmap.stretch_mode_enabled) {
+    ss << "stretch mode is already disabled";
+    *errcode = -EINVAL;
+    return;
+  }
+  if (osdmap.recovering_stretch_mode) {
+    ss << "stretch mode is currently recovering and cannot be disabled";
+    *errcode = -EBUSY;
+    return;
+  }
+  for (const auto& pi : osdmap.get_pools()) {
+    pg_pool_t *pool = pending_inc.get_new_pool(pi.first, &pi.second);
+    pool->peering_crush_bucket_count = 0;
+    pool->peering_crush_bucket_target = 0;
+    pool->peering_crush_bucket_barrier = 0;
+    pool->peering_crush_mandatory_member = CRUSH_ITEM_NONE;
+    pool->size = g_conf().get_val<uint64_t>("osd_pool_default_size");
+    pool->min_size = g_conf().get_osd_pool_default_min_size(pool->size);
+    // if crush rule is supplied, use it if it exists in crush map
+    if (!crush_rule.empty()) {
+      int crush_rule_id = osdmap.crush->get_rule_id(crush_rule);
+      if (crush_rule_id < 0) {
+        ss << "unrecognized crush rule " << crush_rule;
+        *errcode = -EINVAL;
+        return;
+      }
+      if (osdmap.crush->get_rule_type(crush_rule_id) != (int)pool->get_type()) {
+        ss << "crush rule " << crush_rule << " type does not match pool type";
+        *errcode = -EINVAL;
+        return;
+      }
+      if (crush_rule_id == pool->crush_rule) {
+        ss << "You can't disable stretch mode with the same crush rule you are using";
+        *errcode = -EINVAL;
+        return;
+      }
+      pool->crush_rule = crush_rule_id;
+    } else {
+      // otherwise, use the default rule
+      pool->crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_rule(cct);
+    }
+  }
+  pending_inc.change_stretch_mode = true;
+  pending_inc.stretch_mode_enabled = false;
+  pending_inc.new_stretch_bucket_count = 0;
+  pending_inc.new_degraded_stretch_mode = 0;
+  pending_inc.new_stretch_mode_bucket = 0;
+  pending_inc.new_recovering_stretch_mode = 0;
+  *okay = true;
+  return;
+}
+
 void OSDMonitor::try_enable_stretch_mode_pools(stringstream& ss, bool *okay,
                                               int *errcode,
                                               set<pg_pool_t*>* pools,
index 675ffd0f0a58b4acd89ec4131076cffebb21dbc4..13044216c7b917bb92a7c646a361cace78026359 100644 (file)
@@ -844,6 +844,20 @@ public:
                               uint32_t bucket_count,
                               const std::set<pg_pool_t*>& pools,
                               const std::string& new_crush_rule);
+  /**
+  * Stage the OSDMap changes needed to leave stretch mode.
+  *
+  * Resets the stretch-mode peering settings of every pool and sets each
+  * pool's size and min_size to the configured pool defaults.
+  * Clears the stretch mode flags of the OSDMap in the pending incremental.
+  * If crush_rule is not empty, set the crush rule of every pool to that
+  * rule, else use the default replicated crush rule.
+  * @param ss: a stringstream to write errors into
+  * @param okay: set to true if the changes were staged, false otherwise
+  * @param errcode: filled with -errno if there's a problem
+  *                 (e.g. -EINVAL, -EBUSY)
+  * @param crush_rule: name of the crush rule that will be used after
+  *                    disabling stretch mode; may be empty
+  */
+  void try_disable_stretch_mode(std::stringstream& ss,
+          bool *okay,
+          int *errcode,
+          const std::string& crush_rule);
   /**
    * Check the input dead_buckets mapping (buckets->dead monitors) to see
    * if the OSDs are also down. If so, fill in really_down_buckets and