mon: allow a MON_DOWN grace period after cluster mkfs

author Sage Weil <sage@newdream.net>

Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)

committer Sage Weil <sage@newdream.net>

Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)
author Sage Weil <sage@newdream.net>
Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)
committer Sage Weil <sage@newdream.net>
Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)
diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in

index 535dc65e26a2e0a807f3c81e43b45524f85ec78f..1ec9309397ba5325df6e36e4dbe06abe45d0544e 100644 (file)
--- a/src/common/options/mon.yaml.in
+++ b/src/common/options/mon.yaml.in
@@ -39,6 +39,13 @@ options:
    default: 5
    services:
    - mon
+- name: mon_down_mkfs_grace
+  type: secs
+  level: advanced
+  desc: Period in seconds that the cluster may have a mon down after cluster creation
+  default: 1_min
+  services:
+  - mon
  - name: mon_mgr_beacon_grace
    type: secs
    level: advanced
diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc

index bf8cf394bd9a6791fedf192d0e9b35df2aa63676..3adbdc3de59f907ed3e15a3975a9e4af3ae8a542 100644 (file)
--- a/src/mon/HealthMonitor.cc
+++ b/src/mon/HealthMonitor.cc
@@ -797,7 +797,9 @@ void HealthMonitor::check_for_mon_down(health_check_map_t *checks)
  {
    int max = mon.monmap->size();
    int actual = mon.get_quorum().size();
-  if (actual < max) {
+  const auto now = ceph::real_clock::now();
+  if (actual < max &&
+      now > mon.monmap->created.to_real_time() + g_conf().get_val<std::chrono::seconds>("mon_down_mkfs_grace")) {
      ostringstream ss;
      ss << (max-actual) << "/" << max << " mons down, quorum "
         << mon.get_quorum_names();
author	Sage Weil <sage@newdream.net>
	Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)
committer	Sage Weil <sage@newdream.net>
	Thu, 15 Jul 2021 20:17:23 +0000 (15:17 -0500)
src/common/options/mon.yaml.in		patch | blob | history
src/mon/HealthMonitor.cc		patch | blob | history