From eee041f2f070b88b01d45c04624872681dd158be Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 15 Jul 2021 15:17:23 -0500
Subject: [PATCH] mon: allow a MON_DOWN grace period after cluster mkfs

During teuthology tests, the initial cluster bootstrap often starts up
the mons but doesn't include all mons in the initial quorum, due to
mon startup misalignment and random delays.  Provide a short grace period
where we will not raise a MON_DOWN alert even though the quorum is not
complete.

Fixes: https://tracker.ceph.com/issues/43584
Signed-off-by: Sage Weil <sage@newdream.net>
---
 src/common/options/mon.yaml.in | 7 +++++++
 src/mon/HealthMonitor.cc       | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in
index 535dc65e26a2e..1ec9309397ba5 100644
--- a/src/common/options/mon.yaml.in
+++ b/src/common/options/mon.yaml.in
@@ -39,6 +39,13 @@ options:
   default: 5
   services:
   - mon
+- name: mon_down_mkfs_grace
+  type: secs
+  level: advanced
+  desc: Period in seconds that the cluster may have a mon down after cluster creation
+  default: 1_min
+  services:
+  - mon
 - name: mon_mgr_beacon_grace
   type: secs
   level: advanced
diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc
index bf8cf394bd9a6..3adbdc3de59f9 100644
--- a/src/mon/HealthMonitor.cc
+++ b/src/mon/HealthMonitor.cc
@@ -797,7 +797,9 @@ void HealthMonitor::check_for_mon_down(health_check_map_t *checks)
 {
   int max = mon.monmap->size();
   int actual = mon.get_quorum().size();
-  if (actual < max) {
+  const auto now = ceph::real_clock::now();
+  if (actual < max &&
+      now > mon.monmap->created.to_real_time() + g_conf().get_val<std::chrono::seconds>("mon_down_mkfs_grace")) {
     ostringstream ss;
     ss << (max-actual) << "/" << max << " mons down, quorum "
        << mon.get_quorum_names();
-- 
2.39.5