git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: Add a safety builtin to mon_osd_report_timeout 33963/head
author: luo rixin <luorixin@huawei.com>
Sat, 14 Mar 2020 01:54:57 +0000 (09:54 +0800)
committer: luo rixin <luorixin@huawei.com>
Tue, 1 Sep 2020 00:58:44 +0000 (08:58 +0800)
Add a built-in safety margin that uses max(mon_osd_report_timeout, 2 * osd_beacon_report_interval)
as the OSD report timeout, by adding osd_beacon_report_interval to MOSDBeacon. This allows
an OSD to miss one beacon and catch up on the next beacon without being marked down
by the monitor.

Fixes: https://tracker.ceph.com/issues/40668
Signed-off-by: luo rixin <luorixin@huawei.com>
src/crimson/osd/osd.cc
src/messages/MOSDBeacon.h
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h
src/osd/OSD.cc

index d1f55ddc781860fda0d386ab219ae99d888f695e..aee48d32019207e96f3a80c60dd86c5a359694b0 100644 (file)
@@ -1208,7 +1208,8 @@ seastar::future<> OSD::send_beacon()
   epoch_t min_last_epoch_clean = osdmap->get_epoch();
   auto m = make_message<MOSDBeacon>(osdmap->get_epoch(),
                                     min_last_epoch_clean,
-                                   superblock.last_purged_snaps_scrub);
+                                    superblock.last_purged_snaps_scrub,
+                                    local_conf()->osd_beacon_report_interval);
   return monc->send_message(m);
 }
 
index 493408f8e88ed9764d38f2660f77f0b432c55864..998d6e638946ab41f9dea192fd75ea268e9a7088 100644 (file)
@@ -7,22 +7,24 @@
 
 class MOSDBeacon : public PaxosServiceMessage {
 private:
-  static constexpr int HEAD_VERSION = 2;
+  static constexpr int HEAD_VERSION = 3;
   static constexpr int COMPAT_VERSION = 1;
 public:
   std::vector<pg_t> pgs;
   epoch_t min_last_epoch_clean = 0;
   utime_t last_purged_snaps_scrub;
+  int osd_beacon_report_interval = 0;
 
   MOSDBeacon()
     : PaxosServiceMessage{MSG_OSD_BEACON, 0,
                          HEAD_VERSION, COMPAT_VERSION}
   {}
-  MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls)
+  MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls, int interval)
     : PaxosServiceMessage{MSG_OSD_BEACON, e,
                          HEAD_VERSION, COMPAT_VERSION},
       min_last_epoch_clean(min_lec),
-      last_purged_snaps_scrub(ls)
+      last_purged_snaps_scrub(ls),
+      osd_beacon_report_interval(interval)
   {}
   void encode_payload(uint64_t features) override {
     using ceph::encode;
@@ -30,6 +32,7 @@ public:
     encode(pgs, payload);
     encode(min_last_epoch_clean, payload);
     encode(last_purged_snaps_scrub, payload);
+    encode(osd_beacon_report_interval, payload);
   }
   void decode_payload() override {
     auto p = payload.cbegin();
@@ -40,13 +43,19 @@ public:
     if (header.version >= 2) {
       decode(last_purged_snaps_scrub, p);
     }
+    if (header.version >= 3) {
+      decode(osd_beacon_report_interval, p);
+    } else {
+      osd_beacon_report_interval = 0;
+    }
   }
   std::string_view get_type_name() const override { return "osd_beacon"; }
   void print(std::ostream &out) const {
     out << get_type_name()
         << "(pgs " << pgs
         << " lec " << min_last_epoch_clean
-       << " last_purged_snaps_scrub " << last_purged_snaps_scrub
+        << " last_purged_snaps_scrub " << last_purged_snaps_scrub
+        << " osd_beacon_report_interval " << osd_beacon_report_interval
         << " v" << version << ")";
   }
 private:
index fc4bf04e12f0444e2cc9fc0b9fa5a60944e1dfbe..492d27dc66ddca72dcdd2252a7ac34ba30bc266c 100644 (file)
@@ -4331,7 +4331,8 @@ bool OSDMonitor::prepare_beacon(MonOpRequestRef op)
     return false;
   }
 
-  last_osd_report[from] = ceph_clock_now();
+  last_osd_report[from].first = ceph_clock_now();
+  last_osd_report[from].second = beacon->osd_beacon_report_interval;
   osd_epochs[from] = beacon->version;
 
   for (const auto& pg : beacon->pgs) {
@@ -5205,7 +5206,7 @@ void OSDMonitor::_set_new_cache_sizes()
 }
 
 bool OSDMonitor::handle_osd_timeouts(const utime_t &now,
-                                    std::map<int,utime_t> &last_osd_report)
+                                    std::map<int, std::pair<utime_t, int>> &last_osd_report)
 {
   utime_t timeo(g_conf()->mon_osd_report_timeout, 0);
   if (now - mon->get_leader_since() < timeo) {
@@ -5224,19 +5225,24 @@ bool OSDMonitor::handle_osd_timeouts(const utime_t &now,
     }
     if (!osdmap.is_up(i))
       continue;
-    const std::map<int,utime_t>::const_iterator t = last_osd_report.find(i);
+    const std::map<int, std::pair<utime_t, int>>::const_iterator t = last_osd_report.find(i);
     if (t == last_osd_report.end()) {
       // it wasn't in the map; start the timer.
-      last_osd_report[i] = now;
+      last_osd_report[i].first = now;
+      last_osd_report[i].second = 0;
     } else if (can_mark_down(i)) {
-      utime_t diff = now - t->second;
-      if (diff > timeo) {
-       mon->clog->info() << "osd." << i << " marked down after no beacon for "
-                         << diff << " seconds";
-       derr << "no beacon from osd." << i << " since " << t->second
-            << ", " << diff << " seconds ago.  marking down" << dendl;
-       pending_inc.new_state[i] = CEPH_OSD_UP;
-       new_down = true;
+      utime_t diff = now - t->second.first;
+      // we use the max(mon_osd_report_timeout, 2*osd_beacon_report_interval) as timeout
+      // to allow for the osd to miss a beacon.
+      int mon_osd_report_timeout = g_conf()->mon_osd_report_timeout;
+      utime_t max_timeout(std::max(mon_osd_report_timeout,  2 * t->second.second), 0);
+      if (diff > max_timeout) {
+        mon->clog->info() << "osd." << i << " marked down after no beacon for "
+                          << diff << " seconds";
+        derr << "no beacon from osd." << i << " since " << t->second.first
+             << ", " << diff << " seconds ago.  marking down" << dendl;
+        pending_inc.new_state[i] = CEPH_OSD_UP;
+        new_down = true;
       }
     }
   }
index c226118e1cb0de169149ca372aada4c1d16ed1ae..30bc5a4569e27816af579a6864cf6c0e919e99e9 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <map>
 #include <set>
+#include <utility>
 
 #include "include/types.h"
 #include "include/encoding.h"
@@ -633,8 +634,8 @@ protected:
   bool is_pool_currently_all_bluestore(int64_t pool_id, const pg_pool_t &pool,
                                       std::ostream *err);
 
-  // when we last received PG stats from each osd
-  std::map<int,utime_t> last_osd_report;
+  // when we last received PG stats from each osd and the osd's osd_beacon_report_interval
+  std::map<int, std::pair<utime_t, int>> last_osd_report;
   // TODO: use last_osd_report to store the osd report epochs, once we don't
   //       need to upgrade from pre-luminous releases.
   std::map<int,epoch_t> osd_epochs;
@@ -732,7 +733,7 @@ public:
                                bool preparing);
 
   bool handle_osd_timeouts(const utime_t &now,
-                          std::map<int,utime_t> &last_osd_report);
+                          std::map<int, std::pair<utime_t, int>> &last_osd_report);
 
   void send_latest(MonOpRequestRef op, epoch_t start=0);
   void send_latest_now_nodelete(MonOpRequestRef op, epoch_t start=0) {
index eff4759417f5545f4e9d49f5749671093a8a7cad..43845dc704f7108fa3f5f0fbb4e86f05fc442c00 100644 (file)
@@ -6748,7 +6748,8 @@ void OSD::send_beacon(const ceph::coarse_mono_clock::time_point& now)
       std::lock_guard l{min_last_epoch_clean_lock};
       beacon = new MOSDBeacon(get_osdmap_epoch(),
                              min_last_epoch_clean,
-                             superblock.last_purged_snaps_scrub);
+                             superblock.last_purged_snaps_scrub,
+                             cct->_conf->osd_beacon_report_interval);
       beacon->pgs = min_last_epoch_clean_pgs;
       last_sent_beacon = now;
     }