From 78e67e71e10809fd73d38545bace94dbd02e03bb Mon Sep 17 00:00:00 2001 From: luo rixin Date: Sat, 14 Mar 2020 09:54:57 +0800 Subject: [PATCH] mon/OSDMonitor: Add a safety builtin to mon_osd_report_timeout Add a safety builtin that uses max(mon_osd_report_timeout,2*osd_beacon_report_interval) as osd timeout by adding osd_beacon_report_interval into MOSDBeacon. This will allow for the OSD to miss a beacon and catch it on the next beacon without being marked down by the monitor. Fixes: https://tracker.ceph.com/issues/40668 Signed-off-by: luo rixin --- src/crimson/osd/osd.cc | 3 ++- src/messages/MOSDBeacon.h | 17 +++++++++++++---- src/mon/OSDMonitor.cc | 30 ++++++++++++++++++------------ src/mon/OSDMonitor.h | 7 ++++--- src/osd/OSD.cc | 3 ++- 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index d1f55ddc78186..aee48d3201920 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -1208,7 +1208,8 @@ seastar::future<> OSD::send_beacon() epoch_t min_last_epoch_clean = osdmap->get_epoch(); auto m = make_message(osdmap->get_epoch(), min_last_epoch_clean, - superblock.last_purged_snaps_scrub); + superblock.last_purged_snaps_scrub, + local_conf()->osd_beacon_report_interval); return monc->send_message(m); } diff --git a/src/messages/MOSDBeacon.h b/src/messages/MOSDBeacon.h index 493408f8e88ed..998d6e638946a 100644 --- a/src/messages/MOSDBeacon.h +++ b/src/messages/MOSDBeacon.h @@ -7,22 +7,24 @@ class MOSDBeacon : public PaxosServiceMessage { private: - static constexpr int HEAD_VERSION = 2; + static constexpr int HEAD_VERSION = 3; static constexpr int COMPAT_VERSION = 1; public: std::vector pgs; epoch_t min_last_epoch_clean = 0; utime_t last_purged_snaps_scrub; + int osd_beacon_report_interval = 0; MOSDBeacon() : PaxosServiceMessage{MSG_OSD_BEACON, 0, HEAD_VERSION, COMPAT_VERSION} {} - MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls) + MOSDBeacon(epoch_t e, epoch_t min_lec, utime_t ls, int interval) :
PaxosServiceMessage{MSG_OSD_BEACON, e, HEAD_VERSION, COMPAT_VERSION}, min_last_epoch_clean(min_lec), - last_purged_snaps_scrub(ls) + last_purged_snaps_scrub(ls), + osd_beacon_report_interval(interval) {} void encode_payload(uint64_t features) override { using ceph::encode; @@ -30,6 +32,7 @@ public: encode(pgs, payload); encode(min_last_epoch_clean, payload); encode(last_purged_snaps_scrub, payload); + encode(osd_beacon_report_interval, payload); } void decode_payload() override { auto p = payload.cbegin(); @@ -40,13 +43,19 @@ public: if (header.version >= 2) { decode(last_purged_snaps_scrub, p); } + if (header.version >= 3) { + decode(osd_beacon_report_interval, p); + } else { + osd_beacon_report_interval = 0; + } } std::string_view get_type_name() const override { return "osd_beacon"; } void print(std::ostream &out) const { out << get_type_name() << "(pgs " << pgs << " lec " << min_last_epoch_clean - << " last_purged_snaps_scrub " << last_purged_snaps_scrub + << " last_purged_snaps_scrub " << last_purged_snaps_scrub + << " osd_beacon_report_interval " << osd_beacon_report_interval << " v" << version << ")"; } private: diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index fc4bf04e12f04..492d27dc66ddc 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -4331,7 +4331,8 @@ bool OSDMonitor::prepare_beacon(MonOpRequestRef op) return false; } - last_osd_report[from] = ceph_clock_now(); + last_osd_report[from].first = ceph_clock_now(); + last_osd_report[from].second = beacon->osd_beacon_report_interval; osd_epochs[from] = beacon->version; for (const auto& pg : beacon->pgs) { @@ -5205,7 +5206,7 @@ void OSDMonitor::_set_new_cache_sizes() } bool OSDMonitor::handle_osd_timeouts(const utime_t &now, - std::map &last_osd_report) + std::map> &last_osd_report) { utime_t timeo(g_conf()->mon_osd_report_timeout, 0); if (now - mon->get_leader_since() < timeo) { @@ -5224,19 +5225,24 @@ bool OSDMonitor::handle_osd_timeouts(const utime_t &now, } if 
(!osdmap.is_up(i)) continue; - const std::map::const_iterator t = last_osd_report.find(i); + const std::map>::const_iterator t = last_osd_report.find(i); if (t == last_osd_report.end()) { // it wasn't in the map; start the timer. - last_osd_report[i] = now; + last_osd_report[i].first = now; + last_osd_report[i].second = 0; } else if (can_mark_down(i)) { - utime_t diff = now - t->second; - if (diff > timeo) { - mon->clog->info() << "osd." << i << " marked down after no beacon for " - << diff << " seconds"; - derr << "no beacon from osd." << i << " since " << t->second - << ", " << diff << " seconds ago. marking down" << dendl; - pending_inc.new_state[i] = CEPH_OSD_UP; - new_down = true; + utime_t diff = now - t->second.first; + // we use the max(mon_osd_report_timeout, 2*osd_beacon_report_interval) as timeout + // to allow for the osd to miss a beacon. + int mon_osd_report_timeout = g_conf()->mon_osd_report_timeout; + utime_t max_timeout(std::max(mon_osd_report_timeout, 2 * t->second.second), 0); + if (diff > max_timeout) { + mon->clog->info() << "osd." << i << " marked down after no beacon for " + << diff << " seconds"; + derr << "no beacon from osd." << i << " since " << t->second.first + << ", " << diff << " seconds ago. 
marking down" << dendl; + pending_inc.new_state[i] = CEPH_OSD_UP; + new_down = true; } } } diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index c226118e1cb0d..30bc5a4569e27 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -23,6 +23,7 @@ #include #include +#include #include "include/types.h" #include "include/encoding.h" @@ -633,8 +634,8 @@ protected: bool is_pool_currently_all_bluestore(int64_t pool_id, const pg_pool_t &pool, std::ostream *err); - // when we last received PG stats from each osd - std::map last_osd_report; + // when we last received PG stats from each osd and the osd's osd_beacon_report_interval + std::map> last_osd_report; // TODO: use last_osd_report to store the osd report epochs, once we don't // need to upgrade from pre-luminous releases. std::map osd_epochs; @@ -732,7 +733,7 @@ public: bool preparing); bool handle_osd_timeouts(const utime_t &now, - std::map &last_osd_report); + std::map> &last_osd_report); void send_latest(MonOpRequestRef op, epoch_t start=0); void send_latest_now_nodelete(MonOpRequestRef op, epoch_t start=0) { diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index eff4759417f55..43845dc704f71 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -6748,7 +6748,8 @@ void OSD::send_beacon(const ceph::coarse_mono_clock::time_point& now) std::lock_guard l{min_last_epoch_clean_lock}; beacon = new MOSDBeacon(get_osdmap_epoch(), min_last_epoch_clean, - superblock.last_purged_snaps_scrub); + superblock.last_purged_snaps_scrub, + cct->_conf->osd_beacon_report_interval); beacon->pgs = min_last_epoch_clean_pgs; last_sent_beacon = now; } -- 2.39.5