From 472c64268d477f60631af8de90cfb7b35cfaffba Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Thu, 9 Mar 2017 16:25:59 +0800 Subject: [PATCH] mon: dispatch osd_beacon message to OSDMonitor Signed-off-by: Kefu Chai --- src/mon/Monitor.cc | 1 + src/mon/OSDMonitor.cc | 63 +++++++++++++++++++++++++++++++++++++++---- src/mon/OSDMonitor.h | 9 +++++-- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 6dc1e9c9a643a..d41cd4d09b635 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3735,6 +3735,7 @@ void Monitor::dispatch_op(MonOpRequestRef op) // OSDs case CEPH_MSG_MON_GET_OSDMAP: case CEPH_MSG_POOLOP: + case MSG_OSD_BEACON: case MSG_OSD_MARK_ME_DOWN: case MSG_OSD_FULL: case MSG_OSD_FAILURE: diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 9134a43729949..4579047ce01dc 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -31,6 +31,7 @@ #include "crush/CrushTester.h" #include "crush/CrushTreeDumper.h" +#include "messages/MOSDBeacon.h" #include "messages/MOSDFailure.h" #include "messages/MOSDMarkMeDown.h" #include "messages/MOSDFull.h" @@ -1434,6 +1435,8 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op) return preprocess_alive(op); case MSG_OSD_PGTEMP: return preprocess_pgtemp(op); + case MSG_OSD_BEACON: + return preprocess_beacon(op); case CEPH_MSG_POOLOP: return preprocess_pool_op(op); @@ -1467,6 +1470,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op) return prepare_alive(op); case MSG_OSD_PGTEMP: return prepare_pgtemp(op); + case MSG_OSD_BEACON: + return prepare_beacon(op); case MSG_MON_COMMAND: return prepare_command(op); @@ -1477,6 +1482,7 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op) case MSG_REMOVE_SNAPS: return prepare_remove_snaps(op); + default: ceph_abort(); } @@ -2734,6 +2740,47 @@ bool OSDMonitor::prepare_remove_snaps(MonOpRequestRef op) return true; } +// osd beacon +bool OSDMonitor::preprocess_beacon(MonOpRequestRef op) +{ + 
op->mark_osdmon_event(__func__); + auto beacon = static_cast(op->get_req()); + // check caps + auto session = beacon->get_session(); + if (!session) { + dout(10) << __func__ << " no monitor session!" << dendl; + return true; + } + if (!session->is_capable("osd", MON_CAP_X)) { + derr << __func__ << " received from entity " + << "with insufficient privileges " << session->caps << dendl; + return true; + } + // Always forward the beacon to the leader, even if it is the same as + // the previous one. The leader will mark down osds that haven't sent + // a beacon for a few minutes. + return false; +} + +bool OSDMonitor::prepare_beacon(MonOpRequestRef op) +{ + op->mark_osdmon_event(__func__); + const auto beacon = static_cast(op->get_req()); + const auto src = beacon->get_orig_source(); + dout(10) << __func__ << " " << *beacon + << " from " << src << dendl; + int from = src.num(); + + if (!src.is_osd() || + !osdmap.is_up(from) || + beacon->get_orig_source_inst() != osdmap.get_inst(from)) { + dout(1) << " ignoring beacon from non-active osd." << dendl; + return false; + } + + last_osd_report[from] = ceph_clock_now(); + return false; +} // --------------- // map helpers @@ -2957,6 +3004,9 @@ void OSDMonitor::tick() bool do_propose = false; utime_t now = ceph_clock_now(); + if (handle_osd_timeouts(now, last_osd_report)) + do_propose = true; + // mark osds down? 
if (check_failures(now)) do_propose = true; @@ -3088,15 +3138,20 @@ void OSDMonitor::tick() propose_pending(); } -void OSDMonitor::handle_osd_timeouts(const utime_t &now, +bool OSDMonitor::handle_osd_timeouts(const utime_t &now, std::map &last_osd_report) { utime_t timeo(g_conf->mon_osd_report_timeout, 0); + if (now - mon->get_leader_since() < timeo) { + // We haven't been the leader for long enough to consider OSD timeouts + return false; + } + int max_osd = osdmap.get_max_osd(); bool new_down = false; for (int i=0; i < max_osd; ++i) { - dout(30) << "handle_osd_timeouts: checking up on osd " << i << dendl; + dout(30) << __func__ << ": checking up on osd " << i << dendl; if (!osdmap.is_up(i)) continue; const std::map::const_iterator t = last_osd_report.find(i); @@ -3114,9 +3169,7 @@ void OSDMonitor::handle_osd_timeouts(const utime_t &now, } } } - if (new_down) { - propose_pending(); - } + return new_down; } void OSDMonitor::get_health(list >& summary, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index bcd15add81cbe..8a029d9c90e73 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -414,7 +414,12 @@ private: int load_metadata(int osd, map& m, ostream *err); - public: + // when we last received a beacon from each osd + map last_osd_report; + bool preprocess_beacon(MonOpRequestRef op); + bool prepare_beacon(MonOpRequestRef op); + +public: OSDMonitor(CephContext *cct, Monitor *mn, Paxos *p, const string& service_name); void tick() override; // check state, take actions @@ -431,7 +436,7 @@ private: int prepare_command_pool_set(map &cmdmap, stringstream& ss); - void handle_osd_timeouts(const utime_t &now, + bool handle_osd_timeouts(const utime_t &now, std::map &last_osd_report); void send_latest(MonOpRequestRef op, epoch_t start=0); -- 2.39.5