From 82bd0ba939269c0022ce663ed3a1d81125058606 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Tue, 9 Jul 2019 13:37:57 -0500
Subject: [PATCH] mon/OSDMonitor: implement MOSDMarkMeDead handling

Allow updates to the dead_epoch in osd_xinfo, so that the OSD can inform
us that they know they are down (as of a particular epoch).

Signed-off-by: Sage Weil
---
 Note: this copy was rebuilt from a whitespace-collapsed export. Template
 arguments in angle brackets were restored and indentation was re-created
 with spaces, so apply it with relaxed whitespace matching
 (git apply --ignore-whitespace). The author e-mail addresses were lost in
 the export and are not reproduced here. MOSDMarkMeDead::target_osd
 additionally gains a default initializer (-1).

 src/messages/MOSDMarkMeDead.h | 62 +++++++++++++++++++++++++++++++++++
 src/mon/Monitor.cc            |  1 +
 src/mon/OSDMonitor.cc         | 61 ++++++++++++++++++++++++++++++++++
 src/mon/OSDMonitor.h          |  3 ++
 src/msg/Message.cc            |  4 +++
 src/msg/Message.h             |  1 +
 6 files changed, 132 insertions(+)
 create mode 100644 src/messages/MOSDMarkMeDead.h

diff --git a/src/messages/MOSDMarkMeDead.h b/src/messages/MOSDMarkMeDead.h
new file mode 100644
index 0000000000000..9e8b306fcdc5b
--- /dev/null
+++ b/src/messages/MOSDMarkMeDead.h
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "messages/PaxosServiceMessage.h"
+
+class MOSDMarkMeDead : public PaxosServiceMessage {  // OSD -> mon: "I know I am down as of <epoch>"
+private:
+  static constexpr int HEAD_VERSION = 1;
+  static constexpr int COMPAT_VERSION = 1;
+
+ public:
+  uuid_d fsid;                // passed to check_source() by the monitor
+  int32_t target_osd = -1;    // osd being reported dead; -1 until set or decoded
+  epoch_t epoch = 0;          // epoch as of which the osd is dead
+
+  MOSDMarkMeDead()
+    : PaxosServiceMessage{MSG_OSD_MARK_ME_DEAD, 0,
+                          HEAD_VERSION, COMPAT_VERSION} { }
+  MOSDMarkMeDead(const uuid_d &fs, int osd,
+                 epoch_t e)
+    : PaxosServiceMessage{MSG_OSD_MARK_ME_DEAD, e,
+                          HEAD_VERSION, COMPAT_VERSION},
+      fsid(fs), target_osd(osd),
+      epoch(e) {}
+ private:
+  ~MOSDMarkMeDead() override {}
+
+public:
+  epoch_t get_epoch() const { return epoch; }
+
+  void decode_payload() override {
+    auto p = payload.cbegin();
+    paxos_decode(p);
+    decode(fsid, p);  // field order must mirror encode_payload()
+    decode(target_osd, p);
+    decode(epoch, p);
+  }
+
+  void encode_payload(uint64_t features) override {
+    using ceph::encode;
+    paxos_encode();
+    header.version = HEAD_VERSION;
+    header.compat_version = COMPAT_VERSION;
+    encode(fsid, payload);
+    encode(target_osd, payload, features);
+    encode(epoch, payload);
+  }
+
+  std::string_view get_type_name() const override { return "MOSDMarkMeDead"; }
+  void print(ostream& out) const override {
+    out << "MOSDMarkMeDead("
+        << "osd." << target_osd
+        << ", epoch " << epoch
+        << ", fsid=" << fsid
+        << ")";
+  }
+private:
+  template<class T, typename... Args>
+  friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);
+};
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 85185adf7a40a..d33b0d89d4c02 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -4481,6 +4481,7 @@ void Monitor::dispatch_op(MonOpRequestRef op)
     case CEPH_MSG_POOLOP:
     case MSG_OSD_BEACON:
     case MSG_OSD_MARK_ME_DOWN:
+    case MSG_OSD_MARK_ME_DEAD:
     case MSG_OSD_FULL:
     case MSG_OSD_FAILURE:
     case MSG_OSD_BOOT:
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index f69fdf0a5841e..0944f8164cf05 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -39,6 +39,7 @@
 #include "messages/MOSDBeacon.h"
 #include "messages/MOSDFailure.h"
 #include "messages/MOSDMarkMeDown.h"
+#include "messages/MOSDMarkMeDead.h"
 #include "messages/MOSDFull.h"
 #include "messages/MOSDMap.h"
 #include "messages/MMonGetOSDMap.h"
@@ -2287,6 +2288,8 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op)
     // damp updates
   case MSG_OSD_MARK_ME_DOWN:
     return preprocess_mark_me_down(op);
+  case MSG_OSD_MARK_ME_DEAD:
+    return preprocess_mark_me_dead(op);
   case MSG_OSD_FULL:
     return preprocess_full(op);
   case MSG_OSD_FAILURE:
@@ -2329,6 +2332,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op)
     // damp updates
   case MSG_OSD_MARK_ME_DOWN:
     return prepare_mark_me_down(op);
+  case MSG_OSD_MARK_ME_DEAD:
+    return prepare_mark_me_dead(op);
   case MSG_OSD_FULL:
     return prepare_full(op);
   case MSG_OSD_FAILURE:
@@ -2607,6 +2612,62 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op)
   return true;
 }
 
+bool OSDMonitor::preprocess_mark_me_dead(MonOpRequestRef op)
+{
+  op->mark_osdmon_event(__func__);
+  MOSDMarkMeDead *m = static_cast<MOSDMarkMeDead*>(op->get_req());
+  int from = m->target_osd;  // the osd the message is about
+
+  // check permissions
+  if (check_source(op, m->fsid)) {
+    mon->no_reply(op);
+    return true;
+  }
+
+  // drop messages whose original sender is not an OSD
+  if (!m->get_orig_source().is_osd()) {
+    mon->no_reply(op);
+    return true;
+  }
+
+  if (!osdmap.exists(from) ||
+      !osdmap.is_down(from)) {  // only an existing, down osd can be recorded as dead
+    dout(5) << __func__ << " from nonexistent or up osd." << from
+            << ", ignoring" << dendl;
+    send_incremental(op, m->get_epoch()+1);
+    mon->no_reply(op);
+    return true;
+  }
+
+  return false;
+}
+
+bool OSDMonitor::prepare_mark_me_dead(MonOpRequestRef op)
+{
+  op->mark_osdmon_event(__func__);
+  MOSDMarkMeDead *m = static_cast<MOSDMarkMeDead*>(op->get_req());
+  int target_osd = m->target_osd;
+
+  ceph_assert(osdmap.is_down(target_osd));  // same condition preprocess_mark_me_dead() tests
+
+  mon->clog->info() << "osd." << target_osd << " marked itself dead as of e"
+                    << m->get_epoch();
+  if (!pending_inc.new_xinfo.count(target_osd)) {  // seed from the current map so other xinfo fields are kept
+    pending_inc.new_xinfo[target_osd] = osdmap.osd_xinfo[target_osd];
+  }
+  pending_inc.new_xinfo[target_osd].dead_epoch = m->get_epoch();
+  wait_for_finished_proposal(
+    op,
+    new FunctionContext(
+      [op, this] (int r) {
+        if (r >= 0) {
+          mon->no_reply(op);      // ignore on success
+        }
+      }
+      ));
+  return true;
+}
+
 bool OSDMonitor::can_mark_down(int i)
 {
   if (osdmap.is_nodown(i)) {
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 896f4ee7f63c2..f856f0161ef97 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -403,6 +403,9 @@ private:
   void process_failures();
   void take_all_failures(list<MonOpRequestRef>& ls);
 
+  bool preprocess_mark_me_dead(MonOpRequestRef op);
+  bool prepare_mark_me_dead(MonOpRequestRef op);
+
   bool preprocess_full(MonOpRequestRef op);
   bool prepare_full(MonOpRequestRef op);
 
diff --git a/src/msg/Message.cc b/src/msg/Message.cc
index 802c73d7a6314..de20575287dee 100644
--- a/src/msg/Message.cc
+++ b/src/msg/Message.cc
@@ -62,6 +62,7 @@
 #include "messages/MOSDPGTemp.h"
 #include "messages/MOSDFailure.h"
 #include "messages/MOSDMarkMeDown.h"
+#include "messages/MOSDMarkMeDead.h"
 #include "messages/MOSDFull.h"
 #include "messages/MOSDPing.h"
 #include "messages/MOSDOp.h"
@@ -475,6 +476,9 @@ Message *decode_message(CephContext *cct, int crcflags,
   case MSG_OSD_MARK_ME_DOWN:
     m = make_message<MOSDMarkMeDown>();
     break;
+  case MSG_OSD_MARK_ME_DEAD:
+    m = make_message<MOSDMarkMeDead>();
+    break;
   case MSG_OSD_FULL:
     m = make_message<MOSDFull>();
     break;
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 87b4b263b686d..2a66b4a4b3523 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -71,6 +71,7 @@
 #define MSG_OSD_ALIVE        73
 #define MSG_OSD_MARK_ME_DOWN 74
 #define MSG_OSD_FULL         75
+#define MSG_OSD_MARK_ME_DEAD 123
 
 // removed right after luminous
 //#define MSG_OSD_SUBOP        76
-- 
2.39.5