From 14b1ab145606e1559554a84f9c6bcd7b90e7bc44 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 23 Feb 2017 15:51:37 -0500 Subject: [PATCH] mon/OSDMonitor: handle MOSDFull messages from OSDs Signed-off-by: Sage Weil --- src/messages/MOSDFull.h | 50 +++++++++++++++++++++ src/mon/Monitor.cc | 1 + src/mon/OSDMonitor.cc | 96 +++++++++++++++++++++++++++++++++++++++++ src/mon/OSDMonitor.h | 3 ++ src/msg/Message.cc | 4 ++ src/msg/Message.h | 1 + 6 files changed, 155 insertions(+) create mode 100644 src/messages/MOSDFull.h diff --git a/src/messages/MOSDFull.h b/src/messages/MOSDFull.h new file mode 100644 index 0000000000000..5fcd3c8b37d7c --- /dev/null +++ b/src/messages/MOSDFull.h @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_MOSDFULL_H +#define CEPH_MOSDFULL_H + +#include "messages/PaxosServiceMessage.h" +#include "osd/OSDMap.h" + +// tell the mon to update the full/nearfull bits. note that in the +// future this message could be generalized to other state bits, but +// for now name it for its sole application. + +class MOSDFull : public PaxosServiceMessage { + public: + epoch_t map_epoch = 0; + uint32_t state = 0; + +private: + ~MOSDFull() {} + +public: + MOSDFull(epoch_t e, unsigned s) + : PaxosServiceMessage(MSG_OSD_FULL, e), map_epoch(e), state(s) { } + MOSDFull() + : PaxosServiceMessage(MSG_OSD_FULL, 0) {} + +public: + void encode_payload(uint64_t features) { + paxos_encode(); + ::encode(map_epoch, payload); + ::encode(state, payload); + } + void decode_payload() { + bufferlist::iterator p = payload.begin(); + paxos_decode(p); + ::decode(map_epoch, p); + ::decode(state, p); + } + + const char *get_type_name() const { return "osd_full"; } + void print(ostream &out) const { + set states; + OSDMap::calc_state_set(state, states); + out << "osd_full(e" << map_epoch << " " << states << " v" << version << ")"; + } + +}; + +#endif diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 7205fdc638c25..f46ad3e4c2059 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3736,6 +3736,7 @@ void Monitor::dispatch_op(MonOpRequestRef op) case CEPH_MSG_MON_GET_OSDMAP: case CEPH_MSG_POOLOP: case MSG_OSD_MARK_ME_DOWN: + case MSG_OSD_FULL: case MSG_OSD_FAILURE: case MSG_OSD_BOOT: case MSG_OSD_ALIVE: diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 52c3cf49953a8..3cec6ef6ee2e0 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -33,6 +33,7 @@ #include "messages/MOSDFailure.h" #include "messages/MOSDMarkMeDown.h" +#include "messages/MOSDFull.h" #include "messages/MOSDMap.h" #include "messages/MMonGetOSDMap.h" #include "messages/MOSDBoot.h" @@ -1363,6 +1364,8 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op) // damp updates case MSG_OSD_MARK_ME_DOWN: return preprocess_mark_me_down(op); + case MSG_OSD_FULL: + return preprocess_full(op); case MSG_OSD_FAILURE: return preprocess_failure(op); case MSG_OSD_BOOT: @@ -1394,6 +1397,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op) // damp updates case MSG_OSD_MARK_ME_DOWN: return prepare_mark_me_down(op); + case MSG_OSD_FULL: + return prepare_full(op); case MSG_OSD_FAILURE: return prepare_failure(op); case MSG_OSD_BOOT: @@ -2296,6 +2301,97 @@ void OSDMonitor::_booted(MonOpRequestRef op, bool logit) } +// ------------- +// full + +bool OSDMonitor::preprocess_full(MonOpRequestRef op) +{ + op->mark_osdmon_event(__func__); + MOSDFull *m = static_cast(op->get_req()); + int from = m->get_orig_source().num(); + set state; + unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL; + + // check permissions, ignore if failed + MonSession *session = m->get_session(); + if (!session) + goto ignore; + if (!session->is_capable("osd", MON_CAP_X)) { + dout(0) << "MOSDFull from entity with insufficient privileges:" + << session->caps << dendl; + goto ignore; + } + + // ignore a full message from the osd instance that already went down + if (!osdmap.exists(from)) { + dout(7) << __func__ << " ignoring full message from nonexistent " + << m->get_orig_source_inst() << dendl; + goto ignore; + } + if ((!osdmap.is_up(from) && + osdmap.get_most_recent_inst(from) == m->get_orig_source_inst()) || + (osdmap.is_up(from) && + osdmap.get_inst(from) != m->get_orig_source_inst())) { + dout(7) << __func__ << " ignoring full message from down " + << m->get_orig_source_inst() << dendl; + goto ignore; + } + + OSDMap::calc_state_set(osdmap.get_state(from), state); + + if ((osdmap.get_state(from) & mask) == m->state) { + dout(7) << __func__ << " state already " << state << " for osd." << from + << " " << m->get_orig_source_inst() << dendl; + _reply_map(op, m->version); + goto ignore; + } + + dout(10) << __func__ << " want state " << state << " for osd." << from + << " " << m->get_orig_source_inst() << dendl; + return false; + + ignore: + return true; +} + +bool OSDMonitor::prepare_full(MonOpRequestRef op) +{ + op->mark_osdmon_event(__func__); + const MOSDFull *m = static_cast(op->get_req()); + const int from = m->get_orig_source().num(); + + const unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL; + const unsigned want_state = m->state & mask; // safety first + + unsigned cur_state = osdmap.get_state(from); + auto p = pending_inc.new_state.find(from); + if (p != pending_inc.new_state.end()) { + cur_state ^= p->second; + } + cur_state &= mask; + + set want_state_set, cur_state_set; + OSDMap::calc_state_set(want_state, want_state_set); + OSDMap::calc_state_set(cur_state, cur_state_set); + + if (cur_state != want_state) { + if (p != pending_inc.new_state.end()) { + p->second &= ~mask; + } else { + pending_inc.new_state[from] = 0; + } + pending_inc.new_state[from] |= (osdmap.get_state(from) & mask) ^ want_state; + dout(7) << __func__ << " osd." << from << " " << cur_state_set + << " -> " << want_state_set << dendl; + } else { + dout(7) << __func__ << " osd." << from << " " << cur_state_set + << " = wanted " << want_state_set << ", just waiting" << dendl; + } + + wait_for_finished_proposal(op, new C_ReplyMap(this, op, m->version)); + return true; +} + // ------------- // alive diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index bc7d44fd40fd5..70dd67735a64b 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -261,6 +261,9 @@ private: void process_failures(); void take_all_failures(list& ls); + bool preprocess_full(MonOpRequestRef op); + bool prepare_full(MonOpRequestRef op); + bool preprocess_boot(MonOpRequestRef op); bool prepare_boot(MonOpRequestRef op); void _booted(MonOpRequestRef op, bool logit); diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 26c91eef09346..632bc92606a2f 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -60,6 +60,7 @@ using namespace std; #include "messages/MOSDPGTemp.h" #include "messages/MOSDFailure.h" #include "messages/MOSDMarkMeDown.h" +#include "messages/MOSDFull.h" #include "messages/MOSDPing.h" #include "messages/MOSDOp.h" #include "messages/MOSDOpReply.h" @@ -433,6 +434,9 @@ Message *decode_message(CephContext *cct, int crcflags, case MSG_OSD_MARK_ME_DOWN: m = new MOSDMarkMeDown(); break; + case MSG_OSD_FULL: + m = new MOSDFull(); + break; case MSG_OSD_PING: m = new MOSDPing(); break; diff --git a/src/msg/Message.h b/src/msg/Message.h index 63e6d11503678..5120f7edf85a1 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -65,6 +65,7 @@ #define MSG_OSD_FAILURE 72 #define MSG_OSD_ALIVE 73 #define MSG_OSD_MARK_ME_DOWN 74 +#define MSG_OSD_FULL 75 #define MSG_OSD_SUBOP 76 #define MSG_OSD_SUBOPREPLY 77 -- 2.39.5