From 3d4a6739f2a0db895809ced5976ce9eeb0190963 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Thu, 23 Jan 2014 14:52:40 -0800 Subject: [PATCH] Elector: send an OP_NAK MMonElection to old peers who support it Only new monitors support receiving OP_NAK from a peer without crashing, but when we add new required features in the future, our monitors can accept an OP_NAK message which tells them what features they're missing. Then they will print out an error message and shut down. (Unfortunately, doing a clean shutdown from here would require a lot of infrastructure, so we just call exit(0).) Signed-off-by: Greg Farnum --- src/include/ceph_features.h | 4 ++- src/messages/MMonElection.h | 6 ++--- src/mon/Elector.cc | 51 ++++++++++++++++++++++++++++++++----- src/mon/Elector.h | 27 +++++++++++++++++++- 4 files changed, 76 insertions(+), 12 deletions(-) diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 988a8574bcdcd..80699697d80e0 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -44,7 +44,9 @@ #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) #define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ -#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) /* supports new-style OSDMap encoding */ +/* The process supports new-style OSDMap encoding. Monitors also use + this bit to determine if peers support NAK messages. */ +#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature diff --git a/src/messages/MMonElection.h b/src/messages/MMonElection.h index ea55bbdb45345..f1ef1dd792a45 100644 --- a/src/messages/MMonElection.h +++ b/src/messages/MMonElection.h @@ -45,11 +45,11 @@ public: bufferlist monmap_bl; set quorum; uint64_t quorum_features; + bufferlist sharing_bl; /* the following were both used in the next branch for a while * on user cluster, so we've left them in for compatibility. 
*/ version_t defunct_one; version_t defunct_two; - bufferlist commands; MMonElection() : Message(MSG_MON_ELECTION, HEAD_VERSION, COMPAT_VERSION), op(0), epoch(0), quorum_features(0), defunct_one(0), @@ -91,7 +91,7 @@ public: ::encode(quorum_features, payload); ::encode(defunct_one, payload); ::encode(defunct_two, payload); - ::encode(commands, payload); + ::encode(sharing_bl, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); @@ -112,7 +112,7 @@ public: ::decode(defunct_two, p); } if (header.version >= 5) - ::decode(commands, p); + ::decode(sharing_bl, p); } }; diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc index fdab343c8a5c6..511d71411d96f 100644 --- a/src/mon/Elector.cc +++ b/src/mon/Elector.cc @@ -111,7 +111,7 @@ void Elector::defer(int who) leader_acked = who; ack_stamp = ceph_clock_now(g_ceph_context); MMonElection *m = new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap); - m->commands = mon->get_supported_commands_bl(); + m->sharing_bl = mon->get_supported_commands_bl(); mon->messenger->send_message(m, mon->monmap->get_inst(who)); // set a timer @@ -196,7 +196,7 @@ void Elector::victory() MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch, mon->monmap); m->quorum = quorum; m->quorum_features = features; - m->commands = *cmds_bl; + m->sharing_bl = *cmds_bl; mon->messenger->send_message(m, mon->monmap->get_inst(*p)); } @@ -213,8 +213,9 @@ void Elector::handle_propose(MMonElection *m) assert(m->epoch % 2 == 1); // election if ((required_features ^ m->get_connection()->get_features()) & required_features) { - dout(5) << " ignoring propose from mon without required features" << dendl; - m->put(); + dout(5) << " ignoring propose from mon" << from + << " without required features" << dendl; + nak_old_peer(m); return; } else if (m->epoch > epoch) { bump_epoch(m->epoch); @@ -278,7 +279,7 @@ void Elector::handle_ack(MMonElection *m) if (electing_me) { // thanks acked_me[from] = 
m->get_connection()->get_features(); - if (!m->commands.length()) + if (!m->sharing_bl.length()) classic_mons.insert(from); dout(5) << " so far i have " << acked_me << dendl; @@ -324,10 +325,10 @@ void Elector::handle_victory(MMonElection *m) cancel_timer(); // stash leader's commands - if (m->commands.length()) { + if (m->sharing_bl.length()) { MonCommand *new_cmds; int cmdsize; - bufferlist::iterator bi = m->commands.begin(); + bufferlist::iterator bi = m->sharing_bl.begin(); MonCommand::decode_array(&new_cmds, &cmdsize, bi); mon->set_leader_supported_commands(new_cmds, cmdsize); } else { // they are a legacy monitor; use known legacy command set @@ -340,8 +341,41 @@ void Elector::handle_victory(MMonElection *m) m->put(); } +void Elector::nak_old_peer(MMonElection *m) +{ + uint64_t supported_features = m->get_connection()->get_features(); + + if (supported_features & CEPH_FEATURE_OSDMAP_ENC) { + uint64_t required_features = mon->apply_compatset_features_to_quorum_requirements(); + dout(10) << "sending nak to peer " << m->get_source() + << " that only supports " << supported_features + << " of the required " << required_features << dendl; + + MMonElection *reply = new MMonElection(MMonElection::OP_NAK, m->epoch, + mon->monmap); + reply->quorum_features = required_features; + mon->features.encode(reply->sharing_bl); + mon->messenger->send_message(reply, m->get_connection()); + } + m->put(); +} +void Elector::handle_nak(MMonElection *m) +{ + dout(1) << "handle_nak from " << m->get_source() + << " quorum_features " << m->quorum_features << dendl; + CompatSet other; + bufferlist::iterator bi = m->sharing_bl.begin(); + other.decode(bi); + CompatSet diff = Monitor::get_supported_features().unsupported(other); + + derr << "Shutting down because I do not support required monitor features: { " + << diff << " }" << dendl; + + exit(0); + // the end! 
+} void Elector::dispatch(Message *m) { @@ -422,6 +456,9 @@ void Elector::dispatch(Message *m) case MMonElection::OP_VICTORY: handle_victory(em); return; + case MMonElection::OP_NAK: + handle_nak(em); + return; default: assert(0); } diff --git a/src/mon/Elector.h b/src/mon/Elector.h index 0b2b893ba34e1..b88e8304aacc6 100644 --- a/src/mon/Elector.h +++ b/src/mon/Elector.h @@ -245,7 +245,7 @@ class Elector { * @post We sent a message of type OP_VICTORY to each quorum member. */ void victory(); - + /** * Handle a message from some other node proposing himself to become him * the Leader. @@ -317,6 +317,31 @@ class Elector { * @param m A message with an operation type of OP_VICTORY */ void handle_victory(class MMonElection *m); + /** + * Send a nak to a peer who's out of date, containing information about why. + * + * If we get a message from a peer who can't support the required quorum + * features, we have to ignore them. This function will at least send + * them a message about *why* they're being ignored -- if they're new + * enough to support such a message. + * + * @param m A message from a monitor not supporting required features. We + * take ownership of the reference. + */ + void nak_old_peer(class MMonElection *m); + /** + * Handle a message from some other participant declaring + * we cannot join the quorum. + * + * Apparently the quorum requires some feature that we do not implement. Log + * the missing features and call exit(0); this is not a clean shutdown. + * + * @pre Election is on-going. + * @post The process has exited. + * + * @param m A message with an operation type of OP_NAK + */ + void handle_nak(class MMonElection *m); public: /** -- 2.39.5