From: Sage Weil
Date: Thu, 17 May 2012 18:08:38 +0000 (-0700)
Subject: mon: take probed peer's monmap if it has ever joined a quorum
X-Git-Tag: v0.48argonaut~137^2~13^2~40
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0088699f7aa5e1dfec45f8daaf17f9a295369dba;p=ceph.git

mon: take probed peer's monmap if it has ever joined a quorum

If we probe a peer and their monmap has actually been part of a started
cluster/quorum, and ours hasn't, take theirs. Comparing versions isn't
sufficient.

Signed-off-by: Sage Weil
---

diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 3172f5cc829..3a741d61f1b 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -568,17 +568,24 @@ void Monitor::handle_probe_reply(MMonProbe *m)
     return;
   }
 
-  // newer map?
-  MonMap *newmap = new MonMap;
-  newmap->decode(m->monmap_bl);
-  if (newmap->get_epoch() > monmap->get_epoch()) {
-    dout(10) << " got new monmap epoch " << newmap->get_epoch()
-             << " > my " << monmap->get_epoch() << dendl;
-    monmap->decode(m->monmap_bl);
-    m->put();
-
-    bootstrap();
-    return;
+  // newer map, or they've joined a quorum and we haven't?
+  bufferlist mybl;
+  monmap->encode(mybl, m->get_connection()->get_features());
+  // make sure it's actually different; the checks below err toward
+  // taking the other guy's map, which could cause us to loop.
+  if (!mybl.contents_equal(m->monmap_bl)) {
+    MonMap *newmap = new MonMap;
+    newmap->decode(m->monmap_bl);
+    if (m->has_ever_joined && (newmap->get_epoch() > monmap->get_epoch() ||
+                               !has_ever_joined)) {
+      dout(10) << " got newer/committed monmap epoch " << newmap->get_epoch()
+               << ", mine was " << monmap->get_epoch() << dendl;
+      monmap->decode(m->monmap_bl);
+      m->put();
+
+      bootstrap();
+      return;
+    }
   }
 
   // rename peer?