From: Sage Weil
Date: Fri, 8 Mar 2013 16:56:44 +0000 (-0800)
Subject: osd: mark down connections from old peers
X-Git-Tag: v0.59~25^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=881e9d850c6762290f8be24da9e74b9dc112f1c9;p=ceph.git

osd: mark down connections from old peers

Close out any connection with an old peer. This avoids a race like:

- peer marked down
- we get map, mark down the con
- they reconnect and try to send us some stuff
- we share our map to tell them they are old and dead, but leave the con open
...
- peer marks itself up a few times, eventually reuses the same port
- sends messages on their fresh con
- we discard because of our old con

This could cause a tight reconnect loop, but it is better than wrong behavior.

Other possible fixes:
- make addr nonce truly unique (augment pid in nonce)
- make a smarter 'disposable' msgr state (bleh)

Signed-off-by: Sage Weil
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index b97ff5fcca0f..3554ffb993dd 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -4521,6 +4521,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch)
   Message *m = op->request;
   dout(15) << "require_same_or_newer_map " << epoch << " (i am " << osdmap->get_epoch() << ") " << m << dendl;
 
+  assert(osd_lock.is_locked());
+
   // do they have a newer map?
   if (epoch > osdmap->get_epoch()) {
     dout(7) << "waiting for newer map epoch " << epoch << " > my " << osdmap->get_epoch() << " with " << m << dendl;
@@ -4538,12 +4540,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch)
     int from = m->get_source().num();
     if (!osdmap->have_inst(from) ||
         osdmap->get_cluster_addr(from) != m->get_source_inst().addr) {
-      if (m->get_connection()->has_feature(CEPH_FEATURE_OSD_HBMSGS)) {
-        dout(10) << "from dead osd." << from << ", dropping, sharing map" << dendl;
-        send_incremental_map(epoch, m->get_connection());
-      } else {
-        dout(10) << "from dead osd." << from << ", but it lacks OSD_HBMSGS feature, not sharing map" << dendl;
-      }
+      dout(10) << "from dead osd." << from << ", marking down" << dendl;
+      cluster_messenger->mark_down(m->get_connection());
       return false;
     }
   }