From 8c16a376a9cbfd6812824fb71e2cc9935e14e667 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 8 Mar 2013 08:56:44 -0800 Subject: [PATCH] osd: mark down connections from old peers Close out any connection with an old peer. This avoids a race like: - peer marked down - we get map, mark down the con - they reconnect and try to send us some stuff - we share our map to tell them they are old and dead, but leave the con open ... - peer marks itself up a few times, eventually reuses the same port - sends messages on their fresh con - we discard because of our old con This could cause a tight reconnect loop, but it is better than wrong behavior. Other possible fixes: - make addr nonce truly unique (augment pid in nonce) - make a smarter 'disposable' msgr state (bleh) Signed-off-by: Sage Weil (cherry picked from commit 881e9d850c6762290f8be24da9e74b9dc112f1c9) --- src/osd/OSD.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 023bd9b3406e5..d814e3690767a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4484,6 +4484,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch) Message *m = op->request; dout(15) << "require_same_or_newer_map " << epoch << " (i am " << osdmap->get_epoch() << ") " << m << dendl; + assert(osd_lock.is_locked()); + // do they have a newer map? if (epoch > osdmap->get_epoch()) { dout(7) << "waiting for newer map epoch " << epoch << " > my " << osdmap->get_epoch() << " with " << m << dendl; @@ -4501,12 +4503,8 @@ bool OSD::require_same_or_newer_map(OpRequestRef op, epoch_t epoch) int from = m->get_source().num(); if (!osdmap->have_inst(from) || osdmap->get_cluster_addr(from) != m->get_source_inst().addr) { - if (m->get_connection()->has_feature(CEPH_FEATURE_OSD_HBMSGS)) { - dout(10) << "from dead osd." << from << ", dropping, sharing map" << dendl; - send_incremental_map(epoch, m->get_connection()); - } else { - dout(10) << "from dead osd." << from << ", but it lacks OSD_HBMSGS feature, not sharing map" << dendl; - } + dout(10) << "from dead osd." << from << ", marking down" << dendl; + cluster_messenger->mark_down(m->get_connection()); return false; } } -- 2.39.5