From 4fc4573fd2e85f3f87956ef61631a2beb9824fb8 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Sun, 19 Jan 2014 01:17:49 -0800 Subject: [PATCH] PG: drop messages from down peers This overlaps with the existing old_peering_msg() mechanism except in one case: pulls from a replica not in the acting set. If such a replica gets marked down, we may resend pulls to another replica without causing a new interval to start. If we received, but didn't process, a push in response to such a pull prior to processing the map marking the peer down, we might process the push after having reset the pull state for a different pull operation. We can avoid this by discarding ops from down peers. Signed-off-by: Samuel Just --- src/osd/PG.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index d3887eda82f9..84fa6483291f 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4868,6 +4868,15 @@ bool PG::can_discard_replica_op(OpRequestRef op) T *m = static_cast(op->get_req()); assert(m->get_header().type == MSGTYPE); + /* Mostly, this overlaps with the old_peering_msg + * condition. An important exception is pushes + * sent by replicas not in the acting set, since + * if such a replica goes down it does not cause + * a new interval. */ + int from = m->get_source().num(); + if (get_osdmap()->get_down_at(from) >= m->map_epoch) + return true; + // same pg? // if pg changes _at all_, we reset and repeer! if (old_peering_msg(m->map_epoch, m->map_epoch)) { -- 2.47.3