From: Kefu Chai Date: Thu, 24 Aug 2017 08:04:54 +0000 (+0800) Subject: osd/PG: discard msgs from down peers X-Git-Tag: v13.0.1~1105^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e3fce6be44506168a7a138aab93f6a4d6776397b;p=ceph.git osd/PG: discard msgs from down peers if a repop is replied after a replica goes down in a new osdmap, and before the pg advances to this new osdmap, the repop replies before this repop can be discarded by that replica OSD, because the primary resets the connection to it when handling the new osdmap marking it down, and also resets the messenger session when the replica reconnects. to avoid the out-of-order replies, the messages from that replica should be discarded. Fixes: http://tracker.ceph.com/issues/19605 Signed-off-by: Kefu Chai --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 42e3883a1a80..2ea5610911ad 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5696,12 +5696,21 @@ bool PG::can_discard_replica_op(OpRequestRef& op) const T *m = static_cast<const T *>(op->get_req()); assert(m->get_type() == MSGTYPE); + int from = m->get_source().num(); + + // if a repop is replied after a replica goes down in a new osdmap, and + // before the pg advances to this new osdmap, the repop replies before this + // repop can be discarded by that replica OSD, because the primary resets the + // connection to it when handling the new osdmap marking it down, and also + // resets the messenger sesssion when the replica reconnects. to avoid the + // out-of-order replies, the messages from that replica should be discarded. + if (osd->get_osdmap()->is_down(from)) + return true; /* Mostly, this overlaps with the old_peering_msg * condition. An important exception is pushes * sent by replicas not in the acting set, since * if such a replica goes down it does not cause * a new interval. */ - int from = m->get_source().num(); if (get_osdmap()->get_down_at(from) >= m->map_epoch) return true;