From: Kefu Chai Date: Thu, 24 Aug 2017 08:04:54 +0000 (+0800) Subject: osd/PG: discard msgs from down peers X-Git-Tag: v12.2.1~86^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ac4cb6dd41adfc549d064756215421c33368dc86;p=ceph.git osd/PG: discard msgs from down peers if a repop is replied after a replica goes down in a new osdmap, and before the pg advances to this new osdmap, the repop replies before this repop can be discarded by that replica OSD, because the primary resets the connection to it when handling the new osdmap marking it down, and also resets the messenger session when the replica reconnects. to avoid the out-of-order replies, the messages from that replica should be discarded. Fixes: http://tracker.ceph.com/issues/19605 Signed-off-by: Kefu Chai (cherry picked from commit e3fce6be44506168a7a138aab93f6a4d6776397b) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 5849c64bb070..b14f56734e1e 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5700,12 +5700,21 @@ bool PG::can_discard_replica_op(OpRequestRef& op) const T *m = static_cast<const T *>(op->get_req()); assert(m->get_type() == MSGTYPE); + int from = m->get_source().num(); + + // if a repop is replied after a replica goes down in a new osdmap, and + // before the pg advances to this new osdmap, the repop replies before this + // repop can be discarded by that replica OSD, because the primary resets the + // connection to it when handling the new osdmap marking it down, and also + // resets the messenger sesssion when the replica reconnects. to avoid the + // out-of-order replies, the messages from that replica should be discarded. + if (osd->get_osdmap()->is_down(from)) + return true; /* Mostly, this overlaps with the old_peering_msg * condition. An important exception is pushes * sent by replicas not in the acting set, since * if such a replica goes down it does not cause * a new interval. 
*/ - int from = m->get_source().num(); if (get_osdmap()->get_down_at(from) >= m->map_epoch) return true;