From 8d3b4ca591090a5ab3b5940f43a86866f2f5b9bc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 7 Jun 2018 07:07:19 -0500 Subject: [PATCH] osd/PG: reset PG peering if osd transitions from down -> up Consider a PG that is stray and ends up in ReplicaActive (because it is participating as a recovery source). If the OSD is wrongly marked down and then comes back up, the PG will not reset, because there was no interval change (the PG is not part of the up or acting sets). This can leave the PG in an odd state, leading to questionable behavior. (For example, a stray might be in ReplicaActive and then ignore some types of query messages.) Signed-off-by: Sage Weil --- src/osd/PG.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 97564c378ba61..3df79d54698d3 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5765,9 +5765,12 @@ bool PG::should_restart_peering( dout(20) << "new interval newup " << newup << " newacting " << newacting << dendl; return true; - } else { - return false; } + if (!lastmap->is_up(osd->whoami) && osdmap->is_up(osd->whoami)) { + dout(10) << __func__ << " osd transitioned from down -> up" << dendl; + return true; + } + return false; } bool PG::old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) -- 2.39.5