From 7f1b6adec31e71ef4b3ec9b3c5cba8b1a4b7746f Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Wed, 20 Jun 2018 13:20:58 -0400 Subject: [PATCH] osd/PG: restrict async_recovery_targets to up osds When an osd that is part of the acting set but not the up set gets chosen as an async_recovery_target, it is removed from the acting set. Since this osd is no longer in the up or acting set, it is classified as a stray in the next peering cycle. This results in choose_acting() looping between two proposed acting sets. To avoid this, we will only choose up osds as async_recovery_targets. Signed-off-by: Neha Ojha --- src/osd/PG.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index d2a8220fd327d..70b7184ad65ac 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1514,6 +1514,12 @@ void PG::choose_async_recovery_ec(const map &all_info, // do not include strays if (stray_set.find(shard_i) != stray_set.end()) continue; + // Do not include an osd that is not up, since choosing it as + // an async_recovery_target will move it out of the acting set. + // This results in it being identified as a stray during peering, + // because it is no longer in the up or acting set. + if (!is_up(shard_i)) + continue; auto shard_info = all_info.find(shard_i)->second; // for ec pools we rollback all entries past the authoritative // last_update *before* activation. This is relatively inexpensive @@ -1556,6 +1562,12 @@ void PG::choose_async_recovery_replicated(const map &all_ // do not include strays if (stray_set.find(shard_i) != stray_set.end()) continue; + // Do not include an osd that is not up, since choosing it as + // an async_recovery_target will move it out of the acting set. + // This results in it being identified as a stray during peering, + // because it is no longer in the up or acting set. 
+ if (!is_up(shard_i)) + continue; auto shard_info = all_info.find(shard_i)->second; // use the approximate magnitude of the difference in length of // logs as the cost of recovery -- 2.39.5