From 2ed1b8bf8e0e1733d05e570ef8741cc33956324d Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 4 Aug 2023 16:12:34 -0700 Subject: [PATCH] osd: ensure async recovery does not drop a pg below min_size PeeringState::choose_async_recovery_ec may remove OSDs from the acting set as long as PeeringState::recoverable evaluates to true. Prior to 90022b35 (merge of PR 17619), the condition was PeeringState::recoverable_and_ge_min_size, which behaved as the name indicates. 7cb818a85 weakened the condition in PeeringState::recoverable_and_ge_min_size to only check min_size if !cct->_conf.get_val("osd_allow_recovery_below_min_size") (name was changed to PeeringState::recoverable in a subsequent commit in that PR e4c8bee88). PeeringState::recoverable_and_ge_min_size had (and has) two callers: choose_acting and choose_async_recovery_ec. For choose_acting, this change is correct. However, for choose_async_recovery_ec, we don't want to reduce the acting set size below min_size as it would prevent the PG doing IO during recovery. PeeringState::choose_async_recovery_replicated already correctly checks min_size. Fixes: https://tracker.ceph.com/issues/62338 Signed-off-by: Samuel Just (cherry picked from commit 2fc5486ea2f89e0ca7fce9e36ee2dfbe3743725d) --- src/osd/PeeringState.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index c86fbdc6f6e..d3d68557c10 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -2151,9 +2151,11 @@ void PeeringState::choose_async_recovery_ec( const OSDMapRef osdmap) const { set > candidates_by_cost; + unsigned want_acting_size = 0; for (uint8_t i = 0; i < want->size(); ++i) { if ((*want)[i] == CRUSH_ITEM_NONE) continue; + ++want_acting_size; // Considering log entries to recover is accurate enough for // now. We could use minimum_to_decode_with_cost() later if @@ -2186,6 +2188,7 @@ void PeeringState::choose_async_recovery_ec( candidates_by_cost.emplace(approx_missing_objects, shard_i); } } + ceph_assert(candidates_by_cost.size() <= want_acting_size); psdout(20) << __func__ << " candidates by cost are: " << candidates_by_cost << dendl; @@ -2196,7 +2199,10 @@ void PeeringState::choose_async_recovery_ec( pg_shard_t cur_shard = rit->second; vector candidate_want(*want); candidate_want[cur_shard.shard.id] = CRUSH_ITEM_NONE; - if (recoverable(candidate_want)) { + ceph_assert(want_acting_size > 0); + --want_acting_size; + if ((want_acting_size >= pool.info.min_size) && + recoverable(candidate_want)) { want->swap(candidate_want); async_recovery->insert(cur_shard); } -- 2.39.5