]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: ensure async recovery does not drop a pg below min_size
authorSamuel Just <sjust@redhat.com>
Fri, 4 Aug 2023 23:12:34 +0000 (16:12 -0700)
committerKonstantin Shalygin <k0ste@k0ste.ru>
Fri, 5 Apr 2024 14:16:02 +0000 (21:16 +0700)
PeeringState::choose_async_recovery_ec may remove OSDs from the acting
set as long as PeeringState::recoverable evaluates to true. Prior to
90022b35 (merge of PR 17619), the condition was
PeeringState::recoverable_and_ge_min_size, which behaved as the name
indicates. 7cb818a85 weakened the condition in
PeeringState::recoverable_and_ge_min_size to only check min_size if
!cct->_conf.get_val<bool>("osd_allow_recovery_below_min_size") (name was
changed to PeeringState::recoverable in a subsequent commit in that PR
e4c8bee88). PeeringState::recoverable_and_ge_min_size had (and has) two
callers: choose_acting and choose_async_recovery_ec. For choose_acting,
this change is correct. However, for choose_async_recovery_ec, we don't
want to reduce the acting set size below min_size as it would prevent
the PG doing IO during recovery.

PeeringState::choose_async_recovery_replicated already correctly checks
min_size.

Fixes: https://tracker.ceph.com/issues/62338
Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit 2fc5486ea2f89e0ca7fce9e36ee2dfbe3743725d)

src/osd/PeeringState.cc

index c86fbdc6f6e58a9725d7f4d4966589fed5fb7850..d3d68557c1039a0051486921806bdfe750cf8249 100644 (file)
@@ -2151,9 +2151,11 @@ void PeeringState::choose_async_recovery_ec(
   const OSDMapRef osdmap) const
 {
   set<pair<int, pg_shard_t> > candidates_by_cost;
+  unsigned want_acting_size = 0;
   for (uint8_t i = 0; i < want->size(); ++i) {
     if ((*want)[i] == CRUSH_ITEM_NONE)
       continue;
+    ++want_acting_size;
 
     // Considering log entries to recover is accurate enough for
     // now. We could use minimum_to_decode_with_cost() later if
@@ -2186,6 +2188,7 @@ void PeeringState::choose_async_recovery_ec(
       candidates_by_cost.emplace(approx_missing_objects, shard_i);
     }
   }
+  ceph_assert(candidates_by_cost.size() <= want_acting_size);
 
   psdout(20) << __func__ << " candidates by cost are: " << candidates_by_cost
             << dendl;
@@ -2196,7 +2199,10 @@ void PeeringState::choose_async_recovery_ec(
     pg_shard_t cur_shard = rit->second;
     vector<int> candidate_want(*want);
     candidate_want[cur_shard.shard.id] = CRUSH_ITEM_NONE;
-    if (recoverable(candidate_want)) {
+    ceph_assert(want_acting_size > 0);
+    --want_acting_size;
+    if ((want_acting_size >= pool.info.min_size) &&
+       recoverable(candidate_want)) {
       want->swap(candidate_want);
       async_recovery->insert(cur_shard);
     }