]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: relax osd ok-to-stop condition on degraded pgs
author Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 7 Feb 2021 04:40:36 +0000 (12:40 +0800)
committer Nathan Cutler <ncutler@suse.com>
Thu, 8 Apr 2021 14:52:25 +0000 (16:52 +0200)
Right now, the "ok-to-stop" condition is relatively rigorous: it allows
stopping an osd only if no PG on it is non-active or degraded. But there
are situations in which an OSD is part of a degraded pg and the pg would
still have > min_size complete replicas after the OSD is stopped.

In 9750061d5d4236aaba156d60790e0b8bcd7cfb64, we changed from considering
just acting to using avail_no_missing (OSDs that have no missing objects).
When the projected pg_acting is constructed this way, we can safely compare
to min_size... even for a PG marked degraded.

Fixes: https://tracker.ceph.com/issues/49392
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
(cherry picked from commit 2f28fc58eb63e20c2a869f5407516cf8044dbc41)

src/mgr/DaemonServer.cc

index 8ba6912c711760790a1a99ec31aa7c9dafe34edf..a561844832f80af2bd62a75e9fb58b84f70881bd 100644 (file)
@@ -1602,20 +1602,21 @@ bool DaemonServer::_handle_command(
            continue;
          }
          touched_pgs++;
-         if (!(q.second.state & PG_STATE_ACTIVE) ||
-             (q.second.state & PG_STATE_DEGRADED)) {
-           ++dangerous_pgs;
-           continue;
-         }
+
          const pg_pool_t *pi = osdmap.get_pg_pool(q.first.pool());
          if (!pi) {
            ++dangerous_pgs; // pool is creating or deleting
-         } else {
-           if (pg_acting.size() < pi->min_size) {
-             ++dangerous_pgs;
-           }
+            continue;
          }
-       }
+
+         if (!(q.second.state & PG_STATE_ACTIVE)) {
+           ++dangerous_pgs;
+           continue;
+         }
+          if (pg_acting.size() < pi->min_size) {
+            ++dangerous_pgs;
+          }
+        }
       });
     if (r) {
       cmdctx->reply(r, ss);