git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/PGMap: do not consider changing pg stuck 35958/head
author: Kefu Chai <kchai@redhat.com>
Wed, 1 Jul 2020 11:33:35 +0000 (19:33 +0800)
committer: Laura Paduano <lpaduano@suse.com>
Tue, 7 Jul 2020 11:40:40 +0000 (13:40 +0200)
There is a chance that a PG has just been created but has not yet peered
by the time a mgr module retrieves the health report from the mgr. In that
case, the "last_peered" field is not set, as that PG has not peered. But
normally, a newly created PG will be active+clean within a couple of seconds,
which is way under the default setting of mon_pg_stuck_threshold (60
seconds).

So in this change, if the "last_whatever" is not set, we also use
"last_change" as a reference to see if the PG is healthy, and only
consider the PG stuck if its last_change is also too old.

Fixes: https://tracker.ceph.com/issues/45717
Signed-off-by: Kefu Chai <kchai@redhat.com>
(cherry picked from commit 34e1df66cdf9ac4aeea338a8f3d5b9a10fa5983a)

src/mon/PGMap.cc

index a340fd0562a17bce41084c258136b25fe36b2ee7..57796651c6e3d4a5d36f820b363216e97f06ab2b 100644 (file)
@@ -2525,7 +2525,11 @@ void PGMap::get_health_checks(
         if (pg_response.stuck_since) {
           // Delayed response, check for stuckness
           utime_t last_whatever = pg_response.stuck_since(pg_info);
-          if (last_whatever >= cutoff) {
+          if (last_whatever.is_zero() &&
+            pg_info.last_change >= cutoff) {
+            // still moving, ignore
+            continue;
+          } else if (last_whatever >= cutoff) {
             // Not stuck enough, ignore.
             continue;
           } else {