From 4a4c1fde89c821e3dad4d8c338eef7091d3594a3 Mon Sep 17 00:00:00 2001
From: David Zafman <dzafman@redhat.com>
Date: Thu, 30 May 2019 08:26:19 -0700
Subject: [PATCH] osd: Fix the way that auto repair triggers after regular
 scrub

We used a trick to get auto repair to happen after scrub errors
which reset the scrub/deep-scrub stamps.  This not only
looks bad to the user, but causes health warnings.  Instead
use a new scrubber flag need_auto which causes reg_next_srub()
to set deadline for immediate scrubbing.  It also causes time_for_deep
to be set so that auto repair triggers.

Every regular scrub was triggering a deep scrub. Check
scrubber.authoritative.size() (scrub error count), so regular scrub doesn't
trigger deep-scrub when there are no errors.

Caused by: 2202e5d0b107795837ce79ffce2a980e8c12fc62

Fixes: http://tracker.ceph.com/issues/40073

Signed-off-by: David Zafman <dzafman@redhat.com>
---
 src/crimson/osd/pg.h   |  2 +-
 src/osd/PG.cc          | 50 ++++++++++++++++++++++++++++--------------
 src/osd/PG.h           |  5 +++--
 src/osd/PeeringState.h |  2 +-
 4 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index 109a09ee8d5..c32f190c9b8 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -116,7 +116,7 @@ public:
     // Not needed yet -- mainly for scrub scheduling
   }
 
-  void scrub_requested(bool deep, bool repair) final {
+  void scrub_requested(bool deep, bool repair, bool need_auto = false) final {
     ceph_assert(0 == "Not implemented");
   }
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 1bac86d79d5..f6fd0492a47 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -335,6 +335,7 @@ PG::Scrubber::Scrubber()
    active(false),
    shallow_errors(0), deep_errors(0), fixed(0),
    must_scrub(false), must_deep_scrub(false), must_repair(false),
+   need_auto(false),
    auto_repair(false),
    check_repair(false),
    deep_scrub_on_error(false),
@@ -424,6 +425,12 @@ bool PG::queue_scrub()
   }
   // An interrupted recovery repair could leave this set.
   state_clear(PG_STATE_REPAIR);
+  if (scrubber.need_auto) {
+    scrubber.must_scrub = true;
+    scrubber.must_deep_scrub = true;
+    scrubber.auto_repair = true;
+    scrubber.need_auto = false;
+  }
   scrubber.priority = scrubber.must_scrub ?
          cct->_conf->osd_requested_scrub_priority : get_scrub_priority();
   scrubber.must_scrub = false;
@@ -1372,7 +1379,10 @@ bool PG::sched_scrub()
   // the scheduling of the scrub/repair (e.g. request reservation)
   scrubber.deep_scrub_on_error = false;
   scrubber.auto_repair = false;
-  if (cct->_conf->osd_scrub_auto_repair
+  if (scrubber.need_auto) {
+      dout(20) << __func__ << ": need repair after scrub errors" << dendl;
+      time_for_deep = true;
+  } else if (cct->_conf->osd_scrub_auto_repair
       && get_pgbackend()->auto_repair_supported()
       // respect the command from user, and not do auto-repair
       && !scrubber.must_repair
@@ -1454,7 +1464,7 @@ void PG::reg_next_scrub()
 
   utime_t reg_stamp;
   bool must = false;
-  if (scrubber.must_scrub) {
+  if (scrubber.must_scrub || scrubber.need_auto) {
     // Set the smallest time that isn't utime_t()
     reg_stamp = Scrubber::scrub_must_stamp();
     must = true;
@@ -1493,12 +1503,18 @@ void PG::on_info_history_change()
   reg_next_scrub();
 }
 
-void PG::scrub_requested(bool deep, bool repair)
+void PG::scrub_requested(bool deep, bool repair, bool need_auto)
 {
   unreg_next_scrub();
-  scrubber.must_scrub = true;
-  scrubber.must_deep_scrub = deep || repair;
-  scrubber.must_repair = repair;
+  if (need_auto) {
+    scrubber.need_auto = true;
+  } else {
+    scrubber.must_scrub = true;
+    scrubber.must_deep_scrub = deep || repair;
+    scrubber.must_repair = repair;
+    // User might intervene, so clear this
+    scrubber.need_auto = false;
+  }
   reg_next_scrub();
 }
 
@@ -3113,7 +3129,7 @@ void PG::scrub_finish()
 {
   dout(20) << __func__ << dendl;
   bool repair = state_test(PG_STATE_REPAIR);
-  bool do_deep_scrub = false;
+  bool do_auto_scrub = false;
   // if the repair request comes from auto-repair and large number of errors,
   // we would like to cancel auto-repair
   if (repair && scrubber.auto_repair
@@ -3126,12 +3142,13 @@ void PG::scrub_finish()
 
   // if a regular scrub had errors within the limit, do a deep scrub to auto repair.
   if (scrubber.deep_scrub_on_error
+      && scrubber.authoritative.size()
       && scrubber.authoritative.size() <= cct->_conf->osd_scrub_auto_repair_num_errors) {
     ceph_assert(!deep_scrub);
-    scrubber.deep_scrub_on_error = false;
-    do_deep_scrub = true;
+    do_auto_scrub = true;
     dout(20) << __func__ << " Try to auto repair after scrub errors" << dendl;
   }
+  scrubber.deep_scrub_on_error = false;
 
   // type-specific finish (can tally more errors)
   _scrub_finish();
@@ -3181,7 +3198,7 @@ void PG::scrub_finish()
     // finish up
     ObjectStore::Transaction t;
     recovery_state.update_stats(
-      [this, do_deep_scrub, deep_scrub](auto &history, auto &stats) {
+      [this, deep_scrub](auto &history, auto &stats) {
 	utime_t now = ceph_clock_now();
 	history.last_scrub = recovery_state.get_info().last_update;
 	history.last_scrub_stamp = now;
@@ -3219,13 +3236,6 @@ void PG::scrub_finish()
 		     << " error(s) still present after re-scrub" << dendl;
 	  }
 	}
-	if (do_deep_scrub) {
-	  // XXX: Auto scrub won't activate if must_scrub is set, but
-	  // setting the scrub stamps affects what users see.
-	  utime_t stamp = utime_t(0,1);
-	  set_last_scrub_stamp(stamp, history, stats);
-	  set_last_deep_scrub_stamp(stamp, history, stats);
-	}
 	return true;
       },
       &t);
@@ -3245,6 +3255,10 @@ void PG::scrub_finish()
   scrub_clear_state(has_error);
   scrub_unreserve_replicas();
 
+  if (do_auto_scrub) {
+    scrub_requested(false, false, true);
+  }
+
   if (is_active() && is_primary()) {
     recovery_state.share_pg_info();
   }
@@ -3313,6 +3327,8 @@ ostream& operator<<(ostream& out, const PG& pg)
     out << " MUST_DEEP_SCRUB";
   if (pg.scrubber.must_scrub)
     out << " MUST_SCRUB";
+  if (pg.scrubber.need_auto)
+    out << " NEED_AUTO";
 
   if (pg.recovery_ops_active)
     out << " rops=" << pg.recovery_ops_active;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 76ec494a860..d43c47126f2 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -400,7 +400,7 @@ public:
 
   void on_info_history_change() override;
 
-  void scrub_requested(bool deep, bool repair) override;
+  void scrub_requested(bool deep, bool repair, bool need_auto = false) override;
 
   uint64_t get_snap_trimq_size() const override {
     return snap_trimq.size();
@@ -1113,7 +1113,7 @@ public:
     utime_t sleep_start;
 
     // flags to indicate explicitly requested scrubs (by admin)
-    bool must_scrub, must_deep_scrub, must_repair;
+    bool must_scrub, must_deep_scrub, must_repair, need_auto;
 
     // Priority to use for scrub scheduling
     unsigned priority = 0;
@@ -1236,6 +1236,7 @@ public:
       must_scrub = false;
       must_deep_scrub = false;
       must_repair = false;
+      need_auto = false;
       auto_repair = false;
       check_repair = false;
       deep_scrub_on_error = false;
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index d2466187607..0e3787650c6 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -113,7 +113,7 @@ public:
     /// Notify that info/history changed (generally to update scrub registration)
     virtual void on_info_history_change() = 0;
     /// Notify that a scrub has been requested
-    virtual void scrub_requested(bool deep, bool repair) = 0;
+    virtual void scrub_requested(bool deep, bool repair, bool need_auto = false) = 0;
 
     /// Return current snap_trimq size
     virtual uint64_t get_snap_trimq_size() const = 0;
-- 
2.39.5