From: David Zafman
Date: Thu, 2 Jul 2020 17:05:57 +0000 (-0700)
Subject: osd: Cancel in-progress scrubs (not user requested)
X-Git-Tag: v14.2.11~14^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=16f52b1b1e29d9c4e94d5d146c7b404f1d8ef4da;p=ceph.git

osd: Cancel in-progress scrubs (not user requested)

This change adds new scrubber.req_scrub to track user requested scrubs,
deep_scrub or repair.

Fixes: https://tracker.ceph.com/issues/46275

Signed-off-by: David Zafman
(cherry picked from commit 33749cc3c39131d7abed9b8c14064dbfaa87f3a2)

Conflicts:
	PendingReleaseNotes (trivial)
	src/osd/PG.cc (Due to code re-arrangement changes add manually)
---

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index c0ec6fb1709e4..76cc45fcb1e7c 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -8,3 +8,7 @@
   results on the cluster, which could fill a nearly-full cluster.
   They have been replaced by a tool, currently considered experimental,
   ``rgw-orphan-list``.
+
+* Now when noscrub and/or nodeep-scrub flags are set globally or per pool,
+  scheduled scrubs of the type disabled will be aborted. All user initiated
+  scrubs are NOT interrupted.
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index d421fc512302f..40da25a3bc67e 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -363,6 +363,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   finish_sync_event(NULL),
   backoff_lock("PG::backoff_lock"),
   scrub_after_recovery(false),
+  save_req_scrub(false),
   active_pushes(0),
   recovery_state(this),
   peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
@@ -1122,6 +1123,7 @@ void PG::clear_primary_state()
   scrubber.reserved_peers.clear();
 
   scrub_after_recovery = false;
+  save_req_scrub = false;
 
   agent_clear();
 }
@@ -1132,7 +1134,7 @@ PG::Scrubber::Scrubber()
    active(false),
    shallow_errors(0), deep_errors(0), fixed(0),
    must_scrub(false), must_deep_scrub(false), must_repair(false),
-   need_auto(false), time_for_deep(false),
+   need_auto(false), req_scrub(false), time_for_deep(false),
    auto_repair(false),
    check_repair(false),
    deep_scrub_on_error(false),
@@ -2635,6 +2637,8 @@ void PG::_finish_recovery(Context *c)
       scrub_after_recovery = false;
       scrubber.must_deep_scrub = true;
       scrubber.check_repair = true;
+      // We remember whether req_scrub was set when scrub_after_recovery set to true
+      scrubber.req_scrub = save_req_scrub;
       queue_scrub();
     }
   } else {
@@ -4581,6 +4585,7 @@ void PG::scrub_requested(bool deep, bool repair, bool need_auto)
     scrubber.must_repair = repair;
     // User might intervene, so clear this
     scrubber.need_auto = false;
+    scrubber.req_scrub = true;
   }
   reg_next_scrub();
 }
@@ -5239,6 +5244,12 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
   chunky_scrub(handle);
 }
 
+void PG::abort_scrub()
+{
+  scrub_clear_state();
+  scrub_unreserve_replicas();
+}
+
 /*
  * Chunky scrub scrubs objects one chunk at a time with writes blocked for that
  * chunk.
@@ -5319,12 +5330,29 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
  */
 void PG::chunky_scrub(ThreadPool::TPHandle &handle)
 {
+  // Since repair is only by request and we need to scrub afterward
+  // treat the same as req_scrub.
+  if (!scrubber.req_scrub) {
+    if (state_test(PG_STATE_DEEP_SCRUB)) {
+      if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
+          pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) {
+        dout(10) << "nodeep_scrub set, aborting" << dendl;
+        abort_scrub();
+        return;
+      }
+    } else if (state_test(PG_STATE_SCRUBBING)) {
+      if (get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) {
+        dout(10) << "noscrub set, aborting" << dendl;
+        abort_scrub();
+        return;
+      }
+    }
+  }
   // check for map changes
   if (scrubber.is_chunky_scrub_active()) {
     if (scrubber.epoch_start != info.history.same_interval_since) {
-      dout(10) << "scrub pg changed, aborting" << dendl;
-      scrub_clear_state();
-      scrub_unreserve_replicas();
+      dout(10) << "scrub pg changed, aborting" << dendl;
+      abort_scrub();
       return;
     }
   }
@@ -5705,6 +5733,7 @@ void PG::scrub_clear_state(bool has_error)
   state_clear(PG_STATE_DEEP_SCRUB);
   publish_stats_to_osd();
 
+  scrubber.req_scrub = false;
   // local -> nothing.
   if (scrubber.local_reserved) {
     osd->dec_scrubs_local();
@@ -5958,7 +5987,8 @@ void PG::scrub_finish()
     } else if (has_error) {
       // Deep scrub in order to get corrected error counts
       scrub_after_recovery = true;
-      dout(20) << __func__ << " Set scrub_after_recovery" << dendl;
+      save_req_scrub = scrubber.req_scrub;
+      dout(20) << __func__ << " Set scrub_after_recovery, req_scrub=" << save_req_scrub << dendl;
     } else if (scrubber.shallow_errors || scrubber.deep_errors) {
       // We have errors but nothing can be fixed, so there is no repair
       // possible.
@@ -6673,6 +6703,8 @@ ostream& operator<<(ostream& out, const PG& pg)
     out << " TIME_FOR_DEEP";
   if (pg.scrubber.need_auto)
     out << " NEED_AUTO";
+  if (pg.scrubber.req_scrub)
+    out << " REQ_SCRUB";
 
   //out << " (" << pg.pg_log.get_tail() << "," << pg.pg_log.get_head() << "]";
   if (pg.pg_log.get_missing().num_missing()) {
diff --git a/src/osd/PG.h b/src/osd/PG.h
index b9383376f75b5..1ebf0c8f875fb 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1717,7 +1717,7 @@ public:
     utime_t sleep_start;
 
     // flags to indicate explicitly requested scrubs (by admin)
-    bool must_scrub, must_deep_scrub, must_repair, need_auto;
+    bool must_scrub, must_deep_scrub, must_repair, need_auto, req_scrub;
 
     // Priority to use for scrub scheduling
     unsigned priority = 0;
@@ -1842,6 +1842,7 @@ public:
       must_deep_scrub = false;
       must_repair = false;
       need_auto = false;
+      req_scrub = false;
       time_for_deep = false;
       auto_repair = false;
       check_repair = false;
@@ -1878,6 +1879,7 @@ public:
 
 protected:
   bool scrub_after_recovery;
+  bool save_req_scrub; // Saved for scrub_after_recovery
 
   int active_pushes;
 
@@ -1896,6 +1898,7 @@ protected:
     const hobject_t& soid,
     list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
     pg_shard_t bad_peer);
+  void abort_scrub();
   void chunky_scrub(ThreadPool::TPHandle &handle);
   void scrub_compare_maps();
   /**
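For illustration only (not part of the patch): the behaviour described in the
PendingReleaseNotes entry can be exercised with the standard Ceph CLI. The pool
name "rbd" and pg id "2.7" below are placeholders for a pool and PG in your
cluster.

    # Setting either flag causes an in-progress scheduled scrub of that type
    # to be aborted the next time chunky_scrub() runs:
    ceph osd set noscrub                    # cluster wide
    ceph osd pool set rbd nodeep-scrub 1    # or per pool

    # A user-initiated scrub sets scrubber.req_scrub and is not interrupted:
    ceph pg deep-scrub 2.7
    ceph pg repair 2.7

    # Re-enable scheduled scrubs afterwards:
    ceph osd unset noscrub
    ceph osd pool set rbd nodeep-scrub 0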