osd: Cancel in-progress scrubs (not user requested)

author David Zafman <dzafman@redhat.com>

Thu, 2 Jul 2020 17:05:57 +0000 (10:05 -0700)

committer David Zafman <dzafman@redhat.com>

Fri, 24 Jul 2020 19:04:38 +0000 (12:04 -0700)
author David Zafman <dzafman@redhat.com>
Thu, 2 Jul 2020 17:05:57 +0000 (10:05 -0700)
committer David Zafman <dzafman@redhat.com>
Fri, 24 Jul 2020 19:04:38 +0000 (12:04 -0700)
diff --git a/PendingReleaseNotes b/PendingReleaseNotes

index c0ec6fb1709e4cb8fc0dbcce2efa0466a98ea55a..76cc45fcb1e7c4cf161a0b9017d12d0b5fdb0485 100644 (file)
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -8,3 +8,7 @@
    results on the cluster, which could fill a nearly-full cluster.
    They have been replaced by a tool, currently considered
    experimental, ``rgw-orphan-list``.
+
+* When the noscrub and/or nodeep-scrub flags are set, globally or per pool,
+  in-progress scheduled scrubs of the disabled type are now aborted.
+  User-initiated scrubs are NOT interrupted.
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index d421fc512302f2315a21ca87a3a0c4b9954d6717..40da25a3bc67e3906fbc0ba35a4df67492f9f89e 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -363,6 +363,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
    finish_sync_event(NULL),
    backoff_lock("PG::backoff_lock"),
    scrub_after_recovery(false),
+  save_req_scrub(false),
    active_pushes(0),
    recovery_state(this),
    peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
@@ -1122,6 +1123,7 @@ void PG::clear_primary_state()
  
    scrubber.reserved_peers.clear();
    scrub_after_recovery = false;
+  save_req_scrub = false;
  
    agent_clear();
  }
@@ -1132,7 +1134,7 @@ PG::Scrubber::Scrubber()
     active(false),
     shallow_errors(0), deep_errors(0), fixed(0),
     must_scrub(false), must_deep_scrub(false), must_repair(false),
-   need_auto(false), time_for_deep(false),
+   need_auto(false), req_scrub(false), time_for_deep(false),
     auto_repair(false),
     check_repair(false),
     deep_scrub_on_error(false),
@@ -2635,6 +2637,8 @@ void PG::_finish_recovery(Context *c)
        scrub_after_recovery = false;
        scrubber.must_deep_scrub = true;
        scrubber.check_repair = true;
+      // We remember whether req_scrub was set when scrub_after_recovery was set to true
+      scrubber.req_scrub = save_req_scrub;
        queue_scrub();
      }
    } else {
@@ -4581,6 +4585,7 @@ void PG::scrub_requested(bool deep, bool repair, bool need_auto)
      scrubber.must_repair = repair;
      // User might intervene, so clear this
      scrubber.need_auto = false;
+    scrubber.req_scrub = true;
    }
    reg_next_scrub();
  }
@@ -5239,6 +5244,12 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
    chunky_scrub(handle);
  }
  
+void PG::abort_scrub()  // Common abort path for chunky_scrub(): flag-based and map-change aborts
+{
+  scrub_clear_state();  // clears scrub state, including scrubber.req_scrub
+  scrub_unreserve_replicas();  // presumably releases replica scrub reservations -- verify
+}
+
  /*
   * Chunky scrub scrubs objects one chunk at a time with writes blocked for that
   * chunk.
@@ -5319,12 +5330,29 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
   */
  void PG::chunky_scrub(ThreadPool::TPHandle &handle)
  {
+  // Since repair is only by request and we need to scrub afterward,
+  // treat the same as req_scrub.
+  if (!scrubber.req_scrub) {
+    if (state_test(PG_STATE_DEEP_SCRUB)) {
+      if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
+         pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) {
+           dout(10) << "nodeep_scrub set, aborting" << dendl;
+        abort_scrub();
+        return;
+      }
+    } else if (state_test(PG_STATE_SCRUBBING)) {
+      if (get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) {
+        dout(10) << "noscrub set, aborting" << dendl;
+        abort_scrub();
+        return;
+      }
+    }
+  }
    // check for map changes
    if (scrubber.is_chunky_scrub_active()) {
      if (scrubber.epoch_start != info.history.same_interval_since) {
-      dout(10) << "scrub  pg changed, aborting" << dendl;
-      scrub_clear_state();
-      scrub_unreserve_replicas();
+      dout(10) << "scrub pg changed, aborting" << dendl;
+      abort_scrub();
        return;
      }
    }
@@ -5705,6 +5733,7 @@ void PG::scrub_clear_state(bool has_error)
    state_clear(PG_STATE_DEEP_SCRUB);
    publish_stats_to_osd();
  
+  scrubber.req_scrub = false;
    // local -> nothing.
    if (scrubber.local_reserved) {
      osd->dec_scrubs_local();
@@ -5958,7 +5987,8 @@ void PG::scrub_finish()
      } else if (has_error) {
        // Deep scrub in order to get corrected error counts
        scrub_after_recovery = true;
-      dout(20) << __func__ << " Set scrub_after_recovery" << dendl;
+      save_req_scrub = scrubber.req_scrub;
+      dout(20) << __func__ << " Set scrub_after_recovery, req_scrub=" << save_req_scrub << dendl;
      } else if (scrubber.shallow_errors || scrubber.deep_errors) {
        // We have errors but nothing can be fixed, so there is no repair
        // possible.
@@ -6673,6 +6703,8 @@ ostream& operator<<(ostream& out, const PG& pg)
      out << " TIME_FOR_DEEP";
    if (pg.scrubber.need_auto)
      out << " NEED_AUTO";
+  if (pg.scrubber.req_scrub)
+    out << " REQ_SCRUB";
  
    //out << " (" << pg.pg_log.get_tail() << "," << pg.pg_log.get_head() << "]";
    if (pg.pg_log.get_missing().num_missing()) {
diff --git a/src/osd/PG.h b/src/osd/PG.h

index b9383376f75b522eb1311e6973bc31ecf38ddba9..1ebf0c8f875fbfb12dff3914fa830fec599c3b1a 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1717,7 +1717,7 @@ public:
      utime_t sleep_start;
  
      // flags to indicate explicitly requested scrubs (by admin)
-    bool must_scrub, must_deep_scrub, must_repair, need_auto;
+    bool must_scrub, must_deep_scrub, must_repair, need_auto, req_scrub;
  
      // Priority to use for scrub scheduling
      unsigned priority = 0;
@@ -1842,6 +1842,7 @@ public:
        must_deep_scrub = false;
        must_repair = false;
        need_auto = false;
+      req_scrub = false;
        time_for_deep = false;
        auto_repair = false;
        check_repair = false;
@@ -1878,6 +1879,7 @@ public:
  
  protected:
    bool scrub_after_recovery;
+  bool save_req_scrub; // Saved for scrub_after_recovery
  
    int active_pushes;
  
@@ -1896,6 +1898,7 @@ protected:
      const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
      pg_shard_t bad_peer);
  
+  void abort_scrub();
    void chunky_scrub(ThreadPool::TPHandle &handle);
    void scrub_compare_maps();
    /**
author	David Zafman <dzafman@redhat.com>
	Thu, 2 Jul 2020 17:05:57 +0000 (10:05 -0700)
committer	David Zafman <dzafman@redhat.com>
	Fri, 24 Jul 2020 19:04:38 +0000 (12:04 -0700)
PendingReleaseNotes		patch \| blob \| history
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history