pg: add auto-repair for EC pool

author Guang Yang <yguang@yahoo-inc.com>

Wed, 7 Oct 2015 04:34:34 +0000 (04:34 +0000)

committer Guang Yang <yguang@yahoo-inc.com>

Fri, 16 Oct 2015 20:29:48 +0000 (20:29 +0000)
author Guang Yang <yguang@yahoo-inc.com>
Wed, 7 Oct 2015 04:34:34 +0000 (04:34 +0000)
committer Guang Yang <yguang@yahoo-inc.com>
Fri, 16 Oct 2015 20:29:48 +0000 (20:29 +0000)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h

index 282fc0ddc89bc00b01161f7c7ceb0e21a1461a85..9c1a3c2f6761cea8de3610bdc726eaeec4470680 100644 (file)
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -674,6 +674,8 @@ OPTION(osd_scrub_interval_randomize_ratio, OPT_FLOAT, 0.5) // randomize the sche
  OPTION(osd_scrub_chunk_min, OPT_INT, 5)
  OPTION(osd_scrub_chunk_max, OPT_INT, 25)
  OPTION(osd_scrub_sleep, OPT_FLOAT, 0)   // sleep between [deep]scrub ops
+OPTION(osd_scrub_auto_repair, OPT_BOOL, false)   // whether to auto-repair inconsistencies upon deep-scrubbing
+OPTION(osd_scrub_auto_repair_num_errors, OPT_U32, 5)   // only auto-repair when the number of errors does not exceed this threshold
  OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
  OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
  OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60)   // objects must be this old (seconds) before we update the whole-object digest on scrub
diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h

index a039b70c8a8dc0953ec98ed2b09fc0a9aae1f226..efb284e2d73f8acecd6298a9b5f0108f09404be8 100644 (file)
--- a/src/osd/ECBackend.h
+++ b/src/osd/ECBackend.h
@@ -494,6 +494,7 @@ public:
      ObjectStore::Transaction *t);
  
    bool scrub_supported() { return true; }
+  bool auto_repair_supported() const { return true; }
  
    void be_deep_scrub(
      const hobject_t &obj,
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index e97439cd51417e904311451dd1f6bed41b7867af..b72d231730550c649d279a0b4e00fad75f1e3c3e 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2039,7 +2039,7 @@ bool PG::queue_scrub()
      state_set(PG_STATE_DEEP_SCRUB);
      scrubber.must_deep_scrub = false;
    }
-  if (scrubber.must_repair) {
+  if (scrubber.must_repair || scrubber.auto_repair) {
      state_set(PG_STATE_REPAIR);
      scrubber.must_repair = false;
    }
@@ -3180,6 +3180,21 @@ bool PG::sched_scrub()
        return false;
    }
  
+  if (cct->_conf->osd_scrub_auto_repair
+      && get_pgbackend()->auto_repair_supported()
+      && time_for_deep
+      // respect explicit user commands: do not auto-repair when one is pending
+      && !scrubber.must_repair
+      && !scrubber.must_scrub
+      && !scrubber.must_deep_scrub) {
+    dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
+    scrubber.auto_repair = true;
+  } else {
+    // this happens when the user issues a scrub/repair command during
+    // the scheduling of the scrub/repair (e.g. while requesting reservations)
+    scrubber.auto_repair = false;
+  }
+
    bool ret = true;
    if (!scrubber.reserved) {
      assert(scrubber.reserved_peers.empty());
@@ -4205,6 +4220,13 @@ bool PG::scrub_process_inconsistent()
  void PG::scrub_finish() 
  {
    bool repair = state_test(PG_STATE_REPAIR);
+  // if the repair was requested by auto-repair and there are more errors than
+  // osd_scrub_auto_repair_num_errors, cancel the auto-repair
+  if (repair && scrubber.auto_repair
+      && scrubber.authoritative.size() > cct->_conf->osd_scrub_auto_repair_num_errors) {
+    state_clear(PG_STATE_REPAIR);
+    repair = false;
+  }
    bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
    const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
  
@@ -4940,6 +4962,8 @@ ostream& operator<<(ostream& out, const PG& pg)
  
    if (pg.scrubber.must_repair)
      out << " MUST_REPAIR";
+  if (pg.scrubber.auto_repair)
+    out << " AUTO_REPAIR";
    if (pg.scrubber.must_deep_scrub)
      out << " MUST_DEEP_SCRUB";
    if (pg.scrubber.must_scrub)
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 872c81e6fbf6999849e509b74130bbf11ea39cca..00a172533b96791262abe19eb4cea99fbf1394bb 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1071,6 +1071,7 @@ public:
        active(false), queue_snap_trim(false),
        waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0),
        must_scrub(false), must_deep_scrub(false), must_repair(false),
+      auto_repair(false),
        num_digest_updates_pending(0),
        state(INACTIVE),
        deep(false),
@@ -1099,6 +1100,9 @@ public:
      // flags to indicate explicitly requested scrubs (by admin)
      bool must_scrub, must_deep_scrub, must_repair;
  
+    // this flag indicates whether we would like to do auto-repair of the PG or not
+    bool auto_repair;
+
      // Maps from objects with errors to missing/inconsistent peers
      map<hobject_t, set<pg_shard_t>, hobject_t::BitwiseComparator> missing;
      map<hobject_t, set<pg_shard_t>, hobject_t::BitwiseComparator> inconsistent;
@@ -1187,6 +1191,7 @@ public:
        must_scrub = false;
        must_deep_scrub = false;
        must_repair = false;
+      auto_repair = false;
  
        state = PG::Scrubber::INACTIVE;
        start = hobject_t();
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h

index 52599942b439d0fba2c90c2bf89f364d25ed5b1b..415f95fc3a46ee2035ba8aa6a7b35eb5a76ffccd 100644 (file)
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -548,6 +548,7 @@
       Context *on_complete, bool fast_read = false) = 0;
  
     virtual bool scrub_supported() { return false; }
+   virtual bool auto_repair_supported() const { return false; }
     void be_scan_list(
       ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
       ThreadPool::TPHandle &handle);
author	Guang Yang <yguang@yahoo-inc.com>
	Wed, 7 Oct 2015 04:34:34 +0000 (04:34 +0000)
committer	Guang Yang <yguang@yahoo-inc.com>
	Fri, 16 Oct 2015 20:29:48 +0000 (20:29 +0000)
src/common/config_opts.h		patch \| blob \| history
src/osd/ECBackend.h		patch \| blob \| history
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history
src/osd/PGBackend.h		patch \| blob \| history