]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: support osd_scrub_extended_sleep
author: Jeegn Chen <jeegnchen@tencent.com>
Fri, 26 Jul 2019 02:09:01 +0000 (10:09 +0800)
committer: Jeegn Chen <jeegnchen@tencent.com>
Mon, 12 Aug 2019 08:54:36 +0000 (16:54 +0800)
1. Always use osd_scrub_sleep for manually initiated
   scrubs.
2. When scrub_time_permit() returns true for a scheduled
   scrub, the existing osd_scrub_sleep is used.
3. When scrub_time_permit() returns false for a scheduled
   scrub, there are two cases:
   3.1 If osd_scrub_extended_sleep <= osd_scrub_sleep,
       use osd_scrub_sleep.
   3.2 Otherwise, use osd_scrub_extended_sleep.

Fixes: http://tracker.ceph.com/issues/40955
Signed-off-by: Jeegn Chen <jeegnchen@tencent.com>
qa/standalone/scrub/osd-scrub-test.sh
src/common/legacy_config_opts.h
src/common/options.cc
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc

index c530233ea820b3ee07ea8be8bfc1c5458dcf37c2..b99ef328c81ff7b163f1e4c227ade3151393074a 100755 (executable)
@@ -187,6 +187,49 @@ function TEST_interval_changes() {
     teardown $dir || return 1
 }
 
+function TEST_scrub_extented_sleep() {
+    local dir=$1
+    local poolname=test
+    local OSDS=3
+    local objects=15
+
+    TESTDATA="testdata.$$"
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=3 || return 1
+    run_mgr $dir x || return 1
+    local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
+    local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')
+    for osd in $(seq 0 $(expr $OSDS - 1))
+    do
+      run_osd $dir $osd --osd_scrub_sleep=0 \
+                        --osd_scrub_extended_sleep=10 \
+                        --bluestore_cache_autotune=false \
+                        --osd_scrub_begin_hour=$scrub_begin_hour \
+                        --osd_scrub_end_hour=$scrub_end_hour || return 1
+    done
+
+    # Create a pool with a single pg
+    create_pool $poolname 1 1
+    wait_for_clean || return 1
+
+    # Trigger a scrub on a PG
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid || return 1
+
+    # Due to the long delay, the scrub should not be done within 3 seconds
+    for ((i=0; i < 3; i++)); do
+        if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
+            return 1
+        fi
+        sleep 1
+    done
+
+    teardown $dir || return 1
+}
+
 main osd-scrub-test "$@"
 
 # Local Variables:
index df13b12c80f53ca969d78e3bbfda75d3cecda3eb..be8d89728063ffbd9813cdf1855351a7484ba8e9 100644 (file)
@@ -717,6 +717,7 @@ OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE)   // the probability to back off the
 OPTION(osd_scrub_chunk_min, OPT_INT)
 OPTION(osd_scrub_chunk_max, OPT_INT)
 OPTION(osd_scrub_sleep, OPT_FLOAT)   // sleep between [deep]scrub ops
+OPTION(osd_scrub_extended_sleep, OPT_FLOAT)   // more sleep between [deep]scrub ops
 OPTION(osd_scrub_auto_repair, OPT_BOOL)   // whether auto-repair inconsistencies upon deep-scrubbing
 OPTION(osd_scrub_auto_repair_num_errors, OPT_U32)   // only auto-repair when number of errors is below this threshold
 OPTION(osd_deep_scrub_interval, OPT_FLOAT) // once a week
index 0d07fa57ed2e7bfa4ca68ffe165bca6cb8f559eb..b457d039fef9f8c25f33ebc122dfb0a9cdf8d6e1 100644 (file)
@@ -3503,6 +3503,14 @@ std::vector<Option> get_global_options() {
     .set_default(0)
     .set_description("Duration to inject a delay during scrubbing"),
 
+    Option("osd_scrub_extended_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description("Duration to inject a delay during scrubbing out of scrubbing hours")
+    .add_see_also("osd_scrub_begin_hour")
+    .add_see_also("osd_scrub_end_hour")
+    .add_see_also("osd_scrub_begin_week_day")
+    .add_see_also("osd_scrub_end_week_day"),
+
     Option("osd_scrub_auto_repair", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(false)
     .set_description("Automatically repair damaged objects detected during scrub"),
index 119a1b0fc4a7edeb0534e41f683dce3dc27424a2..a94bf687f4f05e1804fbbe9717f5bbe6a463e681 100644 (file)
@@ -7203,6 +7203,20 @@ bool OSDService::ScrubJob::ScrubJob::operator<(const OSDService::ScrubJob& rhs)
   return pgid < rhs.pgid;
 }
 
+double OSD::scrub_sleep_time(bool must_scrub)
+{
+  if (must_scrub) {
+    return cct->_conf->osd_scrub_sleep;
+  }
+  utime_t now = ceph_clock_now();
+  if (scrub_time_permit(now)) {
+    return cct->_conf->osd_scrub_sleep;
+  }
+  double normal_sleep = cct->_conf->osd_scrub_sleep;
+  double extended_sleep = cct->_conf->osd_scrub_extended_sleep;
+  return std::max(extended_sleep, normal_sleep);
+}
+
 bool OSD::scrub_time_permit(utime_t now)
 {
   struct tm bdt;
index 95e772bd8e53a0354c38aafd6fdf92d3475995cd..19202fd83a52d6ed4455c6ca363e9bea1acb192a 100644 (file)
@@ -1871,6 +1871,8 @@ protected:
     return service.get_tid();
   }
 
+  double scrub_sleep_time(bool must_scrub);
+
   // -- generic pg peering --
   PeeringCtx create_context();
   void dispatch_context(PeeringCtx &ctx, PG *pg, OSDMapRef curmap,
index ac8ec080e39f0371f5c4fd505881f357227561a6..8aeae763254cb05bf02afaddd8cfd51915a7915d 100644 (file)
@@ -2419,7 +2419,9 @@ void PG::replica_scrub(
  */
 void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
 {
-  if (cct->_conf->osd_scrub_sleep > 0 &&
+  OSDService *osds = osd;
+  double scrub_sleep = osds->osd->scrub_sleep_time(scrubber.must_scrub);
+  if (scrub_sleep > 0 &&
       (scrubber.state == PG::Scrubber::NEW_CHUNK ||
        scrubber.state == PG::Scrubber::INACTIVE) &&
        scrubber.needs_sleep) {
@@ -2427,7 +2429,6 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
     dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
 
     // Do an async sleep so we don't block the op queue
-    OSDService *osds = osd;
     spg_t pgid = get_pgid();
     int state = scrubber.state;
     auto scrub_requeue_callback =
@@ -2452,7 +2453,7 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
           pg->unlock();
         });
     std::lock_guard l(osd->sleep_lock);
-    osd->sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
+    osd->sleep_timer.add_event_after(scrub_sleep,
                                            scrub_requeue_callback);
     scrubber.sleeping = true;
     scrubber.sleep_start = ceph_clock_now();