git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: add 'snap-trimming' as a distinct delay reason
author: Ronen Friedman <rfriedma@redhat.com>
Mon, 2 Sep 2024 18:09:13 +0000 (13:09 -0500)
committer: Ronen Friedman <rfriedma@redhat.com>
Wed, 4 Sep 2024 12:07:46 +0000 (07:07 -0500)
This allows configuring a lower retry delay for PGs that failed to be
scrubbed because they were performing snap-trimming, compared to the
delay used for 'pg_state'. The 'pg_state' delay reason now denotes only
PGs that are not active or not clean.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/common/options/osd.yaml.in
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_job.cc
src/osd/scrubber_common.h

index 946d1bfa1e9426ad3577bda01c26ec3b7c150329..226e22e18ff3327abf237741bf0b9dae1b95a218 100644 (file)
@@ -560,6 +560,17 @@ options:
   see_also:
   - osd_scrub_retry_delay
   with_legacy: false
+- name: osd_scrub_retry_trimming
+  type: int
+  level: advanced
+  desc: Period (in seconds) before retrying to scrub a previously snap-trimming PG
+  long_desc: Minimum delay after a failed attempt to scrub a PG that was performing
+    snap trimming and not available for scrubbing.
+  default: 10
+  min: 1
+  see_also:
+  - osd_scrub_retry_delay
+  with_legacy: false
 - name: osd_scrub_disable_reservation_queuing
   type: bool
   level: advanced
index 0e044810fcd673db8e11f7cd456eb45e2930767d..b0bdc9fcb9594318aad791ae79fcf379c5eb1aeb 100644 (file)
@@ -2332,7 +2332,8 @@ Scrub::schedule_result_t PgScrubber::start_scrub_session(
     // i.e. some time before setting 'snaptrim'.
     dout(10) << __func__ << ": cannot scrub while snap-trimming" << dendl;
     requeue_penalized(
-       s_or_d, delay_both_targets_t::yes, delay_cause_t::pg_state, clock_now);
+       s_or_d, delay_both_targets_t::yes, delay_cause_t::snap_trimming,
+       clock_now);
     return schedule_result_t::target_specific_failure;
   }
 
index dd9f8b56de03803711d9e7184da84dc3dcaf6ef0..c74648bae2578ce354c7ed9c818abfe4369bf2bb 100644 (file)
@@ -313,6 +313,9 @@ SchedTarget& ScrubJob::delay_on_failure(
     case delay_cause_t::pg_state:
       delay = seconds(cct->_conf.get_val<int64_t>("osd_scrub_retry_pg_state"));
       break;
+    case delay_cause_t::snap_trimming:
+      delay = seconds(cct->_conf.get_val<int64_t>("osd_scrub_retry_trimming"));
+      break;
     case delay_cause_t::local_resources:
     default:
       // for all other possible delay causes: use the default delay
index 60c1a68d2bea42935ee6eec99457c3171573f650..5e510a03a82ef86947879a1cc4f355d24f6e1660 100644 (file)
@@ -229,7 +229,8 @@ enum class delay_cause_t {
   none,                    ///< scrub attempt was successful
   replicas,        ///< failed to reserve replicas
   flags,           ///< noscrub or nodeep-scrub
-  pg_state,        ///< e.g. snap-trimming
+  pg_state,        ///< not active+clean
+  snap_trimming,    ///< snap-trimming is in progress
   restricted_time,  ///< time restrictions or busy CPU
   local_resources,  ///< too many scrubbing PGs
   aborted,         ///< scrub was aborted w/ unspecified reason
@@ -252,6 +253,7 @@ struct formatter<Scrub::delay_cause_t> : ::fmt::formatter<std::string_view> {
       case replicas:            desc = "replicas"; break;
       case flags:               desc = "noscrub"; break;
       case pg_state:            desc = "pg-state"; break;
+      case snap_trimming:       desc = "snap-trim"; break;
       case restricted_time:     desc = "time/load"; break;
       case local_resources:     desc = "local-cnt"; break;
       case aborted:             desc = "aborted"; break;