From: Ronen Friedman Date: Mon, 2 Sep 2024 18:09:13 +0000 (-0500) Subject: osd/scrub: add 'snap-trimming' as a distinct delay reason X-Git-Tag: v20.0.0~1113^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a84c3c457547498deec6998651158b5d5107f77c;p=ceph.git osd/scrub: add 'snap-trimming' as a distinct delay reason allowing the configuration of lower delay times (compared to 'pg_state', now denoting PGs that are not active or not clean) for PGs that failed to be scrubbed due to performing snap-trimming. Signed-off-by: Ronen Friedman --- diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index 946d1bfa1e942..226e22e18ff33 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -560,6 +560,17 @@ options: see_also: - osd_scrub_retry_delay with_legacy: false +- name: osd_scrub_retry_trimming + type: int + level: advanced + desc: Period (in seconds) before retrying to scrub a previously snap-trimming PG + long_desc: Minimum delay after a failed attempt to scrub a PG that was performing + snap trimming and not available for scrubbing. + default: 10 + min: 1 + see_also: + - osd_scrub_retry_delay + with_legacy: false - name: osd_scrub_disable_reservation_queuing type: bool level: advanced diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 0e044810fcd67..b0bdc9fcb9594 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -2332,7 +2332,8 @@ Scrub::schedule_result_t PgScrubber::start_scrub_session( // i.e. some time before setting 'snaptrim'. dout(10) << __func__ << ": cannot scrub while snap-trimming" << dendl; requeue_penalized( - s_or_d, delay_both_targets_t::yes, delay_cause_t::pg_state, clock_now); + s_or_d, delay_both_targets_t::yes, delay_cause_t::snap_trimming, + clock_now); return schedule_result_t::target_specific_failure; } diff --git a/src/osd/scrubber/scrub_job.cc b/src/osd/scrubber/scrub_job.cc index dd9f8b56de038..c74648bae2578 100644 --- a/src/osd/scrubber/scrub_job.cc +++ b/src/osd/scrubber/scrub_job.cc @@ -313,6 +313,9 @@ SchedTarget& ScrubJob::delay_on_failure( case delay_cause_t::pg_state: delay = seconds(cct->_conf.get_val("osd_scrub_retry_pg_state")); break; + case delay_cause_t::snap_trimming: + delay = seconds(cct->_conf.get_val("osd_scrub_retry_trimming")); + break; case delay_cause_t::local_resources: default: // for all other possible delay causes: use the default delay diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 60c1a68d2bea4..5e510a03a82ef 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -229,7 +229,8 @@ enum class delay_cause_t { none, ///< scrub attempt was successful replicas, ///< failed to reserve replicas flags, ///< noscrub or nodeep-scrub - pg_state, ///< e.g. snap-trimming + pg_state, ///< not active+clean + snap_trimming, ///< snap-trimming is in progress restricted_time, ///< time restrictions or busy CPU local_resources, ///< too many scrubbing PGs aborted, ///< scrub was aborted w/ unspecified reason @@ -252,6 +253,7 @@ struct formatter : ::fmt::formatter { case replicas: desc = "replicas"; break; case flags: desc = "noscrub"; break; case pg_state: desc = "pg-state"; break; + case snap_trimming: desc = "snap-trim"; break; case restricted_time: desc = "time/load"; break; case local_resources: desc = "local-cnt"; break; case aborted: desc = "aborted"; break;