doc: Document which options are disabled by mClock.

author Niklas Hambüchen <mail@nh2.me>

Tue, 9 Apr 2024 03:55:38 +0000 (05:55 +0200)

committer Zac Dover <zac.dover@proton.me>

Fri, 8 Nov 2024 10:10:36 +0000 (20:10 +1000)
author Niklas Hambüchen <mail@nh2.me>
Tue, 9 Apr 2024 03:55:38 +0000 (05:55 +0200)
committer Zac Dover <zac.dover@proton.me>
Fri, 8 Nov 2024 10:10:36 +0000 (20:10 +1000)
diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst

index fa3e59b2eac6fccdf2ca0100dd3057750e99ab53..d797fabf1d1a07bcc99c0ccc4ddb7621d49e603a 100644 (file)
--- a/doc/rados/configuration/osd-config-ref.rst
+++ b/doc/rados/configuration/osd-config-ref.rst
@@ -355,6 +355,8 @@ considerably. To maintain operational performance, Ceph performs this migration
  with 'backfilling', which allows Ceph to set backfill operations to a lower
  priority than requests to read or write data.
  
+.. note:: Some of these settings are automatically reset if the `mClock`_
+          scheduler is active, see `mClock backfill`_.
  
  .. confval:: osd_max_backfills
  .. confval:: osd_backfill_scan_min
@@ -397,6 +399,9 @@ To maintain operational performance, Ceph performs recovery with limitations on
  the number recovery requests, threads and object chunk sizes which allows Ceph
  perform well in a degraded state.
  
+.. note:: Some of these settings are automatically reset if the `mClock`_
+          scheduler is active, see `mClock backfill`_.
+
  .. confval:: osd_recovery_delay_start
  .. confval:: osd_recovery_max_active
  .. confval:: osd_recovery_max_active_hdd
@@ -434,6 +439,8 @@ Miscellaneous
  .. _pool: ../../operations/pools
  .. _Configuring Monitor/OSD Interaction: ../mon-osd-interaction
  .. _Monitoring OSDs and PGs: ../../operations/monitoring-osd-pg#peering
+.. _mClock: ../mclock-config-ref
+.. _mClock backfill: ../mclock-config-ref#recovery-backfill-options
  .. _Pool & PG Config Reference: ../pool-pg-config-ref
  .. _Journal Config Reference: ../journal-ref
  .. _cache target dirty high ratio: ../../operations/pools#cache-target-dirty-high-ratio
diff --git a/doc/rados/operations/monitoring-osd-pg.rst b/doc/rados/operations/monitoring-osd-pg.rst

index d7530827a6a794d563cc9ce3b9828656772a6875..1223577cd0909a860416066cdb0b54d9a62e8b89 100644 (file)
--- a/doc/rados/operations/monitoring-osd-pg.rst
+++ b/doc/rados/operations/monitoring-osd-pg.rst
@@ -419,7 +419,10 @@ conditions change.
  Ceph provides a number of settings to manage the load spike associated with the
  reassignment of PGs to an OSD (especially a new OSD). The ``osd_max_backfills``
  setting specifies the maximum number of concurrent backfills to and from an OSD
-(default: 1). The ``backfill_full_ratio`` setting allows an OSD to refuse a
+(default: 1; note that you cannot change this while the `mClock`_ scheduler is
+active unless you set ``osd_mclock_override_recovery_settings`` to ``true``;
+see `mClock backfill`_).
+The ``backfill_full_ratio`` setting allows an OSD to refuse a
  backfill request if the OSD is approaching its full ratio (default: 90%). This
  setting can be changed with the ``ceph osd set-backfillfull-ratio`` command. If
  an OSD refuses a backfill request, the ``osd_backfill_retry_interval`` setting
@@ -545,6 +548,8 @@ performing the migration. For details, see the `Architecture`_ section.
  .. _data placement: ../data-placement
  .. _pool: ../pools
  .. _placement group: ../placement-groups
+.. _mClock: ../../configuration/mclock-config-ref
+.. _mClock backfill: ../../configuration/mclock-config-ref#recovery-backfill-options
  .. _Architecture: ../../../architecture
  .. _OSD Not Running: ../../troubleshooting/troubleshooting-osd#osd-not-running
  .. _Troubleshooting PG Errors: ../../troubleshooting/troubleshooting-pg#troubleshooting-pg-errors
diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in

index 3cee0b71e803510c4d6b23a6ff02aba423463286..111501e0608f38750840c0bb8cde93fc4284b0c8 100644 (file)
--- a/src/common/options/osd.yaml.in
+++ b/src/common/options/osd.yaml.in
@@ -48,7 +48,10 @@ options:
      in recovery and 1 shard of another recovering PG.
    fmt_desc: The maximum number of backfills allowed to or from a single OSD.
      Note that this is applied separately for read and write operations.
+    This setting is automatically reset when the mClock scheduler is used.
    default: 1
+  see_also:
+  - osd_mclock_override_recovery_settings
    flags:
    - runtime
    with_legacy: true
@@ -84,6 +87,7 @@ options:
    fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
      Increasing this value will slow down recovery operation while
      client operations will be less impacted.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    flags:
    - runtime
@@ -94,6 +98,7 @@ options:
    desc: Time in seconds to sleep before next recovery or backfill op for HDDs
    fmt_desc: Time in seconds to sleep before next recovery or backfill op
      for HDDs.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0.1
    flags:
    - runtime
@@ -104,6 +109,7 @@ options:
    desc: Time in seconds to sleep before next recovery or backfill op for SSDs
    fmt_desc: Time in seconds to sleep before the next recovery or backfill op
      for SSDs.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    see_also:
    - osd_recovery_sleep
@@ -117,6 +123,7 @@ options:
      on HDD and journal is on SSD
    fmt_desc: Time in seconds to sleep before the next recovery or backfill op
      when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0.025
    see_also:
    - osd_recovery_sleep
@@ -129,6 +136,7 @@ options:
    fmt_desc: Time in seconds to sleep before next snap trim op.
      Increasing this value will slow down snap trimming.
      This option overrides backend specific variants.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    flags:
    - runtime
@@ -137,6 +145,7 @@ options:
    type: float
    level: advanced
    desc: Time in seconds to sleep before next snap trim for HDDs
+  note: This setting is ignored when the mClock scheduler is used.
    default: 5
    flags:
    - runtime
@@ -146,6 +155,7 @@ options:
    desc: Time in seconds to sleep before next snap trim for SSDs
    fmt_desc: Time in seconds to sleep before next snap trim op
      for SSD OSDs (including NVMe).
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    flags:
    - runtime
@@ -156,6 +166,7 @@ options:
      is on SSD
    fmt_desc: Time in seconds to sleep before next snap trim op
      when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 2
    flags:
    - runtime
@@ -170,6 +181,7 @@ options:
    desc: Maximum concurrent scrubs on a single OSD
    fmt_desc: The maximum number of simultaneous scrub operations for
      a Ceph OSD Daemon.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 3
    with_legacy: true
  - name: osd_scrub_during_recovery
@@ -341,9 +353,11 @@ options:
  - name: osd_scrub_sleep
    type: float
    level: advanced
-  desc: Duration to inject a delay during scrubbing
-  fmt_desc: Time to sleep before scrubbing the next group of chunks. Increasing this value will slow
-    down the overall rate of scrubbing so that client operations will be less impacted.
+  desc: Duration (in seconds) of delay injected between chunks when scrubbing
+  fmt_desc: Sleep time in seconds before scrubbing the next group of objects (the next chunk).
+    Increasing this value will slow down the overall rate of scrubbing, reducing scrub
+    impact on client operations.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    flags:
    - runtime
@@ -352,7 +366,13 @@ options:
  - name: osd_scrub_extended_sleep
    type: float
    level: advanced
-  desc: Duration to inject a delay during scrubbing out of scrubbing hours
+  desc: Duration (in seconds) of delay injected between chunks when scrubbing out
+    of scrubbing hours
+  fmt_desc: Sleep time in seconds before scrubbing the next group of objects (the next chunk).
+    This configuration value is used for scrubbing out of scrubbing hours.
+    Increasing this value will slow down the overall rate of scrubbing, reducing scrub
+    impact on client operations.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    see_also:
    - osd_scrub_begin_hour
@@ -1170,10 +1190,12 @@ options:
      is ``0``, which means that the ``hdd`` or ``ssd`` values
      (below) are used, depending on the type of the primary
      device backing the OSD.
+    This setting is automatically reset when the mClock scheduler is used.
    default: 0
    see_also:
    - osd_recovery_max_active_hdd
    - osd_recovery_max_active_ssd
+  - osd_mclock_override_recovery_settings
    flags:
    - runtime
    with_legacy: true
@@ -1184,10 +1206,12 @@ options:
      devices)
    fmt_desc: The number of active recovery requests per OSD at one time, if the
      primary device is rotational.
+  note: This setting is automatically reset when the mClock scheduler is used.
    default: 3
    see_also:
    - osd_recovery_max_active
    - osd_recovery_max_active_ssd
+  - osd_mclock_override_recovery_settings
    flags:
    - runtime
    with_legacy: true
@@ -1198,10 +1222,12 @@ options:
      solid state devices)
    fmt_desc: The number of active recovery requests per OSD at one time, if the
      primary device is non-rotational (i.e., an SSD).
+  note: This setting is automatically reset when the mClock scheduler is used.
    default: 10
    see_also:
    - osd_recovery_max_active
    - osd_recovery_max_active_hdd
+  - osd_mclock_override_recovery_settings
    flags:
    - runtime
    with_legacy: true
@@ -1289,13 +1315,15 @@ options:
      below)
    fmt_desc: Time in seconds to sleep before the next removal transaction. This
      throttles the PG deletion process.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 0
    flags:
    - runtime
  - name: osd_delete_sleep_hdd
    type: float
    level: advanced
-  desc: Time in seconds to sleep before next removal transaction for HDDs
+  desc: Time in seconds to sleep before next removal transaction for HDDs.
+  note: This setting is ignored when the mClock scheduler is used.
    default: 5
    flags:
    - runtime
@@ -1303,6 +1331,7 @@ options:
    type: float
    level: advanced
    desc: Time in seconds to sleep before next removal transaction for SSDs
+  note: This setting is ignored when the mClock scheduler is used.
    default: 1
    flags:
    - runtime
@@ -1311,6 +1340,7 @@ options:
    level: advanced
    desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
      and OSD journal or WAL+DB is on SSD
+  note: This setting is ignored when the mClock scheduler is used.
    default: 1
    flags:
    - runtime
author	Niklas Hambüchen <mail@nh2.me>
	Tue, 9 Apr 2024 03:55:38 +0000 (05:55 +0200)
committer	Zac Dover <zac.dover@proton.me>
	Fri, 8 Nov 2024 10:10:36 +0000 (20:10 +1000)
doc/rados/configuration/osd-config-ref.rst		patch \| blob \| history
doc/rados/operations/monitoring-osd-pg.rst		patch \| blob \| history
src/common/options/osd.yaml.in		patch \| blob \| history