with 'backfilling', which allows Ceph to set backfill operations to a lower
priority than requests to read or write data.
+.. note:: Some of these settings are automatically reset if the `mClock`_
+   scheduler is active; see `mClock backfill`_.
+
.. confval:: osd_max_backfills
.. confval:: osd_backfill_scan_min
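+
+For example (a rough sketch only; the values shown are illustrative rather
+than recommendations), you can check which scheduler is active and then raise
+the ``osd_max_backfills`` limit at runtime. When the active scheduler is
+``mclock_scheduler``, ``osd_mclock_override_recovery_settings`` must be
+enabled first::
+
+    ceph config get osd osd_op_queue
+    ceph config set osd osd_mclock_override_recovery_settings true
+    ceph config set osd osd_max_backfills 2
+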
the number of recovery requests, threads, and object chunk sizes, which allows
Ceph to perform well in a degraded state.
+.. note:: Some of these settings are automatically reset if the `mClock`_
+   scheduler is active; see `mClock backfill`_.
+
.. confval:: osd_recovery_delay_start
.. confval:: osd_recovery_max_active
.. confval:: osd_recovery_max_active_hdd
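+
+Similarly (again only a sketch with illustrative values), the recovery limit
+for OSDs backed by rotational devices can be queried and adjusted at runtime;
+when the mClock scheduler is active, this likewise requires
+``osd_mclock_override_recovery_settings`` to be enabled (see
+`mClock backfill`_)::
+
+    ceph config get osd osd_recovery_max_active_hdd
+    ceph config set osd osd_recovery_max_active_hdd 5
+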
.. _pool: ../../operations/pools
.. _Configuring Monitor/OSD Interaction: ../mon-osd-interaction
.. _Monitoring OSDs and PGs: ../../operations/monitoring-osd-pg#peering
+.. _mClock: ../mclock-config-ref
+.. _mClock backfill: ../mclock-config-ref#recovery-backfill-options
.. _Pool & PG Config Reference: ../pool-pg-config-ref
.. _Journal Config Reference: ../journal-ref
.. _cache target dirty high ratio: ../../operations/pools#cache-target-dirty-high-ratio
Ceph provides a number of settings to manage the load spike associated with the
reassignment of PGs to an OSD (especially a new OSD). The ``osd_max_backfills``
setting specifies the maximum number of concurrent backfills to and from an OSD
-(default: 1). The ``backfill_full_ratio`` setting allows an OSD to refuse a
+(default: 1). This value cannot be changed while the `mClock`_ scheduler is
+active unless ``osd_mclock_override_recovery_settings`` is set to ``true``
+(see `mClock backfill`_).
+The ``backfill_full_ratio`` setting allows an OSD to refuse a
backfill request if the OSD is approaching its full ratio (default: 90%). This
setting can be changed with the ``ceph osd set-backfillfull-ratio`` command. If
an OSD refuses a backfill request, the ``osd_backfill_retry_interval`` setting
.. _data placement: ../data-placement
.. _pool: ../pools
.. _placement group: ../placement-groups
+.. _mClock: ../../configuration/mclock-config-ref
+.. _mClock backfill: ../../configuration/mclock-config-ref#recovery-backfill-options
.. _Architecture: ../../../architecture
.. _OSD Not Running: ../../troubleshooting/troubleshooting-osd#osd-not-running
.. _Troubleshooting PG Errors: ../../troubleshooting/troubleshooting-pg#troubleshooting-pg-errors
in recovery and 1 shard of another recovering PG.
fmt_desc: The maximum number of backfills allowed to or from a single OSD.
Note that this is applied separately for read and write operations.
+ note: This setting is automatically reset when the mClock scheduler is used.
default: 1
+ see_also:
+ - osd_mclock_override_recovery_settings
flags:
- runtime
with_legacy: true
fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
Increasing this value will slow down recovery operations while
client operations will be less impacted.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
flags:
- runtime
desc: Time in seconds to sleep before next recovery or backfill op for HDDs
fmt_desc: Time in seconds to sleep before next recovery or backfill op
for HDDs.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0.1
flags:
- runtime
desc: Time in seconds to sleep before next recovery or backfill op for SSDs
fmt_desc: Time in seconds to sleep before the next recovery or backfill op
for SSDs.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
see_also:
- osd_recovery_sleep
on HDD and journal is on SSD
fmt_desc: Time in seconds to sleep before the next recovery or backfill op
when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0.025
see_also:
- osd_recovery_sleep
fmt_desc: Time in seconds to sleep before next snap trim op.
Increasing this value will slow down snap trimming.
This option overrides backend specific variants.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
flags:
- runtime
type: float
level: advanced
desc: Time in seconds to sleep before next snap trim for HDDs
+ note: This setting is ignored when the mClock scheduler is used.
default: 5
flags:
- runtime
desc: Time in seconds to sleep before next snap trim for SSDs
fmt_desc: Time in seconds to sleep before next snap trim op
for SSD OSDs (including NVMe).
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
flags:
- runtime
is on SSD
fmt_desc: Time in seconds to sleep before next snap trim op
when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
+ note: This setting is ignored when the mClock scheduler is used.
default: 2
flags:
- runtime
desc: Maximum concurrent scrubs on a single OSD
fmt_desc: The maximum number of simultaneous scrub operations for
a Ceph OSD Daemon.
+ note: This setting is ignored when the mClock scheduler is used.
default: 3
with_legacy: true
- name: osd_scrub_during_recovery
fmt_desc: Sleep time in seconds before scrubbing the next group of objects (the next chunk).
Increasing this value will slow down the overall rate of scrubbing, reducing scrub
impact on client operations.
- This setting is ignored when the mClock scheduler is used.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
flags:
- runtime
This configuration value is used when scrubbing outside of scrubbing hours.
Increasing this value will slow down the overall rate of scrubbing, reducing scrub
impact on client operations.
- This setting is ignored when the mClock scheduler is used.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
see_also:
- osd_scrub_begin_hour
is ``0``, which means that the ``hdd`` or ``ssd`` values
(below) are used, depending on the type of the primary
device backing the OSD.
+ note: This setting is automatically reset when the mClock scheduler is used.
default: 0
see_also:
- osd_recovery_max_active_hdd
- osd_recovery_max_active_ssd
+ - osd_mclock_override_recovery_settings
flags:
- runtime
with_legacy: true
devices)
fmt_desc: The number of active recovery requests per OSD at one time, if the
primary device is rotational.
+ note: This setting is automatically reset when the mClock scheduler is used.
default: 3
see_also:
- osd_recovery_max_active
- osd_recovery_max_active_ssd
+ - osd_mclock_override_recovery_settings
flags:
- runtime
with_legacy: true
solid state devices)
fmt_desc: The number of active recovery requests per OSD at one time, if the
primary device is non-rotational (i.e., an SSD).
+ note: This setting is automatically reset when the mClock scheduler is used.
default: 10
see_also:
- osd_recovery_max_active
- osd_recovery_max_active_hdd
+ - osd_mclock_override_recovery_settings
flags:
- runtime
with_legacy: true
overrides _ssd, _hdd, and _hybrid if non-zero.
fmt_desc: Time in seconds to sleep before the next removal transaction. This
throttles the PG deletion process.
+ note: This setting is ignored when the mClock scheduler is used.
default: 0
flags:
- runtime
- name: osd_delete_sleep_hdd
type: float
level: advanced
- desc: Time in seconds to sleep before next removal transaction for HDDs
+ desc: Time in seconds to sleep before next removal transaction for HDDs.
+ note: This setting is ignored when the mClock scheduler is used.
default: 5
flags:
- runtime
type: float
level: advanced
desc: Time in seconds to sleep before next removal transaction for SSDs
+ note: This setting is ignored when the mClock scheduler is used.
default: 1
flags:
- runtime
level: advanced
desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
and OSD journal or WAL+DB is on SSD
+ note: This setting is ignored when the mClock scheduler is used.
default: 1
flags:
- runtime