default: 100
flags:
- runtime
-- name: osd_numa_prefer_iface
- type: bool
- level: advanced
- desc: prefer IP on network interface on same numa node as storage
- default: true
- see_also:
- - osd_numa_auto_affinity
- flags:
- - startup
-- name: osd_numa_auto_affinity
- type: bool
- level: advanced
- desc: automatically set affinity to numa node when storage and network match
- default: true
- flags:
- - startup
-- name: osd_numa_node
- type: int
- level: advanced
- desc: set affinity to a numa node (-1 for none)
- default: -1
- see_also:
- - osd_numa_auto_affinity
- flags:
- - startup
-- name: osd_smart_report_timeout
- type: uint
- level: advanced
- desc: Timeout (in seconds) for smartctl to run
- default: 5
-# verify backend can support configured max object name length
-- name: osd_check_max_object_name_len_on_startup
- type: bool
- level: dev
- default: true
- with_legacy: true
-- name: osd_max_backfills
- type: uint
- level: advanced
- desc: Maximum number of concurrent local and remote backfills or recoveries per
- OSD
- long_desc: There can be osd_max_backfills local reservations AND the same remote
- reservations per OSD. So a value of 1 lets this OSD participate as 1 PG primary
- in recovery and 1 shard of another recovering PG.
- fmt_desc: The maximum number of backfills allowed to or from a single OSD.
- Note that this is applied separately for read and write operations.
- default: 1
- flags:
- - runtime
- with_legacy: true
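# Illustrative example (not part of the option schema): since osd_max_backfills is a
# runtime option, it can be raised on a live cluster, e.g.
#   ceph config set osd osd_max_backfills 2
# which allows two local plus two remote backfill reservations per OSD.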
-# Minimum recovery priority (255 = max, smaller = lower)
-- name: osd_min_recovery_priority
- type: int
- level: advanced
- desc: Minimum priority below which recovery is not performed
- long_desc: The purpose here is to prevent the cluster from doing *any* lower priority
- work (e.g., rebalancing) below this threshold and focus solely on higher priority
- work (e.g., replicating degraded objects).
- default: 0
- with_legacy: true
-- name: osd_backfill_retry_interval
- type: float
- level: advanced
- desc: how frequently to retry backfill reservations after being denied (e.g., due
- to a full OSD)
- fmt_desc: The number of seconds to wait before retrying backfill requests.
- default: 30
- with_legacy: true
-- name: osd_recovery_retry_interval
- type: float
- level: advanced
- desc: how frequently to retry recovery reservations after being denied (e.g., due
- to a full OSD)
- default: 30
- with_legacy: true
-- name: osd_agent_max_ops
- type: int
- level: advanced
- desc: maximum concurrent tiering operations for tiering agent
- fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
- in the high speed mode.
- default: 4
- with_legacy: true
-- name: osd_agent_max_low_ops
- type: int
- level: advanced
- desc: maximum concurrent low-priority tiering operations for tiering agent
- fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
- in the low speed mode.
- default: 2
- with_legacy: true
-- name: osd_agent_min_evict_effort
- type: float
- level: advanced
- desc: minimum effort to expend evicting clean objects
- default: 0.1
- min: 0
- max: 0.99
- with_legacy: true
-- name: osd_agent_quantize_effort
- type: float
- level: advanced
- desc: size of quantize unit for eviction effort
- default: 0.1
- with_legacy: true
-- name: osd_agent_delay_time
- type: float
- level: advanced
- desc: how long agent should sleep if it has no work to do
- default: 5
- with_legacy: true
-- name: osd_find_best_info_ignore_history_les
- type: bool
- level: dev
- desc: ignore last_epoch_started value when peering AND PROBABLY LOSE DATA
- long_desc: THIS IS AN EXTREMELY DANGEROUS OPTION THAT SHOULD ONLY BE USED AT THE
- DIRECTION OF A DEVELOPER. It makes peering ignore the last_epoch_started value
- when peering, which can allow the OSD to believe an OSD has an authoritative view
- of a PG's contents even when it is in fact old and stale, typically leading to
- data loss (by believing a stale PG is up to date).
- default: false
- with_legacy: true
-# decay atime and hist histograms after how many objects go by
-- name: osd_agent_hist_halflife
- type: int
- level: advanced
- desc: halflife of agent atime and temp histograms
- default: 1000
- with_legacy: true
-# decay atime and hist histograms after how many objects go by
-- name: osd_agent_slop
- type: float
- level: advanced
- desc: slop factor to avoid switching tiering flush and eviction mode
- default: 0.02
- with_legacy: true
-- name: osd_uuid
- type: uuid
- level: advanced
- desc: uuid label for a new OSD
- fmt_desc: The universally unique identifier (UUID) for the Ceph OSD Daemon.
- note: The ``osd_uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
- applies to the entire cluster.
- flags:
- - create
- with_legacy: true
-- name: osd_data
- type: str
- level: advanced
- desc: path to OSD data
- fmt_desc: The path to the OSD's data. You must create the directory when
- deploying Ceph. You should mount a drive for OSD data at this
- mount point. We do not recommend changing the default.
- default: /var/lib/ceph/osd/$cluster-$id
- flags:
- - no_mon_update
- with_legacy: true
-- name: osd_journal
- type: str
- level: advanced
- desc: path to OSD journal (when FileStore backend is in use)
- fmt_desc: The path to the OSD's journal. This may be a path to a file or a
- block device (such as a partition of an SSD). If it is a file,
- you must create the directory to contain it. We recommend using a
- separate fast device when the ``osd_data`` drive is an HDD.
- default: /var/lib/ceph/osd/$cluster-$id/journal
- flags:
- - no_mon_update
- with_legacy: true
-- name: osd_journal_size
- type: size
- level: advanced
- desc: size of FileStore journal (in MiB)
- fmt_desc: The size of the journal in megabytes.
- default: 5_K
- flags:
- - create
- with_legacy: true
-- name: osd_journal_flush_on_shutdown
- type: bool
- level: advanced
- desc: flush FileStore journal contents during clean OSD shutdown
- default: true
- with_legacy: true
-- name: osd_compact_on_start
- type: bool
- level: advanced
- desc: compact OSD's object store's OMAP on start
- default: false
-# flags for specific control purposes during the osd mount() process.
-# e.g., can be 1 to skip over replaying journal
-# or 2 to skip over mounting omap or 3 to skip over both.
-# This might be helpful in case the journal is totally corrupted
-# and we still want to bring the osd daemon back normally, etc.
-- name: osd_os_flags
- type: uint
- level: dev
- desc: flags to skip filestore omap or journal initialization
- default: 0
-- name: osd_max_write_size
- type: size
- level: advanced
- desc: Maximum size of a RADOS write operation in megabytes
- long_desc: This setting prevents clients from doing very large writes to RADOS. If
- you set this to a value below what clients expect, they will receive an error
- when attempting to write to the cluster.
- fmt_desc: The maximum size of a write in megabytes.
- default: 90
- min: 4
- with_legacy: true
-- name: osd_max_pgls
- type: uint
- level: advanced
- desc: maximum number of results when listing objects in a pool
- fmt_desc: The maximum number of placement groups to list. A client
- requesting a large number can tie up the Ceph OSD Daemon.
- default: 1_K
- with_legacy: true
-- name: osd_client_message_size_cap
- type: size
- level: advanced
- desc: maximum memory to devote to in-flight client requests
- long_desc: If this value is exceeded, the OSD will not read any new client data
- off of the network until memory is freed.
- fmt_desc: The largest client data message allowed in memory.
- default: 500_M
- with_legacy: true
-- name: osd_client_message_cap
- type: uint
- level: advanced
- desc: maximum number of in-flight client requests
- default: 0
- with_legacy: true
- name: osd_crush_update_weight_set
type: bool
level: advanced
long_desc: This setting only exists for compatibility with hammer (and older) clusters.
default: true
with_legacy: true
-- name: osd_crush_update_on_start
- type: bool
- level: advanced
- desc: update OSD CRUSH location on startup
- default: true
- with_legacy: true
-- name: osd_class_update_on_start
- type: bool
- level: advanced
- desc: set OSD device class on startup
- default: true
- with_legacy: true
-- name: osd_crush_initial_weight
- type: float
- level: advanced
- desc: if >= 0, initial CRUSH weight for newly created OSDs
- long_desc: If this value is negative, the size of the OSD in TiB is used.
- fmt_desc: The initial CRUSH weight for newly added OSDs. By default, the initial
- CRUSH weight for a newly added OSD is set to its device size in TB. See
- `Weighting Bucket Items`_ for details.
- default: -1
- with_legacy: true
# whether to turn on fast read on the pool or not
- name: osd_pool_default_ec_fast_read
type: bool
flags:
- startup
with_legacy: true
-# Allows the "peered" state for recovery and backfill below min_size
-- name: osd_allow_recovery_below_min_size
- type: bool
- level: dev
- desc: allow replicated pools to recover with < min_size active members
- default: true
- services:
- - osd
- with_legacy: true
- name: osd_pool_default_flags
type: int
level: dev
  default: 0
  fmt_desc: The default flags for new pools.
with_legacy: true
-- name: osd_map_cache_size
- type: int
- level: advanced
- default: 50
- fmt_desc: The number of OSD maps to keep cached.
- with_legacy: true
- name: osd_map_message_max
type: int
level: advanced
desc: maximum number of OSDMaps to include in a single message
fmt_desc: The maximum map entries allowed per MOSDMap message.
default: 40
+ services:
+ - osd
+ - mon
with_legacy: true
-- name: osd_map_message_max_bytes
- type: size
- level: advanced
- desc: maximum number of bytes worth of OSDMaps to include in a single message
- default: 10_M
- with_legacy: true
-# cap on the number of incremental maps we send to peers and clients
-- name: osd_map_share_max_epochs
- type: int
- level: advanced
- default: 40
- with_legacy: true
-- name: osd_pg_epoch_max_lag_factor
- type: float
- level: advanced
- desc: Max multiple of the map cache that PGs can lag before we throttle map ingest
- default: 2
- see_also:
- - osd_map_cache_size
-- name: osd_inject_bad_map_crc_probability
- type: float
- level: dev
- default: 0
- with_legacy: true
-- name: osd_inject_failure_on_pg_removal
- type: bool
- level: dev
- default: false
- with_legacy: true
-# shut down the OSD if its status flips more than osd_max_markdown_count times within the most recent osd_max_markdown_period seconds
-- name: osd_max_markdown_period
- type: int
- level: advanced
- default: 10_min
- with_legacy: true
-- name: osd_max_markdown_count
- type: int
- level: advanced
- default: 5
- with_legacy: true
-- name: osd_op_pq_max_tokens_per_priority
- type: uint
- level: advanced
- default: 4_M
- with_legacy: true
-- name: osd_op_pq_min_cost
- type: size
- level: advanced
- default: 64_K
- with_legacy: true
-# preserve clone_overlap during recovery/migration
-- name: osd_recover_clone_overlap
- type: bool
- level: advanced
- default: true
- fmt_desc: Preserves clone overlap during recovery. Should always be set
- to ``true``.
- with_legacy: true
-- name: osd_num_cache_shards
- type: size
- level: advanced
- desc: The number of cache shards to use in the object store.
- default: 32
- flags:
- - startup
-- name: osd_op_num_threads_per_shard
- type: int
- level: advanced
- default: 0
- flags:
- - startup
- with_legacy: true
-- name: osd_op_num_threads_per_shard_hdd
- type: int
- level: advanced
- default: 1
- see_also:
- - osd_op_num_threads_per_shard
- flags:
- - startup
- with_legacy: true
-- name: osd_op_num_threads_per_shard_ssd
- type: int
- level: advanced
- default: 2
- see_also:
- - osd_op_num_threads_per_shard
- flags:
- - startup
- with_legacy: true
-- name: osd_op_num_shards
- type: int
- level: advanced
- fmt_desc: The number of shards allocated for a given OSD. Each shard has its own processing queue.
- PGs on the OSD are distributed evenly among the shards. This setting overrides
- _ssd and _hdd if non-zero.
- default: 0
- flags:
- - startup
- with_legacy: true
-- name: osd_op_num_shards_hdd
- type: int
- level: advanced
- fmt_desc: the number of shards allocated for a given OSD (for rotational media).
- default: 5
- see_also:
- - osd_op_num_shards
- flags:
- - startup
- with_legacy: true
-- name: osd_op_num_shards_ssd
- type: int
- level: advanced
- fmt_desc: the number of shards allocated for a given OSD (for solid state media).
- default: 8
- see_also:
- - osd_op_num_shards
- flags:
- - startup
- with_legacy: true
-- name: osd_skip_data_digest
- type: bool
- level: dev
- desc: Do not store full-object checksums if the backend (bluestore) does its own
- checksums. Only usable with all BlueStore OSDs.
- default: false
-# Weighted Priority Queue (wpq), mClock scheduler (mclock_scheduler; default),
-# or debug_random. "mclock_scheduler" is based on the mClock/dmClock algorithm
-# (Gulati, et al. 2010) and prioritizes operations based on the class they
-# belong to. "debug_random" chooses among the queues with equal probability.
-- name: osd_op_queue
- type: str
- level: advanced
- desc: which operation priority queue algorithm to use
- long_desc: which operation priority queue algorithm to use
- fmt_desc: This sets the type of queue to be used for prioritizing ops
- within each OSD. Both queues feature a strict sub-queue which is
- dequeued before the normal queue. The normal queue is different
- between implementations. The WeightedPriorityQueue (``wpq``)
- dequeues operations in relation to their priorities to prevent
- starvation of any queue. WPQ should help in cases where a few OSDs
- are more overloaded than others. The mClockQueue
- (``mclock_scheduler``) prioritizes operations based on which class
- they belong to (recovery, scrub, snaptrim, client op, osd subop).
- See `QoS Based on mClock`_. Requires a restart.
- default: mclock_scheduler
- see_also:
- - osd_op_queue_cut_off
- enum_values:
- - wpq
- - mclock_scheduler
- - debug_random
- with_legacy: true
-# Min priority to go to strict queue. (low, high)
-- name: osd_op_queue_cut_off
- type: str
- level: advanced
- desc: the threshold between high priority ops and low priority ops
- long_desc: the threshold between high priority ops that use strict priority ordering
- and low priority ops that use a fairness algorithm that may or may not incorporate
- priority
- fmt_desc: This selects which priority ops will be sent to the strict
- queue versus the normal queue. The ``low`` setting sends all
- replication ops and higher to the strict queue, while the ``high``
- option sends only replication acknowledgment ops and higher to
- the strict queue. Setting this to ``high`` should help when a few
- OSDs in the cluster are very busy especially when combined with
- ``wpq`` in the ``osd_op_queue`` setting. OSDs that are very busy
- handling replication traffic could starve primary client traffic
- on these OSDs without these settings. Requires a restart.
- default: high
- see_also:
- - osd_op_queue
- enum_values:
- - low
- - high
- - debug_random
- with_legacy: true
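# Illustrative example: both osd_op_queue and osd_op_queue_cut_off take effect only
# after an OSD restart; a sketch of switching back to wpq would be
#   ceph config set osd osd_op_queue wpq
#   ceph config set osd osd_op_queue_cut_off high
# followed by restarting the OSD daemons.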
-- name: osd_mclock_scheduler_client_res
- type: uint
- level: advanced
- desc: IO proportion reserved for each client (default)
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO proportion reserved for each client (default).
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_client_wgt
- type: uint
- level: advanced
- desc: IO share for each client (default) over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO share for each client (default) over reservation.
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_client_lim
- type: uint
- level: advanced
- desc: IO limit for each client (default) over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO limit for each client (default) over reservation.
- default: 999999
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_recovery_res
- type: uint
- level: advanced
- desc: IO proportion reserved for background recovery (default)
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO proportion reserved for background recovery (default).
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_recovery_wgt
- type: uint
- level: advanced
- desc: IO share for each background recovery over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO share for each background recovery over reservation.
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_recovery_lim
- type: uint
- level: advanced
- desc: IO limit for background recovery over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO limit for background recovery over reservation.
- default: 999999
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_best_effort_res
- type: uint
- level: advanced
- desc: IO proportion reserved for background best_effort (default)
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO proportion reserved for background best_effort (default).
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_best_effort_wgt
- type: uint
- level: advanced
- desc: IO share for each background best_effort over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO share for each background best_effort over reservation.
- default: 1
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_background_best_effort_lim
- type: uint
- level: advanced
- desc: IO limit for background best_effort over reservation
- long_desc: Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: IO limit for background best_effort over reservation.
- default: 999999
- see_also:
- - osd_op_queue
-- name: osd_mclock_scheduler_anticipation_timeout
- type: float
- level: advanced
- desc: mclock anticipation timeout in seconds
- long_desc: the amount of time that mclock waits until the unused resource is forfeited
- default: 0
-- name: osd_mclock_cost_per_io_usec
- type: float
- level: dev
- desc: Cost per IO in microseconds to consider per OSD (overrides _ssd and _hdd if
- non-zero)
- long_desc: This option specifies the cost factor to consider in usec per OSD. This
- is considered by the mclock scheduler to set an additional cost factor in QoS
- calculations. Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: Cost per IO in microseconds to consider per OSD (overrides _ssd
- and _hdd if non-zero)
- default: 0
- flags:
- - runtime
-- name: osd_mclock_cost_per_io_usec_hdd
- type: float
- level: dev
- desc: Cost per IO in microseconds to consider per OSD (for rotational media)
- long_desc: This option specifies the cost factor to consider in usec per OSD for
- rotational device type. This is considered by the mclock_scheduler to set an additional
- cost factor in QoS calculations. Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: Cost per IO in microseconds to consider per OSD (for rotational
- media)
- default: 25000
- flags:
- - runtime
-- name: osd_mclock_cost_per_io_usec_ssd
- type: float
- level: dev
- desc: Cost per IO in microseconds to consider per OSD (for solid state media)
- long_desc: This option specifies the cost factor to consider in usec per OSD for
- solid state device type. This is considered by the mclock_scheduler to set an
- additional cost factor in QoS calculations. Only considered for osd_op_queue =
- mclock_scheduler
- fmt_desc: Cost per IO in microseconds to consider per OSD (for solid state
- media)
- default: 50
- flags:
- - runtime
-- name: osd_mclock_cost_per_byte_usec
- type: float
- level: dev
- desc: Cost per byte in microseconds to consider per OSD (overrides _ssd and _hdd
- if non-zero)
- long_desc: This option specifies the cost per byte to consider in microseconds per
- OSD. This is considered by the mclock scheduler to set an additional cost factor
- in QoS calculations. Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: Cost per byte in microseconds to consider per OSD (overrides _ssd
- and _hdd if non-zero)
- default: 0
- flags:
- - runtime
-- name: osd_mclock_cost_per_byte_usec_hdd
- type: float
- level: dev
- desc: Cost per byte in microseconds to consider per OSD (for rotational media)
- long_desc: This option specifies the cost per byte to consider in microseconds per
- OSD for rotational device type. This is considered by the mclock_scheduler to
- set an additional cost factor in QoS calculations. Only considered for osd_op_queue
- = mclock_scheduler
- fmt_desc: Cost per byte in microseconds to consider per OSD (for rotational
- media)
- default: 5.2
- flags:
- - runtime
-- name: osd_mclock_cost_per_byte_usec_ssd
- type: float
- level: dev
- desc: Cost per byte in microseconds to consider per OSD (for solid state media)
- long_desc: This option specifies the cost per byte to consider in microseconds per
- OSD for solid state device type. This is considered by the mclock_scheduler to
- set an additional cost factor in QoS calculations. Only considered for osd_op_queue
- = mclock_scheduler
- fmt_desc: Cost per byte in microseconds to consider per OSD (for solid state
- media)
- default: 0.011
- flags:
- - runtime
-- name: osd_mclock_max_capacity_iops
- type: float
- level: basic
- desc: Max IOPs capacity (at 4KiB block size) to consider per OSD (overrides _ssd
- and _hdd if non-zero)
- long_desc: This option specifies the max osd capacity in iops per OSD. Helps in
- QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
- = mclock_scheduler
- fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD
- (overrides _ssd and _hdd if non-zero)
- default: 0
- flags:
- - runtime
-- name: osd_mclock_max_capacity_iops_hdd
- type: float
- level: basic
- desc: Max IOPs capacity (at 4KiB block size) to consider per OSD (for rotational
- media)
- long_desc: This option specifies the max OSD capacity in iops per OSD. Helps in
- QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
- = mclock_scheduler
- fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
- rotational media)
- default: 315
- flags:
- - runtime
-- name: osd_mclock_max_capacity_iops_ssd
- type: float
- level: basic
- desc: Max IOPs capacity (at 4KiB block size) to consider per OSD (for solid state
- media)
- long_desc: This option specifies the max OSD capacity in iops per OSD. Helps in
- QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
- = mclock_scheduler
- fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
- solid state media)
- default: 21500
- flags:
- - runtime
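# Illustrative example (the measured value is hypothetical): if a benchmark shows a
# rotational OSD sustaining about 350 IOPS at 4 KiB, the default can be overridden
# for that OSD with
#   ceph config set osd.0 osd_mclock_max_capacity_iops_hdd 350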
-- name: osd_mclock_profile
- type: str
- level: advanced
- desc: Which mclock profile to use
- long_desc: This option specifies the mclock profile to enable - one among the set
- of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler
- fmt_desc: |
- This sets the type of mclock profile to use for providing QoS
- based on operations belonging to different classes (background
- recovery, scrub, snaptrim, client op, osd subop). Once a built-in
- profile is enabled, the lower level mclock resource control
- parameters [*reservation, weight, limit*] and some Ceph
- configuration parameters are set transparently. Note that the
- above does not apply for the *custom* profile.
- default: high_client_ops
- see_also:
- - osd_op_queue
- enum_values:
- - balanced
- - high_recovery_ops
- - high_client_ops
- - custom
- flags:
- - runtime
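# Illustrative example: to let recovery traffic take precedence over client I/O,
# switch all OSDs to the built-in profile below (a runtime option):
#   ceph config set osd osd_mclock_profile high_recovery_ops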
-# do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
-- name: osd_ignore_stale_divergent_priors
- type: bool
- level: advanced
- default: false
- with_legacy: true
-# Set to true for testing. Users should NOT set this.
-# If set to true, any error will be reported even after enough
-# shards have been read to decode the object.
-- name: osd_read_ec_check_for_errors
- type: bool
- level: advanced
- default: false
- with_legacy: true
-# Only use clone_overlap for recovery if there are fewer than
-# osd_recover_clone_overlap_limit entries in the overlap set
-- name: osd_recover_clone_overlap_limit
- type: uint
- level: advanced
- default: 10
- flags:
- - runtime
-- name: osd_debug_feed_pullee
- type: int
- level: dev
- desc: Feed a pullee, and force primary to pull a currently missing object from it
- default: -1
- with_legacy: true
-- name: osd_backfill_scan_min
- type: int
- level: advanced
- default: 64
- fmt_desc: The minimum number of objects per backfill scan.
- with_legacy: true
-- name: osd_backfill_scan_max
- type: int
- level: advanced
- default: 512
- fmt_desc: The maximum number of objects per backfill scan.
- with_legacy: true
-- name: osd_op_thread_timeout
- type: int
- level: advanced
- default: 15
- fmt_desc: The Ceph OSD Daemon operation thread timeout in seconds.
- with_legacy: true
-- name: osd_op_thread_suicide_timeout
- type: int
- level: advanced
- default: 150
- with_legacy: true
-- name: osd_recovery_sleep
- type: float
- level: advanced
- desc: Time in seconds to sleep before next recovery or backfill op
- fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
- Increasing this value will slow down recovery operation while
- client operations will be less impacted.
- default: 0
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_sleep_hdd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next recovery or backfill op for HDDs
- fmt_desc: Time in seconds to sleep before next recovery or backfill op
- for HDDs.
- default: 0.1
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_sleep_ssd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next recovery or backfill op for SSDs
- fmt_desc: Time in seconds to sleep before the next recovery or backfill op
- for SSDs.
- default: 0
- see_also:
- - osd_recovery_sleep
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_sleep_hybrid
- type: float
- level: advanced
- desc: Time in seconds to sleep before next recovery or backfill op when data is
- on HDD and journal is on SSD
- fmt_desc: Time in seconds to sleep before the next recovery or backfill op
- when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
- default: 0.025
- see_also:
- - osd_recovery_sleep
- flags:
- - runtime
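# Illustrative example (value hypothetical): recovery on rotational OSDs can be
# throttled further at runtime, e.g.
#   ceph config set osd osd_recovery_sleep_hdd 0.2
# at the cost of slower recovery.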
-- name: osd_snap_trim_sleep
- type: float
- level: advanced
- desc: Time in seconds to sleep before next snap trim (overrides values below)
- fmt_desc: Time in seconds to sleep before next snap trim op.
- Increasing this value will slow down snap trimming.
- This option overrides backend specific variants.
- default: 0
- flags:
- - runtime
- with_legacy: true
-- name: osd_snap_trim_sleep_hdd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next snap trim for HDDs
- default: 5
- flags:
- - runtime
-- name: osd_snap_trim_sleep_ssd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next snap trim for SSDs
- fmt_desc: Time in seconds to sleep before next snap trim op
- for SSD OSDs (including NVMe).
- default: 0
- flags:
- - runtime
-- name: osd_snap_trim_sleep_hybrid
- type: float
- level: advanced
- desc: Time in seconds to sleep before next snap trim when data is on HDD and journal
- is on SSD
- fmt_desc: Time in seconds to sleep before next snap trim op
- when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
- default: 2
- flags:
- - runtime
-- name: osd_scrub_invalid_stats
- type: bool
- level: advanced
- default: true
- with_legacy: true
-- name: osd_heartbeat_interval
- type: int
- level: dev
- desc: Interval (in seconds) between peer pings
- fmt_desc: How often a Ceph OSD Daemon pings its peers (in seconds).
- default: 6
- min: 1
- max: 1_min
- with_legacy: true
-# (seconds) how long before we decide a peer has failed
-# This setting is read by the MONs and OSDs and has to be set to the same value in both sections of the configuration
-- name: osd_heartbeat_grace
- type: int
- level: advanced
- default: 20
- fmt_desc: The time that must elapse without a heartbeat before the Ceph Storage
- Cluster considers a Ceph OSD Daemon ``down``.
- This setting must be set in both the [mon] and [osd] or [global]
- sections so that it is read by both monitor and OSD daemons.
- with_legacy: true
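# Illustrative example: because osd_heartbeat_grace must be seen by both monitors
# and OSDs, it is typically set once in ceph.conf, e.g.
#   [global]
#   osd_heartbeat_grace = 20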
-- name: osd_heartbeat_stale
- type: int
- level: advanced
- desc: Interval (in seconds) after which an unresponsive heartbeat peer is marked stale.
- long_desc: Automatically mark unresponsive heartbeat sessions as stale and tear
- them down. The primary benefit is that the OSD doesn't need to keep a flood of
- blocked heartbeat messages around in memory.
- default: 10_min
-# minimum number of peers
-- name: osd_heartbeat_min_peers
- type: int
- level: advanced
- default: 10
- with_legacy: true
-# prioritize the heartbeat TCP socket and set its DSCP to CS6 if true
-- name: osd_heartbeat_use_min_delay_socket
- type: bool
- level: advanced
- default: false
- with_legacy: true
-# the minimum size of OSD heartbeat messages to send
-- name: osd_heartbeat_min_size
- type: size
- level: advanced
- desc: Minimum heartbeat packet size in bytes. Will add dummy payload if heartbeat
- packet is smaller than this.
- default: 2000
- with_legacy: true
-# max number of parallel snap trims/pg
-- name: osd_pg_max_concurrent_snap_trims
- type: uint
- level: advanced
- default: 2
- with_legacy: true
-# max number of trimming pgs
-- name: osd_max_trimming_pgs
- type: uint
- level: advanced
- default: 2
- with_legacy: true
-# minimum number of peers that must be reachable to mark ourselves
-# back up after being wrongly marked down.
-- name: osd_heartbeat_min_healthy_ratio
- type: float
- level: advanced
- default: 0.33
- with_legacy: true
-# (seconds) how often to ping monitor if no peers
-- name: osd_mon_heartbeat_interval
- type: int
- level: advanced
- default: 30
- fmt_desc: How often the Ceph OSD Daemon pings a Ceph Monitor if it has no
- Ceph OSD Daemon peers.
- with_legacy: true
-- name: osd_mon_heartbeat_stat_stale
- type: int
- level: advanced
- desc: Stop reporting on heartbeat ping times not updated for this many seconds.
- long_desc: Stop reporting on old heartbeat information; setting this to zero disables the cutoff
- fmt_desc: Stop reporting on heartbeat ping times which haven't been updated for
- this many seconds. Set to zero to disable this action.
- default: 1_hr
-# failures, up_thru, boot.
-- name: osd_mon_report_interval
- type: int
- level: advanced
- desc: Frequency of OSD reports to mon for peer failures, fullness status changes
- fmt_desc: The number of seconds a Ceph OSD Daemon may wait
- from startup or another reportable event before reporting
- to a Ceph Monitor.
- default: 5
- with_legacy: true
-# max updates in flight
-- name: osd_mon_report_max_in_flight
- type: int
- level: advanced
- default: 2
- with_legacy: true
-# (seconds) how often to send a beacon message to the monitor
-- name: osd_beacon_report_interval
- type: int
- level: advanced
- default: 5_min
- with_legacy: true
-# report pg stats for any given pg at least this often
-- name: osd_pg_stat_report_interval_max
- type: int
- level: advanced
- default: 500
- with_legacy: true
-# Max number of snap intervals to report to mgr in pg_stat_t
-- name: osd_max_snap_prune_intervals_per_epoch
- type: uint
- level: dev
- desc: Max number of snap intervals to report to mgr in pg_stat_t
- default: 512
- with_legacy: true
-- name: osd_default_data_pool_replay_window
- type: int
- level: advanced
- default: 45
- fmt_desc: The time (in seconds) for an OSD to wait for a client to replay
- a request.
-- name: osd_auto_mark_unfound_lost
- type: bool
- level: advanced
- default: false
- with_legacy: true
-- name: osd_recovery_delay_start
- type: float
- level: advanced
- default: 0
- fmt_desc: After peering completes, Ceph will delay for the specified number
- of seconds before starting to recover RADOS objects.
- with_legacy: true
-- name: osd_recovery_max_active
- type: uint
- level: advanced
- desc: Number of simultaneous active recovery operations per OSD (overrides _ssd
- and _hdd if non-zero)
- fmt_desc: The number of active recovery requests per OSD at one time. More
- requests will accelerate recovery, but the requests place an
- increased load on the cluster.
- note: This value is only used if it is non-zero. Normally it
- is ``0``, which means that the ``hdd`` or ``ssd`` values
- (below) are used, depending on the type of the primary
- device backing the OSD.
- default: 0
- see_also:
- - osd_recovery_max_active_hdd
- - osd_recovery_max_active_ssd
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_max_active_hdd
- type: uint
- level: advanced
- desc: Number of simultaneous active recovery operations per OSD (for rotational
- devices)
- fmt_desc: The number of active recovery requests per OSD at one time, if the
- primary device is rotational.
- default: 3
- see_also:
- - osd_recovery_max_active
- - osd_recovery_max_active_ssd
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_max_active_ssd
- type: uint
- level: advanced
- desc: Number of simultaneous active recovery operations per OSD (for non-rotational
- solid state devices)
- fmt_desc: The number of active recovery requests per OSD at one time, if the
- primary device is non-rotational (i.e., an SSD).
- default: 10
- see_also:
- - osd_recovery_max_active
- - osd_recovery_max_active_hdd
- flags:
- - runtime
- with_legacy: true
-- name: osd_recovery_max_single_start
- type: uint
- level: advanced
- default: 1
- fmt_desc: The maximum number of recovery operations per OSD that will be
- newly started when an OSD is recovering.
- with_legacy: true
-# max size of push chunk
-- name: osd_recovery_max_chunk
- type: size
- level: advanced
- default: 8_M
- fmt_desc: the maximum total size of data chunks a recovery op can carry.
- with_legacy: true
-# max number of omap entries per chunk; 0 to disable limit
-- name: osd_recovery_max_omap_entries_per_chunk
- type: uint
- level: advanced
- default: 8096
- with_legacy: true
-# max size of a COPYFROM chunk
-- name: osd_copyfrom_max_chunk
- type: size
- level: advanced
- default: 8_M
- with_legacy: true
-# push cost per object
-- name: osd_push_per_object_cost
- type: size
- level: advanced
- default: 1000
- fmt_desc: the overhead for serving a push op
- with_legacy: true
-# max size of push message
-- name: osd_max_push_cost
- type: size
- level: advanced
- default: 8_M
- with_legacy: true
-# max objects in single push op
-- name: osd_max_push_objects
- type: uint
- level: advanced
- default: 10
- with_legacy: true
-- name: osd_max_scrubs
- type: int
- level: advanced
- desc: Maximum concurrent scrubs on a single OSD
- fmt_desc: The maximum number of simultaneous scrub operations for
- a Ceph OSD Daemon.
- default: 1
- with_legacy: true
-- name: osd_scrub_during_recovery
- type: bool
- level: advanced
- desc: Allow scrubbing when PGs on the OSD are undergoing recovery
- fmt_desc: Allow scrub during recovery. Setting this to ``false`` will disable
- scheduling new scrubs (and deep scrubs) while there is active recovery.
- Already running scrubs will be continued. This might be useful to reduce
- load on busy clusters.
- default: false
- with_legacy: true
-- name: osd_repair_during_recovery
- type: bool
- level: advanced
- desc: Allow requested repairing when PGs on the OSD are undergoing recovery
- default: false
- with_legacy: true
-- name: osd_scrub_begin_hour
- type: int
- level: advanced
- desc: Restrict scrubbing to this hour of the day or later
- long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
- fmt_desc: This restricts scrubbing to this hour of the day or later.
- Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0``
- to allow scrubbing for the entire day. Together with ``osd_scrub_end_hour``, this
- defines a time window in which scrubs can happen. However, a scrub will still be
- performed, regardless of this window, whenever a placement group's scrub interval
- exceeds ``osd_scrub_max_interval``.
- default: 0
- see_also:
- - osd_scrub_end_hour
- min: 0
- max: 23
- with_legacy: true
-- name: osd_scrub_end_hour
- type: int
- level: advanced
- desc: Restrict scrubbing to hours of the day earlier than this
- long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
- fmt_desc: This restricts scrubbing to hours of the day earlier than this.
- Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0`` to allow scrubbing
- for the entire day. Together with ``osd_scrub_begin_hour``, this defines a time
- window in which scrubs can happen. However, a scrub will still be performed,
- regardless of this window, whenever a placement group's scrub interval exceeds
- ``osd_scrub_max_interval``.
- default: 0
- see_also:
- - osd_scrub_begin_hour
- min: 0
- max: 23
- with_legacy: true
-- name: osd_scrub_begin_week_day
- type: int
- level: advanced
- desc: Restrict scrubbing to this day of the week or later
- long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
- for the entire week.
- fmt_desc: This restricts scrubbing to this day of the week or later.
- 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
- and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
- Together with ``osd_scrub_end_week_day``, this defines a time window in which
- scrubs can happen. However, a scrub will still be performed, regardless of this
- window, whenever the PG's scrub interval exceeds ``osd_scrub_max_interval``.
- default: 0
- see_also:
- - osd_scrub_end_week_day
- min: 0
- max: 6
- with_legacy: true
-- name: osd_scrub_end_week_day
- type: int
- level: advanced
- desc: Restrict scrubbing to days of the week earlier than this
- long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
- for the entire week.
- fmt_desc: This restricts scrubbing to days of the week earlier than this.
- 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
- and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
- Together with ``osd_scrub_begin_week_day``, this defines a time window in which
- scrubs can happen. However, a scrub will still be performed, regardless of this
- window, whenever the placement group's scrub interval exceeds
- ``osd_scrub_max_interval``.
- default: 0
- see_also:
- - osd_scrub_begin_week_day
- min: 0
- max: 6
- with_legacy: true
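# Illustrative example: to confine routine scrubs to 01:00-07:00 local time:
#   ceph config set osd osd_scrub_begin_hour 1
#   ceph config set osd osd_scrub_end_hour 7
# A PG whose scrub interval exceeds osd_scrub_max_interval is still scrubbed
# outside this window.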
-- name: osd_scrub_load_threshold
- type: float
- level: advanced
- desc: Allow scrubbing when system load divided by number of CPUs is below this value
- fmt_desc: The normalized maximum load. Ceph will not scrub when the system load
- (as defined by ``getloadavg() / number of online CPUs``) is higher than this number.
- Default is ``0.5``.
- default: 0.5
- with_legacy: true
-# if load is low
-- name: osd_scrub_min_interval
- type: float
- level: advanced
- desc: Scrub each PG no more often than this interval
- fmt_desc: The minimal interval in seconds for scrubbing the Ceph OSD Daemon
- when the Ceph Storage Cluster load is low.
- default: 1_day
- see_also:
- - osd_scrub_max_interval
- with_legacy: true
-# regardless of load
-- name: osd_scrub_max_interval
- type: float
+- name: osd_map_message_max_bytes
+ type: size
level: advanced
- desc: Scrub each PG no less often than this interval
- fmt_desc: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
- irrespective of cluster load.
- default: 7_day
- see_also:
- - osd_scrub_min_interval
+ desc: maximum number of bytes worth of OSDMaps to include in a single message
+ default: 10_M
+ services:
+ - osd
+ - mon
with_legacy: true
-# randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
-- name: osd_scrub_interval_randomize_ratio
- type: float
+# do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
+- name: osd_ignore_stale_divergent_priors
+ type: bool
level: advanced
- desc: Ratio of scrub interval to randomly vary
- long_desc: This prevents a scrub 'stampede' by randomly varying the scrub intervals
- so that they are soon uniformly distributed over the week
- fmt_desc: Add a random delay to ``osd_scrub_min_interval`` when scheduling
- the next scrub job for a PG. The delay is a random
- value less than ``osd_scrub_min_interval`` \*
- ``osd_scrub_interval_randomize_ratio``. The default setting
- spreads scrubs throughout the allowed time
- window of ``[1, 1.5]`` \* ``osd_scrub_min_interval``.
- default: 0.5
- see_also:
- - osd_scrub_min_interval
+ default: false
with_legacy: true
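# Worked example: with osd_scrub_min_interval = 1 day and
# osd_scrub_interval_randomize_ratio = 0.5, each PG's next scrub is scheduled at a
# uniformly random point between 24 and 36 hours after the previous one.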
-# the probability to back off the scheduled scrub
-- name: osd_scrub_backoff_ratio
- type: float
+- name: osd_heartbeat_interval
+ type: int
level: dev
- desc: Backoff ratio for scheduling scrubs
- long_desc: This is the percentage of ticks that do NOT schedule scrubs; 66% means
- that 1 out of 3 ticks will schedule scrubs
- default: 0.66
+ desc: Interval (in seconds) between peer pings
+ fmt_desc: How often a Ceph OSD Daemon pings its peers (in seconds).
+ default: 6
+ min: 1
+ max: 1_min
with_legacy: true
-- name: osd_scrub_chunk_min
+# (seconds) how long before we decide a peer has failed
+# This setting is read by the MONs and OSDs and has to be set to the same value in both sections of the configuration
+- name: osd_heartbeat_grace
type: int
level: advanced
- desc: Minimum number of objects to scrub in a single chunk
- fmt_desc: The minimum number of object store chunks to scrub during a single operation.
- Ceph blocks writes to a single chunk during scrub.
- default: 5
- see_also:
- - osd_scrub_chunk_max
+ default: 20
+ fmt_desc: The time that must elapse without a heartbeat before the Ceph Storage
+ Cluster considers a Ceph OSD Daemon ``down``.
+ This setting must be set in both the [mon] and [osd] or [global]
+ sections so that it is read by both monitor and OSD daemons.
with_legacy: true
-- name: osd_scrub_chunk_max
+- name: osd_heartbeat_stale
type: int
level: advanced
- desc: Maximum number of objects to scrub in a single chunk
- fmt_desc: The maximum number of object store chunks to scrub during a single operation.
- default: 25
- see_also:
- - osd_scrub_chunk_min
- with_legacy: true
-# sleep between [deep]scrub ops
-- name: osd_scrub_sleep
- type: float
- level: advanced
- desc: Duration to inject a delay during scrubbing
- fmt_desc: Time to sleep before scrubbing the next group of chunks. Increasing this value will slow
- down the overall rate of scrubbing so that client operations will be less impacted.
- default: 0
- flags:
- - runtime
- with_legacy: true
-# more sleep between [deep]scrub ops
-- name: osd_scrub_extended_sleep
- type: float
- level: advanced
- desc: Duration to inject a delay during scrubbing out of scrubbing hours
- default: 0
- see_also:
- - osd_scrub_begin_hour
- - osd_scrub_end_hour
- - osd_scrub_begin_week_day
- - osd_scrub_end_week_day
- with_legacy: true
-# whether to auto-repair inconsistencies upon deep-scrubbing
-- name: osd_scrub_auto_repair
+ desc: Interval (in seconds) after which an unresponsive heartbeat peer is marked stale.
+ long_desc: Automatically mark unresponsive heartbeat sessions as stale and tear
+ them down. The primary benefit is that the OSD doesn't need to keep a flood of
+ blocked heartbeat messages around in memory.
+ default: 10_min
+# prioritize the heartbeat TCP socket and set its DSCP to CS6 if true
+- name: osd_heartbeat_use_min_delay_socket
type: bool
level: advanced
- desc: Automatically repair damaged objects detected during scrub
- fmt_desc: Setting this to ``true`` will enable automatic PG repair when errors
- are found by scrubs or deep-scrubs. However, if more than
- ``osd_scrub_auto_repair_num_errors`` errors are found a repair is NOT performed.
default: false
with_legacy: true
-# only auto-repair when number of errors is below this threshold
-- name: osd_scrub_auto_repair_num_errors
- type: uint
+# the minimum size of OSD heartbeat messages to send
+- name: osd_heartbeat_min_size
+ type: size
level: advanced
- desc: Maximum number of detected errors to automatically repair
- fmt_desc: Auto repair will not occur if more than this many errors are found.
- default: 5
- see_also:
- - osd_scrub_auto_repair
+ desc: Minimum heartbeat packet size in bytes. Will add dummy payload if heartbeat
+ packet is smaller than this.
+ default: 2000
with_legacy: true
-- name: osd_scrub_max_preemptions
+# max number of parallel snap trims/pg
+- name: osd_pg_max_concurrent_snap_trims
type: uint
level: advanced
- desc: Set the maximum number of times we will preempt a deep scrub due to a client
- operation before blocking client IO to complete the scrub
- default: 5
- min: 0
- max: 30
-- name: osd_deep_scrub_interval
- type: float
+ default: 2
+ with_legacy: true
+# max number of trimming pgs
+- name: osd_max_trimming_pgs
+ type: uint
level: advanced
- desc: Deep scrub each PG (i.e., verify data checksums) at least this often
- fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
- ``osd_scrub_load_threshold`` does not affect this setting.
- default: 7_day
+ default: 2
with_legacy: true
-- name: osd_deep_scrub_randomize_ratio
+# minimum number of peers that must be reachable to mark ourselves
+# back up after being wrongly marked down.
+- name: osd_heartbeat_min_healthy_ratio
type: float
level: advanced
- desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
- are deep)
- long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
- are uniformly distributed over the week
- default: 0.15
+ default: 0.33
with_legacy: true
-- name: osd_deep_scrub_stride
- type: size
+# (seconds) how often to ping monitor if no peers
+- name: osd_mon_heartbeat_interval
+ type: int
level: advanced
- desc: Number of bytes to read from an object at a time during deep scrub
- fmt_desc: Read size when doing a deep scrub.
- default: 512_K
+ default: 30
+ fmt_desc: How often the Ceph OSD Daemon pings a Ceph Monitor if it has no
+ Ceph OSD Daemon peers.
with_legacy: true
-- name: osd_deep_scrub_keys
+- name: osd_mon_heartbeat_stat_stale
type: int
level: advanced
- desc: Number of keys to read from an object at a time during deep scrub
- default: 1024
- with_legacy: true
-# objects must be this old (seconds) before we update the whole-object digest on scrub
-- name: osd_deep_scrub_update_digest_min_age
+ desc: Stop reporting on heartbeat ping times not updated for this many seconds.
+ long_desc: Stop reporting on old heartbeat information; setting this to zero disables the cutoff
+ fmt_desc: Stop reporting on heartbeat ping times which haven't been updated for
+ this many seconds. Set to zero to disable this action.
+ default: 1_hr
+# failures, up_thru, boot.
+- name: osd_mon_report_interval
type: int
level: advanced
- desc: Update overall object digest only if object was last modified longer ago than
- this
- default: 2_hr
+ desc: Frequency of OSD reports to mon for peer failures, fullness status changes
+ fmt_desc: The number of seconds a Ceph OSD Daemon may wait
+ from startup or another reportable event before reporting
+ to a Ceph Monitor.
+ default: 5
with_legacy: true
-- name: osd_deep_scrub_large_omap_object_key_threshold
- type: uint
+# max updates in flight
+- name: osd_mon_report_max_in_flight
+ type: int
level: advanced
- desc: Warn when we encounter an object with more omap keys than this
- default: 200000
- services:
- - osd
- see_also:
- - osd_deep_scrub_large_omap_object_value_sum_threshold
+ default: 2
with_legacy: true
-- name: osd_deep_scrub_large_omap_object_value_sum_threshold
- type: size
+# (seconds) how often to send a beacon message to the monitor
+- name: osd_beacon_report_interval
+ type: int
level: advanced
- desc: Warn when we encounter an object with more omap key bytes than this
- default: 1_G
- services:
- - osd
- see_also:
- - osd_deep_scrub_large_omap_object_key_threshold
+ default: 5_min
with_legacy: true
-# where rados plugins are stored
-- name: osd_class_dir
- type: str
+# report pg stats for any given pg at least this often
+- name: osd_pg_stat_report_interval_max
+ type: int
level: advanced
- default: @CMAKE_INSTALL_LIBDIR@/rados-classes
- fmt_desc: The class path for RADOS class plug-ins.
+ default: 500
with_legacy: true
-- name: osd_open_classes_on_start
- type: bool
- level: advanced
- default: true
+# Max number of snap intervals to report to mgr in pg_stat_t
+- name: osd_max_snap_prune_intervals_per_epoch
+ type: uint
+ level: dev
+ desc: Max number of snap intervals to report to mgr in pg_stat_t
+ default: 512
with_legacy: true
-# list of object classes allowed to be loaded (allow all: *)
-- name: osd_class_load_list
- type: str
+- name: osd_default_data_pool_replay_window
+ type: int
level: advanced
- default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
- user version cas cmpomap queue 2pc_queue fifo
- with_legacy: true
-# list of object classes with default execute perm (allow all: *)
-- name: osd_class_default_list
- type: str
+ default: 45
+ fmt_desc: The time (in seconds) for an OSD to wait for a client to replay
+ a request.
+- name: osd_auto_mark_unfound_lost
+ type: bool
level: advanced
- default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
- user version cas cmpomap queue 2pc_queue fifo
+ default: false
with_legacy: true
- name: osd_check_for_log_corruption
type: bool
level: advanced
  default: false
with_legacy: true
-- name: osd_delete_sleep
- type: float
- level: advanced
- desc: Time in seconds to sleep before next removal transaction (overrides values
- below)
- fmt_desc: Time in seconds to sleep before the next removal transaction. This
- throttles the PG deletion process.
- default: 0
- flags:
- - runtime
-- name: osd_delete_sleep_hdd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next removal transaction for HDDs
- default: 5
- flags:
- - runtime
-- name: osd_delete_sleep_ssd
- type: float
- level: advanced
- desc: Time in seconds to sleep before next removal transaction for SSDs
- default: 1
- flags:
- - runtime
-- name: osd_delete_sleep_hybrid
- type: float
- level: advanced
- desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
- and OSD journal or WAL+DB is on SSD
- default: 1
- flags:
- - runtime
# what % full makes an OSD "full" (failsafe)
- name: osd_failsafe_full_ratio
type: float
level: advanced
  default: 0.97
with_legacy: true
-- name: cephadm_path
- type: str
- level: advanced
- desc: Path to cephadm utility
- default: /usr/sbin/cephadm
- services:
- - mgr
-- name: mgr_module_path
- type: str
- level: advanced
- desc: Filesystem path to manager modules.
- default: @CEPH_INSTALL_DATADIR@/mgr
- services:
- - mgr
-- name: mgr_standby_modules
- type: bool
- default: true
- level: advanced
- desc: Start modules in standby (redirect) mode when mgr is standby
- long_desc: By default, the standby modules will answer incoming requests with an
- HTTP redirect to the active manager, allowing users to point their browser at any
- mgr node and find their way to an active mgr. However, this mode is problematic
- when using a load balancer because (1) the redirect locations are usually private
- IPs and (2) the load balancer can't identify which mgr is the right one to send
- traffic to. If a load balancer is being used, set this to false.
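# Illustrative example: when the manager endpoints sit behind a load balancer,
# disable standby redirects as described above:
#   ceph config set mgr mgr_standby_modules false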
-- name: mgr_disabled_modules
- type: str
- level: advanced
- desc: List of manager modules that are never loaded
- long_desc: A comma-delimited list of module names. This list is read by the manager
- when it starts. By default, the manager loads all modules found in the specified
- 'mgr_module_path', and it starts the enabled ones as instructed. The modules in
- this list will not be loaded at all.
- default: @mgr_disabled_modules@
- services:
- - mgr
- see_also:
- - mgr_module_path
- flags:
- - startup
-- name: mgr_initial_modules
- type: str
- level: basic
- desc: List of manager modules to enable when the cluster is first started
- long_desc: This list of module names is read by the monitor when the cluster is
- first started after installation, to populate the list of enabled manager modules. Subsequent
- updates are done using the 'mgr module [enable|disable]' commands. List may be
- comma or space separated.
- default: restful iostat
- services:
- - mon
- flags:
- - no_mon_update
- - cluster_create
-- name: mgr_data
- type: str
- level: advanced
- desc: Filesystem path to the ceph-mgr data directory, used to contain keyring.
- default: /var/lib/ceph/mgr/$cluster-$id
- services:
- - mgr
- flags:
- - no_mon_update
- name: mgr_tick_period
type: secs
level: advanced
default: 2
services:
- mgr
-- name: mgr_stats_period
- type: int
- level: basic
- desc: Period in seconds of OSD/MDS stats reports to manager
- long_desc: Use this setting to control the granularity of time series data collection
- from daemons. Adjust upwards if the manager CPU load is too high, or if you simply
- do not require the most up to date performance counter data.
- default: 5
- services:
- - mgr
-- name: mgr_client_bytes
- type: size
- level: dev
- default: 128_M
- services:
- - mgr
-- name: mgr_client_messages
- type: uint
- level: dev
- default: 512
- services:
- - mgr
-- name: mgr_osd_bytes
- type: size
- level: dev
- default: 512_M
- services:
- - mgr
-- name: mgr_osd_messages
- type: uint
- level: dev
- default: 8_K
- services:
- - mgr
-- name: mgr_mds_bytes
- type: size
- level: dev
- default: 128_M
- services:
- - mgr
-- name: mgr_mds_messages
- type: uint
- level: dev
- default: 128
- services:
- - mgr
-- name: mgr_mon_bytes
- type: size
- level: dev
- default: 128_M
- services:
- - mgr
-- name: mgr_mon_messages
- type: uint
- level: dev
- default: 128
- services:
- - mgr
+ - mon
- name: mgr_connect_retry_interval
type: float
level: dev
default: 1
services:
- common
-- name: mgr_service_beacon_grace
- type: float
- level: advanced
- desc: Period in seconds from last beacon to manager dropping state about a monitored
- service (RGW, rbd-mirror, etc.)
- default: 1_min
- services:
- - mgr
- name: mgr_client_service_daemon_unregister_timeout
type: float
level: dev
desc: Time to wait during shutdown to deregister service with mgr
default: 1
-- name: mgr_debug_aggressive_pg_num_changes
- type: bool
- level: dev
- desc: Bypass most throttling and safety checks in pg[p]_num controller
- default: false
- services:
- - mgr
- name: mon_mgr_digest_period
type: int
level: dev
--- /dev/null
+# -*- mode: YAML -*-
+---
+
+options:
+- name: osd_numa_prefer_iface
+ type: bool
+ level: advanced
+ desc: prefer IP on network interface on same numa node as storage
+ default: true
+ see_also:
+ - osd_numa_auto_affinity
+ flags:
+ - startup
+- name: osd_numa_auto_affinity
+ type: bool
+ level: advanced
+ desc: automatically set affinity to numa node when storage and network match
+ default: true
+ flags:
+ - startup
+- name: osd_numa_node
+ type: int
+ level: advanced
+ desc: set affinity to a numa node (-1 for none)
+ default: -1
+ see_also:
+ - osd_numa_auto_affinity
+ flags:
+ - startup
+- name: osd_smart_report_timeout
+ type: uint
+ level: advanced
+ desc: Timeout (in seconds) for smartctl to run
+ default: 5
+# verify backend can support configured max object name length
+- name: osd_check_max_object_name_len_on_startup
+ type: bool
+ level: dev
+ default: true
+ with_legacy: true
+- name: osd_max_backfills
+ type: uint
+ level: advanced
+ desc: Maximum number of concurrent local and remote backfills or recoveries per
+ OSD
+ long_desc: There can be osd_max_backfills local reservations AND the same remote
+ reservations per OSD. So a value of 1 lets this OSD participate as 1 PG primary
+ in recovery and 1 shard of another recovering PG.
+ fmt_desc: The maximum number of backfills allowed to or from a single OSD.
+ Note that this is applied separately for read and write operations.
+ default: 1
+ flags:
+ - runtime
+ with_legacy: true
+# Minimum recovery priority (255 = max, smaller = lower)
+- name: osd_min_recovery_priority
+ type: int
+ level: advanced
+ desc: Minimum priority below which recovery is not performed
+ long_desc: The purpose here is to prevent the cluster from doing *any* lower priority
+ work (e.g., rebalancing) below this threshold and focus solely on higher priority
+ work (e.g., replicating degraded objects).
+ default: 0
+ with_legacy: true
+- name: osd_backfill_retry_interval
+ type: float
+ level: advanced
+ desc: how frequently to retry backfill reservations after being denied (e.g., due
+ to a full OSD)
+ fmt_desc: The number of seconds to wait before retrying backfill requests.
+ default: 30
+ with_legacy: true
+- name: osd_recovery_retry_interval
+ type: float
+ level: advanced
+ desc: how frequently to retry recovery reservations after being denied (e.g., due
+ to a full OSD)
+ default: 30
+ with_legacy: true
+- name: osd_recovery_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
+ Increasing this value will slow down recovery operations while
+ client operations will be less impacted.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op for HDDs
+ fmt_desc: Time in seconds to sleep before next recovery or backfill op
+ for HDDs.
+ default: 0.1
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op for SSDs
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op
+ for SSDs.
+ default: 0
+ see_also:
+ - osd_recovery_sleep
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op when data is
+ on HDD and journal is on SSD
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op
+ when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
+ default: 0.025
+ see_also:
+ - osd_recovery_sleep
+ flags:
+ - runtime
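+# The recovery sleep options above are runtime-tunable; for example, recovery
+# on rotational OSDs can be throttled further without a restart via:
+#   ceph config set osd osd_recovery_sleep_hdd 0.2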
+- name: osd_snap_trim_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim (overrides values below)
+ fmt_desc: Time in seconds to sleep before next snap trim op.
+ Increasing this value will slow down snap trimming.
+ This option overrides backend specific variants.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_snap_trim_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim for HDDs
+ default: 5
+ flags:
+ - runtime
+- name: osd_snap_trim_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim for SSDs
+ fmt_desc: Time in seconds to sleep before next snap trim op
+ for SSD OSDs (including NVMe).
+ default: 0
+ flags:
+ - runtime
+- name: osd_snap_trim_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim when data is on HDD and journal
+ is on SSD
+ fmt_desc: Time in seconds to sleep before next snap trim op
+ when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
+ default: 2
+ flags:
+ - runtime
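+# Example: with osd_snap_trim_sleep left at its default of 0, the
+# backend-specific values above apply (5 s on HDD, 0 on SSD, 2 s on hybrid);
+# any non-zero osd_snap_trim_sleep overrides all three variants.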
+- name: osd_scrub_invalid_stats
+ type: bool
+ level: advanced
+ default: true
+ with_legacy: true
+- name: osd_max_scrubs
+ type: int
+ level: advanced
+ desc: Maximum concurrent scrubs on a single OSD
+ fmt_desc: The maximum number of simultaneous scrub operations for
+ a Ceph OSD Daemon.
+ default: 1
+ with_legacy: true
+- name: osd_scrub_during_recovery
+ type: bool
+ level: advanced
+ desc: Allow scrubbing when PGs on the OSD are undergoing recovery
+ fmt_desc: Allow scrub during recovery. Setting this to ``false`` will disable
+ scheduling new scrubs (and deep-scrubs) while there is active recovery.
+ Already running scrubs will be continued. This might be useful to reduce
+ load on busy clusters.
+ default: false
+ with_legacy: true
+- name: osd_repair_during_recovery
+ type: bool
+ level: advanced
+ desc: Allow requested repairing when PGs on the OSD are undergoing recovery
+ default: false
+ with_legacy: true
+- name: osd_scrub_begin_hour
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to this hour of the day or later
+ long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
+ fmt_desc: This restricts scrubbing to this hour of the day or later.
+ Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0``
+ to allow scrubbing the entire day. Together with ``osd_scrub_end_hour``, this
+ defines a time window in which scrubs can happen. However, a scrub is
+ performed regardless of the time window whenever the placement group's scrub
+ interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_end_hour
+ min: 0
+ max: 23
+ with_legacy: true
+- name: osd_scrub_end_hour
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to hours of the day earlier than this
+ long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
+ fmt_desc: This restricts scrubbing to hours of the day earlier than this.
+ Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0`` to allow scrubbing
+ for the entire day. Together with ``osd_scrub_begin_hour``, this defines a time
+ window in which scrubs can happen. However, a scrub is performed regardless of
+ the time window whenever the placement group's scrub interval exceeds
+ ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_begin_hour
+ min: 0
+ max: 23
+ with_legacy: true
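+# Example: osd_scrub_begin_hour = 0 with osd_scrub_end_hour = 6 limits newly
+# scheduled scrubs to the 00:00-06:00 window, except for PGs whose scrub
+# interval has already exceeded osd_scrub_max_interval.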
+- name: osd_scrub_begin_week_day
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to this day of the week or later
+ long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 and
+ osd_scrub_end_week_day=0 for the entire week.
+ fmt_desc: This restricts scrubbing to this day of the week or later.
+ 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
+ and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
+ Together with ``osd_scrub_end_week_day``, this defines a time window in which
+ scrubs can happen. However, a scrub is performed regardless of the time window
+ whenever the PG's scrub interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_end_week_day
+ min: 0
+ max: 6
+ with_legacy: true
+- name: osd_scrub_end_week_day
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to days of the week earlier than this
+ long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 and
+ osd_scrub_end_week_day=0 for the entire week.
+ fmt_desc: This restricts scrubbing to days of the week earlier than this.
+ 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
+ and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
+ Together with ``osd_scrub_begin_week_day``, this defines a time window in
+ which scrubs can happen. However, a scrub is performed regardless of the time
+ window whenever the placement group's scrub interval exceeds
+ ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_begin_week_day
+ min: 0
+ max: 6
+ with_legacy: true
+- name: osd_scrub_load_threshold
+ type: float
+ level: advanced
+ desc: Allow scrubbing when system load divided by number of CPUs is below this value
+ fmt_desc: The normalized maximum load. Ceph will not scrub when the system load
+ (as defined by ``getloadavg() / number of online CPUs``) is higher than this number.
+ Default is ``0.5``.
+ default: 0.5
+ with_legacy: true
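+# Example: with the default threshold of 0.5, a host with 8 online CPUs stops
+# scheduling new scrubs once its load average (getloadavg()) exceeds 4.0.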
+# if load is low
+- name: osd_scrub_min_interval
+ type: float
+ level: advanced
+ desc: Scrub each PG no more often than this interval
+ fmt_desc: The minimal interval in seconds for scrubbing the Ceph OSD Daemon
+ when the Ceph Storage Cluster load is low.
+ default: 1_day
+ see_also:
+ - osd_scrub_max_interval
+ with_legacy: true
+# regardless of load
+- name: osd_scrub_max_interval
+ type: float
+ level: advanced
+ desc: Scrub each PG no less often than this interval
+ fmt_desc: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
+ irrespective of cluster load.
+ default: 7_day
+ see_also:
+ - osd_scrub_min_interval
+ with_legacy: true
+# randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
+- name: osd_scrub_interval_randomize_ratio
+ type: float
+ level: advanced
+ desc: Ratio of scrub interval to randomly vary
+ long_desc: This prevents a scrub 'stampede' by randomly varying the scrub intervals
+ so that they are soon uniformly distributed over the week
+ fmt_desc: Add a random delay to ``osd_scrub_min_interval`` when scheduling
+ the next scrub job for a PG. The delay is a random
+ value less than ``osd_scrub_min_interval`` \*
+ ``osd_scrub_interval_randomize_ratio``. The default setting
+ spreads scrubs throughout the allowed time
+ window of ``[1, 1.5]`` \* ``osd_scrub_min_interval``.
+ default: 0.5
+ see_also:
+ - osd_scrub_min_interval
+ with_legacy: true
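+# Example: with osd_scrub_min_interval = 1 day and the default ratio of 0.5,
+# each PG's next scrub is scheduled at a random point within 1 to 1.5 days
+# after the previous one.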
+# the probability to back off the scheduled scrub
+- name: osd_scrub_backoff_ratio
+ type: float
+ level: dev
+ desc: Backoff ratio for scheduling scrubs
+ long_desc: This is the percentage of ticks that do NOT schedule scrubs; 66% means
+ that 1 out of 3 ticks will schedule scrubs
+ default: 0.66
+ with_legacy: true
+- name: osd_scrub_chunk_min
+ type: int
+ level: advanced
+ desc: Minimum number of objects to scrub in a single chunk
+ fmt_desc: The minimum number of object store chunks to scrub during a single operation.
+ Ceph blocks writes to a single chunk during scrub.
+ default: 5
+ see_also:
+ - osd_scrub_chunk_max
+ with_legacy: true
+- name: osd_scrub_chunk_max
+ type: int
+ level: advanced
+ desc: Maximum number of objects to scrub in a single chunk
+ fmt_desc: The maximum number of object store chunks to scrub during a single operation.
+ default: 25
+ see_also:
+ - osd_scrub_chunk_min
+ with_legacy: true
+# sleep between [deep]scrub ops
+- name: osd_scrub_sleep
+ type: float
+ level: advanced
+ desc: Duration to inject a delay during scrubbing
+ fmt_desc: Time to sleep before scrubbing the next group of chunks. Increasing this value will slow
+ down the overall rate of scrubbing so that client operations will be less impacted.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+# more sleep between [deep]scrub ops
+- name: osd_scrub_extended_sleep
+ type: float
+ level: advanced
+ desc: Duration to inject a delay during scrubbing out of scrubbing hours
+ default: 0
+ see_also:
+ - osd_scrub_begin_hour
+ - osd_scrub_end_hour
+ - osd_scrub_begin_week_day
+ - osd_scrub_end_week_day
+ with_legacy: true
+# whether auto-repair inconsistencies upon deep-scrubbing
+- name: osd_scrub_auto_repair
+ type: bool
+ level: advanced
+ desc: Automatically repair damaged objects detected during scrub
+ fmt_desc: Setting this to ``true`` will enable automatic PG repair when errors
+ are found by scrubs or deep-scrubs. However, if more than
+ ``osd_scrub_auto_repair_num_errors`` errors are found, a repair is NOT performed.
+ default: false
+ with_legacy: true
+# only auto-repair when number of errors is below this threshold
+- name: osd_scrub_auto_repair_num_errors
+ type: uint
+ level: advanced
+ desc: Maximum number of detected errors to automatically repair
+ fmt_desc: Auto repair will not occur if more than this many errors are found.
+ default: 5
+ see_also:
+ - osd_scrub_auto_repair
+ with_legacy: true
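+# Example: with osd_scrub_auto_repair = true and the default threshold of 5,
+# a deep scrub that finds 3 inconsistent objects triggers an automatic repair,
+# while one that finds 6 does not.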
+- name: osd_scrub_max_preemptions
+ type: uint
+ level: advanced
+ desc: Set the maximum number of times we will preempt a deep scrub due to a client
+ operation before blocking client IO to complete the scrub
+ default: 5
+ min: 0
+ max: 30
+- name: osd_deep_scrub_interval
+ type: float
+ level: advanced
+ desc: Deep scrub each PG (i.e., verify data checksums) at least this often
+ fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
+ ``osd_scrub_load_threshold`` does not affect this setting.
+ default: 7_day
+ with_legacy: true
+- name: osd_deep_scrub_randomize_ratio
+ type: float
+ level: advanced
+ desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
+ are deep)
+ long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
+ are uniformly distributed over the week
+ default: 0.15
+ with_legacy: true
+- name: osd_deep_scrub_stride
+ type: size
+ level: advanced
+ desc: Number of bytes to read from an object at a time during deep scrub
+ fmt_desc: Read size when doing a deep scrub.
+ default: 512_K
+ with_legacy: true
+- name: osd_deep_scrub_keys
+ type: int
+ level: advanced
+ desc: Number of keys to read from an object at a time during deep scrub
+ default: 1024
+ with_legacy: true
+# objects must be this old (seconds) before we update the whole-object digest on scrub
+- name: osd_deep_scrub_update_digest_min_age
+ type: int
+ level: advanced
+ desc: Update overall object digest only if object was last modified longer ago than
+ this
+ default: 2_hr
+ with_legacy: true
+- name: osd_deep_scrub_large_omap_object_key_threshold
+ type: uint
+ level: advanced
+ desc: Warn when we encounter an object with more omap keys than this
+ default: 200000
+ services:
+ - osd
+ - mds
+ see_also:
+ - osd_deep_scrub_large_omap_object_value_sum_threshold
+ with_legacy: true
+- name: osd_deep_scrub_large_omap_object_value_sum_threshold
+ type: size
+ level: advanced
+ desc: Warn when we encounter an object with more omap key bytes than this
+ default: 1_G
+ services:
+ - osd
+ see_also:
+ - osd_deep_scrub_large_omap_object_key_threshold
+ with_legacy: true
+# where rados plugins are stored
+- name: osd_class_dir
+ type: str
+ level: advanced
+ default: @CMAKE_INSTALL_LIBDIR@/rados-classes
+ fmt_desc: The class path for RADOS class plug-ins.
+ with_legacy: true
+- name: osd_open_classes_on_start
+ type: bool
+ level: advanced
+ default: true
+ with_legacy: true
+# list of object classes allowed to be loaded (allow all: *)
+- name: osd_class_load_list
+ type: str
+ level: advanced
+ default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
+ user version cas cmpomap queue 2pc_queue fifo
+ with_legacy: true
+# list of object classes with default execute perm (allow all: *)
+- name: osd_class_default_list
+ type: str
+ level: advanced
+ default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
+ user version cas cmpomap queue 2pc_queue fifo
+ with_legacy: true
+- name: osd_agent_max_ops
+ type: int
+ level: advanced
+ desc: maximum concurrent tiering operations for tiering agent
+ fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
+ in the high speed mode.
+ default: 4
+ with_legacy: true
+- name: osd_agent_max_low_ops
+ type: int
+ level: advanced
+ desc: maximum concurrent low-priority tiering operations for tiering agent
+ fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
+ in the low speed mode.
+ default: 2
+ with_legacy: true
+- name: osd_agent_min_evict_effort
+ type: float
+ level: advanced
+ desc: minimum effort to expend evicting clean objects
+ default: 0.1
+ min: 0
+ max: 0.99
+ with_legacy: true
+- name: osd_agent_quantize_effort
+ type: float
+ level: advanced
+ desc: size of quantize unit for eviction effort
+ default: 0.1
+ with_legacy: true
+- name: osd_agent_delay_time
+ type: float
+ level: advanced
+ desc: how long agent should sleep if it has no work to do
+ default: 5
+ with_legacy: true
+# decay atime and hist histograms after how many objects go by
+- name: osd_agent_hist_halflife
+ type: int
+ level: advanced
+ desc: halflife of agent atime and temp histograms
+ default: 1000
+ with_legacy: true
+- name: osd_agent_slop
+ type: float
+ level: advanced
+ desc: slop factor to avoid switching tiering flush and eviction mode
+ default: 0.02
+ with_legacy: true
+- name: osd_find_best_info_ignore_history_les
+ type: bool
+ level: dev
+ desc: ignore last_epoch_started value when peering AND PROBABLY LOSE DATA
+ long_desc: THIS IS AN EXTREMELY DANGEROUS OPTION THAT SHOULD ONLY BE USED AT THE
+ DIRECTION OF A DEVELOPER. It makes peering ignore the last_epoch_started value,
+ which can allow an OSD to be treated as having an authoritative view of a PG's
+ contents even when that view is in fact old and stale, typically leading to
+ data loss (by believing a stale PG is up to date).
+ default: false
+ with_legacy: true
+- name: osd_uuid
+ type: uuid
+ level: advanced
+ desc: uuid label for a new OSD
+ fmt_desc: The universally unique identifier (UUID) for the Ceph OSD Daemon.
+ note: The ``osd_uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
+ applies to the entire cluster.
+ flags:
+ - create
+ with_legacy: true
+- name: osd_data
+ type: str
+ level: advanced
+ desc: path to OSD data
+ fmt_desc: The path to the OSD's data. You must create the directory when
+ deploying Ceph. You should mount a drive for OSD data at this
+ mount point. We do not recommend changing the default.
+ default: /var/lib/ceph/osd/$cluster-$id
+ flags:
+ - no_mon_update
+ with_legacy: true
+- name: osd_journal
+ type: str
+ level: advanced
+ desc: path to OSD journal (when FileStore backend is in use)
+ fmt_desc: The path to the OSD's journal. This may be a path to a file or a
+ block device (such as a partition of an SSD). If it is a file,
+ you must create the directory to contain it. We recommend using a
+ separate fast device when the ``osd_data`` drive is an HDD.
+ default: /var/lib/ceph/osd/$cluster-$id/journal
+ flags:
+ - no_mon_update
+ with_legacy: true
+- name: osd_journal_size
+ type: size
+ level: advanced
+ desc: size of FileStore journal (in MiB)
+ fmt_desc: The size of the journal in megabytes.
+ default: 5_K
+ flags:
+ - create
+ with_legacy: true
+- name: osd_journal_flush_on_shutdown
+ type: bool
+ level: advanced
+ desc: flush FileStore journal contents during clean OSD shutdown
+ default: true
+ with_legacy: true
+- name: osd_compact_on_start
+ type: bool
+ level: advanced
+ desc: compact OSD's object store's OMAP on start
+ default: false
+# Flags controlling specific parts of the osd mount() process:
+# 1 skips journal replay, 2 skips mounting omap, and 3 skips both.
+# This can help bring the osd daemon back up when, for example, the
+# journal is totally corrupted.
+- name: osd_os_flags
+ type: uint
+ level: dev
+ desc: flags to skip filestore omap or journal initialization
+ default: 0
+- name: osd_max_write_size
+ type: size
+ level: advanced
+ desc: Maximum size of a RADOS write operation in megabytes
+ long_desc: This setting prevents clients from doing very large writes to RADOS. If
+ you set this to a value below what clients expect, they will receive an error
+ when attempting to write to the cluster.
+ fmt_desc: The maximum size of a write in megabytes.
+ default: 90
+ min: 4
+ with_legacy: true
+- name: osd_max_pgls
+ type: uint
+ level: advanced
+ desc: maximum number of results when listing objects in a pool
+ fmt_desc: The maximum number of objects to return when listing objects in a
+ pool (pgls). A client requesting a large number can tie up the Ceph OSD Daemon.
+ default: 1_K
+ with_legacy: true
+- name: osd_client_message_size_cap
+ type: size
+ level: advanced
+ desc: maximum memory to devote to in-flight client requests
+ long_desc: If this value is exceeded, the OSD will not read any new client data
+ off of the network until memory is freed.
+ fmt_desc: The largest client data message allowed in memory.
+ default: 500_M
+ with_legacy: true
+- name: osd_client_message_cap
+ type: uint
+ level: advanced
+ desc: maximum number of in-flight client requests
+ default: 0
+ with_legacy: true
+- name: osd_crush_update_on_start
+ type: bool
+ level: advanced
+ desc: update OSD CRUSH location on startup
+ default: true
+ with_legacy: true
+- name: osd_class_update_on_start
+ type: bool
+ level: advanced
+ desc: set OSD device class on startup
+ default: true
+ with_legacy: true
+- name: osd_crush_initial_weight
+ type: float
+ level: advanced
+ desc: if >= 0, initial CRUSH weight for newly created OSDs
+ long_desc: If this value is negative, the size of the OSD in TiB is used.
+ fmt_desc: The initial CRUSH weight for newly added OSDs. With the default
+ value of ``-1``, a newly added OSD's CRUSH weight is set to its device size
+ in TiB. See `Weighting Bucket Items`_ for details.
+ default: -1
+ with_legacy: true
+# Allows the "peered" state for recovery and backfill below min_size
+- name: osd_allow_recovery_below_min_size
+ type: bool
+ level: dev
+ desc: allow replicated pools to recover with < min_size active members
+ default: true
+ services:
+ - osd
+ with_legacy: true
+# cap on # of inc maps we send to peers, clients
+- name: osd_map_share_max_epochs
+ type: int
+ level: advanced
+ default: 40
+ with_legacy: true
+- name: osd_map_cache_size
+ type: int
+ level: advanced
+ default: 50
+ fmt_desc: The number of OSD maps to keep cached.
+ with_legacy: true
+- name: osd_pg_epoch_max_lag_factor
+ type: float
+ level: advanced
+ desc: Max multiple of the map cache that PGs can lag before we throttle map ingest
+ default: 2
+ see_also:
+ - osd_map_cache_size
+- name: osd_inject_bad_map_crc_probability
+ type: float
+ level: dev
+ default: 0
+ with_legacy: true
+- name: osd_inject_failure_on_pg_removal
+ type: bool
+ level: dev
+ default: false
+ with_legacy: true
+# shut down the OSD if its status flips (marked down) more than max_markdown_count times within the most recent max_markdown_period seconds
+- name: osd_max_markdown_period
+ type: int
+ level: advanced
+ default: 10_min
+ with_legacy: true
+- name: osd_max_markdown_count
+ type: int
+ level: advanced
+ default: 5
+ with_legacy: true
+- name: osd_op_thread_timeout
+ type: int
+ level: advanced
+ default: 15
+ fmt_desc: The Ceph OSD Daemon operation thread timeout in seconds.
+ with_legacy: true
+- name: osd_op_thread_suicide_timeout
+ type: int
+ level: advanced
+ default: 150
+ with_legacy: true
+- name: osd_op_pq_max_tokens_per_priority
+ type: uint
+ level: advanced
+ default: 4_M
+ with_legacy: true
+- name: osd_op_pq_min_cost
+ type: size
+ level: advanced
+ default: 64_K
+ with_legacy: true
+# preserve clone_overlap during recovery/migration
+- name: osd_recover_clone_overlap
+ type: bool
+ level: advanced
+ default: true
+ fmt_desc: Preserves clone overlap during recovery. Should always be set
+ to ``true``.
+ with_legacy: true
+- name: osd_num_cache_shards
+ type: size
+ level: advanced
+ desc: The number of cache shards to use in the object store.
+ default: 32
+ flags:
+ - startup
+- name: osd_op_num_threads_per_shard
+ type: int
+ level: advanced
+ default: 0
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_threads_per_shard_hdd
+ type: int
+ level: advanced
+ default: 1
+ see_also:
+ - osd_op_num_threads_per_shard
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_threads_per_shard_ssd
+ type: int
+ level: advanced
+ default: 2
+ see_also:
+ - osd_op_num_threads_per_shard
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards
+ type: int
+ level: advanced
+ fmt_desc: The number of shards allocated for a given OSD. Each shard has its own processing queue.
+ PGs on the OSD are distributed evenly among the shards. This setting overrides _ssd and _hdd if
+ non-zero.
+ default: 0
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards_hdd
+ type: int
+ level: advanced
+ fmt_desc: the number of shards allocated for a given OSD (for rotational media).
+ default: 5
+ see_also:
+ - osd_op_num_shards
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards_ssd
+ type: int
+ level: advanced
+ fmt_desc: the number of shards allocated for a given OSD (for solid state media).
+ default: 8
+ see_also:
+ - osd_op_num_shards
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_skip_data_digest
+ type: bool
+ level: dev
+ desc: Do not store full-object checksums if the backend (bluestore) does its own
+ checksums. Only usable with all BlueStore OSDs.
+ default: false
+# PrioritizedQueue (prio), Weighted Priority Queue (wpq),
+# mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
+# and "mclock_client" are based on the mClock/dmClock algorithm
+# (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
+# class the operation belongs to. "mclock_client" does the same but
+# also works to enforce fairness between clients. "debug_random"
+# chooses among all four with equal probability.
+- name: osd_op_queue
+ type: str
+ level: advanced
+ desc: which operation priority queue algorithm to use
+ long_desc: which operation priority queue algorithm to use
+ fmt_desc: This sets the type of queue to be used for prioritizing ops
+ within each OSD. Both queues feature a strict sub-queue which is
+ dequeued before the normal queue. The normal queue is different
+ between implementations. The WeightedPriorityQueue (``wpq``)
+ dequeues operations in relation to their priorities to prevent
+ starvation of any queue. WPQ should help in cases where a few OSDs
+ are more overloaded than others. The mClockQueue
+ (``mclock_scheduler``) prioritizes operations based on which class
+ they belong to (recovery, scrub, snaptrim, client op, osd subop).
+ See `QoS Based on mClock`_. Requires a restart.
+ default: mclock_scheduler
+ see_also:
+ - osd_op_queue_cut_off
+ enum_values:
+ - wpq
+ - mclock_scheduler
+ - debug_random
+ with_legacy: true
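+# Example: switching the scheduler back to wpq requires an OSD restart to take
+# effect:
+#   ceph config set osd osd_op_queue wpq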
+# Min priority to go to strict queue. (low, high)
+- name: osd_op_queue_cut_off
+ type: str
+ level: advanced
+ desc: the threshold between high priority ops and low priority ops
+ long_desc: the threshold between high priority ops that use strict priority ordering
+ and low priority ops that use a fairness algorithm that may or may not incorporate
+ priority
+ fmt_desc: This selects which priority ops will be sent to the strict
+ queue versus the normal queue. The ``low`` setting sends all
+ replication ops and higher to the strict queue, while the ``high``
+ option sends only replication acknowledgment ops and higher to
+ the strict queue. Setting this to ``high`` should help when a few
+ OSDs in the cluster are very busy especially when combined with
+ ``wpq`` in the ``osd_op_queue`` setting. OSDs that are very busy
+ handling replication traffic could starve primary client traffic
+ on these OSDs without these settings. Requires a restart.
+ default: high
+ see_also:
+ - osd_op_queue
+ enum_values:
+ - low
+ - high
+ - debug_random
+ with_legacy: true
+- name: osd_mclock_scheduler_client_res
+ type: uint
+ level: advanced
+ desc: IO proportion reserved for each client (default)
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for each client (default).
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_client_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each client (default) over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each client (default) over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_client_lim
+ type: uint
+ level: advanced
+ desc: IO limit for each client (default) over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for each client (default) over reservation.
+ default: 999999
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_recovery_res
+ type: uint
+ level: advanced
+ desc: IO proportion reserved for background recovery (default)
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for background recovery (default).
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_recovery_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each background recovery over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each background recovery over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_recovery_lim
+ type: uint
+ level: advanced
+ desc: IO limit for background recovery over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for background recovery over reservation.
+ default: 999999
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_res
+ type: uint
+ level: advanced
+ desc: IO proportion reserved for background best_effort (default)
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for background best_effort (default).
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each background best_effort over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each background best_effort over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_lim
+ type: uint
+ level: advanced
+ desc: IO limit for background best_effort over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for background best_effort over reservation.
+ default: 999999
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_anticipation_timeout
+ type: float
+ level: advanced
+ desc: mclock anticipation timeout in seconds
+ long_desc: the amount of time that mclock waits until the unused resource is forfeited
+ default: 0
+- name: osd_mclock_cost_per_io_usec
+ type: float
+ level: dev
+ desc: Cost per IO in microseconds to consider per OSD (overrides _ssd and _hdd if
+ non-zero)
+ long_desc: This option specifies the cost factor to consider in usec per OSD. This
+ is considered by the mclock scheduler to set an additional cost factor in QoS
+ calculations. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: Cost per IO in microseconds to consider per OSD (overrides _ssd
+ and _hdd if non-zero)
+ default: 0
+ flags:
+ - runtime
+- name: osd_mclock_cost_per_io_usec_hdd
+ type: float
+ level: dev
+ desc: Cost per IO in microseconds to consider per OSD (for rotational media)
+ long_desc: This option specifies the cost factor to consider in usec per OSD for
+ rotational device type. This is considered by the mclock_scheduler to set an additional
+ cost factor in QoS calculations. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: Cost per IO in microseconds to consider per OSD (for rotational
+ media)
+ default: 25000
+ flags:
+ - runtime
+- name: osd_mclock_cost_per_io_usec_ssd
+ type: float
+ level: dev
+ desc: Cost per IO in microseconds to consider per OSD (for solid state media)
+ long_desc: This option specifies the cost factor to consider in usec per OSD for
+ solid state device type. This is considered by the mclock_scheduler to set an
+ additional cost factor in QoS calculations. Only considered for osd_op_queue =
+ mclock_scheduler
+ fmt_desc: Cost per IO in microseconds to consider per OSD (for solid state
+ media)
+ default: 50
+ flags:
+ - runtime
+- name: osd_mclock_cost_per_byte_usec
+ type: float
+ level: dev
+ desc: Cost per byte in microseconds to consider per OSD (overrides _ssd and _hdd
+ if non-zero)
+ long_desc: This option specifies the cost per byte to consider in microseconds per
+ OSD. This is considered by the mclock scheduler to set an additional cost factor
+ in QoS calculations. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: Cost per byte in microseconds to consider per OSD (overrides _ssd
+ and _hdd if non-zero)
+ default: 0
+ flags:
+ - runtime
+- name: osd_mclock_cost_per_byte_usec_hdd
+ type: float
+ level: dev
+ desc: Cost per byte in microseconds to consider per OSD (for rotational media)
+ long_desc: This option specifies the cost per byte to consider in microseconds per
+ OSD for rotational device type. This is considered by the mclock_scheduler to
+ set an additional cost factor in QoS calculations. Only considered for osd_op_queue
+ = mclock_scheduler
+ fmt_desc: Cost per byte in microseconds to consider per OSD (for rotational
+ media)
+ default: 5.2
+ flags:
+ - runtime
+- name: osd_mclock_cost_per_byte_usec_ssd
+ type: float
+ level: dev
+ desc: Cost per byte in microseconds to consider per OSD (for solid state media)
+ long_desc: This option specifies the cost per byte to consider in microseconds per
+ OSD for solid state device type. This is considered by the mclock_scheduler to
+ set an additional cost factor in QoS calculations. Only considered for osd_op_queue
+ = mclock_scheduler
+ fmt_desc: Cost per byte in microseconds to consider per OSD (for solid state
+ media)
+ default: 0.011
+ flags:
+ - runtime
+- name: osd_mclock_max_capacity_iops
+ type: float
+ level: basic
+ desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (overrides _ssd
+ and _hdd if non-zero)
+ long_desc: This option specifies the max osd capacity in iops per OSD. Helps in
+ QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
+ = mclock_scheduler
+ fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD
+ (overrides _ssd and _hdd if non-zero)
+ default: 0
+ flags:
+ - runtime
+- name: osd_mclock_max_capacity_iops_hdd
+ type: float
+ level: basic
+ desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for rotational
+ media)
+ long_desc: This option specifies the max OSD capacity in iops per OSD. Helps in
+ QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
+ = mclock_scheduler
+ fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
+ rotational media)
+ default: 315
+ flags:
+ - runtime
+- name: osd_mclock_max_capacity_iops_ssd
+ type: float
+ level: basic
+ desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for solid state
+ media)
+ long_desc: This option specifies the max OSD capacity in iops per OSD. Helps in
+ QoS calculations when enabling a dmclock profile. Only considered for osd_op_queue
+ = mclock_scheduler
+ fmt_desc: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
+ solid state media)
+ default: 21500
+ flags:
+ - runtime
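+# Example: a measured IOPS capacity can be recorded for an individual OSD
+# (per-daemon override), e.g. for an HDD-backed osd.0:
+#   ceph config set osd.0 osd_mclock_max_capacity_iops_hdd 450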
+- name: osd_mclock_profile
+ type: str
+ level: advanced
+ desc: Which mclock profile to use
+ long_desc: This option specifies the mclock profile to enable - one among the set
+ of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: |
+ This sets the type of mclock profile to use for providing QoS
+ based on operations belonging to different classes (background
+ recovery, scrub, snaptrim, client op, osd subop). Once a built-in
+ profile is enabled, the lower level mclock resource control
+ parameters [*reservation, weight, limit*] and some Ceph
+ configuration parameters are set transparently. Note that the
+ above does not apply for the *custom* profile.
+ default: high_client_ops
+ see_also:
+ - osd_op_queue
+ enum_values:
+ - balanced
+ - high_recovery_ops
+ - high_client_ops
+ - custom
+ flags:
+ - runtime
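+# Example: under the custom profile the reservation/weight/limit options above
+# are set by the operator instead of being derived from a built-in profile,
+# e.g.:
+#   ceph config set osd osd_mclock_profile custom
+#   ceph config set osd osd_mclock_scheduler_client_wgt 4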
+# Set to true for testing. Users should NOT set this.
+# If set to true, any error will be reported even after enough
+# shards have been read to decode the object.
+- name: osd_read_ec_check_for_errors
+ type: bool
+ level: advanced
+ default: false
+ with_legacy: true
+- name: osd_recovery_delay_start
+ type: float
+ level: advanced
+ default: 0
+ fmt_desc: After peering completes, Ceph will delay for the specified number
+ of seconds before starting to recover RADOS objects.
+ with_legacy: true
+- name: osd_recovery_max_active
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (overrides _ssd
+ and _hdd if non-zero)
+ fmt_desc: The number of active recovery requests per OSD at one time. More
+ requests will accelerate recovery, but the requests place an
+ increased load on the cluster.
+ note: This value is only used if it is non-zero. Normally it
+ is ``0``, which means that the ``hdd`` or ``ssd`` values
+ (below) are used, depending on the type of the primary
+ device backing the OSD.
+ default: 0
+ see_also:
+ - osd_recovery_max_active_hdd
+ - osd_recovery_max_active_ssd
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_max_active_hdd
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (for rotational
+ devices)
+ fmt_desc: The number of active recovery requests per OSD at one time, if the
+ primary device is rotational.
+ default: 3
+ see_also:
+ - osd_recovery_max_active
+ - osd_recovery_max_active_ssd
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_max_active_ssd
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (for non-rotational
+ solid state devices)
+ fmt_desc: The number of active recovery requests per OSD at one time, if the
+ primary device is non-rotational (i.e., an SSD).
+ default: 10
+ see_also:
+ - osd_recovery_max_active
+ - osd_recovery_max_active_hdd
+ flags:
+ - runtime
+ with_legacy: true
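+# Example: with osd_recovery_max_active left at 0, an OSD whose primary device
+# is rotational uses osd_recovery_max_active_hdd (3), while an SSD-backed OSD
+# uses osd_recovery_max_active_ssd (10).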
+- name: osd_recovery_max_single_start
+ type: uint
+ level: advanced
+ default: 1
+ fmt_desc: The maximum number of recovery operations per OSD that will be
+ newly started when an OSD is recovering.
+ with_legacy: true
+# max size of push chunk
+- name: osd_recovery_max_chunk
+ type: size
+ level: advanced
+ default: 8_M
+ fmt_desc: the maximum total size of data chunks a recovery op can carry.
+ with_legacy: true
+# max number of omap entries per chunk; 0 to disable limit
+- name: osd_recovery_max_omap_entries_per_chunk
+ type: uint
+ level: advanced
+ default: 8096
+ with_legacy: true
+# max size of a COPYFROM chunk
+- name: osd_copyfrom_max_chunk
+ type: size
+ level: advanced
+ default: 8_M
+ with_legacy: true
+# push cost per object
+- name: osd_push_per_object_cost
+ type: size
+ level: advanced
+ default: 1000
+ fmt_desc: the overhead for serving a push op
+ with_legacy: true
+# max size of push message
+- name: osd_max_push_cost
+ type: size
+ level: advanced
+ default: 8_M
+ with_legacy: true
+# max objects in single push op
+- name: osd_max_push_objects
+ type: uint
+ level: advanced
+ default: 10
+ with_legacy: true
+# Only use clone_overlap for recovery if there are fewer than
+# osd_recover_clone_overlap_limit entries in the overlap set
+- name: osd_recover_clone_overlap_limit
+ type: uint
+ level: advanced
+ default: 10
+ flags:
+ - runtime
+- name: osd_debug_feed_pullee
+ type: int
+ level: dev
+ desc: Feed a pullee, and force primary to pull a currently missing object from it
+ default: -1
+ with_legacy: true
+- name: osd_backfill_scan_min
+ type: int
+ level: advanced
+ default: 64
+ fmt_desc: The minimum number of objects per backfill scan.
+ with_legacy: true
+- name: osd_backfill_scan_max
+ type: int
+ level: advanced
+ default: 512
+ fmt_desc: The maximum number of objects per backfill scan.
+ with_legacy: true
+# minimum number of peers
+- name: osd_heartbeat_min_peers
+ type: int
+ level: advanced
+ default: 10
+ with_legacy: true
+- name: osd_delete_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction (overrides values
+ below)
+ fmt_desc: Time in seconds to sleep before the next removal transaction. This
+ throttles the PG deletion process.
+ default: 0
+ flags:
+ - runtime
+- name: osd_delete_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction for HDDs
+ default: 5
+ flags:
+ - runtime
+- name: osd_delete_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction for SSDs
+ default: 1
+ flags:
+ - runtime
+- name: osd_delete_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
+ and OSD journal or WAL+DB is on SSD
+ default: 1
+ flags:
+ - runtime