From: Kefu Chai <kchai@redhat.com>
Date: Fri, 28 May 2021 02:16:42 +0000 (+0800)
Subject: common/options: extract mgr and mon options out
X-Git-Tag: v17.1.0~1741^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dff996d9e24c6259fffce7e704e47e066c367b6a;p=ceph.git

common/options: extract mgr and mon options out

This change is one of a series of changes like
9b5e54d996a771f63b97620d4640c08f6406d843. Because we need to audit
the consumers of an option before extracting it to its dedicated
.yaml.in file, it takes considerable time to get this done.

So let's do this piecemeal.

Signed-off-by: Kefu Chai <kchai@redhat.com>
---

diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index b0e403dfb2a..e7e9455a682 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -1424,715 +1424,46 @@ options:
   desc: send ourselves a SIGTERM early during startup
   default: false
   with_legacy: true
-- name: mon_enable_op_tracker
-  type: bool
-  level: advanced
-  desc: enable/disable MON op tracking
-  default: true
-  services:
-  - mon
-- name: mon_op_complaint_time
-  type: secs
-  level: advanced
-  desc: time after which to consider a monitor operation blocked after no updates
-  default: 30
-  services:
-  - mon
-- name: mon_op_log_threshold
-  type: int
-  level: advanced
-  desc: max number of slow ops to display
-  default: 5
-  services:
-  - mon
-- name: mon_op_history_size
-  type: uint
-  level: advanced
-  desc: max number of completed ops to track
-  default: 20
-  services:
-  - mon
-- name: mon_op_history_duration
-  type: secs
-  level: advanced
-  desc: expiration time in seconds of historical MON OPS
-  default: 10_min
-  services:
-  - mon
-- name: mon_op_history_slow_op_size
-  type: uint
-  level: advanced
-  desc: max number of slow historical MON OPS to keep
-  default: 20
-  services:
-  - mon
-- name: mon_op_history_slow_op_threshold
-  type: secs
-  level: advanced
-  desc: duration of an op to be considered as a historical slow op
-  default: 10
-  services:
-  - mon
-- name: mon_data
-  type: str
-  level: advanced
-  desc: path to mon database
-  fmt_desc: The monitor's data location.
-  default: /var/lib/ceph/mon/$cluster-$id
-  services:
-  - mon
-  flags:
-  - no_mon_update
-  with_legacy: true
-# list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
-- name: mon_initial_members
-  type: str
-  level: advanced
-  fmt_desc: The IDs of initial monitors in a cluster during startup. If 
-    specified, Ceph requires an odd number of monitors to form an 
-    initial quorum (e.g., 3).
-  note: A *majority* of monitors in your cluster must be able to reach 
-    each other in order to establish a quorum. You can decrease the initial 
-    number of monitors to establish a quorum with this setting.
-  services:
-  - mon
-  flags:
-  - no_mon_update
-  - cluster_create
-  with_legacy: true
-# compact leveldb on ceph-mon start
-- name: mon_compact_on_start
-  type: bool
-  level: advanced
-  default: false
-  services:
-  - mon
-  fmt_desc: Compact the database used as Ceph Monitor store on
-    ``ceph-mon`` start. A manual compaction helps to shrink the
-    monitor database and improve the performance of it if the regular
-    compaction fails to work.
-  with_legacy: true
-# trigger leveldb compaction on bootstrap
-- name: mon_compact_on_bootstrap
-  type: bool
-  level: advanced
-  default: false
-  services:
-  - mon
-  fmt_desc: Compact the database used as Ceph Monitor store
-    on bootstrap. Monitors probe each other to establish
-    a quorum after bootstrap. If a monitor times out before joining the
-    quorum, it will start over and bootstrap again.
-  with_legacy: true
-# compact (a prefix) when we trim old states
-- name: mon_compact_on_trim
-  type: bool
-  level: advanced
-  default: true
-  services:
-  - mon
-  fmt_desc: Compact a certain prefix (including paxos) when we trim its old states.
-  with_legacy: true
-- name: mon_osdmap_full_prune_enabled
-  type: bool
-  level: advanced
-  desc: enables pruning full osdmap versions when we go over a given number of maps
-  default: true
-  services:
-  - mon
-  see_also:
-  - mon_osdmap_full_prune_min
-  - mon_osdmap_full_prune_interval
-  - mon_osdmap_full_prune_txsize
-- name: mon_osdmap_full_prune_min
-  type: uint
-  level: advanced
-  desc: minimum number of versions in the store to trigger full map pruning
-  default: 10000
-  services:
-  - mon
-  see_also:
-  - mon_osdmap_full_prune_enabled
-  - mon_osdmap_full_prune_interval
-  - mon_osdmap_full_prune_txsize
-- name: mon_osdmap_full_prune_interval
-  type: uint
-  level: advanced
-  desc: interval between maps that will not be pruned; maps in the middle will be
-    pruned.
-  default: 10
-  services:
-  - mon
-  see_also:
-  - mon_osdmap_full_prune_enabled
-  - mon_osdmap_full_prune_interval
-  - mon_osdmap_full_prune_txsize
-- name: mon_osdmap_full_prune_txsize
-  type: uint
-  level: advanced
-  desc: number of maps we will prune per iteration
-  default: 100
-  services:
-  - mon
-  see_also:
-  - mon_osdmap_full_prune_enabled
-  - mon_osdmap_full_prune_interval
-  - mon_osdmap_full_prune_txsize
-- name: mon_osd_cache_size
-  type: int
-  level: advanced
-  desc: maximum number of OSDMaps to cache in memory
-  fmt_desc: The size of osdmaps cache, not to rely on underlying store's cache
-  default: 500
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_cache_size_min
-  type: size
-  level: advanced
-  desc: The minimum amount of bytes to be kept mapped in memory for osd monitor caches.
-  fmt_desc: The minimum amount of bytes to be kept mapped in memory for osd
-     monitor caches.
-  default: 128_M
-  services:
-  - mon
-  with_legacy: true
-- name: mon_memory_target
-  type: size
-  level: basic
-  desc: The amount of bytes pertaining to osd monitor caches and kv cache to be kept
-    mapped in memory with cache auto-tuning enabled
-  fmt_desc: The amount of bytes pertaining to OSD monitor caches and KV cache
-    to be kept mapped in memory with cache auto-tuning enabled.
-  default: 2_G
-  services:
-  - mon
-  flags:
-  - runtime
-  with_legacy: true
-- name: mon_memory_autotune
-  type: bool
-  level: basic
-  desc: Autotune the cache memory being used for osd monitors and kv database
-  fmt_desc: Autotune the cache memory used for OSD monitors and KV
-    database.
-  default: true
-  services:
-  - mon
-  flags:
-  - runtime
-  with_legacy: true
-- name: mon_cpu_threads
-  type: int
-  level: advanced
-  desc: worker threads for CPU intensive background work
-  fmt_desc: Number of threads for performing CPU intensive work on monitor.
-  default: 4
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_mapping_pgs_per_chunk
-  type: int
-  level: dev
-  desc: granularity of PG placement calculation background work
-  fmt_desc: We calculate the mapping from placement group to OSDs in chunks.
-    This option specifies the number of placement groups per chunk.
-  default: 4096
-  services:
-  - mon
-  with_legacy: true
-- name: mon_clean_pg_upmaps_per_chunk
-  type: uint
-  level: dev
-  desc: granularity of PG upmap validation background work
-  default: 256
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_max_creating_pgs
-  type: int
-  level: advanced
-  desc: maximum number of PGs the mon will create at once
-  default: 1024
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_max_initial_pgs
-  type: int
-  level: advanced
-  desc: maximum number of PGs a pool will created with
-  long_desc: If the user specifies more PGs than this, the cluster will subsequently
-    split PGs after the pool is created in order to reach the target.
-  default: 1024
-  services:
-  - mon
-- name: mon_tick_interval
-  type: int
-  level: advanced
-  desc: interval for internal mon background checks
-  fmt_desc: A monitor's tick interval in seconds.
-  default: 5
-  services:
-  - mon
-  with_legacy: true
-- name: mon_session_timeout
-  type: int
-  level: advanced
-  desc: close inactive mon client connections after this many seconds
-  fmt_desc: Monitor will terminate inactive sessions stay idle over this
-    time limit.
-  default: 5_min
-  services:
-  - mon
-  with_legacy: true
-- name: mon_subscribe_interval
-  type: float
-  level: dev
-  desc: subscribe interval for pre-jewel clients
-  fmt_desc: The refresh interval (in seconds) for subscriptions. The
-    subscription mechanism enables obtaining cluster maps
-    and log information.
-  default: 1_day
-  services:
-  - mon
-  with_legacy: true
-- name: mon_delta_reset_interval
-  type: float
-  level: advanced
-  desc: window duration for rate calculations in 'ceph status'
-  fmt_desc: Seconds of inactivity before we reset the PG delta to 0. We keep
-    track of the delta of the used space of each pool, so, for
-    example, it would be easier for us to understand the progress of
-    recovery or the performance of cache tier. But if there's no
-    activity reported for a certain pool, we just reset the history of
-    deltas of that pool.
-  default: 10
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_laggy_halflife
-  type: int
-  level: advanced
-  desc: halflife of OSD 'lagginess' factor
-  fmt_desc: The number of seconds laggy estimates will decay.
-  default: 1_hr
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_laggy_weight
-  type: float
-  level: advanced
-  desc: how heavily to weight OSD marking itself back up in overall laggy_probability
-  long_desc: 1.0 means that an OSD marking itself back up (because it was marked down
-    but not actually dead) means a 100% laggy_probability; 0.0 effectively disables
-    tracking of laggy_probability.
-  fmt_desc: The weight for new samples in laggy estimation decay.
-  default: 0.3
-  services:
-  - mon
-  min: 0
-  max: 1
-  with_legacy: true
-- name: mon_osd_laggy_max_interval
-  type: int
-  level: advanced
-  desc: cap value for period for OSD to be marked for laggy_interval calculation
-  fmt_desc: Maximum value of ``laggy_interval`` in laggy estimations (in seconds).
-              Monitor uses an adaptive approach to evaluate the ``laggy_interval`` of
-              a certain OSD. This value will be used to calculate the grace time for
-              that OSD.
-  default: 5_min
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_adjust_heartbeat_grace
-  type: bool
-  level: advanced
-  desc: increase OSD heartbeat grace if peers appear to be laggy
-  long_desc: If an OSD is marked down but then marks itself back up, it implies it
-    wasn't actually down but was unable to respond to heartbeats.  If this option
-    is true, we can use the laggy_probability and laggy_interval values calculated
-    to model this situation to increase the heartbeat grace period for this OSD so
-    that it isn't marked down again.  laggy_probability is an estimated probability
-    that the given OSD is down because it is laggy (not actually down), and laggy_interval
-    is an estiate on how long it stays down when it is laggy.
-  fmt_desc: If set to ``true``, Ceph will scale based on laggy estimations.
-  default: true
-  services:
-  - mon
-  see_also:
-  - mon_osd_laggy_halflife
-  - mon_osd_laggy_weight
-  - mon_osd_laggy_max_interval
-  with_legacy: true
-- name: mon_osd_adjust_down_out_interval
-  type: bool
-  level: advanced
-  desc: increase the mon_osd_down_out_interval if an OSD appears to be laggy
-  fmt_desc: If set to ``true``, Ceph will scaled based on laggy estimations.
-  default: true
-  services:
-  - mon
-  see_also:
-  - mon_osd_adjust_heartbeat_grace
-  with_legacy: true
-- name: mon_osd_auto_mark_in
-  type: bool
-  level: advanced
-  desc: mark any OSD that comes up 'in'
-  fmt_desc: Ceph will mark any booting Ceph OSD Daemons as ``in``
-              the Ceph Storage Cluster.
-  default: false
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_auto_mark_auto_out_in
-  type: bool
-  level: advanced
-  desc: mark any OSD that comes up that was automatically marked 'out' back 'in'
-  fmt_desc: Ceph will mark booting Ceph OSD Daemons auto marked ``out``
-              of the Ceph Storage Cluster as ``in`` the cluster.
-  default: true
-  services:
-  - mon
-  see_also:
-  - mon_osd_down_out_interval
-  with_legacy: true
-- name: mon_osd_auto_mark_new_in
-  type: bool
-  level: advanced
-  desc: mark any new OSD that comes up 'in'
-  fmt_desc: Ceph will mark booting new Ceph OSD Daemons as ``in`` the
-              Ceph Storage Cluster.
-  default: true
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_destroyed_out_interval
-  type: int
-  level: advanced
-  desc: mark any OSD 'out' that has been 'destroy'ed for this long (seconds)
-  default: 10_min
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_down_out_interval
-  type: int
-  level: advanced
-  desc: mark any OSD 'out' that has been 'down' for this long (seconds)
-  fmt_desc: The number of seconds Ceph waits before marking a Ceph OSD Daemon
-              ``down`` and ``out`` if it doesn't respond.
-  default: 10_min
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_down_out_subtree_limit
-  type: str
-  level: advanced
-  desc: do not automatically mark OSDs 'out' if an entire subtree of this size is
-    down
-  fmt_desc: The smallest :term:`CRUSH` unit type that Ceph will **not**
-              automatically mark out. For instance, if set to ``host`` and if
-              all OSDs of a host are down, Ceph will not automatically mark out
-              these OSDs.
-  default: rack
-  services:
-  - mon
-  see_also:
-  - mon_osd_down_out_interval
-  flags:
-  - runtime
-- name: mon_osd_min_up_ratio
-  type: float
-  level: advanced
-  desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'up'
-  fmt_desc: The minimum ratio of ``up`` Ceph OSD Daemons before Ceph will
-              mark Ceph OSD Daemons ``down``.
-  default: 0.3
-  services:
-  - mon
-  see_also:
-  - mon_osd_down_out_interval
-  with_legacy: true
-- name: mon_osd_min_in_ratio
-  type: float
-  level: advanced
-  desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'in'
-  fmt_desc: The minimum ratio of ``in`` Ceph OSD Daemons before Ceph will
-              mark Ceph OSD Daemons ``out``.
-  default: 0.75
-  services:
-  - mon
-  see_also:
-  - mon_osd_down_out_interval
-  with_legacy: true
-- name: mon_osd_warn_op_age
-  type: float
-  level: advanced
-  desc: issue REQUEST_SLOW health warning if OSD ops are slower than this age (seconds)
-  default: 32
-  services:
-  - mgr
-  with_legacy: true
-- name: mon_osd_warn_num_repaired
-  type: uint
-  level: advanced
-  desc: issue OSD_TOO_MANY_REPAIRS health warning if an OSD has more than this many
-    read repairs
-  default: 10
-  services:
-  - mon
-- name: mon_osd_err_op_age_ratio
-  type: float
-  level: advanced
-  desc: issue REQUEST_STUCK health error if OSD ops are slower than is age (seconds)
-  default: 128
-  services:
-  - mgr
-  with_legacy: true
-- name: mon_osd_prime_pg_temp
-  type: bool
-  level: dev
-  desc: minimize peering work by priming pg_temp values after a map change
-  fmt_desc: Enables or disables priming the PGMap with the previous OSDs when an ``out``
-    OSD comes back into the cluster. With the ``true`` setting, clients
-    will continue to use the previous OSDs until the newly ``in`` OSDs for
-    a PG have peered.
-  default: true
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_prime_pg_temp_max_time
-  type: float
-  level: dev
-  desc: maximum time to spend precalculating PG mappings on map change (seconds)
-  fmt_desc: How much time in seconds the monitor should spend trying to prime the
-    PGMap when an out OSD comes back into the cluster.
-  default: 0.5
-  services:
-  - mon
-  with_legacy: true
-- name: mon_osd_prime_pg_temp_max_estimate
-  type: float
-  level: advanced
-  desc: calculate all PG mappings if estimated fraction of PGs that change is above
-    this amount
-  fmt_desc: Maximum estimate of time spent on each PG before we prime all PGs
-    in parallel.
-  default: 0.25
-  services:
-  - mon
-  with_legacy: true
-- name: mon_stat_smooth_intervals
-  type: uint
-  level: advanced
-  desc: number of PGMaps stats over which we calc the average read/write throughput
-    of the whole cluster
-  fmt_desc: Ceph will smooth statistics over the last ``N`` PG maps.
-  default: 6
-  services:
-  - mgr
-  min: 1
-- name: mon_election_timeout
-  type: float
-  level: advanced
-  desc: maximum time for a mon election (seconds)
-  fmt_desc: On election proposer, maximum waiting time for all ACKs in seconds.
-  default: 5
-  services:
-  - mon
-  with_legacy: true
-- name: mon_election_default_strategy
-  type: uint
-  level: advanced
-  desc: The election strategy to set when constructing the first monmap.
-  default: 1
-  min: 1
-  max: 3
-- name: mon_lease
-  type: float
-  level: advanced
-  desc: lease interval between quorum monitors (seconds)
-  long_desc: This setting controls how sensitive your mon quorum is to intermittent
-    network issues or other failures.
-  fmt_desc: The length (in seconds) of the lease on the monitor's versions.
-  default: 5
-  services:
-  - mon
-  with_legacy: true
-- name: mon_lease_renew_interval_factor
-  type: float
-  level: advanced
-  desc: multiple of mon_lease for the lease renewal interval
-  long_desc: Leases must be renewed before they time out.  A smaller value means frequent
-    renewals, while a value close to 1 makes a lease expiration more likely.
-  fmt_desc: |
-    ``mon_lease`` \* ``mon_lease_renew_interval_factor`` will be the
-    interval for the Leader to renew the other monitor's leases. The
-    factor should be less than ``1.0``.
-  default: 0.6
-  services:
-  - mon
-  see_also:
-  - mon_lease
-  min: 0
-  max: 0.9999999
-  with_legacy: true
-- name: mon_lease_ack_timeout_factor
-  type: float
-  level: advanced
-  desc: multiple of mon_lease for the lease ack interval before calling new election
-  fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_lease_ack_timeout_factor``
-    for the Providers to acknowledge the lease extension.
-  default: 2
-  services:
-  - mon
-  see_also:
-  - mon_lease
-  min: 1.0001
-  max: 100
-  with_legacy: true
-- name: mon_accept_timeout_factor
-  type: float
+# list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster
+- name: mon_initial_members
+  type: str
   level: advanced
-  desc: multiple of mon_lease for follower mons to accept proposed state changes before
-    calling a new election
-  fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_accept_timeout_factor``
-    for the Requester(s) to accept a Paxos update. It is also used
-    during the Paxos recovery phase for similar purposes.
-  default: 2
+  fmt_desc: The IDs of initial monitors in a cluster during startup. If 
+    specified, Ceph requires an odd number of monitors to form an 
+    initial quorum (e.g., 3).
+  note: A *majority* of monitors in your cluster must be able to reach 
+    each other in order to establish a quorum. You can decrease the initial 
+    number of monitors to establish a quorum with this setting.
   services:
   - mon
-  see_also:
-  - mon_lease
+  flags:
+  - no_mon_update
+  - cluster_create
   with_legacy: true
-- name: mon_elector_ping_timeout
+- name: mon_delta_reset_interval
   type: float
   level: advanced
-  desc: The time after which a ping 'times out' and a connection is considered down
-  default: 2
-  services:
-  - mon
-  see_also:
-  - mon_elector_ping_divisor
-- name: mon_elector_ping_divisor
-  type: uint
-  level: advanced
-  desc: We will send a ping up to this many times per timeout per
-  default: 2
-  services:
-  - mon
-  see_also:
-  - mon_elector_ping_timeout
-- name: mon_con_tracker_persist_interval
-  type: uint
-  level: advanced
-  desc: how many updates the ConnectionTracker takes before it persists to disk
+  desc: window duration for rate calculations in 'ceph status'
+  fmt_desc: Seconds of inactivity before we reset the PG delta to 0. We keep
+    track of the delta of the used space of each pool, so, for
+    example, it would be easier for us to understand the progress of
+    recovery or the performance of cache tier. But if there's no
+    activity reported for a certain pool, we just reset the history of
+    deltas of that pool.
   default: 10
   services:
-  - mon
-  min: 1
-  max: 100000
-- name: mon_con_tracker_score_halflife
+  - mgr
+  with_legacy: true
+- name: mon_stat_smooth_intervals
   type: uint
   level: advanced
-  desc: The 'halflife' used when updating/calculating peer connection scores
-  default: 43200
-  services:
-  - mon
-  min: 60
-- name: mon_elector_ignore_propose_margin
-  type: float
-  level: advanced
-  desc: The difference in connection score allowed before a peon stops ignoring out-of-quorum
-    PROPOSEs
-  default: 0.0005
-  services:
-  - mon
-- name: mon_warn_on_degraded_stretch_mode
-  type: bool
-  level: advanced
-  desc: Issue a health warning if we are in degraded stretch mode
-  default: true
-  services:
-  - mon
-- name: mon_stretch_cluster_recovery_ratio
-  type: float
-  level: advanced
-  desc: the ratio of up OSDs at which a degraded stretch cluster enters recovery
-  default: 0.6
-  services:
-  - mon
-  min: 0.51
-  max: 1
-- name: mon_stretch_recovery_min_wait
-  type: float
-  level: advanced
-  desc: how long the monitors wait before considering fully-healthy PGs as evidence
-    the stretch mode is repaired
-  default: 15
+  desc: number of PGMaps stats over which we calc the average read/write throughput
+    of the whole cluster
+  fmt_desc: Ceph will smooth statistics over the last ``N`` PG maps.
+  default: 6
   services:
-  - mon
+  - mgr
   min: 1
-- name: mon_stretch_pool_size
-  type: uint
-  level: dev
-  default: 4
-  services:
-  - mon
-  min: 3
-  max: 6
-- name: mon_stretch_pool_min_size
-  type: uint
-  level: dev
-  default: 2
-  services:
-  - mon
-  min: 2
-  max: 4
-- name: mon_clock_drift_allowed
-  type: float
-  level: advanced
-  desc: allowed clock drift (in seconds) between mons before issuing a health warning
-  default: 0.05
-  services:
-  - mon
-  with_legacy: true
-# exponential backoff for clock drift warnings
-- name: mon_clock_drift_warn_backoff
-  type: float
-  level: advanced
-  desc: exponential backoff factor for logging clock drift warnings in the cluster
-    log
-  default: 5
-  services:
-  - mon
-  with_legacy: true
-# on leader, timecheck (clock drift check) interval (seconds)
-- name: mon_timecheck_interval
-  type: float
-  level: advanced
-  desc: frequency of clock synchronization checks between monitors (seconds)
-  fmt_desc: The time check interval (clock drift check) in seconds
-    for the Leader.
-  default: 5_min
-  services:
-  - mon
-  with_legacy: true
-# on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
-- name: mon_timecheck_skew_interval
-  type: float
-  level: advanced
-  desc: frequency of clock synchronization (re)checks between monitors while clocks
-    are believed to be skewed (seconds)
-  fmt_desc: The time check interval (clock drift check) in seconds when in
-    presence of a skew in seconds for the Leader.
-  default: 30
-  services:
-  - mon
-  see_also:
-  - mon_timecheck_interval
-  with_legacy: true
 - name: mon_pg_stuck_threshold
   type: int
   level: advanced
@@ -2701,29 +2032,6 @@ options:
   services:
   - mon
   with_legacy: true
-# force mon to trim mdsmaps to this point (dangerous)
-- name: mon_mds_force_trim_to
-  type: int
-  level: dev
-  desc: force mons to trim mdsmaps/fsmaps through this epoch
-  fmt_desc: Force monitor to trim mdsmaps to this point (0 disables it.
-    dangerous, use with care)
-  default: 0
-  services:
-  - mon
-  with_legacy: true
-# skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
-- name: mon_mds_skip_sanity
-  type: bool
-  level: advanced
-  desc: skip sanity checks on fsmap/mdsmap
-  fmt_desc: Skip safety assertions on FSMap (in case of bugs where we want to
-    continue anyway). Monitor terminates if the FSMap sanity check
-    fails, but we can disable it by enabling this option.
-  default: false
-  services:
-  - mon
-  with_legacy: true
 - name: mon_debug_extra_checks
   type: bool
   level: dev
@@ -2890,189 +2198,6 @@ options:
   services:
   - mon
   with_legacy: true
-- name: mon_osd_blocklist_default_expire
-  type: float
-  level: advanced
-  desc: Duration in seconds that blocklist entries for clients remain in the OSD map
-  default: 1_hr
-  services:
-  - mon
-  with_legacy: true
-- name: mon_mds_blocklist_interval
-  type: float
-  level: dev
-  desc: Duration in seconds that blocklist entries for MDS daemons remain in the OSD
-    map
-  fmt_desc: The blocklist duration for failed MDSs in the OSD map. Note,
-    this controls how long failed MDS daemons will stay in the
-    OSDMap blocklist. It has no effect on how long something is
-    blocklisted when the administrator blocklists it manually. For
-    example, ``ceph osd blocklist add`` will still use the default
-    blocklist time.
-  default: 1_day
-  services:
-  - mon
-  min: 1_hr
-  flags:
-  - runtime
-- name: mon_mgr_blocklist_interval
-  type: float
-  level: dev
-  desc: Duration in seconds that blocklist entries for mgr daemons remain in the OSD
-    map
-  default: 1_day
-  services:
-  - mon
-  min: 1_hr
-  flags:
-  - runtime
-- name: mon_osd_crush_smoke_test
-  type: bool
-  level: advanced
-  desc: perform a smoke test on any new CRUSH map before accepting changes
-  default: true
-  services:
-  - mon
-  with_legacy: true
-- name: mon_smart_report_timeout
-  type: uint
-  level: advanced
-  desc: Timeout (in seconds) for smarctl to run, default is set to 5
-  default: 5
-  services:
-  - mon
-- name: mon_auth_validate_all_caps
-  type: bool
-  level: advanced
-  desc: Whether to parse non-monitor capabilities set by the 'ceph auth ...' commands.
-    Disabling this saves CPU on the monitor, but allows invalid capabilities to be
-    set, and only be rejected later, when they are used.
-  default: true
-  services:
-  - mon
-  flags:
-  - runtime
-- name: mon_warn_on_older_version
-  type: bool
-  level: advanced
-  desc: issue DAEMON_OLD_VERSION health warning if daemons are not all running the
-    same version
-  default: true
-  services:
-  - mon
-- name: mon_warn_older_version_delay
-  type: secs
-  level: advanced
-  desc: issue DAEMON_OLD_VERSION health warning after this amount of time has elapsed
-  default: 7_day
-  services:
-  - mon
-# how often (in commits) to stash a full copy of the PaxosService state
-- name: paxos_stash_full_interval
-  type: int
-  level: advanced
-  default: 25
-  services:
-  - mon
-  fmt_desc: How often (in commits) to stash a full copy of the PaxosService state.
-    Current this setting only affects ``mds``, ``mon``, ``auth`` and ``mgr``
-    PaxosServices.
-  with_legacy: true
-# max paxos iterations before we must first sync the monitor stores
-- name: paxos_max_join_drift
-  type: int
-  level: advanced
-  default: 10
-  services:
-  - mon
-  fmt_desc: The maximum Paxos iterations before we must first sync the
-    monitor data stores. When a monitor finds that its peer is too
-    far ahead of it, it will first sync with data stores before moving
-    on.
-  with_legacy: true
-# gather updates for this long before proposing a map update
-- name: paxos_propose_interval
-  type: float
-  level: advanced
-  default: 1
-  services:
-  - mon
-  fmt_desc: Gather updates for this time interval before proposing
-    a map update.
-  with_legacy: true
-# min time to gather updates for after period of inactivity
-- name: paxos_min_wait
-  type: float
-  level: advanced
-  default: 0.05
-  services:
-  - mon
-  fmt_desc: The minimum amount of time to gather updates after a period of
-    inactivity.
-  with_legacy: true
-# minimum number of paxos states to keep around
-- name: paxos_min
-  type: int
-  level: advanced
-  default: 500
-  services:
-  - mon
-  fmt_desc: The minimum number of Paxos states to keep around
-  with_legacy: true
-# number of extra proposals tolerated before trimming
-- name: paxos_trim_min
-  type: int
-  level: advanced
-  default: 250
-  services:
-  - mon
-  fmt_desc: Number of extra proposals tolerated before trimming
-  with_legacy: true
-# maximum amount of versions to trim during a single proposal (0 disables it)
-- name: paxos_trim_max
-  type: int
-  level: advanced
-  default: 500
-  services:
-  - mon
-  fmt_desc: The maximum number of extra proposals to trim at a time
-  with_legacy: true
-# minimum amount of versions to trigger a trim (0 disables it)
-- name: paxos_service_trim_min
-  type: uint
-  level: advanced
-  default: 250
-  services:
-  - mon
-  fmt_desc: The minimum amount of versions to trigger a trim (0 disables it)
-  with_legacy: true
-# maximum amount of versions to trim during a single proposal (0 disables it)
-- name: paxos_service_trim_max
-  type: uint
-  level: advanced
-  default: 500
-  services:
-  - mon
-  fmt_desc: The maximum amount of versions to trim during a single proposal (0 disables it)
-  with_legacy: true
-- name: paxos_service_trim_max_multiplier
-  type: uint
-  level: advanced
-  desc: factor by which paxos_service_trim_max will be multiplied to get a new upper
-    bound when trim sizes are high  (0 disables it)
-  default: 20
-  services:
-  - mon
-  min: 0
-  flags:
-  - runtime
-- name: paxos_kill_at
-  type: int
-  level: dev
-  default: 0
-  services:
-  - mon
-  with_legacy: true
 # required of mon, mds, osd daemons
 - name: auth_cluster_required
   type: str
@@ -4538,11 +3663,6 @@ options:
   level: dev
   desc: The block size for index partitions. (0 = rocksdb default)
   default: 4_K
-- name: mon_rocksdb_options
-  type: str
-  level: advanced
-  default: write_buffer_size=33554432,compression=kNoCompression,level_compaction_dynamic_level_bytes=true
-  with_legacy: true
 # osd_*_priority adjust the relative priority of client io, recovery io,
 # snaptrim io, etc
 #
diff --git a/src/common/options/mgr.yaml.in b/src/common/options/mgr.yaml.in
index cabd2110746..8ab7c6f63bd 100644
--- a/src/common/options/mgr.yaml.in
+++ b/src/common/options/mgr.yaml.in
@@ -192,3 +192,11 @@ options:
   services:
   - mgr
   - mon
+- name: mon_osd_err_op_age_ratio
+  type: float
+  level: advanced
+  desc: issue REQUEST_STUCK health error if OSD ops are slower than this age (seconds)
+  default: 128
+  services:
+  - mgr
+  with_legacy: true
diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in
index d08ac61ca27..cf995c388ff 100644
--- a/src/common/options/mon.yaml.in
+++ b/src/common/options/mon.yaml.in
@@ -309,3 +309,875 @@ options:
   services:
   - mon
   with_legacy: true
+- name: mon_election_timeout
+  type: float
+  level: advanced
+  desc: maximum time for a mon election (seconds)
+  fmt_desc: On election proposer, maximum waiting time for all ACKs in seconds.
+  default: 5
+  services:
+  - mon
+  with_legacy: true
+- name: mon_election_default_strategy
+  type: uint
+  level: advanced
+  desc: The election strategy to set when constructing the first monmap.
+  default: 1
+  min: 1
+  max: 3
+- name: mon_lease
+  type: float
+  level: advanced
+  desc: lease interval between quorum monitors (seconds)
+  long_desc: This setting controls how sensitive your mon quorum is to intermittent
+    network issues or other failures.
+  fmt_desc: The length (in seconds) of the lease on the monitor's versions.
+  default: 5
+  services:
+  - mon
+  with_legacy: true
+- name: mon_lease_renew_interval_factor
+  type: float
+  level: advanced
+  desc: multiple of mon_lease for the lease renewal interval
+  long_desc: Leases must be renewed before they time out.  A smaller value means frequent
+    renewals, while a value close to 1 makes a lease expiration more likely.
+  fmt_desc: |
+    ``mon_lease`` \* ``mon_lease_renew_interval_factor`` will be the
+    interval for the Leader to renew the other monitors' leases. The
+    factor should be less than ``1.0``.
+  default: 0.6
+  services:
+  - mon
+  see_also:
+  - mon_lease
+  min: 0
+  max: 0.9999999
+  with_legacy: true
+- name: mon_lease_ack_timeout_factor
+  type: float
+  level: advanced
+  desc: multiple of mon_lease for the lease ack interval before calling new election
+  fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_lease_ack_timeout_factor``
+    for the Providers to acknowledge the lease extension.
+  default: 2
+  services:
+  - mon
+  see_also:
+  - mon_lease
+  min: 1.0001
+  max: 100
+  with_legacy: true
+- name: mon_accept_timeout_factor
+  type: float
+  level: advanced
+  desc: multiple of mon_lease for follower mons to accept proposed state changes before
+    calling a new election
+  fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_accept_timeout_factor``
+    for the Requester(s) to accept a Paxos update. It is also used
+    during the Paxos recovery phase for similar purposes.
+  default: 2
+  services:
+  - mon
+  see_also:
+  - mon_lease
+  with_legacy: true
+- name: mon_elector_ping_timeout
+  type: float
+  level: advanced
+  desc: The time after which a ping 'times out' and a connection is considered down
+  default: 2
+  services:
+  - mon
+  see_also:
+  - mon_elector_ping_divisor
+- name: mon_elector_ping_divisor
+  type: uint
+  level: advanced
+  desc: We will send a ping up to this many times per timeout period
+  default: 2
+  services:
+  - mon
+  see_also:
+  - mon_elector_ping_timeout
+- name: mon_con_tracker_persist_interval
+  type: uint
+  level: advanced
+  desc: how many updates the ConnectionTracker takes before it persists to disk
+  default: 10
+  services:
+  - mon
+  min: 1
+  max: 100000
+- name: mon_con_tracker_score_halflife
+  type: uint
+  level: advanced
+  desc: The 'halflife' used when updating/calculating peer connection scores
+  default: 43200
+  services:
+  - mon
+  min: 60
+- name: mon_elector_ignore_propose_margin
+  type: float
+  level: advanced
+  desc: The difference in connection score allowed before a peon stops ignoring out-of-quorum
+    PROPOSEs
+  default: 0.0005
+  services:
+  - mon
+- name: mon_warn_on_degraded_stretch_mode
+  type: bool
+  level: advanced
+  desc: Issue a health warning if we are in degraded stretch mode
+  default: true
+  services:
+  - mon
+- name: mon_stretch_cluster_recovery_ratio
+  type: float
+  level: advanced
+  desc: the ratio of up OSDs at which a degraded stretch cluster enters recovery
+  default: 0.6
+  services:
+  - mon
+  min: 0.51
+  max: 1
+- name: mon_stretch_recovery_min_wait
+  type: float
+  level: advanced
+  desc: how long the monitors wait before considering fully-healthy PGs as evidence
+    the stretch mode is repaired
+  default: 15
+  services:
+  - mon
+  min: 1
+- name: mon_stretch_pool_size
+  type: uint
+  level: dev
+  default: 4
+  services:
+  - mon
+  min: 3
+  max: 6
+- name: mon_stretch_pool_min_size
+  type: uint
+  level: dev
+  default: 2
+  services:
+  - mon
+  min: 2
+  max: 4
+- name: mon_clock_drift_allowed
+  type: float
+  level: advanced
+  desc: allowed clock drift (in seconds) between mons before issuing a health warning
+  default: 0.05
+  services:
+  - mon
+  with_legacy: true
+# exponential backoff for clock drift warnings
+- name: mon_clock_drift_warn_backoff
+  type: float
+  level: advanced
+  desc: exponential backoff factor for logging clock drift warnings in the cluster
+    log
+  default: 5
+  services:
+  - mon
+  with_legacy: true
+# on leader, timecheck (clock drift check) interval (seconds)
+- name: mon_timecheck_interval
+  type: float
+  level: advanced
+  desc: frequency of clock synchronization checks between monitors (seconds)
+  fmt_desc: The time check interval (clock drift check) in seconds
+    for the Leader.
+  default: 5_min
+  services:
+  - mon
+  with_legacy: true
+# on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
+- name: mon_timecheck_skew_interval
+  type: float
+  level: advanced
+  desc: frequency of clock synchronization (re)checks between monitors while clocks
+    are believed to be skewed (seconds)
+  fmt_desc: The time check interval (clock drift check) in seconds for the
+    Leader when in the presence of a skew.
+  default: 30
+  services:
+  - mon
+  see_also:
+  - mon_timecheck_interval
+  with_legacy: true
+# how often (in commits) to stash a full copy of the PaxosService state
+- name: paxos_stash_full_interval
+  type: int
+  level: advanced
+  default: 25
+  services:
+  - mon
+  fmt_desc: How often (in commits) to stash a full copy of the PaxosService state.
+    Currently this setting only affects ``mds``, ``mon``, ``auth`` and ``mgr``
+    PaxosServices.
+  with_legacy: true
+# max paxos iterations before we must first sync the monitor stores
+- name: paxos_max_join_drift
+  type: int
+  level: advanced
+  default: 10
+  services:
+  - mon
+  fmt_desc: The maximum Paxos iterations before we must first sync the
+    monitor data stores. When a monitor finds that its peer is too
+    far ahead of it, it will first sync with data stores before moving
+    on.
+  with_legacy: true
+# gather updates for this long before proposing a map update
+- name: paxos_propose_interval
+  type: float
+  level: advanced
+  default: 1
+  services:
+  - mon
+  fmt_desc: Gather updates for this time interval before proposing
+    a map update.
+  with_legacy: true
+# min time to gather updates for after period of inactivity
+- name: paxos_min_wait
+  type: float
+  level: advanced
+  default: 0.05
+  services:
+  - mon
+  fmt_desc: The minimum amount of time to gather updates after a period of
+    inactivity.
+  with_legacy: true
+# minimum number of paxos states to keep around
+- name: paxos_min
+  type: int
+  level: advanced
+  default: 500
+  services:
+  - mon
+  fmt_desc: The minimum number of Paxos states to keep around
+  with_legacy: true
+# number of extra proposals tolerated before trimming
+- name: paxos_trim_min
+  type: int
+  level: advanced
+  default: 250
+  services:
+  - mon
+  fmt_desc: Number of extra proposals tolerated before trimming
+  with_legacy: true
+# maximum amount of versions to trim during a single proposal (0 disables it)
+- name: paxos_trim_max
+  type: int
+  level: advanced
+  default: 500
+  services:
+  - mon
+  fmt_desc: The maximum number of extra proposals to trim at a time
+  with_legacy: true
+# minimum amount of versions to trigger a trim (0 disables it)
+- name: paxos_service_trim_min
+  type: uint
+  level: advanced
+  default: 250
+  services:
+  - mon
+  fmt_desc: The minimum amount of versions to trigger a trim (0 disables it)
+  with_legacy: true
+# maximum amount of versions to trim during a single proposal (0 disables it)
+- name: paxos_service_trim_max
+  type: uint
+  level: advanced
+  default: 500
+  services:
+  - mon
+  fmt_desc: The maximum amount of versions to trim during a single proposal (0 disables it)
+  with_legacy: true
+- name: paxos_service_trim_max_multiplier
+  type: uint
+  level: advanced
+  desc: factor by which paxos_service_trim_max will be multiplied to get a new upper
+    bound when trim sizes are high  (0 disables it)
+  default: 20
+  services:
+  - mon
+  min: 0
+  flags:
+  - runtime
+- name: paxos_kill_at
+  type: int
+  level: dev
+  default: 0
+  services:
+  - mon
+  with_legacy: true
+- name: mon_auth_validate_all_caps
+  type: bool
+  level: advanced
+  desc: Whether to parse non-monitor capabilities set by the 'ceph auth ...' commands.
+    Disabling this saves CPU on the monitor, but allows invalid capabilities to be
+    set, and only be rejected later, when they are used.
+  default: true
+  services:
+  - mon
+  flags:
+  - runtime
+# force mon to trim mdsmaps to this point (dangerous)
+- name: mon_mds_force_trim_to
+  type: int
+  level: dev
+  desc: force mons to trim mdsmaps/fsmaps through this epoch
+  fmt_desc: Force monitor to trim mdsmaps to this point (0 disables it.
+    dangerous, use with care)
+  default: 0
+  services:
+  - mon
+  with_legacy: true
+# skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
+- name: mon_mds_skip_sanity
+  type: bool
+  level: advanced
+  desc: skip sanity checks on fsmap/mdsmap
+  fmt_desc: Skip safety assertions on FSMap (in case of bugs where we want to
+    continue anyway). Monitor terminates if the FSMap sanity check
+    fails, but we can disable it by enabling this option.
+  default: false
+  services:
+  - mon
+  with_legacy: true
+- name: mon_mds_blocklist_interval
+  type: float
+  level: dev
+  desc: Duration in seconds that blocklist entries for MDS daemons remain in the OSD
+    map
+  fmt_desc: The blocklist duration for failed MDSs in the OSD map. Note,
+    this controls how long failed MDS daemons will stay in the
+    OSDMap blocklist. It has no effect on how long something is
+    blocklisted when the administrator blocklists it manually. For
+    example, ``ceph osd blocklist add`` will still use the default
+    blocklist time.
+  default: 1_day
+  services:
+  - mon
+  min: 1_hr
+  flags:
+  - runtime
+- name: mon_mgr_blocklist_interval
+  type: float
+  level: dev
+  desc: Duration in seconds that blocklist entries for mgr daemons remain in the OSD
+    map
+  default: 1_day
+  services:
+  - mon
+  min: 1_hr
+  flags:
+  - runtime
+- name: mon_osd_laggy_halflife
+  type: int
+  level: advanced
+  desc: halflife of OSD 'lagginess' factor
+  fmt_desc: The number of seconds laggy estimates will decay.
+  default: 1_hr
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_laggy_weight
+  type: float
+  level: advanced
+  desc: how heavily to weight OSD marking itself back up in overall laggy_probability
+  long_desc: 1.0 means that an OSD marking itself back up (because it was marked down
+    but not actually dead) means a 100% laggy_probability; 0.0 effectively disables
+    tracking of laggy_probability.
+  fmt_desc: The weight for new samples in laggy estimation decay.
+  default: 0.3
+  services:
+  - mon
+  min: 0
+  max: 1
+  with_legacy: true
+- name: mon_osd_laggy_max_interval
+  type: int
+  level: advanced
+  desc: cap value for period for OSD to be marked for laggy_interval calculation
+  fmt_desc: Maximum value of ``laggy_interval`` in laggy estimations (in seconds).
+              Monitor uses an adaptive approach to evaluate the ``laggy_interval`` of
+              a certain OSD. This value will be used to calculate the grace time for
+              that OSD.
+  default: 5_min
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_adjust_heartbeat_grace
+  type: bool
+  level: advanced
+  desc: increase OSD heartbeat grace if peers appear to be laggy
+  long_desc: If an OSD is marked down but then marks itself back up, it implies it
+    wasn't actually down but was unable to respond to heartbeats.  If this option
+    is true, we can use the laggy_probability and laggy_interval values calculated
+    to model this situation to increase the heartbeat grace period for this OSD so
+    that it isn't marked down again.  laggy_probability is an estimated probability
+    that the given OSD is down because it is laggy (not actually down), and laggy_interval
+    is an estimate of how long it stays down when it is laggy.
+  fmt_desc: If set to ``true``, Ceph will scale based on laggy estimations.
+  default: true
+  services:
+  - mon
+  see_also:
+  - mon_osd_laggy_halflife
+  - mon_osd_laggy_weight
+  - mon_osd_laggy_max_interval
+  with_legacy: true
+- name: mon_osd_adjust_down_out_interval
+  type: bool
+  level: advanced
+  desc: increase the mon_osd_down_out_interval if an OSD appears to be laggy
+  fmt_desc: If set to ``true``, Ceph will scale based on laggy estimations.
+  default: true
+  services:
+  - mon
+  see_also:
+  - mon_osd_adjust_heartbeat_grace
+  with_legacy: true
+- name: mon_osd_auto_mark_in
+  type: bool
+  level: advanced
+  desc: mark any OSD that comes up 'in'
+  fmt_desc: Ceph will mark any booting Ceph OSD Daemons as ``in``
+              the Ceph Storage Cluster.
+  default: false
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_auto_mark_auto_out_in
+  type: bool
+  level: advanced
+  desc: mark any OSD that comes up that was automatically marked 'out' back 'in'
+  fmt_desc: Ceph will mark booting Ceph OSD Daemons auto marked ``out``
+              of the Ceph Storage Cluster as ``in`` the cluster.
+  default: true
+  services:
+  - mon
+  see_also:
+  - mon_osd_down_out_interval
+  with_legacy: true
+- name: mon_osd_auto_mark_new_in
+  type: bool
+  level: advanced
+  desc: mark any new OSD that comes up 'in'
+  fmt_desc: Ceph will mark booting new Ceph OSD Daemons as ``in`` the
+              Ceph Storage Cluster.
+  default: true
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_destroyed_out_interval
+  type: int
+  level: advanced
+  desc: mark any OSD 'out' that has been 'destroy'ed for this long (seconds)
+  default: 10_min
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_down_out_interval
+  type: int
+  level: advanced
+  desc: mark any OSD 'out' that has been 'down' for this long (seconds)
+  fmt_desc: The number of seconds Ceph waits before marking a Ceph OSD Daemon
+              ``down`` and ``out`` if it doesn't respond.
+  default: 10_min
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_down_out_subtree_limit
+  type: str
+  level: advanced
+  desc: do not automatically mark OSDs 'out' if an entire subtree of this size is
+    down
+  fmt_desc: The smallest :term:`CRUSH` unit type that Ceph will **not**
+              automatically mark out. For instance, if set to ``host`` and if
+              all OSDs of a host are down, Ceph will not automatically mark out
+              these OSDs.
+  default: rack
+  services:
+  - mon
+  see_also:
+  - mon_osd_down_out_interval
+  flags:
+  - runtime
+- name: mon_osd_min_up_ratio
+  type: float
+  level: advanced
+  desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'up'
+  fmt_desc: The minimum ratio of ``up`` Ceph OSD Daemons before Ceph will
+              mark Ceph OSD Daemons ``down``.
+  default: 0.3
+  services:
+  - mon
+  see_also:
+  - mon_osd_down_out_interval
+  with_legacy: true
+- name: mon_osd_min_in_ratio
+  type: float
+  level: advanced
+  desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'in'
+  fmt_desc: The minimum ratio of ``in`` Ceph OSD Daemons before Ceph will
+              mark Ceph OSD Daemons ``out``.
+  default: 0.75
+  services:
+  - mon
+  see_also:
+  - mon_osd_down_out_interval
+  with_legacy: true
+- name: mon_osd_warn_op_age
+  type: float
+  level: advanced
+  desc: issue REQUEST_SLOW health warning if OSD ops are slower than this age (seconds)
+  default: 32
+  services:
+  - mgr
+  with_legacy: true
+- name: mon_osd_warn_num_repaired
+  type: uint
+  level: advanced
+  desc: issue OSD_TOO_MANY_REPAIRS health warning if an OSD has more than this many
+    read repairs
+  default: 10
+  services:
+  - mon
+- name: mon_osd_prime_pg_temp
+  type: bool
+  level: dev
+  desc: minimize peering work by priming pg_temp values after a map change
+  fmt_desc: Enables or disables priming the PGMap with the previous OSDs when an ``out``
+    OSD comes back into the cluster. With the ``true`` setting, clients
+    will continue to use the previous OSDs until the newly ``in`` OSDs for
+    a PG have peered.
+  default: true
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_prime_pg_temp_max_time
+  type: float
+  level: dev
+  desc: maximum time to spend precalculating PG mappings on map change (seconds)
+  fmt_desc: How much time in seconds the monitor should spend trying to prime the
+    PGMap when an out OSD comes back into the cluster.
+  default: 0.5
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_prime_pg_temp_max_estimate
+  type: float
+  level: advanced
+  desc: calculate all PG mappings if estimated fraction of PGs that change is above
+    this amount
+  fmt_desc: Maximum estimate of time spent on each PG before we prime all PGs
+    in parallel.
+  default: 0.25
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_blocklist_default_expire
+  type: float
+  level: advanced
+  desc: Duration in seconds that blocklist entries for clients remain in the OSD map
+  default: 1_hr
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_crush_smoke_test
+  type: bool
+  level: advanced
+  desc: perform a smoke test on any new CRUSH map before accepting changes
+  default: true
+  services:
+  - mon
+  with_legacy: true
+- name: mon_smart_report_timeout
+  type: uint
+  level: advanced
+  desc: Timeout (in seconds) for smartctl to run, default is set to 5
+  default: 5
+  services:
+  - mon
+- name: mon_warn_on_older_version
+  type: bool
+  level: advanced
+  desc: issue DAEMON_OLD_VERSION health warning if daemons are not all running the
+    same version
+  default: true
+  services:
+  - mon
+- name: mon_warn_older_version_delay
+  type: secs
+  level: advanced
+  desc: issue DAEMON_OLD_VERSION health warning after this amount of time has elapsed
+  default: 7_day
+  services:
+  - mon
+- name: mon_data
+  type: str
+  level: advanced
+  desc: path to mon database
+  fmt_desc: The monitor's data location.
+  default: /var/lib/ceph/mon/$cluster-$id
+  services:
+  - mon
+  flags:
+  - no_mon_update
+  with_legacy: true
+- name: mon_rocksdb_options
+  type: str
+  level: advanced
+  default: write_buffer_size=33554432,compression=kNoCompression,level_compaction_dynamic_level_bytes=true
+  with_legacy: true
+- name: mon_enable_op_tracker
+  type: bool
+  level: advanced
+  desc: enable/disable MON op tracking
+  default: true
+  services:
+  - mon
+# compact leveldb on ceph-mon start
+- name: mon_compact_on_start
+  type: bool
+  level: advanced
+  default: false
+  services:
+  - mon
+  fmt_desc: Compact the database used as Ceph Monitor store on
+    ``ceph-mon`` start. A manual compaction helps to shrink the
+    monitor database and improve the performance of it if the regular
+    compaction fails to work.
+  with_legacy: true
+# trigger leveldb compaction on bootstrap
+- name: mon_compact_on_bootstrap
+  type: bool
+  level: advanced
+  default: false
+  services:
+  - mon
+  fmt_desc: Compact the database used as Ceph Monitor store
+    on bootstrap. Monitors probe each other to establish
+    a quorum after bootstrap. If a monitor times out before joining the
+    quorum, it will start over and bootstrap again.
+  with_legacy: true
+# compact (a prefix) when we trim old states
+- name: mon_compact_on_trim
+  type: bool
+  level: advanced
+  default: true
+  services:
+  - mon
+  fmt_desc: Compact a certain prefix (including paxos) when we trim its old states.
+  with_legacy: true
+- name: mon_op_complaint_time
+  type: secs
+  level: advanced
+  desc: time after which to consider a monitor operation blocked after no updates
+  default: 30
+  services:
+  - mon
+- name: mon_op_log_threshold
+  type: int
+  level: advanced
+  desc: max number of slow ops to display
+  default: 5
+  services:
+  - mon
+- name: mon_op_history_size
+  type: uint
+  level: advanced
+  desc: max number of completed ops to track
+  default: 20
+  services:
+  - mon
+- name: mon_op_history_duration
+  type: secs
+  level: advanced
+  desc: expiration time in seconds of historical MON OPS
+  default: 10_min
+  services:
+  - mon
+- name: mon_op_history_slow_op_size
+  type: uint
+  level: advanced
+  desc: max number of slow historical MON OPS to keep
+  default: 20
+  services:
+  - mon
+- name: mon_op_history_slow_op_threshold
+  type: secs
+  level: advanced
+  desc: duration of an op to be considered as a historical slow op
+  default: 10
+  services:
+  - mon
+- name: mon_osdmap_full_prune_enabled
+  type: bool
+  level: advanced
+  desc: enables pruning full osdmap versions when we go over a given number of maps
+  default: true
+  services:
+  - mon
+  see_also:
+  - mon_osdmap_full_prune_min
+  - mon_osdmap_full_prune_interval
+  - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_min
+  type: uint
+  level: advanced
+  desc: minimum number of versions in the store to trigger full map pruning
+  default: 10000
+  services:
+  - mon
+  see_also:
+  - mon_osdmap_full_prune_enabled
+  - mon_osdmap_full_prune_interval
+  - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_interval
+  type: uint
+  level: advanced
+  desc: interval between maps that will not be pruned; maps in the middle will be
+    pruned.
+  default: 10
+  services:
+  - mon
+  see_also:
+  - mon_osdmap_full_prune_enabled
+  - mon_osdmap_full_prune_min
+  - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_txsize
+  type: uint
+  level: advanced
+  desc: number of maps we will prune per iteration
+  default: 100
+  services:
+  - mon
+  see_also:
+  - mon_osdmap_full_prune_enabled
+  - mon_osdmap_full_prune_interval
+  - mon_osdmap_full_prune_min
+- name: mon_osd_cache_size
+  type: int
+  level: advanced
+  desc: maximum number of OSDMaps to cache in memory
+  fmt_desc: The size of osdmaps cache, not to rely on underlying store's cache
+  default: 500
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_cache_size_min
+  type: size
+  level: advanced
+  desc: The minimum amount of bytes to be kept mapped in memory for osd monitor caches.
+  fmt_desc: The minimum amount of bytes to be kept mapped in memory for osd
+     monitor caches.
+  default: 128_M
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_mapping_pgs_per_chunk
+  type: int
+  level: dev
+  desc: granularity of PG placement calculation background work
+  fmt_desc: We calculate the mapping from placement group to OSDs in chunks.
+    This option specifies the number of placement groups per chunk.
+  default: 4096
+  services:
+  - mon
+  with_legacy: true
+- name: mon_clean_pg_upmaps_per_chunk
+  type: uint
+  level: dev
+  desc: granularity of PG upmap validation background work
+  default: 256
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_max_creating_pgs
+  type: int
+  level: advanced
+  desc: maximum number of PGs the mon will create at once
+  default: 1024
+  services:
+  - mon
+  with_legacy: true
+- name: mon_osd_max_initial_pgs
+  type: int
+  level: advanced
+  desc: maximum number of PGs a pool will be created with
+  long_desc: If the user specifies more PGs than this, the cluster will subsequently
+    split PGs after the pool is created in order to reach the target.
+  default: 1024
+  services:
+  - mon
+- name: mon_memory_target
+  type: size
+  level: basic
+  desc: The amount of bytes pertaining to osd monitor caches and kv cache to be kept
+    mapped in memory with cache auto-tuning enabled
+  fmt_desc: The amount of bytes pertaining to OSD monitor caches and KV cache
+    to be kept mapped in memory with cache auto-tuning enabled.
+  default: 2_G
+  services:
+  - mon
+  flags:
+  - runtime
+  with_legacy: true
+- name: mon_memory_autotune
+  type: bool
+  level: basic
+  desc: Autotune the cache memory being used for osd monitors and kv database
+  fmt_desc: Autotune the cache memory used for OSD monitors and KV
+    database.
+  default: true
+  services:
+  - mon
+  flags:
+  - runtime
+  with_legacy: true
+- name: mon_cpu_threads
+  type: int
+  level: advanced
+  desc: worker threads for CPU intensive background work
+  fmt_desc: Number of threads for performing CPU intensive work on monitor.
+  default: 4
+  services:
+  - mon
+  with_legacy: true
+- name: mon_tick_interval
+  type: int
+  level: advanced
+  desc: interval for internal mon background checks
+  fmt_desc: A monitor's tick interval in seconds.
+  default: 5
+  services:
+  - mon
+  with_legacy: true
+- name: mon_session_timeout
+  type: int
+  level: advanced
+  desc: close inactive mon client connections after this many seconds
+  fmt_desc: Monitor will terminate inactive sessions that stay idle over this
+    time limit.
+  default: 5_min
+  services:
+  - mon
+  with_legacy: true
+- name: mon_subscribe_interval
+  type: float
+  level: dev
+  desc: subscribe interval for pre-jewel clients
+  fmt_desc: The refresh interval (in seconds) for subscriptions. The
+    subscription mechanism enables obtaining cluster maps
+    and log information.
+  default: 1_day
+  services:
+  - mon
+  with_legacy: true