doc: account for PG autoscaling being the default

author Conrad Hoffmann <ch@bitfehler.net>

Wed, 22 Mar 2023 22:03:57 +0000 (23:03 +0100)

committer Zac Dover <zac.dover@proton.me>

Mon, 17 Apr 2023 21:00:46 +0000 (23:00 +0200)
author Conrad Hoffmann <ch@bitfehler.net>
Wed, 22 Mar 2023 22:03:57 +0000 (23:03 +0100)
committer Zac Dover <zac.dover@proton.me>
Mon, 17 Apr 2023 21:00:46 +0000 (23:00 +0200)
diff --git a/doc/install/manual-deployment.rst b/doc/install/manual-deployment.rst

index 95232fce2aaef1bddbd4a462af965470a0741a02..6716ecb5beb508d27baceedfa474b9c8663ad7b2 100644 (file)
--- a/doc/install/manual-deployment.rst
+++ b/doc/install/manual-deployment.rst
@@ -132,24 +132,24 @@ The procedure is as follows:
  
  #. Add the initial monitor(s) to your Ceph configuration file. ::
  
-       mon initial members = {hostname}[,{hostname}]
+       mon_initial_members = {hostname}[,{hostname}]
  
     For example::
  
-       mon initial members = mon-node1
+       mon_initial_members = mon-node1
  
  
  #. Add the IP address(es) of the initial monitor(s) to your Ceph configuration
     file and save the file. ::
  
-       mon host = {ip-address}[,{ip-address}]
+       mon_host = {ip-address}[,{ip-address}]
  
     For example::
  
-       mon host = 192.168.0.1
+       mon_host = 192.168.0.1
  
     **Note:** You may use IPv6 addresses instead of IPv4 addresses, but
-   you must set ``ms bind ipv6`` to ``true``. See `Network Configuration
+   you must set ``ms_bind_ipv6`` to ``true``. See `Network Configuration
     Reference`_ for details about network configuration.
  
  #. Create a keyring for your cluster and generate a monitor secret key. ::
@@ -210,37 +210,33 @@ The procedure is as follows:
  
         [global]
         fsid = {cluster-id}
-       mon initial members = {hostname}[, {hostname}]
-       mon host = {ip-address}[, {ip-address}]
-       public network = {network}[, {network}]
-       cluster network = {network}[, {network}]
-       auth cluster required = cephx
-       auth service required = cephx
-       auth client required = cephx
-       osd journal size = {n}
-       osd pool default size = {n}  # Write an object n times.
-       osd pool default min size = {n} # Allow writing n copies in a degraded state.
-       osd pool default pg num = {n}
-       osd pool default pgp num = {n}
-       osd crush chooseleaf type = {n}
+       mon_initial_members = {hostname}[, {hostname}]
+       mon_host = {ip-address}[, {ip-address}]
+       public_network = {network}[, {network}]
+       cluster_network = {network}[, {network}]
+       auth_cluster_required = cephx
+       auth_service_required = cephx
+       auth_client_required = cephx
+       osd_pool_default_size = {n}  # Write an object n times.
+       osd_pool_default_min_size = {n} # Allow writing n copies in a degraded state.
+       osd_pool_default_pg_num = {n}
+       osd_crush_chooseleaf_type = {n}
  
     In the foregoing example, the ``[global]`` section of the configuration might
     look like this::
  
         [global]
         fsid = a7f64266-0894-4f1e-a635-d0aeaca0e993
-       mon initial members = mon-node1
-       mon host = 192.168.0.1
-       public network = 192.168.0.0/24
-       auth cluster required = cephx
-       auth service required = cephx
-       auth client required = cephx
-       osd journal size = 1024
-       osd pool default size = 3
-       osd pool default min size = 2
-       osd pool default pg num = 333
-       osd pool default pgp num = 333
-       osd crush chooseleaf type = 1
+       mon_initial_members = mon-node1
+       mon_host = 192.168.0.1
+       public_network = 192.168.0.0/24
+       auth_cluster_required = cephx
+       auth_service_required = cephx
+       auth_client_required = cephx
+       osd_pool_default_size = 3
+       osd_pool_default_min_size = 2
+       osd_pool_default_pg_num = 333
+       osd_crush_chooseleaf_type = 1
  
  
  #. Start the monitor(s).
@@ -295,7 +291,7 @@ Adding OSDs
  
  Once you have your initial monitor(s) running, you should add OSDs. Your cluster
  cannot reach an ``active + clean`` state until you have enough OSDs to handle the
-number of copies of an object (e.g., ``osd pool default size = 2`` requires at
+number of copies of an object (e.g., ``osd_pool_default_size = 2`` requires at
  least two OSDs). After bootstrapping your monitor, your cluster has a default
  CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to
  a Ceph Node.
@@ -311,8 +307,6 @@ CRUSH map under the host for you. Execute ``ceph-volume -h`` for CLI details.
  The ``ceph-volume`` utility automates the steps of the `Long Form`_ below. To
  create the first two OSDs with the short form procedure, execute the following for each OSD:
  
-bluestore
-^^^^^^^^^
  #. Create the OSD. ::
  
         copy /var/lib/ceph/bootstrap-osd/ceph.keyring from monitor node (mon-node1) to /var/lib/ceph/bootstrap-osd/ceph.keyring on osd node (osd-node1)
@@ -353,45 +347,6 @@ activate):
         sudo ceph-volume lvm activate 0 a7f64266-0894-4f1e-a635-d0aeaca0e993
  
  
-filestore
-^^^^^^^^^
-#. Create the OSD. ::
-
-       ssh {osd node}
-       sudo ceph-volume lvm create --filestore --data {data-path} --journal {journal-path}
-
-   For example::
-
-       ssh osd-node1
-       sudo ceph-volume lvm create --filestore --data /dev/hdd1 --journal /dev/hdd2
-
-Alternatively, the creation process can be split in two phases (prepare, and
-activate):
-
-#. Prepare the OSD. ::
-
-       ssh {node-name}
-       sudo ceph-volume lvm prepare --filestore --data {data-path} --journal {journal-path}
-
-   For example::
-
-       ssh osd-node1
-       sudo ceph-volume lvm prepare --filestore --data /dev/hdd1 --journal /dev/hdd2
-
-   Once prepared, the ``ID`` and ``FSID`` of the prepared OSD are required for
-   activation. These can be obtained by listing OSDs in the current server::
-
-    sudo ceph-volume lvm list
-
-#. Activate the OSD::
-
-       sudo ceph-volume lvm activate --filestore {ID} {FSID}
-
-   For example::
-
-       sudo ceph-volume lvm activate --filestore 0 a7f64266-0894-4f1e-a635-d0aeaca0e993
-
-
  Long Form
  ---------
  
diff --git a/doc/rados/configuration/demo-ceph.conf b/doc/rados/configuration/demo-ceph.conf

index 2537dc45c0cb52d2197dd688368afee9264d4ac8..8ba285a42a57d761e797be2be157a2ee651563b0 100644 (file)
--- a/doc/rados/configuration/demo-ceph.conf
+++ b/doc/rados/configuration/demo-ceph.conf
@@ -15,13 +15,13 @@ auth_cluster_required = cephx
  auth_service_required = cephx
  auth_client_required = cephx
  
-#Choose reasonable numbers for journals, number of replicas
-#and placement groups.
+#Choose a reasonable number of replicas and placement groups.
  osd_journal_size = {n}
  osd_pool_default_size = {n}  # Write an object n times.
-osd_pool_default_min_size = {n} # Allow writing n copy in a degraded state.
+osd_pool_default_min_size = {n} # Allow writing n copies in a degraded state.
+osd_pool_default_pg_autoscale_mode = {mode} # on, off, or warn
+# Only used if autoscaling is off or warn:
  osd_pool_default_pg_num = {n}
-osd_pool_default_pgp_num = {n}
  
  #Choose a reasonable crush leaf type.
  #0 for a 1-node cluster.
diff --git a/doc/rados/configuration/pool-pg.conf b/doc/rados/configuration/pool-pg.conf

index 28252e860aab2c8a482d29b0794ae70f5e97d051..6765d37dfad7149e824966e4839a1778bc75e15c 100644 (file)
--- a/doc/rados/configuration/pool-pg.conf
+++ b/doc/rados/configuration/pool-pg.conf
@@ -1,21 +1,21 @@
  [global]
  
-       # By default, Ceph makes 3 replicas of RADOS objects. If you want to maintain four
-       # copies of an object the default value--a primary copy and three replica
-       # copies--reset the default values as shown in 'osd_pool_default_size'.
-       # If you want to allow Ceph to accept an I/O operation to a degraded PG,
-       # set 'osd_pool_default_min_size' to a number less than the
-       # 'osd_pool_default_size' value.
+       # By default, Ceph makes three replicas of RADOS objects. If you want
+       # to maintain four copies of an object as the default value--a primary
+       # copy and three replica copies--reset the default values as shown in
+       # 'osd_pool_default_size'. If you want to allow Ceph to accept an I/O
+       # operation to a degraded PG, set 'osd_pool_default_min_size' to a
+       # number less than the 'osd_pool_default_size' value.
  
-       osd_pool_default_size = 3  # Write an object 3 times.
+       osd_pool_default_size = 3  # Write an object three times.
         osd_pool_default_min_size = 2 # Accept an I/O operation to a PG that has two copies of an object.
  
+       # Note: by default, PG autoscaling is enabled and this value is used only
+       # in specific circumstances. It is however still recommended to set it.
         # Ensure you have a realistic number of placement groups. We recommend
         # approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
-       # divided by the number of replicas (i.e., osd pool default size). So for
-       # 10 OSDs and osd pool default size = 4, we'd recommend approximately
+       # divided by the number of replicas (i.e., 'osd_pool_default_size'). So for
+       # 10 OSDs and 'osd_pool_default_size' = 4, we'd recommend approximately
         # (100 * 10) / 4 = 250.
-        # always use the nearest power of 2
-
+       # Always use the nearest power of two.
         osd_pool_default_pg_num = 256
-       osd_pool_default_pgp_num = 256
diff --git a/doc/rados/operations/pools.rst b/doc/rados/operations/pools.rst

index 12f1c1b1bb13c4233b88da0defb1d6a344c2e761..b4ab3b83ea8570fcfb2c8dc64d3f4cbda369d7fd 100644 (file)
--- a/doc/rados/operations/pools.rst
+++ b/doc/rados/operations/pools.rst
@@ -55,10 +55,14 @@ To list your cluster's pools, execute:
  Create a Pool
  =============
  
-Before creating pools, refer to the `Pool, PG and CRUSH Config Reference`_.
-Ideally, you should override the default value for the number of placement
-groups in your Ceph configuration file, as the default is NOT ideal.
-For details on placement group numbers refer to `setting the number of placement groups`_
+If you are not using the PG autoscaler you may wish to explicitly set a value
+for :confval:`osd_pool_default_pg_num`, as the default is small and not ideal for
+many production-scale deployments. Refer to the `Pool, PG and CRUSH Config
+Reference`_. Be careful, though, to not set a very high value as auto-deployed
+pools, notably certain RGW pools, will not hold much data and thus should not
+have a gratuitous number of PGs. When the PG autoscaler is not actively
+managing placement group numbers, best practice is to explicitly provide pg_num
+and pgp_num when creating each pool.
  
  .. note:: Starting with Luminous, all pools need to be associated to the
     application using the pool. See `Associate Pool to Application`_ below for
@@ -66,10 +70,11 @@ For details on placement group numbers refer to `setting the number of placement
  
  For example:
  
-.. prompt:: bash $
+.. code-block:: ini
  
+       [global]
+       osd_pool_default_pg_autoscale_mode = off
         osd_pool_default_pg_num = 128
-       osd_pool_default_pgp_num = 128
  
  To create a pool, execute:
  
@@ -91,13 +96,16 @@ Where:
  
  .. describe:: {pg-num}
  
-   The total number of placement groups for the pool. See :ref:`placement groups`
-   for details on calculating a suitable number. The
-   default value ``8`` is NOT suitable for most systems.
+   The total number of placement groups for the pool. See :ref:`placement
+   groups` for details on calculating a suitable number. The default value of
+   :confval:`osd_pool_default_pg_num` is likely too small for production pools
+   used for bulk data, including RBD and RGW data and bucket pools
+   respectively.
  
    :Type: Integer
-  :Required: Yes.
-  :Default: 8
+  :Required: No. Set to ``1`` if autoscaling is enabled, otherwise picks up Ceph
+         configuration value :confval:`osd_pool_default_pg_num`
+  :Default: Value of :confval:`osd_pool_default_pg_num`
  
  .. describe:: {pgp-num}
  
@@ -106,8 +114,9 @@ Where:
     for placement group splitting scenarios.
  
    :Type: Integer
-  :Required: Yes. Picks up default or Ceph configuration value if not specified.
-  :Default: 8
+  :Required: No. Picks up Ceph configuration value :confval:`osd_pool_default_pgp_num`
+         if not specified. If that is not set, defaults to value of ``pg-num``.
+  :Default: Value of ``pg-num``
  
  .. describe:: {replicated|erasure}
  
diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in

index fe08916d49542b992e98a63a8fb7a0e1ef3982dd..bdb415c5de5c313a1c7fcc6723bf829080a3857b 100644 (file)
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -2509,14 +2509,17 @@ options:
    type: uint
    level: advanced
    desc: number of PGs for placement purposes (0 to match pg_num)
-  fmt_desc: The default number of placement groups for placement for a pool.
+  fmt_desc: |
+    The default number of placement groups for placement for a pool.
      The default value is the same as ``pgp_num`` with ``mkpool``.
-    PG and PGP should be equal (for now).
+    PG and PGP should be equal (for now). Note: should not be set unless
+    autoscaling is disabled.
    default: 0
    services:
    - mon
    see_also:
    - osd_pool_default_pg_num
+  - osd_pool_default_pg_autoscale_mode
    flags:
    - runtime
  - name: osd_pool_default_type
author	Conrad Hoffmann <ch@bitfehler.net>
	Wed, 22 Mar 2023 22:03:57 +0000 (23:03 +0100)
committer	Zac Dover <zac.dover@proton.me>
	Mon, 17 Apr 2023 21:00:46 +0000 (23:00 +0200)
doc/install/manual-deployment.rst		patch \| blob \| history
doc/rados/configuration/demo-ceph.conf		patch \| blob \| history
doc/rados/configuration/pool-pg.conf		patch \| blob \| history
doc/rados/operations/pools.rst		patch \| blob \| history
src/common/options/global.yaml.in		patch \| blob \| history