]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: allow setting up RGW delaying shutdown to complete client connections 61487/head
authorAdam King <adking@redhat.com>
Wed, 22 Jan 2025 19:23:48 +0000 (14:23 -0500)
committerAdam King <adking@redhat.com>
Wed, 2 Apr 2025 17:50:56 +0000 (13:50 -0400)
Specifically through the spec file. This was added recently on the RGW
side by https://github.com/ceph/ceph/commit/575f5d461706b02a596d5ce959e5fb1aa87c1d8c
This commit is to make it easier to take advantage of for users in cephadm deployments

Signed-off-by: Adam King <adking@redhat.com>
doc/cephadm/services/rgw.rst
src/cephadm/cephadmlib/daemons/ceph.py
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_migration.py
src/pybind/mgr/cephadm/tests/test_spec.py
src/python-common/ceph/deployment/service_spec.py
src/python-common/ceph/tests/test_service_spec.py

index c18d90d53bd1f819fa808013f4ba7188ac730c28..e1d3a8e54419178b155f4c60d88454c9e029ea3b 100644 (file)
@@ -229,6 +229,41 @@ RGW daemons deployed for that RGW service. For example
     The daemon can still receive replication data unless it has been removed
     from the zonegroup and zone replication endpoints.
 
+Draining client connections on shutdown
+---------------------------------------
+
+When an RGW daemon is stopped for any reason, including during the cephadm upgrade process,
+RGW offers a setting to delay shutdown as the RGW daemon attempts to complete ongoing
+client requests. This setting is off by default but can be activated manually by either passing
+``--stop-timeout=<timeout-in-seconds>`` to the RGW process or by setting the
+``rgw_exit_timeout_secs`` config option for the RGW daemon. This value may be configured in
+the RGW service spec file by specifying the ``rgw_exit_timeout_secs`` parameter in the spec
+file. For example
+
+.. code-block:: yaml
+
+    service_type: rgw
+    service_id: foo
+    placement:
+      label: rgw
+    spec:
+      rgw_realm: myrealm
+      rgw_zone: myzone
+      rgw_zonegroup: myzg
+      rgw_exit_timeout_secs: 120
+
+would tell the RGW daemons cephadm deploys for the rgw.foo service to wait up to 120
+seconds for current client requests to complete. Note that the RGW daemon will refuse
+new client requests during this time.
+
+.. note:: In cephadm deployments this setting is enabled by default with a value of 120 seconds.
+    If you would like to disable this feature you must set ``rgw_exit_timeout_secs`` to 0 in the spec.
+
+.. note:: Modifications to this setting in the spec will not be picked up by the RGW daemons
+    in the service until they are redeployed using either the ``ceph orch redeploy <service-name>``
+    or ``ceph orch daemon redeploy <daemon-name>`` command.
+
+
 Service specification
 ---------------------
 
index 40061672d06e5b2758322803e8d1ee772c2f178b..c31a355d7eb41373f95b201dd480a4f5899ff321 100644 (file)
@@ -90,6 +90,10 @@ class Ceph(ContainerDaemonForm):
                 # but that doesn't seem to persist in the object after it's passed
                 # in further function calls
                 ctr.args = ctr.args + ['--set-crush-location', c_loc]
+        if self.identity.daemon_type == 'rgw' and config_json is not None:
+            if 'rgw_exit_timeout_secs' in config_json:
+                stop_timeout = config_json['rgw_exit_timeout_secs']
+                ctr.args = ctr.args + [f'--stop-timeout={stop_timeout}']
         return ctr
 
     _uid_gid: Optional[Tuple[int, int]] = None
index d5dcac62cc90f51f8a761cf9a9ae7a387d8805f4..4061e4edcb2c832347845dc17fe3febb4062698f 100644 (file)
@@ -461,6 +461,79 @@ class TestCephAdm(object):
         _cephadm.command_deploy_from(ctx)
         _deploy_daemon.assert_called()
 
+    def test_rgw_exit_timeout(self, funkypatch):
+        """
+        test that rgw exit timeout secs is set properly
+        """
+        funkypatch.patch('cephadm.logger')
+        funkypatch.patch('cephadm.FileLock')
+        _deploy_daemon = funkypatch.patch('cephadm.deploy_daemon')
+        funkypatch.patch('cephadm.make_var_run')
+        funkypatch.patch('cephadmlib.file_utils.make_run_dir')
+        funkypatch.patch('os.mkdir')
+        _migrate_sysctl = funkypatch.patch('cephadm.migrate_sysctl_dir')
+        funkypatch.patch(
+            'cephadm.check_unit',
+            dest=lambda *args, **kwargs: (None, 'running', None),
+        )
+        funkypatch.patch(
+            'cephadm.get_unit_name',
+            dest=lambda *args, **kwargs: 'mon-unit-name',
+        )
+        funkypatch.patch(
+            'cephadm.extract_uid_gid', dest=lambda *args, **kwargs: (0, 0)
+        )
+        _get_container = funkypatch.patch('cephadm.get_container')
+        funkypatch.patch(
+            'cephadm.apply_deploy_config_to_ctx', dest=lambda d, c: None
+        )
+        _fetch_configs = funkypatch.patch(
+            'cephadmlib.context_getters.fetch_configs'
+        )
+        funkypatch.patch(
+            'cephadm.read_configuration_source', dest=lambda c: {}
+        )
+        funkypatch.patch('cephadm.fetch_custom_config_files')
+
+        ctx = _cephadm.CephadmContext()
+        ctx.name = 'rgw.foo.test.abcdef'
+        ctx.fsid = 'b66e5288-d8ea-11ef-b953-525400f9646d'
+        ctx.reconfig = False
+        ctx.container_engine = mock_docker()
+        ctx.allow_ptrace = True
+        ctx.config_json = '-'
+        ctx.osd_fsid = '0'
+        ctx.tcp_ports = '3300 6789'
+        _fetch_configs.return_value = {
+            'rgw_exit_timeout_secs': 200
+        }
+
+        _get_container.return_value = _cephadm.CephContainer.for_daemon(
+            ctx,
+            ident=_cephadm.DaemonIdentity(
+                fsid='b66e5288-d8ea-11ef-b953-525400f9646d',
+                daemon_type='rgw',
+                daemon_id='foo.test.abcdef',
+            ),
+            entrypoint='',
+            args=[],
+            container_args=[],
+            volume_mounts={},
+            bind_mounts=[],
+            envs=[],
+            privileged=False,
+            ptrace=False,
+            host_network=True,
+        )
+
+        def _exit_timeout_secs_checker(ctx, ident, container, uid, gid, **kwargs):
+            argval = ' '.join(container.args)
+            assert '--stop-timeout=200' in argval
+
+        _deploy_daemon.side_effect = _exit_timeout_secs_checker
+        _cephadm.command_deploy_from(ctx)
+        _deploy_daemon.assert_called()
+
     @mock.patch('cephadm.logger')
     @mock.patch('cephadm.fetch_custom_config_files')
     def test_write_custom_conf_files(self, _get_config, _logger, cephadm_fs):
index 50d6b566a4f4959c186b9d06aeb3789c7ff8c207..cbc5ccd4c71a5edc2239da191762aaf214ea6ff9 100644 (file)
@@ -1304,6 +1304,10 @@ class RgwService(CephService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         svc_spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
         config, parent_deps = super().generate_config(daemon_spec)
+
+        if hasattr(svc_spec, 'rgw_exit_timeout_secs') and svc_spec.rgw_exit_timeout_secs:
+            config['rgw_exit_timeout_secs'] = svc_spec.rgw_exit_timeout_secs
+
         rgw_deps = parent_deps + self.get_dependencies(self.mgr, svc_spec)
         return config, rgw_deps
 
index 87fe08633fa5861d46b9c530f2a091c689fd25e0..694d332f589e03a2ec0ddbb8f003789495f70fa1 100644 (file)
@@ -273,6 +273,7 @@ class TestCephadm(object):
                             'service_id': 'r.z',
                             'service_name': 'rgw.r.z',
                             'service_type': 'rgw',
+                            'spec': {'rgw_exit_timeout_secs': 120},
                             'status': {'created': mock.ANY, 'running': 1, 'size': 1,
                                        'ports': [80]},
                         }
index b12dd30bf4b7ac2348523af57ade948a15a3c50d..939c313c63f5358f5d4fdab5f12d3aad7835da9f 100644 (file)
@@ -338,6 +338,7 @@ def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_en
                                                                                 'rgw_thread_pool_size=512'],
                                                     'rgw_frontend_port': '5000',
                                                     'rgw_frontend_type': 'beast',
+                                                    'rgw_exit_timeout_secs': 120,
                                                 }}
         else:
             # in a real environment, we still expect the spec to be there,
index 668289cf05e508e5ab75bc61ff54e5272489daf2..12aa92000e304bbf5149b950821be710ba8b547f 100644 (file)
@@ -118,6 +118,7 @@ def test_spec_octopus(spec_json):
         j_c.pop('objectstore', None)
         j_c.pop('filter_logic', None)
         j_c.pop('anonymous_access', None)
+        j_c.pop('rgw_exit_timeout_secs', None)
         return j_c
 
     assert spec_json == convert_to_old_style_json(spec.to_json())
index 1c1b1825f155c4d3601ce7defcdbde105fb6f021..39a3b3541e3b629a6899d2caaf9b953fa0ede253 100644 (file)
@@ -1236,6 +1236,7 @@ class RGWSpec(ServiceSpec):
                  generate_cert: bool = False,
                  disable_multisite_sync_traffic: Optional[bool] = None,
                  wildcard_enabled: Optional[bool] = False,
+                 rgw_exit_timeout_secs: int = 120,
                  ):
         assert service_type == 'rgw', service_type
 
@@ -1293,6 +1294,8 @@ class RGWSpec(ServiceSpec):
         self.wildcard_enabled = wildcard_enabled
         #: Attributes for <zone-name>.rgw.buckets.data pool created in rgw realm bootstrap command
         self.data_pool_attributes = data_pool_attributes
+        #: How long the RGW will wait to try and complete client requests when told to shut down
+        self.rgw_exit_timeout_secs = rgw_exit_timeout_secs
 
     def get_port_start(self) -> List[int]:
         ports = self.get_port()
index cb5324d0b799fde57c007c9921461961729fa63d..0c5cd313013c6ec5b62914866d97141f7a6ad585 100644 (file)
@@ -335,6 +335,7 @@ networks:
 - 10.0.0.0/8
 - 192.168.0.0/16
 spec:
+  rgw_exit_timeout_secs: 60
   rgw_frontend_type: civetweb
   rgw_realm: default-rgw-realm
   rgw_zone: eu-central-1