From 6eb989043ee14fabd26845dd67e51d75b3bc4d8f Mon Sep 17 00:00:00 2001 From: Kushal Deb Date: Wed, 21 May 2025 15:31:06 +0530 Subject: [PATCH] improve error handling and add --skip-realm-components flag to 'ceph rgw realm bootstrap' for partial recovery This patch enhances the `ceph rgw realm bootstrap` command by improving error messaging and introducing a `--skip-realm-components` flag to support recovery from partial bootstrap failures. Signed-off-by: Kushal Deb --- src/pybind/mgr/rgw/module.py | 9 ++++++++- src/python-common/ceph/rgw/rgwam_core.py | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/pybind/mgr/rgw/module.py b/src/pybind/mgr/rgw/module.py index 82560a567a826..284d88ef17519 100644 --- a/src/pybind/mgr/rgw/module.py +++ b/src/pybind/mgr/rgw/module.py @@ -184,6 +184,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): placement: Optional[str] = None, zone_endpoints: Optional[str] = None, start_radosgw: Optional[bool] = True, + skip_realm_components: Optional[bool] = False, inbuf: Optional[str] = None) -> HandleCommandResult: """Bootstrap new rgw realm, zonegroup, and zone""" @@ -207,7 +208,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): try: for spec in rgw_specs: self.create_pools(spec) - RGWAM(self.env).realm_bootstrap(spec, start_radosgw) + RGWAM(self.env).realm_bootstrap(spec, start_radosgw, skip_realm_components) + except RGWAMException as e: self.log.error('cmd run exception: (%d) %s' % (e.retcode, e.message)) # The RGWAM code isn't always consistent about what goes into stdout @@ -237,6 +239,11 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): elif e.stdout: msg = e.stdout return HandleCommandResult(retval=e.retcode, stdout=msg, stderr=e.stderr) + e.stderr = (e.stderr or '') + ( + "\nNote: Partial bootstrap detected - The following entries were already created during a previous bootstrap attempt. 
\n" + "To resume, run:\n ceph rgw realm bootstrap with --skip-realm-components\n" + ) + return HandleCommandResult(retval=e.retcode, stdout=e.stdout, stderr=e.stderr) except PoolCreationError as e: self.log.error(f'Pool creation failure: {str(e)}') return HandleCommandResult(retval=-errno.EINVAL, stderr=str(e)) diff --git a/src/python-common/ceph/rgw/rgwam_core.py b/src/python-common/ceph/rgw/rgwam_core.py index bbfb85af27a70..9b19865bfc501 100644 --- a/src/python-common/ceph/rgw/rgwam_core.py +++ b/src/python-common/ceph/rgw/rgwam_core.py @@ -555,19 +555,28 @@ class RGWAM: except RGWAMCmdRunException as e: raise RGWAMException('failed to update period', e) - def realm_bootstrap(self, rgw_spec, start_radosgw=True): + def realm_bootstrap(self, rgw_spec, start_radosgw=True, skip_realm_components=False): realm_name = rgw_spec.rgw_realm zonegroup_name = rgw_spec.rgw_zonegroup zone_name = rgw_spec.rgw_zone # Some sanity checks - if realm_name in self.realm_op().list(): - raise RGWAMException(f'Realm {realm_name} already exists') - if zonegroup_name in self.zonegroup_op().list(): - raise RGWAMException(f'Zonegroup {zonegroup_name} already exists') - if zone_name in self.zone_op().list(): - raise RGWAMException(f'Zone {zone_name} already exists') + if not skip_realm_components: + existing = [] + if realm_name in self.realm_op().list(): + existing.append(f"realm: {realm_name}") + # raise RGWAMException(f'Realm {realm_name} already exists') + if zonegroup_name in self.zonegroup_op().list(): + existing.append(f"zonegroup: {zonegroup_name}") + # raise RGWAMException(f'Zonegroup {zonegroup_name} already exists') + if zone_name in self.zone_op().list(): + existing.append(f"zone: {zone_name}") + # raise RGWAMException(f'Zone {zone_name} already exists') + if existing: + raise RGWAMException( + f"The following components already exist: {', '.join(existing)}" + ) # Create RGW entities and update the period realm = self.create_realm(realm_name) -- 2.39.5