cephadm: add ability to continue on failure when applying multiple specs

author Adam King <adking@redhat.com>

Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)

committer Adam King <adking@redhat.com>

Sat, 21 Jun 2025 20:19:08 +0000 (16:19 -0400)
author Adam King <adking@redhat.com>
Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)
committer Adam King <adking@redhat.com>
Sat, 21 Jun 2025 20:19:08 +0000 (16:19 -0400)
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py

index 5758e7c258b363c9719961c6073ef3fcce09218e..45ec18fe1a74851f9bea202c1e7cd6e7750f7c0e 100755 (executable)
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -2915,7 +2915,7 @@ def command_bootstrap(ctx):
          mounts = {}
          mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
          try:
-            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
+            out = cli(['orch', 'apply', '--continue-on-error', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
              logger.info(out)
          except Exception:
              ctx.error_code = -errno.EINVAL
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py

index a8b71a1081effeb0729437f96850bb548815d941..1ddfb0240e713a39ae06ff7bac0e68da6b05ec8d 100644 (file)
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3487,7 +3487,12 @@ Then run the following:
          return "Scheduled %s update..." % spec.service_name()
  
      @handle_orch_error
-    def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]:
+    def apply(
+        self,
+        specs: Sequence[GenericSpec],
+        no_overwrite: bool = False,
+        continue_on_error: bool = True
+    ) -> List[str]:
          results = []
          for spec in specs:
              if no_overwrite:
@@ -3499,7 +3504,14 @@ Then run the following:
                      results.append('Skipped %s service spec. To change %s spec omit --no-overwrite flag'
                                     % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name()))
                      continue
-            results.append(self._apply(spec))
+            try:
+                res = self._apply(spec)
+                results.append(res)
+            except Exception as e:
+                if continue_on_error:
+                    results.append(f'Failed to apply spec for {spec}: {str(e)}')
+                else:
+                    raise e
          return results
  
      @handle_orch_error
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py

index 9e5c08f7959e3e09c6ccf618a2c995d7c972650c..7bd6943795601c11ab3d106bc9c703b6634f4c1b 100644 (file)
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -591,7 +591,12 @@ class Orchestrator(object):
          raise NotImplementedError()
  
      @handle_orch_error
-    def apply(self, specs: Sequence["GenericSpec"], no_overwrite: bool = False) -> List[str]:
+    def apply(
+        self,
+        specs: Sequence["GenericSpec"],
+        no_overwrite: bool = False,
+        continue_on_error: bool = False
+    ) -> List[str]:
          """
          Applies any spec
          """
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py

index fd7ffddaa51583c93415b489639430652390ccc9..9cecb377c2bdb7022c595601ce33692d129d08c8 100644 (file)
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -1641,12 +1641,14 @@ Usage:
                     format: Format = Format.plain,
                     unmanaged: bool = False,
                     no_overwrite: bool = False,
+                   continue_on_error: bool = False,
                     inbuf: Optional[str] = None) -> HandleCommandResult:
          """Update the size or placement for a service or apply a large yaml spec"""
          usage = """Usage:
    ceph orch apply -i <yaml spec> [--dry-run]
    ceph orch apply <service_type> [--placement=<placement_string>] [--unmanaged]
          """
+        errs: List[str] = []
          if inbuf:
              if service_type or placement or unmanaged:
                  raise OrchestratorValidationError(usage)
@@ -1656,7 +1658,14 @@ Usage:
              # None entries in the output. Let's skip them silently.
              content = [o for o in yaml_objs if o is not None]
              for s in content:
-                spec = json_to_generic_spec(s)
+                try:
+                    spec = json_to_generic_spec(s)
+                except Exception as e:
+                    if continue_on_error:
+                        errs.append(f'Failed to convert {s} from json object: {str(e)}')
+                        continue
+                    else:
+                        raise e
  
                  # validate the config (we need MgrModule for that)
                  if isinstance(spec, ServiceSpec) and spec.config:
@@ -1664,7 +1673,12 @@ Usage:
                          try:
                              self.get_foreign_ceph_option('mon', k)
                          except KeyError:
-                            raise SpecValidationError(f'Invalid config option {k} in spec')
+                            err = SpecValidationError(f'Invalid config option {k} in spec')
+                            if continue_on_error:
+                                errs.append(str(err))
+                                continue
+                            else:
+                                raise err
  
                  # There is a general "osd" service with no service id, but we use
                  # that to dump osds created individually with "ceph orch daemon add osd"
@@ -1679,7 +1693,12 @@ Usage:
                      and spec.service_type == 'osd'
                      and not spec.service_id
                  ):
-                    raise SpecValidationError('Please provide the service_id field in your OSD spec')
+                    err = SpecValidationError('Please provide the service_id field in your OSD spec')
+                    if continue_on_error:
+                        errs.append(str(err))
+                        continue
+                    else:
+                        raise err
  
                  if dry_run and not isinstance(spec, HostSpec):
                      spec.preview_only = dry_run
@@ -1689,15 +1708,30 @@ Usage:
                      continue
                  specs.append(spec)
          else:
+            # Note in this case there is only ever one spec
+            # being applied so there is no need to worry about
+            # handling of continue_on_error
              placementspec = PlacementSpec.from_string(placement)
              if not service_type:
                  raise OrchestratorValidationError(usage)
              specs = [ServiceSpec(service_type.value, placement=placementspec,
                                   unmanaged=unmanaged, preview_only=dry_run)]
-        return self._apply_misc(specs, dry_run, format, no_overwrite)
-
-    def _apply_misc(self, specs: Sequence[GenericSpec], dry_run: bool, format: Format, no_overwrite: bool = False) -> HandleCommandResult:
-        completion = self.apply(specs, no_overwrite)
+        cmd_result = self._apply_misc(specs, dry_run, format, no_overwrite, continue_on_error)
+        if errs:
+            # HandleCommandResult is a named tuple, so use
+            # _replace to modify it.
+            cmd_result = cmd_result._replace(stdout=cmd_result.stdout + '\n' + '\n'.join(errs))
+        return cmd_result
+
+    def _apply_misc(
+        self,
+        specs: Sequence[GenericSpec],
+        dry_run: bool,
+        format: Format,
+        no_overwrite: bool = False,
+        continue_on_error: bool = False
+    ) -> HandleCommandResult:
+        completion = self.apply(specs, no_overwrite, continue_on_error)
          raise_if_exception(completion)
          out = completion.result_str()
          if dry_run:
author	Adam King <adking@redhat.com>
	Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)
committer	Adam King <adking@redhat.com>
	Sat, 21 Jun 2025 20:19:08 +0000 (16:19 -0400)
src/cephadm/cephadm.py		patch | blob | history
src/pybind/mgr/cephadm/module.py		patch | blob | history
src/pybind/mgr/orchestrator/_interface.py		patch | blob | history
src/pybind/mgr/orchestrator/module.py		patch | blob | history