cephadm: add ability to continue on failure when applying multiple specs

author Adam King <adking@redhat.com>

Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)

committer Adam King <adking@redhat.com>

Mon, 9 Sep 2024 22:31:40 +0000 (18:31 -0400)
author Adam King <adking@redhat.com>
Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)
committer Adam King <adking@redhat.com>
Mon, 9 Sep 2024 22:31:40 +0000 (18:31 -0400)
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py

index e71addf7bfa5a6a05cf2bb08b3bd4d66f35affe1..33cac4a34032a5ca4eb76f8597aa5deedd789b0d 100755 (executable)
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -2946,7 +2946,7 @@ def command_bootstrap(ctx):
          mounts = {}
          mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
          try:
-            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
+            out = cli(['orch', 'apply', '--continue-on-error', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
              logger.info(out)
          except Exception:
              ctx.error_code = -errno.EINVAL
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py

index 1a9a10862180e2e958934c5339b16a9705105fd2..650d9711bd3be7a015700b98f495f1e96c2307e8 100644 (file)
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3516,7 +3516,12 @@ Then run the following:
          return "Scheduled %s update..." % spec.service_name()
  
      @handle_orch_error
-    def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]:
+    def apply(
+        self,
+        specs: Sequence[GenericSpec],
+        no_overwrite: bool = False,
+        continue_on_error: bool = True
+    ) -> List[str]:
          results = []
          for spec in specs:
              if no_overwrite:
@@ -3528,7 +3533,14 @@ Then run the following:
                      results.append('Skipped %s service spec. To change %s spec omit --no-overwrite flag'
                                     % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name()))
                      continue
-            results.append(self._apply(spec))
+            try:
+                res = self._apply(spec)
+                results.append(res)
+            except Exception as e:
+                if continue_on_error:
+                    results.append(f'Failed to apply spec for {spec}: {str(e)}')
+                else:
+                    raise e
          return results
  
      @handle_orch_error
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py

index c33f38cfdd470f255e33d37f500a6db53971240d..7b602401a7e61a6d12f55bf3817bb40e5d9e95ec 100644 (file)
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -574,7 +574,12 @@ class Orchestrator(object):
          raise NotImplementedError()
  
      @handle_orch_error
-    def apply(self, specs: Sequence["GenericSpec"], no_overwrite: bool = False) -> List[str]:
+    def apply(
+        self,
+        specs: Sequence["GenericSpec"],
+        no_overwrite: bool = False,
+        continue_on_error: bool = False
+    ) -> List[str]:
          """
          Applies any spec
          """
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py

index d0f3286177ce5fba87487e57d123817fc8c85e2e..7c943b076f4a8ebdda620e04f6ef5ec49d41586f 100644 (file)
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -1620,12 +1620,14 @@ Usage:
                     format: Format = Format.plain,
                     unmanaged: bool = False,
                     no_overwrite: bool = False,
+                   continue_on_error: bool = False,
                     inbuf: Optional[str] = None) -> HandleCommandResult:
          """Update the size or placement for a service or apply a large yaml spec"""
          usage = """Usage:
    ceph orch apply -i <yaml spec> [--dry-run]
    ceph orch apply <service_type> [--placement=<placement_string>] [--unmanaged]
          """
+        errs: List[str] = []
          if inbuf:
              if service_type or placement or unmanaged:
                  raise OrchestratorValidationError(usage)
@@ -1635,7 +1637,14 @@ Usage:
              # None entries in the output. Let's skip them silently.
              content = [o for o in yaml_objs if o is not None]
              for s in content:
-                spec = json_to_generic_spec(s)
+                try:
+                    spec = json_to_generic_spec(s)
+                except Exception as e:
+                    if continue_on_error:
+                        errs.append(f'Failed to convert {s} from json object: {str(e)}')
+                        continue
+                    else:
+                        raise e
  
                  # validate the config (we need MgrModule for that)
                  if isinstance(spec, ServiceSpec) and spec.config:
@@ -1643,7 +1652,12 @@ Usage:
                          try:
                              self.get_foreign_ceph_option('mon', k)
                          except KeyError:
-                            raise SpecValidationError(f'Invalid config option {k} in spec')
+                            err = SpecValidationError(f'Invalid config option {k} in spec')
+                            if continue_on_error:
+                                errs.append(str(err))
+                                continue
+                            else:
+                                raise err
  
                  # There is a general "osd" service with no service id, but we use
                  # that to dump osds created individually with "ceph orch daemon add osd"
@@ -1658,7 +1672,12 @@ Usage:
                      and spec.service_type == 'osd'
                      and not spec.service_id
                  ):
-                    raise SpecValidationError('Please provide the service_id field in your OSD spec')
+                    err = SpecValidationError('Please provide the service_id field in your OSD spec')
+                    if continue_on_error:
+                        errs.append(str(err))
+                        continue
+                    else:
+                        raise err
  
                  if dry_run and not isinstance(spec, HostSpec):
                      spec.preview_only = dry_run
@@ -1668,15 +1687,30 @@ Usage:
                      continue
                  specs.append(spec)
          else:
+            # Note in this case there is only ever one spec
+            # being applied so there is no need to worry about
+            # handling of continue_on_error
              placementspec = PlacementSpec.from_string(placement)
              if not service_type:
                  raise OrchestratorValidationError(usage)
              specs = [ServiceSpec(service_type.value, placement=placementspec,
                                   unmanaged=unmanaged, preview_only=dry_run)]
-        return self._apply_misc(specs, dry_run, format, no_overwrite)
-
-    def _apply_misc(self, specs: Sequence[GenericSpec], dry_run: bool, format: Format, no_overwrite: bool = False) -> HandleCommandResult:
-        completion = self.apply(specs, no_overwrite)
+        cmd_result = self._apply_misc(specs, dry_run, format, no_overwrite, continue_on_error)
+        if errs:
+            # HandleCommandResult is a named tuple, so use
+            # _replace to modify it.
+            cmd_result = cmd_result._replace(stdout=cmd_result.stdout + '\n' + '\n'.join(errs))
+        return cmd_result
+
+    def _apply_misc(
+        self,
+        specs: Sequence[GenericSpec],
+        dry_run: bool,
+        format: Format,
+        no_overwrite: bool = False,
+        continue_on_error: bool = False
+    ) -> HandleCommandResult:
+        completion = self.apply(specs, no_overwrite, continue_on_error)
          raise_if_exception(completion)
          out = completion.result_str()
          if dry_run:
author	Adam King <adking@redhat.com>
	Mon, 9 Sep 2024 22:28:45 +0000 (18:28 -0400)
committer	Adam King <adking@redhat.com>
	Mon, 9 Sep 2024 22:31:40 +0000 (18:31 -0400)
src/cephadm/cephadm.py		patch \| blob \| history
src/pybind/mgr/cephadm/module.py		patch \| blob \| history
src/pybind/mgr/orchestrator/_interface.py		patch \| blob \| history
src/pybind/mgr/orchestrator/module.py		patch \| blob \| history