git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/orch: Add DaemonDescriptionStatus `starting` and `unknown`
author Sebastian Wagner <sewagner@redhat.com>
Thu, 9 Sep 2021 10:30:42 +0000 (12:30 +0200)
committer Sebastian Wagner <sewagner@redhat.com>
Wed, 10 Nov 2021 12:49:24 +0000 (13:49 +0100)
Signed-off-by: Sebastian Wagner <sewagner@redhat.com>
doc/mgr/orchestrator_modules.rst
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/tests/fixtures.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_upgrade.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/rook/module.py

index 5991afe6136568c6aa197d449e585b516256c8e4..a28b43059d210d1b1a4bd72acdf1c6f9b612102f 100644 (file)
@@ -252,6 +252,9 @@ Daemons
 .. automethod:: Orchestrator.remove_daemons
 .. automethod:: Orchestrator.daemon_action
 
+.. autoclass:: DaemonDescription
+.. autoclass:: DaemonDescriptionStatus
+
 OSD management
 --------------
 
index 4090d040fa03a06de004587fa3b8dcfeb59e7a38..b5508662721a84b57c6659c7f7e177ee996213bb 100644 (file)
@@ -854,7 +854,6 @@ class HostCache():
                 # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses
                 # could be wrong. We must assume maintenance is working and daemons are stopped
                 dd.status = orchestrator.DaemonDescriptionStatus.stopped
-                dd.status_desc = 'stopped'
             dd.events = self.mgr.events.get_for_daemon(dd.name())
             return dd
 
index 0bab2b8004955c90ab362927be65999a9e164a47..10974780d8d07fd558299ee39e4722bdaa0ff5c9 100644 (file)
@@ -1106,7 +1106,7 @@ class CephadmServe:
                         # prime cached service state with what we (should have)
                         # just created
                         sd = daemon_spec.to_daemon_description(
-                            DaemonDescriptionStatus.running, 'starting')
+                            DaemonDescriptionStatus.starting, 'starting')
                         self.mgr.cache.add_daemon(daemon_spec.host, sd)
                         if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS:
                             self.mgr.requires_post_actions.add(daemon_spec.name())
index 8b3721e20e342d47c01d7b23eb7007d27af52730..8254f142d5b169bf6a88a7e46d43ae741397d477 100644 (file)
@@ -14,7 +14,7 @@ except ImportError:
     pass
 
 from cephadm import CephadmOrchestrator
-from orchestrator import raise_if_exception, OrchResult, HostSpec
+from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus
 from tests import mock
 
 
@@ -152,7 +152,7 @@ def assert_rm_service(cephadm: CephadmOrchestrator, srv_name):
 
 
 @contextmanager
-def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '') -> Iterator[List[str]]:
+def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]:
     if spec.placement.is_empty() and host:
         spec.placement = PlacementSpec(hosts=[host], count=1)
     if meth is not None:
@@ -167,6 +167,9 @@ def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=No
 
     CephadmServe(cephadm_module)._apply_all_services()
 
+    if status_running:
+        make_daemons_running(cephadm_module, spec.service_name())
+
     dds = wait(cephadm_module, cephadm_module.list_daemons())
     own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
     if host:
@@ -177,6 +180,12 @@ def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=No
     assert_rm_service(cephadm_module, spec.service_name())
 
 
+def make_daemons_running(cephadm_module, service_name):
+    own_dds = cephadm_module.cache.get_daemons_by_service(service_name)
+    for dd in own_dds:
+        dd.status = DaemonDescriptionStatus.running  # We're changing the reference
+
+
 def _deploy_cephadm_binary(host):
     def foo(*args, **kwargs):
         return True
index 1eff2e4cee31dd113cf053c940a0896b0eb08977..714fc7d0eef937507a1e0e8eacf1872d1fc99ace 100644 (file)
@@ -23,7 +23,7 @@ from orchestrator import DaemonDescription, InventoryHost, \
     HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent
 from tests import mock
 from .fixtures import wait, _run_cephadm, match_glob, with_host, \
-    with_cephadm_module, with_service, _deploy_cephadm_binary
+    with_cephadm_module, with_service, _deploy_cephadm_binary, make_daemons_running
 from cephadm.module import CephadmOrchestrator
 
 """
@@ -159,14 +159,16 @@ class TestCephadm(object):
                         'service_name': 'mds.name',
                         'daemon_type': 'mds',
                         'hostname': 'test',
-                        'status': 1,
+                        'status': 2,
                         'status_desc': 'starting',
                         'is_active': False,
                         'ports': [],
                     }
                 ]
 
-                with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), CephadmOrchestrator.apply_rgw, 'test'):
+                with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'),
+                                  CephadmOrchestrator.apply_rgw, 'test', status_running=True):
+                    make_daemons_running(cephadm_module, 'mds.name')
 
                     c = cephadm_module.describe_service()
                     out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
@@ -200,8 +202,10 @@ class TestCephadm(object):
     def test_service_ls_service_type_flag(self, cephadm_module):
         with with_host(cephadm_module, 'host1'):
             with with_host(cephadm_module, 'host2'):
-                with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), CephadmOrchestrator.apply_mgr, ''):
-                    with with_service(cephadm_module, ServiceSpec('mds', 'test-id', placement=PlacementSpec(count=2)), CephadmOrchestrator.apply_mds, ''):
+                with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)),
+                                  CephadmOrchestrator.apply_mgr, '', status_running=True):
+                    with with_service(cephadm_module, ServiceSpec('mds', 'test-id', placement=PlacementSpec(count=2)),
+                                      CephadmOrchestrator.apply_mds, '', status_running=True):
 
                         # with no service-type. Should provide info fot both services
                         c = cephadm_module.describe_service()
@@ -920,13 +924,15 @@ class TestCephadm(object):
             with with_host(cephadm_module, 'host2'):
                 with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1:  # type: DaemonDescription
                     with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2:  # type: DaemonDescription
+                        CephadmServe(cephadm_module)._check_for_moved_osds()
+                        # both are in status "starting"
+                        assert len(cephadm_module.cache.get_daemons()) == 2
+
                         dd1.status = DaemonDescriptionStatus.running
                         dd2.status = DaemonDescriptionStatus.error
                         cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1})
                         cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2})
-
                         CephadmServe(cephadm_module)._check_for_moved_osds()
-
                         assert len(cephadm_module.cache.get_daemons()) == 1
 
                         assert cephadm_module.events.get_for_daemon('osd.1') == [
index 690385878bdf6923e5bb9de48b488508865027d0..8a52a74c1c93377ad9ad5325f935c15c306f49a9 100644 (file)
@@ -15,7 +15,7 @@ from .fixtures import _run_cephadm, wait, with_host, with_service, \
 def test_upgrade_start(cephadm_module: CephadmOrchestrator):
     with with_host(cephadm_module, 'test'):
         with with_host(cephadm_module, 'test2'):
-            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2))):
+            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
                 assert wait(cephadm_module, cephadm_module.upgrade_start(
                     'image_id', None)) == 'Initiating upgrade to docker.io/image_id'
 
@@ -43,7 +43,8 @@ def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'host2'):
             cephadm_module.set_container_image('global', 'from_image')
             cephadm_module.use_repo_digest = use_repo_digest
-            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)), CephadmOrchestrator.apply_mgr, ''),\
+            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
+                              CephadmOrchestrator.apply_mgr, '', status_running=True),\
                 mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
                            return_value='foo'),\
                 mock.patch("cephadm.module.CephadmOrchestrator.version",
index 8a36deebb1a6403076b112908667315d5b68cc9e..26a834939e05b06f572f1132496ad6197d9547bb 100644 (file)
@@ -769,9 +769,23 @@ def handle_type_error(method: FuncT) -> FuncT:
 
 
 class DaemonDescriptionStatus(enum.IntEnum):
+    unknown = -2
     error = -1
     stopped = 0
     running = 1
+    starting = 2  #: Daemon is deployed, but not yet running
+
+    @staticmethod
+    def to_str(status: Optional['DaemonDescriptionStatus']) -> str:
+        if status is None:
+            status = DaemonDescriptionStatus.unknown
+        return {
+            DaemonDescriptionStatus.unknown: 'unknown',
+            DaemonDescriptionStatus.error: 'error',
+            DaemonDescriptionStatus.stopped: 'stopped',
+            DaemonDescriptionStatus.running: 'running',
+            DaemonDescriptionStatus.starting: 'starting',
+        }.get(status, '<unknown>')
 
 
 class DaemonDescription(object):
@@ -817,7 +831,7 @@ class DaemonDescription(object):
                  rank_generation: Optional[int] = None,
                  ) -> None:
 
-        # Host is at the same granularity as InventoryHost
+        #: Host is at the same granularity as InventoryHost
         self.hostname: Optional[str] = hostname
 
         # Not everyone runs in containers, but enough people do to
@@ -828,32 +842,32 @@ class DaemonDescription(object):
         self.container_image_name = container_image_name  # image friendly name
         self.container_image_digests = container_image_digests  # reg hashes
 
-        # The type of service (osd, mon, mgr, etc.)
+        #: The type of service (osd, mon, mgr, etc.)
         self.daemon_type = daemon_type
 
-        # The orchestrator will have picked some names for daemons,
-        # typically either based on hostnames or on pod names.
-        # This is the <foo> in mds.<foo>, the ID that will appear
-        # in the FSMap/ServiceMap.
+        #: The orchestrator will have picked some names for daemons,
+        #: typically either based on hostnames or on pod names.
+        #: This is the <foo> in mds.<foo>, the ID that will appear
+        #: in the FSMap/ServiceMap.
         self.daemon_id: Optional[str] = daemon_id
         self.daemon_name = self.name()
 
-        # Some daemon types have a numeric rank assigned
+        #: Some daemon types have a numeric rank assigned
         self.rank: Optional[int] = rank
         self.rank_generation: Optional[int] = rank_generation
 
         self._service_name: Optional[str] = service_name
 
-        # Service version that was deployed
+        #: Service version that was deployed
         self.version = version
 
-        # Service status: -1 error, 0 stopped, 1 running
-        self.status = status
+        # Service status: -2 unknown, -1 error, 0 stopped, 1 running, 2 starting
+        self._status = status
 
-        # Service status description when status == error.
+        #: Service status description when status == error.
         self.status_desc = status_desc
 
-        # datetime when this info was last refreshed
+        #: datetime when this info was last refreshed
         self.last_refresh: Optional[datetime.datetime] = last_refresh
 
         self.created: Optional[datetime.datetime] = created
@@ -861,7 +875,7 @@ class DaemonDescription(object):
         self.last_configured: Optional[datetime.datetime] = last_configured
         self.last_deployed: Optional[datetime.datetime] = last_deployed
 
-        # Affinity to a certain OSDSpec
+        #: Affinity to a certain OSDSpec
         self.osdspec_affinity: Optional[str] = osdspec_affinity
 
         self.events: List[OrchestratorEvent] = events or []
@@ -877,6 +891,15 @@ class DaemonDescription(object):
 
         self.is_active = is_active
 
+    @property
+    def status(self) -> Optional[DaemonDescriptionStatus]:
+        return self._status
+
+    @status.setter
+    def status(self, new: DaemonDescriptionStatus) -> None:
+        self._status = new
+        self.status_desc = DaemonDescriptionStatus.to_str(new)
+
     def get_port_summary(self) -> str:
         if not self.ports:
             return ''
index 20bab4202a9f3a542388233c02fbd6b4913ac486..8efd31a13b81754c291db704c5c2bd9f3737dee6 100644 (file)
@@ -662,13 +662,8 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
                 if s.status_desc:
                     status = s.status_desc
                 else:
-                    status = {
-                        DaemonDescriptionStatus.error: 'error',
-                        DaemonDescriptionStatus.stopped: 'stopped',
-                        DaemonDescriptionStatus.running: 'running',
-                        None: '<unknown>'
-                    }[s.status]
-                if s.status == DaemonDescriptionStatus.running and s.started:
+                    status = DaemonDescriptionStatus.to_str(s.status)
+                if s.status == DaemonDescriptionStatus.running and s.started:  # See DDS.starting
                     status += ' (%s)' % to_pretty_timedelta(now - s.started)
 
                 table.add_row((
index 93021d0c2efde2874abcf2caac763775720386ba..a54c0b65b0f645fc697fb2bac5e62729b89b1f72 100644 (file)
@@ -418,14 +418,13 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator):
             sd.hostname = p['hostname']
             sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '')
             status = {
-                'Pending': orchestrator.DaemonDescriptionStatus.error,
+                'Pending': orchestrator.DaemonDescriptionStatus.starting,
                 'Running': orchestrator.DaemonDescriptionStatus.running,
                 'Succeeded': orchestrator.DaemonDescriptionStatus.stopped,
                 'Failed': orchestrator.DaemonDescriptionStatus.error,
-                'Unknown': orchestrator.DaemonDescriptionStatus.error,
+                'Unknown': orchestrator.DaemonDescriptionStatus.unknown,
             }[p['phase']]
             sd.status = status
-            sd.status_desc = p['phase']
 
             if 'ceph_daemon_id' in p['labels']:
                 sd.daemon_id = p['labels']['ceph_daemon_id']