]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm/osd: update removal report immediately
authorKiefer Chang <kiefer.chang@suse.com>
Tue, 3 Mar 2020 08:58:40 +0000 (16:58 +0800)
committerKiefer Chang <kiefer.chang@suse.com>
Mon, 9 Mar 2020 06:50:37 +0000 (14:50 +0800)
Currently, the removal report is updated after entering the main serve()
loop. This tiny window makes clients (like Dashboard) fail to poll the
result. Refresh the report data immediately after scheduling OSD for
removal.

The report structure is changed from Dict to Set because:
- The key is `OSDRemoval` instance, which make it hard to use.
- More consistent with orchestrator interfaces: most calls return a list.

Signed-off-by: Kiefer Chang <kiefer.chang@suse.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/osd.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/orchestrator/module.py

index 37f9c28edbb5d1a04a5ceab33f7561919d595adc..f755431848803c68184d9323d2ae804a723d8829 100644 (file)
@@ -2933,15 +2933,15 @@ receivers:
                 continue
             found.add(OSDRemoval(daemon.daemon_id, replace, force,
                                  daemon.hostname, daemon.name(),
-                                 datetime.datetime.utcnow()))
+                                 datetime.datetime.utcnow(), -1))
 
         not_found = {osd_id for osd_id in osd_ids if osd_id not in [x.osd_id for x in found]}
         if not_found:
             raise OrchestratorError('Unable to find OSD: %s' % not_found)
 
-        for osd in found:
-            self.rm_util.to_remove_osds.add(osd)
-            # trigger the serve loop to initiate the removal
+        self.rm_util.queue_osds_for_removal(found)
+
+        # trigger the serve loop to initiate the removal
         self._kick_serve_loop()
         return trivial_result(f"Scheduled OSD(s) for removal")
 
@@ -2949,7 +2949,7 @@ receivers:
         """
         The CLI call to retrieve an osd removal report
         """
-        return trivial_result(self.rm_util.osd_removal_report)
+        return trivial_result(self.rm_util.report)
 
     def list_specs(self) -> orchestrator.Completion:
         """
index 6616b38ca362734c51c45a44e965418d0a84c96a..4e2181d201aef6865a1150a78cf2d00e28e3602d 100644 (file)
@@ -3,7 +3,7 @@ import json
 import logging
 import time
 
-from typing import List, NamedTuple, Dict, Any, Set, Union
+from typing import List, Dict, Any, Set, Union
 
 import orchestrator
 from orchestrator import OrchestratorError
@@ -11,13 +11,22 @@ from orchestrator import OrchestratorError
 logger = logging.getLogger(__name__)
 
 
-class OSDRemoval(NamedTuple):
-    osd_id: int
-    replace: bool
-    force: bool
-    nodename: str
-    fullname: str
-    started_at: datetime.datetime
+class OSDRemoval(object):
+    def __init__(self,
+                 osd_id: str,
+                 replace: bool,
+                 force: bool,
+                 nodename: str,
+                 fullname: str,
+                 start_at: datetime.datetime,
+                 pg_count: int):
+        self.osd_id = osd_id
+        self.replace = replace
+        self.force = force
+        self.nodename = nodename
+        self.fullname = fullname
+        self.started_at = start_at
+        self.pg_count = pg_count
 
     # needed due to changing 'started_at' attr
     def __eq__(self, other):
@@ -26,6 +35,16 @@ class OSDRemoval(NamedTuple):
     def __hash__(self):
         return hash(self.osd_id)
 
+    def __repr__(self):
+        return ('<OSDRemoval>(osd_id={}, replace={}, force={}, nodename={}'
+                ', fullname={}, started_at={}, pg_count={})').format(
+            self.osd_id, self.replace, self.force, self.nodename,
+            self.fullname, self.started_at, self.pg_count)
+
+    @property
+    def pg_count_str(self) -> str:
+        return 'n/a' if self.pg_count < 0 else str(self.pg_count)
+
 
 class RemoveUtil(object):
     def __init__(self, mgr):
@@ -33,6 +52,12 @@ class RemoveUtil(object):
         self.to_remove_osds: Set[OSDRemoval] = set()
         self.osd_removal_report: Dict[OSDRemoval, Union[int,str]] = dict()
 
+    @property
+    def report(self) -> Set[OSDRemoval]:
+        return self.to_remove_osds.copy()
+
+    def queue_osds_for_removal(self, osds: Set[OSDRemoval]):
+        self.to_remove_osds.update(osds)
 
     def _remove_osds_bg(self) -> None:
         """
@@ -41,7 +66,7 @@ class RemoveUtil(object):
         """
         logger.debug(
             f"{len(self.to_remove_osds)} OSDs are scheduled for removal: {list(self.to_remove_osds)}")
-        self.osd_removal_report = self._generate_osd_removal_status()
+        self._update_osd_removal_status()
         remove_osds: set = self.to_remove_osds.copy()
         for osd in remove_osds:
             if not osd.force:
@@ -77,17 +102,14 @@ class RemoveUtil(object):
             logger.debug(f"Removing {osd.osd_id} from the queue.")
             self.to_remove_osds.remove(osd)
 
-    def _generate_osd_removal_status(self) -> Dict[OSDRemoval, Union[int,str]]:
+    def _update_osd_removal_status(self):
         """
         Generate a OSD report that can be printed to the CLI
         """
-        logger.debug("Assembling report for osd rm status")
-        report: Dict[OSDRemoval, Union[int,str]] = {}
+        logger.debug("Update OSD removal status")
         for osd in self.to_remove_osds:
-            pg_count = self.get_pg_count(str(osd.osd_id))
-            report[osd] = pg_count if pg_count != -1 else 'n/a'
-        logger.debug(f"Reporting: {report}")
-        return report
+            osd.pg_count = self.get_pg_count(str(osd.osd_id))
+        logger.debug(f"OSD removal status: {self.to_remove_osds}")
 
     def drain_osd(self, osd_id: str) -> bool:
         """
index 6080ee5503b314908a624ad0dbc20858f0d0feeb..f64cdb11cdbd42d71ae35eb5c341c68820d060ab 100644 (file)
@@ -146,15 +146,14 @@ class TestCephadm(object):
             out = wait(cephadm_module, c)
             assert out == ["Removed osd.0 from host 'test'"]
 
-            osd_removal_op = OSDRemoval(0, False, False, 'test', 'osd.0', datetime.datetime.utcnow())
-            cephadm_module.rm_util.to_remove_osds.add(osd_removal_op)
+            osd_removal_op = OSDRemoval(0, False, False, 'test', 'osd.0', datetime.datetime.utcnow(), -1)
+            cephadm_module.rm_util.queue_osds_for_removal({osd_removal_op})
             cephadm_module.rm_util._remove_osds_bg()
             assert cephadm_module.rm_util.to_remove_osds == set()
 
             c = cephadm_module.remove_osds_status()
             out = wait(cephadm_module, c)
-            assert out == {osd_removal_op: 0}
-
+            assert out == set()
 
 
     @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}'))
index ded3f5839ccededb73e78c1793476e08a8639831..d86fbfa9fe9b103179c0b83ece75357aa0759c65 100644 (file)
@@ -488,7 +488,7 @@ Usage:
         self._orchestrator_wait([completion])
         raise_if_exception(completion)
         report = completion.result
-        if len(report) == 0:
+        if not report:
             return HandleCommandResult(stdout="No OSD remove/replace operations reported")
         table = PrettyTable(
             ['NAME', 'HOST', 'PGS', 'STARTED_AT'],
@@ -497,8 +497,8 @@ Usage:
         table.left_padding_width = 0
         table.right_padding_width = 1
         # TODO: re-add sorted and sort by pg_count
-        for osd, status in report.items():
-            table.add_row((osd.fullname, osd.nodename, status, osd.started_at))
+        for osd in report:
+            table.add_row((osd.fullname, osd.nodename, osd.pg_count_str, osd.started_at))
 
         return HandleCommandResult(stdout=table.get_string())