git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/progress: introduce turn off/on feature 37488/head
author Kamoltat <ksirivad@redhat.com>
Wed, 30 Sep 2020 05:44:23 +0000 (05:44 +0000)
committer kamoltat <ksirivad@redhat.com>
Mon, 16 Nov 2020 03:46:42 +0000 (03:46 +0000)
progress module can be turned off/on by using
the commands: 'progress off' and 'progress on'

Also refactor the teuthology test suite
to prevent bugs that could occur in the future

fixes: https://tracker.ceph.com/issues/47238

Signed-off-by: kamoltat <ksirivad@redhat.com>
PendingReleaseNotes
qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml [new file with mode: 0644]
qa/tasks/mgr/test_progress.py
src/pybind/mgr/progress/module.py

index 0fc5579e8e5aeaa4e19485bd12abefc8090ed487..a23384e93007f40d9f0f2af7497a158d29c6af26 100644 (file)
@@ -31,6 +31,9 @@
   "ceph health mute DAEMON_OLD_VERSION --sticky".  In this case after
   upgrade has finished use "ceph health unmute DAEMON_OLD_VERSION".
 
+* MGR: progress module can now be turned on/off, using the commands:
+  ``ceph progress on`` and ``ceph progress off``.
+
 >=15.0.0
 --------
 
diff --git a/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml
new file mode 100644 (file)
index 0000000..042c3d7
--- /dev/null
@@ -0,0 +1,44 @@
+roles:
+- - mon.a
+  - mgr.x
+  - osd.0
+  - osd.1
+  - osd.2
+  - osd.3
+  - client.0
+- - mon.b
+  - mon.c
+  - osd.4
+  - osd.5
+  - osd.6
+  - osd.7
+openstack:
+  - volumes: # attached to each instance
+      count: 4
+      size: 10 # GB
+tasks:
+- install:
+- ceph:
+    create_rbd_pool: false
+    pre-mgr-commands:
+      - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force
+    log-ignorelist:
+      - overall HEALTH_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(POOL_
+      - \(CACHE_POOL_
+      - \(OBJECT_
+      - \(SLOW_OPS\)
+      - \(REQUEST_SLOW\)
+      - \(TOO_FEW_PGS\)
+      - slow request
+- exec:
+    client.0:
+    - ceph progress off
+
+- workunit:
+    clients:
+      all:
+        - mon/pg_autoscaler.sh
index fa73b951096c4b31c4e6c63f6d1ecc28f3cc596e..e64d23aa7ba7c4b6b13d994109bd568c6365b68f 100644 (file)
@@ -58,6 +58,29 @@ class TestProgress(MgrTestCase):
     def is_osd_marked_in(self, ev):
         return ev['message'].endswith('marked in')
 
+    def _get_osd_in_out_events(self, marked='both'):
+        """
+        Return the in-progress events that deal with OSDs being
+        marked in, out or both
+        """
+
+        marked_in_events = []
+        marked_out_events = []
+
+        events_in_progress = self._events_in_progress()
+        for ev in events_in_progress:
+            if self.is_osd_marked_out(ev):
+                marked_out_events.append(ev)
+            elif self.is_osd_marked_in(ev):
+                marked_in_events.append(ev)
+
+        if marked == 'both':
+            return [marked_in_events] + [marked_out_events]
+        elif marked == 'in':
+            return marked_in_events
+        else:
+            return marked_out_events
+
     def _osd_in_out_events_count(self, marked='both'):
         """
         Count the number of on going recovery events that deals with
@@ -193,6 +216,14 @@ class TestProgress(MgrTestCase):
         new_event = self._events_in_progress()[0]
         return new_event
 
+    def _no_events_anywhere(self):
+        """
+        Whether there are no live or completed events
+        """
+        p = self._get_progress()
+        total_events = len(p['events']) + len(p['completed'])
+        return total_events == 0
+
     def _is_quiet(self):
         """
         Whether any progress events are live.
@@ -318,3 +349,50 @@ class TestProgress(MgrTestCase):
         self.assertEqual(
             self._osd_in_out_completed_events_count('out'),
             osd_count - pool_size)
+
+    def test_turn_off_module(self):
+        """
+        When the module is turned off, there should not
+        be any ongoing events or completed events.
+        Also, the module should not accept any kind of remote event
+        coming in from other modules; however, once it is turned
+        back on, creating an event should work as before.
+        """
+
+        pool_size = 3
+        self._setup_pool(size=pool_size)
+        self._write_some_data(self.WRITE_PERIOD)
+
+        self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off")
+
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+                'osd', 'out', '0')
+
+        time.sleep(self.EVENT_CREATION_PERIOD)
+
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+                    'osd', 'in', '0')
+
+        time.sleep(self.EVENT_CREATION_PERIOD)
+
+        self.assertTrue(self._no_events_anywhere())
+
+        self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on")
+
+        self._write_some_data(self.WRITE_PERIOD)
+
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+                'osd', 'out', '0')
+
+        # Wait for a progress event to pop up
+        self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
+                              timeout=self.EVENT_CREATION_PERIOD*2,
+                              period=1)
+
+        ev1 = self._get_osd_in_out_events('out')[0]
+
+        log.info(json.dumps(ev1, indent=1))
+
+        self.wait_until_true(lambda: self._is_complete(ev1['id']),
+                             timeout=self.RECOVERY_PERIOD)
+        self.assertTrue(self._is_quiet())
index 40098d7b43bc44112912bfc02c24bf2c134912c8..a5748ead805f9907ac236729d81db95fe3f84aaa 100644 (file)
@@ -416,7 +416,14 @@ class Module(MgrModule):
          "perm": "r"},
         {"cmd": "progress clear",
          "desc": "Reset progress tracking",
+         "perm": "rw"},
+        {"cmd": "progress on",
+         "desc": "Enable progress tracking",
+         "perm": "rw"},
+        {"cmd": "progress off",
+         "desc": "Disable progress tracking",
          "perm": "rw"}
+
     ]
 
     MODULE_OPTIONS = [
@@ -434,6 +441,12 @@ class Module(MgrModule):
             'desc': 'how frequently to persist completed events',
             'runtime': True,
         },
+        {
+            'name': 'enabled',
+            'default': True,
+            'type': 'bool',
+            
+        }
     ]  # type: List[Dict[str, Any]]
 
     def __init__(self, *args, **kwargs):
@@ -458,6 +471,7 @@ class Module(MgrModule):
         if TYPE_CHECKING:
             self.max_completed_events = 0
             self.persist_interval = 0
+            self.enabled = True
 
     def config_notify(self):
         for opt in self.MODULE_OPTIONS:
@@ -605,7 +619,8 @@ class Module(MgrModule):
 
     def notify(self, notify_type, notify_data):
         self._ready.wait()
-
+        if not self.enabled:
+            return
         if notify_type == "osd_map":
             old_osdmap = self._latest_osdmap
             self._latest_osdmap = self.get_osdmap()
@@ -724,10 +739,12 @@ class Module(MgrModule):
         """
         For calling from other mgr modules
         """
+        if not self.enabled:
+            return
+
         if refs is None:
             refs = []
         try:
-
             ev = self._events[ev_id]
             assert isinstance(ev, RemoteEvent)
         except KeyError:
@@ -762,6 +779,8 @@ class Module(MgrModule):
         """
         For calling from other mgr modules
         """
+        if not self.enabled:
+            return
         try:
             ev = self._events[ev_id]
             assert isinstance(ev, RemoteEvent)
@@ -789,6 +808,12 @@ class Module(MgrModule):
         except KeyError:
             self.log.warning("fail: ev {0} does not exist".format(ev_id))
 
+    def on(self):
+        self.set_module_option('enabled', True)
+
+    def off(self):
+        self.set_module_option('enabled', False)
+
     def _handle_ls(self):
         if len(self._events) or len(self._completed_events):
             out = ""
@@ -815,13 +840,15 @@ class Module(MgrModule):
             'completed': [ev.to_json() for ev in self._completed_events]
         }
 
-    def _handle_clear(self):
+    def clear(self):
         self._events = {}
         self._completed_events = []
         self._dirty = True
         self._save()
         self.clear_all_progress_events()
 
+    def _handle_clear(self):
+        self.clear()  
         return 0, "", ""
 
     def handle_command(self, _, cmd):
@@ -835,5 +862,16 @@ class Module(MgrModule):
             return self._handle_clear()
         elif cmd['prefix'] == "progress json":
             return 0, json.dumps(self._json(), indent=4, sort_keys=True), ""
+        elif cmd['prefix'] == "progress on":
+            if self.enabled:
+                return 0, "", "progress already enabled!"
+            self.on()
+            return 0, "", "progress enabled"
+        elif cmd['prefix'] == "progress off":
+            if not self.enabled:
+                return 0, "", "progress already disabled!"
+            self.off()
+            self.clear()
+            return 0, "", "progress disabled"
         else:
             raise NotImplementedError(cmd['prefix'])