From 993bb02b30cf73a1c1c70da1ef266be8373d56dd Mon Sep 17 00:00:00 2001 From: Kamoltat Date: Wed, 30 Sep 2020 05:44:23 +0000 Subject: [PATCH] mgr/progress: introduce turn off/on feature progress module can be turned off/on by using the commands: 'progress off' and 'progress on' As well as refactoring teuthology test suite to prevent future bugs that can possibly occur fixes: https://tracker.ceph.com/issues/47238 Signed-off-by: kamoltat --- PendingReleaseNotes | 3 + .../all/pg-autoscaler-progress-off.yaml | 44 +++++++++++ qa/tasks/mgr/test_progress.py | 78 +++++++++++++++++++ src/pybind/mgr/progress/module.py | 44 ++++++++++- 4 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 0fc5579e8e5ae..a23384e93007f 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -31,6 +31,9 @@ "ceph health mute DAEMON_OLD_VERSION --sticky". In this case after upgrade has finished use "ceph health unmute DAEMON_OLD_VERSION". +* MGR: progress module can now be turned on/off, using the commands: + ``ceph progress on`` and ``ceph progress off``. 
+ >=15.0.0 -------- diff --git a/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml new file mode 100644 index 0000000000000..042c3d78e7d7d --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml @@ -0,0 +1,44 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - mon.b + - mon.c + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request +- exec: + client.0: + - ceph progress off + +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py index fa73b951096c4..e64d23aa7ba7c 100644 --- a/qa/tasks/mgr/test_progress.py +++ b/qa/tasks/mgr/test_progress.py @@ -58,6 +58,29 @@ class TestProgress(MgrTestCase): def is_osd_marked_in(self, ev): return ev['message'].endswith('marked in') + def _get_osd_in_out_events(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + def _osd_in_out_events_count(self, marked='both'): """ Count the number of on going recovery events that deals with @@ -193,6 +216,14 @@ 
class TestProgress(MgrTestCase): new_event = self._events_in_progress()[0] return new_event + def _no_events_anywhere(self): + """ + Whether there are any live or completed events + """ + p = self._get_progress() + total_events = len(p['events']) + len(p['completed']) + return total_events == 0 + def _is_quiet(self): """ Whether any progress events are live. @@ -318,3 +349,50 @@ class TestProgress(MgrTestCase): self.assertEqual( self._osd_in_out_completed_events_count('out'), osd_count - pool_size) + + def test_turn_off_module(self): + """ + When the the module is turned off, there should not + be any on going events or completed events. + Also module should not accept any kind of Remote Event + coming in from other module, however, once it is turned + back, on creating an event should be working as it is. + """ + + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD) + + self.assertTrue(self._no_events_anywhere()) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on") + + self._write_some_data(self.WRITE_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD*2, + period=1) + + ev1 = self._get_osd_in_out_events('out')[0] + + log.info(json.dumps(ev1, indent=1)) + + self.wait_until_true(lambda: self._is_complete(ev1['id']), + timeout=self.RECOVERY_PERIOD) + self.assertTrue(self._is_quiet()) diff --git a/src/pybind/mgr/progress/module.py b/src/pybind/mgr/progress/module.py index 40098d7b43bc4..a5748ead805f9 100644 --- a/src/pybind/mgr/progress/module.py 
+++ b/src/pybind/mgr/progress/module.py @@ -416,7 +416,14 @@ class Module(MgrModule): "perm": "r"}, {"cmd": "progress clear", "desc": "Reset progress tracking", + "perm": "rw"}, + {"cmd": "progress on", + "desc": "Enable progress tracking", + "perm": "rw"}, + {"cmd": "progress off", + "desc": "Disable progress tracking", "perm": "rw"} + ] MODULE_OPTIONS = [ @@ -434,6 +441,12 @@ class Module(MgrModule): 'desc': 'how frequently to persist completed events', 'runtime': True, }, + { + 'name': 'enabled', + 'default': True, + 'type': 'bool', + + } ] # type: List[Dict[str, Any]] def __init__(self, *args, **kwargs): @@ -458,6 +471,7 @@ class Module(MgrModule): if TYPE_CHECKING: self.max_completed_events = 0 self.persist_interval = 0 + self.enabled = True def config_notify(self): for opt in self.MODULE_OPTIONS: @@ -605,7 +619,8 @@ class Module(MgrModule): def notify(self, notify_type, notify_data): self._ready.wait() - + if not self.enabled: + return if notify_type == "osd_map": old_osdmap = self._latest_osdmap self._latest_osdmap = self.get_osdmap() @@ -724,10 +739,12 @@ class Module(MgrModule): """ For calling from other mgr modules """ + if not self.enabled: + return + if refs is None: refs = [] try: - ev = self._events[ev_id] assert isinstance(ev, RemoteEvent) except KeyError: @@ -762,6 +779,8 @@ class Module(MgrModule): """ For calling from other mgr modules """ + if not self.enabled: + return try: ev = self._events[ev_id] assert isinstance(ev, RemoteEvent) @@ -789,6 +808,12 @@ class Module(MgrModule): except KeyError: self.log.warning("fail: ev {0} does not exist".format(ev_id)) + def on(self): + self.set_module_option('enabled', True) + + def off(self): + self.set_module_option('enabled', False) + def _handle_ls(self): if len(self._events) or len(self._completed_events): out = "" @@ -815,13 +840,15 @@ class Module(MgrModule): 'completed': [ev.to_json() for ev in self._completed_events] } - def _handle_clear(self): + def clear(self): self._events = {} 
self._completed_events = [] self._dirty = True self._save() self.clear_all_progress_events() + def _handle_clear(self): + self.clear() return 0, "", "" def handle_command(self, _, cmd): @@ -835,5 +862,16 @@ class Module(MgrModule): return self._handle_clear() elif cmd['prefix'] == "progress json": return 0, json.dumps(self._json(), indent=4, sort_keys=True), "" + elif cmd['prefix'] == "progress on": + if self.enabled: + return 0, "", "progress already enabled!" + self.on() + return 0, "", "progress enabled" + elif cmd['prefix'] == "progress off": + if not self.enabled: + return 0, "", "progress already disabled!" + self.off() + self.clear() + return 0, "", "progress disabled" else: raise NotImplementedError(cmd['prefix']) -- 2.39.5