"ceph health mute DAEMON_OLD_VERSION --sticky". In this case after
upgrade has finished use "ceph health unmute DAEMON_OLD_VERSION".
+* MGR: progress module can now be turned on/off, using the commands:
+ ``ceph progress on`` and ``ceph progress off``.
+
>=15.0.0
--------
--- /dev/null
+roles:
+- - mon.a
+ - mgr.x
+ - osd.0
+ - osd.1
+ - osd.2
+ - osd.3
+ - client.0
+- - mon.b
+ - mon.c
+ - osd.4
+ - osd.5
+ - osd.6
+ - osd.7
+openstack:
+ - volumes: # attached to each instance
+ count: 4
+ size: 10 # GB
+tasks:
+- install:
+- ceph:
+ create_rbd_pool: false
+ pre-mgr-commands:
+ - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force
+ log-ignorelist:
+ - overall HEALTH_
+ - \(OSDMAP_FLAGS\)
+ - \(OSD_
+ - \(PG_
+ - \(POOL_
+ - \(CACHE_POOL_
+ - \(OBJECT_
+ - \(SLOW_OPS\)
+ - \(REQUEST_SLOW\)
+ - \(TOO_FEW_PGS\)
+ - slow request
+- exec:
+ client.0:
+ - ceph progress off
+
+- workunit:
+ clients:
+ all:
+ - mon/pg_autoscaler.sh
def is_osd_marked_in(self, ev):
return ev['message'].endswith('marked in')
+ def _get_osd_in_out_events(self, marked='both'):
+ """
+ Return the event that deals with OSDs being
+ marked in, out or both
+ """
+
+ marked_in_events = []
+ marked_out_events = []
+
+ events_in_progress = self._events_in_progress()
+ for ev in events_in_progress:
+ if self.is_osd_marked_out(ev):
+ marked_out_events.append(ev)
+ elif self.is_osd_marked_in(ev):
+ marked_in_events.append(ev)
+
+ if marked == 'both':
+ return [marked_in_events] + [marked_out_events]
+ elif marked == 'in':
+ return marked_in_events
+ else:
+ return marked_out_events
+
def _osd_in_out_events_count(self, marked='both'):
"""
Count the number of on going recovery events that deals with
new_event = self._events_in_progress()[0]
return new_event
+ def _no_events_anywhere(self):
+ """
+ Whether there are any live or completed events
+ """
+ p = self._get_progress()
+ total_events = len(p['events']) + len(p['completed'])
+ return total_events == 0
+
def _is_quiet(self):
"""
Whether any progress events are live.
self.assertEqual(
self._osd_in_out_completed_events_count('out'),
osd_count - pool_size)
+
+ def test_turn_off_module(self):
+ """
+ When the the module is turned off, there should not
+ be any on going events or completed events.
+ Also module should not accept any kind of Remote Event
+ coming in from other module, however, once it is turned
+ back, on creating an event should be working as it is.
+ """
+
+ pool_size = 3
+ self._setup_pool(size=pool_size)
+ self._write_some_data(self.WRITE_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off")
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', '0')
+
+ time.sleep(self.EVENT_CREATION_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'in', '0')
+
+ time.sleep(self.EVENT_CREATION_PERIOD)
+
+ self.assertTrue(self._no_events_anywhere())
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on")
+
+ self._write_some_data(self.WRITE_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', '0')
+
+ # Wait for a progress event to pop up
+ self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
+ timeout=self.EVENT_CREATION_PERIOD*2,
+ period=1)
+
+ ev1 = self._get_osd_in_out_events('out')[0]
+
+ log.info(json.dumps(ev1, indent=1))
+
+ self.wait_until_true(lambda: self._is_complete(ev1['id']),
+ timeout=self.RECOVERY_PERIOD)
+ self.assertTrue(self._is_quiet())
"perm": "r"},
{"cmd": "progress clear",
"desc": "Reset progress tracking",
+ "perm": "rw"},
+ {"cmd": "progress on",
+ "desc": "Enable progress tracking",
+ "perm": "rw"},
+ {"cmd": "progress off",
+ "desc": "Disable progress tracking",
"perm": "rw"}
+
]
MODULE_OPTIONS = [
'desc': 'how frequently to persist completed events',
'runtime': True,
},
+ {
+ 'name': 'enabled',
+ 'default': True,
+ 'type': 'bool',
+
+ }
] # type: List[Dict[str, Any]]
def __init__(self, *args, **kwargs):
if TYPE_CHECKING:
self.max_completed_events = 0
self.persist_interval = 0
+ self.enabled = True
def config_notify(self):
for opt in self.MODULE_OPTIONS:
def notify(self, notify_type, notify_data):
self._ready.wait()
-
+ if not self.enabled:
+ return
if notify_type == "osd_map":
old_osdmap = self._latest_osdmap
self._latest_osdmap = self.get_osdmap()
"""
For calling from other mgr modules
"""
+ if not self.enabled:
+ return
+
if refs is None:
refs = []
try:
-
ev = self._events[ev_id]
assert isinstance(ev, RemoteEvent)
except KeyError:
"""
For calling from other mgr modules
"""
+ if not self.enabled:
+ return
try:
ev = self._events[ev_id]
assert isinstance(ev, RemoteEvent)
except KeyError:
self.log.warning("fail: ev {0} does not exist".format(ev_id))
+ def on(self):
+ self.set_module_option('enabled', True)
+
+ def off(self):
+ self.set_module_option('enabled', False)
+
def _handle_ls(self):
if len(self._events) or len(self._completed_events):
out = ""
'completed': [ev.to_json() for ev in self._completed_events]
}
- def _handle_clear(self):
+ def clear(self):
self._events = {}
self._completed_events = []
self._dirty = True
self._save()
self.clear_all_progress_events()
+ def _handle_clear(self):
+ self.clear()
return 0, "", ""
def handle_command(self, _, cmd):
return self._handle_clear()
elif cmd['prefix'] == "progress json":
return 0, json.dumps(self._json(), indent=4, sort_keys=True), ""
+ elif cmd['prefix'] == "progress on":
+ if self.enabled:
+ return 0, "", "progress already enabled!"
+ self.on()
+ return 0, "", "progress enabled"
+ elif cmd['prefix'] == "progress off":
+ if not self.enabled:
+ return 0, "", "progress already disabled!"
+ self.off()
+ self.clear()
+ return 0, "", "progress disabled"
else:
raise NotImplementedError(cmd['prefix'])