From: Sunny Kumar Date: Thu, 10 Sep 2020 13:13:27 +0000 (+0100) Subject: mgr/devicehealth: device_health_metrics pool gets created even without any OSDs in... X-Git-Tag: v15.2.9~122^2~40^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F37533%2Fhead;p=ceph.git mgr/devicehealth: device_health_metrics pool gets created even without any OSDs in the cluster This patch checks if the cluster has any OSD before the 'device_health_metrics' pool creation by devicehealth, which prevents the cluster entering HEALTH_WARN immediately after bootstrap.     Fixes: https://tracker.ceph.com/issues/46743 Signed-off-by: Sunny Kumar (cherry picked from commit 9041adae9e7ce9e6c1855818079b128b6af267ba) --- diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index f877ed8ac43..57f35a913cc 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -145,6 +145,7 @@ class Module(MgrModule): # other self.run = True self.event = Event() + self.has_device_pool = False def is_valid_daemon_name(self, who): l = who.split('.') @@ -221,6 +222,37 @@ class Module(MgrModule): self.get_module_option(opt['name'])) self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name'])) + def notify(self, notify_type, notify_id): + # create device_health_metrics pool if it doesn't exist + if notify_type == "osd_map" and self.enable_monitoring: + if not self.has_device_pool: + self.create_device_pool() + self.has_device_pool = True + + def create_device_pool(self): + self.log.debug('create %s pool' % self.pool_name) + # create pool + result = CommandResult('') + self.send_command(result, 'mon', '', json.dumps({ + 'prefix': 'osd pool create', + 'format': 'json', + 'pool': self.pool_name, + 'pg_num': 1, + 'pg_num_min': 1, + }), '') + r, outb, outs = result.wait() + assert r == 0 + # set pool application + result = CommandResult('') + self.send_command(result, 'mon', '', json.dumps({ + 'prefix': 'osd pool application enable', + 'format': 'json', + 'pool': self.pool_name, + 'app': 'mgr_devicehealth', + }), '') + r, outb, outs = result.wait() + assert r == 0 + def serve(self): self.log.info("Starting") self.config_notify() @@ -274,44 +306,23 @@ class Module(MgrModule): self.event.set() def open_connection(self, create_if_missing=True): - pools = self.rados.list_pools() - is_pool = False - for pool in pools: - if pool == self.pool_name: - is_pool = True - break - if not is_pool: + osdmap = self.get("osd_map") + assert osdmap is not None + if len(osdmap['osds']) == 0: + return None + if not self.has_device_pool: if not create_if_missing: return None - self.log.debug('create %s pool' % self.pool_name) - # create pool - result = CommandResult('') - self.send_command(result, 'mon', '', json.dumps({ - 'prefix': 'osd pool create', - 'format': 'json', - 'pool': self.pool_name, - 'pg_num': 1, - 'pg_num_min': 1, - }), '') - r, outb, outs = result.wait() - assert r == 0 - - # set pool application - result = CommandResult('') - self.send_command(result, 'mon', '', json.dumps({ - 'prefix': 'osd pool application enable', - 'format': 'json', - 'pool': self.pool_name, - 'app': 'mgr_devicehealth', - }), '') - r, outb, outs = result.wait() - assert r == 0 - + if self.enable_monitoring: + self.create_device_pool() + self.has_device_pool = True ioctx = self.rados.open_ioctx(self.pool_name) return ioctx def scrape_daemon(self, daemon_type, daemon_id): ioctx = self.open_connection() + if not ioctx: + return 0, "", "" raw_smart_data = self.do_scrape_daemon(daemon_type, daemon_id) if raw_smart_data: for device, raw_data in raw_smart_data.items(): @@ -325,6 +336,8 @@ class Module(MgrModule): osdmap = self.get("osd_map") assert osdmap is not None ioctx = self.open_connection() + if not ioctx: + return 0, "", "" did_device = {} ids = [] for osd in osdmap['osds']: @@ -357,6 +370,8 @@ class Module(MgrModule): 'device ' + devid + ' not claimed by any active daemons') (daemon_type, daemon_id) = daemons[0].split('.') ioctx = self.open_connection() + if not ioctx: + return 0, "", "" raw_smart_data = self.do_scrape_daemon(daemon_type, daemon_id, devid=devid) if raw_smart_data: