From 2abf4f1fc368ee74637b0ba1c200e03c02883db8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 29 Jan 2021 10:36:39 -0600 Subject: [PATCH] mgr/devicehealth: only create pool when we have some osds If we create the pool before we have OSDs, the PGs won't be active+clean and we'll raise a health warning that we shouldn't. This will annoy new users deploying a new cluster, and (more importantly?) make qa tests fail while deploying initial clusters due to the health warning. Signed-off-by: Sage Weil --- src/pybind/mgr/devicehealth/module.py | 36 +++++++++++++++++---------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 14b7dfdacfc..36cb182f4f1 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -221,11 +221,28 @@ class Module(MgrModule): self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name'])) def notify(self, notify_type: str, notify_id: str) -> None: - # create device_health_metrics pool if it doesn't exist if notify_type == "osd_map" and self.enable_monitoring: - if not self.has_device_pool: - self.create_device_pool() - self.has_device_pool = True + # create device_health_metrics pool if it doesn't exist + self.maybe_create_device_pool() + + def have_enough_osds(self) -> bool: + # wait until we have enough OSDs to allow the pool to be healthy + up = 0 + for osd in self.get("osd_map")["osds"]: + if osd["up"]: + up += 1 + + need = cast(int, self.get_ceph_option("osd_pool_default_size")) + return up >= need + + def maybe_create_device_pool(self) -> bool: + if not self.has_device_pool: + if not self.have_enough_osds(): + self.log.warning("Not enough OSDs yet to create monitoring pool") + return False + self.create_device_pool() + self.has_device_pool = True + return True def create_device_pool(self) -> None: self.log.debug('create %s pool' % self.pool_name) @@ -304,16 +321,9 @@ class Module(MgrModule): 
self.event.set() def open_connection(self, create_if_missing: bool = True) -> rados.Ioctx: - osdmap = self.get("osd_map") - assert osdmap is not None - if len(osdmap['osds']) == 0: - return None - if not self.has_device_pool: - if not create_if_missing: + if create_if_missing: + if not self.maybe_create_device_pool(): return None - if self.enable_monitoring: - self.create_device_pool() - self.has_device_pool = True ioctx = self.rados.open_ioctx(self.pool_name) return ioctx -- 2.47.3