From: Sage Weil Date: Fri, 29 Jan 2021 16:36:39 +0000 (-0600) Subject: mgr/devicehealth: only create pool when we have some osds X-Git-Tag: v17.1.0~3120^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2abf4f1fc368ee74637b0ba1c200e03c02883db8;p=ceph.git mgr/devicehealth: only create pool when we have some osds If we create the pool before we have OSDs, the PGs won't be active+clean and we'll raise a health warning that we shouldn't. This will annoy new users deploying a new cluster, and (more importantly?) make qa tests fail while deploying initial clusters due to the health warning. Signed-off-by: Sage Weil --- diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 14b7dfdacfc6..36cb182f4f19 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -221,11 +221,28 @@ class Module(MgrModule): self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name'])) def notify(self, notify_type: str, notify_id: str) -> None: - # create device_health_metrics pool if it doesn't exist if notify_type == "osd_map" and self.enable_monitoring: - if not self.has_device_pool: - self.create_device_pool() - self.has_device_pool = True + # create device_health_metrics pool if it doesn't exist + self.maybe_create_device_pool() + + def have_enough_osds(self) -> bool: + # wait until we have enough OSDs to allow the pool to be healthy + up = 0 + for osd in self.get("osd_map")["osds"]: + if osd["up"]: + up += 1 + + need = cast(int, self.get_ceph_option("osd_pool_default_size")) + return up >= need + + def maybe_create_device_pool(self) -> bool: + if not self.has_device_pool: + if not self.have_enough_osds(): + self.log.warning("Not enough OSDs yet to create monitoring pool") + return False + self.create_device_pool() + self.has_device_pool = True + return True def create_device_pool(self) -> None: self.log.debug('create %s pool' % self.pool_name) @@ -304,16 +321,9 @@ 
class Module(MgrModule): self.event.set() def open_connection(self, create_if_missing: bool = True) -> rados.Ioctx: - osdmap = self.get("osd_map") - assert osdmap is not None - if len(osdmap['osds']) == 0: - return None - if not self.has_device_pool: - if not create_if_missing: + if create_if_missing: + if not self.maybe_create_device_pool(): return None - if self.enable_monitoring: - self.create_device_pool() - self.has_device_pool = True ioctx = self.rados.open_ioctx(self.pool_name) return ioctx