while self.run:
# refresh daemons
- self.log.debug('refreshing hosts')
- bad_hosts = []
- failures = []
- for host in self.cache.get_hosts():
- if self.cache.host_needs_check(host):
- r = self._check_host(host)
- if r is not None:
- bad_hosts.append(r)
- if self.cache.host_needs_daemon_refresh(host):
- self.log.debug('refreshing %s daemons' % host)
- r = self._refresh_host_daemons(host)
- if r:
- failures.append(r)
- if self.cache.host_needs_device_refresh(host):
- self.log.debug('refreshing %s devices' % host)
- r = self._refresh_host_devices(host)
- if r:
- failures.append(r)
-
- if self.cache.host_needs_osdspec_preview_refresh(host):
- self.log.debug(f"refreshing OSDSpec previews for {host}")
- r = self._refresh_host_osdspec_previews(host)
- if r:
- failures.append(r)
-
- health_changed = False
- if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
- del self.health_checks['CEPHADM_HOST_CHECK_FAILED']
- health_changed = True
- if bad_hosts:
- self.health_checks['CEPHADM_HOST_CHECK_FAILED'] = {
- 'severity': 'warning',
- 'summary': '%d hosts fail cephadm check' % len(bad_hosts),
- 'count': len(bad_hosts),
- 'detail': bad_hosts,
- }
- health_changed = True
- if failures:
- self.health_checks['CEPHADM_REFRESH_FAILED'] = {
- 'severity': 'warning',
- 'summary': 'failed to probe daemons or devices',
- 'count': len(failures),
- 'detail': failures,
- }
- health_changed = True
- elif 'CEPHADM_REFRESH_FAILED' in self.health_checks:
- del self.health_checks['CEPHADM_REFRESH_FAILED']
- health_changed = True
- if health_changed:
- self.set_health_checks(self.health_checks)
+ self.log.debug('refreshing hosts and daemons')
+ self._refresh_hosts_and_daemons()
self._check_for_strays()
self.log.debug(f'Refreshed OSDSpec previews for host <{host}>')
return True
def _refresh_hosts_and_daemons(self):
    """Run all pending per-host checks/refreshes and publish the health state.

    For every host returned by ``self.cache.get_hosts()`` the cache is asked
    which work is due (host check, daemon refresh, device refresh, OSDSpec
    preview refresh) and the matching ``self._check_host`` /
    ``self._refresh_host_*`` helper is invoked.

    Host-check results that are not ``None`` are reported under the
    ``CEPHADM_HOST_CHECK_FAILED`` health check; truthy refresh results are
    reported under ``CEPHADM_REFRESH_FAILED``.  ``self.set_health_checks`` is
    called only when one of those two entries was added, replaced or removed.
    """
    bad_hosts = []
    failures = []
    for host in self.cache.get_hosts():
        if self.cache.host_needs_check(host):
            check_error = self._check_host(host)
            if check_error is not None:
                bad_hosts.append(check_error)

        if self.cache.host_needs_daemon_refresh(host):
            self.log.debug('refreshing %s daemons', host)
            error = self._refresh_host_daemons(host)
            if error:
                failures.append(error)

        if self.cache.host_needs_device_refresh(host):
            self.log.debug('refreshing %s devices', host)
            error = self._refresh_host_devices(host)
            if error:
                failures.append(error)

        if self.cache.host_needs_osdspec_preview_refresh(host):
            self.log.debug('refreshing OSDSpec previews for %s', host)
            error = self._refresh_host_osdspec_previews(host)
            # NOTE(review): the previews helper appears to return True on
            # success rather than an error description -- confirm.  A bare
            # True must not be reported as a refresh failure; any other
            # truthy result is still recorded.
            if error and error is not True:
                failures.append(error)

    health_changed = False

    # The host-check entry is always dropped and rebuilt from this pass, so
    # a previously reported entry counts as a change even if it is re-added.
    if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
        del self.health_checks['CEPHADM_HOST_CHECK_FAILED']
        health_changed = True
    if bad_hosts:
        self.health_checks['CEPHADM_HOST_CHECK_FAILED'] = {
            'severity': 'warning',
            'summary': '%d hosts fail cephadm check' % len(bad_hosts),
            'count': len(bad_hosts),
            'detail': bad_hosts,
        }
        health_changed = True

    # The refresh entry is overwritten while failures exist and removed once
    # a pass completes without any.
    if failures:
        self.health_checks['CEPHADM_REFRESH_FAILED'] = {
            'severity': 'warning',
            'summary': 'failed to probe daemons or devices',
            'count': len(failures),
            'detail': failures,
        }
        health_changed = True
    elif 'CEPHADM_REFRESH_FAILED' in self.health_checks:
        del self.health_checks['CEPHADM_REFRESH_FAILED']
        health_changed = True

    if health_changed:
        self.set_health_checks(self.health_checks)
+
def _refresh_host_daemons(self, host):
try:
out, err, code = self._run_cephadm(