def handle_metadata(self, data: Dict[str, Any]) -> None:
try:
host = data['host']
+ self.mgr.cache.agent_ports[host] = int(data['port'])
if host not in self.mgr.cache.agent_counter:
self.mgr.log.debug(
f'Got metadata from agent on host {host} with no known counter entry. Starting counter at 1 and requesting new metadata')
self.mgr.agent_helpers._request_agent_acks({host})
return
- self.mgr.cache.agent_ports[host] = int(data['port'])
# update timestamp of most recent agent update
self.mgr.cache.agent_timestamp[host] = datetime_now()
up_to_date = False
existing = existing_active + existing_standby
# build to_add
- if self.service_name == 'agent':
- to_add = [dd for dd in others]
- elif not count:
+ if not count:
to_add = [dd for dd in others if dd.hostname not in [
h.hostname for h in self.unreachable_hosts]]
else:
self.mgr.cache.metadata_up_to_date[host] = False
if host in self.mgr.offline_hosts:
return
- self.mgr.offline_hosts.add(host)
+
# In case host is actually offline, it's best to reset the connection to avoid
# a long timeout trying to use an existing connection to an offline host
# REVISIT AFTER https://github.com/ceph/ceph/pull/42919
for agent in agents_down:
detail.append((f'Cephadm agent on host {agent} has not reported in '
f'{2.5 * self.mgr.agent_refresh_rate} seconds. Agent is assumed '
- 'down and host has been marked offline.'))
+ 'down and host may be offline.'))
self.mgr.health_checks['CEPHADM_AGENT_DOWN'] = {
'severity': 'warning',
'summary': '%d Cephadm Agent(s) are not reporting. '
- 'Hosts marked offline' % (len(agents_down)),
+ 'Hosts may be offline' % (len(agents_down)),
'count': len(agents_down),
'detail': detail,
}