up_to_date = True
else:
# we got old counter value with message, inform agent of new timestamp
- self.mgr.agent_helpers._request_agent_acks({host})
+ if not self.mgr.cache.messaging_agent(host):
+ self.mgr.agent_helpers._request_agent_acks({host})
self.mgr.log.info(
f'Received old metadata from agent on host {host}. Requested up-to-date metadata.')
def run(self) -> None:
self.mgr.log.info(f'Sending message to agent on host {self.host}')
+ self.mgr.cache.sending_agent_message[self.host] = True
try:
assert self.mgr.cherrypy_thread
root_cert = self.mgr.cherrypy_thread.ssl_certs.get_root_cert()
ssl_ctx.load_cert_chain(cert_fname, key_fname)
except Exception as e:
self.mgr.log.error(f'Failed to get certs for connecting to agent: {e}')
+ self.mgr.cache.sending_agent_message[self.host] = False
return
try:
bytes_len: str = str(len(self.data.encode('utf-8')))
bytes_len = '0' + bytes_len
except Exception as e:
self.mgr.log.error(f'Failed to get length of json payload: {e}')
+ self.mgr.cache.sending_agent_message[self.host] = False
return
for retry_wait in [3, 5]:
try:
except Exception as e:
# if it's not a connection error, something has gone wrong. Give up.
self.mgr.log.error(f'Failed to contact agent on host {self.host}: {e}')
+ self.mgr.cache.sending_agent_message[self.host] = False
return
self.mgr.log.error(f'Could not connect to agent on host {self.host}')
+ self.mgr.cache.sending_agent_message[self.host] = False
return
def _request_agent_acks(self, hosts: Set[str], increment: bool = False) -> None:
for host in hosts:
- self.mgr.cache.metadata_up_to_date[host] = False
+ if increment:
+ self.mgr.cache.metadata_up_to_date[host] = False
if host not in self.mgr.cache.agent_counter:
self.mgr.cache.agent_counter[host] = 1
elif increment:
self.metadata_up_to_date = {} # type: Dict[str, bool]
self.agent_keys = {} # type: Dict[str, str]
self.agent_ports = {} # type: Dict[str, int]
+ self.sending_agent_message = {} # type: Dict[str, bool]
def load(self):
# type: () -> None
return True
return False
+ def messaging_agent(self, host: str) -> bool:
+ if host not in self.sending_agent_message or not self.sending_agent_message[host]:
+ return False
+ return True
+
def host_metadata_up_to_date(self, host: str) -> bool:
if host not in self.metadata_up_to_date or not self.metadata_up_to_date[host]:
return False
self.mgr._schedule_daemon_action(agent.name(), 'redeploy')
if 'agent' not in self.mgr.spec_store:
self.mgr.agent_helpers._apply_agent()
- for host in self.mgr.cache.get_hosts():
- self.mgr.cache.metadata_up_to_date[host] = False
else:
if 'agent' in self.mgr.spec_store:
self.mgr.spec_store.rm('agent')
if self.mgr.use_agent and not self.mgr.cache.all_host_metadata_up_to_date():
# need to wait for metadata to come in
self.mgr.agent_helpers._request_agent_acks(
- set([h for h in self.mgr.cache.get_hosts() if not self.mgr.cache.host_metadata_up_to_date(h)]))
+ set([h for h in self.mgr.cache.get_hosts() if
+ (not self.mgr.cache.host_metadata_up_to_date(h) and h in self.mgr.cache.agent_ports and not self.mgr.cache.messaging_agent(h))]))
return
first = False
'redeploy',
image=target_image if not d_entry[1] else None
)
+ self.mgr.cache.metadata_up_to_date[d.hostname] = False
except Exception as e:
self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
'severity': 'warning',