resp.raise_for_status()
def _wait_for_ready(self):
- """ Attempt to connect to the machine via SSH """
- with safe_while(sleep=6, timeout=config.fog_wait_for_ssh_timeout) as proceed:
+ """
+ Attempt to connect to the machine via SSH (power cycle once at 50% of timeout).
+ """
+
+ total_timeout = config.fog_wait_for_ssh_timeout
+ ipmi_cycle_after_seconds = total_timeout * 0.5
+
+ start = datetime.datetime.now(datetime.timezone.utc)
+ ipmi_cycle_sent = False
+
+ with safe_while(sleep=6, timeout=total_timeout) as proceed:
while proceed():
+ now = datetime.datetime.now(datetime.timezone.utc)
+ elapsed = (now - start).total_seconds()
+
+ # ipmitool power cycle once at 50% of timeout
+ if not ipmi_cycle_sent and elapsed >= ipmi_cycle_after_seconds:
+ ipmi_cycle_sent = True
+ self.log.warning(
+ f"{self.shortname}: SSH not up after {int(elapsed)}s "
+ f"(~50% of timeout); power cycling and continuing to wait"
+ )
+ try:
+ self.remote.console.power_off()
+ self.remote.console.power_on()
+ except Exception as e:
+ self.log.error(
+ f"{self.shortname}: power cycle failed but continuing: {e}"
+ )
+
try:
self.remote.connect()
break