From: Adam King Date: Wed, 4 Sep 2024 20:09:10 +0000 (-0400) Subject: mgr/cephadm: make ssh keepalive settings configurable X-Git-Tag: testing/wip-vshankar-testing-20240917.043942-debug~42^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=20e2948a375827d8047377be5217cfe2031564ea;p=ceph-ci.git mgr/cephadm: make ssh keepalive settings configurable It was found on some larger clusters that these settings were too low and hosts were getting temporarily marked offline Signed-off-by: Adam King --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 780e2b8784e..107e8a79ab4 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -528,6 +528,19 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, desc='Default timeout applied to cephadm commands run directly on ' 'the host (in seconds)' ), + Option( + 'ssh_keepalive_interval', + type='int', + default=7, + desc='How often ssh connections are checked for liveness' + ), + Option( + 'ssh_keepalive_count_max', + type='int', + default=3, + desc='How many times ssh connections can fail liveness checks ' + 'before the host is marked offline' + ), Option( 'cephadm_log_destination', type='str', @@ -630,6 +643,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.default_cephadm_command_timeout = 0 self.cephadm_log_destination = '' self.oob_default_addr = '' + self.ssh_keepalive_interval = 0 + self.ssh_keepalive_count_max = 0 self.notify(NotifyType.mon_map, None) self.config_notify() diff --git a/src/pybind/mgr/cephadm/ssh.py b/src/pybind/mgr/cephadm/ssh.py index f0d507dfe89..4a30c067709 100644 --- a/src/pybind/mgr/cephadm/ssh.py +++ b/src/pybind/mgr/cephadm/ssh.py @@ -168,7 +168,9 @@ class SSHManager: with self.redirect_log(host, addr): try: ssh_options = asyncssh.SSHClientConnectionOptions( - keepalive_interval=7, keepalive_count_max=3) + keepalive_interval=self.mgr.ssh_keepalive_interval, 
keepalive_count_max=self.mgr.ssh_keepalive_count_max + ) conn = await asyncssh.connect(addr, username=self.mgr.ssh_user, client_keys=[self.mgr.tkey.name], known_hosts=None, config=[self.mgr.ssh_config_fname], preferred_auth=['publickey'], options=ssh_options)