]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: make ssh keepalive settings configurable
author: Adam King <adking@redhat.com>
Wed, 4 Sep 2024 20:09:10 +0000 (16:09 -0400)
committer: Adam King <adking@redhat.com>
Wed, 4 Sep 2024 20:09:10 +0000 (16:09 -0400)
It was found on some larger clusters that these settings
were too low and hosts were getting temporarily marked
offline.

Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/ssh.py

index 780e2b8784eadc9188dad583765855784ff41696..107e8a79ab40c2a97232ce0e4798915be3b5c935 100644 (file)
@@ -528,6 +528,19 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             desc='Default timeout applied to cephadm commands run directly on '
             'the host (in seconds)'
         ),
+        Option(
+            'ssh_keepalive_interval',
+            type='int',
+            default=7,
+            desc='How often ssh connections are checked for liveness'
+        ),
+        Option(
+            'ssh_keepalive_count_max',
+            type='int',
+            default=3,
+            desc='How many times ssh connections can fail liveness checks '
+            'before the host is marked offline'
+        ),
         Option(
             'cephadm_log_destination',
             type='str',
@@ -630,6 +643,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.default_cephadm_command_timeout = 0
             self.cephadm_log_destination = ''
             self.oob_default_addr = ''
+            self.ssh_keepalive_interval = 0
+            self.ssh_keepalive_count_max = 0
 
         self.notify(NotifyType.mon_map, None)
         self.config_notify()
index f0d507dfe8958ca394d3d15d7283a06cd838a920..4a30c067709433a5c688cb2b7ac0e95ad4abfd4a 100644 (file)
@@ -168,7 +168,9 @@ class SSHManager:
             with self.redirect_log(host, addr):
                 try:
                     ssh_options = asyncssh.SSHClientConnectionOptions(
-                        keepalive_interval=7, keepalive_count_max=3)
+                        keepalive_interval=self.mgr.ssh_keepalive_interval,
+                        keepalive_count_max=self.mgr.ssh_keepalive_count_max
+                    )
                     conn = await asyncssh.connect(addr, username=self.mgr.ssh_user, client_keys=[self.mgr.tkey.name],
                                                   known_hosts=None, config=[self.mgr.ssh_config_fname],
                                                   preferred_auth=['publickey'], options=ssh_options)