git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
tasks/ceph: handle down nodes in log rotation
author John Spray <john.spray@redhat.com>
Mon, 5 Sep 2016 10:21:24 +0000 (11:21 +0100)
committer John Spray <john.spray@redhat.com>
Wed, 7 Dec 2016 00:45:46 +0000 (00:45 +0000)
Signed-off-by: John Spray <john.spray@redhat.com>
tasks/ceph.py

index e05ab9b94c9396f568f9a21e5004b59428418f59..cfacfe6e5327b4410897a1b0156f3bf158cf146d 100644 (file)
@@ -13,6 +13,7 @@ import os
 import json
 import time
 import gevent
+import socket
 
 from ceph_manager import CephManager, write_conf
 from tasks.cephfs.filesystem import Filesystem
@@ -112,13 +113,30 @@ def ceph_log(ctx, config):
             # 2) continuously loop over logrotate invocation with ceph-test.conf
             while not self.stop_event.is_set():
                 self.stop_event.wait(timeout=30)
-                run.wait(
-                    ctx.cluster.run(
-                        args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'
-                              ],
-                        wait=False,
+                try:
+                    run.wait(
+                        ctx.cluster.run(
+                            args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'
+                                  ],
+                            wait=False,
+                        )
                     )
-                )
+                except exceptions.ConnectionLostError as e:
+                    # Some tests may power off nodes during test, in which
+                    # case we will see connection errors that we should ignore.
+                    log.debug("Missed logrotate, node '{0}' is offline".format(
+                        e.node))
+                except EOFError as e:
+                    # Paramiko sometimes raises this when it fails to
+                    # connect to a node during open_session.  As with
+                    # ConnectionLostError, we ignore this because nodes
+                    # are allowed to get power cycled during tests.
+                    log.debug("Missed logrotate, EOFError")
+                except socket.error as e:
+                    if e.errno == errno.EHOSTUNREACH:
+                        log.debug("Missed logrotate, host unreachable")
+                    else:
+                        raise
 
         def begin(self):
             self.thread = gevent.spawn(self.invoke_logrotate)