git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
ceph_manager::wait_for_clean: reset timeout if we make progress
author Samuel Just <sam.just@inktank.com>
Mon, 17 Mar 2014 21:03:30 +0000 (14:03 -0700)
committer Samuel Just <sam.just@inktank.com>
Mon, 17 Mar 2014 23:11:36 +0000 (16:11 -0700)
Fixes: #7748
Signed-off-by: Samuel Just <sam.just@inktank.com>
teuthology/task/ceph_manager.py

index 82dbc1343d078b9dce2f34767f324a8012a52a95..aa7ba163df5a6f0e446cc351b274019bec1fb203 100644 (file)
@@ -974,6 +974,17 @@ class CephManager:
                 num += 1
         return num
 
+    def get_is_making_recovery_progress(self):
+        """
+        Return whether any recovery rate in the raw cluster status 'pgmap'
+        (keys, bytes or objects per second) is above zero; a missing rate is 0.
+        """
+        status = self.raw_cluster_status()
+        kps = status['pgmap'].get('recovering_keys_per_sec', 0)
+        bps = status['pgmap'].get('recovering_bytes_per_sec', 0)
+        ops = status['pgmap'].get('recovering_objects_per_sec', 0)
+        return kps > 0 or bps > 0 or ops > 0
+
     def get_num_active(self):
         """
         Find the number of active pgs.
@@ -1037,8 +1048,13 @@ class CephManager:
         num_active_clean = self.get_num_active_clean()
         while not self.is_clean():
             if timeout is not None:
-                assert time.time() - start < timeout, \
-                    'failed to become clean before timeout expired'
+                if self.get_is_making_recovery_progress():
+                    self.log("making progress, resetting timeout")
+                    start = time.time()
+                else:
+                    self.log("no progress seen, keeping timeout for now")
+                    assert time.time() - start < timeout, \
+                        'failed to become clean before timeout expired'
             cur_active_clean = self.get_num_active_clean()
             if cur_active_clean != num_active_clean:
                 start = time.time()