git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: flag backtrace scrub failures for new files as okay 34288/head
author Milind Changire <mchangir@redhat.com>
Tue, 2 Jun 2020 02:11:39 +0000 (07:41 +0530)
committer Milind Changire <mchangir@redhat.com>
Tue, 2 Jun 2020 02:11:40 +0000 (07:41 +0530)
New, unwritten files fail the backtrace check during scrub.
This is not necessarily bad, so flag such failures as okay and continue
with other entries.

Fixes: https://tracker.ceph.com/issues/43543
Signed-off-by: Milind Changire <mchangir@redhat.com>
qa/tasks/cephfs/cephfs_test_case.py
qa/tasks/cephfs/test_scrub.py
src/mds/CInode.cc

index 1105e35d49a7185dd1ec044c1ccfb310d52bfc77..e69941dfd849d7370464ccd7e0b2a1ecf3591c5e 100644 (file)
@@ -9,6 +9,7 @@ from tasks.cephfs.fuse_mount import FuseMount
 
 from teuthology.orchestra import run
 from teuthology.orchestra.run import CommandFailedError
+from teuthology.contextutil import safe_while
 
 
 log = logging.getLogger(__name__)
@@ -302,3 +303,12 @@ class CephFSTestCase(CephTestCase):
                 return subtrees
             time.sleep(pause)
         raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank))
+
+    def _wait_until_scrub_complete(self, path="/", recursive=True):
+        """
+        Start a scrub on ``path`` and poll "scrub status" until it reports
+        "no active scrubs running".
+
+        :param path: path handed to "scrub start"
+        :param recursive: when true, "recursive" is appended to the
+                          "scrub start" command
+        """
+        # The conditional must be parenthesised: "+" binds tighter than
+        # "if/else", so without the parentheses a non-recursive call would
+        # send an empty command instead of ["scrub", "start", path].
+        scrub_cmd = ["scrub", "start", path] + (["recursive"] if recursive else [])
+        self.fs.rank_tell(scrub_cmd)
+        # safe_while bounds the polling (sleep=10, tries=10).
+        # NOTE(review): presumably it raises once the tries are exhausted;
+        # confirm against teuthology.contextutil.
+        with safe_while(sleep=10, tries=10) as proceed:
+            while proceed():
+                out_json = self.fs.rank_tell(["scrub", "status"])
+                if out_json['status'] == "no active scrubs running":
+                    break
+
index 226db815740f03cbedb9199729174ab00779bd64..1e9fad2b4ceb6f74ec297c7506606c130b4e0179 100644 (file)
@@ -75,6 +75,9 @@ class BacktraceWorkload(Workload):
         self._filesystem.mds_asok(["flush", "journal"])
         self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
 
+    def create_files(self, nfiles=1000):
+        """
+        Ask the mount to create ``nfiles`` files using the
+        "scrub-new-files/file" name prefix.
+        """
+        name_prefix = "scrub-new-files/file"
+        self._mount.create_n_files(name_prefix, nfiles)
+
 
 class DupInodeWorkload(Workload):
     """
@@ -144,6 +147,27 @@ class TestScrub(CephFSTestCase):
                 errors[0].exception, errors[0].backtrace
             ))
 
+    def _get_damage_count(self, damage_type='backtrace'):
+        """
+        Return how many entries of "damage ls" carry the given damage_type
+        """
+        damage_entries = self.fs.rank_tell(["damage", "ls"])
+        self.assertNotEqual(damage_entries, None)
+
+        # one tick per entry whose 'damage_type' matches
+        return sum(1 for entry in damage_entries
+                   if entry['damage_type'] == damage_type)
+
+    def _scrub_new_files(self, workload):
+        """
+        That scrubbing freshly created files leaves the damage count
+        (default damage type) at zero
+        """
+        workload.create_files(1000)
+        # let the scrub finish before looking at the damage list
+        self._wait_until_scrub_complete()
+        damage_count = self._get_damage_count()
+        self.assertEqual(damage_count, 0)
+
+    def test_scrub_backtrace_for_new_files(self):
+        """
+        Run the new-files scrub check with a BacktraceWorkload
+        """
+        workload = BacktraceWorkload(self.fs, self.mount_a)
+        self._scrub_new_files(workload)
+
     def test_scrub_backtrace(self):
         self._scrub(BacktraceWorkload(self.fs, self.mount_a))
 
index 14dc4517b460b0a998921944900a91a1c0b21cf7..200351f4feac3a88b0bffffba889ed7033867bff 100644 (file)
@@ -4520,7 +4520,11 @@ void CInode::validate_disk_state(CInode::validated_data *results,
       dout(20) << "ondisk_read_retval: " << results->backtrace.ondisk_read_retval << dendl;
       if (results->backtrace.ondisk_read_retval != 0) {
         results->backtrace.error_str << "failed to read off disk; see retval";
-       goto next;
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (in->is_dirty_parent())
+          results->backtrace.passed = true;
+        goto next;
       }
 
       // extract the backtrace, and compare it to a newly-constructed one
@@ -4538,6 +4542,11 @@ void CInode::validate_disk_state(CInode::validated_data *results,
         }
         results->backtrace.error_str << "failed to decode on-disk backtrace ("
                                      << bl.length() << " bytes)!";
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (in->is_dirty_parent())
+          results->backtrace.passed = true;
+
        goto next;
       }
 
@@ -4545,8 +4554,12 @@ void CInode::validate_disk_state(CInode::validated_data *results,
                                              &equivalent, &divergent);
 
       if (divergent || memory_newer < 0) {
-       // we're divergent, or on-disk version is newer
-       results->backtrace.error_str << "On-disk backtrace is divergent or newer";
+        // we're divergent, or on-disk version is newer
+        results->backtrace.error_str << "On-disk backtrace is divergent or newer";
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (divergent && in->is_dirty_parent())
+          results->backtrace.passed = true;
       } else {
         results->backtrace.passed = true;
       }