From: Milind Changire Date: Tue, 2 Jun 2020 02:11:39 +0000 (+0530) Subject: mds: flag backtrace scrub failures for new files as okay X-Git-Tag: v16.1.0~2166^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=be650fe47ddc808c0d8517523ea41f7f34ef16f1;p=ceph.git mds: flag backtrace scrub failures for new files as okay New, unwritten files fail when backtracing during scrub. This is not necessarily bad, so flag such failures as okay and continue with other entries. Fixes: https://tracker.ceph.com/issues/43543 Signed-off-by: Milind Changire --- diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index 1105e35d49a..e69941dfd84 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -9,6 +9,7 @@ from tasks.cephfs.fuse_mount import FuseMount from teuthology.orchestra import run from teuthology.orchestra.run import CommandFailedError +from teuthology.contextutil import safe_while log = logging.getLogger(__name__) @@ -302,3 +303,12 @@ class CephFSTestCase(CephTestCase): return subtrees time.sleep(pause) raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) + + def _wait_until_scrub_complete(self, path="/", recursive=True): + out_json = self.fs.rank_tell(["scrub", "start", path] + ["recursive"] if recursive else []) + with safe_while(sleep=10, tries=10) as proceed: + while proceed(): + out_json = self.fs.rank_tell(["scrub", "status"]) + if out_json['status'] == "no active scrubs running": + break; + diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py index 226db815740..1e9fad2b4ce 100644 --- a/qa/tasks/cephfs/test_scrub.py +++ b/qa/tasks/cephfs/test_scrub.py @@ -75,6 +75,9 @@ class BacktraceWorkload(Workload): self._filesystem.mds_asok(["flush", "journal"]) self._filesystem._write_data_xattr(st['st_ino'], "parent", "") + def create_files(self, nfiles=1000): + self._mount.create_n_files("scrub-new-files/file", nfiles) + 
class DupInodeWorkload(Workload): """ @@ -144,6 +147,27 @@ class TestScrub(CephFSTestCase): errors[0].exception, errors[0].backtrace )) + def _get_damage_count(self, damage_type='backtrace'): + out_json = self.fs.rank_tell(["damage", "ls"]) + self.assertNotEqual(out_json, None) + + damage_count = 0 + for it in out_json: + if it['damage_type'] == damage_type: + damage_count += 1 + return damage_count + + def _scrub_new_files(self, workload): + """ + That scrubbing new files does not lead to errors + """ + workload.create_files(1000) + self._wait_until_scrub_complete() + self.assertEqual(self._get_damage_count(), 0) + + def test_scrub_backtrace_for_new_files(self): + self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a)) + def test_scrub_backtrace(self): self._scrub(BacktraceWorkload(self.fs, self.mount_a)) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 14dc4517b46..200351f4fea 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4520,7 +4520,11 @@ void CInode::validate_disk_state(CInode::validated_data *results, dout(20) << "ondisk_read_retval: " << results->backtrace.ondisk_read_retval << dendl; if (results->backtrace.ondisk_read_retval != 0) { results->backtrace.error_str << "failed to read off disk; see retval"; - goto next; + // we probably have a new unwritten file! + // so skip the backtrace scrub for this entry and say that all's well + if (in->is_dirty_parent()) + results->backtrace.passed = true; + goto next; } // extract the backtrace, and compare it to a newly-constructed one @@ -4538,6 +4542,11 @@ void CInode::validate_disk_state(CInode::validated_data *results, } results->backtrace.error_str << "failed to decode on-disk backtrace (" << bl.length() << " bytes)!"; + // we probably have a new unwritten file! 
+ // so skip the backtrace scrub for this entry and say that all's well + if (in->is_dirty_parent()) + results->backtrace.passed = true; + goto next; } @@ -4545,8 +4554,12 @@ void CInode::validate_disk_state(CInode::validated_data *results, &equivalent, &divergent); if (divergent || memory_newer < 0) { - // we're divergent, or on-disk version is newer - results->backtrace.error_str << "On-disk backtrace is divergent or newer"; + // we're divergent, or on-disk version is newer + results->backtrace.error_str << "On-disk backtrace is divergent or newer"; + // we probably have a new unwritten file! + // so skip the backtrace scrub for this entry and say that all's well + if (divergent && in->is_dirty_parent()) + results->backtrace.passed = true; } else { results->backtrace.passed = true; }