From be650fe47ddc808c0d8517523ea41f7f34ef16f1 Mon Sep 17 00:00:00 2001
From: Milind Changire
Date: Tue, 2 Jun 2020 07:41:39 +0530
Subject: [PATCH] mds: flag backtrace scrub failures for new files as okay

New, unwritten files, fail when backtracing during scrub.
This is not necessarily bad. So flag such failures as okay
and continue with other entries.

Fixes: https://tracker.ceph.com/issues/43543
Signed-off-by: Milind Changire
---
 qa/tasks/cephfs/cephfs_test_case.py | 14 ++++++++++++++
 qa/tasks/cephfs/test_scrub.py       | 24 ++++++++++++++++++++++++
 src/mds/CInode.cc                   | 19 ++++++++++++++++---
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
index 1105e35d49a71..e69941dfd849d 100644
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -9,6 +9,7 @@ from tasks.cephfs.fuse_mount import FuseMount
 
 from teuthology.orchestra import run
 from teuthology.orchestra.run import CommandFailedError
+from teuthology.contextutil import safe_while
 
 log = logging.getLogger(__name__)
 
@@ -302,3 +303,16 @@ class CephFSTestCase(CephTestCase):
                 return subtrees
             time.sleep(pause)
         raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank))
+
+    def _wait_until_scrub_complete(self, path="/", recursive=True):
+        """
+        Start a scrub of path (recursive by default) and poll the scrub
+        status until it reports that no scrubs are active.
+        """
+        scrub_cmd = ["scrub", "start", path] + (["recursive"] if recursive else [])
+        self.fs.rank_tell(scrub_cmd)
+        with safe_while(sleep=10, tries=10) as proceed:
+            while proceed():
+                out_json = self.fs.rank_tell(["scrub", "status"])
+                if out_json['status'] == "no active scrubs running":
+                    break
diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py
index 226db815740f0..1e9fad2b4ceb6 100644
--- a/qa/tasks/cephfs/test_scrub.py
+++ b/qa/tasks/cephfs/test_scrub.py
@@ -75,6 +75,9 @@ class BacktraceWorkload(Workload):
         self._filesystem.mds_asok(["flush", "journal"])
         self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
 
+    def create_files(self, nfiles=1000):
+        self._mount.create_n_files("scrub-new-files/file", nfiles)
+
 
 class DupInodeWorkload(Workload):
     """
@@ -144,6 +147,27 @@ class TestScrub(CephFSTestCase):
                 errors[0].exception, errors[0].backtrace
             ))
 
+    def _get_damage_count(self, damage_type='backtrace'):
+        out_json = self.fs.rank_tell(["damage", "ls"])
+        self.assertNotEqual(out_json, None)
+
+        damage_count = 0
+        for it in out_json:
+            if it['damage_type'] == damage_type:
+                damage_count += 1
+        return damage_count
+
+    def _scrub_new_files(self, workload):
+        """
+        That scrubbing new files does not lead to errors
+        """
+        workload.create_files(1000)
+        self._wait_until_scrub_complete()
+        self.assertEqual(self._get_damage_count(), 0)
+
+    def test_scrub_backtrace_for_new_files(self):
+        self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a))
+
     def test_scrub_backtrace(self):
         self._scrub(BacktraceWorkload(self.fs, self.mount_a))
 
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 14dc4517b460b..200351f4feac3 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -4520,7 +4520,11 @@ void CInode::validate_disk_state(CInode::validated_data *results,
       dout(20) << "ondisk_read_retval: " << results->backtrace.ondisk_read_retval << dendl;
       if (results->backtrace.ondisk_read_retval != 0) {
         results->backtrace.error_str << "failed to read off disk; see retval";
-	goto next;
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (in->is_dirty_parent())
+          results->backtrace.passed = true;
+        goto next;
       }
 
       // extract the backtrace, and compare it to a newly-constructed one
@@ -4538,6 +4542,11 @@ void CInode::validate_disk_state(CInode::validated_data *results,
         }
         results->backtrace.error_str << "failed to decode on-disk backtrace ("
                                      << bl.length() << " bytes)!";
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (in->is_dirty_parent())
+          results->backtrace.passed = true;
+
         goto next;
       }
 
@@ -4545,8 +4554,12 @@ void CInode::validate_disk_state(CInode::validated_data *results,
 							      &equivalent, &divergent);
 
       if (divergent || memory_newer < 0) {
-	// we're divergent, or on-disk version is newer
-	results->backtrace.error_str << "On-disk backtrace is divergent or newer";
+        // we're divergent, or on-disk version is newer
+        results->backtrace.error_str << "On-disk backtrace is divergent or newer";
+        // we probably have a new unwritten file!
+        // so skip the backtrace scrub for this entry and say that all's well
+        if (divergent && in->is_dirty_parent())
+          results->backtrace.passed = true;
       } else {
         results->backtrace.passed = true;
       }
-- 
2.39.5