From: Patrick Donnelly Date: Sun, 23 Dec 2018 22:22:49 +0000 (-0800) Subject: mds: allow boot on read-only X-Git-Tag: v13.2.5~59^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F26055%2Fhead;p=ceph.git mds: allow boot on read-only Signed-off-by: Patrick Donnelly (cherry picked from commit c7ce967b778a0b86b335f6801301e484aaf6ebc3) Conflicts: src/mds/MDSRank.cc - no ceph_assert in mimic --- diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index a7c394718abd..66b97d344060 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -226,8 +226,15 @@ class FuseMount(CephFSMount): # Now that we're mounted, set permissions so that the rest of the test will have # unrestricted access to the filesystem mount. - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60)) + try: + stderr = StringIO() + self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), stderr=stderr) + except run.CommandFailedError: + stderr = stderr.getvalue() + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise def _mountpoint_exists(self): return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, timeout=(15*60)).exitstatus == 0 diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py index 3d14244ad9f7..f534a12759d9 100644 --- a/qa/tasks/cephfs/test_damage.py +++ b/qa/tasks/cephfs/test_damage.py @@ -12,6 +12,7 @@ DAMAGED_ON_START = "damaged_on_start" DAMAGED_ON_LS = "damaged_on_ls" CRASHED = "server crashed" NO_DAMAGE = "no damage" +READONLY = "readonly" FAILED_CLIENT = "client failed" FAILED_SERVER = "server failed" @@ -161,14 +162,22 @@ class TestDamage(CephFSTestCase): )) # Blatant corruptions - mutations.extend([ - MetadataMutation( - o, - "Corrupt {0}".format(o), - lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk), - DAMAGED_ON_START - ) for o in data_objects - ]) + for obj_id in data_objects: + if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + DAMAGED_ON_START + )) # Truncations for o in data_objects: @@ -316,7 +325,17 @@ class TestDamage(CephFSTestCase): else: log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) results[mutation] = FAILED_SERVER - + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise else: try: wait([proc], 20) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 325dc6cab9ba..90dfe64a4774 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1501,6 +1501,8 @@ void MDSRank::boot_start(BootStep step, int r) << cpp_strerror(r); damaged(); assert(r == 0); // Unreachable, damaged() calls respawn() + } else if (r == -EROFS) { + dout(0) << "boot error forcing transition to read-only; MDS will try to continue" << dendl; } else { // Completely unexpected error, give up and die dout(0) << "boot_start encountered an error, failing" << dendl;