From ee04ef8c516b3aeb967a12953cdbdf6045467613 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Sun, 23 Dec 2018 14:22:49 -0800 Subject: [PATCH] mds: allow boot on read-only Signed-off-by: Patrick Donnelly (cherry picked from commit c7ce967b778a0b86b335f6801301e484aaf6ebc3) Conflicts: src/mds/MDSRank.cc --- qa/tasks/cephfs/fuse_mount.py | 11 ++++++++-- qa/tasks/cephfs/test_damage.py | 37 +++++++++++++++++++++++++--------- src/mds/MDSRank.cc | 2 ++ 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index ae4072e480cd1..33bcf8c60419d 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -206,8 +206,15 @@ class FuseMount(CephFSMount): # Now that we're mounted, set permissions so that the rest of the test will have # unrestricted access to the filesystem mount. - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60)) + try: + stderr = StringIO() + self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), stderr=stderr) + except run.CommandFailedError: + stderr = stderr.getvalue() + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise def _mountpoint_exists(self): return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, timeout=(15*60)).exitstatus == 0 diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py index b2c606785be36..8fe41f5801587 100644 --- a/qa/tasks/cephfs/test_damage.py +++ b/qa/tasks/cephfs/test_damage.py @@ -12,6 +12,7 @@ DAMAGED_ON_START = "damaged_on_start" DAMAGED_ON_LS = "damaged_on_ls" CRASHED = "server crashed" NO_DAMAGE = "no damage" +READONLY = "readonly" FAILED_CLIENT = "client failed" FAILED_SERVER = "server failed" @@ -159,14 +160,22 @@ class TestDamage(CephFSTestCase): )) # Blatant corruptions - mutations.extend([ - MetadataMutation( - o, - "Corrupt {0}".format(o), - lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk), - DAMAGED_ON_START - ) for o in data_objects - ]) + for obj_id in data_objects: + if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + DAMAGED_ON_START + )) # Truncations for obj_id in data_objects: @@ -314,7 +323,17 @@ class TestDamage(CephFSTestCase): else: log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) results[mutation] = FAILED_SERVER - + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise else: try: wait([proc], 20) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 3e3ac1d0968c3..77177ab70c765 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1455,6 +1455,8 @@ void MDSRank::boot_start(BootStep step, int r) << cpp_strerror(r); damaged(); assert(r == 0); // Unreachable, damaged() calls respawn() + } else if (r == -EROFS) { + dout(0) << "boot error forcing transition to read-only; MDS will try to continue" << dendl; } else { // Completely unexpected error, give up and die dout(0) << "boot_start encountered an error, failing" << dendl; -- 2.39.5