From: Ramana Raja Date: Mon, 28 Jun 2021 23:39:10 +0000 (-0400) Subject: mds: create file system with specific ID X-Git-Tag: v17.1.0~1308^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a0a8ba5087f0b82588860cda188dfdb48a964771;p=ceph.git mds: create file system with specific ID File system will need to be recreated when monitor databases are lost and rebuilt. Some applications (e.g., CSI) expect that the recovered file system have the same ID as before. Allow creating a file system with a specific ID to help in such scenarios. This can now be done by the `fs new` command using the argument 'fscid' and 'force' flag. Newer file systems will no longer have increasing IDs as a corollary. Fixes: https://tracker.ceph.com/issues/51340 Signed-off-by: Ramana Raja --- diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 4858af5883a3..c4bd7e451fa2 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -19,6 +19,11 @@ * The ``device_health_metrics`` pool has been renamed ``.mgr``. It is now used as a common store for all ``ceph-mgr`` modules. + +* fs: A file system can be created with a specific ID ("fscid"). This is useful + in certain recovery scenarios, e.g., monitor database lost and rebuilt, and + the restored file system is expected to have the same ID as before. + * fs: A file system can be renamed using the `fs rename` command. Any cephx credentials authorized for the old file system name will need to be reauthorized to the new file system name. Since the operations of the clients diff --git a/doc/cephfs/administration.rst b/doc/cephfs/administration.rst index 7806eb8d6133..986e3d4a4ed3 100644 --- a/doc/cephfs/administration.rst +++ b/doc/cephfs/administration.rst @@ -395,3 +395,14 @@ This removes a rank from the failed set. This command resets the file system state to defaults, except for the name and pools. Non-zero ranks are saved in the stopped set. 
+ + +:: + + fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force + +This command creates a file system with a specific **fscid** (file system cluster ID). +You may want to do this when an application expects the file system's ID to be +stable after it has been recovered, e.g., after monitor databases are lost and +rebuilt. Consequently, file system IDs don't always keep increasing with newer +file systems. diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index 12276c621846..03b477c3e3d6 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -112,6 +112,7 @@ class TestFsNew(TestAdminCommands): """ Test "ceph fs new" subcommand. """ + MDSS_REQUIRED = 3 def test_fsnames_can_only_by_goodchars(self): n = 'test_fsnames_can_only_by_goodchars' @@ -213,6 +214,72 @@ class TestFsNew(TestAdminCommands): self.check_pool_application_metadata_key_value( pool_names[i], 'cephfs', keys[i], fs_name) + def test_fs_new_with_specific_id(self): + """ + That a file system can be created with a specific ID. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + for i in range(2): + self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id_idempotency(self): + """ + That command to create file system with specific ID is idempotent. 
+ """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + + def test_fs_new_with_specific_id_fails_without_force_flag(self): + """ + That command to create file system with specific ID fails without '--force' flag. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID without --force flag") + else: + self.fail("expected creating file system with specific ID without '--force' flag to fail") + + def test_fs_new_with_specific_id_fails_already_in_use(self): + """ + That creating file system with ID already in use fails. 
+ """ + fs_name = "test_fs_specific_id" + # file system ID already in use + fscid = self.fs.status().map['filesystems'][0]['id'] + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID that is already in use") + else: + self.fail("expected creating file system with ID already in use to fail") + class TestRenameCommand(TestAdminCommands): """ diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index 2a3c674e9246..f88e446571dc 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -449,7 +449,8 @@ mds_gid_t Filesystem::get_standby_replay(mds_gid_t who) const } Filesystem::ref FSMap::create_filesystem(std::string_view name, - int64_t metadata_pool, int64_t data_pool, uint64_t features) + int64_t metadata_pool, int64_t data_pool, uint64_t features, + fs_cluster_id_t fscid) { auto fs = Filesystem::create(); fs->mds_map.epoch = epoch; @@ -461,10 +462,21 @@ Filesystem::ref FSMap::create_filesystem(std::string_view name, fs->mds_map.created = ceph_clock_now(); fs->mds_map.modified = ceph_clock_now(); fs->mds_map.enabled = true; - fs->fscid = next_filesystem_id++; - // ANONYMOUS is only for upgrades from legacy mdsmaps, we should - // have initialized next_filesystem_id such that it's never used here. 
- ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); + if (fscid == FS_CLUSTER_ID_NONE) { + fs->fscid = next_filesystem_id++; + } else { + fs->fscid = fscid; + next_filesystem_id = std::max(fscid, (fs_cluster_id_t)next_filesystem_id) + 1; + } + + // File system's ID can be FS_CLUSTER_ID_ANONYMOUS if we're recovering + // a legacy file system by passing FS_CLUSTER_ID_ANONYMOUS as the desired + // file system ID + if (fscid != FS_CLUSTER_ID_ANONYMOUS) { + // ANONYMOUS is only for upgrades from legacy mdsmaps, we should + // have initialized next_filesystem_id such that it's never used here. + ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); + } filesystems[fs->fscid] = fs; // Created first filesystem? Set it as the one diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 8a3a6b69cb4e..755a608c3dac 100644 --- a/src/mds/FSMap.h +++ b/src/mds/FSMap.h @@ -400,7 +400,8 @@ public: */ Filesystem::ref create_filesystem( std::string_view name, int64_t metadata_pool, - int64_t data_pool, uint64_t features); + int64_t data_pool, uint64_t features, + fs_cluster_id_t fscid); /** * Remove the filesystem (it must exist). 
Caller should already diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 20e1796a4700..d1ecabb1499c 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -241,6 +241,18 @@ class FsNewHandler : public FileSystemCommandHandler } } + int64_t fscid = FS_CLUSTER_ID_NONE; + if (cmd_getval(cmdmap, "fscid", fscid)) { + if (!force) { + ss << "Pass --force to create a file system with a specific ID"; + return -EINVAL; + } + if (fsmap.filesystem_exists(fscid)) { + ss << "filesystem already exists with id '" << fscid << "'"; + return -EINVAL; + } + } + pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data); ceph_assert(data_pool != NULL); // Checked it existed above pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata); @@ -280,7 +292,7 @@ class FsNewHandler : public FileSystemCommandHandler // All checks passed, go ahead and create. auto&& fs = fsmap.create_filesystem(fs_name, metadata, data, - mon->get_quorum_con_features()); + mon->get_quorum_con_features(), fscid); ss << "new fs with metadata pool " << metadata << " and data pool " << data; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 705d0492ed57..9c5abd84cf4c 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -331,7 +331,8 @@ COMMAND("fs new " " name=metadata,type=CephString " "name=data,type=CephString " "name=force,type=CephBool,req=false " - "name=allow_dangerous_metadata_overlay,type=CephBool,req=false", + "name=allow_dangerous_metadata_overlay,type=CephBool,req=false " + "name=fscid,type=CephInt,range=0,req=false", "make new filesystem using named pools and ", "fs", "rw") COMMAND("fs fail "