From: Ramana Raja Date: Mon, 28 Jun 2021 23:39:10 +0000 (-0400) Subject: mds: create file system with specific ID X-Git-Tag: v16.2.6~40^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F42900%2Fhead;p=ceph.git mds: create file system with specific ID File system will need to be recreated when monitor databases are lost and rebuilt. Some applications (e.g., CSI) expect that the recovered file system have the same ID as before. Allow creating a file system with a specific ID to help in such scenarios. This can now be done by the `fs new` command using the argument 'fscid' and 'force' flag. Newer file systems will no longer have increasing IDs as a corollary. Fixes: https://tracker.ceph.com/issues/51340 Signed-off-by: Ramana Raja (cherry picked from commit a0a8ba5087f0b82588860cda188dfdb48a964771) Conflicts: qa/tasks/cephfs/test_admin.py fixed conflicts due to tests reorder in master --- diff --git a/PendingReleaseNotes b/PendingReleaseNotes index a775bc5668d0..7a9a2cf9c668 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -14,6 +14,10 @@ If you want to return back the old behavior add 'ssl_options=' (empty) to ``rgw frontends`` configuration. +* fs: A file system can be created with a specific ID ("fscid"). This is useful + in certain recovery scenarios, e.g., monitor database lost and rebuilt, and + the restored file system is expected to have the same ID as before. + >=16.0.0 -------- diff --git a/doc/cephfs/administration.rst b/doc/cephfs/administration.rst index c53161313c9b..ac238535890a 100644 --- a/doc/cephfs/administration.rst +++ b/doc/cephfs/administration.rst @@ -379,3 +379,14 @@ This removes a rank from the failed set. This command resets the file system state to defaults, except for the name and pools. Non-zero ranks are saved in the stopped set. + + +:: + + fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force + +This command creates a file system with a specific **fscid** (file system cluster ID). 
+You may want to do this when an application expects the file system's ID to be +stable after it has been recovered, e.g., after monitor databases are lost and +rebuilt. Consequently, file system IDs don't always keep increasing with newer +file systems. diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index eebdee165ef2..876be69290bc 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -21,7 +21,7 @@ class TestAdminCommands(CephFSTestCase): """ CLIENTS_REQUIRED = 1 - MDSS_REQUIRED = 1 + MDSS_REQUIRED = 3 def test_fsnames_can_only_by_goodchars(self): n = 'test_fsnames_can_only_by_goodchars' @@ -198,6 +198,72 @@ class TestAdminCommands(CephFSTestCase): self._check_pool_application_metadata_key_value( pool_names[i], 'cephfs', keys[i], fs_name) + def test_fs_new_with_specific_id(self): + """ + That a file system can be created with a specific ID. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + for i in range(2): + self._check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id_idempotency(self): + """ + That command to create file system with specific ID is idempotent. 
+ """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + + def test_fs_new_with_specific_id_fails_without_force_flag(self): + """ + That command to create file system with specific ID fails without '--force' flag. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID without --force flag") + else: + self.fail("expected creating file system with specific ID without '--force' flag to fail") + + def test_fs_new_with_specific_id_fails_already_in_use(self): + """ + That creating file system with ID already in use fails. 
+ """ + fs_name = "test_fs_specific_id" + # file system ID already in use + fscid = self.fs.status().map['filesystems'][0]['id'] + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID that is already in use") + else: + self.fail("expected creating file system with ID already in use to fail") + class TestDump(CephFSTestCase): CLIENTS_REQUIRED = 0 diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index 802124f9bc44..37c458660a01 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -449,7 +449,8 @@ mds_gid_t Filesystem::get_standby_replay(mds_gid_t who) const } Filesystem::ref FSMap::create_filesystem(std::string_view name, - int64_t metadata_pool, int64_t data_pool, uint64_t features) + int64_t metadata_pool, int64_t data_pool, uint64_t features, + fs_cluster_id_t fscid) { auto fs = Filesystem::create(); fs->mds_map.epoch = epoch; @@ -461,10 +462,21 @@ Filesystem::ref FSMap::create_filesystem(std::string_view name, fs->mds_map.created = ceph_clock_now(); fs->mds_map.modified = ceph_clock_now(); fs->mds_map.enabled = true; - fs->fscid = next_filesystem_id++; - // ANONYMOUS is only for upgrades from legacy mdsmaps, we should - // have initialized next_filesystem_id such that it's never used here. 
- ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); + if (fscid == FS_CLUSTER_ID_NONE) { + fs->fscid = next_filesystem_id++; + } else { + fs->fscid = fscid; + next_filesystem_id = std::max(fscid, (fs_cluster_id_t)next_filesystem_id) + 1; + } + + // File system's ID can be FS_CLUSTER_ID_ANONYMOUS if we're recovering + // a legacy file system by passing FS_CLUSTER_ID_ANONYMOUS as the desired + // file system ID + if (fscid != FS_CLUSTER_ID_ANONYMOUS) { + // ANONYMOUS is only for upgrades from legacy mdsmaps, we should + // have initialized next_filesystem_id such that it's never used here. + ceph_assert(fs->fscid != FS_CLUSTER_ID_ANONYMOUS); + } filesystems[fs->fscid] = fs; // Created first filesystem? Set it as the one diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 1bd5ca798958..e44574b34a33 100644 --- a/src/mds/FSMap.h +++ b/src/mds/FSMap.h @@ -404,7 +404,8 @@ public: */ Filesystem::ref create_filesystem( std::string_view name, int64_t metadata_pool, - int64_t data_pool, uint64_t features); + int64_t data_pool, uint64_t features, + fs_cluster_id_t fscid); /** * Remove the filesystem (it must exist). 
Caller should already diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 396ac3118b4e..4fcd1ade445a 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -241,6 +241,18 @@ class FsNewHandler : public FileSystemCommandHandler } } + int64_t fscid = FS_CLUSTER_ID_NONE; + if (cmd_getval(cmdmap, "fscid", fscid)) { + if (!force) { + ss << "Pass --force to create a file system with a specific ID"; + return -EINVAL; + } + if (fsmap.filesystem_exists(fscid)) { + ss << "filesystem already exists with id '" << fscid << "'"; + return -EINVAL; + } + } + pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data); ceph_assert(data_pool != NULL); // Checked it existed above pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata); @@ -280,7 +292,7 @@ class FsNewHandler : public FileSystemCommandHandler // All checks passed, go ahead and create. auto&& fs = fsmap.create_filesystem(fs_name, metadata, data, - mon->get_quorum_con_features()); + mon->get_quorum_con_features(), fscid); ss << "new fs with metadata pool " << metadata << " and data pool " << data; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index cf4a6913de03..383827bb338f 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -367,7 +367,8 @@ COMMAND("fs new " "name=metadata,type=CephString " "name=data,type=CephString " "name=force,type=CephBool,req=false " - "name=allow_dangerous_metadata_overlay,type=CephBool,req=false", + "name=allow_dangerous_metadata_overlay,type=CephBool,req=false " + "name=fscid,type=CephInt,range=0,req=false", "make new filesystem using named pools and ", "fs", "rw") COMMAND("fs fail "