From d7f70d2807c82656c4d47f21144234837112582e Mon Sep 17 00:00:00 2001
From: Mykola Golub <mgolub@suse.com>
Date: Tue, 15 Nov 2022 09:56:52 +0000
Subject: [PATCH] tools/cephfs-data-scan: make data pool command args optional

They are easily autodetected.

Signed-off-by: Mykola Golub <mgolub@suse.com>
---
 doc/cephfs/disaster-recovery-experts.rst | 31 +++++++-----
 qa/tasks/cephfs/test_data_scan.py        |  8 ++--
 src/tools/cephfs/DataScan.cc             | 60 ++++++++++++++++++------
 3 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst
index d5e3d9cb3fa..c881c24239b 100644
--- a/doc/cephfs/disaster-recovery-experts.rst
+++ b/doc/cephfs/disaster-recovery-experts.rst
@@ -149,8 +149,8 @@ errors.
 
 ::
 
-    cephfs-data-scan scan_extents <data pool>
-    cephfs-data-scan scan_inodes <data pool>
+    cephfs-data-scan scan_extents [<data pool> [<extra data pool> ...]]
+    cephfs-data-scan scan_inodes [<data pool>]
     cephfs-data-scan scan_links
 
 'scan_extents' and 'scan_inodes' commands may take a *very long* time
@@ -166,22 +166,22 @@ The example below shows how to run 4 workers simultaneously:
 
 ::
 
     # Worker 0
-    cephfs-data-scan scan_extents --worker_n 0 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 0 --worker_m 4
     # Worker 1
-    cephfs-data-scan scan_extents --worker_n 1 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 1 --worker_m 4
     # Worker 2
-    cephfs-data-scan scan_extents --worker_n 2 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 2 --worker_m 4
     # Worker 3
-    cephfs-data-scan scan_extents --worker_n 3 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 3 --worker_m 4
 
     # Worker 0
-    cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4
     # Worker 1
-    cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4
     # Worker 2
-    cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4
     # Worker 3
-    cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4
 
 It is **important** to ensure that all workers have completed the
 scan_extents phase before any workers enter the scan_inodes phase.
@@ -191,8 +191,13 @@ operation to delete ancillary data generated during recovery.
 
 ::
 
-    cephfs-data-scan cleanup <data pool>
+    cephfs-data-scan cleanup [<data pool>]
 
+Note, the data pool parameters for 'scan_extents', 'scan_inodes' and
+'cleanup' commands are optional, and usually the tool will be able to
+detect the pools automatically. Still you may override this. The
+'scan_extents' command needs all data pools to be specified, while
+'scan_inodes' and 'cleanup' commands need only the main data pool.
 
 Using an alternate metadata pool for recovery
 ---------------------------------------------
@@ -255,8 +260,8 @@ Now perform the recovery of the metadata pool from the data pool:
 
 ::
 
     cephfs-data-scan init --force-init --filesystem cephfs_recovery --alternate-pool cephfs_recovery_meta
-    cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <fs_name> <data_pool>
-    cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <fs_name> --force-corrupt <data_pool>
+    cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <fs_name>
+    cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <fs_name> --force-corrupt
     cephfs-data-scan scan_links --filesystem cephfs_recovery
 
 .. note::
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index b186cf902b0..32789df4d6c 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -400,8 +400,8 @@ class TestDataScan(CephFSTestCase):
 
         self.fs.journal_tool(["journal", "reset", "--force"], 0)
         self.fs.data_scan(["init"])
-        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
-        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)
+        self.fs.data_scan(["scan_extents"], worker_count=workers)
+        self.fs.data_scan(["scan_inodes"], worker_count=workers)
 
         # Mark the MDS repaired
         self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
@@ -517,8 +517,8 @@ class TestDataScan(CephFSTestCase):
 
         # Run data-scan, observe that it inserts our dentry back into the correct fragment
         # by checking the omap now has the dentry's key again
-        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
-        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
+        self.fs.data_scan(["scan_extents"])
+        self.fs.data_scan(["scan_inodes"])
         self.fs.data_scan(["scan_links"])
 
         self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))
diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc
index 9efba2eb856..de051505dc4 100644
--- a/src/tools/cephfs/DataScan.cc
+++ b/src/tools/cephfs/DataScan.cc
@@ -41,8 +41,8 @@ void DataScan::usage()
 {
   std::cout << "Usage: \n"
     << "  cephfs-data-scan init [--force-init]\n"
-    << "  cephfs-data-scan scan_extents [--force-pool] [--worker_n N --worker_m M] <data pool>\n"
-    << "  cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] [--worker_n N --worker_m M] <data pool>\n"
+    << "  cephfs-data-scan scan_extents [--force-pool] [--worker_n N --worker_m M] [<data pool> [<extra data pool> ...]]\n"
+    << "  cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] [--worker_n N --worker_m M] [<data pool>]\n"
    << "  cephfs-data-scan pg_files <path> <pg id> [<pg id>...]\n"
     << "  cephfs-data-scan scan_links\n"
     << "\n"
@@ -53,7 +53,7 @@ void DataScan::usage()
     << "    --worker_n: Worker number, range 0-(worker_m-1)\n"
     << "\n"
     << "  cephfs-data-scan scan_frags [--force-corrupt]\n"
-    << "  cephfs-data-scan cleanup <data pool>\n"
+    << "  cephfs-data-scan cleanup [<data pool>]\n"
     << std::endl;
 
   generic_client_usage();
@@ -259,33 +259,44 @@ int DataScan::main(const std::vector<const char *> &args)
     return pge.scan_path(pg_files_path);
   }
+  bool autodetect_data_pools = false;
+
   // Initialize data_io for those commands that need it
   if (command == "scan_inodes" ||
       command == "scan_extents" ||
       command == "cleanup") {
-    if (data_pool_name.empty()) {
-      std::cerr << "Data pool not specified" << std::endl;
-      return -EINVAL;
-    }
+    data_pool_id = fs->mds_map.get_first_data_pool();
 
-    data_pool_id = rados.pool_lookup(data_pool_name.c_str());
-    if (data_pool_id < 0) {
-      std::cerr << "Data pool '" << data_pool_name << "' not found!" << std::endl;
-      return -ENOENT;
-    } else {
-      dout(4) << "data pool '" << data_pool_name
-        << "' has ID " << data_pool_id << dendl;
+    std::string pool_name;
+    r = rados.pool_reverse_lookup(data_pool_id, &pool_name);
+    if (r < 0) {
+      std::cerr << "Failed to resolve data pool: " << cpp_strerror(r)
+                << std::endl;
+      return r;
     }
 
-    if (data_pool_id != fs->mds_map.get_first_data_pool()) {
+    if (data_pool_name.empty()) {
+      autodetect_data_pools = true;
+      data_pool_name = pool_name;
+    } else if (data_pool_name != pool_name) {
       std::cerr << "Warning: pool '" << data_pool_name << "' is not the "
         "main CephFS data pool!" << std::endl;
       if (!force_pool) {
         std::cerr << "Use --force-pool to continue" << std::endl;
         return -EINVAL;
       }
+
+      data_pool_id = rados.pool_lookup(data_pool_name.c_str());
+      if (data_pool_id < 0) {
+        std::cerr << "Data pool '" << data_pool_name << "' not found!"
+                  << std::endl;
+        return -ENOENT;
+      }
     }
 
+    dout(4) << "data pool '" << data_pool_name << "' has ID " << data_pool_id
+            << dendl;
+
     dout(4) << "opening data pool '" << data_pool_name << "'" << dendl;
     r = rados.ioctx_create(data_pool_name.c_str(), data_io);
     if (r != 0) {
@@ -295,6 +306,25 @@ int DataScan::main(const std::vector<const char *> &args)
 
   // Initialize extra data_ios for those commands that need it
   if (command == "scan_extents") {
+    if (autodetect_data_pools) {
+      ceph_assert(extra_data_pool_names.empty());
+
+      for (auto &pool_id : fs->mds_map.get_data_pools()) {
+        if (pool_id == data_pool_id) {
+          continue;
+        }
+
+        std::string pool_name;
+        r = rados.pool_reverse_lookup(pool_id, &pool_name);
+        if (r < 0) {
+          std::cerr << "Failed to resolve data pool: " << cpp_strerror(r)
+                    << std::endl;
+          return r;
+        }
+        extra_data_pool_names.insert(pool_name);
+      }
+    }
+
     for (auto &data_pool_name: extra_data_pool_names) {
       int64_t pool_id = rados.pool_lookup(data_pool_name.c_str());
       if (data_pool_id < 0) {
-- 
2.39.5