From: Mykola Golub
Date: Tue, 15 Nov 2022 09:56:52 +0000 (+0000)
Subject: tools/cephfs-data-scan: make data pool command args optional
X-Git-Tag: v16.2.14~142^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=79b9f68c3c94f5fd54a6aea51ee3f36070fa74c7;p=ceph.git

tools/cephfs-data-scan: make data pool command args optional

They are easily autodetected.

Signed-off-by: Mykola Golub
(cherry picked from commit d7f70d2807c82656c4d47f21144234837112582e)
---

diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst
index 9688caa037a9..d817f6d1799a 100644
--- a/doc/cephfs/disaster-recovery-experts.rst
+++ b/doc/cephfs/disaster-recovery-experts.rst
@@ -149,8 +149,8 @@ errors.
 
 ::
 
-    cephfs-data-scan scan_extents <data pool>
-    cephfs-data-scan scan_inodes <data pool>
+    cephfs-data-scan scan_extents [<data pool> [<extra data pool> ...]]
+    cephfs-data-scan scan_inodes [<data pool>]
     cephfs-data-scan scan_links
 
 'scan_extents' and 'scan_inodes' commands may take a *very long* time
@@ -166,22 +166,22 @@ The example below shows how to run 4 workers simultaneously:
 
 ::
 
     # Worker 0
-    cephfs-data-scan scan_extents --worker_n 0 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 0 --worker_m 4
     # Worker 1
-    cephfs-data-scan scan_extents --worker_n 1 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 1 --worker_m 4
     # Worker 2
-    cephfs-data-scan scan_extents --worker_n 2 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 2 --worker_m 4
     # Worker 3
-    cephfs-data-scan scan_extents --worker_n 3 --worker_m 4 <data pool>
+    cephfs-data-scan scan_extents --worker_n 3 --worker_m 4
 
     # Worker 0
-    cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4
     # Worker 1
-    cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4
     # Worker 2
-    cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4
     # Worker 3
-    cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4 <data pool>
+    cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4
 
 It is **important** to ensure that all workers have completed the
 scan_extents phase before any workers enter the scan_inodes phase.
@@ -191,8 +191,13 @@ operation to delete ancillary data generated during recovery.
 
 ::
 
-    cephfs-data-scan cleanup <data pool>
+    cephfs-data-scan cleanup [<data pool>]
 
+Note that the data pool arguments to the 'scan_extents', 'scan_inodes' and
+'cleanup' commands are optional: the tool can usually detect the pools
+automatically, though you may still override this. When overriding,
+'scan_extents' needs all of the data pools to be specified, while
+'scan_inodes' and 'cleanup' need only the main data pool.
 
 
 Using an alternate metadata pool for recovery
@@ -250,8 +255,8 @@ Now perform the recovery of the metadata pool from the data pool:
 
 ::
 
     cephfs-data-scan init --force-init --filesystem cephfs_recovery --alternate-pool cephfs_recovery_meta
-    cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <original filesystem name> <original data pool name>
-    cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <original filesystem name> --force-corrupt <original data pool name>
+    cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <original filesystem name>
+    cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <original filesystem name> --force-corrupt
     cephfs-data-scan scan_links --filesystem cephfs_recovery
 
 .. note::
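As background for the --worker_n/--worker_m flags in the documentation hunk
above: the phases parallelize because each of the worker_m workers claims a
disjoint slice of the data pool's object space, so running all of them covers
every object exactly once. The C++ sketch below is illustrative only; it is
not DataScan's actual partitioning, which shards the RADOS object listing
itself, and the function name is hypothetical:

    #include <functional>
    #include <iostream>
    #include <string>

    // Hypothetical illustration (not DataScan's real scheme): worker
    // worker_n of worker_m handles an object iff the object's name hashes
    // into its slice, so the slices are disjoint and together cover all
    // objects.
    static bool object_belongs_to_worker(const std::string &oid,
                                         unsigned worker_n,
                                         unsigned worker_m) {
      return std::hash<std::string>{}(oid) % worker_m == worker_n;
    }

    int main() {
      const unsigned worker_m = 4;  // matches the 4-worker example above
      const std::string oid = "10000000000.00000000";  // example object name
      for (unsigned n = 0; n < worker_m; ++n) {
        std::cout << "worker " << n << ": "
                  << (object_belongs_to_worker(oid, n, worker_m) ? "mine"
                                                                 : "skip")
                  << std::endl;
      }
      return 0;
    }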
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index 9e248dc3989e..199b942f7764 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -366,8 +366,8 @@ class TestDataScan(CephFSTestCase):
         self.fs.journal_tool(["journal", "reset", "--force"], 0)
 
         self.fs.data_scan(["init"])
-        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
-        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)
+        self.fs.data_scan(["scan_extents"], worker_count=workers)
+        self.fs.data_scan(["scan_inodes"], worker_count=workers)
         self.fs.data_scan(["scan_links"])
 
         # Mark the MDS repaired
@@ -481,8 +481,8 @@ class TestDataScan(CephFSTestCase):
 
         # Run data-scan, observe that it inserts our dentry back into the correct fragment
         # by checking the omap now has the dentry's key again
-        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
-        self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
+        self.fs.data_scan(["scan_extents"])
+        self.fs.data_scan(["scan_inodes"])
         self.fs.data_scan(["scan_links"])
 
         self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))
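The DataScan.cc changes below lean on two librados calls: pool_lookup() maps
a pool name to its ID, and pool_reverse_lookup() maps an ID back to a name.
A minimal standalone round trip is sketched here; it assumes a reachable
cluster, a ceph.conf in the default search path, admin credentials, and a
pool named 'cephfs_data' (the pool name is only an example):

    #include <rados/librados.hpp>
    #include <iostream>
    #include <string>

    int main() {
      librados::Rados cluster;
      cluster.init("admin");            // connect as client.admin
      cluster.conf_read_file(nullptr);  // default ceph.conf search path
      if (cluster.connect() < 0) {
        std::cerr << "connect failed" << std::endl;
        return 1;
      }

      // name -> id, as the patch does for an explicitly specified pool
      int64_t id = cluster.pool_lookup("cephfs_data");
      if (id < 0) {
        std::cerr << "pool not found" << std::endl;
        return 1;
      }

      // id -> name, as the patch does when autodetecting from the MDS map
      std::string name;
      if (cluster.pool_reverse_lookup(id, &name) >= 0) {
        std::cout << "pool " << id << " is named " << name << std::endl;
      }

      cluster.shutdown();
      return 0;
    }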
diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc
index 9d26a677997b..35c171da075d 100644
--- a/src/tools/cephfs/DataScan.cc
+++ b/src/tools/cephfs/DataScan.cc
@@ -39,8 +39,8 @@ void DataScan::usage()
 {
   std::cout << "Usage: \n"
     << "  cephfs-data-scan init [--force-init]\n"
-    << "  cephfs-data-scan scan_extents [--force-pool] [--worker_n N --worker_m M] <data pool>\n"
-    << "  cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] [--worker_n N --worker_m M] <data pool>\n"
+    << "  cephfs-data-scan scan_extents [--force-pool] [--worker_n N --worker_m M] [<data pool> [<extra data pool> ...]]\n"
+    << "  cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] [--worker_n N --worker_m M] [<data pool>]\n"
     << "  cephfs-data-scan pg_files <path> <pg id> [<pg id>...]\n"
     << "  cephfs-data-scan scan_links\n"
     << "\n"
@@ -51,7 +51,7 @@ void DataScan::usage()
     << "    --worker_n: Worker number, range 0-(worker_m-1)\n"
     << "\n"
     << "  cephfs-data-scan scan_frags [--force-corrupt]\n"
-    << "  cephfs-data-scan cleanup <data pool>\n"
+    << "  cephfs-data-scan cleanup [<data pool>]\n"
     << std::endl;
 
   generic_client_usage();
@@ -257,33 +257,44 @@ int DataScan::main(const std::vector<const char*> &args)
     return pge.scan_path(pg_files_path);
   }
 
+  bool autodetect_data_pools = false;
+
   // Initialize data_io for those commands that need it
   if (command == "scan_inodes" ||
       command == "scan_extents" ||
       command == "cleanup") {
-    if (data_pool_name.empty()) {
-      std::cerr << "Data pool not specified" << std::endl;
-      return -EINVAL;
-    }
+    data_pool_id = fs->mds_map.get_first_data_pool();
 
-    data_pool_id = rados.pool_lookup(data_pool_name.c_str());
-    if (data_pool_id < 0) {
-      std::cerr << "Data pool '" << data_pool_name << "' not found!" << std::endl;
-      return -ENOENT;
-    } else {
-      dout(4) << "data pool '" << data_pool_name
-              << "' has ID " << data_pool_id << dendl;
+    std::string pool_name;
+    r = rados.pool_reverse_lookup(data_pool_id, &pool_name);
+    if (r < 0) {
+      std::cerr << "Failed to resolve data pool: " << cpp_strerror(r)
+                << std::endl;
+      return r;
     }
 
-    if (data_pool_id != fs->mds_map.get_first_data_pool()) {
+    if (data_pool_name.empty()) {
+      autodetect_data_pools = true;
+      data_pool_name = pool_name;
+    } else if (data_pool_name != pool_name) {
       std::cerr << "Warning: pool '" << data_pool_name << "' is not the "
                    "main CephFS data pool!" << std::endl;
       if (!force_pool) {
         std::cerr << "Use --force-pool to continue" << std::endl;
         return -EINVAL;
       }
+
+      data_pool_id = rados.pool_lookup(data_pool_name.c_str());
+      if (data_pool_id < 0) {
+        std::cerr << "Data pool '" << data_pool_name << "' not found!"
+                  << std::endl;
+        return -ENOENT;
+      }
     }
 
+    dout(4) << "data pool '" << data_pool_name << "' has ID " << data_pool_id
+            << dendl;
+
     dout(4) << "opening data pool '" << data_pool_name << "'" << dendl;
     r = rados.ioctx_create(data_pool_name.c_str(), data_io);
     if (r != 0) {
@@ -293,6 +304,25 @@ int DataScan::main(const std::vector<const char*> &args)
 
   // Initialize extra data_ios for those commands that need it
   if (command == "scan_extents") {
+    if (autodetect_data_pools) {
+      ceph_assert(extra_data_pool_names.empty());
+
+      for (auto &pool_id : fs->mds_map.get_data_pools()) {
+        if (pool_id == data_pool_id) {
+          continue;
+        }
+
+        std::string pool_name;
+        r = rados.pool_reverse_lookup(pool_id, &pool_name);
+        if (r < 0) {
+          std::cerr << "Failed to resolve data pool: " << cpp_strerror(r)
+                    << std::endl;
+          return r;
+        }
+        extra_data_pool_names.insert(pool_name);
+      }
+    }
+
     for (auto &data_pool_name: extra_data_pool_names) {
       int64_t pool_id = rados.pool_lookup(data_pool_name.c_str());
       if (pool_id < 0) {
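Distilled, the pool-selection rule the patch implements is: the filesystem's
first data pool is the main pool, and for scan_extents every other data pool
in the MDS map becomes an extra pool. The self-contained sketch below uses
plain STL stand-ins for the MDS map; the pool IDs and names are invented for
illustration and nothing in it is Ceph API:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Stand-in for the MDS map: the filesystem's data pools, in order.
      // The first entry is the "main" data pool; IDs and names are made up.
      std::vector<int64_t> data_pools = {3, 7, 9};
      std::map<int64_t, std::string> pool_names = {
          {3, "cephfs_data"}, {7, "cephfs_data_ssd"}, {9, "cephfs_data_ec"}};

      // With no pools given on the command line, the main pool is the first
      // data pool and every other data pool is scanned as an "extra" pool.
      int64_t main_pool = data_pools.front();
      std::set<std::string> extra_pools;
      for (int64_t id : data_pools) {
        if (id != main_pool)
          extra_pools.insert(pool_names.at(id));
      }

      std::cout << "main:  " << pool_names.at(main_pool) << "\n";
      for (const auto &name : extra_pools)
        std::cout << "extra: " << name << "\n";
      return 0;
    }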