From 1877a01f9d41333b4ec1dde4a116b28f6d364646 Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Thu, 3 Nov 2022 10:57:52 +0000 Subject: [PATCH] tools/cephfs-data-scan: support for multi-datapool Fixes: https://tracker.ceph.com/issues/58029 Signed-off-by: Mykola Golub --- src/tools/cephfs/DataScan.cc | 192 +++++++++++++++++++++++++++-------- src/tools/cephfs/DataScan.h | 2 + 2 files changed, 152 insertions(+), 42 deletions(-) diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 78f5452e3b1f4..9efba2eb856ee 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -158,6 +158,7 @@ int DataScan::main(const std::vector &args) std::string const &command = args[0]; std::string data_pool_name; + std::set<std::string> extra_data_pool_names; std::string pg_files_path; std::set pg_files_pgs; @@ -177,10 +178,19 @@ int DataScan::main(const std::vector &args) continue; } + // Trailing positional arguments + if (command == "scan_extents") { + if (data_pool_name.empty()) { + data_pool_name = *i; + } else if (*i != data_pool_name) { + extra_data_pool_names.insert(*i); + } + continue; + } + // Trailing positional argument if (i + 1 == args.end() && (command == "scan_inodes" - || command == "scan_extents" || command == "cleanup")) { data_pool_name = *i; continue; @@ -267,9 +277,9 @@ int DataScan::main(const std::vector &args) << "' has ID " << data_pool_id << dendl; } - if (!fs->mds_map.is_data_pool(data_pool_id)) { - std::cerr << "Warning: pool '" << data_pool_name << "' is not a " - "CephFS data pool!" << std::endl; + if (data_pool_id != fs->mds_map.get_first_data_pool()) { + std::cerr << "Warning: pool '" << data_pool_name << "' is not the " + "main CephFS data pool!" 
<< std::endl; if (!force_pool) { std::cerr << "Use --force-pool to continue" << std::endl; return -EINVAL; @@ -283,6 +293,36 @@ int DataScan::main(const std::vector &args) } } + // Initialize extra data_ios for those commands that need it + if (command == "scan_extents") { + for (auto &data_pool_name: extra_data_pool_names) { + int64_t pool_id = rados.pool_lookup(data_pool_name.c_str()); + if (pool_id < 0) { + std::cerr << "Data pool '" << data_pool_name << "' not found!" << std::endl; + return -ENOENT; + } else { + dout(4) << "data pool '" << data_pool_name << "' has ID " << pool_id + << dendl; + } + + if (!fs->mds_map.is_data_pool(pool_id)) { + std::cerr << "Warning: pool '" << data_pool_name << "' is not a " + "CephFS data pool!" << std::endl; + if (!force_pool) { + std::cerr << "Use --force-pool to continue" << std::endl; + return -EINVAL; + } + } + + dout(4) << "opening data pool '" << data_pool_name << "'" << dendl; + extra_data_ios.push_back({}); + r = rados.ioctx_create(data_pool_name.c_str(), extra_data_ios.back()); + if (r != 0) { + return r; + } + } + } + // Initialize metadata_io from MDSMap for scan_frags if (command == "scan_frags" || command == "scan_links") { const auto fs = fsmap->get_filesystem(fscid); @@ -501,46 +541,64 @@ int parse_oid(const std::string &oid, uint64_t *inode_no, uint64_t *obj_id) int DataScan::scan_extents() { - return forall_objects(data_io, false, [this]( + std::vector<librados::IoCtx *> data_ios; + data_ios.push_back(&data_io); + for (auto &extra_data_io : extra_data_ios) { + data_ios.push_back(&extra_data_io); + } + + for (auto ioctx : data_ios) { + int r = forall_objects(*ioctx, false, [this, ioctx]( std::string const &oid, uint64_t obj_name_ino, uint64_t obj_name_offset) -> int - { - // Read size - uint64_t size; - time_t mtime; - int r = data_io.stat(oid, &size, &mtime); - dout(10) << "handling object " << obj_name_ino - << "." 
<< obj_name_offset << dendl; - if (r != 0) { - dout(4) << "Cannot stat '" << oid << "': skipping" << dendl; - return r; - } + { + // Read size + uint64_t size; + time_t mtime; + int r = ioctx->stat(oid, &size, &mtime); + dout(10) << "handling object " << obj_name_ino + << "." << obj_name_offset << dendl; + if (r != 0) { + dout(4) << "Cannot stat '" << oid << "': skipping" << dendl; + return r; + } + int64_t obj_pool_id = data_io.get_id() != ioctx->get_id() ? + ioctx->get_id() : -1; + + // I need to keep track of + // * The highest object ID seen + // * The size of the highest object ID seen + // * The largest object seen + // * The pool of the objects seen (if it is not the main data pool) + // + // Given those things, I can later infer the object chunking + // size, the offset of the last object (chunk size * highest ID seen), + // the actual size (offset of last object + size of highest ID seen), + // and the layout pool id. + // + // This logic doesn't take account of striping. + r = ClsCephFSClient::accumulate_inode_metadata( + data_io, + obj_name_ino, + obj_name_offset, + size, + obj_pool_id, + mtime); + if (r < 0) { + derr << "Failed to accumulate metadata data from '" + << oid << "': " << cpp_strerror(r) << dendl; + return r; + } - // I need to keep track of - // * The highest object ID seen - // * The size of the highest object ID seen - // * The largest object seen - // - // Given those things, I can later infer the object chunking - // size, the offset of the last object (chunk size * highest ID seen) - // and the actual size (offset of last object + size of highest ID seen) - // - // This logic doesn't take account of striping. 
- r = ClsCephFSClient::accumulate_inode_metadata( - data_io, - obj_name_ino, - obj_name_offset, - size, - mtime); + return r; + }); if (r < 0) { - derr << "Failed to accumulate metadata data from '" - << oid << "': " << cpp_strerror(r) << dendl; return r; } + } - return r; - }); + return 0; } int DataScan::probe_filter(librados::IoCtx &ioctx) @@ -706,7 +764,37 @@ int DataScan::scan_inodes() // This is the layout we will use for injection, populated either // from loaded_layout or from best guesses file_layout_t guessed_layout; - guessed_layout.pool_id = data_pool_id; + if (accum_res.obj_pool_id == -1) { + guessed_layout.pool_id = data_pool_id; + } else { + guessed_layout.pool_id = accum_res.obj_pool_id; + + librados::IoCtx ioctx; + r = librados::Rados(data_io).ioctx_create2(guessed_layout.pool_id, ioctx); + if (r != 0) { + derr << "Unexpected error opening file data pool id=" + << guessed_layout.pool_id << ": " << cpp_strerror(r) << dendl; + return r; + } + + bufferlist bl; + int r = ioctx.getxattr(oid, "layout", bl); + if (r < 0) { + if (r != -ENODATA) { + derr << "Unexpected error reading layout for " << oid << ": " + << cpp_strerror(r) << dendl; + return r; + } + } else { + try { + auto q = bl.cbegin(); + decode(loaded_layout, q); + } catch (ceph::buffer::error &e) { + derr << "Unexpected error decoding layout for " << oid << dendl; + return -EINVAL; + } + } + } // Calculate file_size, guess the layout if (accum_res.ceiling_obj_index > 0) { @@ -737,14 +825,20 @@ int DataScan::scan_inodes() // We have a stashed layout that we can't disprove, so apply it guessed_layout = loaded_layout; dout(20) << "loaded layout from xattr:" + << " pi: " << guessed_layout.pool_id << " os: " << guessed_layout.object_size << " sc: " << guessed_layout.stripe_count << " su: " << guessed_layout.stripe_unit << dendl; // User might have transplanted files from a pool with a different - // ID, so whatever the loaded_layout says, we'll force the injected - // layout to point to the pool we 
really read from - guessed_layout.pool_id = data_pool_id; + // ID, so if the pool from loaded_layout is not found in the list of + // the data pools, we'll force the injected layout to point to the + // pool we read from. + if (!fsmap->get_filesystem(fscid)->mds_map.is_data_pool( + guessed_layout.pool_id)) { + dout(20) << "overwriting layout pool_id " << data_pool_id << dendl; + guessed_layout.pool_id = data_pool_id; + } } if (guessed_layout.stripe_count == 1) { @@ -755,6 +849,19 @@ int DataScan::scan_inodes() // Striped file: need to examine the last stripe_count objects // in the file to determine the size. + librados::IoCtx ioctx; + if (guessed_layout.pool_id == data_io.get_id()) { + ioctx.dup(data_io); + } else { + r = librados::Rados(data_io).ioctx_create2(guessed_layout.pool_id, + ioctx); + if (r != 0) { + derr << "Unexpected error opening file data pool id=" + << guessed_layout.pool_id << ": " << cpp_strerror(r) << dendl; + return r; + } + } + // How many complete (i.e. not last stripe) objects? uint64_t complete_objs = 0; if (accum_res.ceiling_obj_index > guessed_layout.stripe_count - 1) { @@ -782,7 +889,7 @@ int DataScan::scan_inodes() uint64_t osize(0); time_t omtime(0); - r = data_io.stat(std::string(buf), &osize, &omtime); + r = ioctx.stat(std::string(buf), &osize, &omtime); if (r == 0) { if (osize > 0) { // Upper bound within this object @@ -814,7 +921,8 @@ int DataScan::scan_inodes() || loaded_layout.object_size < accum_res.max_obj_size) { // No layout loaded, or inconsistent layout, use default guessed_layout = file_layout_t::get_default(); - guessed_layout.pool_id = data_pool_id; + guessed_layout.pool_id = accum_res.obj_pool_id != -1 ? 
+ accum_res.obj_pool_id : data_pool_id; } else { guessed_layout = loaded_layout; } diff --git a/src/tools/cephfs/DataScan.h b/src/tools/cephfs/DataScan.h index a8d73e5825c66..4b8f34bf69a29 100644 --- a/src/tools/cephfs/DataScan.h +++ b/src/tools/cephfs/DataScan.h @@ -252,6 +252,8 @@ class DataScan : public MDSUtility, public MetadataTool librados::IoCtx data_io; // Remember the data pool ID for use in layouts int64_t data_pool_id; + // IoCtxs for extra data pools + std::vector<librados::IoCtx> extra_data_ios; uint32_t n; uint32_t m; -- 2.39.5