From c83cb7a11df3d47e232420c4762ba0d72c6fbd7a Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 8 Sep 2016 23:58:26 +0100 Subject: [PATCH] tools/cephfs: add pg_files command Sometimes users know that particular data pool PGs have been damaged, and they would like to scan their files to work out which ones might have been affected. Fixes: http://tracker.ceph.com/issues/17249 Signed-off-by: John Spray --- doc/cephfs/disaster-recovery.rst | 37 ++++++ src/mds/CInode.cc | 3 + src/tools/cephfs/CMakeLists.txt | 3 +- src/tools/cephfs/DataScan.cc | 29 +++++ src/tools/cephfs/PgFiles.cc | 192 +++++++++++++++++++++++++++++++ src/tools/cephfs/PgFiles.h | 51 ++++++++ 6 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 src/tools/cephfs/PgFiles.cc create mode 100644 src/tools/cephfs/PgFiles.h diff --git a/doc/cephfs/disaster-recovery.rst b/doc/cephfs/disaster-recovery.rst index 8d6748d57428c..ee54c90a70a63 100644 --- a/doc/cephfs/disaster-recovery.rst +++ b/doc/cephfs/disaster-recovery.rst @@ -160,3 +160,40 @@ the range 0-(N_workers - 1), like so: It is important to ensure that all workers have completed the scan_extents phase before any workers enter the scan_inodes phase. + +Finding files affected by lost data PGs +--------------------------------------- + +Losing a data PG may affect many files. Files are split into many objects, +so identifying which files are affected by loss of particular PGs requires +a full scan over all object IDs that may exist within the size of a file. +This type of scan may be useful for identifying which files require +restoring from a backup. + +.. danger:: + + This command does not repair any metadata, so when restoring files in + this case you must *remove* the damaged file, and replace it in order + to have a fresh inode. Do not overwrite damaged files in place. 
+ +If you know that objects have been lost from PGs, use the ``pg_files`` +subcommand to scan for files that may have been damaged as a result: + +:: + + cephfs-data-scan pg_files <path> <pg id> [<pg id>...] + +For example, if you have lost data from PGs 1.4 and 4.5, and you would like +to know which files under /home/bob might have been damaged: + +:: + + cephfs-data-scan pg_files /home/bob 1.4 4.5 + +The output will be a list of paths to potentially damaged files, one +per line. + +Note that this command acts as a normal CephFS client to find all the +files in the filesystem and read their layouts, so the MDS must be +up and running. + diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 5dcdcb1577f2f..9a882285599e4 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -3816,6 +3816,9 @@ void CInode::validate_disk_state(CInode::validated_data *results, // Ignore rval because it's the result of a FAILOK operation // from fetch_backtrace_and_tag: the real result is in // backtrace.ondisk_read_retval + MDCache *mdcache = in->mdcache; + const inode_t& inode = in->inode; + dout(20) << "ondisk_read_retval: " << results->backtrace.ondisk_read_retval << dendl; if (results->backtrace.ondisk_read_retval != 0) { results->backtrace.error_str << "failed to read off disk; see retval"; goto next; diff --git a/src/tools/cephfs/CMakeLists.txt b/src/tools/cephfs/CMakeLists.txt index 03fcf7243ad60..c9339ce8ac618 100644 --- a/src/tools/cephfs/CMakeLists.txt +++ b/src/tools/cephfs/CMakeLists.txt @@ -25,9 +25,10 @@ set(cephfs_data_scan_srcs cephfs-data-scan.cc DataScan.cc RoleSelector.cc + PgFiles.cc MDSUtility.cc) add_executable(cephfs-data-scan ${cephfs_data_scan_srcs}) -target_link_libraries(cephfs-data-scan librados mds osdc global +target_link_libraries(cephfs-data-scan librados cephfs mds osdc global cls_cephfs_client ${BLKID_LIBRARIES} ${CMAKE_DL_LIBS}) diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index c7a843c076ddc..dd5978c9e6109 100644 --- 
a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -20,6 +20,7 @@ #include "mds/CInode.h" #include "cls/cephfs/cls_cephfs_client.h" +#include "PgFiles.h" #include "DataScan.h" #include "include/compat.h" @@ -33,6 +34,7 @@ void DataScan::usage() << " cephfs-data-scan init [--force-init]\n" << " cephfs-data-scan scan_extents [--force-pool] \n" << " cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] \n" + << " cephfs-data-scan pg_files [...]\n" << "\n" << " --force-corrupt: overrite apparently corrupt structures\n" << " --force-init: write root inodes even if they exist\n" @@ -142,6 +144,9 @@ int DataScan::main(const std::vector &args) std::string data_pool_name; std::string metadata_pool_name; + std::string pg_files_path; + std::set pg_files_pgs; + // Consume any known --key val or --flag arguments for (std::vector::const_iterator i = args.begin() + 1; i != args.end(); ++i) { @@ -164,6 +169,24 @@ int DataScan::main(const std::vector &args) continue; } + if (command == "pg_files") { + if (i == args.begin() + 1) { + pg_files_path = *i; + continue; + } else { + pg_t pg; + bool parsed = pg.parse(*i); + if (!parsed) { + std::cerr << "Invalid PG '" << *i << "'" << std::endl; + return -EINVAL; + } else { + pg_files_pgs.insert(pg); + continue; + } + } + + } + // Fall through: unhandled std::cerr << "Unknown argument '" << *i << "'" << std::endl; return -EINVAL; @@ -203,6 +226,12 @@ int DataScan::main(const std::vector &args) return r; } + if (command == "pg_files") { + auto pge = PgFiles(objecter, pg_files_pgs); + pge.init(); + return pge.scan_path(pg_files_path); + } + // Initialize data_io for those commands that need it if (command == "scan_inodes" || command == "scan_extents") { diff --git a/src/tools/cephfs/PgFiles.cc b/src/tools/cephfs/PgFiles.cc new file mode 100644 index 0000000000000..c581eb9ac509a --- /dev/null +++ b/src/tools/cephfs/PgFiles.cc @@ -0,0 +1,192 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// 
vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include "PgFiles.h"
+
+
+#define dout_subsys ceph_subsys_mds
+#undef dout_prefix
+#define dout_prefix *_dout << "pgeffects." << __func__ << ": "
+
+/**
+ * Create and initialize the libcephfs handle used to walk the filesystem.
+ *
+ * The handle is built from the process-wide g_ceph_context.  Mounting is
+ * not done here; scan_path() mounts and unmounts around the scan.
+ *
+ * @return 0 on success, otherwise the non-zero result of
+ *         ceph_create_with_context() or ceph_init()
+ */
+int PgFiles::init()
+{
+  int r = ceph_create_with_context(&cmount, g_ceph_context);
+  if (r != 0) {
+    return r;
+  }
+
+  return ceph_init(cmount);
+}
+
+/**
+ * @param o     objecter whose OSDMap is consulted to map objects to PGs
+ * @param pgs_  the PGs of interest
+ *
+ * The set of pools referenced by the PGs is precomputed so that
+ * hit_file() can discard files whose layout targets any other pool
+ * without doing per-object work.
+ */
+PgFiles::PgFiles(Objecter *o, std::set<pg_t> pgs_)
+  : objecter(o), cmount(nullptr), pgs(pgs_)
+{
+  for (const auto &i : pgs) {
+    pools.insert(i.m_pool);
+  }
+}
+
+PgFiles::~PgFiles()
+{
+  // cmount stays null until init() has created the handle; never hand
+  // libcephfs a pointer that was not created.
+  if (cmount != nullptr) {
+    ceph_release(cmount);
+  }
+}
+
+/**
+ * Recursively walk the directory at `path`: regular files are passed to
+ * hit_file(), subdirectories are descended into, anything else is skipped.
+ *
+ * Failures are logged and the affected directory or entry is skipped;
+ * nothing is reported back to the caller.
+ *
+ * NOTE(review): entries are examined with ceph_stat(); presumably that
+ * follows symlinks, in which case a symlink to a directory is descended
+ * into -- confirm whether ceph_lstat() would be more appropriate.
+ */
+void PgFiles::hit_dir(std::string const &path)
+{
+  dout(10) << "entering " << path << dendl;
+
+  ceph_dir_result *dr = nullptr;
+  int r = ceph_opendir(cmount, path.c_str(), &dr);
+  if (r != 0) {
+    derr << "Failed to open path: " << cpp_strerror(r) << dendl;
+    return;
+  }
+
+  struct dirent de;
+  // A zero return ends the listing; negative values are errors.
+  while((r = ceph_readdir_r(cmount, dr, &de)) != 0) {
+    if (r < 0) {
+      derr << "Error reading path " << path << ": " << cpp_strerror(r)
+           << dendl;
+      ceph_closedir(cmount, dr); // best effort, ignore r
+      return;
+    }
+
+    const std::string name(de.d_name);
+    if (name == "." || name == "..") {
+      continue;
+    }
+
+    struct stat st;
+    const std::string de_path = path + std::string("/") + name;
+    r = ceph_stat(cmount, de_path.c_str(), &st);
+    if (r != 0) {
+      derr << "Failed to stat path " << de_path << ": "
+           << cpp_strerror(r) << dendl;
+      // Don't hold up the whole process for one bad inode
+      continue;
+    }
+
+    if (S_ISREG(st.st_mode)) {
+      hit_file(de_path, st);
+    } else if (S_ISDIR(st.st_mode)) {
+      hit_dir(de_path);
+    } else {
+      dout(20) << "Skipping non reg/dir file: " << de_path << dendl;
+    }
+  }
+
+  r = ceph_closedir(cmount, dr);
+  if (r != 0) {
+    derr << "Error closing path " << path << ": " << cpp_strerror(r) << dendl;
+    return;
+  }
+}
+
+/**
+ * Print `path` on stdout if any object that may back the file maps to one
+ * of the target PGs.
+ *
+ * The file's layout is read, the number of objects implied by the layout
+ * and st.st_size is computed, and an object name of the form
+ * "<inode in hex>.<object index, at least 8 hex digits>" is generated for
+ * each index.  Each name is mapped to a PG using the objecter's OSDMap.
+ * The path is printed at most once.
+ *
+ * @param path  path of a regular file
+ * @param st    stat result for `path`; st_mode, st_ino and st_size are used
+ */
+void PgFiles::hit_file(std::string const &path, struct stat const &st)
+{
+  assert(S_ISREG(st.st_mode));
+
+  dout(20) << "Hitting file '" << path << "'" << dendl;
+
+  int l_stripe_unit = 0;
+  int l_stripe_count = 0;
+  int l_object_size = 0;
+  int l_pool_id = 0;
+  int r = ceph_get_path_layout(cmount, path.c_str(), &l_stripe_unit,
+                               &l_stripe_count, &l_object_size,
+                               &l_pool_id);
+  if (r != 0) {
+    derr << "Error reading layout on " << path << ": " << cpp_strerror(r)
+         << dendl;
+    return;
+  }
+
+  struct file_layout_t layout;
+  layout.stripe_unit = l_stripe_unit;
+  layout.stripe_count = l_stripe_count;
+  layout.object_size = l_object_size;
+  layout.pool_id = l_pool_id;
+
+  // Avoid calculating PG if the layout targeted a completely different pool
+  if (pools.count(layout.pool_id) == 0) {
+    dout(20) << "Fast check missed: pool " << layout.pool_id << " not in "
+                "target set" << dendl;
+    return;
+  }
+
+  auto num_objects = Striper::get_num_objects(layout, st.st_size);
+
+  for (uint64_t i = 0; i < num_objects; ++i) {
+    // Worst case is 16 hex digits + '.' + 16 hex digits + NUL = 34 bytes,
+    // so size the buffer to hold that without truncation.
+    char buf[40];
+    snprintf(buf, sizeof(buf), "%llx.%08llx", (long long unsigned)st.st_ino,
+             (long long unsigned int)i);
+    dout(20) << "  object " << std::string(buf) << dendl;
+
+    pg_t target;
+    object_t oid;
+    object_locator_t loc;
+    loc.pool = layout.pool_id;
+    loc.key = std::string(buf);
+
+    unsigned pg_num_mask = 0;
+    unsigned pg_num = 0;
+
+    // The callback's results are read immediately after with_osdmap()
+    // returns, so everything it touches can be captured by reference.
+    int map_r = 0;
+    objecter->with_osdmap([&map_r, &oid, &loc, &target, &pg_num_mask, &pg_num]
+                          (const OSDMap &osd_map) {
+      map_r = osd_map.object_locator_to_pg(oid, loc, target);
+      if (map_r == 0) {
+        auto pool = osd_map.get_pg_pool(loc.pool);
+        pg_num_mask = pool->get_pg_num_mask();
+        pg_num = pool->get_pg_num();
+      }
+    });
+    if (map_r != 0) {
+      // Can happen if layout pointed to pool not in osdmap, for example
+      continue;
+    }
+
+    // Fold the raw placement seed into the pool's actual PG count so it
+    // can be compared against the PG ids supplied by the user.
+    target.m_seed = ceph_stable_mod(target.ps(), pg_num, pg_num_mask);
+
+    dout(20) << "  target " << target << dendl;
+
+    if (pgs.count(target)) {
+      std::cout << path << std::endl;
+      return;
+    }
+  }
+
+}
+
+/**
+ * Mount the filesystem, scan everything at or below `path`, then unmount.
+ *
+ * The starting path is checked up front: if it cannot be examined the
+ * error is returned, rather than reporting success with an empty result
+ * that would read as "no files affected".  A starting path that is itself
+ * a regular file is checked directly.  Errors on individual entries below
+ * the starting path are logged by hit_dir() and do not affect the result.
+ *
+ * @param path  absolute path within the filesystem to start from
+ * @return 0 on success, otherwise the error from mounting, examining
+ *         `path`, or unmounting
+ */
+int PgFiles::scan_path(std::string const &path)
+{
+  int r = ceph_mount(cmount, "/");
+  if (r != 0) {
+    derr << "Failed to mount: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  struct stat st;
+  r = ceph_stat(cmount, path.c_str(), &st);
+  if (r != 0) {
+    derr << "Failed to stat path " << path << ": " << cpp_strerror(r)
+         << dendl;
+    ceph_unmount(cmount); // best effort, the stat error is what we report
+    return r;
+  }
+
+  if (S_ISREG(st.st_mode)) {
+    hit_file(path, st);
+  } else {
+    hit_dir(path);
+  }
+
+  r = ceph_unmount(cmount);
+  if (r != 0) {
+    derr << "Failed to unmount: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return r;
+}
+
diff --git a/src/tools/cephfs/PgFiles.h b/src/tools/cephfs/PgFiles.h
new file mode 100644
index 0000000000000..38337b6460b02
--- /dev/null
+++ b/src/tools/cephfs/PgFiles.h
@@ -0,0 +1,51 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef PG_EFFECTS_H_
+#define PG_EFFECTS_H_
+
+#include "include/cephfs/libcephfs.h"
+#include "osd/osd_types.h"
+#include <set>
+#include "osdc/Objecter.h"
+
+/**
+ * This utility scans the files (via an online MDS) and works out
+ * which ones rely on named PGs. For use when someone has
+ * some bad/damaged PGs and wants to see which files might be
+ * affected.
+ */
+class PgFiles
+{
+private:
+  // Provides OSDMap access for mapping object names to PGs
+  Objecter *objecter;
+  // libcephfs handle; null until init() creates it, released by the
+  // destructor.
+  // NOTE(review): the class owns this handle but is implicitly copyable;
+  // a real copy would release the same handle twice.  Presumably callers
+  // only ever construct it in place -- confirm.
+  struct ceph_mount_info *cmount = nullptr;
+
+  // PGs the caller wants to find files for
+  std::set<pg_t> pgs;
+  // Pool ids referenced by `pgs`, used to cheaply skip files whose layout
+  // targets another pool.
+  // NOTE(review): element type reconstructed as uint64_t (filled from
+  // pg_t::m_pool) -- confirm against osd_types.h.
+  std::set<uint64_t> pools;
+
+  // Print `path` if any object of the regular file described by `st` maps
+  // to one of the target PGs.
+  void hit_file(std::string const &path, const struct stat &st);
+  // Recursively walk `path`, calling hit_file() on every regular file.
+  void hit_dir(std::string const &path);
+
+
+public:
+  /**
+   * @param o     objecter used to look up the OSDMap
+   * @param pgs_  the PGs of interest
+   */
+  PgFiles(Objecter *o, std::set<pg_t> pgs_);
+  ~PgFiles();
+
+  /// Create and initialize the libcephfs handle; returns 0 on success.
+  int init();
+  /// Mount, scan everything under `path` printing matching file paths to
+  /// stdout, then unmount; returns 0 on success.
+  int scan_path(std::string const &path);
+};
+
+#endif
+
-- 
2.39.5