From 17045abb66bde2a85e3b9def9c108a780a43b7fd Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Sat, 1 Oct 2022 15:04:22 -0400 Subject: [PATCH] tools/cephfs: convert first-damage.sh to python To have only one RADOS instance and improve performance/reliability. Signed-off-by: Patrick Donnelly --- src/tools/cephfs/first-damage.py | 137 +++++++++++++++++++++++++++++++ src/tools/cephfs/first-damage.sh | 120 --------------------------- 2 files changed, 137 insertions(+), 120 deletions(-) create mode 100644 src/tools/cephfs/first-damage.py delete mode 100755 src/tools/cephfs/first-damage.sh diff --git a/src/tools/cephfs/first-damage.py b/src/tools/cephfs/first-damage.py new file mode 100644 index 00000000000..fb744ed9abb --- /dev/null +++ b/src/tools/cephfs/first-damage.py @@ -0,0 +1,137 @@ +# Ceph - scalable distributed file system +# +# Copyright (C) 2022 Red Hat, Inc. +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation. See file COPYING. + +# Suggested recovery sequence (for single MDS cluster): +# +# 1) Unmount all clients. +# +# 2) Flush the journal (if possible): +# +# ceph tell mds.<fs_name>:0 flush journal +# +# 3) Fail the file system: +# +# ceph fs fail <fs_name> +# +# 4a) Recover dentries from the journal. This will be a no-op if the MDS flushed the journal successfully: +# +# cephfs-journal-tool --rank=<fs_name>:0 event recover_dentries summary +# +# 4b) If all good so far, reset the journal: +# +# cephfs-journal-tool --rank=<fs_name>:0 journal reset +# +# 5) Run this tool to see list of damaged dentries: +# +# python3 first-damage.py --memo run.1 <pool> +# +# 6) Optionally, remove them: +# +# python3 first-damage.py --memo run.2 --remove <pool> +# +# Note: use --memo to specify a different file to save objects that have +# already been traversed, for independent runs. 
+# +# This has the effect of removing that dentry from the snapshot or HEAD +# (current hierarchy). Note: the inode's linkage will be lost. The inode may +# be recoverable in lost+found during a future data scan recovery. + +import argparse +import logging +import os +import rados +import re +import sys +import struct + +log = logging.getLogger("first-damage-traverse") + +MEMO = None +REMOVE = False +POOL = None +NEXT_SNAP = None +CONF = None + +DIR_PATTERN = re.compile(r'[0-9a-fA-F]{8,}\.[0-9a-fA-F]+') + +CACHE = set() + +def traverse(MEMO, ioctx): + for o in ioctx.list_objects(): + if not DIR_PATTERN.fullmatch(o.key): + log.debug("skipping %s", o.key) + continue + elif o.key in CACHE: + log.debug("skipping previously examined object %s", o.key) + continue + log.info("examining: %s", o.key) + + with rados.ReadOpCtx() as rctx: + it = ioctx.get_omap_vals(rctx, None, None, 100000)[0] + ioctx.operate_read_op(rctx, o.key) + for (dnk, val) in it: + log.debug('\t%s', dnk) + (first,) = struct.unpack(' NEXT_SNAP: + log.warning(f"found {o.key}:{dnk} first (0x{first:x}) > NEXT_SNAP (0x{NEXT_SNAP:x})") + if REMOVE: + log.warning(f"removing {o.key}:{dnk}") + with rados.WriteOpCtx() as wctx: + ioctx.remove_omap_keys(wctx, [dnk]) + ioctx.operate_write_op(wctx, o.key) + MEMO.write(f"{o.key}\n") + +if __name__ == '__main__': + outpath = os.path.join(os.path.expanduser('~'), os.path.basename(sys.argv[0])) + P = argparse.ArgumentParser(description="remove CephFS metadata dentries with invalid first snapshot") + P.add_argument('--conf', action='store', help='Ceph conf file', type=str) + P.add_argument('--debug', action='store', help='debug file', type=str, default=outpath+'.log') + P.add_argument('--memo', action='store', help='db for traversed dirs', default=outpath+'.memo') + P.add_argument('--next-snap', action='store', help='force next-snap (dev)', type=int) + P.add_argument('--remove', action='store_true', help='remove bad dentries', default=False) + P.add_argument('pool', 
action='store', help='metadata pool', type=str) + NS = P.parse_args() + + logging.basicConfig(filename=NS.debug, level=logging.DEBUG) + + MEMO = NS.memo + REMOVE = NS.remove + POOL = NS.pool + NEXT_SNAP = NS.next_snap + CONF = NS.conf + + log.info("running as pid %d", os.getpid()) + + try: + with open(MEMO) as f: + for line in f.readlines(): + CACHE.add(line.rstrip()) + except FileNotFoundError: + pass + + R = rados.Rados(conffile=CONF) + R.connect() + ioctx = R.open_ioctx(POOL) + + if NEXT_SNAP is None: + data = ioctx.read("mds_snaptable") + # skip "version" of MDSTable payload + # V=$(dd if="$SNAPTABLE" bs=1 count=1 skip=8 | od --endian=little -An -t u1) + V = struct.unpack(':0 flush journal -# -# 3) Fail the file system: -# -# ceph fs fail -# -# 4a) Recover dentries from the journal. This will be a no-op if the MDS flushed the journal successfully: -# -# cephfs-journal-tool --rank=:0 event recover_dentries summary -# -# 4b) If all good so far, reset the journal: -# -# cephfs-journal-tool --rank=:0 journal reset -# -# 5) Run this tool to see list of damaged dentries: -# -# first-damage.sh -# -# 6) Optionally, remove them: -# -# first-damage.sh --remove -# -# This has the effect of removing that dentry from the snapshot or HEAD -# (current hierarchy). Note: the inode's linkage will be lost. The inode may -# be recoverable in lost+found during a future data scan recovery. 
- -set -ex - -function usage { - printf '%s: [--remove] [newest snapid]\n' "$0" - printf ' remove CephFS metadata dentries with invalid first snapshot' - exit 1 -} - -function mrados { - rados --pool="$METADATA_POOL" "$@" -} - -function traverse { - local T=$(mktemp -p /tmp MDS_TRAVERSAL.XXXXXX) - mrados ls | grep -E '[[:xdigit:]]{8,}\.[[:xdigit:]]+' > "$T" - while read obj; do - local O=$(mktemp -p /tmp "$obj".XXXXXX) - local KEYS=$(mktemp -p /tmp "$obj"-keys.XXXXXX) - mrados listomapkeys "$obj" > "$KEYS" - while read dnk; do - mrados getomapval "$obj" "$dnk" "$O" - local first=$(dd if="$O" bs=1 count=4 | od --endian=little -An -t u8) - if [ "$first" -gt "$NEXT_SNAP" ]; then - printf 'found "%s:%s" first (0x%x) > NEXT_SNAP (0x%x)\n' "$obj" "$dnk" "$first" "$NEXT_SNAP" - if [ "$REMOVE" -ne 0 ]; then - printf 'removing "%s:%s"\n' "$obj" "$dnk" - mrados rmomapkey "$obj" "$dnk" - fi - fi - done < "$KEYS" - rm "$O" - done < "$T" -} - -function main { - eval set -- $(getopt --name "$0" --options 'r' --longoptions 'help,remove' -- "$@") - - while [ "$#" -gt 0 ]; do - case "$1" in - -h|--help) - usage - ;; - --remove) - REMOVE=1 - shift - ;; - --) - shift - break - ;; - esac - done - - if [ -z "$1" ]; then - usage - fi - METADATA_POOL="$1" - NEXT_SNAP="$2" - - if [ -z "$NEXT_SNAP" ]; then - SNAPTABLE=$(mktemp -p /tmp MDS_SNAPTABLE.XXXXXX) - rados --pool="$METADATA_POOL" get mds_snaptable "$SNAPTABLE" - # skip "version" of MDSTable payload - V=$(dd if="$SNAPTABLE" bs=1 count=1 skip=8 | od --endian=little -An -t u1) - if [ "$V" -ne 5 ]; then - printf 'incompatible snaptable\n' - exit 2 - fi - # skip version,struct_v,compat_v,length - NEXT_SNAP=$((1 + $(dd if="$SNAPTABLE" bs=1 count=8 skip=14 | od --endian=little -An -t u8))) - printf 'found latest snap: %d\n' "$NEXT_SNAP" - fi - - traverse -} - -main "$@" -- 2.39.5