From 5fa0825bf3be437f924edf10efc93304e605ac1c Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 10 Aug 2022 13:20:39 -0400 Subject: [PATCH] tools/cephfs: add basic detection/cleanup tool for dentry first damage This tool iterates through the metadata pool looking for dentries with nonsensical "first" (snapshot) metadata. It only specifically looks for damage of the kind where the "first" value is greater than the latest generated snapshot for the file system. It does not detect other kinds of genuine damage to the "first" metadata. Fixes: https://tracker.ceph.com/issues/56140 Signed-off-by: Patrick Donnelly --- src/tools/cephfs/first-damage.sh | 118 +++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100755 src/tools/cephfs/first-damage.sh diff --git a/src/tools/cephfs/first-damage.sh b/src/tools/cephfs/first-damage.sh new file mode 100755 index 0000000000000..db50e8bdb0cfe --- /dev/null +++ b/src/tools/cephfs/first-damage.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# Ceph - scalable distributed file system +# +# Copyright (C) 2022 Red Hat, Inc. +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation. See file COPYING. + +# Suggested recovery sequence (for single MDS cluster): +# +# 1) Unmount all clients. +# +# 2) Flush the journal (if possible): +# +# ceph tell mds.<fs_name>:0 flush journal +# +# 3) Fail the file system: +# +# ceph fs fail <fs_name> +# +# 4a) Recover dentries from the journal. 
#     This will be a no-op if the MDS flushed the journal successfully:
#
#    cephfs-journal-tool --rank=<fs_name>:0 event recover_dentries summary
#
# 4b) If all good so far, reset the journal:
#
#    cephfs-journal-tool --rank=<fs_name>:0 journal reset
#
# 5) Run this tool to see list of damaged dentries:
#
#    first-damage.sh <metadata pool>
#
# 6) Optionally, remove them:
#
#    first-damage.sh --remove <metadata pool>
#
# This has the effect of removing that dentry from the snapshot or HEAD
# (current hierarchy). Note: the inode's linkage will be lost. The inode may
# be recoverable in lost+found during a future data scan recovery.

set -ex

# 1 when damaged dentries should be deleted, 0 when they are only reported.
# Initialised here so the numeric test in traverse never sees an empty string.
REMOVE=0

# Print the command-line synopsis and exit with status 1.
function usage {
  printf '%s: [--remove] <metadata pool> [newest snapid]\n' "$0"
  printf '  remove CephFS metadata dentries with invalid first snapshot\n'
  exit 1
}

# Run rados against the pool named by the global METADATA_POOL.
# Arguments: passed through to rados unchanged.
function mrados {
  rados --pool="$METADATA_POOL" "$@"
}

# read_le_uint FILE OFFSET WIDTH
# Print, as a bare decimal string, the WIDTH-byte little-endian unsigned
# integer found at byte OFFSET of FILE.
# Returns non-zero when od fails (e.g. OFFSET is past the end of FILE) or
# produces no number.
function read_le_uint {
  local raw
  raw=$(od --endian=little -An -t "u$3" -j "$2" -N "$3" "$1") || return 1
  # od pads its output with blanks; keep only the digits.
  raw=${raw//[[:space:]]/}
  if ! [[ "$raw" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  printf '%s\n' "$raw"
}

# uint_gt A B
# Succeed when the unsigned decimal number A is strictly greater than B.
# The comparison is done on the digit strings (length first, then digit by
# digit) because bash integer tests are signed 64-bit and reject values
# above 2^63-1, which a corrupted 64-bit field can easily hold.
function uint_gt {
  local a=$1 b=$2
  # Drop leading zeros so that string length reflects magnitude.
  while [[ "$a" == 0?* ]]; do a=${a#0}; done
  while [[ "$b" == 0?* ]]; do b=${b#0}; done
  if [ "${#a}" -ne "${#b}" ]; then
    if [ "${#a}" -gt "${#b}" ]; then
      return 0
    fi
    return 1
  fi
  if [[ "$a" > "$b" ]]; then
    return 0
  fi
  return 1
}

# Walk every directory-fragment object in the metadata pool and report each
# omap entry (dentry) whose leading "first" snapid is greater than NEXT_SNAP.
# When REMOVE is non-zero the offending omap key is deleted as well.
# Globals: METADATA_POOL, NEXT_SNAP, REMOVE, WORKDIR (all read; set by main).
function traverse {
  local objects="$WORKDIR/objects"
  local matched="$WORKDIR/matched"
  local keys="$WORKDIR/keys"
  local value="$WORKDIR/value"
  local obj dnk first

  mrados ls > "$objects"
  # grep exits 1 when no object name matches; that just means there is
  # nothing to scan and must not abort the script under "set -e".
  grep -E '[[:xdigit:]]{8,}\.[[:xdigit:]]+' "$objects" > "$matched" || [ "$?" -eq 1 ]

  # Dedicated descriptors keep the rados child processes away from the lists
  # being iterated. Reading line by line (instead of word-splitting a command
  # substitution) keeps dentry keys containing blanks or glob characters
  # intact. Keys containing a newline still cannot be represented.
  while IFS= read -r obj <&3; do
    mrados listomapkeys "$obj" > "$keys"
    while IFS= read -r dnk <&4; do
      mrados getomapval "$obj" "$dnk" "$value"
      # "first" is a snapid: read all 8 bytes, the same width this script
      # uses for the snapid stored in the snaptable, rather than only the
      # low 4 bytes.
      if ! first=$(read_le_uint "$value" 0 8); then
        printf 'skipping "%s:%s": cannot decode first\n' "$obj" "$dnk" >&2
        continue
      fi
      if uint_gt "$first" "$NEXT_SNAP"; then
        printf 'found "%s:%s" first (0x%x) > NEXT_SNAP (0x%x)\n' "$obj" "$dnk" "$first" "$NEXT_SNAP"
        if [ "$REMOVE" -ne 0 ]; then
          printf 'removing "%s:%s"\n' "$obj" "$dnk"
          mrados rmomapkey "$obj" "$dnk"
        fi
      fi
    done 4< "$keys"
  done 3< "$matched"
}

# Entry point.
# Arguments: [-r|--remove] [-h|--help] <metadata pool> [newest snapid]
# When the newest snapid is not given it is derived from the mds_snaptable
# object of the metadata pool.
# Exits 1 on usage errors and 2 when the snaptable cannot be decoded.
function main {
  local parsed last_snap

  # Let getopt report bad options, then fall back to the synopsis.
  parsed=$(getopt --name "$0" --options 'hr' --longoptions 'help,remove' -- "$@") || usage
  eval set -- "$parsed"

  while [ "$#" -gt 0 ]; do
    case "$1" in
      -h|--help)
        usage
        ;;
      -r|--remove)
        REMOVE=1
        shift
        ;;
      --)
        shift
        break
        ;;
      *)
        # Anything unexpected would otherwise never be shifted and loop forever.
        usage
        ;;
    esac
  done

  if [ -z "$1" ]; then
    usage
  fi
  METADATA_POOL="$1"
  NEXT_SNAP="$2"

  # All scratch files live in one private directory that is removed on exit.
  WORKDIR=$(mktemp -d -p /tmp first-damage.XXXXXX)
  trap 'rm -rf -- "${WORKDIR:?}"' EXIT

  if [ -z "$NEXT_SNAP" ]; then
    SNAPTABLE="$WORKDIR/snaptable"
    mrados get mds_snaptable "$SNAPTABLE"
    # skip "version" of MDSTable payload
    V=$(read_le_uint "$SNAPTABLE" 8 1) || V=''
    if [ "$V" != 5 ]; then
      printf 'incompatible snaptable\n'
      exit 2
    fi
    # skip version,struct_v,compat_v,length
    if ! last_snap=$(read_le_uint "$SNAPTABLE" 14 8); then
      printf 'incompatible snaptable\n'
      exit 2
    fi
    NEXT_SNAP=$((1 + last_snap))
    printf 'found latest snap: %d\n' "$NEXT_SNAP"
  elif ! [[ "$NEXT_SNAP" =~ ^[0-9]+$ ]]; then
    # A non-numeric snapid cannot be compared against "first".
    usage
  fi

  traverse
}

main "$@"