# nuke snap 1 version of "a"
rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-4))) "$T"
- printf '\xff\xff\xff\xf0' | dd of="$T" count=4 bs=1
+ printf '\xff\xff\xff\xf0' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat
rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-4))) --input-file="$T"
# nuke snap 4 version of "a"
rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-1))) "$T"
- printf '\xff\xff\xff\xff' | dd of="$T" count=4 bs=1
+ printf '\xff\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat
rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-1))) --input-file="$T"
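+ # corrupt the HEAD version of "a": overwrite the first 4 bytes of its omap value with fe ff ff ff (0xfffffffe when read little-endian)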
+ rados --pool="$METADATA_POOL" getomapval "$IS" a_head "$T"
+ printf '\xfe\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat
+ rados --pool="$METADATA_POOL" setomapval "$IS" a_head --input-file="$T"
+
rm -f "$T"
}
sleep 5
cephfs-journal-tool --rank="$FS":0 event recover_dentries summary
cephfs-journal-tool --rank="$FS":0 journal reset
- python3 $FIRST_DAMAGE --memo /tmp/memo1 "$METADATA_POOL"
- python3 $FIRST_DAMAGE --memo /tmp/memo2 --remove "$METADATA_POOL"
+ python3 $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL"
+ python3 $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL"
+ python3 $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL"
ceph fs set "$FS" joinable true
}
function check {
stat dir || exit 1
+ stat dir/a || exit 1
for i in `seq 1 5`; do
stat dir/.snap/$i || exit 2
done
# Tear down the test tree rooted at ./dir (relative to the current directory).
# Takes no arguments. Steps, in order:
#   1. rmdir every entry matched by dir/.snap/*
#   2. list what is left under dir
#   3. delete dir recursively
function cleanup {
# presumably these are the snapshots probed by check (dir/.snap/1..5) — confirm
rmdir dir/.snap/*
# prints every path still under dir before it is deleted; presumably a debugging aid — confirm
+ find dir
rm -rf dir
}
# Run-time settings. The values given here are placeholders: POOL, NEXT_SNAP,
# CONF and REPAIR_NOSNAP are all reassigned from the parsed command-line
# arguments (NS.pool, NS.next_snap, NS.conf, NS.repair_nosnap).
POOL = None
NEXT_SNAP = None
# NOTE(review): subscripting os.environ raises KeyError when CEPH_CONF is not
# set in the environment — confirm that failing here is intended.
CONF = os.environ['CEPH_CONF']
+REPAIR_NOSNAP = None
+
# Value that a dentry's "first" field (the leading 4 bytes of the omap value,
# unpacked as little-endian unsigned 32-bit) is compared against when
# REPAIR_NOSNAP is enabled.
+CEPH_NOSNAP = 0xfffffffe # int32 -2
# Pattern: at least 8 hex digits, a literal dot, then one or more hex digits.
# Presumably the naming scheme of directory objects in the metadata pool —
# TODO confirm against where DIR_PATTERN is used.
DIR_PATTERN = re.compile(r'[0-9a-fA-F]{8,}\.[0-9a-fA-F]+')
it = ioctx.get_omap_vals(rctx, None, None, 100000)[0]
ioctx.operate_read_op(rctx, o.key)
for (dnk, val) in it:
- log.debug('\t%s', dnk)
+ log.debug('\t%s: val size %d', dnk, len(val))
(first,) = struct.unpack('<I', val[:4])
if first > NEXT_SNAP:
log.warning(f"found {o.key}:{dnk} first (0x{first:x}) > NEXT_SNAP (0x{NEXT_SNAP:x})")
- if REMOVE:
+ if REPAIR_NOSNAP and dnk.endswith("_head") and first == CEPH_NOSNAP:
+ log.warning(f"repairing first==CEPH_NOSNAP damage, setting to NEXT_SNAP (0x{NEXT_SNAP:x})")
+ first = NEXT_SNAP
+ nval = bytearray(val)
+ struct.pack_into("<I", nval, 0, NEXT_SNAP)
+ with rados.WriteOpCtx() as wctx:
+ ioctx.set_omap(wctx, (dnk,), (bytes(nval),))
+ ioctx.operate_write_op(wctx, o.key)
+ elif REMOVE:
log.warning(f"removing {o.key}:{dnk}")
with rados.WriteOpCtx() as wctx:
ioctx.remove_omap_keys(wctx, [dnk])
P.add_argument('--memo', action='store', help='db for traversed dirs', default=outpath+'.memo')
P.add_argument('--next-snap', action='store', help='force next-snap (dev)', type=int)
P.add_argument('--remove', action='store_true', help='remove bad dentries', default=False)
+ P.add_argument('--repair-nosnap', action='store_true', help='repair first=CEPH_NOSNAP damage', default=False)
P.add_argument('pool', action='store', help='metadata pool', type=str)
NS = P.parse_args()
POOL = NS.pool
NEXT_SNAP = NS.next_snap
CONF = NS.conf
+ REPAIR_NOSNAP = NS.repair_nosnap
log.info("running as pid %d", os.getpid())