From 84d9c4d177bb1b197910e2b93a4b186a9ccd13f1 Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Tue, 23 Aug 2022 05:12:18 +0000
Subject: [PATCH] tests/osd: create a Teuthology test for missing SnapMapper entries

The test (in the standalone/scrub suite) verifies that the scrubber
detects, and reports with a cluster-log error, a mapping entry ("SNA_")
that is missing from the SnapMapper DB. Specifically, here the entry is
corrupted: it is truncated, as described in
https://tracker.ceph.com/issues/56147.

Signed-off-by: Ronen Friedman
---
 qa/standalone/scrub/osd-mapper.sh    | 158 +++++++++++++++++++++++++++
 qa/standalone/scrub/scrub-helpers.sh |   3 +-
 src/osd/scrubber/scrub_backend.cc    |  12 +-
 3 files changed, 167 insertions(+), 6 deletions(-)
 create mode 100755 qa/standalone/scrub/osd-mapper.sh

diff --git a/qa/standalone/scrub/osd-mapper.sh b/qa/standalone/scrub/osd-mapper.sh
new file mode 100755
index 00000000000..989271b9158
--- /dev/null
+++ b/qa/standalone/scrub/osd-mapper.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+# vim: ts=8 sw=2 smarttab
+#
+# test the handling of a corrupted SnapMapper DB by Scrub
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
+
+function run() {
+  local dir=$1
+  shift
+
+  export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+  export CEPH_ARGS
+  CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+  CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+  export -n CEPH_CLI_TEST_DUP_COMMAND
+  local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+  for func in $funcs ; do
+    setup $dir || return 1
+    $func $dir || return 1
+    teardown $dir || return 1
+  done
+}
+
+# one clone & multiple snaps (according to the number of parameters)
+function make_a_clone()
+{
+  # turn off '-x' (but remember previous state)
+  local saved_echo_flag=${-//[^x]/}
+  set +x
+  local pool=$1
+  local obj=$2
+  echo $RANDOM | rados -p $pool put $obj - || return 1
+  shift 2
+  for snap in $@ ; do
+    rados -p $pool mksnap $snap || return 1
+  done
+  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
+
+function TEST_truncated_sna_record() {
+  local dir=$1
+  local -A cluster_conf=(
+    ['osds_num']="3"
+    ['pgs_in_pool']="4"
+    ['pool_name']="test"
+  )
+
+  local extr_dbg=1
+  (( extr_dbg > 1 )) && echo "Dir: $dir"
+  standard_scrub_cluster $dir cluster_conf
+  ceph tell osd.* config set osd_stats_update_period_not_scrubbing "1"
+  ceph tell osd.* config set osd_stats_update_period_scrubbing "1"
+
+  local osdn=${cluster_conf['osds_num']}
+  local poolid=${cluster_conf['pool_id']}
+  local poolname=${cluster_conf['pool_name']}
+  local objname="objxxx"
+
+  # create an object and clone it
+  make_a_clone $poolname $objname snap01 snap02 || return 1
+  make_a_clone $poolname $objname snap13 || return 1
+  make_a_clone $poolname $objname snap24 snap25 || return 1
+  echo $RANDOM | rados -p $poolname put $objname - || return 1
+
+  # identify the PG and the primary OSD
+  local pgid=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid'`
+  local osd=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
+  echo "pgid is $pgid (primary: osd.$osd)"
+  # turn on the publishing of test data in the 'scrubber' section of 'pg query' output
+  set_query_debug $pgid
+
+  # verify the existence of these clones
+  (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname
+
+  # scrub the PG
+  ceph pg $pgid deep_scrub || return 1
+
+  # we aren't just waiting for the scrub to terminate, but also for the
+  # logs to be published
+  sleep 3
+  ceph pg dump pgs
+  until grep -a -q -- "event: --^^^^---- ScrubFinished" $dir/osd.$osd.log ; do
+    sleep 0.2
+  done
+
+  ceph pg dump pgs
+  ceph osd set noscrub || return 1
+  ceph osd set nodeep-scrub || return 1
+  sleep 5
+  ! grep -a -q "ERR" $dir/osd.$osd.log || return 1
+
+  # kill the OSDs
+  kill_daemons $dir TERM osd || return 1
+
+  (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/0 dump "p"
+  (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump "p" | grep -a SNA_
+  (( extr_dbg >= 2 )) && grep -a SNA_ /tmp/oo2.dump
+  (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump p 2> /dev/null
+
+  for sdn in $(seq 0 $(expr $osdn - 1))
+  do
+    kvdir=$dir/$sdn
+    echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)"
+    (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv $kvdir dump "p"
+
+    # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone
+    KY=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \
+        | awk -e '{print $2;}'`
+    (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v
+
+    tmp_fn1=`mktemp -p /tmp --suffix="_the_val"`
+    (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1"
+    ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out $tmp_fn1 2> /dev/null
+    (( extr_dbg >= 2 )) && od -xc $tmp_fn1
+
+    NKY=${KY:0:-30}
+    ceph-kvstore-tool bluestore-kv $kvdir rm "p" "$KY" 2> /dev/null
+    ceph-kvstore-tool bluestore-kv $kvdir set "p" "$NKY" in $tmp_fn1 2> /dev/null
+
+    (( extr_dbg >= 1 )) || rm $tmp_fn1
+  done
+
+  orig_osd_args=" ${cluster_conf['osd_args']}"
+  orig_osd_args=" $(echo $orig_osd_args)"
+  (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/"
+  for sdn in $(seq 0 $(expr $osdn - 1))
+  do
+    CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn
+  done
+  sleep 1
+
+  for sdn in $(seq 0 $(expr $osdn - 1))
+  do
+    timeout 60 ceph tell osd.$sdn version
+  done
+
+  # when scrubbing now, we expect the scrub to emit a cluster-log ERR message regarding a SnapMapper internal inconsistency
+  ceph osd unset nodeep-scrub || return 1
+  ceph osd unset noscrub || return 1
+
+  # what is the primary now?
+  local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
+  ceph pg dump pgs
+  sleep 2
+  ceph pg $pgid deep_scrub || return 1
+  sleep 5
+  ceph pg dump pgs
+  (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
+  grep -a -q "ERR" $dir/osd.$cur_prim.log || return 1
+}
+
+
+
+main osd-mapper "$@"
diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
index c7f48150cc1..38677cf7566 100644
--- a/qa/standalone/scrub/scrub-helpers.sh
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -243,7 +243,7 @@ function standard_scrub_cluster() {

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
-      run_osd $dir $osd $ceph_osd_args || return 1
+      run_osd $dir $osd $(echo $ceph_osd_args) || return 1
    done

    create_pool $poolname $pg_num $pg_num
@@ -254,6 +254,7 @@ function standard_scrub_cluster() {
    name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'`
    echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id"
    args['pool_id']="${name_n_id##* }"
+    args['osd_args']=$ceph_osd_args

    if [[ -n "$saved_echo_flag" ]]; then set -x; fi
 }
diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc
index 1f7c734a636..e12eb106d14 100644
--- a/src/osd/scrubber/scrub_backend.cc
+++ b/src/osd/scrubber/scrub_backend.cc
@@ -1892,13 +1892,15 @@ std::optional ScrubBackend::scan_object_snaps(
 }

 /*
- * Process:
  * Building a map of objects suitable for snapshot validation.
- * The data in m_cleaned_meta_map is the leftover partial items that need to
- * be completed before they can be processed.
  *
- * Snapshots in maps precede the head object, which is why we are scanning
- * backwards.
+ * We move all "full" clone sets, i.e. the head object and (preceding it, as
+ * snapshot entries precede the head entry) its clones, into 'for_meta_scrub'.
+ * That collection, which contains no partial items, will be scrubbed by
+ * scrub_snapshot_metadata().
+ *
+ * What is left in m_cleaned_meta_map are the leftover partial items that
+ * need to be completed before they can be processed.
  */
 ScrubMap ScrubBackend::clean_meta_map(ScrubMap& cleaned, bool max_reached)
 {
-- 
2.39.5
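
Not part of the patch itself: a minimal sketch of how the new standalone test
might be exercised locally, assuming a built Ceph source tree and the stock
qa/run-standalone.sh wrapper. The td/osd-mapper/0 store path below is only an
assumption about where the helpers keep osd.0's data for such a run; adjust it
to the actual test directory.

  # from the cmake build directory; runs every TEST_* function in the new file
  ../qa/run-standalone.sh osd-mapper.sh

  # once the OSDs are stopped, the SnapMapper ("SNA_") mappings can be inspected
  # offline with the same tool the test uses to corrupt them ("p" is the prefix
  # the test itself dumps):
  ceph-kvstore-tool bluestore-kv td/osd-mapper/0 dump p | grep -a SNA_

The hard-coded 0000000000000003 in the test's grep pattern is the hex snap id
of snap13, the third snapshot created by the test, which is how the mapping
entry of that specific clone is selected for truncation.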