From: David Zafman Date: Thu, 15 Feb 2018 00:56:39 +0000 (-0800) Subject: osd: Add new snapset_inconsistency error check X-Git-Tag: v13.0.2~232^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=33e747724a0cb92e8200c41e16231973ff0aa2ad;p=ceph.git osd: Add new snapset_inconsistency error check. Includes new test case. Caused by: 5f58301a1364e948834dabe503200dda07fc2790. That commit changed attr consistency checking to exclude system keys, which required the snapset to be handled just like object info. Fixes: http://tracker.ceph.com/issues/22996 Signed-off-by: David Zafman --- diff --git a/doc/rados/command/list-inconsistent-obj.json b/doc/rados/command/list-inconsistent-obj.json index 76ca43e321de..859dc84ca2b5 100644 --- a/doc/rados/command/list-inconsistent-obj.json +++ b/doc/rados/command/list-inconsistent-obj.json @@ -85,7 +85,8 @@ "omap_digest_mismatch", "size_mismatch", "attr_value_mismatch", - "attr_name_mismatch" + "attr_name_mismatch", + "snapset_inconsistency" ] }, "minItems": 0, diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 148a6078314b..10d9ac3fd238 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -2767,6 +2767,153 @@ function TEST_periodic_scrub_replicated() { rados list-inconsistent-obj $pg | jq '.' 
| grep -qv $objname || return 1 } +# +# Corrupt snapset in replicated pool +# +function TEST_corrupt_snapset_scrub_rep() { + local dir=$1 + local poolname=csr_pool + local total_objs=2 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + rados -p $poolname mksnap snap1 + echo -n head_of_snapshot_data > $dir/change + + case $i in + 1) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + 2) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + esac + done + rm $dir/change + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + + jq "$jqfilter" << EOF | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/checkcsjson +{ + "epoch": 34, + "inconsistents": [ + { + "object": { + "name": "ROBJ1", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(27'8 client.4143.0:1 dirty|omap|data_digest s 21 uv 8 dd 53acb008 alloc_hint [0 0 0])", + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": "1=[1]:{1=[1]}" + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": "0=[]:{1=[1]}" + } + ] + }, + { + "object": { + "name": "ROBJ2", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": "3:e97ce31e:::ROBJ2:head(31'10 client.4155.0:1 dirty|omap|data_digest s 21 uv 10 dd 53acb008 alloc_hint [0 0 0])", + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": "0=[]:{1=[1]}" + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": "1=[1]:{1=[1]}" + } + ] + } + ] +} +EOF + + jq "$jqfilter" $dir/json | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/csjson + diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save6.json + fi + + if which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + rados rmpool $poolname $poolname --yes-i-really-really-mean-it + teardown $dir || return 1 +} main osd-scrub-repair "$@" diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index 1f4b9fb84f36..2a17928344a3 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -117,6 +117,9 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t { void set_attr_name_mismatch() { errors |= obj_err_t::ATTR_NAME_MISMATCH; } + void set_snapset_inconsistency() { + errors |= obj_err_t::SNAPSET_INCONSISTENCY; + } void add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard); void set_auth_missing(const hobject_t& hoid, const map&, diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index 7829e2870243..37e9d2854598 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -151,10 +151,11 @@ struct obj_err_t { SIZE_MISMATCH = 1 << 6, ATTR_VALUE_MISMATCH = 1 << 7, ATTR_NAME_MISMATCH = 1 << 8, + SNAPSET_INCONSISTENCY = 1 << 9, // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS }; uint64_t errors = 0; - static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH; + static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH|SNAPSET_INCONSISTENCY; static constexpr uint64_t DEEP_ERRORS = DATA_DIGEST_MISMATCH|OMAP_DIGEST_MISMATCH; bool has_object_info_inconsistency() const { return errors & OBJECT_INFO_INCONSISTENCY; @@ -180,6 +181,9 @@ struct obj_err_t { bool has_deep_errors() const { return errors & DEEP_ERRORS; } + bool has_snapset_inconsistency() const { + return errors & SNAPSET_INCONSISTENCY; + } }; struct inconsistent_obj_t : obj_err_t { diff --git 
a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index d076f7166b36..8b610a4d84eb 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -761,7 +761,7 @@ map::const_iterator inconsistent_obj_wrapper &object_error) { eversion_t auth_version; - bufferlist first_bl; + bufferlist first_oi_bl, first_ss_bl; // Create list of shards with primary first so it will be auth copy all // other things being equal. @@ -826,6 +826,12 @@ map::const_iterator try { bufferlist::iterator bliter = ss_bl.begin(); decode(ss, bliter); + if (first_ss_bl.length() == 0) { + first_ss_bl.append(ss_bl); + } else if (!object_error.has_snapset_inconsistency() && !ss_bl.contents_equal(first_ss_bl)) { + object_error.set_snapset_inconsistency(); + error_string += " snapset_inconsistency"; + } } catch (...) { // invalid snapset, probably corrupt shard_info.set_ss_attr_corrupted(); @@ -855,9 +861,9 @@ map::const_iterator // This is automatically corrected in PG::_repair_oinfo_oid() assert(oi.soid == obj); - if (first_bl.length() == 0) { - first_bl.append(bl); - } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) { + if (first_oi_bl.length() == 0) { + first_oi_bl.append(bl); + } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_oi_bl)) { object_error.set_object_info_inconsistency(); error_string += " object_info_inconsistency"; } diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index bfcdc88b53d5..c5761eaa1dce 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -1379,6 +1379,16 @@ static void dump_shard(const shard_info_t& shard, decode(oi, bliter); // Can't be corrupted f.dump_stream("object_info") << oi; } + if (!shard.has_ss_attr_missing() && !shard.has_ss_attr_corrupted() && + inc.has_snapset_inconsistency()) { + SnapSet ss; + bufferlist bl; + map::iterator k = (const_cast(shard)).attrs.find(SS_ATTR); + assert(k != shard.attrs.end()); // Can't be missing + bufferlist::iterator bliter = 
k->second.begin(); + decode(ss, bliter); // Can't be corrupted + f.dump_stream("snapset") << ss; + } if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch() || inc.union_shards.has_oi_attr_missing() || inc.union_shards.has_oi_attr_corrupted() @@ -1412,6 +1422,8 @@ static void dump_obj_errors(const obj_err_t &err, Formatter &f) f.dump_string("error", "attr_value_mismatch"); if (err.has_attr_name_mismatch()) f.dump_string("error", "attr_name_mismatch"); + if (err.has_snapset_inconsistency()) + f.dump_string("error", "snapset_inconsistency"); f.close_section(); }