git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Add new snapset_inconsistency error check 20450/head
author: David Zafman <dzafman@redhat.com>
Thu, 15 Feb 2018 00:56:39 +0000 (16:56 -0800)
committer: David Zafman <dzafman@redhat.com>
Thu, 15 Feb 2018 17:03:49 +0000 (09:03 -0800)
Includes new test case

Caused by: 5f58301a1364e948834dabe503200dda07fc2790
That commit changed attr consistency checking to exclude system keys,
so the snapset now has to be compared explicitly, just like object info.

Fixes: http://tracker.ceph.com/issues/22996
Signed-off-by: David Zafman <dzafman@redhat.com>
doc/rados/command/list-inconsistent-obj.json
qa/standalone/scrub/osd-scrub-repair.sh
src/common/scrub_types.h
src/include/rados/rados_types.hpp
src/osd/PGBackend.cc
src/tools/rados/rados.cc

index 76ca43e321de9eb2da01ee2820f482286931a659..859dc84ca2b5a9aa65ba339e7f8870cd8de54e46 100644 (file)
@@ -85,7 +85,8 @@
                 "omap_digest_mismatch",
                 "size_mismatch",
                 "attr_value_mismatch",
-                "attr_name_mismatch"
+                "attr_name_mismatch",
+                "snapset_inconsistency"
               ]
             },
             "minItems": 0,
index 148a6078314b26e898c0f250fcfdcfa7a910bf4d..10d9ac3fd238014cc634e3e71f49e571cd4f3c8d 100755 (executable)
@@ -2767,6 +2767,153 @@ function TEST_periodic_scrub_replicated() {
     rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
 }
 
+#
+# Corrupt snapset in replicated pool
+#
+function TEST_corrupt_snapset_scrub_rep() {  # corrupt the head snapset on one replica per object, scrub, then compare the inconsistency report
+    local dir=$1
+    local poolname=csr_pool
+    local total_objs=2  # objects ROBJ1..ROBJ2 are created below
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=2 || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    create_rbd_pool || return 1
+    wait_for_clean || return 1
+
+    create_pool foo 1 || return 1  # NOTE(review): pool foo is not referenced again in this function; purpose not visible here - confirm
+    create_pool $poolname 1 1 || return 1
+    wait_for_clean || return 1
+
+    for i in $(seq 1 $total_objs) ; do
+        objname=ROBJ${i}  # NOTE(review): objname and i are not declared local
+        add_something $dir $poolname $objname || return 1
+
+        rados --pool $poolname setomapheader $objname hdr-$objname || return 1
+        rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
+    done
+
+    local pg=$(get_pg $poolname ROBJ0)  # NOTE(review): ROBJ0 is never created (only ROBJ1..ROBJ2); presumably fine if the pool has a single PG - confirm
+
+    for i in $(seq 1 $total_objs) ; do
+        objname=ROBJ${i}
+
+        # Alternate corruption between osd.0 and osd.1 (odd i -> osd.1, even i -> osd.0)
+        local osd=$(expr $i % 2)
+
+        rados -p $poolname mksnap snap1  # NOTE(review): repeated on every iteration with the same snap name; exit status is not checked
+        echo -n head_of_snapshot_data > $dir/change  # 21 bytes, matching the size 21 in the expected JSON below
+
+        case $i in  # NOTE(review): the 1) and 2) branches below are identical
+        1)
+          rados --pool $poolname put $objname $dir/change
+          objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
+          ;;
+
+        2)
+          rados --pool $poolname put $objname $dir/change
+          objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
+          ;;
+
+        esac
+    done
+    rm $dir/change
+
+    pg_scrub $pg
+
+    rados list-inconsistent-pg $poolname > $dir/json || return 1
+    # Check pg count: exactly one inconsistent PG is expected
+    test $(jq '. | length' $dir/json) = "1" || return 1
+    # Check pgid: the reported PG must be the one computed above
+    test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+    rados list-inconsistent-obj $pg > $dir/json || return 1
+
+    jq "$jqfilter" << EOF | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/checkcsjson
+{
+  "epoch": 34,
+  "inconsistents": [
+    {
+      "object": {
+        "name": "ROBJ1",
+        "nspace": "",
+        "locator": "",
+        "snap": "head",
+        "version": 8
+      },
+      "errors": [
+        "snapset_inconsistency"
+      ],
+      "union_shard_errors": [],
+      "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(27'8 client.4143.0:1 dirty|omap|data_digest s 21 uv 8 dd 53acb008 alloc_hint [0 0 0])",
+      "shards": [
+        {
+          "osd": 0,
+          "primary": false,
+          "errors": [],
+          "size": 21,
+          "snapset": "1=[1]:{1=[1]}"
+        },
+        {
+          "osd": 1,
+          "primary": true,
+          "errors": [],
+          "size": 21,
+          "snapset": "0=[]:{1=[1]}"
+        }
+      ]
+    },
+    {
+      "object": {
+        "name": "ROBJ2",
+        "nspace": "",
+        "locator": "",
+        "snap": "head",
+        "version": 10
+      },
+      "errors": [
+        "snapset_inconsistency"
+      ],
+      "union_shard_errors": [],
+      "selected_object_info": "3:e97ce31e:::ROBJ2:head(31'10 client.4155.0:1 dirty|omap|data_digest s 21 uv 10 dd 53acb008 alloc_hint [0 0 0])",
+      "shards": [
+        {
+          "osd": 0,
+          "primary": false,
+          "errors": [],
+          "size": 21,
+          "snapset": "0=[]:{1=[1]}"
+        },
+        {
+          "osd": 1,
+          "primary": true,
+          "errors": [],
+          "size": 21,
+          "snapset": "1=[1]:{1=[1]}"
+        }
+      ]
+    }
+  ]
+}
+EOF
+
+    jq "$jqfilter" $dir/json | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/csjson  # actual report, passed through the same filters as the expected JSON
+    diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1  # a mismatch is tolerated when getjson is yes
+    if test $getjson = "yes"
+    then
+        jq '.' $dir/json > save6.json  # relative path: written to the current working directory
+    fi
+
+    if which jsonschema > /dev/null;  # schema validation is skipped when jsonschema is not installed
+    then
+      jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+    fi
+
+    rados rmpool $poolname $poolname --yes-i-really-really-mean-it  # NOTE(review): exit status is not checked
+    teardown $dir || return 1
+}
 
 main osd-scrub-repair "$@"
 
index 1f4b9fb84f3625895f236c19a4d7958d4e357a5e..2a17928344a374244e173d73dfcacf6e8b0f8329 100644 (file)
@@ -117,6 +117,9 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
   void set_attr_name_mismatch() {
     errors |= obj_err_t::ATTR_NAME_MISMATCH;
   }
+  void set_snapset_inconsistency() {  // mark this object as having a snapset inconsistency
+    errors |= obj_err_t::SNAPSET_INCONSISTENCY;  // OR in the flag bit; bits already set in errors are kept
+  }
   void add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard);
   void set_auth_missing(const hobject_t& hoid,
                         const map<pg_shard_t, ScrubMap*>&,
index 7829e28702434fc11bb1228c7191c5f7d5a0f729..37e9d285459882fd499980ef92899348a0688024 100644 (file)
@@ -151,10 +151,11 @@ struct obj_err_t {
     SIZE_MISMATCH        = 1 << 6,
     ATTR_VALUE_MISMATCH  = 1 << 7,
     ATTR_NAME_MISMATCH    = 1 << 8,
+    SNAPSET_INCONSISTENCY   = 1 << 9,
     // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
   };
   uint64_t errors = 0;
-  static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH;
+  static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH|SNAPSET_INCONSISTENCY;
   static constexpr uint64_t DEEP_ERRORS = DATA_DIGEST_MISMATCH|OMAP_DIGEST_MISMATCH;
   bool has_object_info_inconsistency() const {
     return errors & OBJECT_INFO_INCONSISTENCY;
@@ -180,6 +181,9 @@ struct obj_err_t {
   bool has_deep_errors() const {
     return errors & DEEP_ERRORS;
   }
+  bool has_snapset_inconsistency() const {  // true when the SNAPSET_INCONSISTENCY bit is set in errors
+    return errors & SNAPSET_INCONSISTENCY;  // masked value converts to bool on return
+  }
 };
 
 struct inconsistent_obj_t : obj_err_t {
index d076f7166b367494d53352d9735c546ca1b5753d..8b610a4d84ebf8e96c3c3077bdc260734051b634 100644 (file)
@@ -761,7 +761,7 @@ map<pg_shard_t, ScrubMap *>::const_iterator
   inconsistent_obj_wrapper &object_error)
 {
   eversion_t auth_version;
-  bufferlist first_bl;
+  bufferlist first_oi_bl, first_ss_bl;
 
   // Create list of shards with primary first so it will be auth copy all
   // other things being equal.
@@ -826,6 +826,12 @@ map<pg_shard_t, ScrubMap *>::const_iterator
         try {
          bufferlist::iterator bliter = ss_bl.begin();
          decode(ss, bliter);
+         if (first_ss_bl.length() == 0) {
+           first_ss_bl.append(ss_bl);
+         } else if (!object_error.has_snapset_inconsistency() && !ss_bl.contents_equal(first_ss_bl)) {
+           object_error.set_snapset_inconsistency();
+           error_string += " snapset_inconsistency";
+         }
         } catch (...) {
          // invalid snapset, probably corrupt
          shard_info.set_ss_attr_corrupted();
@@ -855,9 +861,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
     // This is automatically corrected in PG::_repair_oinfo_oid()
     assert(oi.soid == obj);
 
-    if (first_bl.length() == 0) {
-      first_bl.append(bl);
-    } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) {
+    if (first_oi_bl.length() == 0) {
+      first_oi_bl.append(bl);
+    } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_oi_bl)) {
       object_error.set_object_info_inconsistency();
       error_string += " object_info_inconsistency";
     }
index bfcdc88b53d552167e43b4aa824148a2aa2f066d..c5761eaa1dce8ef24dba42af4731a05cb5db1bc8 100644 (file)
@@ -1379,6 +1379,16 @@ static void dump_shard(const shard_info_t& shard,
     decode(oi, bliter);  // Can't be corrupted
     f.dump_stream("object_info") << oi;
   }
+  if (!shard.has_ss_attr_missing() && !shard.has_ss_attr_corrupted() &&
+      inc.has_snapset_inconsistency()) {
+    SnapSet ss;
+    bufferlist bl;
+    map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(SS_ATTR);
+    assert(k != shard.attrs.end()); // Can't be missing
+    bufferlist::iterator bliter = k->second.begin();
+    decode(ss, bliter);  // Can't be corrupted
+    f.dump_stream("snapset") << ss;
+  }
   if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()
      || inc.union_shards.has_oi_attr_missing()
      || inc.union_shards.has_oi_attr_corrupted()
@@ -1412,6 +1422,8 @@ static void dump_obj_errors(const obj_err_t &err, Formatter &f)
     f.dump_string("error", "attr_value_mismatch");
   if (err.has_attr_name_mismatch())
     f.dump_string("error", "attr_name_mismatch");
+  if (err.has_snapset_inconsistency())
+    f.dump_string("error", "snapset_inconsistency");
   f.close_section();
 }