]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd, rados: Improve size scrub error handling
authorDavid Zafman <dzafman@redhat.com>
Fri, 14 Jul 2017 04:01:18 +0000 (21:01 -0700)
committerDavid Zafman <dzafman@redhat.com>
Fri, 11 Aug 2017 18:37:32 +0000 (11:37 -0700)
Fixes: http://tracker.ceph.com/issues/20243
Signed-off-by: David Zafman <dzafman@redhat.com>
doc/rados/command/list-inconsistent-obj.json
qa/standalone/scrub/osd-scrub-repair.sh
src/common/scrub_types.h
src/include/rados/rados_types.hpp
src/osd/PGBackend.cc
src/tools/rados/rados.cc

index a7c17ace9be540784215d3e0a1a395e6ee2a8b01..4e18fe525e20dd446aa55c75615c986e5f9e84fa 100644 (file)
@@ -66,7 +66,8 @@
                 "ec_hash_error",
                 "ec_size_error",
                 "oi_attr_missing",
-                "oi_attr_corrupted"
+                "oi_attr_corrupted",
+                "obj_size_oi_mismatch"
               ]
             },
             "minItems": 0,
                       "ec_hash_error",
                       "ec_size_error",
                       "oi_attr_missing",
-                      "oi_attr_corrupted"
+                      "oi_attr_corrupted",
+                      "obj_size_oi_mismatch"
                     ]
                   },
                   "minItems": 0,
index 38051c3938ee3cf16dd73abd286b2fb05a50e388..27ad2c956313a116a98acca6fdbb8b0b699dbaad 100755 (executable)
@@ -634,7 +634,8 @@ function TEST_corrupt_scrub_replicated() {
         {
           "size": 9,
           "errors": [
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -642,7 +643,8 @@ function TEST_corrupt_scrub_replicated() {
       ],
       "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
       "union_shard_errors": [
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -717,6 +719,18 @@ function TEST_corrupt_scrub_replicated() {
       "shards": [
         {
           "size": 7,
+          "attrs": [
+            {
+              "Base64": false,
+              "value": "",
+              "name": "_"
+            },
+            {
+              "Base64": true,
+              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+              "name": "snapset"
+            }
+          ],
           "errors": [
             "oi_attr_corrupted"
           ],
@@ -725,6 +739,13 @@ function TEST_corrupt_scrub_replicated() {
         },
         {
           "size": 7,
+          "attrs": [
+            {
+              "Base64": true,
+              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+              "name": "snapset"
+            }
+          ],
           "errors": [
             "oi_attr_missing"
           ],
@@ -785,9 +806,7 @@ function TEST_corrupt_scrub_replicated() {
       "union_shard_errors": [
         "oi_attr_missing"
       ],
-      "errors": [
-        "attr_name_mismatch"
-      ],
+      "errors": [],
       "object": {
         "version": 45,
         "snap": "head",
@@ -901,18 +920,6 @@ function TEST_corrupt_scrub_replicated() {
     {
       "shards": [
         {
-          "attrs": [
-            {
-              "Base64": true,
-              "value": "",
-              "name": "_"
-            },
-            {
-              "Base64": true,
-              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
-              "name": "snapset"
-            }
-          ],
           "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
           "size": 1,
           "errors": [],
@@ -920,30 +927,21 @@ function TEST_corrupt_scrub_replicated() {
           "primary": false
         },
         {
-          "attrs": [
-            {
-              "Base64": true,
-              "value": "",
-              "name": "_"
-            },
-            {
-              "Base64": true,
-              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
-              "name": "snapset"
-            }
-          ],
           "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
           "size": 1,
-          "errors": [],
+          "errors": [
+            "obj_size_oi_mismatch"
+          ],
           "osd": 1,
           "primary": true
         }
       ],
       "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
-      "union_shard_errors": [],
+      "union_shard_errors": [
+         "obj_size_oi_mismatch"
+      ],
       "errors": [
-        "object_info_inconsistency",
-        "attr_value_mismatch"
+        "object_info_inconsistency"
       ],
       "object": {
         "version": 63,
@@ -1020,7 +1018,8 @@ EOF
           "size": 9,
           "errors": [
             "data_digest_mismatch_oi",
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -1029,7 +1028,8 @@ EOF
       "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "data_digest_mismatch_oi",
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "data_digest_mismatch",
@@ -1176,6 +1176,18 @@ EOF
     {
       "shards": [
         {
+          "attrs": [
+            {
+              "Base64": false,
+              "value": "",
+              "name": "_"
+            },
+            {
+              "Base64": true,
+              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+              "name": "snapset"
+            }
+          ],
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0x4f14f849",
           "size": 7,
@@ -1186,6 +1198,13 @@ EOF
           "primary": false
         },
         {
+          "attrs": [
+            {
+              "Base64": true,
+              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+              "name": "snapset"
+            }
+          ],
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0x4f14f849",
           "size": 7,
@@ -1253,9 +1272,7 @@ EOF
       "union_shard_errors": [
         "oi_attr_missing"
       ],
-      "errors": [
-        "attr_name_mismatch"
-      ],
+      "errors": [],
       "object": {
         "version": 45,
         "snap": "head",
@@ -1555,39 +1572,17 @@ EOF
     {
       "shards": [
         {
-          "attrs": [
-            {
-              "Base64": true,
-              "value": "",
-              "name": "_"
-            },
-            {
-              "Base64": true,
-              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
-              "name": "snapset"
-            }
-          ],
           "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
           "data_digest": "0x1f26fb26",
           "omap_digest": "0x2eecc539",
           "size": 3,
-          "errors": [],
+          "errors": [
+            "obj_size_oi_mismatch"
+          ],
           "osd": 0,
           "primary": false
         },
         {
-          "attrs": [
-            {
-              "Base64": true,
-              "value": "",
-              "name": "_"
-            },
-            {
-              "Base64": true,
-              "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
-              "name": "snapset"
-            }
-          ],
           "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
           "data_digest": "0x1f26fb26",
           "omap_digest": "0x2eecc539",
@@ -1598,10 +1593,11 @@ EOF
         }
       ],
       "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
-      "union_shard_errors": [],
+      "union_shard_errors": [
+        "obj_size_oi_mismatch"
+      ],
       "errors": [
-        "object_info_inconsistency",
-        "attr_value_mismatch"
+        "object_info_inconsistency"
       ],
       "object": {
         "version": 64,
@@ -1735,7 +1731,8 @@ function corrupt_scrub_erasure() {
           "size": 9,
           "shard": 0,
           "errors": [
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -1750,7 +1747,8 @@ function corrupt_scrub_erasure() {
       ],
       "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -1933,7 +1931,8 @@ function corrupt_scrub_erasure() {
           "size": 4096,
           "shard": 0,
           "errors": [
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -1948,7 +1947,8 @@ function corrupt_scrub_erasure() {
       ],
       "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -2011,7 +2011,8 @@ EOF
           "shard": 0,
           "errors": [
             "read_error",
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -2029,7 +2030,8 @@ EOF
       "selected_object_info": "3:9175b684:::EOBJ1:head(27'1 client.4155.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
         "read_error",
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -2225,7 +2227,8 @@ EOF
           "omap_digest": "0xffffffff",
           "size": 4096,
           "errors": [
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "shard": 0,
           "osd": 1,
@@ -2243,7 +2246,8 @@ EOF
       ],
       "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4288.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -2282,7 +2286,8 @@ EOF
           "shard": 0,
           "errors": [
             "read_error",
-            "size_mismatch_oi"
+            "size_mismatch_oi",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -2300,7 +2305,8 @@ EOF
       "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
         "read_error",
-        "size_mismatch_oi"
+        "size_mismatch_oi",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
@@ -2539,7 +2545,8 @@ EOF
           "shard": 0,
           "errors": [
             "size_mismatch_oi",
-            "ec_size_error"
+            "ec_size_error",
+            "obj_size_oi_mismatch"
           ],
           "osd": 1,
           "primary": true
@@ -2557,7 +2564,8 @@ EOF
       "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
       "union_shard_errors": [
         "size_mismatch_oi",
-        "ec_size_error"
+        "ec_size_error",
+        "obj_size_oi_mismatch"
       ],
       "errors": [
         "size_mismatch"
index 21e557b98aa4c2740cd2268e82d4da848b25b8f7..9e30fbd9aa222ea0870239cea92242610d2fca07 100644 (file)
@@ -78,6 +78,9 @@ public:
   void set_ss_attr_corrupted() {
     errors |= err_t::SS_ATTR_CORRUPTED;
   }
+  void set_obj_size_oi_mismatch() {
+    errors |= err_t::OBJ_SIZE_OI_MISMATCH;
+  }
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bp);
 };
index 1d5aca85beacf32723a8d2b8799cb5f0b49a668c..7829e28702434fc11bb1228c7191c5f7d5a0f729 100644 (file)
@@ -63,11 +63,12 @@ struct err_t {
     OI_ATTR_MISSING         = 1 << 14,
     OI_ATTR_CORRUPTED       = 1 << 15,
     SS_ATTR_MISSING         = 1 << 16,
-    SS_ATTR_CORRUPTED       = 1 << 17
+    SS_ATTR_CORRUPTED       = 1 << 17,
+    OBJ_SIZE_OI_MISMATCH      = 1 << 18
     // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
   };
   uint64_t errors = 0;
-  static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED;
+  static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED|OBJ_SIZE_OI_MISMATCH;
   static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH;
   bool has_shard_missing() const {
     return errors & SHARD_MISSING;
@@ -111,6 +112,9 @@ struct err_t {
   bool has_deep_errors() const {
     return errors & DEEP_ERRORS;
   }
+  bool has_obj_size_oi_mismatch() const {
+    return errors & OBJ_SIZE_OI_MISMATCH;
+  }
 };
 
 struct shard_info_t : err_t {
index 312e9a7a44c1e32bc28805b8b827cebc9a608d50..848f036a6c1ffd33c96b1312dbe83d7d572ef44c 100644 (file)
@@ -711,6 +711,9 @@ bool PGBackend::be_compare_scrub_objects(
   for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
        i != auth.attrs.end();
        ++i) {
+    // We check system keys seperately
+    if (i->first == OI_ATTR || i->first == SS_ATTR)
+      continue;
     if (!candidate.attrs.count(i->first)) {
       if (error != CLEAN)
         errorstream << ", ";
@@ -728,6 +731,9 @@ bool PGBackend::be_compare_scrub_objects(
   for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
        i != candidate.attrs.end();
        ++i) {
+    // We check system keys seperately
+    if (i->first == OI_ATTR || i->first == SS_ATTR)
+      continue;
     if (!auth.attrs.count(i->first)) {
       if (error != CLEAN)
         errorstream << ", ";
@@ -839,10 +845,12 @@ map<pg_shard_t, ScrubMap *>::const_iterator
       error_string += " object_info_inconsistency";
     }
 
-    // Don't use this particular shard because it won't be able to repair data
-    // XXX: For now we can't pick one shard for repair and another's object info
-    if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
+    if (i->second.size != be_get_ondisk_size(oi.size)) {
+      dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl;
+      shard_info.set_obj_size_oi_mismatch();
+      error_string += " obj_size_oi_mismatch";
       goto out;
+    }
 
     // We don't set errors here for snapset, but we won't pick an auth copy if the
     // snapset is missing or won't decode.
@@ -861,6 +869,11 @@ map<pg_shard_t, ScrubMap *>::const_iterator
       }
     }
 
+    // Don't use this particular shard because it won't be able to repair data
+    // XXX: For now we can't pick one shard for repair and another's object info
+    if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
+      goto out;
+
     if (auth_version == eversion_t() || oi.version > auth_version ||
         (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
       auth = j;
index 5e8684deccd11a38de7cb38d02015d98846fc7cb..682b53059bf3d02807ac42d41e5bf56da2136b5f 100644 (file)
@@ -1338,6 +1338,8 @@ static void dump_errors(const err_t &err, Formatter &f, const char *name)
     f.dump_string("error", "oi_attr_missing");
   if (err.has_oi_attr_corrupted())
     f.dump_string("error", "oi_attr_corrupted");
+  if (err.has_obj_size_oi_mismatch())
+    f.dump_string("error", "obj_size_oi_mismatch");
   f.close_section();
 }
 
@@ -1369,7 +1371,11 @@ static void dump_shard(const shard_info_t& shard,
     ::decode(oi, bliter);  // Can't be corrupted
     f.dump_stream("object_info") << oi;
   }
-  if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
+  if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()
+     || inc.union_shards.has_oi_attr_missing()
+     || inc.union_shards.has_oi_attr_corrupted()
+     || inc.union_shards.has_ss_attr_missing()
+     || inc.union_shards.has_ss_attr_corrupted()) {
     f.open_array_section("attrs");
     for (auto kv : shard.attrs) {
       f.open_object_section("attr");