git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Object level shard errors are tracked and used if no auth available 15421/head
author David Zafman <dzafman@redhat.com>
Wed, 31 May 2017 22:39:19 +0000 (15:39 -0700)
committer David Zafman <dzafman@redhat.com>
Thu, 1 Jun 2017 21:40:05 +0000 (14:40 -0700)
Shards with an object mismatch are tracked so that they can be marked inconsistent.
Fix the test because storing omap_digest in object_info no longer behaves as before.

Fixes: http://tracker.ceph.com/issues/20089
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 1cacbea763c7aabfeaaf4bd5e878301044184117)

src/osd/PGBackend.cc
src/test/osd/osd-scrub-repair.sh

index 199d4ba3a8ea47b0ca1eecd386e9a178cea32dc5..0b6e03155e0deb97e7b4846b62fe5c42a5dbe3e8 100644 (file)
@@ -751,6 +751,7 @@ void PGBackend::be_compare_scrubmaps(
       be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
 
     list<pg_shard_t> auth_list;
+    set<pg_shard_t> object_errors;
     if (auth == maps.end()) {
       object_error.set_version(0);
       object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
@@ -794,6 +795,10 @@ void PGBackend::be_compare_scrubmaps(
          if (found)
            errorstream << pgid << " shard " << j->first << ": soid " << *k
                      << " " << ss.str() << "\n";
+       } else if (found) {
+         // Track possible shard to use as authoritative, if needed
+         // There are errors, without identifying the shard
+         object_errors.insert(j->first);
        } else {
          // XXX: The auth shard might get here that we don't know
          // that it has the "correct" data.
@@ -811,10 +816,25 @@ void PGBackend::be_compare_scrubmaps(
     }
 
     if (auth_list.empty()) {
-      errorstream << pgid.pgid << " soid " << *k
+      if (object_errors.empty()) {
+        errorstream << pgid.pgid << " soid " << *k
                  << ": failed to pick suitable auth object\n";
-      goto out;
+        goto out;
+      }
+      // Object errors exist and we haven't found an authoritative shard
+      // Prefer the primary shard otherwise take first from list.
+      pg_shard_t auth_shard;
+      if (object_errors.count(get_parent()->whoami_shard())) {
+       auth_shard = get_parent()->whoami_shard();
+      } else {
+       auth_shard = *(object_errors.begin());
+      }
+      auth_list.push_back(auth_shard);
+      object_errors.erase(auth_shard);
     }
+    // At this point auth_list is populated, so we add the object errors shards
+    // as inconsistent.
+    cur_inconsistent.insert(object_errors.begin(), object_errors.end());
     if (!cur_missing.empty()) {
       missing[*k] = cur_missing;
     }
index 680af924ad1c7c3bd092fdfcadb0491451c540ec..168537cb20d0e778a1fe9805897d00d1bb9af7cb 100755 (executable)
@@ -406,6 +406,19 @@ function TEST_corrupt_scrub_replicated() {
 
         rados --pool $poolname setomapheader $objname hdr-$objname || return 1
         rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
+    done
+
+    local pg=$(get_pg $poolname ROBJ0)
+
+    # Compute an old omap digest and save oi
+    CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \
+        config set osd_deep_scrub_update_digest_min_age 0
+    CEPH_ARGS='' ceph daemon $dir//ceph-osd.1.asok \
+        config set osd_deep_scrub_update_digest_min_age 0
+    pg_deep_scrub $pg
+
+    for i in $(seq 1 $total_objs) ; do
+        objname=ROBJ${i}
 
         # Alternate corruption between osd.0 and osd.1
         local osd=$(expr $i % 2)
@@ -536,7 +549,7 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(15'3 client.4169.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "size_mismatch_oi"
       ],
@@ -565,13 +578,13 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:bc819597:::ROBJ12:head(110'39 client.4732.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])",
       "union_shard_errors": [
         "stat_error"
       ],
       "errors": [],
       "object": {
-        "version": 39,
+        "version": 36,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -592,13 +605,13 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:d60617f9:::ROBJ13:head(112'42 client.4737.0:1 dirty|omap|data_digest s 7 uv 42 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d alloc_hint [0 0 0])",
       "union_shard_errors": [
         "stat_error"
       ],
       "errors": [],
       "object": {
-        "version": 42,
+        "version": 39,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -669,7 +682,7 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:30259878:::ROBJ15:head(127'48 client.4820.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])",
       "union_shard_errors": [
         "oi_attr_missing"
       ],
@@ -677,7 +690,7 @@ function TEST_corrupt_scrub_replicated() {
         "attr_name_mismatch"
       ],
       "object": {
-        "version": 48,
+        "version": 45,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -698,7 +711,7 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(29'9 client.4251.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])",
       "union_shard_errors": [
         "missing"
       ],
@@ -768,14 +781,14 @@ function TEST_corrupt_scrub_replicated() {
           "osd": 1
         }
       ],
-      "selected_object_info": "2:86586531:::ROBJ8:head(70'26 client.4495.0:1 dirty|omap|data_digest s 7 uv 26 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
       "union_shard_errors": [],
       "errors": [
         "attr_value_mismatch",
         "attr_name_mismatch"
       ],
       "object": {
-        "version": 26,
+        "version": 62,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -797,7 +810,7 @@ function TEST_corrupt_scrub_replicated() {
               "name": "snapset"
             }
           ],
-          "object_info": "2:ffdb2004:::ROBJ9:head(94'30 client.4649.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d alloc_hint [0 0 0])",
+          "object_info": "2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
           "size": 1,
           "errors": [],
           "osd": 0
@@ -815,20 +828,20 @@ function TEST_corrupt_scrub_replicated() {
               "name": "snapset"
             }
           ],
-          "object_info": "2:ffdb2004:::ROBJ9:head(89'29 client.4612.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5 alloc_hint [0 0 0])",
+          "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
           "size": 1,
           "errors": [],
           "osd": 1
         }
       ],
-      "selected_object_info": "2:ffdb2004:::ROBJ9:head(94'30 client.4649.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d alloc_hint [0 0 0])",
+      "selected_object_info": "2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
       "union_shard_errors": [],
       "errors": [
         "object_info_inconsistency",
         "attr_value_mismatch"
       ],
       "object": {
-        "version": 30,
+        "version": 63,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -852,13 +865,6 @@ EOF
       jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
     fi
 
-    # Compute an old omap digest and save oi
-    CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \
-        config set osd_deep_scrub_update_digest_min_age 0
-    CEPH_ARGS='' ceph daemon $dir//ceph-osd.1.asok \
-        config set osd_deep_scrub_update_digest_min_age 0
-    pg_deep_scrub $pg
-
     objname=ROBJ9
     # Change data and size again because digest was recomputed
     echo -n ZZZ > $dir/change
@@ -919,7 +925,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(15'3 client.4171.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "data_digest_mismatch_oi",
         "size_mismatch_oi"
@@ -957,13 +963,13 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:b1f19cbd:::ROBJ10:head(136'51 osd.0.0:8 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od c2025a24 alloc_hint [0 0 0])",
+      "selected_object_info": "2:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "omap_digest_mismatch_oi"
       ],
       "errors": [],
       "object": {
-        "version": 33,
+        "version": 30,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -987,13 +993,13 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:87abbf36:::ROBJ11:head(105'36 client.4699.0:1 dirty|omap|data_digest s 7 uv 36 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "read_error"
       ],
       "errors": [],
       "object": {
-        "version": 36,
+        "version": 33,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -1016,13 +1022,13 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:bc819597:::ROBJ12:head(107'39 client.4704.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])",
       "union_shard_errors": [
         "stat_error"
       ],
       "errors": [],
       "object": {
-        "version": 39,
+        "version": 36,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -1130,7 +1136,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:30259878:::ROBJ15:head(124'48 client.4792.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])",
       "union_shard_errors": [
         "oi_attr_missing"
       ],
@@ -1138,7 +1144,7 @@ EOF
         "attr_name_mismatch"
       ],
       "object": {
-        "version": 48,
+        "version": 45,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -1164,7 +1170,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:e97ce31e:::ROBJ2:head(22'6 client.4214.0:1 dirty|omap|data_digest s 7 uv 6 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "data_digest_mismatch_oi"
       ],
@@ -1195,7 +1201,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(29'9 client.4255.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5 alloc_hint [0 0 0])",
+      "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])",
       "union_shard_errors": [
         "missing"
       ],
@@ -1214,20 +1220,20 @@ EOF
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0xd7178dfe",
           "size": 7,
-          "errors": [],
+          "errors": [
+            "omap_digest_mismatch_oi"
+          ],
           "osd": 0
         },
         {
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0xe2d46ea4",
           "size": 7,
-          "errors": [
-            "omap_digest_mismatch_oi"
-          ],
+          "errors": [],
           "osd": 1
         }
       ],
-      "selected_object_info": "2:f4981d31:::ROBJ4:head(136'52 osd.0.0:9 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od d7178dfe alloc_hint [0 0 0])",
+      "selected_object_info": "2:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "omap_digest_mismatch_oi"
       ],
@@ -1261,7 +1267,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(136'53 osd.0.0:10 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [0 0 0])",
+      "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [0 0 0])",
       "union_shard_errors": [
         "omap_digest_mismatch_oi"
       ],
@@ -1282,20 +1288,20 @@ EOF
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0x689ee887",
           "size": 7,
-          "errors": [],
+          "errors": [
+            "omap_digest_mismatch_oi"
+          ],
           "osd": 0
         },
         {
           "data_digest": "0x2ddbf8f5",
           "omap_digest": "0x179c919f",
           "size": 7,
-          "errors": [
-            "omap_digest_mismatch_oi"
-          ],
+          "errors": [],
           "osd": 1
         }
       ],
-      "selected_object_info": "2:a53c12e8:::ROBJ6:head(136'50 osd.0.0:7 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 689ee887 alloc_hint [0 0 0])",
+      "selected_object_info": "2:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [0 0 0])",
       "union_shard_errors": [
         "omap_digest_mismatch_oi"
       ],
@@ -1329,7 +1335,7 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:8b55fa4b:::ROBJ7:head(123'50 osd.0.0:7 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [0 0 0])",
+      "selected_object_info": "2:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [0 0 0])",
       "union_shard_errors": [
         "omap_digest_mismatch_oi"
       ],
@@ -1405,14 +1411,14 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:86586531:::ROBJ8:head(136'49 osd.0.0:6 dirty|omap|data_digest|omap_digest s 7 uv 26 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
+      "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
       "union_shard_errors": [],
       "errors": [
         "attr_value_mismatch",
         "attr_name_mismatch"
       ],
       "object": {
-        "version": 26,
+        "version": 62,
         "snap": "head",
         "locator": "",
         "nspace": "",
@@ -1434,7 +1440,7 @@ EOF
               "name": "snapset"
             }
           ],
-          "object_info": "2:ffdb2004:::ROBJ9:head(90'29 client.4615.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5 alloc_hint [0 0 0])",
+          "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
           "data_digest": "0x1f26fb26",
           "omap_digest": "0x2eecc539",
           "size": 3,
@@ -1454,7 +1460,7 @@ EOF
               "name": "snapset"
             }
           ],
-          "object_info": "2:ffdb2004:::ROBJ9:head(123'56 client.4891.0:1 dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
+          "object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
           "data_digest": "0x1f26fb26",
           "omap_digest": "0x2eecc539",
           "size": 3,
@@ -1462,14 +1468,14 @@ EOF
           "osd": 1
         }
       ],
-      "selected_object_info": "2:ffdb2004:::ROBJ9:head(123'56 client.4891.0:1 dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
+      "selected_object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
       "union_shard_errors": [],
       "errors": [
         "object_info_inconsistency",
         "attr_value_mismatch"
       ],
       "object": {
-        "version": 56,
+        "version": 64,
         "snap": "head",
         "locator": "",
         "nspace": "",