From: David Zafman Date: Wed, 31 May 2017 22:39:19 +0000 (-0700) Subject: osd: Object level shard errors are tracked and used if no auth available X-Git-Tag: v10.2.8~44^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F15416%2Fhead;p=ceph.git osd: Object level shard errors are tracked and used if no auth available Shards with object mismatch are tracked to mark them inconsistent. Fix test because storing omap_digest in object_info is not behaving as before. Fixes: http://tracker.ceph.com/issues/20089 Signed-off-by: David Zafman (cherry picked from commit 1cacbea763c7aabfeaaf4bd5e878301044184117) Conflicts: src/test/osd/osd-scrub-repair.sh (no alloc_hint in object_info) --- diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index f6acbf4bb1eb..4c149bd9b3e5 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -666,6 +666,7 @@ void PGBackend::be_compare_scrubmaps( be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error); list auth_list; + set object_errors; if (auth == maps.end()) { object_error.set_version(0); object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors); @@ -709,6 +710,10 @@ void PGBackend::be_compare_scrubmaps( if (found) errorstream << pgid << " shard " << j->first << ": soid " << *k << " " << ss.str() << "\n"; + } else if (found) { + // Track possible shard to use as authoritative, if needed + // There are errors, without identifying the shard + object_errors.insert(j->first); } else { // XXX: The auth shard might get here that we don't know // that it has the "correct" data. 
@@ -726,10 +731,25 @@ void PGBackend::be_compare_scrubmaps( } if (auth_list.empty()) { - errorstream << pgid.pgid << " soid " << *k + if (object_errors.empty()) { + errorstream << pgid.pgid << " soid " << *k << ": failed to pick suitable auth object\n"; - goto out; + goto out; + } + // Object errors exist and we haven't found an authortative shard + // Prefer the primary shard otherwise take first from list. + pg_shard_t auth_shard; + if (object_errors.count(get_parent()->whoami_shard())) { + auth_shard = get_parent()->whoami_shard(); + } else { + auth_shard = *(object_errors.begin()); + } + auth_list.push_back(auth_shard); + object_errors.erase(auth_shard); } + // At this point auth_list is populated, so we add the object errors shards + // as inconsistent. + cur_inconsistent.insert(object_errors.begin(), object_errors.end()); if (!cur_missing.empty()) { missing[*k] = cur_missing; } diff --git a/src/test/osd/osd-scrub-repair.sh b/src/test/osd/osd-scrub-repair.sh index cf06c5139472..517e795e7fc8 100755 --- a/src/test/osd/osd-scrub-repair.sh +++ b/src/test/osd/osd-scrub-repair.sh @@ -403,6 +403,19 @@ function TEST_corrupt_scrub_replicated() { rados --pool $poolname setomapheader $objname hdr-$objname || return 1 rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + + # Compute an old omap digest and save oi + CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \ + config set osd_deep_scrub_update_digest_min_age 0 + CEPH_ARGS='' ceph daemon $dir//ceph-osd.1.asok \ + config set osd_deep_scrub_update_digest_min_age 0 + pg_deep_scrub $pg + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} # Alternate corruption between osd.0 and osd.1 local osd=$(expr $i % 2) @@ -533,7 +546,7 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(16'3 client.4130.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5)", + "selected_object_info": 
"2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6)", "union_shard_errors": [ "size_mismatch_oi" ], @@ -562,13 +575,13 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:bc819597:::ROBJ12:head(98'39 client.4320.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5)", + "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 39, + "version": 36, "snap": "head", "locator": "", "nspace": "", @@ -589,13 +602,13 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:d60617f9:::ROBJ13:head(100'42 client.4325.0:1 dirty|omap|data_digest s 7 uv 42 dd 2ddbf8f5)", + "selected_object_info": "2:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 42, + "version": 39, "snap": "head", "locator": "", "nspace": "", @@ -666,7 +679,7 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:30259878:::ROBJ15:head(113'48 client.4357.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5)", + "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e)", "union_shard_errors": [ "oi_attr_missing" ], @@ -674,7 +687,7 @@ function TEST_corrupt_scrub_replicated() { "attr_name_mismatch" ], "object": { - "version": 48, + "version": 45, "snap": "head", "locator": "", "nspace": "", @@ -695,7 +708,7 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(30'9 client.4162.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5)", + "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 
uv 9 dd 2ddbf8f5 od b35dfd)", "union_shard_errors": [ "missing" ], @@ -765,14 +778,14 @@ function TEST_corrupt_scrub_replicated() { "osd": 1 } ], - "selected_object_info": "2:86586531:::ROBJ8:head(65'26 client.4244.0:1 dirty|omap|data_digest s 7 uv 26 dd 2ddbf8f5)", + "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc)", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 26, + "version": 62, "snap": "head", "locator": "", "nspace": "", @@ -794,7 +807,7 @@ function TEST_corrupt_scrub_replicated() { "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(87'30 client.4294.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d)", + "object_info": "2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539)", "size": 1, "errors": [], "osd": 0 @@ -812,20 +825,20 @@ function TEST_corrupt_scrub_replicated() { "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(82'29 client.4282.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5)", + "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539)", "size": 1, "errors": [], "osd": 1 } ], - "selected_object_info": "2:ffdb2004:::ROBJ9:head(87'30 client.4294.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d)", + "selected_object_info": "2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539)", "union_shard_errors": [], "errors": [ "object_info_inconsistency", "attr_value_mismatch" ], "object": { - "version": 30, + "version": 63, "snap": "head", "locator": "", "nspace": "", @@ -849,13 +862,6 @@ EOF jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 fi - # Compute an old omap digest and save oi - CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \ - config 
set osd_deep_scrub_update_digest_min_age 0 - CEPH_ARGS='' ceph daemon $dir//ceph-osd.1.asok \ - config set osd_deep_scrub_update_digest_min_age 0 - pg_deep_scrub $pg - objname=ROBJ9 # Change data and size again because digest was recomputed echo -n ZZZ > $dir/change @@ -916,7 +922,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(16'3 client.4130.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5)", + "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6)", "union_shard_errors": [ "data_digest_mismatch_oi", "size_mismatch_oi" @@ -954,13 +960,13 @@ EOF "osd": 1 } ], - "selected_object_info": "2:b1f19cbd:::ROBJ10:head(118'52 osd.0.0:10 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od c2025a24)", + "selected_object_info": "2:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], "errors": [], "object": { - "version": 33, + "version": 30, "snap": "head", "locator": "", "nspace": "", @@ -984,13 +990,13 @@ EOF "osd": 1 } ], - "selected_object_info": "2:87abbf36:::ROBJ11:head(96'36 client.4315.0:1 dirty|omap|data_digest s 7 uv 36 dd 2ddbf8f5)", + "selected_object_info": "2:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03)", "union_shard_errors": [ "read_error" ], "errors": [], "object": { - "version": 36, + "version": 33, "snap": "head", "locator": "", "nspace": "", @@ -1013,13 +1019,13 @@ EOF "osd": 1 } ], - "selected_object_info": "2:bc819597:::ROBJ12:head(98'39 client.4320.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5)", + "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 39, + "version": 36, "snap": "head", "locator": "", 
"nspace": "", @@ -1127,7 +1133,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:30259878:::ROBJ15:head(113'48 client.4357.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5)", + "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e)", "union_shard_errors": [ "oi_attr_missing" ], @@ -1135,7 +1141,7 @@ EOF "attr_name_mismatch" ], "object": { - "version": 48, + "version": 45, "snap": "head", "locator": "", "nspace": "", @@ -1161,7 +1167,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:e97ce31e:::ROBJ2:head(23'6 client.4146.0:1 dirty|omap|data_digest s 7 uv 6 dd 2ddbf8f5)", + "selected_object_info": "2:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918)", "union_shard_errors": [ "data_digest_mismatch_oi" ], @@ -1192,7 +1198,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(30'9 client.4162.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5)", + "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd)", "union_shard_errors": [ "missing" ], @@ -1211,20 +1217,20 @@ EOF "data_digest": "0x2ddbf8f5", "omap_digest": "0xd7178dfe", "size": 7, - "errors": [], + "errors": [ + "omap_digest_mismatch_oi" + ], "osd": 0 }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xe2d46ea4", "size": 7, - "errors": [ - "omap_digest_mismatch_oi" - ], + "errors": [], "osd": 1 } ], - "selected_object_info": "2:f4981d31:::ROBJ4:head(118'53 osd.0.0:11 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od d7178dfe)", + "selected_object_info": "2:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1258,7 +1264,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(118'54 osd.0.0:12 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 
2ddbf8f5 od 1a862a41)", + "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1279,20 +1285,20 @@ EOF "data_digest": "0x2ddbf8f5", "omap_digest": "0x689ee887", "size": 7, - "errors": [], + "errors": [ + "omap_digest_mismatch_oi" + ], "osd": 0 }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x179c919f", "size": 7, - "errors": [ - "omap_digest_mismatch_oi" - ], + "errors": [], "osd": 1 } ], - "selected_object_info": "2:a53c12e8:::ROBJ6:head(118'51 osd.0.0:9 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 689ee887)", + "selected_object_info": "2:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1326,7 +1332,7 @@ EOF "osd": 1 } ], - "selected_object_info": "2:8b55fa4b:::ROBJ7:head(118'50 osd.0.0:8 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a)", + "selected_object_info": "2:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1402,14 +1408,14 @@ EOF "osd": 1 } ], - "selected_object_info": "2:86586531:::ROBJ8:head(118'49 osd.0.0:7 dirty|omap|data_digest|omap_digest s 7 uv 26 dd 2ddbf8f5 od d6be81dc)", + "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc)", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 26, + "version": 62, "snap": "head", "locator": "", "nspace": "", @@ -1431,7 +1437,7 @@ EOF "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(82'29 client.4282.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5)", + "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 
dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539)", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, @@ -1451,7 +1457,7 @@ EOF "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(118'56 client.4386.0:1 dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539)", + "object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539)", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, @@ -1459,14 +1465,14 @@ EOF "osd": 1 } ], - "selected_object_info": "2:ffdb2004:::ROBJ9:head(118'56 client.4386.0:1 dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539)", + "selected_object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539)", "union_shard_errors": [], "errors": [ "object_info_inconsistency", "attr_value_mismatch" ], "object": { - "version": 56, + "version": 64, "snap": "head", "locator": "", "nspace": "",