From: David Zafman Date: Tue, 30 Aug 2016 19:22:29 +0000 (-0700) Subject: rados, osd: Improve attrs output of list-inconsistent-obj X-Git-Tag: v11.1.0~256^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=df3ff6dafeadb3822b35c424a890db9a14d7f60f;p=ceph.git rados, osd: Improve attrs output of list-inconsistent-obj Persist the user_version and shard id of scrubbed obj Rados command dump inconsistent obj's version and shard-id so they can be passed to repair command Rados list-inconsistent-obj output of attrs Make attrs an array since there is more than one Use base64 encode for values with non-printable chars Add indication if base64 encoding used Add checking for ss_attr_missing and ss_attr_corrupted Rename attr errors to attr_name_mismatch and attr_value_mismatch Add missing size_mismatch_oi scrub checking For erasure coded pools add ec_size_error and ec_hash_error not just read_error Use oi_attr_missing and oi_attr_corrupted just like list-inconsistent-snap does Pick an object info based on version and use that to find specific shards in error Check for object info inconsistency which should be rare Make all errors based on comparing shards to each other object errors We don't want to give the impression that we've picked the correct one Signed-off-by: Kefu Chai Signed-off-by: David Zafman --- diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc index db7c915907c9..f53d8ea3f93d 100644 --- a/src/common/scrub_types.cc +++ b/src/common/scrub_types.cc @@ -27,6 +27,29 @@ static void encode(const object_id_t& obj, bufferlist& bl) reinterpret_cast(obj).encode(bl); } +void osd_shard_wrapper::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + ::encode(osd, bl); + ::encode(shard, bl); + ENCODE_FINISH(bl); +} + +void osd_shard_wrapper::decode(bufferlist::iterator& bp) +{ + DECODE_START(1, bp); + ::decode(osd, bp); + ::decode(shard, bp); + DECODE_FINISH(bp); +} + +namespace librados { + static void encode(const 
osd_shard_t& shard, bufferlist& bl) + { + reinterpret_cast(shard).encode(bl); + } +} + void shard_info_wrapper::set_object(const ScrubMap::object& object) { for (auto attr : object.attrs) { @@ -43,17 +66,11 @@ void shard_info_wrapper::set_object(const ScrubMap::object& object) data_digest_present = true; data_digest = object.digest; } - if (object.read_error) { - errors |= SHARD_READ_ERR; - } - if (object.stat_error) { - errors |= SHARD_STAT_ERR; - } } void shard_info_wrapper::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); ::encode(errors, bl); if (has_shard_missing()) { return; @@ -64,12 +81,13 @@ void shard_info_wrapper::encode(bufferlist& bl) const ::encode(omap_digest, bl); ::encode(data_digest_present, bl); ::encode(data_digest, bl); + ::encode(selected_oi, bl); ENCODE_FINISH(bl); } void shard_info_wrapper::decode(bufferlist::iterator& bp) { - DECODE_START(1, bp); + DECODE_START(2, bp); ::decode(errors, bp); if (has_shard_missing()) { return; @@ -80,6 +98,8 @@ void shard_info_wrapper::decode(bufferlist::iterator& bp) ::decode(omap_digest, bp); ::decode(data_digest_present, bp); ::decode(data_digest, bp); + if (struct_v > 1) + ::decode(selected_oi, bp); DECODE_FINISH(bp); } @@ -92,28 +112,28 @@ inconsistent_obj_wrapper::inconsistent_obj_wrapper(const hobject_t& hoid) void inconsistent_obj_wrapper::add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard) { - errors |= shard.errors; - shards[pgs.osd] = shard; + union_shards.errors |= shard.errors; + shards.emplace(osd_shard_t{pgs.osd, int8_t(pgs.shard)}, shard); } void inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid, - const map& maps) + const map& maps, + map &shard_map, + int &shallow_errors, int &deep_errors) { - errors |= (err_t::SHARD_MISSING | - err_t::SHARD_READ_ERR | - err_t::OMAP_DIGEST_MISMATCH | - err_t::DATA_DIGEST_MISMATCH | - err_t::ATTR_MISMATCH); for (auto pg_map : maps) { auto oid_object = pg_map.second->objects.find(hoid); - 
shard_info_wrapper shard; - if (oid_object == pg_map.second->objects.end()) { - shard.set_missing(); - } else { - shard.set_object(oid_object->second); - } - shards[pg_map.first.osd] = shard; + if (oid_object == pg_map.second->objects.end()) + shard_map[pg_map.first].set_missing(); + else + shard_map[pg_map.first].set_object(oid_object->second); + if (shard_map[pg_map.first].has_deep_errors()) + ++deep_errors; + else if (shard_map[pg_map.first].has_shallow_errors()) + ++shallow_errors; + union_shards.errors |= shard_map[pg_map.first].errors; + shards.emplace(osd_shard_t{pg_map.first.osd, pg_map.first.shard}, shard_map[pg_map.first]); } } @@ -126,19 +146,24 @@ namespace librados { void inconsistent_obj_wrapper::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode(errors, bl); ::encode(object, bl); + ::encode(version, bl); ::encode(shards, bl); + ::encode(union_shards.errors, bl); ENCODE_FINISH(bl); } void inconsistent_obj_wrapper::decode(bufferlist::iterator& bp) { - DECODE_START(1, bp); + DECODE_START(2, bp); + DECODE_OLDEST(2); ::decode(errors, bp); ::decode(object, bp); + ::decode(version, bp); ::decode(shards, bp); + ::decode(union_shards.errors, bp); DECODE_FINISH(bp); } diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index dc93c88357d9..6143e9e17c87 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -25,6 +25,19 @@ inline void decode(librados::object_id_t& obj, bufferlist::iterator& bp) { reinterpret_cast(obj).decode(bp); } +struct osd_shard_wrapper : public librados::osd_shard_t { + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& bp); +}; + +WRITE_CLASS_ENCODER(osd_shard_wrapper) + +namespace librados { + inline void decode(librados::osd_shard_t& shard, bufferlist::iterator& bp) { + reinterpret_cast(shard).decode(bp); + } +} + struct shard_info_wrapper : public librados::shard_info_t { public: shard_info_wrapper() = default; @@ -35,29 +48,38 @@ public: void 
set_missing() { errors |= err_t::SHARD_MISSING; } - void set_omap_digest_mismatch() { - errors |= err_t::OMAP_DIGEST_MISMATCH; - } void set_omap_digest_mismatch_oi() { errors |= err_t::OMAP_DIGEST_MISMATCH_OI; } - void set_data_digest_mismatch() { - errors |= err_t::DATA_DIGEST_MISMATCH; + void set_size_mismatch_oi() { + errors |= err_t::SIZE_MISMATCH_OI; } void set_data_digest_mismatch_oi() { errors |= err_t::DATA_DIGEST_MISMATCH_OI; } - void set_size_mismatch() { - errors |= err_t::SIZE_MISMATCH; + void set_read_error() { + errors |= err_t::SHARD_READ_ERR; } - void set_attr_missing() { - errors |= err_t::ATTR_MISSING; + void set_stat_error() { + errors |= err_t::SHARD_STAT_ERR; } - void set_attr_mismatch() { - errors |= err_t::ATTR_MISMATCH; + void set_ec_hash_mismatch() { + errors |= err_t::SHARD_EC_HASH_MISMATCH; } - void set_attr_unexpected() { - errors |= err_t::ATTR_UNEXPECTED; + void set_ec_size_mismatch() { + errors |= err_t::SHARD_EC_SIZE_MISMATCH; + } + void set_oi_attr_missing() { + errors |= err_t::OI_ATTR_MISSING; + } + void set_oi_attr_corrupted() { + errors |= err_t::OI_ATTR_CORRUPTED; + } + void set_ss_attr_missing() { + errors |= err_t::SS_ATTR_MISSING; + } + void set_ss_attr_corrupted() { + errors |= err_t::SS_ATTR_CORRUPTED; } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bp); @@ -75,9 +97,30 @@ namespace librados { struct inconsistent_obj_wrapper : librados::inconsistent_obj_t { inconsistent_obj_wrapper(const hobject_t& hoid); + void set_object_info_inconsistency() { + errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY; + } + void set_omap_digest_mismatch() { + errors |= obj_err_t::OMAP_DIGEST_MISMATCH; + } + void set_data_digest_mismatch() { + errors |= obj_err_t::DATA_DIGEST_MISMATCH; + } + void set_size_mismatch() { + errors |= obj_err_t::SIZE_MISMATCH; + } + void set_attr_value_mismatch() { + errors |= obj_err_t::ATTR_VALUE_MISMATCH; + } + void set_attr_name_mismatch() { + errors |= obj_err_t::ATTR_NAME_MISMATCH; + } void 
add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard); void set_auth_missing(const hobject_t& hoid, - const map& map); + const map&, + map&, + int &shallow_errors, int &deep_errors); + void set_version(uint64_t ver) { version = ver; } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bp); }; diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index ca7a49018be1..9b79f8780ee6 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -51,24 +51,24 @@ struct object_id_t { }; struct err_t { - enum { - ATTR_UNEXPECTED = 1 << 0, + enum : uint64_t { SHARD_MISSING = 1 << 1, SHARD_STAT_ERR = 1 << 2, SHARD_READ_ERR = 1 << 3, - DATA_DIGEST_MISMATCH = 1 << 4, - OMAP_DIGEST_MISMATCH = 1 << 5, - SIZE_MISMATCH = 1 << 6, - ATTR_MISMATCH = 1 << 7, - ATTR_MISSING = 1 << 8, DATA_DIGEST_MISMATCH_OI = 1 << 9, OMAP_DIGEST_MISMATCH_OI = 1 << 10, SIZE_MISMATCH_OI = 1 << 11, + SHARD_EC_HASH_MISMATCH = 1 << 12, + SHARD_EC_SIZE_MISMATCH = 1 << 13, + OI_ATTR_MISSING = 1 << 14, + OI_ATTR_CORRUPTED = 1 << 15, + SS_ATTR_MISSING = 1 << 16, + SS_ATTR_CORRUPTED = 1 << 17 + // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS }; uint64_t errors = 0; - bool has_attr_unexpected() const { - return errors & ATTR_UNEXPECTED; - } + static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED; + static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH; bool has_shard_missing() const { return errors & SHARD_MISSING; } @@ -78,31 +78,38 @@ struct err_t { bool has_read_error() const { return errors & SHARD_READ_ERR; } - bool has_data_digest_mismatch() const { - return errors & DATA_DIGEST_MISMATCH; - } - bool has_omap_digest_mismatch() const { - return errors & OMAP_DIGEST_MISMATCH; - } - // deep error bool 
has_data_digest_mismatch_oi() const { return errors & DATA_DIGEST_MISMATCH_OI; } - // deep error bool has_omap_digest_mismatch_oi() const { return errors & OMAP_DIGEST_MISMATCH_OI; } - bool has_size_mismatch() const { - return errors & SIZE_MISMATCH; - } bool has_size_mismatch_oi() const { return errors & SIZE_MISMATCH_OI; } - bool has_attr_mismatch() const { - return errors & ATTR_MISMATCH; + bool has_ec_hash_error() const { + return errors & SHARD_EC_HASH_MISMATCH; + } + bool has_ec_size_error() const { + return errors & SHARD_EC_SIZE_MISMATCH; + } + bool has_oi_attr_missing() const { + return errors & OI_ATTR_MISSING; + } + bool has_oi_attr_corrupted() const { + return errors & OI_ATTR_CORRUPTED; + } + bool has_ss_attr_missing() const { + return errors & SS_ATTR_MISSING; } - bool has_attr_missing() const { - return errors & ATTR_MISSING; + bool has_ss_attr_corrupted() const { + return errors & SS_ATTR_CORRUPTED; + } + bool has_shallow_errors() const { + return errors & SHALLOW_ERRORS; + } + bool has_deep_errors() const { + return errors & DEEP_ERRORS; } }; @@ -113,16 +120,72 @@ struct shard_info_t : err_t { uint32_t omap_digest = 0; bool data_digest_present = false; uint32_t data_digest = 0; + bool selected_oi = false; +}; + +struct osd_shard_t { + int32_t osd; + int8_t shard; +}; + +inline bool operator<(const osd_shard_t &lhs, const osd_shard_t &rhs) { + if (lhs.osd < rhs.osd) + return true; + else if (lhs.osd > rhs.osd) + return false; + else + return lhs.shard < rhs.shard; +} + +struct obj_err_t { + enum : uint64_t { + OBJECT_INFO_INCONSISTENCY = 1 << 1, + // XXX: Can an older rados binary work if these bits stay the same? 
+ DATA_DIGEST_MISMATCH = 1 << 4, + OMAP_DIGEST_MISMATCH = 1 << 5, + SIZE_MISMATCH = 1 << 6, + ATTR_VALUE_MISMATCH = 1 << 7, + ATTR_NAME_MISMATCH = 1 << 8, + // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS + }; + uint64_t errors = 0; + static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH; + static constexpr uint64_t DEEP_ERRORS = DATA_DIGEST_MISMATCH|OMAP_DIGEST_MISMATCH; + bool has_object_info_inconsistency() const { + return errors & OBJECT_INFO_INCONSISTENCY; + } + bool has_data_digest_mismatch() const { + return errors & DATA_DIGEST_MISMATCH; + } + bool has_omap_digest_mismatch() const { + return errors & OMAP_DIGEST_MISMATCH; + } + bool has_size_mismatch() const { + return errors & SIZE_MISMATCH; + } + bool has_attr_value_mismatch() const { + return errors & ATTR_VALUE_MISMATCH; + } + bool has_attr_name_mismatch() const { + return errors & ATTR_NAME_MISMATCH; + } + bool has_shallow_errors() const { + return errors & SHALLOW_ERRORS; + } + bool has_deep_errors() const { + return errors & DEEP_ERRORS; + } }; -struct inconsistent_obj_t : err_t { +struct inconsistent_obj_t : obj_err_t { inconsistent_obj_t() = default; inconsistent_obj_t(const object_id_t& object) - : object{object} + : object{object}, version(0) {} object_id_t object; - // osd => shard_info - std::map shards; + uint64_t version; // XXX: Redundant with object info attr + std::map shards; + err_t union_shards; }; struct inconsistent_snapset_t { diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 1bb865b0801b..013a508d9886 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -2117,15 +2117,15 @@ void ECBackend::be_deep_scrub( o.digest_present = false; return; } else { - if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) { - dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl; - o.read_error = true; + if (hinfo->get_total_chunk_size() != pos) { 
+ dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl; + o.ec_size_mismatch = true; return; } - if (hinfo->get_total_chunk_size() != pos) { - dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl; - o.read_error = true; + if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) { + dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl; + o.ec_hash_mismatch = true; return; } diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 6359fb175752..c3153d01a1a6 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -405,64 +405,91 @@ void PGBackend::be_scan_list( } } -enum scrub_error_type PGBackend::be_compare_scrub_objects( +bool PGBackend::be_compare_scrub_objects( pg_shard_t auth_shard, const ScrubMap::object &auth, const object_info_t& auth_oi, const ScrubMap::object &candidate, - shard_info_wrapper &result, + shard_info_wrapper &shard_result, + inconsistent_obj_wrapper &obj_result, ostream &errorstream) { - enum scrub_error_type error = CLEAN; + enum { CLEAN, FOUND_ERROR } error = CLEAN; if (candidate.stat_error) { - error = SHALLOW_ERROR; + assert(shard_result.has_stat_error()); + error = FOUND_ERROR; errorstream << "candidate had a stat error"; } - if (candidate.read_error) { - error = DEEP_ERROR; + if (candidate.read_error || candidate.ec_hash_mismatch || candidate.ec_size_mismatch) { + error = FOUND_ERROR; errorstream << "candidate had a read error"; } if (auth.digest_present && candidate.digest_present) { if (auth.digest != candidate.digest) { if (error != CLEAN) errorstream << ", "; - error = DEEP_ERROR; - bool known = auth_oi.is_data_digest() && - auth.digest == auth_oi.data_digest; + error = FOUND_ERROR; errorstream << "data_digest 0x" << std::hex << candidate.digest - << " != " - << (known ? 
"known" : "best guess") - << " data_digest 0x" << auth.digest << std::dec - << " from auth shard " << auth_shard; - result.set_data_digest_mismatch(); + << " != data_digest 0x" << auth.digest << std::dec + << " from shard " << auth_shard; + obj_result.set_data_digest_mismatch(); } } if (auth.omap_digest_present && candidate.omap_digest_present) { if (auth.omap_digest != candidate.omap_digest) { if (error != CLEAN) errorstream << ", "; - error = DEEP_ERROR; - bool known = auth_oi.is_omap_digest() && - auth.omap_digest == auth_oi.omap_digest; + error = FOUND_ERROR; errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest - << " != " - << (known ? "known" : "best guess") - << " omap_digest 0x" << auth.omap_digest << std::dec - << " from auth shard " << auth_shard; - result.set_omap_digest_mismatch(); + << " != omap_digest 0x" << auth.omap_digest << std::dec + << " from shard " << auth_shard; + obj_result.set_omap_digest_mismatch(); } } - if (!candidate.stat_error && auth.size != candidate.size) { + if (parent->get_pool().is_replicated()) { + if (auth_oi.is_data_digest() && candidate.digest_present) { + if (auth_oi.data_digest != candidate.digest) { + if (error != CLEAN) + errorstream << ", "; + error = FOUND_ERROR; + errorstream << "data_digest 0x" << std::hex << candidate.digest + << " != data_digest 0x" << auth_oi.data_digest << std::dec + << " from auth oi " << auth_oi; + shard_result.set_data_digest_mismatch_oi(); + } + } + if (auth_oi.is_omap_digest() && candidate.omap_digest_present) { + if (auth_oi.omap_digest != candidate.omap_digest) { + if (error != CLEAN) + errorstream << ", "; + error = FOUND_ERROR; + errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest + << " != omap_digest 0x" << auth_oi.omap_digest << std::dec + << " from auth oi " << auth_oi; + shard_result.set_omap_digest_mismatch_oi(); + } + } + } + if (candidate.stat_error) + return error == FOUND_ERROR; + uint64_t oi_size = be_get_ondisk_size(auth_oi.size); + if (oi_size 
!= candidate.size) { + if (error != CLEAN) + errorstream << ", "; + error = FOUND_ERROR; + errorstream << "size " << candidate.size + << " != size " << oi_size + << " from auth oi " << auth_oi; + shard_result.set_size_mismatch_oi(); + } + if (auth.size != candidate.size) { if (error != CLEAN) errorstream << ", "; - if (error != DEEP_ERROR) - error = SHALLOW_ERROR; - bool known = auth.size == be_get_ondisk_size(auth_oi.size); + error = FOUND_ERROR; errorstream << "size " << candidate.size - << " != " - << (known ? "known" : "best guess") - << " size " << auth.size; - result.set_size_mismatch(); + << " != size " << auth.size + << " from shard " << auth_shard; + obj_result.set_size_mismatch(); } for (map::const_iterator i = auth.attrs.begin(); i != auth.attrs.end(); @@ -470,17 +497,15 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects( if (!candidate.attrs.count(i->first)) { if (error != CLEAN) errorstream << ", "; - if (error != DEEP_ERROR) - error = SHALLOW_ERROR; - errorstream << "missing attr " << i->first; - result.set_attr_missing(); + error = FOUND_ERROR; + errorstream << "attr name mismatch '" << i->first << "'"; + obj_result.set_attr_name_mismatch(); } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) { if (error != CLEAN) errorstream << ", "; - if (error != DEEP_ERROR) - error = SHALLOW_ERROR; - errorstream << "attr value mismatch " << i->first; - result.set_attr_mismatch(); + error = FOUND_ERROR; + errorstream << "attr value mismatch '" << i->first << "'"; + obj_result.set_attr_value_mismatch(); } } for (map::const_iterator i = candidate.attrs.begin(); @@ -489,21 +514,35 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects( if (!auth.attrs.count(i->first)) { if (error != CLEAN) errorstream << ", "; - if (error != DEEP_ERROR) - error = SHALLOW_ERROR; - errorstream << "extra attr " << i->first; - result.set_attr_unexpected(); + error = FOUND_ERROR; + errorstream << "attr name mismatch '" << i->first << "'"; + 
obj_result.set_attr_name_mismatch(); } } - return error; + return error == FOUND_ERROR; +} + +static int dcount(const object_info_t &oi) +{ + int count = 0; + if (oi.is_data_digest()) + count++; + if (oi.is_omap_digest()) + count++; + return count; } map::const_iterator PGBackend::be_select_auth_object( const hobject_t &obj, const map &maps, - object_info_t *auth_oi) + object_info_t *auth_oi, + map &shard_map, + inconsistent_obj_wrapper &object_error) { + eversion_t auth_version; + bufferlist auth_bl; + map::const_iterator auth = maps.end(); for (map::const_iterator j = maps.begin(); j != maps.end(); @@ -513,76 +552,81 @@ map::const_iterator if (i == j->second->objects.end()) { continue; } - if (i->second.read_error || i->second.stat_error) { - // scrub encountered read error or stat_error, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << "," << (i->second.read_error ? " read_error" : "") - << (i->second.stat_error ? " stat_error" : "") - << dendl; - continue; + string error_string; + auto& shard_info = shard_map[j->first]; + if (i->second.read_error) { + shard_info.set_read_error(); + error_string += " read_error"; } - map::iterator k = i->second.attrs.find(OI_ATTR); - if (k == i->second.attrs.end()) { - // no object info on object, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", no oi attr" - << dendl; - continue; + if (i->second.ec_hash_mismatch) { + shard_info.set_ec_hash_mismatch(); + error_string += " ec_hash_mismatch"; + } + if (i->second.ec_size_mismatch) { + shard_info.set_ec_size_mismatch(); + error_string += " ec_size_mismatch"; } + object_info_t oi; bufferlist bl; + map::iterator k; + + if (i->second.stat_error) { + shard_info.set_stat_error(); + error_string += " stat_error"; + // With stat_error no further checking + // We don't need to also see a missing_object_info_attr + goto out; + } + + k = i->second.attrs.find(OI_ATTR); + if (k == 
i->second.attrs.end()) { + // no object info on object, probably corrupt + shard_info.set_oi_attr_missing(); + error_string += " oi_attr_missing"; + goto out; + } bl.push_back(k->second); - object_info_t oi; try { bufferlist::iterator bliter = bl.begin(); ::decode(oi, bliter); } catch (...) { - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", corrupt oi attr" - << dendl; // invalid object info, probably corrupt - continue; + shard_info.set_oi_attr_corrupted(); + error_string += " oi_attr_corrupted"; + goto out; + } + + if (auth_version != eversion_t()) { + if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) { + object_error.set_object_info_inconsistency(); + error_string += " object_info_inconsistency"; + } } - // note candidate in case we can't find anything better, because - // something is better than nothing. FIXME. - auth = j; - *auth_oi = oi; + // Don't use this particular shard because it won't be able to repair data + // XXX: For now we can't pick one shard for repair and another's object info + if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch) + goto out; + + if (auth_version == eversion_t() || oi.version > auth_version || + (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) { + auth = j; + *auth_oi = oi; + auth_version = oi.version; + auth_bl.clear(); + auth_bl.append(bl); + } - uint64_t correct_size = be_get_ondisk_size(oi.size); - if (correct_size != i->second.size) { - // invalid size, probably corrupt - dout(10) << __func__ << ": rejecting osd " << j->first +out: + // Check error_string because some errors already generated messages + if (error_string != "") { + dout(10) << __func__ << ": error(s) osd " << j->first << " for obj " << obj - << ", size mismatch" + << "," << error_string << dendl; - // invalid object info, probably corrupt - continue; } - if (parent->get_pool().is_replicated()) { - if (oi.is_data_digest() && i->second.digest_present && 
- oi.data_digest != i->second.digest) { - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", data digest mismatch 0x" << std::hex - << i->second.digest << " != 0x" << oi.data_digest - << std::dec << dendl; - continue; - } - if (oi.is_omap_digest() && i->second.omap_digest_present && - oi.omap_digest != i->second.omap_digest) { - dout(10) << __func__ << ": rejecting osd " << j->first - << " for obj " << obj - << ", omap digest mismatch 0x" << std::hex - << i->second.omap_digest << " != 0x" << oi.omap_digest - << std::dec << dendl; - continue; - } - } - break; + // Keep scanning other shards } dout(10) << __func__ << ": selecting osd " << auth->first << " for obj " << obj @@ -621,60 +665,77 @@ void PGBackend::be_compare_scrubmaps( k != master_set.end(); ++k) { object_info_t auth_oi; - map::const_iterator auth = - be_select_auth_object(*k, maps, &auth_oi); + map shard_map; + inconsistent_obj_wrapper object_error{*k}; + map::const_iterator auth = + be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error); + list auth_list; if (auth == maps.end()) { - object_error.set_auth_missing(*k, maps); - ++shallow_errors; + object_error.set_version(0); + object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors); + if (object_error.has_deep_errors()) + ++deep_errors; + else if (object_error.has_shallow_errors()) + ++shallow_errors; + store->add_object_error(k->pool, object_error); errorstream << pgid.pgid << " soid " << *k - << ": failed to pick suitable auth object\n"; + << ": failed to pick suitable object info\n"; continue; } - auth_list.push_back(auth->first); - + object_error.set_version(auth_oi.user_version); ScrubMap::object& auth_object = auth->second->objects[*k]; set cur_missing; set cur_inconsistent; - bool clean = true; + for (j = maps.begin(); j != maps.end(); ++j) { if (j == auth) - continue; - shard_info_wrapper shard_info; + shard_map[auth->first].selected_oi = true; if 
(j->second->objects.count(*k)) { - shard_info.set_object(j->second->objects[*k]); + shard_map[j->first].set_object(j->second->objects[*k]); // Compare stringstream ss; - enum scrub_error_type error = - be_compare_scrub_objects(auth->first, + bool found = be_compare_scrub_objects(auth->first, auth_object, auth_oi, j->second->objects[*k], - shard_info, + shard_map[j->first], + object_error, ss); - if (error != CLEAN) { - clean = false; + // Some errors might have already been set in be_select_auth_object() + if (shard_map[j->first].errors != 0) { cur_inconsistent.insert(j->first); - if (error == SHALLOW_ERROR) - ++shallow_errors; - else + if (shard_map[j->first].has_deep_errors()) ++deep_errors; - errorstream << pgid << " shard " << j->first << ": soid " << *k + else + ++shallow_errors; + // Only true if be_compare_scrub_objects() found errors and put something + // in ss. + if (found) + errorstream << pgid << " shard " << j->first << ": soid " << *k << " " << ss.str() << "\n"; } else { + // XXX: The auth shard might get here that we don't know + // that it has the "correct" data. 
auth_list.push_back(j->first); } } else { - clean = false; cur_missing.insert(j->first); + shard_map[j->first].set_missing(); + // Can't have any other errors if there is no information available ++shallow_errors; errorstream << pgid << " shard " << j->first << " missing " << *k << "\n"; - shard_info.set_missing(); } - object_error.add_shard(j->first, shard_info); + object_error.add_shard(j->first, shard_map[j->first]); + } + + if (auth_list.empty()) { + errorstream << pgid.pgid << " soid " << *k + << ": failed to pick suitable auth object\n"; + goto out; } if (!cur_missing.empty()) { missing[*k] = cur_missing; @@ -684,12 +745,7 @@ void PGBackend::be_compare_scrubmaps( } if (!cur_inconsistent.empty() || !cur_missing.empty()) { authoritative[*k] = auth_list; - shard_info_wrapper auth_shard{auth_object}; - object_error.add_shard(auth->first, auth_shard); - } - - if (clean && - parent->get_pool().is_replicated()) { + } else if (parent->get_pool().is_replicated()) { enum { NO = 0, MAYBE = 1, @@ -709,12 +765,10 @@ void PGBackend::be_compare_scrubmaps( update = MAYBE; } - shard_info_wrapper auth_shard{auth_object}; // recorded digest != actual digest? 
if (auth_oi.is_data_digest() && auth_object.digest_present && auth_oi.data_digest != auth_object.digest) { - auth_shard.set_data_digest_mismatch_oi(); - ++deep_errors; + assert(shard_map[auth->first].has_data_digest_mismatch_oi()); errorstream << pgid << " recorded data digest 0x" << std::hex << auth_oi.data_digest << " != on disk 0x" << auth_object.digest << std::dec << " on " << auth_oi.soid @@ -724,8 +778,7 @@ void PGBackend::be_compare_scrubmaps( } if (auth_oi.is_omap_digest() && auth_object.omap_digest_present && auth_oi.omap_digest != auth_object.omap_digest) { - auth_shard.set_omap_digest_mismatch_oi(); - ++deep_errors; + assert(shard_map[auth->first].has_omap_digest_mismatch_oi()); errorstream << pgid << " recorded omap digest 0x" << std::hex << auth_oi.omap_digest << " != on disk 0x" << auth_object.omap_digest << std::dec @@ -733,7 +786,6 @@ void PGBackend::be_compare_scrubmaps( if (repair) update = FORCE; } - object_error.add_shard(auth->first, auth_shard); if (update != NO) { utime_t age = now - auth_oi.local_mtime; @@ -749,7 +801,12 @@ void PGBackend::be_compare_scrubmaps( } } } - if (object_error.errors) { +out: + if (object_error.has_deep_errors()) + ++deep_errors; + else if (object_error.has_shallow_errors()) + ++shallow_errors; + if (object_error.errors || object_error.union_shards.errors) { store->add_object_error(k->pool, object_error); } } diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index b9b2e20cff02..b629b53657e6 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -29,6 +29,7 @@ namespace Scrub { class Store; } struct shard_info_wrapper; +struct inconsistent_obj_wrapper; //forward declaration class OSDMap; @@ -586,17 +587,20 @@ typedef ceph::shared_ptr OSDMapRef; void be_scan_list( ScrubMap &map, const vector &ls, bool deep, uint32_t seed, ThreadPool::TPHandle &handle); - enum scrub_error_type be_compare_scrub_objects( + bool be_compare_scrub_objects( pg_shard_t auth_shard, const ScrubMap::object &auth, const object_info_t& 
auth_oi, const ScrubMap::object &candidate, shard_info_wrapper& shard_error, + inconsistent_obj_wrapper &result, ostream &errorstream); map::const_iterator be_select_auth_object( const hobject_t &obj, const map &maps, - object_info_t *auth_oi); + object_info_t *auth_oi, + map &shard_map, + inconsistent_obj_wrapper &object_error); void be_compare_scrubmaps( const map &maps, bool repair, diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index dc6acbbda052..d7cb2bed55e2 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5145,7 +5145,8 @@ void ScrubMap::generate_test_instances(list& o) void ScrubMap::object::encode(bufferlist& bl) const { - ENCODE_START(7, 2, bl); + bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch; + ENCODE_START(8, 2, bl); ::encode(size, bl); ::encode(negative, bl); ::encode(attrs, bl); @@ -5155,16 +5156,19 @@ void ScrubMap::object::encode(bufferlist& bl) const ::encode(snapcolls, bl); ::encode(omap_digest, bl); ::encode(omap_digest_present, bl); - ::encode(read_error, bl); + ::encode(compat_read_error, bl); ::encode(stat_error, bl); + ::encode(read_error, bl); + ::encode(ec_hash_mismatch, bl); + ::encode(ec_size_mismatch, bl); ENCODE_FINISH(bl); } void ScrubMap::object::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(7, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(8, 2, 2, bl); ::decode(size, bl); - bool tmp; + bool tmp, compat_read_error = false; ::decode(tmp, bl); negative = tmp; ::decode(attrs, bl); @@ -5187,13 +5191,23 @@ void ScrubMap::object::decode(bufferlist::iterator& bl) omap_digest_present = tmp; } if (struct_v >= 6) { - ::decode(tmp, bl); - read_error = tmp; + ::decode(compat_read_error, bl); } if (struct_v >= 7) { ::decode(tmp, bl); stat_error = tmp; } + if (struct_v >= 8) { + ::decode(tmp, bl); + read_error = tmp; + ::decode(tmp, bl); + ec_hash_mismatch = tmp; + ::decode(tmp, bl); + ec_size_mismatch = tmp; + } + // If older encoder found a read_error, set read_error + if 
(compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch) + read_error = true; DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 5bf08c1c3965..3c744e10e813 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -4588,12 +4588,14 @@ struct ScrubMap { bool omap_digest_present:1; bool read_error:1; bool stat_error:1; + bool ec_hash_mismatch:1; + bool ec_size_mismatch:1; object() : // Init invalid size so it won't match if we get a stat EIO error size(-1), omap_digest(0), digest(0), nlinks(0), negative(false), digest_present(false), omap_digest_present(false), - read_error(false), stat_error(false) {} + read_error(false), stat_error(false), ec_hash_mismatch(false), ec_size_mismatch(false) {} void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -4895,12 +4897,6 @@ struct obj_list_snap_response_t { WRITE_CLASS_ENCODER(obj_list_snap_response_t) -enum scrub_error_type { - CLEAN, - DEEP_ERROR, - SHALLOW_ERROR -}; - // PromoteCounter struct PromoteCounter { diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 4ae230fe65aa..fdd367f7dacc 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -46,6 +46,7 @@ using namespace libradosstriper; #include "cls/lock/cls_lock_client.h" #include "include/compat.h" +#include "include/util.h" #include "common/hobject.h" #include "PoolDump.h" @@ -1318,19 +1319,43 @@ static int do_get_inconsistent_pg_cmd(const std::vector &nargs, return 0; } +static void dump_errors(const err_t &err, Formatter &f, const char *name) +{ + f.open_array_section(name); + if (err.has_shard_missing()) + f.dump_string("error", "missing"); + if (err.has_stat_error()) + f.dump_string("error", "stat_error"); + if (err.has_read_error()) + f.dump_string("error", "read_error"); + if (err.has_data_digest_mismatch_oi()) + f.dump_string("error", "data_digest_mismatch_oi"); + if (err.has_omap_digest_mismatch_oi()) + f.dump_string("error", 
"omap_digest_mismatch_oi"); + if (err.has_size_mismatch_oi()) + f.dump_string("error", "size_mismatch_oi"); + if (err.has_ec_hash_error()) + f.dump_string("error", "ec_hash_error"); + if (err.has_ec_size_error()) + f.dump_string("error", "ec_size_error"); + if (err.has_oi_attr_missing()) + f.dump_string("error", "oi_attr_missing"); + if (err.has_oi_attr_corrupted()) + f.dump_string("error", "oi_attr_corrupted"); + f.close_section(); +} + static void dump_shard(const shard_info_t& shard, const inconsistent_obj_t& inc, Formatter &f) { - // A missing shard just has that error and nothing else - if (shard.has_shard_missing()) { - f.open_array_section("errors"); - f.dump_string("error", "missing"); - f.close_section(); + dump_errors(shard, f, "errors"); + + if (shard.has_shard_missing()) return; - } - f.dump_unsigned("size", shard.size); + if (!shard.has_stat_error()) + f.dump_unsigned("size", shard.size); if (shard.omap_digest_present) { f.dump_format("omap_digest", "0x%08x", shard.omap_digest); } @@ -1338,44 +1363,48 @@ static void dump_shard(const shard_info_t& shard, f.dump_format("data_digest", "0x%08x", shard.data_digest); } - f.open_array_section("errors"); - if (shard.has_read_error()) - f.dump_string("error", "read_error"); - if (shard.has_data_digest_mismatch()) - f.dump_string("error", "data_digest_mismatch"); - if (shard.has_omap_digest_mismatch()) - f.dump_string("error", "omap_digest_mismatch"); - if (shard.has_size_mismatch()) - f.dump_string("error", "size_mismatch"); - if (!shard.has_read_error()) { - if (shard.has_data_digest_mismatch_oi()) - f.dump_string("error", "data_digest_mismatch_oi"); - if (shard.has_omap_digest_mismatch_oi()) - f.dump_string("error", "omap_digest_mismatch_oi"); - if (shard.has_size_mismatch_oi()) - f.dump_string("error", "size_mismatch_oi"); - } - if (shard.has_attr_missing()) - f.dump_string("error", "attr_missing"); - if (shard.has_attr_unexpected()) - f.dump_string("error", "attr_unexpected"); - f.close_section(); - - if 
(inc.has_attr_mismatch()) { - f.open_object_section("attrs"); + if (!shard.has_oi_attr_missing() && !shard.has_oi_attr_corrupted() && + inc.has_object_info_inconsistency()) { + object_info_t oi; + bufferlist bl; + map::iterator k = (const_cast(shard)).attrs.find(OI_ATTR); + assert(k != shard.attrs.end()); // Can't be missing + bufferlist::iterator bliter = k->second.begin(); + ::decode(oi, bliter); // Can't be corrupted + f.dump_stream("object_info") << oi; + } + if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) { + f.open_array_section("attrs"); for (auto kv : shard.attrs) { f.open_object_section("attr"); f.dump_string("name", kv.first); - bufferlist b64; - kv.second.encode_base64(b64); - string v(b64.c_str(), b64.length()); - f.dump_string("value", v); + bool b64; + f.dump_string("value", cleanbin(kv.second, b64)); + f.dump_bool("Base64", b64); f.close_section(); } f.close_section(); } } +static void dump_obj_errors(const obj_err_t &err, Formatter &f) +{ + f.open_array_section("errors"); + if (err.has_object_info_inconsistency()) + f.dump_string("error", "object_info_inconsistency"); + if (err.has_data_digest_mismatch()) + f.dump_string("error", "data_digest_mismatch"); + if (err.has_omap_digest_mismatch()) + f.dump_string("error", "omap_digest_mismatch"); + if (err.has_size_mismatch()) + f.dump_string("error", "size_mismatch"); + if (err.has_attr_value_mismatch()) + f.dump_string("error", "attr_value_mismatch"); + if (err.has_attr_name_mismatch()) + f.dump_string("error", "attr_name_mismatch"); + f.close_section(); +} + static void dump_object_id(const object_id_t& object, Formatter &f) { @@ -1400,32 +1429,33 @@ static void dump_inconsistent(const inconsistent_obj_t& inc, { f.open_object_section("object"); dump_object_id(inc.object, f); + f.dump_unsigned("version", inc.version); f.close_section(); - f.open_array_section("errors"); - if (inc.has_attr_unexpected()) - f.dump_string("error", "attr_unexpected"); - if (inc.has_shard_missing()) - 
f.dump_string("error", "missing"); - if (inc.has_stat_error()) - f.dump_string("error", "stat_error"); - if (inc.has_read_error()) - f.dump_string("error", "read_error"); - if (inc.has_data_digest_mismatch()) - f.dump_string("error", "data_digest_mismatch"); - if (inc.has_omap_digest_mismatch()) - f.dump_string("error", "omap_digest_mismatch"); - if (inc.has_size_mismatch()) - f.dump_string("error", "size_mismatch"); - if (inc.has_attr_mismatch()) - f.dump_string("error", "attr_mismatch"); - f.close_section(); - + dump_obj_errors(inc, f); + dump_errors(inc.union_shards, f, "union_shard_errors"); + for (const auto& shard_info : inc.shards) { + shard_info_t shard = const_cast(shard_info.second); + if (shard.selected_oi) { + object_info_t oi; + bufferlist bl; + auto k = shard.attrs.find(OI_ATTR); + assert(k != shard.attrs.end()); // Can't be missing + bufferlist::iterator bliter = k->second.begin(); + ::decode(oi, bliter); // Can't be corrupted + f.dump_stream("selected_object_info") << oi; + break; + } + } f.open_array_section("shards"); - for (auto osd_shard : inc.shards) { + for (const auto& shard_info : inc.shards) { f.open_object_section("shard"); - f.dump_int("osd", osd_shard.first); - dump_shard(osd_shard.second, inc, f); + auto& osd_shard = shard_info.first; + f.dump_int("osd", osd_shard.osd); + auto shard = osd_shard.shard; + if (shard != shard_id_t::NO_SHARD) + f.dump_unsigned("shard", shard); + dump_shard(shard_info.second, inc, f); f.close_section(); } f.close_section();