reinterpret_cast<const object_id_wrapper&>(obj).encode(bl);
}
// Serializes this wrapper's osd and shard fields, in that order, between
// ENCODE_START(1, 1, ...) and ENCODE_FINISH. osd_shard_wrapper::decode()
// reads the two fields back in the same order.
+void osd_shard_wrapper::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(osd, bl);
+ ::encode(shard, bl);
+ ENCODE_FINISH(bl);
+}
+
// Reads the osd and shard fields from bp, in the order encode() wrote them,
// between DECODE_START(1, ...) and DECODE_FINISH.
+void osd_shard_wrapper::decode(bufferlist::iterator& bp)
+{
+ DECODE_START(1, bp);
+ ::decode(osd, bp);
+ ::decode(shard, bp);
+ DECODE_FINISH(bp);
+}
+
+namespace librados {
// Encoder overload for the plain librados::osd_shard_t: views the object as
// an osd_shard_wrapper and forwards to osd_shard_wrapper::encode().
// NOTE(review): the cast assumes osd_shard_wrapper adds no state on top of
// osd_shard_t (its declaration only adds encode/decode) — confirm.
+ static void encode(const osd_shard_t& shard, bufferlist& bl)
+ {
+ reinterpret_cast<const osd_shard_wrapper&>(shard).encode(bl);
+ }
+}
+
void shard_info_wrapper::set_object(const ScrubMap::object& object)
{
for (auto attr : object.attrs) {
data_digest_present = true;
data_digest = object.digest;
}
- if (object.read_error) {
- errors |= SHARD_READ_ERR;
- }
- if (object.stat_error) {
- errors |= SHARD_STAT_ERR;
- }
}
void shard_info_wrapper::encode(bufferlist& bl) const
{
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(errors, bl);
if (has_shard_missing()) {
return;
::encode(omap_digest, bl);
::encode(data_digest_present, bl);
::encode(data_digest, bl);
+ ::encode(selected_oi, bl);
ENCODE_FINISH(bl);
}
void shard_info_wrapper::decode(bufferlist::iterator& bp)
{
- DECODE_START(1, bp);
+ DECODE_START(2, bp);
::decode(errors, bp);
if (has_shard_missing()) {
return;
::decode(omap_digest, bp);
::decode(data_digest_present, bp);
::decode(data_digest, bp);
+ if (struct_v > 1)
+ ::decode(selected_oi, bp);
DECODE_FINISH(bp);
}
// Records one shard's scrub result on this object.
// The shard's error bits are ORed into union_shards.errors (no longer into the
// object-level `errors`), and the shard info is stored under the key
// (pgs.osd, pgs.shard) instead of the bare osd id.
// Note: std::map::emplace keeps an existing entry when the key is already
// present, whereas the removed operator[] assignment overwrote it.
void inconsistent_obj_wrapper::add_shard(const pg_shard_t& pgs,
const shard_info_wrapper& shard)
{
- errors |= shard.errors;
- shards[pgs.osd] = shard;
+ union_shards.errors |= shard.errors;
+ shards.emplace(osd_shard_t{pgs.osd, int8_t(pgs.shard)}, shard);
}
// Fills in per-shard information for object hoid from every scrub map in
// `maps`, for the case where the caller could not pick an authoritative copy.
//
// For each shard in `maps`:
//  - the shard_map entry is marked missing when the shard's scrub map has no
//    entry for hoid, otherwise set_object() is applied to it;
//  - deep_errors is incremented when the entry has deep errors, else
//    shallow_errors is incremented when it has shallow errors;
//  - the entry's error bits are ORed into union_shards.errors and a copy of
//    the entry is stored in `shards` under (osd, shard).
//
// shard_map is an in/out parameter: entries may already carry error bits set
// by the caller, and operator[] creates entries that do not exist yet.
// NOTE(review): `auto pg_map` copies each map element and shard_map is looked
// up repeatedly per iteration; harmless but avoidable.
void
inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid,
- const map<pg_shard_t, ScrubMap*>& maps)
+ const map<pg_shard_t, ScrubMap*>& maps,
+ map<pg_shard_t, shard_info_wrapper> &shard_map,
+ int &shallow_errors, int &deep_errors)
{
- errors |= (err_t::SHARD_MISSING |
- err_t::SHARD_READ_ERR |
- err_t::OMAP_DIGEST_MISMATCH |
- err_t::DATA_DIGEST_MISMATCH |
- err_t::ATTR_MISMATCH);
for (auto pg_map : maps) {
auto oid_object = pg_map.second->objects.find(hoid);
- shard_info_wrapper shard;
- if (oid_object == pg_map.second->objects.end()) {
- shard.set_missing();
- } else {
- shard.set_object(oid_object->second);
- }
- shards[pg_map.first.osd] = shard;
+ if (oid_object == pg_map.second->objects.end())
+ shard_map[pg_map.first].set_missing();
+ else
+ shard_map[pg_map.first].set_object(oid_object->second);
// A shard is counted once: as a deep error if any deep bit is set,
// otherwise as a shallow error if any shallow bit is set.
+ if (shard_map[pg_map.first].has_deep_errors())
+ ++deep_errors;
+ else if (shard_map[pg_map.first].has_shallow_errors())
+ ++shallow_errors;
+ union_shards.errors |= shard_map[pg_map.first].errors;
+ shards.emplace(osd_shard_t{pg_map.first.osd, pg_map.first.shard}, shard_map[pg_map.first]);
}
}
// Serializes the object-level error bits, the object id, the version, the
// per-shard map and the union of the shard error bits, in that order.
// The change raises both arguments of ENCODE_START from 1 to 2 and inserts
// `version` between `object` and `shards`, so the layout differs from the
// previous encoding; decode() reads the fields in the same order.
void inconsistent_obj_wrapper::encode(bufferlist& bl) const
{
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 2, bl);
::encode(errors, bl);
::encode(object, bl);
+ ::encode(version, bl);
::encode(shards, bl);
+ ::encode(union_shards.errors, bl);
ENCODE_FINISH(bl);
}
// Reads the fields written by encode(): errors, object, version, shards and
// union_shards.errors, in that order.
// `version` and `union_shards.errors` are read unconditionally (there is no
// struct_v check), so only the new layout can be decoded.
// NOTE(review): DECODE_OLDEST(2) presumably rejects encodings older than
// version 2 — confirm against the macro definition.
void inconsistent_obj_wrapper::decode(bufferlist::iterator& bp)
{
- DECODE_START(1, bp);
+ DECODE_START(2, bp);
+ DECODE_OLDEST(2);
::decode(errors, bp);
::decode(object, bp);
+ ::decode(version, bp);
::decode(shards, bp);
+ ::decode(union_shards.errors, bp);
DECODE_FINISH(bp);
}
reinterpret_cast<object_id_wrapper&>(obj).decode(bp);
}
// Adds encode()/decode() to librados::osd_shard_t without adding any data
// members, so free functions can reinterpret_cast an osd_shard_t to this
// wrapper in order to (de)serialize it.
+struct osd_shard_wrapper : public librados::osd_shard_t {
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bp);
+};
+
+WRITE_CLASS_ENCODER(osd_shard_wrapper)
+
+namespace librados {
// Decoder overload for the plain librados::osd_shard_t: views the object as
// an osd_shard_wrapper and forwards to osd_shard_wrapper::decode().
+ inline void decode(librados::osd_shard_t& shard, bufferlist::iterator& bp) {
+ reinterpret_cast<osd_shard_wrapper&>(shard).decode(bp);
+ }
+}
+
struct shard_info_wrapper : public librados::shard_info_t {
public:
shard_info_wrapper() = default;
void set_missing() {
errors |= err_t::SHARD_MISSING;
}
- void set_omap_digest_mismatch() {
- errors |= err_t::OMAP_DIGEST_MISMATCH;
- }
void set_omap_digest_mismatch_oi() {
errors |= err_t::OMAP_DIGEST_MISMATCH_OI;
}
- void set_data_digest_mismatch() {
- errors |= err_t::DATA_DIGEST_MISMATCH;
+ void set_size_mismatch_oi() {
+ errors |= err_t::SIZE_MISMATCH_OI;
}
void set_data_digest_mismatch_oi() {
errors |= err_t::DATA_DIGEST_MISMATCH_OI;
}
- void set_size_mismatch() {
- errors |= err_t::SIZE_MISMATCH;
+ void set_read_error() {
+ errors |= err_t::SHARD_READ_ERR;
}
- void set_attr_missing() {
- errors |= err_t::ATTR_MISSING;
+ void set_stat_error() {
+ errors |= err_t::SHARD_STAT_ERR;
}
- void set_attr_mismatch() {
- errors |= err_t::ATTR_MISMATCH;
+ void set_ec_hash_mismatch() {
+ errors |= err_t::SHARD_EC_HASH_MISMATCH;
}
- void set_attr_unexpected() {
- errors |= err_t::ATTR_UNEXPECTED;
+ void set_ec_size_mismatch() {
+ errors |= err_t::SHARD_EC_SIZE_MISMATCH;
+ }
+ void set_oi_attr_missing() {
+ errors |= err_t::OI_ATTR_MISSING;
+ }
+ void set_oi_attr_corrupted() {
+ errors |= err_t::OI_ATTR_CORRUPTED;
+ }
+ void set_ss_attr_missing() {
+ errors |= err_t::SS_ATTR_MISSING;
+ }
+ void set_ss_attr_corrupted() {
+ errors |= err_t::SS_ATTR_CORRUPTED;
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
// Wrapper around librados::inconsistent_obj_t that adds setters for the
// object-level error bits (obj_err_t), helpers that attach per-shard results,
// and encode()/decode().
struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
inconsistent_obj_wrapper(const hobject_t& hoid);

// Each setter below ORs exactly one obj_err_t bit into `errors`.
+ void set_object_info_inconsistency() {
+ errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY;
+ }
+ void set_omap_digest_mismatch() {
+ errors |= obj_err_t::OMAP_DIGEST_MISMATCH;
+ }
+ void set_data_digest_mismatch() {
+ errors |= obj_err_t::DATA_DIGEST_MISMATCH;
+ }
+ void set_size_mismatch() {
+ errors |= obj_err_t::SIZE_MISMATCH;
+ }
+ void set_attr_value_mismatch() {
+ errors |= obj_err_t::ATTR_VALUE_MISMATCH;
+ }
+ void set_attr_name_mismatch() {
+ errors |= obj_err_t::ATTR_NAME_MISMATCH;
+ }
// Stores one shard's result and merges its error bits into union_shards.
void add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard);
// Populates every shard's entry for hoid and updates the two error counters;
// the shard_info map is an in/out parameter.
void set_auth_missing(const hobject_t& hoid,
- const map<pg_shard_t, ScrubMap*>& map);
+ const map<pg_shard_t, ScrubMap*>&,
+ map<pg_shard_t, shard_info_wrapper>&,
+ int &shallow_errors, int &deep_errors);
+ void set_version(uint64_t ver) { version = ver; }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
};
};
// Shard-level scrub error flags. Each enumerator is a single bit that may be
// set in `errors`; the has_*() predicates test one bit each.
// Every remaining enumerator belongs to exactly one of SHALLOW_ERRORS or
// DEEP_ERRORS, which has_shallow_errors()/has_deep_errors() test as a group.
// The object-level mismatch bits removed here (bits 4-8) reappear with the
// same values in obj_err_t; bit 0 is no longer used by this enum.
struct err_t {
- enum {
- ATTR_UNEXPECTED = 1 << 0,
+ enum : uint64_t {
SHARD_MISSING = 1 << 1,
SHARD_STAT_ERR = 1 << 2,
SHARD_READ_ERR = 1 << 3,
- DATA_DIGEST_MISMATCH = 1 << 4,
- OMAP_DIGEST_MISMATCH = 1 << 5,
- SIZE_MISMATCH = 1 << 6,
- ATTR_MISMATCH = 1 << 7,
- ATTR_MISSING = 1 << 8,
DATA_DIGEST_MISMATCH_OI = 1 << 9,
OMAP_DIGEST_MISMATCH_OI = 1 << 10,
SIZE_MISMATCH_OI = 1 << 11,
+ SHARD_EC_HASH_MISMATCH = 1 << 12,
+ SHARD_EC_SIZE_MISMATCH = 1 << 13,
+ OI_ATTR_MISSING = 1 << 14,
+ OI_ATTR_CORRUPTED = 1 << 15,
+ SS_ATTR_MISSING = 1 << 16,
+ SS_ATTR_CORRUPTED = 1 << 17
+ // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
};
uint64_t errors = 0;
- bool has_attr_unexpected() const {
- return errors & ATTR_UNEXPECTED;
- }
+ static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED;
+ static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH;
bool has_shard_missing() const {
return errors & SHARD_MISSING;
}
bool has_read_error() const {
return errors & SHARD_READ_ERR;
}
- bool has_data_digest_mismatch() const {
- return errors & DATA_DIGEST_MISMATCH;
- }
- bool has_omap_digest_mismatch() const {
- return errors & OMAP_DIGEST_MISMATCH;
- }
- // deep error
bool has_data_digest_mismatch_oi() const {
return errors & DATA_DIGEST_MISMATCH_OI;
}
- // deep error
bool has_omap_digest_mismatch_oi() const {
return errors & OMAP_DIGEST_MISMATCH_OI;
}
- bool has_size_mismatch() const {
- return errors & SIZE_MISMATCH;
- }
bool has_size_mismatch_oi() const {
return errors & SIZE_MISMATCH_OI;
}
- bool has_attr_mismatch() const {
- return errors & ATTR_MISMATCH;
// Note the naming: has_ec_hash_error()/has_ec_size_error() test the
// SHARD_EC_HASH_MISMATCH / SHARD_EC_SIZE_MISMATCH bits.
+ bool has_ec_hash_error() const {
+ return errors & SHARD_EC_HASH_MISMATCH;
+ }
+ bool has_ec_size_error() const {
+ return errors & SHARD_EC_SIZE_MISMATCH;
+ }
+ bool has_oi_attr_missing() const {
+ return errors & OI_ATTR_MISSING;
+ }
+ bool has_oi_attr_corrupted() const {
+ return errors & OI_ATTR_CORRUPTED;
+ }
+ bool has_ss_attr_missing() const {
+ return errors & SS_ATTR_MISSING;
}
- bool has_attr_missing() const {
- return errors & ATTR_MISSING;
+ bool has_ss_attr_corrupted() const {
+ return errors & SS_ATTR_CORRUPTED;
+ }
// True when any bit of the corresponding group is set.
+ bool has_shallow_errors() const {
+ return errors & SHALLOW_ERRORS;
+ }
+ bool has_deep_errors() const {
+ return errors & DEEP_ERRORS;
}
};
uint32_t omap_digest = 0;
bool data_digest_present = false;
uint32_t data_digest = 0;
+ bool selected_oi = false;
+};
+
// Key type of inconsistent_obj_t::shards: identifies a shard by OSD id plus
// shard id. It is brace-initialized as an aggregate ({osd, shard}) by its
// users, and the members have no default initializers, so a
// default-constructed osd_shard_t holds indeterminate values.
+struct osd_shard_t {
+ int32_t osd; // OSD id
+ int8_t shard; // shard id; the dump code compares it with shard_id_t::NO_SHARD
+};
+
+// Ordering for osd_shard_t map keys: compares the osd ids first and falls
+// back to the shard ids only when the osd ids are equal.
+inline bool operator<(const osd_shard_t &lhs, const osd_shard_t &rhs) {
+  if (lhs.osd != rhs.osd)
+    return lhs.osd < rhs.osd;
+  return lhs.shard < rhs.shard;
+}
+
+struct obj_err_t {
+ enum : uint64_t {
+ OBJECT_INFO_INCONSISTENCY = 1 << 1,
+ // XXX: Can an older rados binary work if these bits stay the same?
+ DATA_DIGEST_MISMATCH = 1 << 4,
+ OMAP_DIGEST_MISMATCH = 1 << 5,
+ SIZE_MISMATCH = 1 << 6,
+ ATTR_VALUE_MISMATCH = 1 << 7,
+ ATTR_NAME_MISMATCH = 1 << 8,
+ // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
+ };
+ uint64_t errors = 0;
+ static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH;
+ static constexpr uint64_t DEEP_ERRORS = DATA_DIGEST_MISMATCH|OMAP_DIGEST_MISMATCH;
+ bool has_object_info_inconsistency() const {
+ return errors & OBJECT_INFO_INCONSISTENCY;
+ }
+ bool has_data_digest_mismatch() const {
+ return errors & DATA_DIGEST_MISMATCH;
+ }
+ bool has_omap_digest_mismatch() const {
+ return errors & OMAP_DIGEST_MISMATCH;
+ }
+ bool has_size_mismatch() const {
+ return errors & SIZE_MISMATCH;
+ }
+ bool has_attr_value_mismatch() const {
+ return errors & ATTR_VALUE_MISMATCH;
+ }
+ bool has_attr_name_mismatch() const {
+ return errors & ATTR_NAME_MISMATCH;
+ }
+ bool has_shallow_errors() const {
+ return errors & SHALLOW_ERRORS;
+ }
+ bool has_deep_errors() const {
+ return errors & DEEP_ERRORS;
+ }
};
-struct inconsistent_obj_t : err_t {
+struct inconsistent_obj_t : obj_err_t {
inconsistent_obj_t() = default;
inconsistent_obj_t(const object_id_t& object)
- : object{object}
+ : object{object}, version(0)
{}
object_id_t object;
- // osd => shard_info
- std::map<int32_t, shard_info_t> shards;
+ uint64_t version; // XXX: Redundant with object info attr
+ std::map<osd_shard_t, shard_info_t> shards;
+ err_t union_shards;
};
struct inconsistent_snapset_t {
o.digest_present = false;
return;
} else {
- if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) {
- dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl;
- o.read_error = true;
+ if (hinfo->get_total_chunk_size() != pos) {
+ dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl;
+ o.ec_size_mismatch = true;
return;
}
- if (hinfo->get_total_chunk_size() != pos) {
- dout(0) << "_scan_list " << poid << " got incorrect size on read" << dendl;
- o.read_error = true;
+ if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != h.digest()) {
+ dout(0) << "_scan_list " << poid << " got incorrect hash on read" << dendl;
+ o.ec_hash_mismatch = true;
return;
}
}
}
// Compares one candidate shard's scrub entry against the selected entry
// `auth` (from shard auth_shard) and its decoded object info `auth_oi`.
//
// Results are split by kind:
//  - differences between candidate and `auth` (digests, size, attr names and
//    values) are flagged on obj_result (object-level errors);
//  - differences between candidate and `auth_oi` (digests for replicated
//    pools, on-disk size) are flagged on shard_result (shard-level errors).
// A comma-separated description of what was found is appended to
// errorstream. Returns true when at least one problem was reported there;
// the shallow/deep classification is no longer returned from here.
-enum scrub_error_type PGBackend::be_compare_scrub_objects(
+bool PGBackend::be_compare_scrub_objects(
pg_shard_t auth_shard,
const ScrubMap::object &auth,
const object_info_t& auth_oi,
const ScrubMap::object &candidate,
- shard_info_wrapper &result,
+ shard_info_wrapper &shard_result,
+ inconsistent_obj_wrapper &obj_result,
ostream &errorstream)
{
- enum scrub_error_type error = CLEAN;
+ enum { CLEAN, FOUND_ERROR } error = CLEAN;
// The stat error bit is expected to be set on shard_result before this call
// (the assert checks it); here it is only reported.
if (candidate.stat_error) {
- error = SHALLOW_ERROR;
+ assert(shard_result.has_stat_error());
+ error = FOUND_ERROR;
errorstream << "candidate had a stat error";
}
// NOTE(review): unlike the checks below, no ", " separator is written here,
// so a stat error followed by a read error yields two messages run together.
- if (candidate.read_error) {
- error = DEEP_ERROR;
+ if (candidate.read_error || candidate.ec_hash_mismatch || candidate.ec_size_mismatch) {
+ error = FOUND_ERROR;
errorstream << "candidate had a read error";
}
// Shard-vs-shard digest comparison: only when both sides have a digest.
if (auth.digest_present && candidate.digest_present) {
if (auth.digest != candidate.digest) {
if (error != CLEAN)
errorstream << ", ";
- error = DEEP_ERROR;
- bool known = auth_oi.is_data_digest() &&
- auth.digest == auth_oi.data_digest;
+ error = FOUND_ERROR;
errorstream << "data_digest 0x" << std::hex << candidate.digest
- << " != "
- << (known ? "known" : "best guess")
- << " data_digest 0x" << auth.digest << std::dec
- << " from auth shard " << auth_shard;
- result.set_data_digest_mismatch();
+ << " != data_digest 0x" << auth.digest << std::dec
+ << " from shard " << auth_shard;
+ obj_result.set_data_digest_mismatch();
}
}
if (auth.omap_digest_present && candidate.omap_digest_present) {
if (auth.omap_digest != candidate.omap_digest) {
if (error != CLEAN)
errorstream << ", ";
- error = DEEP_ERROR;
- bool known = auth_oi.is_omap_digest() &&
- auth.omap_digest == auth_oi.omap_digest;
+ error = FOUND_ERROR;
errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
- << " != "
- << (known ? "known" : "best guess")
- << " omap_digest 0x" << auth.omap_digest << std::dec
- << " from auth shard " << auth_shard;
- result.set_omap_digest_mismatch();
+ << " != omap_digest 0x" << auth.omap_digest << std::dec
+ << " from shard " << auth_shard;
+ obj_result.set_omap_digest_mismatch();
}
}
// Shard-vs-object-info digest comparison, performed for replicated pools only.
- if (!candidate.stat_error && auth.size != candidate.size) {
+ if (parent->get_pool().is_replicated()) {
+ if (auth_oi.is_data_digest() && candidate.digest_present) {
+ if (auth_oi.data_digest != candidate.digest) {
+ if (error != CLEAN)
+ errorstream << ", ";
+ error = FOUND_ERROR;
+ errorstream << "data_digest 0x" << std::hex << candidate.digest
+ << " != data_digest 0x" << auth_oi.data_digest << std::dec
+ << " from auth oi " << auth_oi;
+ shard_result.set_data_digest_mismatch_oi();
+ }
+ }
+ if (auth_oi.is_omap_digest() && candidate.omap_digest_present) {
+ if (auth_oi.omap_digest != candidate.omap_digest) {
+ if (error != CLEAN)
+ errorstream << ", ";
+ error = FOUND_ERROR;
+ errorstream << "omap_digest 0x" << std::hex << candidate.omap_digest
+ << " != omap_digest 0x" << auth_oi.omap_digest << std::dec
+ << " from auth oi " << auth_oi;
+ shard_result.set_omap_digest_mismatch_oi();
+ }
+ }
+ }
// After a stat error the size and attribute comparisons below are skipped.
+ if (candidate.stat_error)
+ return error == FOUND_ERROR;
// Size check against the object info, converted with be_get_ondisk_size().
+ uint64_t oi_size = be_get_ondisk_size(auth_oi.size);
+ if (oi_size != candidate.size) {
+ if (error != CLEAN)
+ errorstream << ", ";
+ error = FOUND_ERROR;
+ errorstream << "size " << candidate.size
+ << " != size " << oi_size
+ << " from auth oi " << auth_oi;
+ shard_result.set_size_mismatch_oi();
+ }
// Size check against the other shard.
+ if (auth.size != candidate.size) {
if (error != CLEAN)
errorstream << ", ";
- if (error != DEEP_ERROR)
- error = SHALLOW_ERROR;
- bool known = auth.size == be_get_ondisk_size(auth_oi.size);
+ error = FOUND_ERROR;
errorstream << "size " << candidate.size
- << " != "
- << (known ? "known" : "best guess")
- << " size " << auth.size;
- result.set_size_mismatch();
+ << " != size " << auth.size
+ << " from shard " << auth_shard;
+ obj_result.set_size_mismatch();
}
// Attributes present on auth: missing on the candidate is a name mismatch,
// differing content is a value mismatch.
for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
i != auth.attrs.end();
if (!candidate.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
- if (error != DEEP_ERROR)
- error = SHALLOW_ERROR;
- errorstream << "missing attr " << i->first;
- result.set_attr_missing();
+ error = FOUND_ERROR;
+ errorstream << "attr name mismatch '" << i->first << "'";
+ obj_result.set_attr_name_mismatch();
} else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
if (error != CLEAN)
errorstream << ", ";
- if (error != DEEP_ERROR)
- error = SHALLOW_ERROR;
- errorstream << "attr value mismatch " << i->first;
- result.set_attr_mismatch();
+ error = FOUND_ERROR;
+ errorstream << "attr value mismatch '" << i->first << "'";
+ obj_result.set_attr_value_mismatch();
}
}
// Attributes present only on the candidate are also reported as a name
// mismatch (previously "extra attr").
for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
if (!auth.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
- if (error != DEEP_ERROR)
- error = SHALLOW_ERROR;
- errorstream << "extra attr " << i->first;
- result.set_attr_unexpected();
+ error = FOUND_ERROR;
+ errorstream << "attr name mismatch '" << i->first << "'";
+ obj_result.set_attr_name_mismatch();
}
}
- return error;
+ return error == FOUND_ERROR;
+}
+
+// Number of digests recorded in the object info: one for a data digest plus
+// one for an omap digest, so the result is 0, 1 or 2.
+static int dcount(const object_info_t &oi)
+{
+  const int data_part = oi.is_data_digest() ? 1 : 0;
+  const int omap_part = oi.is_omap_digest() ? 1 : 0;
+  return data_part + omap_part;
 }
map<pg_shard_t, ScrubMap *>::const_iterator
PGBackend::be_select_auth_object(
const hobject_t &obj,
const map<pg_shard_t,ScrubMap*> &maps,
- object_info_t *auth_oi)
+ object_info_t *auth_oi,
+ map<pg_shard_t, shard_info_wrapper> &shard_map,
+ inconsistent_obj_wrapper &object_error)
{
+ eversion_t auth_version;
+ bufferlist auth_bl;
+
map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
j != maps.end();
if (i == j->second->objects.end()) {
continue;
}
- if (i->second.read_error || i->second.stat_error) {
- // scrub encountered read error or stat_error, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << "," << (i->second.read_error ? " read_error" : "")
- << (i->second.stat_error ? " stat_error" : "")
- << dendl;
- continue;
+ string error_string;
+ auto& shard_info = shard_map[j->first];
+ if (i->second.read_error) {
+ shard_info.set_read_error();
+ error_string += " read_error";
}
- map<string, bufferptr>::iterator k = i->second.attrs.find(OI_ATTR);
- if (k == i->second.attrs.end()) {
- // no object info on object, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", no oi attr"
- << dendl;
- continue;
+ if (i->second.ec_hash_mismatch) {
+ shard_info.set_ec_hash_mismatch();
+ error_string += " ec_hash_mismatch";
+ }
+ if (i->second.ec_size_mismatch) {
+ shard_info.set_ec_size_mismatch();
+ error_string += " ec_size_mismatch";
}
+ object_info_t oi;
bufferlist bl;
+ map<string, bufferptr>::iterator k;
+
+ if (i->second.stat_error) {
+ shard_info.set_stat_error();
+ error_string += " stat_error";
+ // With stat_error no further checking
+ // We don't need to also see a missing_object_info_attr
+ goto out;
+ }
+
+ k = i->second.attrs.find(OI_ATTR);
+ if (k == i->second.attrs.end()) {
+ // no object info on object, probably corrupt
+ shard_info.set_oi_attr_missing();
+ error_string += " oi_attr_missing";
+ goto out;
+ }
bl.push_back(k->second);
- object_info_t oi;
try {
bufferlist::iterator bliter = bl.begin();
::decode(oi, bliter);
} catch (...) {
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", corrupt oi attr"
- << dendl;
// invalid object info, probably corrupt
- continue;
+ shard_info.set_oi_attr_corrupted();
+ error_string += " oi_attr_corrupted";
+ goto out;
+ }
+
+ if (auth_version != eversion_t()) {
+ if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) {
+ object_error.set_object_info_inconsistency();
+ error_string += " object_info_inconsistency";
+ }
}
- // note candidate in case we can't find anything better, because
- // something is better than nothing. FIXME.
- auth = j;
- *auth_oi = oi;
+ // Don't use this particular shard because it won't be able to repair data
+ // XXX: For now we can't pick one shard for repair and another's object info
+ if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
+ goto out;
+
+ if (auth_version == eversion_t() || oi.version > auth_version ||
+ (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
+ auth = j;
+ *auth_oi = oi;
+ auth_version = oi.version;
+ auth_bl.clear();
+ auth_bl.append(bl);
+ }
- uint64_t correct_size = be_get_ondisk_size(oi.size);
- if (correct_size != i->second.size) {
- // invalid size, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
+out:
+ // Check error_string because some errors already generated messages
+ if (error_string != "") {
+ dout(10) << __func__ << ": error(s) osd " << j->first
<< " for obj " << obj
- << ", size mismatch"
+ << "," << error_string
<< dendl;
- // invalid object info, probably corrupt
- continue;
}
- if (parent->get_pool().is_replicated()) {
- if (oi.is_data_digest() && i->second.digest_present &&
- oi.data_digest != i->second.digest) {
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", data digest mismatch 0x" << std::hex
- << i->second.digest << " != 0x" << oi.data_digest
- << std::dec << dendl;
- continue;
- }
- if (oi.is_omap_digest() && i->second.omap_digest_present &&
- oi.omap_digest != i->second.omap_digest) {
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", omap digest mismatch 0x" << std::hex
- << i->second.omap_digest << " != 0x" << oi.omap_digest
- << std::dec << dendl;
- continue;
- }
- }
- break;
+ // Keep scanning other shards
}
dout(10) << __func__ << ": selecting osd " << auth->first
<< " for obj " << obj
k != master_set.end();
++k) {
object_info_t auth_oi;
- map<pg_shard_t, ScrubMap *>::const_iterator auth =
- be_select_auth_object(*k, maps, &auth_oi);
+ map<pg_shard_t, shard_info_wrapper> shard_map;
+
inconsistent_obj_wrapper object_error{*k};
+ map<pg_shard_t, ScrubMap *>::const_iterator auth =
+ be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
+
list<pg_shard_t> auth_list;
if (auth == maps.end()) {
- object_error.set_auth_missing(*k, maps);
- ++shallow_errors;
+ object_error.set_version(0);
+ object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
+ if (object_error.has_deep_errors())
+ ++deep_errors;
+ else if (object_error.has_shallow_errors())
+ ++shallow_errors;
+ store->add_object_error(k->pool, object_error);
errorstream << pgid.pgid << " soid " << *k
- << ": failed to pick suitable auth object\n";
+ << ": failed to pick suitable object info\n";
continue;
}
- auth_list.push_back(auth->first);
-
+ object_error.set_version(auth_oi.user_version);
ScrubMap::object& auth_object = auth->second->objects[*k];
set<pg_shard_t> cur_missing;
set<pg_shard_t> cur_inconsistent;
- bool clean = true;
+
for (j = maps.begin(); j != maps.end(); ++j) {
if (j == auth)
- continue;
- shard_info_wrapper shard_info;
+ shard_map[auth->first].selected_oi = true;
if (j->second->objects.count(*k)) {
- shard_info.set_object(j->second->objects[*k]);
+ shard_map[j->first].set_object(j->second->objects[*k]);
// Compare
stringstream ss;
- enum scrub_error_type error =
- be_compare_scrub_objects(auth->first,
+ bool found = be_compare_scrub_objects(auth->first,
auth_object,
auth_oi,
j->second->objects[*k],
- shard_info,
+ shard_map[j->first],
+ object_error,
ss);
- if (error != CLEAN) {
- clean = false;
+ // Some errors might have already been set in be_select_auth_object()
+ if (shard_map[j->first].errors != 0) {
cur_inconsistent.insert(j->first);
- if (error == SHALLOW_ERROR)
- ++shallow_errors;
- else
+ if (shard_map[j->first].has_deep_errors())
++deep_errors;
- errorstream << pgid << " shard " << j->first << ": soid " << *k
+ else
+ ++shallow_errors;
+ // Only true if be_compare_scrub_objects() found errors and put something
+ // in ss.
+ if (found)
+ errorstream << pgid << " shard " << j->first << ": soid " << *k
<< " " << ss.str() << "\n";
} else {
+ // XXX: The auth shard might get here that we don't know
+ // that it has the "correct" data.
auth_list.push_back(j->first);
}
} else {
- clean = false;
cur_missing.insert(j->first);
+ shard_map[j->first].set_missing();
+ // Can't have any other errors if there is no information available
++shallow_errors;
errorstream << pgid << " shard " << j->first << " missing " << *k
<< "\n";
- shard_info.set_missing();
}
- object_error.add_shard(j->first, shard_info);
+ object_error.add_shard(j->first, shard_map[j->first]);
+ }
+
+ if (auth_list.empty()) {
+ errorstream << pgid.pgid << " soid " << *k
+ << ": failed to pick suitable auth object\n";
+ goto out;
}
if (!cur_missing.empty()) {
missing[*k] = cur_missing;
}
if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth_list;
- shard_info_wrapper auth_shard{auth_object};
- object_error.add_shard(auth->first, auth_shard);
- }
-
- if (clean &&
- parent->get_pool().is_replicated()) {
+ } else if (parent->get_pool().is_replicated()) {
enum {
NO = 0,
MAYBE = 1,
update = MAYBE;
}
- shard_info_wrapper auth_shard{auth_object};
// recorded digest != actual digest?
if (auth_oi.is_data_digest() && auth_object.digest_present &&
auth_oi.data_digest != auth_object.digest) {
- auth_shard.set_data_digest_mismatch_oi();
- ++deep_errors;
+ assert(shard_map[auth->first].has_data_digest_mismatch_oi());
errorstream << pgid << " recorded data digest 0x"
<< std::hex << auth_oi.data_digest << " != on disk 0x"
<< auth_object.digest << std::dec << " on " << auth_oi.soid
}
if (auth_oi.is_omap_digest() && auth_object.omap_digest_present &&
auth_oi.omap_digest != auth_object.omap_digest) {
- auth_shard.set_omap_digest_mismatch_oi();
- ++deep_errors;
+ assert(shard_map[auth->first].has_omap_digest_mismatch_oi());
errorstream << pgid << " recorded omap digest 0x"
<< std::hex << auth_oi.omap_digest << " != on disk 0x"
<< auth_object.omap_digest << std::dec
if (repair)
update = FORCE;
}
- object_error.add_shard(auth->first, auth_shard);
if (update != NO) {
utime_t age = now - auth_oi.local_mtime;
}
}
}
- if (object_error.errors) {
+out:
+ if (object_error.has_deep_errors())
+ ++deep_errors;
+ else if (object_error.has_shallow_errors())
+ ++shallow_errors;
+ if (object_error.errors || object_error.union_shards.errors) {
store->add_object_error(k->pool, object_error);
}
}
class Store;
}
struct shard_info_wrapper;
+struct inconsistent_obj_wrapper;
//forward declaration
class OSDMap;
void be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle);
- enum scrub_error_type be_compare_scrub_objects(
+ bool be_compare_scrub_objects(
pg_shard_t auth_shard,
const ScrubMap::object &auth,
const object_info_t& auth_oi,
const ScrubMap::object &candidate,
shard_info_wrapper& shard_error,
+ inconsistent_obj_wrapper &result,
ostream &errorstream);
map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
const hobject_t &obj,
const map<pg_shard_t,ScrubMap*> &maps,
- object_info_t *auth_oi);
+ object_info_t *auth_oi,
+ map<pg_shard_t, shard_info_wrapper> &shard_map,
+ inconsistent_obj_wrapper &object_error);
void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
bool repair,
// Serializes the scrub map entry; the change bumps the first ENCODE_START
// argument from 7 to 8.
// The slot that used to hold read_error now carries compat_read_error, the OR
// of read_error, ec_hash_mismatch and ec_size_mismatch. The three individual
// flags are appended after stat_error.
// NOTE(review): presumably this lets a decoder that only knows the older
// layout still see a read error for any of the three conditions — confirm.
void ScrubMap::object::encode(bufferlist& bl) const
{
- ENCODE_START(7, 2, bl);
+ bool compat_read_error = read_error || ec_hash_mismatch || ec_size_mismatch;
+ ENCODE_START(8, 2, bl);
::encode(size, bl);
::encode(negative, bl);
::encode(attrs, bl);
::encode(snapcolls, bl);
::encode(omap_digest, bl);
::encode(omap_digest_present, bl);
- ::encode(read_error, bl);
+ ::encode(compat_read_error, bl);
::encode(stat_error, bl);
+ ::encode(read_error, bl);
+ ::encode(ec_hash_mismatch, bl);
+ ::encode(ec_size_mismatch, bl);
ENCODE_FINISH(bl);
}
// Deserializes a scrub map entry, gated on struct_v for the newer fields.
// The combined flag written in the old read_error slot is read into the local
// compat_read_error (struct_v >= 6); the individual read_error,
// ec_hash_mismatch and ec_size_mismatch flags are read only for struct_v >= 8.
// For struct_v < 8 those three members are not assigned from the buffer and
// keep the values the object already held.
void ScrubMap::object::decode(bufferlist::iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(7, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(8, 2, 2, bl);
::decode(size, bl);
- bool tmp;
+ bool tmp, compat_read_error = false;
::decode(tmp, bl);
negative = tmp;
::decode(attrs, bl);
omap_digest_present = tmp;
}
if (struct_v >= 6) {
- ::decode(tmp, bl);
- read_error = tmp;
+ ::decode(compat_read_error, bl);
}
if (struct_v >= 7) {
::decode(tmp, bl);
stat_error = tmp;
}
// The flags are bit-fields, so each one is decoded via the bool temporary.
+ if (struct_v >= 8) {
+ ::decode(tmp, bl);
+ read_error = tmp;
+ ::decode(tmp, bl);
+ ec_hash_mismatch = tmp;
+ ::decode(tmp, bl);
+ ec_size_mismatch = tmp;
+ }
// When the combined flag is set but none of the three specific flags is,
// the error is reported as a plain read_error.
+ // If older encoder found a read_error, set read_error
+ if (compat_read_error && !read_error && !ec_hash_mismatch && !ec_size_mismatch)
+ read_error = true;
DECODE_FINISH(bl);
}
bool omap_digest_present:1;
bool read_error:1;
bool stat_error:1;
+ bool ec_hash_mismatch:1;
+ bool ec_size_mismatch:1;
object() :
// Init invalid size so it won't match if we get a stat EIO error
size(-1), omap_digest(0), digest(0), nlinks(0),
negative(false), digest_present(false), omap_digest_present(false),
- read_error(false), stat_error(false) {}
+ read_error(false), stat_error(false), ec_hash_mismatch(false), ec_size_mismatch(false) {}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
WRITE_CLASS_ENCODER(obj_list_snap_response_t)
-enum scrub_error_type {
- CLEAN,
- DEEP_ERROR,
- SHALLOW_ERROR
-};
-
// PromoteCounter
struct PromoteCounter {
#include "cls/lock/cls_lock_client.h"
#include "include/compat.h"
+#include "include/util.h"
#include "common/hobject.h"
#include "PoolDump.h"
return 0;
}
+static void dump_errors(const err_t &err, Formatter &f, const char *name)
+{
+ f.open_array_section(name);
+ if (err.has_shard_missing())
+ f.dump_string("error", "missing");
+ if (err.has_stat_error())
+ f.dump_string("error", "stat_error");
+ if (err.has_read_error())
+ f.dump_string("error", "read_error");
+ if (err.has_data_digest_mismatch_oi())
+ f.dump_string("error", "data_digest_mismatch_oi");
+ if (err.has_omap_digest_mismatch_oi())
+ f.dump_string("error", "omap_digest_mismatch_oi");
+ if (err.has_size_mismatch_oi())
+ f.dump_string("error", "size_mismatch_oi");
+ if (err.has_ec_hash_error())
+ f.dump_string("error", "ec_hash_error");
+ if (err.has_ec_size_error())
+ f.dump_string("error", "ec_size_error");
+ if (err.has_oi_attr_missing())
+ f.dump_string("error", "oi_attr_missing");
+ if (err.has_oi_attr_corrupted())
+ f.dump_string("error", "oi_attr_corrupted");
+ f.close_section();
+}
+
// Dumps one shard's information to f: its error list first (via
// dump_errors), then — unless the shard is missing — its size (omitted after
// a stat error), digests, the decoded object info when the object has an
// object-info inconsistency, and the attributes when the object has an attr
// name or value mismatch.
static void dump_shard(const shard_info_t& shard,
const inconsistent_obj_t& inc,
Formatter &f)
{
- // A missing shard just has that error and nothing else
- if (shard.has_shard_missing()) {
- f.open_array_section("errors");
- f.dump_string("error", "missing");
- f.close_section();
+ dump_errors(shard, f, "errors");
+
// A missing shard has nothing further to report.
+ if (shard.has_shard_missing())
return;
- }
- f.dump_unsigned("size", shard.size);
+ if (!shard.has_stat_error())
+ f.dump_unsigned("size", shard.size);
if (shard.omap_digest_present) {
f.dump_format("omap_digest", "0x%08x", shard.omap_digest);
}
f.dump_format("data_digest", "0x%08x", shard.data_digest);
}
- f.open_array_section("errors");
- if (shard.has_read_error())
- f.dump_string("error", "read_error");
- if (shard.has_data_digest_mismatch())
- f.dump_string("error", "data_digest_mismatch");
- if (shard.has_omap_digest_mismatch())
- f.dump_string("error", "omap_digest_mismatch");
- if (shard.has_size_mismatch())
- f.dump_string("error", "size_mismatch");
- if (!shard.has_read_error()) {
- if (shard.has_data_digest_mismatch_oi())
- f.dump_string("error", "data_digest_mismatch_oi");
- if (shard.has_omap_digest_mismatch_oi())
- f.dump_string("error", "omap_digest_mismatch_oi");
- if (shard.has_size_mismatch_oi())
- f.dump_string("error", "size_mismatch_oi");
- }
- if (shard.has_attr_missing())
- f.dump_string("error", "attr_missing");
- if (shard.has_attr_unexpected())
- f.dump_string("error", "attr_unexpected");
- f.close_section();
-
- if (inc.has_attr_mismatch()) {
- f.open_object_section("attrs");
// The object info attr is decoded only when the shard is flagged neither
// oi_attr_missing nor oi_attr_corrupted, which is what the two "Can't be"
// comments below rely on.
// NOTE(review): the local `bl` is never used, and the const_cast exists only
// to obtain a non-const iterator; a shard with a stat error also reaches
// this branch — confirm its attrs are populated in that case.
+ if (!shard.has_oi_attr_missing() && !shard.has_oi_attr_corrupted() &&
+ inc.has_object_info_inconsistency()) {
+ object_info_t oi;
+ bufferlist bl;
+ map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(OI_ATTR);
+ assert(k != shard.attrs.end()); // Can't be missing
+ bufferlist::iterator bliter = k->second.begin();
+ ::decode(oi, bliter); // Can't be corrupted
+ f.dump_stream("object_info") << oi;
+ }
// "attrs" is now an array section; each value is passed through cleanbin(),
// whose out-parameter is dumped as the "Base64" flag.
// NOTE(review): `auto kv` copies every attribute name and buffer.
+ if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
+ f.open_array_section("attrs");
for (auto kv : shard.attrs) {
f.open_object_section("attr");
f.dump_string("name", kv.first);
- bufferlist b64;
- kv.second.encode_base64(b64);
- string v(b64.c_str(), b64.length());
- f.dump_string("value", v);
+ bool b64;
+ f.dump_string("value", cleanbin(kv.second, b64));
+ f.dump_bool("Base64", b64);
f.close_section();
}
f.close_section();
}
}
+static void dump_obj_errors(const obj_err_t &err, Formatter &f)
+{
+ f.open_array_section("errors");
+ if (err.has_object_info_inconsistency())
+ f.dump_string("error", "object_info_inconsistency");
+ if (err.has_data_digest_mismatch())
+ f.dump_string("error", "data_digest_mismatch");
+ if (err.has_omap_digest_mismatch())
+ f.dump_string("error", "omap_digest_mismatch");
+ if (err.has_size_mismatch())
+ f.dump_string("error", "size_mismatch");
+ if (err.has_attr_value_mismatch())
+ f.dump_string("error", "attr_value_mismatch");
+ if (err.has_attr_name_mismatch())
+ f.dump_string("error", "attr_name_mismatch");
+ f.close_section();
+}
+
static void dump_object_id(const object_id_t& object,
Formatter &f)
{
{
f.open_object_section("object");
dump_object_id(inc.object, f);
+ f.dump_unsigned("version", inc.version);
f.close_section();
- f.open_array_section("errors");
- if (inc.has_attr_unexpected())
- f.dump_string("error", "attr_unexpected");
- if (inc.has_shard_missing())
- f.dump_string("error", "missing");
- if (inc.has_stat_error())
- f.dump_string("error", "stat_error");
- if (inc.has_read_error())
- f.dump_string("error", "read_error");
- if (inc.has_data_digest_mismatch())
- f.dump_string("error", "data_digest_mismatch");
- if (inc.has_omap_digest_mismatch())
- f.dump_string("error", "omap_digest_mismatch");
- if (inc.has_size_mismatch())
- f.dump_string("error", "size_mismatch");
- if (inc.has_attr_mismatch())
- f.dump_string("error", "attr_mismatch");
- f.close_section();
-
+ dump_obj_errors(inc, f);
+ dump_errors(inc.union_shards, f, "union_shard_errors");
+ for (const auto& shard_info : inc.shards) {
+ shard_info_t shard = const_cast<shard_info_t&>(shard_info.second);
+ if (shard.selected_oi) {
+ object_info_t oi;
+ bufferlist bl;
+ auto k = shard.attrs.find(OI_ATTR);
+ assert(k != shard.attrs.end()); // Can't be missing
+ bufferlist::iterator bliter = k->second.begin();
+ ::decode(oi, bliter); // Can't be corrupted
+ f.dump_stream("selected_object_info") << oi;
+ break;
+ }
+ }
f.open_array_section("shards");
- for (auto osd_shard : inc.shards) {
+ for (const auto& shard_info : inc.shards) {
f.open_object_section("shard");
- f.dump_int("osd", osd_shard.first);
- dump_shard(osd_shard.second, inc, f);
+ auto& osd_shard = shard_info.first;
+ f.dump_int("osd", osd_shard.osd);
+ auto shard = osd_shard.shard;
+ if (shard != shard_id_t::NO_SHARD)
+ f.dump_unsigned("shard", shard);
+ dump_shard(shard_info.second, inc, f);
f.close_section();
}
f.close_section();