From dd1bd2c4ece5adb536e8968ef838e56d7b4a24ea Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 11 Feb 2011 14:46:05 -0800 Subject: [PATCH] PG: refactor scrubmap comparison and repair logic The previous version gave erroneous results. This version seems simpler and can be more easily unit tested as the error detection logic has been seperated from the repair logic. Signed-off-by: Samuel Just --- src/osd/PG.cc | 282 +++++++++++++++++++++++++++----------------------- src/osd/PG.h | 8 ++ 2 files changed, 158 insertions(+), 132 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 6130c9a63c7ea..782106228fa66 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3307,6 +3307,105 @@ bool PG::scrub_gather_replica_maps() { } } +bool PG::_compare_scrub_objects(ScrubMap::object &auth, + ScrubMap::object &candidate, + ostream &errorstream) +{ + bool ok = true; + if (auth.size != candidate.size) { + ok = false; + errorstream << "size " << candidate.size + << " != known size " << auth.size; + } + for (map::const_iterator i = auth.attrs.begin(); + i != auth.attrs.end(); + i++) { + if (!candidate.attrs.count(i->first)) { + if (!ok) + errorstream << ", "; + ok = false; + errorstream << "missing attr " << i->first; + } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) { + if (!ok) + errorstream << ", "; + ok = false; + errorstream << "attr value mismatch " << i->first; + } + } + for (map::const_iterator i = candidate.attrs.begin(); + i != candidate.attrs.end(); + i++) { + if (!auth.attrs.count(i->first)) { + if (!ok) + errorstream << ", "; + ok = false; + errorstream << "extra attr " << i->first; + } + } + if (ok) + errorstream << " is ok!"; + return ok; +} + +void PG::_compare_scrubmaps(const map &maps, + map > &missing, + map > &inconsistent, + map &authoritative, + ostream &errorstream) +{ + map::const_iterator i; + map::const_iterator j; + set master_set; + + // Construct master set + for (j = maps.begin(); j != maps.end(); j++) { + for (i = j->second->objects.begin(); i != j->second->objects.end(); i++) { + master_set.insert(i->first); + } + } + + // Check maps against master set and each other + for (set::const_iterator k = master_set.begin(); + k != master_set.end(); + k++) { + map::const_iterator auth = maps.end(); + set cur_missing; + set cur_inconsistent; + for (j = maps.begin(); j != maps.end(); j++) { + if (j->second->objects.count(*k)) { + if (auth == maps.end()) { + // Take first osd to have it as authoritative + auth = j; + } else { + // Compare + errorstream << info.pgid << " osd" << acting[j->first] + << ": soid " << *k; + if (!_compare_scrub_objects(auth->second->objects[*k], + j->second->objects[*k], + errorstream)) { + cur_inconsistent.insert(j->first); + } + errorstream << std::endl; + } + } else { + cur_missing.insert(j->first); + errorstream << info.pgid + << " osd" << acting[j->first] + << " missing " << *k << std::endl; + } + } + if (cur_missing.size()) { + missing[*k] = cur_missing; + } + if (cur_inconsistent.size()) { + inconsistent[*k] = cur_inconsistent; + } + if (cur_inconsistent.size() || cur_missing.size()) { + authoritative[*k] = auth->first; + } + } +} + void PG::scrub_finalize() { osd->map_lock.get_read(); lock(); @@ -3329,159 +3428,78 @@ void PG::scrub_finalize() { osd->map_lock.put_read(); dout(10) << "scrub_finalize has maps, analyzing" << dendl; - stringstream ss; int errors = 0, fixed = 0; bool repair = state_test(PG_STATE_REPAIR); const char *mode = repair ? "repair":"scrub"; if (acting.size() > 1) { dout(10) << "scrub comparing replica scrub maps" << dendl; - // first, compare scrub maps - vector m(acting.size()); - m[0] = &primary_scrubmap; - for (unsigned i=1; i::iterator p[acting.size()]; - for (unsigned i=0; iobjects.size() << " items" << dendl; - p[i] = m[i]->objects.begin(); + stringstream ss; + + // Maps from objects with erros to missing/inconsistent peers + map > missing; + map > inconsistent; + + // Map from object with errors to good peer + map authoritative; + map maps; + + dout(2) << "scrub osd" << acting[0] << " has " + << primary_scrubmap.objects.size() << " items" << dendl; + maps[0] = &primary_scrubmap; + for (unsigned i=1; iobjects.end()) { - anymissing = true; - continue; - } - if (!po) { - psoid = &(p[i]->first); - po = &(p[i]->second); - pi = i; - } - else if (*psoid != p[i]->first) { - anymissing = true; - if (*psoid > p[i]->first) { - psoid = &(p[0]->first); - po = &(p[i]->second); - pi = i; + + _compare_scrubmaps(maps, missing, inconsistent, authoritative, ss); + + if (authoritative.size()) { + ss << info.pgid << " " << mode << " " << missing.size() << " missing, " + << inconsistent.size() << " inconsistent objects\n"; + dout(2) << ss.str() << dendl; + osd->clog.error(ss); + state_set(PG_STATE_INCONSISTENT); + if (repair) { + state_clear(PG_STATE_CLEAN); + for (map::iterator i = authoritative.begin(); + i != authoritative.end(); + i++) { + set::iterator j; + + if (missing.count(i->first)) { + for (j = missing[i->first].begin(); + j != missing[i->first].end(); + j++) { + repair_object(i->first, + &maps[i->second]->objects[i->first], + acting[*j], + acting[i->second]); + } } - } - } - if (!po) { - break; - } - if (anymissing) { - for (unsigned i=0; iobjects.end() || *psoid != p[i]->first) { - osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " missing " << *psoid; - num_missing++; - - if (repair) - repair_object(*psoid, po, acting[i], acting[pi]); - } else - p[i]++; - } - continue; - } - - // compare - bool ok = true; - for (unsigned i=1; isize != p[i]->second.size) { - dout(0) << "scrub osd" << acting[i] << " " << *psoid - << " size " << p[i]->second.size << " != " << po->size << dendl; - osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid - << " size " << p[i]->second.size << " != " << po->size; - peerok = ok = false; - num_bad++; - } - if (po->attrs.size() != p[i]->second.attrs.size()) { - dout(0) << "scrub osd" << acting[i] << " " << *psoid - << " attr count " << p[i]->second.attrs.size() << " != " << po->attrs.size() << dendl; - osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid - << " attr count " << p[i]->second.attrs.size() << " != " << po->attrs.size(); - peerok = ok = false; - num_bad++; - } - for (map::iterator q = po->attrs.begin(); q != po->attrs.end(); q++) { - if (p[i]->second.attrs.count(q->first)) { - if (q->second.cmp(p[i]->second.attrs[q->first])) { - dout(0) << "scrub osd" << acting[i] << " " << *psoid - << " attr " << q->first << " value mismatch" << dendl; - osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid - << " attr " << q->first << " value mismatch"; - peerok = ok = false; - num_bad++; + if (inconsistent.count(i->first)) { + for (j = inconsistent[i->first].begin(); + j != inconsistent[i->first].end(); + j++) { + repair_object(i->first, + &maps[i->second]->objects[i->first], + acting[*j], + acting[i->second]); } - } else { - dout(0) << "scrub osd" << acting[i] << " " << *psoid - << " attr " << q->first << " missing" << dendl; - osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid - << " attr " << q->first << " missing"; - peerok = ok = false; - num_bad++; } - } - if (!peerok && repair) - repair_object(*psoid, po, acting[i], acting[pi]); + } } - - if (ok) - dout(20) << "scrub " << *psoid << " size " << po->size << " ok" << dendl; - - // next - for (unsigned i=0; iflush(); - osd->clog.error(ss); - state_set(PG_STATE_INCONSISTENT); - if (repair) - state_clear(PG_STATE_CLEAN); } } - /* - lock(); - if (epoch != info.history.same_acting_since) { - dout(10) << "scrub pg changed, aborting" << dendl; - goto out; - } - */ - // discard peer scrub info. peer_scrub_map.clear(); - /* - unlock(); - */ - // ok, do the pg-type specific scrubbing _scrub(primary_scrubmap, errors, fixed); - /* - lock(); - if (epoch != info.history.same_acting_since) { - dout(10) << "scrub pg changed, aborting" << dendl; - goto out; - } - */ - { stringstream oss; oss << info.pgid << " " << mode << " "; diff --git a/src/osd/PG.h b/src/osd/PG.h index 4842c590dc8de..da532fdbdf5db 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -906,6 +906,14 @@ public: MOSDRepScrub *active_rep_scrub; void repair_object(const sobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer); + bool _compare_scrub_objects(ScrubMap::object &auth, + ScrubMap::object &candidate, + ostream &errorstream); + void _compare_scrubmaps(const map &maps, + map > &missing, + map > &inconsistent, + map &authoritative, + ostream &errorstream); void scrub(); void scrub_finalize(); void scrub_clear_state(); -- 2.39.5