From: Loic Dachary Date: Thu, 4 Dec 2014 10:44:34 +0000 (+0100) Subject: osd: accumulate authoritative peers during recovery X-Git-Tag: v0.92~31^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9406b7f71f91f2f0d6825b5acbc00d7994aeeefd;p=ceph.git osd: accumulate authoritative peers during recovery When PGBackend::be_compare_scrubmaps finds multiple good peers, it only keeps the last one. This is fine for replication but erasure coding needs to know all good peers for recovery. PGBackend::be_compare_scrubmaps is modified to accumulate all good peers and return them to PGBackend::be_compare_scrubmaps and indirectly to PG::scrub_compare_maps. PG::scrub_compare_maps will dispatch the good peers to authmap and good_peers. In the case of authmap, the data structure is not modified and only the last good peer is set. The ReplicatedPG::_scrub uses authmap in a non trivial way and it should probably be modified to use information from multiple good peers instead of just the last one. This could be the focus of another change. The scrubber.authoritative data structure is changed to include a list of pair instead of a single pair to pass to PG::repair_object and allow it to add all good peers to the missing_loc locations if the primary has a missing object. It could be just a list of pg_shard_t instead because the ScrubMap::object is not used but makes more sense to keep both and it will presumably be useful when / if the logic changes. http://tracker.ceph.com/issues/10017 Fixes: #10017 Signed-off-by: Loic Dachary --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 9fb255f2468a..8cc4e4eda3e7 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3586,14 +3586,15 @@ int PG::build_scrub_map_chunk( } void PG::repair_object( - const hobject_t& soid, ScrubMap::object *po, - pg_shard_t bad_peer, pg_shard_t ok_peer) + const hobject_t& soid, list > *ok_peers, + pg_shard_t bad_peer) { dout(10) << "repair_object " << soid << " bad_peer osd." - << bad_peer << " ok_peer osd." << ok_peer << dendl; + << bad_peer << " ok_peers osd.{" << ok_peers << "}" << dendl; + ScrubMap::object &po = ok_peers->back().first; eversion_t v; bufferlist bv; - bv.push_back(po->attrs[OI_ATTR]); + bv.push_back(po.attrs[OI_ATTR]); object_info_t oi(bv); if (bad_peer != primary) { peer_missing[bad_peer].add(soid, oi.version, eversion_t()); @@ -3603,9 +3604,14 @@ void PG::repair_object( pg_log.missing_add(soid, oi.version, eversion_t()); missing_loc.add_missing(soid, oi.version, eversion_t()); - missing_loc.add_location(soid, ok_peer); + list >::iterator i; + for (i = ok_peers->begin(); + i != ok_peers->end(); + i++) + missing_loc.add_location(soid, i->second); pg_log.set_last_requested(0); + dout(10) << __func__ << ": primary = " << primary << dendl; } } @@ -4092,7 +4098,7 @@ void PG::scrub_compare_maps() stringstream ss; // Map from object with errors to good peer - map authoritative; + map > authoritative; map maps; dout(2) << __func__ << " osd." << acting[0] << " has " @@ -4126,20 +4132,26 @@ void PG::scrub_compare_maps() osd->clog->error(ss); } - for (map::iterator i = authoritative.begin(); + for (map >::iterator i = authoritative.begin(); i != authoritative.end(); ++i) { + list > good_peers; + for (list::const_iterator j = i->second.begin(); + j != i->second.end(); + j++) { + good_peers.push_back(make_pair(maps[*j]->objects[i->first], *j)); + } scrubber.authoritative.insert( make_pair( i->first, - make_pair(maps[i->second]->objects[i->first], i->second))); + good_peers)); } - for (map::iterator i = authoritative.begin(); + for (map >::iterator i = authoritative.begin(); i != authoritative.end(); ++i) { authmap.objects.erase(i->first); - authmap.objects.insert(*(maps[i->second]->objects.find(i->first))); + authmap.objects.insert(*(maps[i->second.back()]->objects.find(i->first))); } } @@ -4149,7 +4161,7 @@ void PG::scrub_compare_maps() void PG::scrub_process_inconsistent() { - dout(10) << "process_inconsistent() checking authoritative" << dendl; + dout(10) << __func__ << ": checking authoritative" << dendl; bool repair = state_test(PG_STATE_REPAIR); bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); @@ -4163,7 +4175,7 @@ void PG::scrub_process_inconsistent() osd->clog->error(ss); if (repair) { state_clear(PG_STATE_CLEAN); - for (map >::iterator i = + for (map > >::iterator i = scrubber.authoritative.begin(); i != scrubber.authoritative.end(); ++i) { @@ -4175,9 +4187,8 @@ void PG::scrub_process_inconsistent() ++j) { repair_object( i->first, - &(i->second.first), - *j, - i->second.second); + &(i->second), + *j); ++scrubber.fixed; } } @@ -4186,9 +4197,8 @@ void PG::scrub_process_inconsistent() j != scrubber.inconsistent[i->first].end(); ++j) { repair_object(i->first, - &(i->second.first), - *j, - i->second.second); + &(i->second), + *j); ++scrubber.fixed; } } diff --git a/src/osd/PG.h b/src/osd/PG.h index 0e02dcaa73c8..5835c2f0382f 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1062,8 +1062,8 @@ public: map > missing; map > inconsistent; - // Map from object with errors to good peer - map > authoritative; + // Map from object with errors to good peers + map > > authoritative; // Objects who need digest updates map > missing_digest; @@ -1176,9 +1176,8 @@ public: int active_pushes; void repair_object( - const hobject_t& soid, ScrubMap::object *po, - pg_shard_t bad_peer, - pg_shard_t ok_peer); + const hobject_t& soid, list > *ok_peers, + pg_shard_t bad_peer); void scrub(ThreadPool::TPHandle &handle); void chunky_scrub(ThreadPool::TPHandle &handle); diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index ec51698a4a61..cc5da8fafcda 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -446,14 +446,14 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects( return error; } -map::const_iterator +list::const_iterator> PGBackend::be_select_auth_object( const hobject_t &obj, const map &maps, bool okseed, object_info_t *auth_oi) { - map::const_iterator auth = maps.end(); + list::const_iterator> auth; for (map::const_iterator j = maps.begin(); j != maps.end(); ++j) { @@ -528,7 +528,7 @@ map::const_iterator << " for obj " << obj << dendl; *auth_oi = oi; - auth = j; + auth.push_back(j); } return auth; } @@ -538,7 +538,7 @@ void PGBackend::be_compare_scrubmaps( bool okseed, map > &missing, map > &inconsistent, - map &authoritative, + map > &authoritative, map > &missing_digest, int &shallow_errors, int &deep_errors, const spg_t& pgid, @@ -562,14 +562,14 @@ void PGBackend::be_compare_scrubmaps( k != master_set.end(); ++k) { object_info_t auth_oi; - map::const_iterator auth = + list::const_iterator> auth = be_select_auth_object(*k, maps, okseed, &auth_oi); - if (auth == maps.end()) { + if (auth.empty()) { // Something is better than nothing // TODO: something is NOT better than nothing, do something like // unfound_lost if no valid copies can be found, or just mark unfound map::const_iterator fallback = maps.begin(); - auth = fallback; + auth.push_back(fallback); dout(10) << __func__ << ": selecting osd " << fallback->first << " for obj " << *k << ", something is better than nothing, FIXME" @@ -577,18 +577,17 @@ void PGBackend::be_compare_scrubmaps( continue; } - assert(auth != maps.end()); - ScrubMap::object& auth_object = auth->second->objects[*k]; + ScrubMap::object& auth_object = auth.back()->second->objects[*k]; set cur_missing; set cur_inconsistent; for (j = maps.begin(); j != maps.end(); ++j) { - if (j == auth) + if (j == auth.back()) continue; if (j->second->objects.count(*k)) { // Compare stringstream ss; enum scrub_error_type error = - be_compare_scrub_objects(auth->first, + be_compare_scrub_objects(auth.back()->first, auth_object, auth_oi, okseed, @@ -601,16 +600,15 @@ void PGBackend::be_compare_scrubmaps( else ++deep_errors; errorstream << __func__ << ": " << pgid << " shard " << j->first - << ": soid " << *k << " " << ss.str() << std::endl; + << ": soid " << *k << " " << ss.str(); } } else { cur_missing.insert(j->first); ++shallow_errors; errorstream << __func__ << ": " << pgid << " shard " << j->first - << " missing " << *k << std::endl; + << " missing " << *k; } } - assert(auth != maps.end()); if (!cur_missing.empty()) { missing[*k] = cur_missing; } @@ -618,7 +616,10 @@ void PGBackend::be_compare_scrubmaps( inconsistent[*k] = cur_inconsistent; } if (!cur_inconsistent.empty() || !cur_missing.empty()) { - authoritative[*k] = auth->first; + list::const_iterator>::const_iterator i; + for (i = auth.begin(); i != auth.end(); i++) { + authoritative[*k].push_back((*i)->first); + } } if (okseed && auth_object.digest_present && auth_object.omap_digest_present && diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 2b2102a1a605..9e0ca4f8c7ba 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -598,7 +598,7 @@ bool okseed, const ScrubMap::object &candidate, ostream &errorstream); - map::const_iterator be_select_auth_object( + list::const_iterator> be_select_auth_object( const hobject_t &obj, const map &maps, bool okseed, @@ -608,7 +608,7 @@ bool okseed, ///< true if scrub digests have same seed our oi digests map > &missing, map > &inconsistent, - map &authoritative, + map > &authoritative, map > &missing_digest, int &shallow_errors, int &deep_errors, const spg_t& pgid,