]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: accumulate authoritative peers during recovery
authorLoic Dachary <ldachary@redhat.com>
Thu, 4 Dec 2014 10:44:34 +0000 (11:44 +0100)
committerLoic Dachary <ldachary@redhat.com>
Fri, 9 Jan 2015 12:19:07 +0000 (13:19 +0100)
When PGBackend::be_compare_scrubmaps finds multiple good peers, it only
keeps the last one. This is fine for replication but erasure coding
needs to know all good peers for recovery.

PGBackend::be_compare_scrubmaps is modified to accumulate all good peers
and return them to PGBackend::be_compare_scrubmaps and indirectly to
PG::scrub_compare_maps.

PG::scrub_compare_maps will dispatch the good peers to authmap and
good_peers. In the case of authmap, the data structure is not modified
and only the last good peer is set. The ReplicatedPG::_scrub uses
authmap in a non trivial way and it should probably be modified to use
information from multiple good peers instead of just the last one. This
could be the focus of another change.

The scrubber.authoritative data structure is changed to include a list
of pair<ScrubMap::object, pg_shard_t> instead of a single
pair<ScrubMap::object, pg_shard_t> to pass to PG::repair_object and
allow it to add all good peers to the missing_loc locations if the
primary has a missing object. It could be just a list of pg_shard_t
instead because the ScrubMap::object is not used but makes more sense to
keep both and it will presumably be useful when / if the logic changes.

http://tracker.ceph.com/issues/10017 Fixes: #10017

Signed-off-by: Loic Dachary <ldachary@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/PGBackend.cc
src/osd/PGBackend.h

index 9fb255f2468a91294956f7855c20317d4f9e4910..8cc4e4eda3e75a0c362382b0352fddf65d15de69 100644 (file)
@@ -3586,14 +3586,15 @@ int PG::build_scrub_map_chunk(
 }
 
 void PG::repair_object(
-  const hobject_t& soid, ScrubMap::object *po,
-  pg_shard_t bad_peer, pg_shard_t ok_peer)
+  const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
+  pg_shard_t bad_peer)
 {
   dout(10) << "repair_object " << soid << " bad_peer osd."
-          << bad_peer << " ok_peer osd." << ok_peer << dendl;
+          << bad_peer << " ok_peers osd.{" << ok_peers << "}" << dendl;
+  ScrubMap::object &po = ok_peers->back().first;
   eversion_t v;
   bufferlist bv;
-  bv.push_back(po->attrs[OI_ATTR]);
+  bv.push_back(po.attrs[OI_ATTR]);
   object_info_t oi(bv);
   if (bad_peer != primary) {
     peer_missing[bad_peer].add(soid, oi.version, eversion_t());
@@ -3603,9 +3604,14 @@ void PG::repair_object(
 
     pg_log.missing_add(soid, oi.version, eversion_t());
     missing_loc.add_missing(soid, oi.version, eversion_t());
-    missing_loc.add_location(soid, ok_peer);
+    list<pair<ScrubMap::object, pg_shard_t> >::iterator i;
+    for (i = ok_peers->begin();
+        i != ok_peers->end();
+        i++)
+      missing_loc.add_location(soid, i->second);
 
     pg_log.set_last_requested(0);
+    dout(10) << __func__ << ": primary = " << primary << dendl;
   }
 }
 
@@ -4092,7 +4098,7 @@ void PG::scrub_compare_maps()
     stringstream ss;
 
     // Map from object with errors to good peer
-    map<hobject_t, pg_shard_t> authoritative;
+    map<hobject_t, list<pg_shard_t> > authoritative;
     map<pg_shard_t, ScrubMap *> maps;
 
     dout(2) << __func__ << "   osd." << acting[0] << " has "
@@ -4126,20 +4132,26 @@ void PG::scrub_compare_maps()
       osd->clog->error(ss);
     }
 
-    for (map<hobject_t, pg_shard_t>::iterator i = authoritative.begin();
+    for (map<hobject_t, list<pg_shard_t> >::iterator i = authoritative.begin();
         i != authoritative.end();
         ++i) {
+      list<pair<ScrubMap::object, pg_shard_t> > good_peers;
+      for (list<pg_shard_t>::const_iterator j = i->second.begin();
+          j != i->second.end();
+          j++) {
+       good_peers.push_back(make_pair(maps[*j]->objects[i->first], *j));
+      }
       scrubber.authoritative.insert(
        make_pair(
          i->first,
-         make_pair(maps[i->second]->objects[i->first], i->second)));
+         good_peers));
     }
 
-    for (map<hobject_t, pg_shard_t>::iterator i = authoritative.begin();
+    for (map<hobject_t, list<pg_shard_t> >::iterator i = authoritative.begin();
         i != authoritative.end();
         ++i) {
       authmap.objects.erase(i->first);
-      authmap.objects.insert(*(maps[i->second]->objects.find(i->first)));
+      authmap.objects.insert(*(maps[i->second.back()]->objects.find(i->first)));
     }
   }
 
@@ -4149,7 +4161,7 @@ void PG::scrub_compare_maps()
 
 void PG::scrub_process_inconsistent()
 {
-  dout(10) << "process_inconsistent() checking authoritative" << dendl;
+  dout(10) << __func__ << ": checking authoritative" << dendl;
   bool repair = state_test(PG_STATE_REPAIR);
   bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
   const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
@@ -4163,7 +4175,7 @@ void PG::scrub_process_inconsistent()
     osd->clog->error(ss);
     if (repair) {
       state_clear(PG_STATE_CLEAN);
-      for (map<hobject_t, pair<ScrubMap::object, pg_shard_t> >::iterator i =
+      for (map<hobject_t, list<pair<ScrubMap::object, pg_shard_t> > >::iterator i =
             scrubber.authoritative.begin();
           i != scrubber.authoritative.end();
           ++i) {
@@ -4175,9 +4187,8 @@ void PG::scrub_process_inconsistent()
               ++j) {
            repair_object(
              i->first,
-             &(i->second.first),
-             *j,
-             i->second.second);
+             &(i->second),
+             *j);
            ++scrubber.fixed;
          }
        }
@@ -4186,9 +4197,8 @@ void PG::scrub_process_inconsistent()
               j != scrubber.inconsistent[i->first].end(); 
               ++j) {
            repair_object(i->first, 
-             &(i->second.first),
-             *j,
-             i->second.second);
+             &(i->second),
+             *j);
            ++scrubber.fixed;
          }
        }
index 0e02dcaa73c8fbc6ab4d1a2ae104536be0a094fb..5835c2f0382fd3f7a875f5881eb045dbc9feacc7 100644 (file)
@@ -1062,8 +1062,8 @@ public:
     map<hobject_t, set<pg_shard_t> > missing;
     map<hobject_t, set<pg_shard_t> > inconsistent;
 
-    // Map from object with errors to good peer
-    map<hobject_t, pair<ScrubMap::object, pg_shard_t> > authoritative;
+    // Map from object with errors to good peers
+    map<hobject_t, list<pair<ScrubMap::object, pg_shard_t> > > authoritative;
 
     // Objects who need digest updates
     map<hobject_t, pair<uint32_t,uint32_t> > missing_digest;
@@ -1176,9 +1176,8 @@ public:
   int active_pushes;
 
   void repair_object(
-    const hobject_t& soid, ScrubMap::object *po,
-    pg_shard_t bad_peer,
-    pg_shard_t ok_peer);
+    const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
+    pg_shard_t bad_peer);
 
   void scrub(ThreadPool::TPHandle &handle);
   void chunky_scrub(ThreadPool::TPHandle &handle);
index ec51698a4a61891e0ba39c91c356e2071451694b..cc5da8fafcda43d1d4a6c7cc1b7fb384007c22fc 100644 (file)
@@ -446,14 +446,14 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects(
   return error;
 }
 
-map<pg_shard_t, ScrubMap *>::const_iterator
+list<map<pg_shard_t, ScrubMap *>::const_iterator>
   PGBackend::be_select_auth_object(
   const hobject_t &obj,
   const map<pg_shard_t,ScrubMap*> &maps,
   bool okseed,
   object_info_t *auth_oi)
 {
-  map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+  list<map<pg_shard_t, ScrubMap *>::const_iterator> auth;
   for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
        j != maps.end();
        ++j) {
@@ -528,7 +528,7 @@ map<pg_shard_t, ScrubMap *>::const_iterator
             << " for obj " << obj
             << dendl;
     *auth_oi = oi;
-    auth = j;
+    auth.push_back(j);
   }
   return auth;
 }
@@ -538,7 +538,7 @@ void PGBackend::be_compare_scrubmaps(
   bool okseed,
   map<hobject_t, set<pg_shard_t> > &missing,
   map<hobject_t, set<pg_shard_t> > &inconsistent,
-  map<hobject_t, pg_shard_t> &authoritative,
+  map<hobject_t, list<pg_shard_t> > &authoritative,
   map<hobject_t, pair<uint32_t,uint32_t> > &missing_digest,
   int &shallow_errors, int &deep_errors,
   const spg_t& pgid,
@@ -562,14 +562,14 @@ void PGBackend::be_compare_scrubmaps(
        k != master_set.end();
        ++k) {
     object_info_t auth_oi;
-    map<pg_shard_t, ScrubMap *>::const_iterator auth =
+    list<map<pg_shard_t, ScrubMap *>::const_iterator> auth =
       be_select_auth_object(*k, maps, okseed, &auth_oi);
-    if (auth == maps.end()) {
+    if (auth.empty()) {
       // Something is better than nothing
       // TODO: something is NOT better than nothing, do something like
       // unfound_lost if no valid copies can be found, or just mark unfound
       map<pg_shard_t, ScrubMap *>::const_iterator fallback = maps.begin();
-      auth = fallback;
+      auth.push_back(fallback);
       dout(10) << __func__ << ": selecting osd " << fallback->first
               << " for obj " << *k
               << ", something is better than nothing, FIXME"
@@ -577,18 +577,17 @@ void PGBackend::be_compare_scrubmaps(
       continue;
     }
 
-    assert(auth != maps.end());
-    ScrubMap::object& auth_object = auth->second->objects[*k];
+    ScrubMap::object& auth_object = auth.back()->second->objects[*k];
     set<pg_shard_t> cur_missing;
     set<pg_shard_t> cur_inconsistent;
     for (j = maps.begin(); j != maps.end(); ++j) {
-      if (j == auth)
+      if (j == auth.back())
        continue;
       if (j->second->objects.count(*k)) {
        // Compare
        stringstream ss;
        enum scrub_error_type error =
-         be_compare_scrub_objects(auth->first,
+         be_compare_scrub_objects(auth.back()->first,
                                   auth_object,
                                   auth_oi,
                                   okseed,
@@ -601,16 +600,15 @@ void PGBackend::be_compare_scrubmaps(
           else
            ++deep_errors;
          errorstream << __func__ << ": " << pgid << " shard " << j->first
-                     << ": soid " << *k << " " << ss.str() << std::endl;
+                     << ": soid " << *k << " " << ss.str();
        }
       } else {
        cur_missing.insert(j->first);
        ++shallow_errors;
        errorstream << __func__ << ": " << pgid << " shard " << j->first
-                   << " missing " << *k << std::endl;
+                   << " missing " << *k;
       }
     }
-    assert(auth != maps.end());
     if (!cur_missing.empty()) {
       missing[*k] = cur_missing;
     }
@@ -618,7 +616,10 @@ void PGBackend::be_compare_scrubmaps(
       inconsistent[*k] = cur_inconsistent;
     }
     if (!cur_inconsistent.empty() || !cur_missing.empty()) {
-      authoritative[*k] = auth->first;
+      list<map<pg_shard_t, ScrubMap *>::const_iterator>::const_iterator i;
+      for (i = auth.begin(); i != auth.end(); i++) {
+       authoritative[*k].push_back((*i)->first);
+      }
     }
     if (okseed &&
        auth_object.digest_present && auth_object.omap_digest_present &&
index 2b2102a1a605adcedfe71280c95d13142ed33eeb..9e0ca4f8c7baa214abf5c868f57300756b048b76 100644 (file)
      bool okseed,
      const ScrubMap::object &candidate,
      ostream &errorstream);
-   map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
+   list<map<pg_shard_t, ScrubMap *>::const_iterator> be_select_auth_object(
      const hobject_t &obj,
      const map<pg_shard_t,ScrubMap*> &maps,
      bool okseed,
      bool okseed,   ///< true if scrub digests have same seed our oi digests
      map<hobject_t, set<pg_shard_t> > &missing,
      map<hobject_t, set<pg_shard_t> > &inconsistent,
-     map<hobject_t, pg_shard_t> &authoritative,
+     map<hobject_t, list<pg_shard_t> > &authoritative,
      map<hobject_t, pair<uint32_t,uint32_t> > &missing_digest,
      int &shallow_errors, int &deep_errors,
      const spg_t& pgid,