]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: refactor scrubmap comparison and repair logic
authorSamuel Just <samuel.just@dreamhost.com>
Fri, 11 Feb 2011 22:46:05 +0000 (14:46 -0800)
committerSamuel Just <samuel.just@dreamhost.com>
Mon, 28 Feb 2011 23:43:42 +0000 (15:43 -0800)
The previous version gave erroneous results.  This version seems simpler
and can be more easily unit tested as the error detection logic has been
seperated from the repair logic.

Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
src/osd/PG.cc
src/osd/PG.h

index 6130c9a63c7eab36bbb215f8c0d6e1e354e27b9f..782106228fa66145ee462ed356b965bac5ca83bb 100644 (file)
@@ -3307,6 +3307,105 @@ bool PG::scrub_gather_replica_maps() {
   }
 }
 
+bool PG::_compare_scrub_objects(ScrubMap::object &auth,
+                               ScrubMap::object &candidate,
+                               ostream &errorstream)
+{
+  bool ok = true;
+  if (auth.size != candidate.size) {
+    ok = false;
+    errorstream << "size " << candidate.size 
+               << " != known size " << auth.size;
+  }
+  for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
+       i != auth.attrs.end();
+       i++) {
+    if (!candidate.attrs.count(i->first)) {
+      if (!ok)
+       errorstream << ", ";
+      ok = false;
+      errorstream << "missing attr " << i->first;
+    } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
+      if (!ok)
+       errorstream << ", ";
+      ok = false;
+      errorstream << "attr value mismatch " << i->first;
+    }
+  }
+  for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
+       i != candidate.attrs.end();
+       i++) {
+    if (!auth.attrs.count(i->first)) {
+      if (!ok)
+       errorstream << ", ";
+      ok = false;
+      errorstream << "extra attr " << i->first;
+    }
+  }
+  if (ok)
+    errorstream << " is ok!";
+  return ok;
+}
+
+void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,  
+                           map<sobject_t, set<int> > &missing,
+                           map<sobject_t, set<int> > &inconsistent,
+                           map<sobject_t, int> &authoritative,
+                           ostream &errorstream)
+{
+  map<sobject_t,ScrubMap::object>::const_iterator i;
+  map<int, ScrubMap *>::const_iterator j;
+  set<sobject_t> master_set;
+
+  // Construct master set
+  for (j = maps.begin(); j != maps.end(); j++) {
+    for (i = j->second->objects.begin(); i != j->second->objects.end(); i++) {
+      master_set.insert(i->first);
+    }
+  }
+
+  // Check maps against master set and each other
+  for (set<sobject_t>::const_iterator k = master_set.begin();
+       k != master_set.end();
+       k++) {
+    map<int, ScrubMap *>::const_iterator auth = maps.end();
+    set<int> cur_missing;
+    set<int> cur_inconsistent;
+    for (j = maps.begin(); j != maps.end(); j++) {
+      if (j->second->objects.count(*k)) {
+       if (auth == maps.end()) {
+         // Take first osd to have it as authoritative
+         auth = j;
+       } else {
+         // Compare 
+         errorstream << info.pgid << " osd" << acting[j->first]
+                     << ": soid " << *k;
+         if (!_compare_scrub_objects(auth->second->objects[*k],
+                                     j->second->objects[*k],
+                                     errorstream)) {
+           cur_inconsistent.insert(j->first);
+         }
+         errorstream << std::endl;
+       }
+      } else {
+       cur_missing.insert(j->first);
+       errorstream << info.pgid
+                   << " osd" << acting[j->first] 
+                   << " missing " << *k << std::endl;
+      }
+    }
+    if (cur_missing.size()) {
+      missing[*k] = cur_missing;
+    }
+    if (cur_inconsistent.size()) {
+      inconsistent[*k] = cur_inconsistent;
+    }
+    if (cur_inconsistent.size() || cur_missing.size()) {
+      authoritative[*k] = auth->first;
+    }
+  }
+}
+
 void PG::scrub_finalize() {
   osd->map_lock.get_read();
   lock();
@@ -3329,159 +3428,78 @@ void PG::scrub_finalize() {
   osd->map_lock.put_read();
 
   dout(10) << "scrub_finalize has maps, analyzing" << dendl;
-  stringstream ss;
   int errors = 0, fixed = 0;
   bool repair = state_test(PG_STATE_REPAIR);
   const char *mode = repair ? "repair":"scrub";
   if (acting.size() > 1) {
     dout(10) << "scrub  comparing replica scrub maps" << dendl;
 
-    // first, compare scrub maps
-    vector<ScrubMap*> m(acting.size());
-    m[0] = &primary_scrubmap;
-    for (unsigned i=1; i<acting.size(); i++)
-      m[i] = &scrub_received_maps[acting[i]];
-    map<sobject_t,ScrubMap::object>::iterator p[acting.size()];
-    for (unsigned i=0; i<acting.size(); i++) {
-      dout(2) << "scrub   osd" << acting[i] << " has " << m[i]->objects.size() << " items" << dendl;
-      p[i] = m[i]->objects.begin();
+    stringstream ss;
+
+    // Maps from objects with erros to missing/inconsistent peers
+    map<sobject_t, set<int> > missing;
+    map<sobject_t, set<int> > inconsistent;
+
+    // Map from object with errors to good peer
+    map<sobject_t, int> authoritative;
+    map<int,ScrubMap *> maps;
+
+    dout(2) << "scrub   osd" << acting[0] << " has " 
+           << primary_scrubmap.objects.size() << " items" << dendl;
+    maps[0] = &primary_scrubmap;
+    for (unsigned i=1; i<acting.size(); i++) {
+      dout(2) << "scrub   osd" << acting[i] << " has " 
+             << scrub_received_maps[acting[i]].objects.size() << " items" << dendl;
+      maps[i] = &scrub_received_maps[acting[i]];
     }
-    
-    int num_missing = 0;
-    int num_bad = 0;
-    
-    while (1) {
-      ScrubMap::object *po = 0;
-      const sobject_t *psoid = 0;
-      int pi = -1;
-      bool anymissing = false;
-      for (unsigned i=0; i<acting.size(); i++) {
-       if (p[i] == m[i]->objects.end()) {
-         anymissing = true;
-         continue;
-       }
-       if (!po) {
-         psoid = &(p[i]->first);
-         po = &(p[i]->second);
-         pi = i;
-       }
-       else if (*psoid != p[i]->first) {
-         anymissing = true;
-         if (*psoid > p[i]->first) {
-           psoid = &(p[0]->first);
-           po = &(p[i]->second);
-           pi = i;
+
+    _compare_scrubmaps(maps, missing, inconsistent, authoritative, ss);
+
+    if (authoritative.size()) {
+      ss << info.pgid << " " << mode << " " << missing.size() << " missing, "
+        << inconsistent.size() << " inconsistent objects\n";
+      dout(2) << ss.str() << dendl;
+      osd->clog.error(ss);
+      state_set(PG_STATE_INCONSISTENT);
+      if (repair) {
+       state_clear(PG_STATE_CLEAN);
+       for (map<sobject_t, int>::iterator i = authoritative.begin();
+            i != authoritative.end();
+            i++) {
+         set<int>::iterator j;
+         
+         if (missing.count(i->first)) {
+           for (j = missing[i->first].begin();
+                j != missing[i->first].end(); 
+                j++) {
+             repair_object(i->first, 
+                           &maps[i->second]->objects[i->first],
+                           acting[*j],
+                           acting[i->second]);
+           }
          }
-       }
-      }
-      if (!po) {
-       break;
-      }
-      if (anymissing) {
-       for (unsigned i=0; i<acting.size(); i++) {
-         if (p[i] == m[i]->objects.end() || *psoid != p[i]->first) {
-           osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " missing " << *psoid;
-           num_missing++;
-           
-           if (repair)
-             repair_object(*psoid, po, acting[i], acting[pi]);
-         } else
-           p[i]++;
-       }
-       continue;
-      }
-      
-      // compare
-      bool ok = true;
-      for (unsigned i=1; i<acting.size(); i++) {
-       bool peerok = true;
-       if (po->size != p[i]->second.size) {
-         dout(0) << "scrub osd" << acting[i] << " " << *psoid
-                 << " size " << p[i]->second.size << " != " << po->size << dendl;
-         osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid
-            << " size " << p[i]->second.size << " != " << po->size;
-         peerok = ok = false;
-         num_bad++;
-       }
-       if (po->attrs.size() != p[i]->second.attrs.size()) {
-         dout(0) << "scrub osd" << acting[i] << " " << *psoid
-                 << " attr count " << p[i]->second.attrs.size() << " != " << po->attrs.size() << dendl;
-         osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid
-                           << " attr count " << p[i]->second.attrs.size() << " != " << po->attrs.size();
-         peerok = ok = false;
-         num_bad++;
-       }
-       for (map<string,bufferptr>::iterator q = po->attrs.begin(); q != po->attrs.end(); q++) {
-         if (p[i]->second.attrs.count(q->first)) {
-           if (q->second.cmp(p[i]->second.attrs[q->first])) {
-             dout(0) << "scrub osd" << acting[i] << " " << *psoid
-                     << " attr " << q->first << " value mismatch" << dendl;
-             osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid
-                               << " attr " << q->first << " value mismatch";
-             peerok = ok = false;
-             num_bad++;
+         if (inconsistent.count(i->first)) {
+           for (j = inconsistent[i->first].begin(); 
+                j != inconsistent[i->first].end(); 
+                j++) {
+             repair_object(i->first, 
+                           &maps[i->second]->objects[i->first],
+                           acting[*j],
+                           acting[i->second]);
            }
-         } else {
-           dout(0) << "scrub osd" << acting[i] << " " << *psoid
-                   << " attr " << q->first << " missing" << dendl;
-           osd->clog.error() << info.pgid << " " << mode << " osd" << acting[i] << " " << *psoid
-              << " attr " << q->first << " missing";
-           peerok = ok = false;
-           num_bad++;
          }
-       }
 
-       if (!peerok && repair)
-         repair_object(*psoid, po, acting[i], acting[pi]);
+       }
       }
-      
-      if (ok)
-       dout(20) << "scrub  " << *psoid << " size " << po->size << " ok" << dendl;
-      
-      // next
-      for (unsigned i=0; i<acting.size(); i++)
-       p[i]++;
-    }
-    
-    if (num_missing || num_bad) {
-      stringstream ss;
-      ss << info.pgid << " " << mode << " " << num_missing << " missing, "
-        << num_bad << " bad objects\n";
-      *_dout << ss.str();
-      _dout->flush();
-      osd->clog.error(ss);
-      state_set(PG_STATE_INCONSISTENT);
-      if (repair)
-       state_clear(PG_STATE_CLEAN);
     }
   }
 
-  /*
-  lock();
-  if (epoch != info.history.same_acting_since) {
-    dout(10) << "scrub  pg changed, aborting" << dendl;
-    goto out;
-  }
-  */
-
   // discard peer scrub info.
   peer_scrub_map.clear();
 
-  /*
-  unlock();
-  */
-
   // ok, do the pg-type specific scrubbing
   _scrub(primary_scrubmap, errors, fixed);
 
-  /*
-  lock();
-  if (epoch != info.history.same_acting_since) {
-    dout(10) << "scrub  pg changed, aborting" << dendl;
-    goto out;
-  }
-  */
-
   {
     stringstream oss;
     oss << info.pgid << " " << mode << " ";
index 4842c590dc8dec6a5414cbd28fe7c47355aa9f13..da532fdbdf5db1664e099bdd057ea380550983ac 100644 (file)
@@ -906,6 +906,14 @@ public:
   MOSDRepScrub *active_rep_scrub;
 
   void repair_object(const sobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer);
+  bool _compare_scrub_objects(ScrubMap::object &auth,
+                             ScrubMap::object &candidate,
+                             ostream &errorstream);
+  void _compare_scrubmaps(const map<int,ScrubMap*> &maps,  
+                         map<sobject_t, set<int> > &missing,
+                         map<sobject_t, set<int> > &inconsistent,
+                         map<sobject_t, int> &authoritative,
+                         ostream &errorstream);
   void scrub();
   void scrub_finalize();
   void scrub_clear_state();