]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Better SnapSet scrub checking (find issues instead of asserting)
authorDavid Zafman <dzafman@redhat.com>
Fri, 4 Sep 2015 02:43:35 +0000 (19:43 -0700)
committerDavid Zafman <dzafman@redhat.com>
Fri, 30 Oct 2015 20:01:24 +0000 (13:01 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/ReplicatedPG.cc

index 80a4780c1e076dbaa2039678df8af827011bf5f3..676cc430f107f862dba81361a68f5f6fdb735cb8 100644 (file)
@@ -12368,17 +12368,49 @@ void ReplicatedPG::_scrub(
 
       dout(20) << __func__ << " " << mode << " matched clone " << soid << dendl;
 
-      stat.num_bytes += snapset.get().get_clone_bytes(soid.snap);
-
-      if (oi && oi.get().size != snapset.get().clone_size[*curclone]) {
+      if (snapset->clone_size.count(soid.snap) == 0) {
        osd->clog->error() << mode << " " << info.pgid << " " << soid
-                         << " size " << oi.get().size << " != clone_size "
-                         << snapset.get().clone_size[*curclone];
+                          << " is missing in clone_size";
        ++scrubber.shallow_errors;
-      }
+      } else {
+        if (oi && oi->size != snapset->clone_size[soid.snap]) {
+         osd->clog->error() << mode << " " << info.pgid << " " << soid
+                            << " size " << oi->size << " != clone_size "
+                            << snapset->clone_size[*curclone];
+         ++scrubber.shallow_errors;
+        }
 
-      // verify overlap?
-      // ...
+        if (snapset->clone_overlap.count(soid.snap) == 0) {
+         osd->clog->error() << mode << " " << info.pgid << " " << soid
+                            << " is missing in clone_overlap";
+         ++scrubber.shallow_errors;
+        } else {
+         // This checking is based on get_clone_bytes().  The first 2 asserts
+         // can't happen because we know we have a clone_size and
+         // a clone_overlap.  Now we check that the interval_set won't
+         // cause the last assert.
+         uint64_t size = snapset->clone_size.find(soid.snap)->second;
+         const interval_set<uint64_t> &overlap =
+               snapset->clone_overlap.find(soid.snap)->second;
+         bool bad_interval_set = false;
+         for (interval_set<uint64_t>::const_iterator i = overlap.begin();
+              i != overlap.end(); ++i) {
+           if (size < i.get_len()) {
+             bad_interval_set = true;
+             break;
+           }
+           size -= i.get_len();
+         }
+
+         if (bad_interval_set) {
+           osd->clog->error() << mode << " " << info.pgid << " " << soid
+                              << " bad interval_set in clone_overlap";
+           ++scrubber.shallow_errors;
+         } else {
+            stat.num_bytes += snapset->get_clone_bytes(soid.snap);
+         }
+        }
+      }
 
       // what's next?
       ++curclone;