]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Better SnapSet scrub checking (find issues instead of asserting)
authorDavid Zafman <dzafman@redhat.com>
Fri, 4 Sep 2015 02:43:35 +0000 (19:43 -0700)
committerDavid Zafman <dzafman@redhat.com>
Thu, 25 Feb 2016 20:50:25 +0000 (12:50 -0800)
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 3b381caaad20c683a330e8b7a4e1c017abcb60df)

src/osd/ReplicatedPG.cc

index 15a6ae7c311c1166a9c58238b2da8e2b9e9e8b0c..784b87e248d4c4e7e47b55fe3a355fc6934c011c 100644 (file)
@@ -11498,17 +11498,49 @@ void ReplicatedPG::_scrub(
 
       dout(20) << __func__ << " " << mode << " matched clone " << soid << dendl;
 
-      stat.num_bytes += snapset.get().get_clone_bytes(soid.snap);
-
-      if (oi && oi.get().size != snapset.get().clone_size[*curclone]) {
+      if (snapset->clone_size.count(soid.snap) == 0) {
        osd->clog->error() << mode << " " << info.pgid << " " << soid
-                         << " size " << oi.get().size << " != clone_size "
-                         << snapset.get().clone_size[*curclone];
+                          << " is missing in clone_size";
        ++scrubber.shallow_errors;
-      }
+      } else {
+        if (oi && oi->size != snapset->clone_size[soid.snap]) {
+         osd->clog->error() << mode << " " << info.pgid << " " << soid
+                            << " size " << oi->size << " != clone_size "
+                            << snapset->clone_size[*curclone];
+         ++scrubber.shallow_errors;
+        }
 
-      // verify overlap?
-      // ...
+        if (snapset->clone_overlap.count(soid.snap) == 0) {
+         osd->clog->error() << mode << " " << info.pgid << " " << soid
+                            << " is missing in clone_overlap";
+         ++scrubber.shallow_errors;
+        } else {
+         // This checking is based on get_clone_bytes().  The first 2 asserts
+         // can't happen because we know we have a clone_size and
+         // a clone_overlap.  Now we check that the interval_set won't
+         // cause the last assert.
+         uint64_t size = snapset->clone_size.find(soid.snap)->second;
+         const interval_set<uint64_t> &overlap =
+               snapset->clone_overlap.find(soid.snap)->second;
+         bool bad_interval_set = false;
+         for (interval_set<uint64_t>::const_iterator i = overlap.begin();
+              i != overlap.end(); ++i) {
+           if (size < i.get_len()) {
+             bad_interval_set = true;
+             break;
+           }
+           size -= i.get_len();
+         }
+
+         if (bad_interval_set) {
+           osd->clog->error() << mode << " " << info.pgid << " " << soid
+                              << " bad interval_set in clone_overlap";
+           ++scrubber.shallow_errors;
+         } else {
+            stat.num_bytes += snapset->get_clone_bytes(soid.snap);
+         }
+        }
+      }
 
       // what's next?
       ++curclone;