]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: diff-iterate shouldn't ever report "new hole" against a hole
authorIlya Dryomov <idryomov@gmail.com>
Thu, 9 Nov 2023 19:44:18 +0000 (20:44 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Tue, 26 Dec 2023 20:51:47 +0000 (21:51 +0100)
If an object doesn't exist in both start and end versions but there is
an intermediate snapshot which contains it (i.e. the object is written
to and captured at some point but then discarded prior to or in the end
version), diff-iterate reports "new hole" -- callback is invoked with
exists=false.  This occurs both on the slow list_snaps path and in
fast-diff mode.

Despite going all the way back to the introduction of diff-iterate in
commit 0296c7cdae91 ("librbd: implement diff_iterate"), this behavior
is wrong and contradicts diff-iterate API documentation added in commit
a69532e86450 ("librbd: document diff_iterate in header") in the same
series:

    If the source snapshot name is NULL, we interpret that as
    the beginning of time and return all allocated regions of the
    image.

It also triggered an assert added in commit c680531e070a ("librbd:
change diff_iterate interface to be more C-friendly") in the same
series.  Unfortunately, commit f1f6407221a0 ("test_librbd: add
diff_iterate test including discard"), also part of the same series,
added a test which expected the wrong behavior.  Very confusing!

A year later, a different manifestation of this bug was fixed in commit
9a1ab95176fe ("rbd: Fix rbd diff for non-existent objects"), but the
fix only covered the case where calc_snap_set_diff() goes past the end
snap ID while processing clones.  The case where it runs out of clones
to process before reaching the end snap ID remained mishandled.

A year after that, commit 3ccc3bb4bd35 ("librbd: diff_iterate needs to
handle holes in parent images") dropped the assert mentioned above and
this bug got enshrined in the newly introduced fast-diff mode.

Finally, a few years later, deep-copy actually started relying on this
bug in commit e5a21e904142 ("librbd: deep-copy image copy state machine
skips clean objects").  This necessitates bifurcation in DiffRequest
because deep-copy wants the "has this object been touched" semantics,
which is different from diff-iterate (and also potentially much more
expensive to produce!).

This commit brings a minimal update to TestMockObjectMapDiffRequest
tests and DiffIterateTest.DiffIterateDiscard.  Coverage is expanded in
the following commits.

Fixes: https://tracker.ceph.com/issues/53897
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit be507aaed15fb7a193a90c5f88eda61b9be2af1b)

src/librados/snap_set_diff.cc
src/librbd/api/DiffIterate.cc
src/librbd/deep_copy/ImageCopyRequest.cc
src/librbd/object_map/DiffRequest.cc
src/librbd/object_map/DiffRequest.h
src/librbd/object_map/Types.h
src/test/cli-integration/rbd/snap-diff.t
src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc
src/test/librbd/object_map/test_mock_DiffRequest.cc
src/test/librbd/test_librbd.cc

index f80105b44ae10788d72e091c451e78b05d999cc0..0029bcd647801be2779df94a5677016797fb9a48 100644 (file)
@@ -31,9 +31,8 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
   *clone_end_snap_id = 0;
   *whole_object = false;
 
-  for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
-       r != snap_set.clones.end();
-       ) {
+  auto r = snap_set.clones.begin();
+  while (r != snap_set.clones.end()) {
     // make an interval, and hide the fact that the HEAD doesn't
     // include itself in the snaps list
     librados::snap_t a, b;
@@ -77,12 +76,6 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
     }
 
     if (end < a) {
-      ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
-      *end_exists = false;
-      diff->clear();
-      if (start_size) {
-       diff->insert(0, start_size);
-      }
       break;
     }
     if (end <= b) {
@@ -90,7 +83,7 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
       *end_size = r->size;
       *end_exists = true;
       *clone_end_snap_id = b;
-      break;
+      return;
     }
 
     // start with the max(this size, next size), and subtract off any
@@ -114,4 +107,16 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
     diff->union_of(diff_to_next);
     ldout(cct, 20) << "  diff now " << *diff << dendl;
   }
+
+  if (r != snap_set.clones.end()) {
+    ldout(cct, 20) << " past end " << end
+                   << ", end object does not exist" << dendl;
+  } else {
+    ldout(cct, 20) << " ran out of clones before reaching end " << end
+                   << ", end object does not exist" << dendl;
+  }
+  diff->clear();
+  if (start_size) {
+    diff->insert(0, start_size);
+  }
 }
index fa1df6df373403868b98d41f3d961e61292035c9..d8760692e1cb8ecbee00556d2327d3bbbbad6c26 100644 (file)
@@ -249,7 +249,7 @@ int DiffIterate<I>::execute() {
   if (m_whole_object) {
     C_SaferCond ctx;
     auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
-                                                  end_snap_id,
+                                                  end_snap_id, true,
                                                   &object_diff_state, &ctx);
     req->send();
 
index 08e959dd572325f74beda92c334167a29836e0ab..abd8891c026328d9bf04562ab1463fb0cf9a26a9 100644 (file)
@@ -101,9 +101,10 @@ void ImageCopyRequest<I>::compute_diff() {
 
   auto ctx = create_context_callback<
     ImageCopyRequest<I>, &ImageCopyRequest<I>::handle_compute_diff>(this);
-  auto req = object_map::DiffRequest<I>::create(m_src_image_ctx, m_src_snap_id_start,
-                                                m_src_snap_id_end, &m_object_diff_state,
-                                                ctx);
+  auto req = object_map::DiffRequest<I>::create(m_src_image_ctx,
+                                                m_src_snap_id_start,
+                                                m_src_snap_id_end, false,
+                                                &m_object_diff_state, ctx);
   req->send();
 }
 
index 606d48bbf33c4d6eb9343a5b1735af47ee21cc8c..8820aacc87bbba4db5f37ba0b6c56519ace750ff 100644 (file)
@@ -195,16 +195,34 @@ void DiffRequest<I>::handle_load_object_map(int r) {
   for (; it != overlap_end_it; ++it, ++diff_it, ++i) {
     uint8_t object_map_state = *it;
     uint8_t prev_object_diff_state = *diff_it;
-    if (object_map_state == OBJECT_EXISTS ||
-        object_map_state == OBJECT_PENDING ||
-        (object_map_state == OBJECT_EXISTS_CLEAN &&
-         prev_object_diff_state != DIFF_STATE_DATA &&
-         prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) {
-      *diff_it = DIFF_STATE_DATA_UPDATED;
-    } else if (object_map_state == OBJECT_NONEXISTENT &&
-               prev_object_diff_state != DIFF_STATE_HOLE &&
-               prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) {
-      *diff_it = DIFF_STATE_HOLE_UPDATED;
+    switch (prev_object_diff_state) {
+    case DIFF_STATE_HOLE:
+      if (object_map_state != OBJECT_NONEXISTENT) {
+        // stay in HOLE on intermediate snapshots for diff-iterate
+        if (!m_diff_iterate_range || m_current_snap_id == m_snap_id_end) {
+          *diff_it = DIFF_STATE_DATA_UPDATED;
+        }
+      }
+      break;
+    case DIFF_STATE_DATA:
+      if (object_map_state == OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_HOLE_UPDATED;
+      } else if (object_map_state != OBJECT_EXISTS_CLEAN) {
+        *diff_it = DIFF_STATE_DATA_UPDATED;
+      }
+      break;
+    case DIFF_STATE_HOLE_UPDATED:
+      if (object_map_state != OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_DATA_UPDATED;
+      }
+      break;
+    case DIFF_STATE_DATA_UPDATED:
+      if (object_map_state == OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_HOLE_UPDATED;
+      }
+      break;
+    default:
+      ceph_abort();
     }
 
     ldout(cct, 20) << "object state: " << i << " "
@@ -225,8 +243,29 @@ void DiffRequest<I>::handle_load_object_map(int r) {
       } else if (diff_from_start ||
                  (m_object_diff_state_valid &&
                   object_map_state != OBJECT_EXISTS_CLEAN)) {
-        *diff_it = DIFF_STATE_DATA_UPDATED;
+        // diffing against the beginning of time or image was grown
+        // (implicit) starting state is HOLE, this is the first object
+        // map after
+        if (m_diff_iterate_range) {
+          // for diff-iterate, if the object is discarded prior to or
+          // in the end version, result should be HOLE
+          // since DATA_UPDATED can transition only to HOLE_UPDATED,
+          // stay in HOLE on intermediate snapshots -- another way to
+          // put this is that when starting with a hole, intermediate
+          // snapshots can be ignored as the result depends only on the
+          // end version
+          if (m_current_snap_id == m_snap_id_end) {
+            *diff_it = DIFF_STATE_DATA_UPDATED;
+          } else {
+            *diff_it = DIFF_STATE_HOLE;
+          }
+        } else {
+          // for deep-copy, if the object is discarded prior to or
+          // in the end version, result should be HOLE_UPDATED
+          *diff_it = DIFF_STATE_DATA_UPDATED;
+        }
       } else {
+        // diffing against a snapshot, this is its object map
         *diff_it = DIFF_STATE_DATA;
       }
 
index e83a1629e62339023c81090a9f7e3ec767d54cde..b02ef34ba347e34dd9fcf757bff2992d7bd3de27 100644 (file)
@@ -22,19 +22,19 @@ template <typename ImageCtxT>
 class DiffRequest {
 public:
   static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start,
-                             uint64_t snap_id_end,
+                             uint64_t snap_id_end, bool diff_iterate_range,
                              BitVector<2>* object_diff_state,
                              Context* on_finish) {
     return new DiffRequest(image_ctx, snap_id_start, snap_id_end,
-                           object_diff_state, on_finish);
+                           diff_iterate_range, object_diff_state, on_finish);
   }
 
   DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start,
-              uint64_t snap_id_end, BitVector<2>* object_diff_state,
-              Context* on_finish)
+              uint64_t snap_id_end, bool diff_iterate_range,
+              BitVector<2>* object_diff_state, Context* on_finish)
     : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start),
-      m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state),
-      m_on_finish(on_finish) {
+      m_snap_id_end(snap_id_end), m_diff_iterate_range(diff_iterate_range),
+      m_object_diff_state(object_diff_state), m_on_finish(on_finish) {
   }
 
   void send();
@@ -58,6 +58,7 @@ private:
   ImageCtxT* m_image_ctx;
   uint64_t m_snap_id_start;
   uint64_t m_snap_id_end;
+  bool m_diff_iterate_range;
   BitVector<2>* m_object_diff_state;
   Context* m_on_finish;
 
index 0ce91bd96a1c140ecc0cf94f5e9c49bcb5984ce2..576ea0e4b6b70f774db57a5c46f0eb6b888db8aa 100644 (file)
@@ -8,10 +8,17 @@ namespace librbd {
 namespace object_map {
 
 enum DiffState {
-  DIFF_STATE_HOLE         = 0, /* unchanged hole */
-  DIFF_STATE_DATA         = 1, /* unchanged data */
-  DIFF_STATE_HOLE_UPDATED = 2, /* new hole */
-  DIFF_STATE_DATA_UPDATED = 3  /* new data */
+  // diff-iterate: hole with or without data captured in intermediate snapshot
+  // deep-copy: hole without data captured in intermediate snapshot
+  DIFF_STATE_HOLE         = 0,
+  // diff-iterate, deep-copy: unchanged data
+  DIFF_STATE_DATA         = 1,
+  // diff-iterate: new hole (data -> hole)
+  // deep-copy: new hole (data -> hole) or hole with data captured in
+  //            intermediate snapshot
+  DIFF_STATE_HOLE_UPDATED = 2,
+  // diff-iterate, deep-copy: new data (hole -> data) or changed data
+  DIFF_STATE_DATA_UPDATED = 3
 };
 
 } // namespace object_map
index 1ca2fb04ddd96543150e929892e49b39a47df335..fa564891a4b9653eeb4ed7f82c5a6a4eadcfc97a 100644 (file)
   $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   []
   $ rbd snap rollback xrbddiff1/xtestdiff1@snap1 --no-progress
+  $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json
+  [{"offset":0,"length":1048576,"exists":"true"}]
   $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   []
   $ rbd snap rollback xrbddiff1/xtestdiff1@allzeroes --no-progress
   $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json
+  []
+  $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   [{"offset":0,"length":1048576,"exists":"false"}]
   $ ceph osd pool rm xrbddiff1 xrbddiff1 --yes-i-really-really-mean-it
   pool 'xrbddiff1' removed
index e38ffffdbe49380a015de99ebc9b7ee1adcd8ffe..634cabfa79e3ece5e3deb33f58b21cde1b8c9e1f 100644 (file)
@@ -92,6 +92,7 @@ struct DiffRequest<MockTestImageCtx> {
   static DiffRequest* s_instance;
   static DiffRequest* create(MockTestImageCtx *image_ctx,
                              uint64_t snap_id_start, uint64_t snap_id_end,
+                             bool diff_iterate_range,
                              BitVector<2>* object_diff_state,
                              Context* on_finish) {
     ceph_assert(s_instance != nullptr);
index c25ae4a95c5e628852a73269e2e9a9b32bd9d3db..85fe456d929ef48d16285dbf0856356e923f88f5 100644 (file)
@@ -42,6 +42,10 @@ public:
     ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx));
   }
 
+  bool is_diff_iterate() const {
+    return true;
+  }
+
   void expect_get_flags(MockTestImageCtx& mock_image_ctx, uint64_t snap_id,
                         int32_t flags, int r) {
     EXPECT_CALL(mock_image_ctx, get_flags(snap_id, _))
@@ -87,7 +91,8 @@ TEST_F(TestMockObjectMapDiffRequest, InvalidStartSnap) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, CEPH_NOSNAP, 0,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -98,7 +103,7 @@ TEST_F(TestMockObjectMapDiffRequest, StartEndSnapEqual) {
   InSequence seq;
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 1,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 1, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -115,7 +120,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffDisabled) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -133,7 +139,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffInvalid) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -180,7 +187,8 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -188,7 +196,7 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) {
   expected_diff_state.resize(object_count);
   expected_diff_state[1] = DIFF_STATE_DATA_UPDATED;
   expected_diff_state[2] = DIFF_STATE_DATA_UPDATED;
-  expected_diff_state[3] = DIFF_STATE_HOLE_UPDATED;
+  expected_diff_state[3] = DIFF_STATE_HOLE;
   ASSERT_EQ(expected_diff_state, m_object_diff_state);
 }
 
@@ -226,7 +234,7 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateDelta) {
   expect_load_map(mock_image_ctx, 2U, object_map_2, 0);
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -274,7 +282,8 @@ TEST_F(TestMockObjectMapDiffRequest, EndDelta) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 2, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -302,7 +311,8 @@ TEST_F(TestMockObjectMapDiffRequest, StartSnapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 1, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -328,7 +338,7 @@ TEST_F(TestMockObjectMapDiffRequest, EndSnapDNE) {
   expect_load_map(mock_image_ctx, 1U, object_map_1, 0);
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
@@ -367,7 +377,8 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateSnapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -394,7 +405,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -427,7 +439,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadIntermediateObjectMapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -458,7 +471,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapError) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EPERM, ctx.wait());
 }
@@ -484,7 +498,8 @@ TEST_F(TestMockObjectMapDiffRequest, ObjectMapTooSmall) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
index 682f39f57933515a92cc75e3a82c900c1b35029f..01313e4f92a60e1bea7832360d7cb5af7f5d8348 100644 (file)
@@ -6390,6 +6390,10 @@ TYPED_TEST(DiffIterateTest, DiffIterateDiscard)
   extents.clear();
   ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object,
                                   vector_iterate_cb, (void *) &extents));
+  ASSERT_EQ(0u, extents.size());
+
+  ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object,
+                                   vector_iterate_cb, (void *) &extents));
   ASSERT_EQ(1u, extents.size());
   ASSERT_EQ(diff_extent(0, 256, false, object_size), extents[0]);
   ASSERT_PASSED(this->validate_object_map, image);