]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: diff-iterate shouldn't ever report "new hole" against a hole
authorIlya Dryomov <idryomov@gmail.com>
Thu, 9 Nov 2023 19:44:18 +0000 (20:44 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 18 Dec 2023 17:41:25 +0000 (18:41 +0100)
If an object doesn't exist in both start and end versions but there is
an intermediate snapshot which contains it (i.e. the object is written
to and captured at some point but then discarded prior to or in the end
version), diff-iterate reports "new hole" -- callback is invoked with
exists=false.  This occurs both on the slow list_snaps path and in
fast-diff mode.

Despite going all the way back to the introduction of diff-iterate in
commit 0296c7cdae91 ("librbd: implement diff_iterate"), this behavior
is wrong and contradicts diff-iterate API documentation added in commit
a69532e86450 ("librbd: document diff_iterate in header") in the same
series:

    If the source snapshot name is NULL, we interpret that as
    the beginning of time and return all allocated regions of the
    image.

It also triggered an assert added in commit c680531e070a ("librbd:
change diff_iterate interface to be more C-friendly") in the same
series.  Unfortunately, commit f1f6407221a0 ("test_librbd: add
diff_iterate test including discard"), also part of the same series,
added a test which expected the wrong behavior.  Very confusing!

A year later, a different manifestation of this bug was fixed in commit
9a1ab95176fe ("rbd: Fix rbd diff for non-existent objects"), but the
fix only covered the case where calc_snap_set_diff() goes past the end
snap ID while processing clones.  The case where it runs out of clones
to process before reaching the end snap ID remained mishandled.

A year after that, commit 3ccc3bb4bd35 ("librbd: diff_iterate needs to
handle holes in parent images") dropped the assert mentioned above and
this bug got enshrined in the newly introduced fast-diff mode.

Finally, a few years later, deep-copy actually started relying on this
bug in commit e5a21e904142 ("librbd: deep-copy image copy state machine
skips clean objects").  This necessitates bifurcation in DiffRequest
because deep-copy wants the "has this object been touched" semantics,
which is different from diff-iterate (and also potentially much more
expensive to produce!).

This commit brings a minimal update to TestMockObjectMapDiffRequest
tests and DiffIterateTest.DiffIterateDiscard.  Coverage is expanded in
the following commits.

Fixes: https://tracker.ceph.com/issues/53897
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit be507aaed15fb7a193a90c5f88eda61b9be2af1b)

src/librados/snap_set_diff.cc
src/librbd/api/DiffIterate.cc
src/librbd/deep_copy/ImageCopyRequest.cc
src/librbd/object_map/DiffRequest.cc
src/librbd/object_map/DiffRequest.h
src/librbd/object_map/Types.h
src/test/cli-integration/rbd/snap-diff.t
src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc
src/test/librbd/object_map/test_mock_DiffRequest.cc
src/test/librbd/test_librbd.cc

index c5159195f464e2eafa8438e38b0d5649548d94d3..0211b57658c82aa11091407092861855802ef215 100644 (file)
@@ -30,9 +30,8 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
   *clone_end_snap_id = 0;
   *whole_object = false;
 
-  for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
-       r != snap_set.clones.end();
-       ) {
+  auto r = snap_set.clones.begin();
+  while (r != snap_set.clones.end()) {
     // make an interval, and hide the fact that the HEAD doesn't
     // include itself in the snaps list
     librados::snap_t a, b;
@@ -76,12 +75,6 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
     }
 
     if (end < a) {
-      ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
-      *end_exists = false;
-      diff->clear();
-      if (start_size) {
-       diff->insert(0, start_size);
-      }
       break;
     }
     if (end <= b) {
@@ -89,7 +82,7 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
       *end_size = r->size;
       *end_exists = true;
       *clone_end_snap_id = b;
-      break;
+      return;
     }
 
     // start with the max(this size, next size), and subtract off any
@@ -113,4 +106,16 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
     diff->union_of(diff_to_next);
     ldout(cct, 20) << "  diff now " << *diff << dendl;
   }
+
+  if (r != snap_set.clones.end()) {
+    ldout(cct, 20) << " past end " << end
+                   << ", end object does not exist" << dendl;
+  } else {
+    ldout(cct, 20) << " ran out of clones before reaching end " << end
+                   << ", end object does not exist" << dendl;
+  }
+  diff->clear();
+  if (start_size) {
+    diff->insert(0, start_size);
+  }
 }
index 042f5eafb3563270c1f209808d28bbf7ab5e9bb3..9f643fa34884fde5806d717701ee16e62fa09163 100644 (file)
@@ -249,7 +249,7 @@ int DiffIterate<I>::execute() {
   if (m_whole_object) {
     C_SaferCond ctx;
     auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
-                                                  end_snap_id,
+                                                  end_snap_id, true,
                                                   &object_diff_state, &ctx);
     req->send();
 
index 08e959dd572325f74beda92c334167a29836e0ab..abd8891c026328d9bf04562ab1463fb0cf9a26a9 100644 (file)
@@ -101,9 +101,10 @@ void ImageCopyRequest<I>::compute_diff() {
 
   auto ctx = create_context_callback<
     ImageCopyRequest<I>, &ImageCopyRequest<I>::handle_compute_diff>(this);
-  auto req = object_map::DiffRequest<I>::create(m_src_image_ctx, m_src_snap_id_start,
-                                                m_src_snap_id_end, &m_object_diff_state,
-                                                ctx);
+  auto req = object_map::DiffRequest<I>::create(m_src_image_ctx,
+                                                m_src_snap_id_start,
+                                                m_src_snap_id_end, false,
+                                                &m_object_diff_state, ctx);
   req->send();
 }
 
index 566e98ac0f0fa2ccbe846e5a34ddf15895db1699..62feb063b819d09300af91b197b7c6451e5ad7bb 100644 (file)
@@ -195,16 +195,34 @@ void DiffRequest<I>::handle_load_object_map(int r) {
   for (; it != overlap_end_it; ++it, ++diff_it, ++i) {
     uint8_t object_map_state = *it;
     uint8_t prev_object_diff_state = *diff_it;
-    if (object_map_state == OBJECT_EXISTS ||
-        object_map_state == OBJECT_PENDING ||
-        (object_map_state == OBJECT_EXISTS_CLEAN &&
-         prev_object_diff_state != DIFF_STATE_DATA &&
-         prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) {
-      *diff_it = DIFF_STATE_DATA_UPDATED;
-    } else if (object_map_state == OBJECT_NONEXISTENT &&
-               prev_object_diff_state != DIFF_STATE_HOLE &&
-               prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) {
-      *diff_it = DIFF_STATE_HOLE_UPDATED;
+    switch (prev_object_diff_state) {
+    case DIFF_STATE_HOLE:
+      if (object_map_state != OBJECT_NONEXISTENT) {
+        // stay in HOLE on intermediate snapshots for diff-iterate
+        if (!m_diff_iterate_range || m_current_snap_id == m_snap_id_end) {
+          *diff_it = DIFF_STATE_DATA_UPDATED;
+        }
+      }
+      break;
+    case DIFF_STATE_DATA:
+      if (object_map_state == OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_HOLE_UPDATED;
+      } else if (object_map_state != OBJECT_EXISTS_CLEAN) {
+        *diff_it = DIFF_STATE_DATA_UPDATED;
+      }
+      break;
+    case DIFF_STATE_HOLE_UPDATED:
+      if (object_map_state != OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_DATA_UPDATED;
+      }
+      break;
+    case DIFF_STATE_DATA_UPDATED:
+      if (object_map_state == OBJECT_NONEXISTENT) {
+        *diff_it = DIFF_STATE_HOLE_UPDATED;
+      }
+      break;
+    default:
+      ceph_abort();
     }
 
     ldout(cct, 20) << "object state: " << i << " "
@@ -225,8 +243,29 @@ void DiffRequest<I>::handle_load_object_map(int r) {
       } else if (diff_from_start ||
                  (m_object_diff_state_valid &&
                   object_map_state != OBJECT_EXISTS_CLEAN)) {
-        *diff_it = DIFF_STATE_DATA_UPDATED;
+        // diffing against the beginning of time or image was grown
+        // (implicit) starting state is HOLE, this is the first object
+        // map after
+        if (m_diff_iterate_range) {
+          // for diff-iterate, if the object is discarded prior to or
+          // in the end version, result should be HOLE
+          // since DATA_UPDATED can transition only to HOLE_UPDATED,
+          // stay in HOLE on intermediate snapshots -- another way to
+          // put this is that when starting with a hole, intermediate
+          // snapshots can be ignored as the result depends only on the
+          // end version
+          if (m_current_snap_id == m_snap_id_end) {
+            *diff_it = DIFF_STATE_DATA_UPDATED;
+          } else {
+            *diff_it = DIFF_STATE_HOLE;
+          }
+        } else {
+          // for deep-copy, if the object is discarded prior to or
+          // in the end version, result should be HOLE_UPDATED
+          *diff_it = DIFF_STATE_DATA_UPDATED;
+        }
       } else {
+        // diffing against a snapshot, this is its object map
         *diff_it = DIFF_STATE_DATA;
       }
 
index e83a1629e62339023c81090a9f7e3ec767d54cde..b02ef34ba347e34dd9fcf757bff2992d7bd3de27 100644 (file)
@@ -22,19 +22,19 @@ template <typename ImageCtxT>
 class DiffRequest {
 public:
   static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start,
-                             uint64_t snap_id_end,
+                             uint64_t snap_id_end, bool diff_iterate_range,
                              BitVector<2>* object_diff_state,
                              Context* on_finish) {
     return new DiffRequest(image_ctx, snap_id_start, snap_id_end,
-                           object_diff_state, on_finish);
+                           diff_iterate_range, object_diff_state, on_finish);
   }
 
   DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start,
-              uint64_t snap_id_end, BitVector<2>* object_diff_state,
-              Context* on_finish)
+              uint64_t snap_id_end, bool diff_iterate_range,
+              BitVector<2>* object_diff_state, Context* on_finish)
     : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start),
-      m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state),
-      m_on_finish(on_finish) {
+      m_snap_id_end(snap_id_end), m_diff_iterate_range(diff_iterate_range),
+      m_object_diff_state(object_diff_state), m_on_finish(on_finish) {
   }
 
   void send();
@@ -58,6 +58,7 @@ private:
   ImageCtxT* m_image_ctx;
   uint64_t m_snap_id_start;
   uint64_t m_snap_id_end;
+  bool m_diff_iterate_range;
   BitVector<2>* m_object_diff_state;
   Context* m_on_finish;
 
index 0ce91bd96a1c140ecc0cf94f5e9c49bcb5984ce2..576ea0e4b6b70f774db57a5c46f0eb6b888db8aa 100644 (file)
@@ -8,10 +8,17 @@ namespace librbd {
 namespace object_map {
 
 enum DiffState {
-  DIFF_STATE_HOLE         = 0, /* unchanged hole */
-  DIFF_STATE_DATA         = 1, /* unchanged data */
-  DIFF_STATE_HOLE_UPDATED = 2, /* new hole */
-  DIFF_STATE_DATA_UPDATED = 3  /* new data */
+  // diff-iterate: hole with or without data captured in intermediate snapshot
+  // deep-copy: hole without data captured in intermediate snapshot
+  DIFF_STATE_HOLE         = 0,
+  // diff-iterate, deep-copy: unchanged data
+  DIFF_STATE_DATA         = 1,
+  // diff-iterate: new hole (data -> hole)
+  // deep-copy: new hole (data -> hole) or hole with data captured in
+  //            intermediate snapshot
+  DIFF_STATE_HOLE_UPDATED = 2,
+  // diff-iterate, deep-copy: new data (hole -> data) or changed data
+  DIFF_STATE_DATA_UPDATED = 3
 };
 
 } // namespace object_map
index 1ca2fb04ddd96543150e929892e49b39a47df335..fa564891a4b9653eeb4ed7f82c5a6a4eadcfc97a 100644 (file)
   $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   []
   $ rbd snap rollback xrbddiff1/xtestdiff1@snap1 --no-progress
+  $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json
+  [{"offset":0,"length":1048576,"exists":"true"}]
   $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   []
   $ rbd snap rollback xrbddiff1/xtestdiff1@allzeroes --no-progress
   $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json
+  []
+  $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json
   [{"offset":0,"length":1048576,"exists":"false"}]
   $ ceph osd pool rm xrbddiff1 xrbddiff1 --yes-i-really-really-mean-it
   pool 'xrbddiff1' removed
index c56b681b92843e23f680d91f639134e81a18f9c1..f6c845b07e3fefa519661a42c819c0d73069af3a 100644 (file)
@@ -92,6 +92,7 @@ struct DiffRequest<MockTestImageCtx> {
   static DiffRequest* s_instance;
   static DiffRequest* create(MockTestImageCtx *image_ctx,
                              uint64_t snap_id_start, uint64_t snap_id_end,
+                             bool diff_iterate_range,
                              BitVector<2>* object_diff_state,
                              Context* on_finish) {
     ceph_assert(s_instance != nullptr);
index c25ae4a95c5e628852a73269e2e9a9b32bd9d3db..85fe456d929ef48d16285dbf0856356e923f88f5 100644 (file)
@@ -42,6 +42,10 @@ public:
     ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx));
   }
 
+  bool is_diff_iterate() const {
+    return true;
+  }
+
   void expect_get_flags(MockTestImageCtx& mock_image_ctx, uint64_t snap_id,
                         int32_t flags, int r) {
     EXPECT_CALL(mock_image_ctx, get_flags(snap_id, _))
@@ -87,7 +91,8 @@ TEST_F(TestMockObjectMapDiffRequest, InvalidStartSnap) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, CEPH_NOSNAP, 0,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -98,7 +103,7 @@ TEST_F(TestMockObjectMapDiffRequest, StartEndSnapEqual) {
   InSequence seq;
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 1,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 1, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -115,7 +120,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffDisabled) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -133,7 +139,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffInvalid) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -180,7 +187,8 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -188,7 +196,7 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) {
   expected_diff_state.resize(object_count);
   expected_diff_state[1] = DIFF_STATE_DATA_UPDATED;
   expected_diff_state[2] = DIFF_STATE_DATA_UPDATED;
-  expected_diff_state[3] = DIFF_STATE_HOLE_UPDATED;
+  expected_diff_state[3] = DIFF_STATE_HOLE;
   ASSERT_EQ(expected_diff_state, m_object_diff_state);
 }
 
@@ -226,7 +234,7 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateDelta) {
   expect_load_map(mock_image_ctx, 2U, object_map_2, 0);
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
@@ -274,7 +282,8 @@ TEST_F(TestMockObjectMapDiffRequest, EndDelta) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 2, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -302,7 +311,8 @@ TEST_F(TestMockObjectMapDiffRequest, StartSnapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 1, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -328,7 +338,7 @@ TEST_F(TestMockObjectMapDiffRequest, EndSnapDNE) {
   expect_load_map(mock_image_ctx, 1U, object_map_1, 0);
 
   C_SaferCond ctx;
-  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2,
+  auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(),
                                  &m_object_diff_state, &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
@@ -367,7 +377,8 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateSnapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -394,7 +405,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -427,7 +439,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadIntermediateObjectMapDNE) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(0, ctx.wait());
 
@@ -458,7 +471,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapError) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EPERM, ctx.wait());
 }
@@ -484,7 +498,8 @@ TEST_F(TestMockObjectMapDiffRequest, ObjectMapTooSmall) {
 
   C_SaferCond ctx;
   auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP,
-                                 &m_object_diff_state, &ctx);
+                                 is_diff_iterate(), &m_object_diff_state,
+                                 &ctx);
   req->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
index ea97b53a201365cc9aa4a40d6eb220f12da50f51..11ffd58d8f189ee6a9567c85b75bd607a9251d27 100644 (file)
@@ -4660,6 +4660,10 @@ TYPED_TEST(DiffIterateTest, DiffIterateDiscard)
   extents.clear();
   ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object,
                                   vector_iterate_cb, (void *) &extents));
+  ASSERT_EQ(0u, extents.size());
+
+  ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object,
+                                   vector_iterate_cb, (void *) &extents));
   ASSERT_EQ(1u, extents.size());
   ASSERT_EQ(diff_extent(0, 256, false, object_size), extents[0]);
   ASSERT_PASSED(this->validate_object_map, image);