From 87a0bfd08f8404b42032fad964923a7684799744 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Nov 2023 20:44:18 +0100 Subject: [PATCH] librbd: diff-iterate shouldn't ever report "new hole" against a hole If an object doesn't exist in both start and end versions but there is an intermediate snapshot which contains it (i.e. the object is written to and captured at some point but then discarded prior to or in the end version), diff-iterate reports "new hole" -- callback is invoked with exists=false. This occurs both on the slow list_snaps path and in fast-diff mode. Despite going all the way back to the introduction of diff-iterate in commit 0296c7cdae91 ("librbd: implement diff_iterate"), this behavior is wrong and contradicts diff-iterate API documentation added in commit a69532e86450 ("librbd: document diff_iterate in header") in the same series: If the source snapshot name is NULL, we interpret that as the beginning of time and return all allocated regions of the image. It also triggered an assert added in commit c680531e070a ("librbd: change diff_iterate interface to be more C-friendly") in the same series. Unfortunately, commit f1f6407221a0 ("test_librbd: add diff_iterate test including discard"), also part of the same series, added a test which expected the wrong behavior. Very confusing! A year later, a different manifestation of this bug was fixed in commit 9a1ab95176fe ("rbd: Fix rbd diff for non-existent objects"), but the fix only covered the case where calc_snap_set_diff() goes past the end snap ID while processing clones. The case where it runs out of clones to process before reaching the end snap ID remained mishandled. A year after that, commit 3ccc3bb4bd35 ("librbd: diff_iterate needs to handle holes in parent images") dropped the assert mentioned above and this bug got enshrined in the newly introduced fast-diff mode. Finally, a few years later, deep-copy actually started relying on this bug in commit e5a21e904142 ("librbd: deep-copy image copy state machine skips clean objects"). This necessitates bifurcation in DiffRequest because deep-copy wants the "has this object been touched" semantics, which is different from diff-iterate (and also potentially much more expensive to produce!). This commit brings a minimal update to TestMockObjectMapDiffRequest tests and DiffIterateTest.DiffIterateDiscard. Coverage is expanded in the following commits. Fixes: https://tracker.ceph.com/issues/53897 Signed-off-by: Ilya Dryomov (cherry picked from commit be507aaed15fb7a193a90c5f88eda61b9be2af1b) --- src/librados/snap_set_diff.cc | 25 +++++--- src/librbd/api/DiffIterate.cc | 2 +- src/librbd/deep_copy/ImageCopyRequest.cc | 7 ++- src/librbd/object_map/DiffRequest.cc | 61 +++++++++++++++---- src/librbd/object_map/DiffRequest.h | 13 ++-- src/librbd/object_map/Types.h | 15 +++-- src/test/cli-integration/rbd/snap-diff.t | 4 ++ .../deep_copy/test_mock_ImageCopyRequest.cc | 1 + .../object_map/test_mock_DiffRequest.cc | 45 +++++++++----- src/test/librbd/test_librbd.cc | 4 ++ 10 files changed, 127 insertions(+), 50 deletions(-) diff --git a/src/librados/snap_set_diff.cc b/src/librados/snap_set_diff.cc index f80105b44ae10..0029bcd647801 100644 --- a/src/librados/snap_set_diff.cc +++ b/src/librados/snap_set_diff.cc @@ -31,9 +31,8 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, *clone_end_snap_id = 0; *whole_object = false; - for (vector::const_iterator r = snap_set.clones.begin(); - r != snap_set.clones.end(); - ) { + auto r = snap_set.clones.begin(); + while (r != snap_set.clones.end()) { // make an interval, and hide the fact that the HEAD doesn't // include itself in the snaps list librados::snap_t a, b; @@ -77,12 +76,6 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, } if (end < a) { - ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl; - *end_exists = false; - diff->clear(); - if (start_size) { - diff->insert(0, start_size); - } break; } if (end <= b) { @@ -90,7 +83,7 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, *end_size = r->size; *end_exists = true; *clone_end_snap_id = b; - break; + return; } // start with the max(this size, next size), and subtract off any @@ -114,4 +107,16 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, diff->union_of(diff_to_next); ldout(cct, 20) << " diff now " << *diff << dendl; } + + if (r != snap_set.clones.end()) { + ldout(cct, 20) << " past end " << end + << ", end object does not exist" << dendl; + } else { + ldout(cct, 20) << " ran out of clones before reaching end " << end + << ", end object does not exist" << dendl; + } + diff->clear(); + if (start_size) { + diff->insert(0, start_size); + } } diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc index fa1df6df37340..d8760692e1cb8 100644 --- a/src/librbd/api/DiffIterate.cc +++ b/src/librbd/api/DiffIterate.cc @@ -249,7 +249,7 @@ int DiffIterate::execute() { if (m_whole_object) { C_SaferCond ctx; auto req = object_map::DiffRequest::create(&m_image_ctx, from_snap_id, - end_snap_id, + end_snap_id, true, &object_diff_state, &ctx); req->send(); diff --git a/src/librbd/deep_copy/ImageCopyRequest.cc b/src/librbd/deep_copy/ImageCopyRequest.cc index 08e959dd57232..abd8891c02632 100644 --- a/src/librbd/deep_copy/ImageCopyRequest.cc +++ b/src/librbd/deep_copy/ImageCopyRequest.cc @@ -101,9 +101,10 @@ void ImageCopyRequest::compute_diff() { auto ctx = create_context_callback< ImageCopyRequest, &ImageCopyRequest::handle_compute_diff>(this); - auto req = object_map::DiffRequest::create(m_src_image_ctx, m_src_snap_id_start, - m_src_snap_id_end, &m_object_diff_state, - ctx); + auto req = object_map::DiffRequest::create(m_src_image_ctx, + m_src_snap_id_start, + m_src_snap_id_end, false, + &m_object_diff_state, ctx); req->send(); } diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc index 606d48bbf33c4..8820aacc87bbb 100644 --- a/src/librbd/object_map/DiffRequest.cc +++ b/src/librbd/object_map/DiffRequest.cc @@ -195,16 +195,34 @@ void DiffRequest::handle_load_object_map(int r) { for (; it != overlap_end_it; ++it, ++diff_it, ++i) { uint8_t object_map_state = *it; uint8_t prev_object_diff_state = *diff_it; - if (object_map_state == OBJECT_EXISTS || - object_map_state == OBJECT_PENDING || - (object_map_state == OBJECT_EXISTS_CLEAN && - prev_object_diff_state != DIFF_STATE_DATA && - prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) { - *diff_it = DIFF_STATE_DATA_UPDATED; - } else if (object_map_state == OBJECT_NONEXISTENT && - prev_object_diff_state != DIFF_STATE_HOLE && - prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) { - *diff_it = DIFF_STATE_HOLE_UPDATED; + switch (prev_object_diff_state) { + case DIFF_STATE_HOLE: + if (object_map_state != OBJECT_NONEXISTENT) { + // stay in HOLE on intermediate snapshots for diff-iterate + if (!m_diff_iterate_range || m_current_snap_id == m_snap_id_end) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } + } + break; + case DIFF_STATE_DATA: + if (object_map_state == OBJECT_NONEXISTENT) { + *diff_it = DIFF_STATE_HOLE_UPDATED; + } else if (object_map_state != OBJECT_EXISTS_CLEAN) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } + break; + case DIFF_STATE_HOLE_UPDATED: + if (object_map_state != OBJECT_NONEXISTENT) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } + break; + case DIFF_STATE_DATA_UPDATED: + if (object_map_state == OBJECT_NONEXISTENT) { + *diff_it = DIFF_STATE_HOLE_UPDATED; + } + break; + default: + ceph_abort(); } ldout(cct, 20) << "object state: " << i << " " @@ -225,8 +243,29 @@ void DiffRequest::handle_load_object_map(int r) { } else if (diff_from_start || (m_object_diff_state_valid && object_map_state != OBJECT_EXISTS_CLEAN)) { - *diff_it = DIFF_STATE_DATA_UPDATED; + // diffing against the beginning of time or image was grown + // (implicit) starting state is HOLE, this is the first object + // map after + if (m_diff_iterate_range) { + // for diff-iterate, if the object is discarded prior to or + // in the end version, result should be HOLE + // since DATA_UPDATED can transition only to HOLE_UPDATED, + // stay in HOLE on intermediate snapshots -- another way to + // put this is that when starting with a hole, intermediate + // snapshots can be ignored as the result depends only on the + // end version + if (m_current_snap_id == m_snap_id_end) { + *diff_it = DIFF_STATE_DATA_UPDATED; + } else { + *diff_it = DIFF_STATE_HOLE; + } + } else { + // for deep-copy, if the object is discarded prior to or + // in the end version, result should be HOLE_UPDATED + *diff_it = DIFF_STATE_DATA_UPDATED; + } } else { + // diffing against a snapshot, this is its object map *diff_it = DIFF_STATE_DATA; } diff --git a/src/librbd/object_map/DiffRequest.h b/src/librbd/object_map/DiffRequest.h index e83a1629e6233..b02ef34ba347e 100644 --- a/src/librbd/object_map/DiffRequest.h +++ b/src/librbd/object_map/DiffRequest.h @@ -22,19 +22,19 @@ template class DiffRequest { public: static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start, - uint64_t snap_id_end, + uint64_t snap_id_end, bool diff_iterate_range, BitVector<2>* object_diff_state, Context* on_finish) { return new DiffRequest(image_ctx, snap_id_start, snap_id_end, - object_diff_state, on_finish); + diff_iterate_range, object_diff_state, on_finish); } DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start, - uint64_t snap_id_end, BitVector<2>* object_diff_state, - Context* on_finish) + uint64_t snap_id_end, bool diff_iterate_range, + BitVector<2>* object_diff_state, Context* on_finish) : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start), - m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state), - m_on_finish(on_finish) { + m_snap_id_end(snap_id_end), m_diff_iterate_range(diff_iterate_range), + m_object_diff_state(object_diff_state), m_on_finish(on_finish) { } void send(); @@ -58,6 +58,7 @@ private: ImageCtxT* m_image_ctx; uint64_t m_snap_id_start; uint64_t m_snap_id_end; + bool m_diff_iterate_range; BitVector<2>* m_object_diff_state; Context* m_on_finish; diff --git a/src/librbd/object_map/Types.h b/src/librbd/object_map/Types.h index 0ce91bd96a1c1..576ea0e4b6b70 100644 --- a/src/librbd/object_map/Types.h +++ b/src/librbd/object_map/Types.h @@ -8,10 +8,17 @@ namespace librbd { namespace object_map { enum DiffState { - DIFF_STATE_HOLE = 0, /* unchanged hole */ - DIFF_STATE_DATA = 1, /* unchanged data */ - DIFF_STATE_HOLE_UPDATED = 2, /* new hole */ - DIFF_STATE_DATA_UPDATED = 3 /* new data */ + // diff-iterate: hole with or without data captured in intermediate snapshot + // deep-copy: hole without data captured in intermediate snapshot + DIFF_STATE_HOLE = 0, + // diff-iterate, deep-copy: unchanged data + DIFF_STATE_DATA = 1, + // diff-iterate: new hole (data -> hole) + // deep-copy: new hole (data -> hole) or hole with data captured in + // intermediate snapshot + DIFF_STATE_HOLE_UPDATED = 2, + // diff-iterate, deep-copy: new data (hole -> data) or changed data + DIFF_STATE_DATA_UPDATED = 3 }; } // namespace object_map diff --git a/src/test/cli-integration/rbd/snap-diff.t b/src/test/cli-integration/rbd/snap-diff.t index 1ca2fb04ddd96..fa564891a4b96 100644 --- a/src/test/cli-integration/rbd/snap-diff.t +++ b/src/test/cli-integration/rbd/snap-diff.t @@ -39,10 +39,14 @@ $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json [] $ rbd snap rollback xrbddiff1/xtestdiff1@snap1 --no-progress + $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json + [{"offset":0,"length":1048576,"exists":"true"}] $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json [] $ rbd snap rollback xrbddiff1/xtestdiff1@allzeroes --no-progress $ rbd diff --from-snap=allzeroes xrbddiff1/xtestdiff1 --format json + [] + $ rbd diff --from-snap=snap1 xrbddiff1/xtestdiff1 --format json [{"offset":0,"length":1048576,"exists":"false"}] $ ceph osd pool rm xrbddiff1 xrbddiff1 --yes-i-really-really-mean-it pool 'xrbddiff1' removed diff --git a/src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc b/src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc index e38ffffdbe493..634cabfa79e3e 100644 --- a/src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc +++ b/src/test/librbd/deep_copy/test_mock_ImageCopyRequest.cc @@ -92,6 +92,7 @@ struct DiffRequest { static DiffRequest* s_instance; static DiffRequest* create(MockTestImageCtx *image_ctx, uint64_t snap_id_start, uint64_t snap_id_end, + bool diff_iterate_range, BitVector<2>* object_diff_state, Context* on_finish) { ceph_assert(s_instance != nullptr); diff --git a/src/test/librbd/object_map/test_mock_DiffRequest.cc b/src/test/librbd/object_map/test_mock_DiffRequest.cc index c25ae4a95c5e6..85fe456d929ef 100644 --- a/src/test/librbd/object_map/test_mock_DiffRequest.cc +++ b/src/test/librbd/object_map/test_mock_DiffRequest.cc @@ -42,6 +42,10 @@ public: ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx)); } + bool is_diff_iterate() const { + return true; + } + void expect_get_flags(MockTestImageCtx& mock_image_ctx, uint64_t snap_id, int32_t flags, int r) { EXPECT_CALL(mock_image_ctx, get_flags(snap_id, _)) @@ -87,7 +91,8 @@ TEST_F(TestMockObjectMapDiffRequest, InvalidStartSnap) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, CEPH_NOSNAP, 0, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-EINVAL, ctx.wait()); } @@ -98,7 +103,7 @@ TEST_F(TestMockObjectMapDiffRequest, StartEndSnapEqual) { InSequence seq; C_SaferCond ctx; - auto req = new MockDiffRequest(&mock_image_ctx, 1, 1, + auto req = new MockDiffRequest(&mock_image_ctx, 1, 1, is_diff_iterate(), &m_object_diff_state, &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -115,7 +120,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffDisabled) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-EINVAL, ctx.wait()); } @@ -133,7 +139,8 @@ TEST_F(TestMockObjectMapDiffRequest, FastDiffInvalid) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-EINVAL, ctx.wait()); } @@ -180,7 +187,8 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -188,7 +196,7 @@ TEST_F(TestMockObjectMapDiffRequest, FullDelta) { expected_diff_state.resize(object_count); expected_diff_state[1] = DIFF_STATE_DATA_UPDATED; expected_diff_state[2] = DIFF_STATE_DATA_UPDATED; - expected_diff_state[3] = DIFF_STATE_HOLE_UPDATED; + expected_diff_state[3] = DIFF_STATE_HOLE; ASSERT_EQ(expected_diff_state, m_object_diff_state); } @@ -226,7 +234,7 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateDelta) { expect_load_map(mock_image_ctx, 2U, object_map_2, 0); C_SaferCond ctx; - auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, + auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(), &m_object_diff_state, &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -274,7 +282,8 @@ TEST_F(TestMockObjectMapDiffRequest, EndDelta) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 2, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -302,7 +311,8 @@ TEST_F(TestMockObjectMapDiffRequest, StartSnapDNE) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 1, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-ENOENT, ctx.wait()); } @@ -328,7 +338,7 @@ TEST_F(TestMockObjectMapDiffRequest, EndSnapDNE) { expect_load_map(mock_image_ctx, 1U, object_map_1, 0); C_SaferCond ctx; - auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, + auto req = new MockDiffRequest(&mock_image_ctx, 1, 2, is_diff_iterate(), &m_object_diff_state, &ctx); req->send(); ASSERT_EQ(-ENOENT, ctx.wait()); @@ -367,7 +377,8 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateSnapDNE) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -394,7 +405,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapDNE) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-ENOENT, ctx.wait()); } @@ -427,7 +439,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadIntermediateObjectMapDNE) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(0, ctx.wait()); @@ -458,7 +471,8 @@ TEST_F(TestMockObjectMapDiffRequest, LoadObjectMapError) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-EPERM, ctx.wait()); } @@ -484,7 +498,8 @@ TEST_F(TestMockObjectMapDiffRequest, ObjectMapTooSmall) { C_SaferCond ctx; auto req = new MockDiffRequest(&mock_image_ctx, 0, CEPH_NOSNAP, - &m_object_diff_state, &ctx); + is_diff_iterate(), &m_object_diff_state, + &ctx); req->send(); ASSERT_EQ(-EINVAL, ctx.wait()); } diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 682f39f579335..01313e4f92a60 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -6390,6 +6390,10 @@ TYPED_TEST(DiffIterateTest, DiffIterateDiscard) extents.clear(); ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, vector_iterate_cb, (void *) &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object, + vector_iterate_cb, (void *) &extents)); ASSERT_EQ(1u, extents.size()); ASSERT_EQ(diff_extent(0, 256, false, object_size), extents[0]); ASSERT_PASSED(this->validate_object_map, image); -- 2.39.5