From: Ilya Dryomov Date: Mon, 22 Apr 2019 10:21:07 +0000 (+0200) Subject: librbd: don't update snapshot object maps if copyup data is all zeros X-Git-Tag: v14.2.3~11^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=38c0216d837b4b06d85b49e263c47136fc77b05b;p=ceph.git librbd: don't update snapshot object maps if copyup data is all zeros If the data read from the parent is all zeros, deep copyup isn't performed. However snapshot object maps are updated unconditionally, causing inconsistencies where nonexistent objects are marked OBJECT_EXISTS or OBJECT_EXISTS_CLEAN. Fixes: http://tracker.ceph.com/issues/39435 Signed-off-by: Ilya Dryomov (cherry picked from commit 4456dc3939816a8bcb36dfc771adb1c699b37b0f) --- diff --git a/src/librbd/io/CopyupRequest.cc b/src/librbd/io/CopyupRequest.cc index 77beb50c7d23b..8ce262720bdf5 100644 --- a/src/librbd/io/CopyupRequest.cc +++ b/src/librbd/io/CopyupRequest.cc @@ -159,7 +159,6 @@ void CopyupRequest::read_from_parent() { return; } - m_deep_copy = false; auto comp = AioCompletion::create_and_start< CopyupRequest, &CopyupRequest::handle_read_from_parent>( @@ -179,6 +178,7 @@ void CopyupRequest::handle_read_from_parent(int r) { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl; + m_image_ctx->snap_lock.get_read(); m_lock.Lock(); m_copyup_is_zero = m_copyup_data.is_zero(); m_copyup_required = is_copyup_required(); @@ -186,6 +186,7 @@ void CopyupRequest::handle_read_from_parent(int r) { if (r < 0 && r != -ENOENT) { m_lock.Unlock(); + m_image_ctx->snap_lock.put_read(); lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl; finish(r); @@ -194,12 +195,22 @@ void CopyupRequest::handle_read_from_parent(int r) { if (!m_copyup_required) { m_lock.Unlock(); + m_image_ctx->snap_lock.put_read(); ldout(cct, 20) << "no-op, skipping" << dendl; finish(0); return; } + + // copyup() will affect snapshots only if parent data is not all + // zeros. + if (!m_copyup_is_zero) { + m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(), + m_image_ctx->snaps.rend()); + } + m_lock.Unlock(); + m_image_ctx->snap_lock.put_read(); update_object_maps(); } @@ -217,7 +228,6 @@ void CopyupRequest::deep_copy() { ldout(cct, 20) << "oid=" << m_oid << ", flatten=" << m_flatten << dendl; - m_deep_copy = true; auto ctx = util::create_context_callback< CopyupRequest, &CopyupRequest::handle_deep_copy>(this); auto req = deep_copy::ObjectCopyRequest::create( @@ -269,6 +279,14 @@ void CopyupRequest::handle_deep_copy(int r) { return; } + // For deep-copy, copyup() will never affect snapshots. However, + // this state machine is responsible for updating object maps for + // snapshots that have been created on destination image after + // migration started. + if (r != -ENOENT) { + compute_deep_copy_snap_ids(); + } + m_lock.Unlock(); m_image_ctx->snap_lock.put_read(); @@ -290,15 +308,6 @@ void CopyupRequest::update_object_maps() { auto cct = m_image_ctx->cct; ldout(cct, 20) << "oid=" << m_oid << dendl; - if (!m_image_ctx->snaps.empty()) { - if (m_deep_copy) { - compute_deep_copy_snap_ids(); - } else { - m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(), - m_image_ctx->snaps.rend()); - } - } - bool copy_on_read = m_pending_requests.empty(); uint8_t head_object_map_state = OBJECT_EXISTS; if (copy_on_read && !m_snap_ids.empty() && diff --git a/src/librbd/io/CopyupRequest.h b/src/librbd/io/CopyupRequest.h index 55eb5948bcbd6..e16b89bfd871b 100644 --- a/src/librbd/io/CopyupRequest.h +++ b/src/librbd/io/CopyupRequest.h @@ -84,7 +84,6 @@ private: Extents m_image_extents; ZTracer::Trace m_trace; - bool m_deep_copy = false; bool m_flatten = false; bool m_copyup_required = true; bool m_copyup_is_zero = true; diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc index 20146e075e07f..e95be0b8a9f20 100644 --- a/src/test/librbd/test_internal.cc +++ b/src/test/librbd/test_internal.cc @@ -15,6 +15,7 @@ #include "librbd/Operations.h" #include "librbd/api/DiffIterate.h" #include "librbd/api/Image.h" +#include "librbd/api/Migration.h" #include "librbd/api/PoolMetadata.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/ImageRequest.h" @@ -665,6 +666,181 @@ TEST_F(TestInternal, SnapshotCopyup) } } +TEST_F(TestInternal, SnapshotCopyupZeros) +{ + REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + // create an empty clone + ASSERT_EQ(0, snap_create(*ictx, "snap1")); + ASSERT_EQ(0, + ictx->operations->snap_protect(cls::rbd::UserSnapshotNamespace(), + "snap1")); + + uint64_t features; + ASSERT_EQ(0, librbd::get_features(ictx, &features)); + + std::string clone_name = get_temp_image_name(); + int order = ictx->order; + ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap1", m_ioctx, + clone_name.c_str(), features, &order, 0, 0)); + + librbd::ImageCtx *ictx2; + ASSERT_EQ(0, open_image(clone_name, &ictx2)); + + ASSERT_EQ(0, snap_create(*ictx2, "snap1")); + + bufferlist bl; + bl.append(std::string(256, '1')); + ASSERT_EQ(256, ictx2->io_work_queue->write(256, bl.length(), bufferlist{bl}, + 0)); + + librados::IoCtx snap_ctx; + snap_ctx.dup(ictx2->data_ctx); + snap_ctx.snap_set_read(CEPH_SNAPDIR); + + librados::snap_set_t snap_set; + ASSERT_EQ(0, snap_ctx.list_snaps(ictx2->get_object_name(0), &snap_set)); + + // verify that snapshot wasn't affected + ASSERT_EQ(1U, snap_set.clones.size()); + ASSERT_EQ(CEPH_NOSNAP, snap_set.clones[0].cloneid); + + bufferptr read_ptr(256); + bufferlist read_bl; + read_bl.push_back(read_ptr); + + std::list snaps = {"snap1", ""}; + librbd::io::ReadResult read_result{&read_bl}; + for (std::list::iterator it = snaps.begin(); + it != snaps.end(); ++it) { + const char *snap_name = it->empty() ? NULL : it->c_str(); + ASSERT_EQ(0, librbd::api::Image<>::snap_set( + ictx2, cls::rbd::UserSnapshotNamespace(), snap_name)); + + ASSERT_EQ(256, + ictx2->io_work_queue->read(0, 256, + librbd::io::ReadResult{read_result}, + 0)); + ASSERT_TRUE(read_bl.is_zero()); + + ASSERT_EQ(256, + ictx2->io_work_queue->read(256, 256, + librbd::io::ReadResult{read_result}, + 0)); + if (snap_name == NULL) { + ASSERT_TRUE(bl.contents_equal(read_bl)); + } else { + ASSERT_TRUE(read_bl.is_zero()); + } + + // verify that only HEAD object map was updated + if ((ictx2->features & RBD_FEATURE_OBJECT_MAP) != 0) { + uint8_t state = OBJECT_EXISTS; + if (snap_name != NULL) { + state = OBJECT_NONEXISTENT; + } + + librbd::ObjectMap<> object_map(*ictx2, ictx2->snap_id); + C_SaferCond ctx; + object_map.open(&ctx); + ASSERT_EQ(0, ctx.wait()); + + RWLock::WLocker object_map_locker(ictx2->object_map_lock); + ASSERT_EQ(state, object_map[0]); + } + } +} + +TEST_F(TestInternal, SnapshotCopyupZerosMigration) +{ + REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + uint64_t features; + ASSERT_EQ(0, librbd::get_features(ictx, &features)); + + close_image(ictx); + + // migrate an empty image + std::string dst_name = get_temp_image_name(); + librbd::ImageOptions dst_opts; + dst_opts.set(RBD_IMAGE_OPTION_FEATURES, features); + ASSERT_EQ(0, librbd::api::Migration<>::prepare(m_ioctx, m_image_name, + m_ioctx, dst_name, + dst_opts)); + + librbd::ImageCtx *ictx2; + ASSERT_EQ(0, open_image(dst_name, &ictx2)); + + ASSERT_EQ(0, snap_create(*ictx2, "snap1")); + + bufferlist bl; + bl.append(std::string(256, '1')); + ASSERT_EQ(256, ictx2->io_work_queue->write(256, bl.length(), bufferlist{bl}, + 0)); + + librados::IoCtx snap_ctx; + snap_ctx.dup(ictx2->data_ctx); + snap_ctx.snap_set_read(CEPH_SNAPDIR); + + librados::snap_set_t snap_set; + ASSERT_EQ(0, snap_ctx.list_snaps(ictx2->get_object_name(0), &snap_set)); + + // verify that snapshot wasn't affected + ASSERT_EQ(1U, snap_set.clones.size()); + ASSERT_EQ(CEPH_NOSNAP, snap_set.clones[0].cloneid); + + bufferptr read_ptr(256); + bufferlist read_bl; + read_bl.push_back(read_ptr); + + std::list snaps = {"snap1", ""}; + librbd::io::ReadResult read_result{&read_bl}; + for (std::list::iterator it = snaps.begin(); + it != snaps.end(); ++it) { + const char *snap_name = it->empty() ? NULL : it->c_str(); + ASSERT_EQ(0, librbd::api::Image<>::snap_set( + ictx2, cls::rbd::UserSnapshotNamespace(), snap_name)); + + ASSERT_EQ(256, + ictx2->io_work_queue->read(0, 256, + librbd::io::ReadResult{read_result}, + 0)); + ASSERT_TRUE(read_bl.is_zero()); + + ASSERT_EQ(256, + ictx2->io_work_queue->read(256, 256, + librbd::io::ReadResult{read_result}, + 0)); + if (snap_name == NULL) { + ASSERT_TRUE(bl.contents_equal(read_bl)); + } else { + ASSERT_TRUE(read_bl.is_zero()); + } + + // verify that only HEAD object map was updated + if ((ictx2->features & RBD_FEATURE_OBJECT_MAP) != 0) { + uint8_t state = OBJECT_EXISTS; + if (snap_name != NULL) { + state = OBJECT_NONEXISTENT; + } + + librbd::ObjectMap<> object_map(*ictx2, ictx2->snap_id); + C_SaferCond ctx; + object_map.open(&ctx); + ASSERT_EQ(0, ctx.wait()); + + RWLock::WLocker object_map_locker(ictx2->object_map_lock); + ASSERT_EQ(state, object_map[0]); + } + } +} + TEST_F(TestInternal, ResizeCopyup) { REQUIRE_FEATURE(RBD_FEATURE_LAYERING);