]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: don't update snapshot object maps if copyup data is all zeros
authorIlya Dryomov <idryomov@gmail.com>
Mon, 22 Apr 2019 10:21:07 +0000 (12:21 +0200)
committerJason Dillaman <dillaman@redhat.com>
Sun, 18 Aug 2019 18:52:21 +0000 (14:52 -0400)
If the data read from the parent is all zeros, deep copyup isn't
performed.  However snapshot object maps are updated unconditionally,
causing inconsistencies where nonexistent objects are marked
OBJECT_EXISTS or OBJECT_EXISTS_CLEAN.

Fixes: http://tracker.ceph.com/issues/39435
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 4456dc3939816a8bcb36dfc771adb1c699b37b0f)

src/librbd/io/CopyupRequest.cc
src/librbd/io/CopyupRequest.h
src/test/librbd/test_internal.cc

index 77beb50c7d23bbabfc9b92bc7258c8589e10f0b8..8ce262720bdf563e2dad9022aabf06b7bce8c5bd 100644 (file)
@@ -159,7 +159,6 @@ void CopyupRequest<I>::read_from_parent() {
     return;
   }
 
-  m_deep_copy = false;
   auto comp = AioCompletion::create_and_start<
     CopyupRequest<I>,
     &CopyupRequest<I>::handle_read_from_parent>(
@@ -179,6 +178,7 @@ void CopyupRequest<I>::handle_read_from_parent(int r) {
   auto cct = m_image_ctx->cct;
   ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;
 
+  m_image_ctx->snap_lock.get_read();
   m_lock.Lock();
   m_copyup_is_zero = m_copyup_data.is_zero();
   m_copyup_required = is_copyup_required();
@@ -186,6 +186,7 @@ void CopyupRequest<I>::handle_read_from_parent(int r) {
 
   if (r < 0 && r != -ENOENT) {
     m_lock.Unlock();
+    m_image_ctx->snap_lock.put_read();
 
     lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl;
     finish(r);
@@ -194,12 +195,22 @@ void CopyupRequest<I>::handle_read_from_parent(int r) {
 
   if (!m_copyup_required) {
     m_lock.Unlock();
+    m_image_ctx->snap_lock.put_read();
 
     ldout(cct, 20) << "no-op, skipping" << dendl;
     finish(0);
     return;
   }
+
+  // copyup() will affect snapshots only if parent data is not all
+  // zeros.
+  if (!m_copyup_is_zero) {
+    m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(),
+                      m_image_ctx->snaps.rend());
+  }
+
   m_lock.Unlock();
+  m_image_ctx->snap_lock.put_read();
 
   update_object_maps();
 }
@@ -217,7 +228,6 @@ void CopyupRequest<I>::deep_copy() {
 
   ldout(cct, 20) << "oid=" << m_oid << ", flatten=" << m_flatten << dendl;
 
-  m_deep_copy = true;
   auto ctx = util::create_context_callback<
     CopyupRequest<I>, &CopyupRequest<I>::handle_deep_copy>(this);
   auto req = deep_copy::ObjectCopyRequest<I>::create(
@@ -269,6 +279,14 @@ void CopyupRequest<I>::handle_deep_copy(int r) {
     return;
   }
 
+  // For deep-copy, copyup() will never affect snapshots.  However,
+  // this state machine is responsible for updating object maps for
+  // snapshots that have been created on destination image after
+  // migration started.
+  if (r != -ENOENT) {
+    compute_deep_copy_snap_ids();
+  }
+
   m_lock.Unlock();
   m_image_ctx->snap_lock.put_read();
 
@@ -290,15 +308,6 @@ void CopyupRequest<I>::update_object_maps() {
   auto cct = m_image_ctx->cct;
   ldout(cct, 20) << "oid=" << m_oid << dendl;
 
-  if (!m_image_ctx->snaps.empty()) {
-    if (m_deep_copy) {
-      compute_deep_copy_snap_ids();
-    } else {
-      m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(),
-                        m_image_ctx->snaps.rend());
-    }
-  }
-
   bool copy_on_read = m_pending_requests.empty();
   uint8_t head_object_map_state = OBJECT_EXISTS;
   if (copy_on_read && !m_snap_ids.empty() &&
index 55eb5948bcbd6a5617761ecccc567fafa5c09e24..e16b89bfd871b181174df87650586f5ede3c70ab 100644 (file)
@@ -84,7 +84,6 @@ private:
   Extents m_image_extents;
   ZTracer::Trace m_trace;
 
-  bool m_deep_copy = false;
   bool m_flatten = false;
   bool m_copyup_required = true;
   bool m_copyup_is_zero = true;
index 20146e075e07fe7ed9875c6ec2ed0be4829a3f92..e95be0b8a9f20504cfe8820e4975173b80876720 100644 (file)
@@ -15,6 +15,7 @@
 #include "librbd/Operations.h"
 #include "librbd/api/DiffIterate.h"
 #include "librbd/api/Image.h"
+#include "librbd/api/Migration.h"
 #include "librbd/api/PoolMetadata.h"
 #include "librbd/io/AioCompletion.h"
 #include "librbd/io/ImageRequest.h"
@@ -665,6 +666,181 @@ TEST_F(TestInternal, SnapshotCopyup)
   }
 }
 
+TEST_F(TestInternal, SnapshotCopyupZeros)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  // create an empty clone
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0,
+           ictx->operations->snap_protect(cls::rbd::UserSnapshotNamespace(),
+                                          "snap1"));
+
+  uint64_t features;
+  ASSERT_EQ(0, librbd::get_features(ictx, &features));
+
+  std::string clone_name = get_temp_image_name();
+  int order = ictx->order;
+  ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap1", m_ioctx,
+                            clone_name.c_str(), features, &order, 0, 0));
+
+  librbd::ImageCtx *ictx2;
+  ASSERT_EQ(0, open_image(clone_name, &ictx2));
+
+  ASSERT_EQ(0, snap_create(*ictx2, "snap1"));
+
+  bufferlist bl;
+  bl.append(std::string(256, '1'));
+  ASSERT_EQ(256, ictx2->io_work_queue->write(256, bl.length(), bufferlist{bl},
+                                             0));
+
+  librados::IoCtx snap_ctx;
+  snap_ctx.dup(ictx2->data_ctx);
+  snap_ctx.snap_set_read(CEPH_SNAPDIR);
+
+  librados::snap_set_t snap_set;
+  ASSERT_EQ(0, snap_ctx.list_snaps(ictx2->get_object_name(0), &snap_set));
+
+  // verify that snapshot wasn't affected
+  ASSERT_EQ(1U, snap_set.clones.size());
+  ASSERT_EQ(CEPH_NOSNAP, snap_set.clones[0].cloneid);
+
+  bufferptr read_ptr(256);
+  bufferlist read_bl;
+  read_bl.push_back(read_ptr);
+
+  std::list<std::string> snaps = {"snap1", ""};
+  librbd::io::ReadResult read_result{&read_bl};
+  for (std::list<std::string>::iterator it = snaps.begin();
+       it != snaps.end(); ++it) {
+    const char *snap_name = it->empty() ? NULL : it->c_str();
+    ASSERT_EQ(0, librbd::api::Image<>::snap_set(
+                   ictx2, cls::rbd::UserSnapshotNamespace(), snap_name));
+
+    ASSERT_EQ(256,
+              ictx2->io_work_queue->read(0, 256,
+                                         librbd::io::ReadResult{read_result},
+                                         0));
+    ASSERT_TRUE(read_bl.is_zero());
+
+    ASSERT_EQ(256,
+              ictx2->io_work_queue->read(256, 256,
+                                         librbd::io::ReadResult{read_result},
+                                         0));
+    if (snap_name == NULL) {
+      ASSERT_TRUE(bl.contents_equal(read_bl));
+    } else {
+      ASSERT_TRUE(read_bl.is_zero());
+    }
+
+    // verify that only HEAD object map was updated
+    if ((ictx2->features & RBD_FEATURE_OBJECT_MAP) != 0) {
+      uint8_t state = OBJECT_EXISTS;
+      if (snap_name != NULL) {
+        state = OBJECT_NONEXISTENT;
+      }
+
+      librbd::ObjectMap<> object_map(*ictx2, ictx2->snap_id);
+      C_SaferCond ctx;
+      object_map.open(&ctx);
+      ASSERT_EQ(0, ctx.wait());
+
+      RWLock::WLocker object_map_locker(ictx2->object_map_lock);
+      ASSERT_EQ(state, object_map[0]);
+    }
+  }
+}
+
+TEST_F(TestInternal, SnapshotCopyupZerosMigration)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  uint64_t features;
+  ASSERT_EQ(0, librbd::get_features(ictx, &features));
+
+  close_image(ictx);
+
+  // migrate an empty image
+  std::string dst_name = get_temp_image_name();
+  librbd::ImageOptions dst_opts;
+  dst_opts.set(RBD_IMAGE_OPTION_FEATURES, features);
+  ASSERT_EQ(0, librbd::api::Migration<>::prepare(m_ioctx, m_image_name,
+                                                 m_ioctx, dst_name,
+                                                 dst_opts));
+
+  librbd::ImageCtx *ictx2;
+  ASSERT_EQ(0, open_image(dst_name, &ictx2));
+
+  ASSERT_EQ(0, snap_create(*ictx2, "snap1"));
+
+  bufferlist bl;
+  bl.append(std::string(256, '1'));
+  ASSERT_EQ(256, ictx2->io_work_queue->write(256, bl.length(), bufferlist{bl},
+                                             0));
+
+  librados::IoCtx snap_ctx;
+  snap_ctx.dup(ictx2->data_ctx);
+  snap_ctx.snap_set_read(CEPH_SNAPDIR);
+
+  librados::snap_set_t snap_set;
+  ASSERT_EQ(0, snap_ctx.list_snaps(ictx2->get_object_name(0), &snap_set));
+
+  // verify that snapshot wasn't affected
+  ASSERT_EQ(1U, snap_set.clones.size());
+  ASSERT_EQ(CEPH_NOSNAP, snap_set.clones[0].cloneid);
+
+  bufferptr read_ptr(256);
+  bufferlist read_bl;
+  read_bl.push_back(read_ptr);
+
+  std::list<std::string> snaps = {"snap1", ""};
+  librbd::io::ReadResult read_result{&read_bl};
+  for (std::list<std::string>::iterator it = snaps.begin();
+       it != snaps.end(); ++it) {
+    const char *snap_name = it->empty() ? NULL : it->c_str();
+    ASSERT_EQ(0, librbd::api::Image<>::snap_set(
+                   ictx2, cls::rbd::UserSnapshotNamespace(), snap_name));
+
+    ASSERT_EQ(256,
+              ictx2->io_work_queue->read(0, 256,
+                                         librbd::io::ReadResult{read_result},
+                                         0));
+    ASSERT_TRUE(read_bl.is_zero());
+
+    ASSERT_EQ(256,
+              ictx2->io_work_queue->read(256, 256,
+                                         librbd::io::ReadResult{read_result},
+                                         0));
+    if (snap_name == NULL) {
+      ASSERT_TRUE(bl.contents_equal(read_bl));
+    } else {
+      ASSERT_TRUE(read_bl.is_zero());
+    }
+
+    // verify that only HEAD object map was updated
+    if ((ictx2->features & RBD_FEATURE_OBJECT_MAP) != 0) {
+      uint8_t state = OBJECT_EXISTS;
+      if (snap_name != NULL) {
+        state = OBJECT_NONEXISTENT;
+      }
+
+      librbd::ObjectMap<> object_map(*ictx2, ictx2->snap_id);
+      C_SaferCond ctx;
+      object_map.open(&ctx);
+      ASSERT_EQ(0, ctx.wait());
+
+      RWLock::WLocker object_map_locker(ictx2->object_map_lock);
+      ASSERT_EQ(state, object_map[0]);
+    }
+  }
+}
+
 TEST_F(TestInternal, ResizeCopyup)
 {
   REQUIRE_FEATURE(RBD_FEATURE_LAYERING);