osd: avoid two copy with same src cancel each other

author YuanXin <yuanxin@didiglobal.com>

Sun, 21 Feb 2021 07:08:07 +0000 (15:08 +0800)

committer YuanXin <yuanxin@didiglobal.com>

Mon, 1 Mar 2021 23:52:59 +0000 (07:52 +0800)
author YuanXin <yuanxin@didiglobal.com>
Sun, 21 Feb 2021 07:08:07 +0000 (15:08 +0800)
committer YuanXin <yuanxin@didiglobal.com>
Mon, 1 Mar 2021 23:52:59 +0000 (07:52 +0800)
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 2fa3c40f91f18ce3fd98c17aa868aac106b728a6..2f03ff7be6024b895045a1c548e1f8962cf30df7 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -10061,7 +10061,6 @@ void PrimaryLogPG::cancel_copy(CopyOpRef cop, bool requeue,
    copy_ops.erase(cop->obc->obs.oi.soid);
    cop->obc->stop_block();
  
-  kick_object_context_blocked(cop->obc);
    cop->results.should_requeue = requeue;
    CopyCallbackResults result(-ECANCELED, &cop->results);
    cop->cb->complete(result);
@@ -10072,13 +10071,15 @@ void PrimaryLogPG::cancel_copy(CopyOpRef cop, bool requeue,
    cop->obc = ObjectContextRef();
  }
  
-void PrimaryLogPG::cancel_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
+void PrimaryLogPG::cancel_and_kick_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
  {
    dout(10) << __func__ << dendl;
    map<hobject_t,CopyOpRef>::iterator p = copy_ops.begin();
    while (p != copy_ops.end()) {
+    ObjectContextRef obc = p->second->obc;
      // requeue this op? can I queue up all of them?
      cancel_copy((p++)->second, requeue, tids);
+    kick_object_context_blocked(obc);
    }
  }
  
@@ -12482,7 +12483,7 @@ void PrimaryLogPG::on_shutdown()
    m_scrubber->unreg_next_scrub();
  
    vector<ceph_tid_t> tids;
-  cancel_copy_ops(false, &tids);
+  cancel_and_kick_copy_ops(false, &tids);
    cancel_flush_ops(false, &tids);
    cancel_proxy_ops(false, &tids);
    cancel_manifest_ops(false, &tids);
@@ -12599,7 +12600,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t)
    requeue_ops(waiting_for_readable);
  
    vector<ceph_tid_t> tids;
-  cancel_copy_ops(is_primary(), &tids);
+  cancel_and_kick_copy_ops(is_primary(), &tids);
    cancel_flush_ops(is_primary(), &tids);
    cancel_proxy_ops(is_primary(), &tids);
    cancel_manifest_ops(is_primary(), &tids);
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h

index 134b3ba1a937a7a40b6bf7b8cd94fb118e8606a5..8ea01e986ef86ec975a4c6655da21c76f098badc 100644 (file)
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -1367,7 +1367,7 @@ protected:
    void finish_copyfrom(CopyFromCallback *cb);
    void finish_promote(int r, CopyResults *results, ObjectContextRef obc);
    void cancel_copy(CopyOpRef cop, bool requeue, std::vector<ceph_tid_t> *tids);
-  void cancel_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);
+  void cancel_and_kick_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);
  
    friend struct C_Copyfrom;
  
diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc

index dcfea88785f1dd2a5cfd19e1561a8bbe41e894cd..5e17c72a450c8a553de6df88f5a2d7e54621f654 100644 (file)
--- a/src/test/librados/tier_cxx.cc
+++ b/src/test/librados/tier_cxx.cc
@@ -1465,6 +1465,117 @@ TEST_F(LibRadosTwoPoolsPP, ListSnap){
    ioctx.selfmanaged_snap_remove(my_snaps[0]);
  }
  
+// This test case reproduces https://tracker.ceph.com/issues/49409
+TEST_F(LibRadosTwoPoolsPP, EvictSnapRollbackReadRace) {
+  // create object
+  {
+    bufferlist bl;
+    int len = string("hi there").length() * 2;
+    // append more chrunk data make sure the second promote
+    // op coming before the first promote op finished
+    for (int i=0; i<4*1024*1024/len; ++i)
+      bl.append("hi therehi there");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, ioctx.operate("foo", &op));
+  }
+
+  // create two snapshot, a clone
+  vector<uint64_t> my_snaps(2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[1]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+                                                         my_snaps));
+  {
+    bufferlist bl;
+    bl.append("ciao!");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, ioctx.operate("foo", &op));
+  }
+
+  // configure cache
+  bufferlist inbl;
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name +
+    "\", \"force_nonempty\": \"--force-nonempty\" }",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+    "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+    "\", \"mode\": \"writeback\"}",
+    inbl, NULL, NULL));
+
+  // wait for maps to settle
+  cluster.wait_for_latest_osdmap();
+
+  // read, trigger a promote on the head
+  {
+    bufferlist bl;
+    ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
+    ASSERT_EQ('c', bl[0]);
+  }
+
+  // try more times
+  int retries = 50;
+  for (int i=0; i<retries; ++i)
+  {
+    {
+      librados::AioCompletion * completion = cluster.aio_create_completion();
+      librados::AioCompletion * completion1 = cluster.aio_create_completion();
+
+      // send a snap rollback op and a snap read op parallel
+      // trigger two promote(copy) to the same snap clone obj
+      // the second snap read op is read-ordered make sure
+      // op not wait for objects_blocked_on_snap_promotion
+      ObjectWriteOperation op;
+      op.selfmanaged_snap_rollback(my_snaps[0]);
+      ASSERT_EQ(0, ioctx.aio_operate(
+        "foo", completion, &op));
+
+      ioctx.snap_set_read(my_snaps[1]);
+      std::map<uint64_t, uint64_t> extents;
+      bufferlist read_bl;
+      int rval = -1;
+      ObjectReadOperation op1;
+      op1.sparse_read(0, 8, &extents, &read_bl, &rval);
+      ASSERT_EQ(0, ioctx.aio_operate("foo", completion1, &op1, &read_bl));
+      ioctx.snap_set_read(librados::SNAP_HEAD);
+
+      completion->wait_for_safe();
+      ASSERT_EQ(0, completion->get_return_value());
+      completion->release();
+
+      completion1->wait_for_safe();
+      ASSERT_EQ(0, completion1->get_return_value());
+      completion1->release();
+    }
+
+    // evict foo snap
+    ioctx.snap_set_read(my_snaps[0]);
+    {
+      ObjectReadOperation op;
+      op.cache_evict();
+      librados::AioCompletion *completion = cluster.aio_create_completion();
+      ASSERT_EQ(0, ioctx.aio_operate(
+        "foo", completion, &op,
+        librados::OPERATION_IGNORE_CACHE, NULL));
+      completion->wait_for_safe();
+      ASSERT_EQ(0, completion->get_return_value());
+      completion->release();
+    }
+    ioctx.snap_set_read(librados::SNAP_HEAD);
+  }
+
+  // cleanup
+  ioctx.selfmanaged_snap_remove(my_snaps[0]);
+  ioctx.selfmanaged_snap_remove(my_snaps[1]);
+}
+
  TEST_F(LibRadosTwoPoolsPP, TryFlush) {
    // configure cache
    bufferlist inbl;
author	YuanXin <yuanxin@didiglobal.com>
	Sun, 21 Feb 2021 07:08:07 +0000 (15:08 +0800)
committer	YuanXin <yuanxin@didiglobal.com>
	Mon, 1 Mar 2021 23:52:59 +0000 (07:52 +0800)
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/osd/PrimaryLogPG.h		patch \| blob \| history
src/test/librados/tier_cxx.cc		patch \| blob \| history