From 617f7114e261595e2e85970e171dcad41f283599 Mon Sep 17 00:00:00 2001
From: YuanXin
Date: Sun, 21 Feb 2021 15:08:07 +0800
Subject: [PATCH] osd: avoid two copy ops with the same src cancelling each other

In a cache tier, a head object may have two snaps that share the same
clone object. If that clone object has been flushed or evicted from the
cache pool, a rollback request and a snap read request arriving at the
two snaps at the same time each trigger a promote of the same clone
object, generating two copy ops with the same src. The second copy op
cancels the first by calling cancel_copy() and
kick_object_context_blocked(), but kicking the blocked object context
restarts the promote behind the first copy op, which issues a new copy
op that in turn cancels the second one. The two promote requests keep
cancelling each other's copy ops and run into a dead loop.

Fix this by moving the kick out of cancel_copy(): the bulk path, renamed
cancel_and_kick_copy_ops(), kicks each object context only after its
copy op has been cancelled, and the cancel that happens when a copy op
is replaced no longer restarts the cancelled promote right away.

Fixes: https://tracker.ceph.com/issues/49409
Signed-off-by: YuanXin
---
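The cancel/kick ping-pong is easier to see in a standalone model than in
the diff. The sketch below is not Ceph code: CopyOp, start_promote(),
cancel_copy() and the requeue queue are simplified stand-ins for the
promote/copy machinery in PrimaryLogPG. It only mirrors the control
flow: when cancelling a copy also requeues the cancelled waiter
immediately, two promotes to the same src cancel each other forever;
deferring the kick lets the second copy survive.

    // g++ -std=c++17 cancel_loop.cc && ./a.out
    #include <deque>
    #include <iostream>
    #include <map>
    #include <string>

    struct CopyOp { int promote_id; };          // which promote issued it

    std::map<std::string, CopyOp> copy_ops;     // in-flight copy per src
    std::deque<int> requeued;                   // waiters kicked back
    bool kick_inside_cancel = true;             // buggy behaviour if true

    void cancel_copy(const std::string& src) {
      int victim = copy_ops[src].promote_id;
      copy_ops.erase(src);
      if (kick_inside_cancel)
        requeued.push_back(victim);             // bug: victim re-promotes
    }

    void start_promote(int id, const std::string& src) {
      if (copy_ops.count(src)) {                // src already being copied
        std::cout << "promote " << id << " cancels promote "
                  << copy_ops[src].promote_id << "'s copy\n";
        cancel_copy(src);
      }
      copy_ops[src] = {id};
    }

    int run() {
      requeued = {1, 2};      // rollback + snap read arrive together
      int steps = 0;
      while (!requeued.empty() && ++steps <= 10) {  // 10 = "forever" cutoff
        int id = requeued.front();
        requeued.pop_front();
        start_promote(id, "clone");
      }
      return steps;
    }

    int main() {
      std::cout << "kick inside cancel_copy (buggy):\n";
      if (run() > 10) std::cout << "  dead loop\n";
      copy_ops.clear();
      kick_inside_cancel = false;
      std::cout << "kick deferred (fixed):\n";
      run();
      std::cout << "  copy survives for promote "
                << copy_ops["clone"].promote_id << "\n";
    }

In the real OSD the blocked waiters are not lost when the kick is
deferred: the copy-completion path stops blocking the object context and
kicks it, so the cancelled request is retried once the clone has
actually been promoted. Note also that cancel_copy() clears cop->obc
before returning, which is why cancel_and_kick_copy_ops() below saves
the ObjectContextRef before cancelling and kicks it afterwards.
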
 src/osd/PrimaryLogPG.cc       |   9 +--
 src/osd/PrimaryLogPG.h        |   2 +-
 src/test/librados/tier_cxx.cc | 111 ++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 2fa3c40f91f..2f03ff7be60 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -10061,7 +10061,6 @@ void PrimaryLogPG::cancel_copy(CopyOpRef cop, bool requeue,
 
   copy_ops.erase(cop->obc->obs.oi.soid);
   cop->obc->stop_block();
-  kick_object_context_blocked(cop->obc);
   cop->results.should_requeue = requeue;
   CopyCallbackResults result(-ECANCELED, &cop->results);
   cop->cb->complete(result);
@@ -10072,13 +10071,15 @@
   cop->obc = ObjectContextRef();
 }
 
-void PrimaryLogPG::cancel_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
+void PrimaryLogPG::cancel_and_kick_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
 {
   dout(10) << __func__ << dendl;
   map<hobject_t,CopyOpRef>::iterator p = copy_ops.begin();
   while (p != copy_ops.end()) {
+    ObjectContextRef obc = p->second->obc;
     // requeue this op? can I queue up all of them?
     cancel_copy((p++)->second, requeue, tids);
+    kick_object_context_blocked(obc);
   }
 }
 
@@ -12482,7 +12483,7 @@ void PrimaryLogPG::on_shutdown()
   m_scrubber->unreg_next_scrub();
 
   vector<ceph_tid_t> tids;
-  cancel_copy_ops(false, &tids);
+  cancel_and_kick_copy_ops(false, &tids);
   cancel_flush_ops(false, &tids);
   cancel_proxy_ops(false, &tids);
   cancel_manifest_ops(false, &tids);
@@ -12599,7 +12600,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t)
   requeue_ops(waiting_for_readable);
 
   vector<ceph_tid_t> tids;
-  cancel_copy_ops(is_primary(), &tids);
+  cancel_and_kick_copy_ops(is_primary(), &tids);
   cancel_flush_ops(is_primary(), &tids);
   cancel_proxy_ops(is_primary(), &tids);
   cancel_manifest_ops(is_primary(), &tids);
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index 134b3ba1a93..8ea01e986ef 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -1367,7 +1367,7 @@ protected:
   void finish_copyfrom(CopyFromCallback *cb);
   void finish_promote(int r, CopyResults *results, ObjectContextRef obc);
   void cancel_copy(CopyOpRef cop, bool requeue, std::vector<ceph_tid_t> *tids);
-  void cancel_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);
+  void cancel_and_kick_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);
 
   friend struct C_Copyfrom;
 
diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc
index dcfea88785f..5e17c72a450 100644
--- a/src/test/librados/tier_cxx.cc
+++ b/src/test/librados/tier_cxx.cc
@@ -1465,6 +1465,117 @@ TEST_F(LibRadosTwoPoolsPP, ListSnap){
   ioctx.selfmanaged_snap_remove(my_snaps[0]);
 }
 
+// This test case reproduces https://tracker.ceph.com/issues/49409
+TEST_F(LibRadosTwoPoolsPP, EvictSnapRollbackReadRace) {
+  // create object
+  {
+    bufferlist bl;
+    int len = string("hi there").length() * 2;
+    // append enough chunks of data to make sure the second promote
+    // op comes in before the first promote op has finished
+    for (int i=0; i<4*1024*1024/len; ++i)
+      bl.append("hi therehi there");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, ioctx.operate("foo", &op));
+  }
+
+  // create two snapshots and a clone
+  vector<uint64_t> my_snaps(2);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[1]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+                                                    my_snaps));
+  {
+    bufferlist bl;
+    bl.append("ciao!");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, ioctx.operate("foo", &op));
+  }
+
+  // configure cache
+  bufferlist inbl;
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name +
+    "\", \"force_nonempty\": \"--force-nonempty\" }",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+    "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+    "\", \"mode\": \"writeback\"}",
+    inbl, NULL, NULL));
+
+  // wait for maps to settle
+  cluster.wait_for_latest_osdmap();
+
+  // read, trigger a promote on the head
+  {
+    bufferlist bl;
+    ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
+    ASSERT_EQ('c', bl[0]);
+  }
+
+  // retry a number of times to make the race likely to fire
+  int retries = 50;
+  for (int i=0; i<retries; ++i) {
+    {
+      // issue a snap rollback and a snap read in parallel so that
+      // two promotes race for the same clone object
+      ObjectWriteOperation op;
+      op.selfmanaged_snap_rollback(my_snaps[0]);
+      librados::AioCompletion *completion = cluster.aio_create_completion();
+      ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
+
+      ioctx.snap_set_read(my_snaps[1]);
+      std::map<uint64_t, uint64_t> extents;
+      bufferlist read_bl;
+      int rval = -1;
+      ObjectReadOperation op1;
+      op1.sparse_read(0, 8, &extents, &read_bl, &rval);
+      librados::AioCompletion *completion1 = cluster.aio_create_completion();
+      ASSERT_EQ(0, ioctx.aio_operate("foo", completion1, &op1, &read_bl));
+      ioctx.snap_set_read(librados::SNAP_HEAD);
+
+      completion->wait_for_safe();
+      ASSERT_EQ(0, completion->get_return_value());
+      completion->release();
+
+      completion1->wait_for_safe();
+      ASSERT_EQ(0, completion1->get_return_value());
+      completion1->release();
+    }
+
+    // evict foo snap
+    ioctx.snap_set_read(my_snaps[0]);
+    {
+      ObjectReadOperation op;
+      op.cache_evict();
+      librados::AioCompletion *completion = cluster.aio_create_completion();
+      ASSERT_EQ(0, ioctx.aio_operate(
+        "foo", completion, &op,
+        librados::OPERATION_IGNORE_CACHE, NULL));
+      completion->wait_for_safe();
+      ASSERT_EQ(0, completion->get_return_value());
+      completion->release();
+    }
+    ioctx.snap_set_read(librados::SNAP_HEAD);
+  }
+
+  // cleanup
+  ioctx.selfmanaged_snap_remove(my_snaps[0]);
+  ioctx.selfmanaged_snap_remove(my_snaps[1]);
+}
+
 TEST_F(LibRadosTwoPoolsPP, TryFlush) {
   // configure cache
   bufferlist inbl;
-- 
2.39.5