From eed3163f2916446008cfdb1fc119b42d7a2ed943 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 18 Oct 2018 14:04:12 -0400 Subject: [PATCH] osd/PrimaryLogPG: uncommitted dup ops should respond with logged return code Fixes: http://tracker.ceph.com/issues/36408 Signed-off-by: Jason Dillaman --- src/osd/PG.h | 2 +- src/osd/PrimaryLogPG.cc | 41 ++++++++++++------------------ src/test/librados/tier.cc | 52 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/src/osd/PG.h b/src/osd/PG.h index f695f6c5e9dae..5d9b48b8b50af 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1283,7 +1283,7 @@ protected: map> callbacks_for_degraded_object; map > > waiting_for_ondisk; + list > > waiting_for_ondisk; void requeue_object_waiters(map>& m); void requeue_op(OpRequestRef op); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 875c7ad4e7a5a..8cd65df20f603 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2167,7 +2167,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } else { dout(10) << " waiting for " << version << " to commit" << dendl; // always queue ondisk waiters, so that we can requeue if needed - waiting_for_ondisk[version].push_back(make_pair(op, user_version)); + waiting_for_ondisk[version].emplace_back(op, user_version, return_code); op->mark_delayed("waiting for ondisk"); } return; @@ -10371,12 +10371,13 @@ void PrimaryLogPG::eval_repop(RepGather *repop) auto it = waiting_for_ondisk.find(repop->v); if (it != waiting_for_ondisk.end()) { ceph_assert(waiting_for_ondisk.begin()->first == repop->v); - for (list >::iterator i = - it->second.begin(); - i != it->second.end(); - ++i) { - osd->reply_op_error(i->first, repop->r, repop->v, - i->second); + for (auto& i : it->second) { + int return_code = repop->r; + if (return_code >= 0) { + return_code = std::get<2>(i); + } + osd->reply_op_error(std::get<0>(i), return_code, repop->v, + std::get<1>(i)); } waiting_for_ondisk.erase(it); } @@ -11885,15 +11886,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) } // also requeue any dups, interleaved into position - map > >::iterator p = - waiting_for_ondisk.find(repop->v); + auto p = waiting_for_ondisk.find(repop->v); if (p != waiting_for_ondisk.end()) { dout(10) << " also requeuing ondisk waiters " << p->second << dendl; - for (list >::iterator i = - p->second.begin(); - i != p->second.end(); - ++i) { - rq.push_back(i->first); + for (auto& i : p->second) { + rq.push_back(std::get<0>(i)); } waiting_for_ondisk.erase(p); } @@ -11907,17 +11904,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) if (requeue) { requeue_ops(rq); if (!waiting_for_ondisk.empty()) { - for (map > >::iterator i = - waiting_for_ondisk.begin(); - i != waiting_for_ondisk.end(); - ++i) { - for (list >::iterator j = - i->second.begin(); - j != i->second.end(); - ++j) { - derr << __func__ << ": op " << *(j->first->get_req()) << " waiting on " - << i->first << dendl; - } + for (auto& i : waiting_for_ondisk) { + for (auto& j : i.second) { + derr << __func__ << ": op " << *(std::get<0>(j)->get_req()) + << " waiting on " << i.first << dendl; + } } ceph_assert(waiting_for_ondisk.empty()); } diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc index 4a777934af560..98d454c2238c7 100644 --- a/src/test/librados/tier.cc +++ b/src/test/librados/tier.cc @@ -6420,3 +6420,55 @@ TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) { // wait for maps to settle before next test cluster.wait_for_latest_osdmap(); } + +TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) { + // write object to base tier + bufferlist omap_bl; + encode(static_cast(0U), omap_bl); + + ObjectWriteOperation op1; + op1.omap_set({{"somekey", omap_bl}}); + ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1)); + + // configure cache + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name + + "\", \"tierpool\": \"" + cache_pool_name + + "\", \"force_nonempty\": \"--force-nonempty\" }", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name + + "\", \"mode\": \"writeback\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name + + "\", \"overlaypool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_type", "bloom"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_count", 1), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_period", 600), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "target_max_objects", 250), + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // guarded op should fail so expect error to propagate to cache tier + bufferlist test_omap_bl; + encode(static_cast(1U), test_omap_bl); + + ObjectWriteOperation op2; + op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr); + op2.omap_set({{"somekey", test_omap_bl}}); + + ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2)); +} -- 2.39.5