From 75ff8fd14dccaa7d2f11ba8a561ad3c0f410585c Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 22 Jul 2020 11:25:56 -0400 Subject: [PATCH] librbd: flush all queued object IO from simple scheduler Normally IO is tracked via the AioCompletion's async_op but the scheduler will "complete" writes while the IO might be still executing. Therefore, prior to shutting down this dispatch layer we need to wait for all IO to complete. Fixes: https://tracker.ceph.com/issues/46668 Signed-off-by: Jason Dillaman --- .../io/SimpleSchedulerObjectDispatch.cc | 38 +++++++++--- src/librbd/io/SimpleSchedulerObjectDispatch.h | 3 + ...test_mock_SimpleSchedulerObjectDispatch.cc | 60 +++++++++++++------ 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/librbd/io/SimpleSchedulerObjectDispatch.cc b/src/librbd/io/SimpleSchedulerObjectDispatch.cc index eca053e8a70c2..66669ec4d03d3 100644 --- a/src/librbd/io/SimpleSchedulerObjectDispatch.cc +++ b/src/librbd/io/SimpleSchedulerObjectDispatch.cc @@ -7,6 +7,7 @@ #include "librbd/AsioEngine.h" #include "librbd/ImageCtx.h" #include "librbd/Utils.h" +#include "librbd/io/FlushTracker.h" #include "librbd/io/ObjectDispatchSpec.h" #include "librbd/io/ObjectDispatcher.h" #include "librbd/io/Utils.h" @@ -174,6 +175,7 @@ template SimpleSchedulerObjectDispatch::SimpleSchedulerObjectDispatch( I* image_ctx) : m_image_ctx(image_ctx), + m_flush_tracker(new FlushTracker(image_ctx)), m_lock(ceph::make_mutex(librbd::util::unique_lock_name( "librbd::io::SimpleSchedulerObjectDispatch::lock", this))), m_max_delay(image_ctx->config.template get_val( @@ -190,6 +192,7 @@ SimpleSchedulerObjectDispatch::SimpleSchedulerObjectDispatch( template SimpleSchedulerObjectDispatch::~SimpleSchedulerObjectDispatch() { + delete m_flush_tracker; } template @@ -206,6 +209,7 @@ void SimpleSchedulerObjectDispatch::shut_down(Context* on_finish) { auto cct = m_image_ctx->cct; ldout(cct, 5) << dendl; + m_flush_tracker->shut_down(); on_finish->complete(0); } @@ -260,6 +264,15 @@ bool SimpleSchedulerObjectDispatch::write( std::lock_guard locker{m_lock}; if (try_delay_write(object_no, object_off, std::move(data), snapc, op_flags, *object_dispatch_flags, on_dispatched)) { + + auto dispatch_seq = ++m_dispatch_seq; + m_flush_tracker->start_io(dispatch_seq); + *on_finish = new LambdaContext( + [this, dispatch_seq, ctx=*on_finish](int r) { + ctx->complete(r); + m_flush_tracker->finish_io(dispatch_seq); + }); + *dispatch_result = DISPATCH_RESULT_COMPLETE; return true; } @@ -316,10 +329,15 @@ bool SimpleSchedulerObjectDispatch::flush( auto cct = m_image_ctx->cct; ldout(cct, 20) << dendl; - std::lock_guard locker{m_lock}; - dispatch_all_delayed_requests(); + { + std::lock_guard locker{m_lock}; + dispatch_all_delayed_requests(); + } - return false; + *dispatch_result = DISPATCH_RESULT_CONTINUE; + m_flush_tracker->flush(on_dispatched); + + return true; } template @@ -403,24 +421,30 @@ void SimpleSchedulerObjectDispatch::register_in_flight_request( auto it = res.first; auto dispatch_seq = ++m_dispatch_seq; + m_flush_tracker->start_io(dispatch_seq); + it->second->set_dispatch_seq(dispatch_seq); *on_finish = new LambdaContext( [this, object_no, dispatch_seq, start_time, ctx=*on_finish](int r) { ctx->complete(r); - std::lock_guard locker{m_lock}; + std::unique_lock locker{m_lock}; if (m_latency_stats && start_time != utime_t()) { auto latency = ceph_clock_now() - start_time; m_latency_stats->add(latency.to_nsec()); } + auto it = m_requests.find(object_no); if (it == m_requests.end() || it->second->get_dispatch_seq() != dispatch_seq) { ldout(m_image_ctx->cct, 20) << "already dispatched" << dendl; - return; + } else { + dispatch_delayed_requests(it->second); + m_requests.erase(it); } - dispatch_delayed_requests(it->second); - m_requests.erase(it); + locker.unlock(); + + m_flush_tracker->finish_io(dispatch_seq); }); } diff --git a/src/librbd/io/SimpleSchedulerObjectDispatch.h b/src/librbd/io/SimpleSchedulerObjectDispatch.h index c32d513a2c89b..ca134ef385c57 100644 --- a/src/librbd/io/SimpleSchedulerObjectDispatch.h +++ b/src/librbd/io/SimpleSchedulerObjectDispatch.h @@ -22,6 +22,7 @@ class ImageCtx; namespace io { +template class FlushTracker; class LatencyStats; /** @@ -176,6 +177,8 @@ private: ImageCtxT *m_image_ctx; + FlushTracker* m_flush_tracker; + ceph::mutex m_lock; SafeTimer *m_timer; ceph::mutex *m_timer_lock; diff --git a/src/test/librbd/io/test_mock_SimpleSchedulerObjectDispatch.cc b/src/test/librbd/io/test_mock_SimpleSchedulerObjectDispatch.cc index da376a9acd445..ab2b0ba1d7c02 100644 --- a/src/test/librbd/io/test_mock_SimpleSchedulerObjectDispatch.cc +++ b/src/test/librbd/io/test_mock_SimpleSchedulerObjectDispatch.cc @@ -28,6 +28,25 @@ struct TypeTraits { typedef ::MockSafeTimer SafeTimer; }; +template <> +struct FlushTracker { + FlushTracker(MockTestImageCtx*) { + } + + void shut_down() { + } + + void flush(Context*) { + } + + void start_io(uint64_t) { + } + + void finish_io(uint64_t) { + } + +}; + } // namespace io } // namespace librbd @@ -208,10 +227,12 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Flush) { MockSimpleSchedulerObjectDispatch mock_simple_scheduler_object_dispatch(&mock_image_ctx); + io::DispatchResult dispatch_result; C_SaferCond cond; Context *on_finish = &cond; - ASSERT_FALSE(mock_simple_scheduler_object_dispatch.flush( - FLUSH_SOURCE_USER, {}, nullptr, nullptr, &on_finish, nullptr)); + ASSERT_TRUE(mock_simple_scheduler_object_dispatch.flush( + FLUSH_SOURCE_USER, {}, nullptr, &dispatch_result, &on_finish, nullptr)); + ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result); ASSERT_EQ(on_finish, &cond); // not modified on_finish->complete(0); ASSERT_EQ(0, cond.wait()); @@ -251,7 +272,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteDelayed) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); expect_dispatch_delayed_requests(mock_image_ctx, 0); @@ -298,7 +319,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteDelayedFlush) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); expect_dispatch_delayed_requests(mock_image_ctx, 0); @@ -306,8 +327,9 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteDelayedFlush) { C_SaferCond cond3; Context *on_finish3 = &cond3; - ASSERT_FALSE(mock_simple_scheduler_object_dispatch.flush( - FLUSH_SOURCE_USER, {}, nullptr, nullptr, &on_finish3, nullptr)); + ASSERT_TRUE(mock_simple_scheduler_object_dispatch.flush( + FLUSH_SOURCE_USER, {}, nullptr, &dispatch_result, &on_finish3, nullptr)); + ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result); ASSERT_EQ(on_finish3, &cond3); on_finish1->complete(0); @@ -356,7 +378,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteMerged) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched2)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); object_off = 0; @@ -370,7 +392,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteMerged) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish3, &on_dispatched3)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish3, &cond3); + ASSERT_NE(on_finish3, &cond3); object_off = 10; data.clear(); @@ -383,7 +405,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteMerged) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish4, &on_dispatched4)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish4, &cond4); + ASSERT_NE(on_finish4, &cond4); object_off = 30; data.clear(); @@ -396,7 +418,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteMerged) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish5, &on_dispatched5)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish5, &cond5); + ASSERT_NE(on_finish5, &cond5); object_off = 50; data.clear(); @@ -409,7 +431,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteMerged) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish6, &on_dispatched6)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish6, &cond6); + ASSERT_NE(on_finish6, &cond6); // expect two requests dispatched: // 0~40 (merged 0~10, 10~10, 20~10, 30~10) and 50~10 @@ -472,7 +494,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, WriteNonSequential) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched2)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); expect_dispatch_delayed_requests(mock_image_ctx, 0); @@ -536,7 +558,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Mixed) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched2)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); // write (3) 10~10 (delayed) @@ -551,7 +573,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Mixed) { 0, object_off, std::move(data), mock_image_ctx.snapc, 0, {}, &object_dispatch_flags, nullptr, &dispatch_result, &on_finish3, &on_dispatched3)); - ASSERT_EQ(on_finish3, &cond3); + ASSERT_NE(on_finish3, &cond3); // discard (1) (non-seq io) // will dispatch the delayed writes (2) and (3) and wrap on_finish @@ -581,7 +603,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Mixed) { 0, object_off, std::move(data), mock_image_ctx.snapc, 0, {}, &object_dispatch_flags, nullptr, &dispatch_result, &on_finish5, &on_dispatched5)); - ASSERT_EQ(on_finish5, &cond5); + ASSERT_NE(on_finish5, &cond5); ASSERT_NE(timer_task, nullptr); // discard (2) (non-seq io) @@ -611,7 +633,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Mixed) { 0, object_off, std::move(data), mock_image_ctx.snapc, 0, {}, &object_dispatch_flags, nullptr, &dispatch_result, &on_finish7, &on_dispatched7)); - ASSERT_EQ(on_finish7, &cond7); + ASSERT_NE(on_finish7, &cond7); ASSERT_NE(timer_task, nullptr); // write (1) finishes @@ -688,7 +710,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, DispatchQueue) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched2)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); // send 2 writes to object 1 @@ -711,7 +733,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, DispatchQueue) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish4, &on_dispatched4)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish4, &cond4); + ASSERT_NE(on_finish4, &cond4); // finish write (1) to object 0 expect_dispatch_delayed_requests(mock_image_ctx, 0); @@ -771,7 +793,7 @@ TEST_F(TestMockIoSimpleSchedulerObjectDispatch, Timer) { &object_dispatch_flags, nullptr, &dispatch_result, &on_finish2, &on_dispatched)); ASSERT_EQ(dispatch_result, io::DISPATCH_RESULT_COMPLETE); - ASSERT_EQ(on_finish2, &cond2); + ASSERT_NE(on_finish2, &cond2); ASSERT_NE(timer_task, nullptr); expect_dispatch_delayed_requests(mock_image_ctx, 0); -- 2.39.5