From 5f3cb951c50e6d5da140605b9e4208d1126a1dfb Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 9 Jan 2019 14:46:49 -0500 Subject: [PATCH] librbd: keep access/modified timestamp updates out of IO path If the cache is enabled, it was possible for the timestamp updates to cause race conditions with librbd clients (e.g. rbd export) that had data corrupting effects since callbacks were expected to be serialized. Fixes: http://tracker.ceph.com/issues/37745 Signed-off-by: Jason Dillaman --- src/librbd/io/ImageRequest.cc | 129 +++++++++++-------- src/librbd/io/ImageRequest.h | 3 + src/test/librbd/io/test_mock_ImageRequest.cc | 13 +- 3 files changed, 83 insertions(+), 62 deletions(-) diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc index a4e00e4fd64..e0c242cf046 100644 --- a/src/librbd/io/ImageRequest.cc +++ b/src/librbd/io/ImageRequest.cc @@ -9,6 +9,7 @@ #include "librbd/Utils.h" #include "librbd/cache/ImageCache.h" #include "librbd/io/AioCompletion.h" +#include "librbd/io/AsyncOperation.h" #include "librbd/io/ObjectDispatchInterface.h" #include "librbd/io/ObjectDispatchSpec.h" #include "librbd/io/ObjectDispatcher.h" @@ -18,6 +19,7 @@ #include "common/perf_counters.h" #include "common/WorkQueue.h" #include "osdc/Striper.h" +#include #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -34,37 +36,41 @@ namespace { template struct C_UpdateTimestamp : public Context { public: - I* m_image_ctx; - AioCompletion* m_aio_comp; - bool modify; //if modify set to 'true', modify timestamp is updated, access timestamp otherwise + I& m_image_ctx; + bool m_modify; // if modify set to 'true', modify timestamp is updated, + // access timestamp otherwise + AsyncOperation m_async_op; - C_UpdateTimestamp(I* ictx, AioCompletion *c, bool m) - : m_image_ctx(ictx), m_aio_comp(c), modify(m) { - m_aio_comp->add_request(); + C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) { + m_async_op.start_op(*get_image_ctx(&m_image_ctx)); + } + ~C_UpdateTimestamp() override { + m_async_op.finish_op(); } void send() { librados::ObjectWriteOperation op; - if(modify) + if (m_modify) { cls_client::set_modify_timestamp(&op); - else + } else { cls_client::set_access_timestamp(&op); + } - librados::AioCompletion *comp = librbd::util::create_rados_callback(this); - int r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op); + auto comp = librbd::util::create_rados_callback(this); + int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); ceph_assert(r == 0); comp->release(); } void finish(int r) override { // ignore errors updating timestamp - m_aio_comp->complete_request(0); } }; bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, uint64_t interval) { - return (interval && (static_cast(now.sec()) >= interval + timestamp)); + return (interval && + (static_cast(now.sec()) >= interval + timestamp)); } } // anonymous namespace @@ -151,6 +157,7 @@ void ImageRequest::send() { } if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { + update_timestamp(); send_request(); } else { send_image_cache_request(); @@ -172,6 +179,58 @@ int ImageRequest::clip_request() { return 0; } +template +void ImageRequest::update_timestamp() { + bool modify = (get_aio_type() != AIO_TYPE_READ); + uint64_t update_interval; + if (modify) { + update_interval = m_image_ctx.mtime_update_interval; + } else { + update_interval = m_image_ctx.atime_update_interval; + } + + if (update_interval == 0) { + return; + } + + utime_t (I::*get_timestamp_fn)() const; + void (I::*set_timestamp_fn)(utime_t); + if (modify) { + get_timestamp_fn = &I::get_modify_timestamp; + set_timestamp_fn = &I::set_modify_timestamp; + } else { + get_timestamp_fn = &I::get_access_timestamp; + set_timestamp_fn = &I::set_access_timestamp; + } + + utime_t ts = ceph_clock_now(); + { + RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock); + if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx), + update_interval)) { + return; + } + } + + { + RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock); + bool update = should_update_timestamp( + ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval); + if (!update) { + return; + } + + std::invoke(set_timestamp_fn, m_image_ctx, ts); + } + + // TODO we fire and forget this outside the IO path to prevent + // potential race conditions with librbd client IO callbacks + // between different threads (e.g. librados and object cacher) + ldout(m_image_ctx.cct, 10) << get_request_type() << dendl; + auto req = new C_UpdateTimestamp(m_image_ctx, modify); + req->send(); +} + template ImageReadRequest::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents, @@ -238,28 +297,7 @@ void ImageReadRequest::send_request() { for (auto &object_extent : object_extents) { request_count += object_extent.second.size(); } - - utime_t ts = ceph_clock_now(); - - image_ctx.timestamp_lock.get_read(); - if(should_update_timestamp(ts,image_ctx.get_access_timestamp(), - image_ctx.atime_update_interval)) { - image_ctx.timestamp_lock.put_read(); - image_ctx.timestamp_lock.get_write(); - if(should_update_timestamp(ts,image_ctx.get_access_timestamp(), - image_ctx.atime_update_interval)) { - aio_comp->set_request_count(request_count + 1); - auto update_ctx = new C_UpdateTimestamp(&image_ctx, aio_comp, false); - update_ctx->send(); - image_ctx.set_access_timestamp(ts); - } else { - aio_comp->set_request_count(request_count); - } - image_ctx.timestamp_lock.put_write(); - } else { - image_ctx.timestamp_lock.put_read(); - aio_comp->set_request_count(request_count); - } + aio_comp->set_request_count(request_count); // issue the requests for (auto &object_extent : object_extents) { @@ -346,34 +384,13 @@ void AbstractImageWriteRequest::send_request() { if (!object_extents.empty()) { uint64_t journal_tid = 0; - - utime_t ts = ceph_clock_now(); - image_ctx.timestamp_lock.get_read(); - if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(), - image_ctx.mtime_update_interval)) { - image_ctx.timestamp_lock.put_read(); - image_ctx.timestamp_lock.get_write(); - if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(), - image_ctx.mtime_update_interval)) { - aio_comp->set_request_count(object_extents.size() + 1); - auto update_ctx = new C_UpdateTimestamp(&image_ctx, aio_comp, true); - update_ctx->send(); - image_ctx.set_modify_timestamp(ts); - } else { - aio_comp->set_request_count(object_extents.size()); - } - image_ctx.timestamp_lock.put_write(); - } else { - image_ctx.timestamp_lock.put_read(); - aio_comp->set_request_count(object_extents.size()); - } - if (journaling) { // in-flight ops are flushed prior to closing the journal ceph_assert(image_ctx.journal != NULL); journal_tid = append_journal_event(m_synchronous); } + aio_comp->set_request_count(object_extents.size()); send_object_requests(object_extents, snapc, journal_tid); } else { // no IO to perform -- fire completion diff --git a/src/librbd/io/ImageRequest.h b/src/librbd/io/ImageRequest.h index 7a934e5bebb..0b8d4179c02 100644 --- a/src/librbd/io/ImageRequest.h +++ b/src/librbd/io/ImageRequest.h @@ -84,6 +84,7 @@ protected: virtual int clip_request(); + virtual void update_timestamp(); virtual void send_request() = 0; virtual void send_image_cache_request() = 0; @@ -248,6 +249,8 @@ protected: int clip_request() override { return 0; } + void update_timestamp() override { + } void send_request() override; void send_image_cache_request() override; diff --git a/src/test/librbd/io/test_mock_ImageRequest.cc b/src/test/librbd/io/test_mock_ImageRequest.cc index 741be280d01..9fb071ba08d 100644 --- a/src/test/librbd/io/test_mock_ImageRequest.cc +++ b/src/test/librbd/io/test_mock_ImageRequest.cc @@ -82,8 +82,9 @@ struct TestMockIoImageRequest : public TestMockFixture { EXPECT_CALL(mock_image_ctx, get_modify_timestamp()) .WillOnce(Return(ceph_clock_now() - utime_t(10,0))); } else { - mock_image_ctx.mtime_update_interval = 0; - EXPECT_CALL(mock_image_ctx, get_modify_timestamp()); + mock_image_ctx.mtime_update_interval = 600; + EXPECT_CALL(mock_image_ctx, get_modify_timestamp()) + .WillOnce(Return(ceph_clock_now())); } } @@ -226,8 +227,8 @@ TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) { mock_image_ctx.journal = &mock_journal; InSequence seq; - expect_is_journal_appending(mock_journal, false); expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, false); expect_object_request_send(mock_image_ctx, 0); C_SaferCond aio_comp_ctx; @@ -256,8 +257,8 @@ TEST_F(TestMockIoImageRequest, AioDiscardJournalAppendDisabled) { mock_image_ctx.journal = &mock_journal; InSequence seq; - expect_is_journal_appending(mock_journal, false); expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, false); expect_object_request_send(mock_image_ctx, 0); C_SaferCond aio_comp_ctx; @@ -314,8 +315,8 @@ TEST_F(TestMockIoImageRequest, AioWriteSameJournalAppendDisabled) { mock_image_ctx.journal = &mock_journal; InSequence seq; - expect_is_journal_appending(mock_journal, false); expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, false); expect_object_request_send(mock_image_ctx, 0); C_SaferCond aio_comp_ctx; @@ -345,8 +346,8 @@ TEST_F(TestMockIoImageRequest, AioCompareAndWriteJournalAppendDisabled) { mock_image_ctx.journal = &mock_journal; InSequence seq; - expect_is_journal_appending(mock_journal, false); expect_get_modify_timestamp(mock_image_ctx, false); + expect_is_journal_appending(mock_journal, false); expect_object_request_send(mock_image_ctx, 0); C_SaferCond aio_comp_ctx; -- 2.47.3