]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: keep access/modified timestamp updates out of IO path 25883/head
authorJason Dillaman <dillaman@redhat.com>
Wed, 9 Jan 2019 19:46:49 +0000 (14:46 -0500)
committerJason Dillaman <dillaman@redhat.com>
Thu, 10 Jan 2019 15:00:20 +0000 (10:00 -0500)
If the cache is enabled, it was possible for the timestamp updates
to cause race conditions with librbd clients (e.g. rbd export)
that had data corrupting effects since callbacks were expected to
be serialized.

Fixes: http://tracker.ceph.com/issues/37745
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/io/ImageRequest.cc
src/librbd/io/ImageRequest.h
src/test/librbd/io/test_mock_ImageRequest.cc

index a4e00e4fd645e97450ab250ca3dc1c92832d4fa7..e0c242cf046b0f0f53f7c392ef8d5d11862d2720 100644 (file)
@@ -9,6 +9,7 @@
 #include "librbd/Utils.h"
 #include "librbd/cache/ImageCache.h"
 #include "librbd/io/AioCompletion.h"
+#include "librbd/io/AsyncOperation.h"
 #include "librbd/io/ObjectDispatchInterface.h"
 #include "librbd/io/ObjectDispatchSpec.h"
 #include "librbd/io/ObjectDispatcher.h"
@@ -18,6 +19,7 @@
 #include "common/perf_counters.h"
 #include "common/WorkQueue.h"
 #include "osdc/Striper.h"
+#include <functional>
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -34,37 +36,41 @@ namespace {
 template <typename I>
 struct C_UpdateTimestamp : public Context {
 public:
-  I* m_image_ctx;
-  AioCompletion* m_aio_comp;
-  bool modify; //if modify set to 'true', modify timestamp is updated, access timestamp otherwise
+  I& m_image_ctx;
+  bool m_modify; // if modify set to 'true', modify timestamp is updated,
+                 // access timestamp otherwise
+  AsyncOperation m_async_op;
 
-  C_UpdateTimestamp(I* ictx, AioCompletion *c, bool m)  
-    : m_image_ctx(ictx), m_aio_comp(c), modify(m) {
-    m_aio_comp->add_request();
+  C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
+    m_async_op.start_op(*get_image_ctx(&m_image_ctx));
+  }
+  ~C_UpdateTimestamp() override {
+    m_async_op.finish_op();
   }
 
   void send() {
     librados::ObjectWriteOperation op;
-    if(modify)
+    if (m_modify) {
       cls_client::set_modify_timestamp(&op);
-    else
+    } else {
       cls_client::set_access_timestamp(&op);
+    }
 
-    librados::AioCompletion *comp = librbd::util::create_rados_callback(this);
-    int r = m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op);
+    auto comp = librbd::util::create_rados_callback(this);
+    int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
     ceph_assert(r == 0);
     comp->release();
   }
 
   void finish(int r) override {
     // ignore errors updating timestamp
-    m_aio_comp->complete_request(0);
   }
 };
 
 bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
                              uint64_t interval) {
-    return (interval && (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
+    return (interval &&
+            (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
 }
 
 } // anonymous namespace
@@ -151,6 +157,7 @@ void ImageRequest<I>::send() {
   }
 
   if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
+    update_timestamp();
     send_request();
   } else {
     send_image_cache_request();
@@ -172,6 +179,58 @@ int ImageRequest<I>::clip_request() {
   return 0;
 }
 
+template <typename I>
+void ImageRequest<I>::update_timestamp() {
+  bool modify = (get_aio_type() != AIO_TYPE_READ);
+  uint64_t update_interval;
+  if (modify) {
+    update_interval = m_image_ctx.mtime_update_interval;
+  } else {
+    update_interval = m_image_ctx.atime_update_interval;
+  }
+
+  if (update_interval == 0) {
+    return;
+  }
+
+  utime_t (I::*get_timestamp_fn)() const;
+  void (I::*set_timestamp_fn)(utime_t);
+  if (modify) {
+    get_timestamp_fn = &I::get_modify_timestamp;
+    set_timestamp_fn = &I::set_modify_timestamp;
+  } else {
+    get_timestamp_fn = &I::get_access_timestamp;
+    set_timestamp_fn = &I::set_access_timestamp;
+  }
+
+  utime_t ts = ceph_clock_now();
+  {
+    RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock);
+    if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
+                                update_interval)) {
+      return;
+    }
+  }
+
+  {
+    RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock);
+    bool update = should_update_timestamp(
+      ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
+    if (!update) {
+      return;
+    }
+
+    std::invoke(set_timestamp_fn, m_image_ctx, ts);
+  }
+
+  // TODO we fire and forget this outside the IO path to prevent
+  // potential race conditions with librbd client IO callbacks
+  // between different threads (e.g. librados and object cacher)
+  ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
+  auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
+  req->send();
+}
+
 template <typename I>
 ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
                                       Extents &&image_extents,
@@ -238,28 +297,7 @@ void ImageReadRequest<I>::send_request() {
   for (auto &object_extent : object_extents) {
     request_count += object_extent.second.size();
   }
-  
-  utime_t ts = ceph_clock_now();
-
-  image_ctx.timestamp_lock.get_read();
-  if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
-                             image_ctx.atime_update_interval)) {
-    image_ctx.timestamp_lock.put_read();
-    image_ctx.timestamp_lock.get_write();
-    if(should_update_timestamp(ts,image_ctx.get_access_timestamp(),
-                               image_ctx.atime_update_interval)) {
-      aio_comp->set_request_count(request_count + 1);
-      auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, false);
-      update_ctx->send();
-      image_ctx.set_access_timestamp(ts);
-    } else {
-      aio_comp->set_request_count(request_count);
-    }
-    image_ctx.timestamp_lock.put_write();
-  } else {
-    image_ctx.timestamp_lock.put_read();
-    aio_comp->set_request_count(request_count);
-  }
+  aio_comp->set_request_count(request_count);
 
   // issue the requests
   for (auto &object_extent : object_extents) {
@@ -346,34 +384,13 @@ void AbstractImageWriteRequest<I>::send_request() {
 
   if (!object_extents.empty()) {
     uint64_t journal_tid = 0;
-
-    utime_t ts = ceph_clock_now();
-    image_ctx.timestamp_lock.get_read();
-    if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
-                               image_ctx.mtime_update_interval)) {
-      image_ctx.timestamp_lock.put_read();
-      image_ctx.timestamp_lock.get_write();
-      if(should_update_timestamp(ts,image_ctx.get_modify_timestamp(),
-                                 image_ctx.mtime_update_interval)) {
-        aio_comp->set_request_count(object_extents.size() + 1);
-        auto update_ctx = new C_UpdateTimestamp<I>(&image_ctx, aio_comp, true);
-        update_ctx->send();
-        image_ctx.set_modify_timestamp(ts);
-      } else {
-        aio_comp->set_request_count(object_extents.size());
-      }
-      image_ctx.timestamp_lock.put_write();
-    } else {
-      image_ctx.timestamp_lock.put_read();
-      aio_comp->set_request_count(object_extents.size());
-    }
-    
     if (journaling) {
       // in-flight ops are flushed prior to closing the journal
       ceph_assert(image_ctx.journal != NULL);
       journal_tid = append_journal_event(m_synchronous);
     }
 
+    aio_comp->set_request_count(object_extents.size());
     send_object_requests(object_extents, snapc, journal_tid);
   } else {
     // no IO to perform -- fire completion
index 7a934e5bebb103fcc174939d684ea57613fa68eb..0b8d4179c0262fb8c22a51b64a72056e22e72c0d 100644 (file)
@@ -84,6 +84,7 @@ protected:
 
 
   virtual int clip_request();
+  virtual void update_timestamp();
   virtual void send_request() = 0;
   virtual void send_image_cache_request() = 0;
 
@@ -248,6 +249,8 @@ protected:
   int clip_request() override {
     return 0;
   }
+  void update_timestamp() override {
+  }
   void send_request() override;
   void send_image_cache_request() override;
 
index 741be280d018182faa1bf71a473ff9d5bab5ed2c..9fb071ba08d60a52e64acf93f8bff6c90a61a617 100644 (file)
@@ -82,8 +82,9 @@ struct TestMockIoImageRequest : public TestMockFixture {
       EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
         .WillOnce(Return(ceph_clock_now() - utime_t(10,0)));
     } else {
-      mock_image_ctx.mtime_update_interval = 0;
-      EXPECT_CALL(mock_image_ctx, get_modify_timestamp());
+      mock_image_ctx.mtime_update_interval = 600;
+      EXPECT_CALL(mock_image_ctx, get_modify_timestamp())
+        .WillOnce(Return(ceph_clock_now()));
     }
   }
 
@@ -226,8 +227,8 @@ TEST_F(TestMockIoImageRequest, AioWriteJournalAppendDisabled) {
   mock_image_ctx.journal = &mock_journal;
 
   InSequence seq;
-  expect_is_journal_appending(mock_journal, false);
   expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, false);
   expect_object_request_send(mock_image_ctx, 0);
 
   C_SaferCond aio_comp_ctx;
@@ -256,8 +257,8 @@ TEST_F(TestMockIoImageRequest, AioDiscardJournalAppendDisabled) {
   mock_image_ctx.journal = &mock_journal;
 
   InSequence seq;
-  expect_is_journal_appending(mock_journal, false);
   expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, false);
   expect_object_request_send(mock_image_ctx, 0);
 
   C_SaferCond aio_comp_ctx;
@@ -314,8 +315,8 @@ TEST_F(TestMockIoImageRequest, AioWriteSameJournalAppendDisabled) {
   mock_image_ctx.journal = &mock_journal;
 
   InSequence seq;
-  expect_is_journal_appending(mock_journal, false);
   expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, false);
   expect_object_request_send(mock_image_ctx, 0);
 
   C_SaferCond aio_comp_ctx;
@@ -345,8 +346,8 @@ TEST_F(TestMockIoImageRequest, AioCompareAndWriteJournalAppendDisabled) {
   mock_image_ctx.journal = &mock_journal;
 
   InSequence seq;
-  expect_is_journal_appending(mock_journal, false);
   expect_get_modify_timestamp(mock_image_ctx, false);
+  expect_is_journal_appending(mock_journal, false);
   expect_object_request_send(mock_image_ctx, 0);
 
   C_SaferCond aio_comp_ctx;