]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd: new alternative write-around cache layer
authorJason Dillaman <dillaman@redhat.com>
Wed, 27 Mar 2019 23:09:55 +0000 (19:09 -0400)
committerJason Dillaman <dillaman@redhat.com>
Thu, 11 Apr 2019 16:47:00 +0000 (12:47 -0400)
This is a replacement for the ObjectCacher-based writethrough/
writeback cache. It permits a configurable maximum byte size
of in-flight writes (bytes) to the OSDs where the writes are
immediately completed back to the caller. Any IO errors are
propagated on the next flush request.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/common/options.cc
src/librbd/CMakeLists.txt
src/librbd/cache/WriteAroundObjectDispatch.cc [new file with mode: 0644]
src/librbd/cache/WriteAroundObjectDispatch.h [new file with mode: 0644]
src/librbd/image/OpenRequest.cc
src/test/librbd/CMakeLists.txt
src/test/librbd/cache/test_mock_WriteAroundObjectDispatch.cc [new file with mode: 0644]
src/test/librbd/test_Migration.cc

index 36ed0fbb355fdf1c88fac8bb80f275ba9b94f7a9..849aa94b135e783f5e321368516f0d1fd9717a7e 100644 (file)
@@ -6990,6 +6990,11 @@ static std::vector<Option> get_rbd_options() {
     .set_default(true)
     .set_description("whether to enable caching (writeback unless rbd_cache_max_dirty is 0)"),
 
+    Option("rbd_cache_policy", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_enum_allowed({"writethrough", "writeback", "writearound"})
+    .set_default("writeback")
+    .set_description("cache policy for handling writes."),
+
     Option("rbd_cache_writethrough_until_flush", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("whether to make writeback caching writethrough until "
index 9a7d9a0db00f5f37ded36ab61d737499fc61ec8f..265c95f1871b0429eed9d78255da6f3d85ddc8c7 100644 (file)
@@ -38,6 +38,7 @@ set(librbd_internal_srcs
   cache/ObjectCacherObjectDispatch.cc
   cache/ObjectCacherWriteback.cc
   cache/PassthroughImageCache.cc
+  cache/WriteAroundObjectDispatch.cc
   deep_copy/ImageCopyRequest.cc
   deep_copy/MetadataCopyRequest.cc
   deep_copy/ObjectCopyRequest.cc
diff --git a/src/librbd/cache/WriteAroundObjectDispatch.cc b/src/librbd/cache/WriteAroundObjectDispatch.cc
new file mode 100644 (file)
index 0000000..38a81fe
--- /dev/null
@@ -0,0 +1,510 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/cache/WriteAroundObjectDispatch.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+#include "librbd/io/ObjectDispatcher.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::WriteAroundObjectDispatch: " \
+                           << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+template <typename I>
+WriteAroundObjectDispatch<I>::WriteAroundObjectDispatch(
+    I* image_ctx, size_t max_dirty, bool writethrough_until_flush)
+  : m_image_ctx(image_ctx), m_init_max_dirty(max_dirty), m_max_dirty(max_dirty),
+    m_lock(util::unique_lock_name(
+      "librbd::cache::WriteAroundObjectDispatch::lock", this)) {
+  if (writethrough_until_flush) {
+    m_max_dirty = 0;
+  }
+}
+
+template <typename I>
+WriteAroundObjectDispatch<I>::~WriteAroundObjectDispatch() {
+}
+
+template <typename I>
+void WriteAroundObjectDispatch<I>::init() {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  // add ourself to the IO object dispatcher chain
+  m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
+}
+
+template <typename I>
+void WriteAroundObjectDispatch<I>::shut_down(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  on_finish->complete(0);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::read(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, librados::snap_t snap_id, int op_flags,
+    const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+    io::ExtentMap* extent_map, int* object_dispatch_flags,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  return dispatch_unoptimized_io(object_no, object_off, object_len,
+                                 dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::discard(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context** on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  return dispatch_io(object_no, object_off, object_len, dispatch_result,
+                     on_finish, on_dispatched);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::write(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context**on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << data.length() << dendl;
+
+  return dispatch_io(object_no, object_off, data.length(), dispatch_result,
+                     on_finish, on_dispatched);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::write_same(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, io::Extents&& buffer_extents, ceph::bufferlist&& data,
+    const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context**on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  return dispatch_io(object_no, object_off, object_len, dispatch_result,
+                     on_finish, on_dispatched);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::compare_and_write(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+    const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+    int* object_dispatch_flags, uint64_t* journal_tid,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  return dispatch_unoptimized_io(object_no, object_off, cmp_data.length(),
+                                 dispatch_result, on_dispatched);
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::flush(
+    io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context** on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  Mutex::Locker locker(m_lock);
+  if (flush_source == io::FLUSH_SOURCE_USER && !m_user_flushed) {
+    m_user_flushed = true;
+    if (m_max_dirty == 0 && m_init_max_dirty > 0) {
+      ldout(cct, 5) << "first user flush: enabling write-around" << dendl;
+      m_max_dirty = m_init_max_dirty;
+    }
+  }
+
+  if (m_in_flight_io_tids.empty()) {
+    // no in-flight IO (also implies no queued/blocked IO)
+    return false;
+  }
+
+  auto tid = ++m_last_tid;
+  auto ctx = util::create_async_context_callback(*m_image_ctx, *on_finish);
+
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  *on_finish = new FunctionContext([this, tid](int r) {
+      handle_in_flight_flush_complete(r, tid);
+    });
+
+  if (m_queued_ios.empty() && m_blocked_ios.empty()) {
+    // immediately allow the flush to be dispatched
+    ldout(cct, 20) << "dispatching: tid=" << tid << dendl;
+    m_in_flight_flushes.emplace(tid, ctx);
+    return false;
+  }
+
+  // cannot dispatch the flush until after preceeding IO is dispatched
+  ldout(cct, 20) << "queueing: tid=" << tid << dendl;
+  m_queued_flushes.emplace(tid, QueuedFlush{ctx, on_dispatched});
+  return true;
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::dispatch_unoptimized_io(
+    uint64_t object_no, uint64_t object_off, uint64_t object_len,
+    io::DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+
+  m_lock.Lock();
+  auto in_flight_extents_it = m_in_flight_extents.find(object_no);
+  if (in_flight_extents_it == m_in_flight_extents.end() ||
+      !in_flight_extents_it->second.intersects(object_off, object_len)) {
+    // no IO in-flight to the specified extent
+    m_lock.Unlock();
+    return false;
+  }
+
+  // write IO is in-flight -- it needs to complete before the unoptimized
+  // IO can be dispatched
+  auto tid = ++m_last_tid;
+  ldout(cct, 20) << "blocked by in-flight IO: tid=" << tid << dendl;
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  m_blocked_unoptimized_ios[object_no].emplace(
+    tid, BlockedIO{object_off, object_len, nullptr, on_dispatched});
+  m_lock.Unlock();
+
+  return true;
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::dispatch_io(
+    uint64_t object_no, uint64_t object_off, uint64_t object_len,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+
+  m_lock.Lock();
+  if (m_max_dirty == 0) {
+    // write-through mode is active -- no-op the cache
+    m_lock.Unlock();
+    return false;
+  }
+
+  auto tid = ++m_last_tid;
+  auto ctx = util::create_async_context_callback(*m_image_ctx, *on_finish);
+
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  *on_finish = new FunctionContext(
+    [this, tid, object_no, object_off, object_len](int r) {
+      handle_in_flight_io_complete(r, tid, object_no, object_off, object_len);
+    });
+
+  bool blocked = block_overlapping_io(&m_in_flight_extents[object_no],
+                                      object_off, object_len);
+  if (blocked) {
+    ldout(cct, 20) << "blocked on overlap: tid=" << tid << dendl;
+    m_queued_or_blocked_io_tids.insert(tid);
+    m_blocked_ios[object_no].emplace(tid, BlockedIO{object_off, object_len, ctx,
+                                                    on_dispatched});
+    m_lock.Unlock();
+  } else if (can_dispatch_io(tid, object_len)) {
+    m_lock.Unlock();
+
+    ldout(cct, 20) << "dispatching: tid=" << tid << dendl;
+    on_dispatched->complete(0);
+    ctx->complete(0);
+  } else {
+    ldout(cct, 20) << "queueing: tid=" << tid << dendl;
+    m_queued_or_blocked_io_tids.insert(tid);
+    m_queued_ios.emplace(tid, QueuedIO{object_len, ctx, on_dispatched});
+    m_lock.Unlock();
+  }
+  return true;
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::block_overlapping_io(
+    InFlightObjectExtents* in_flight_object_extents, uint64_t object_off,
+    uint64_t object_len) {
+  if (in_flight_object_extents->intersects(object_off, object_len)) {
+    return true;
+  }
+
+  in_flight_object_extents->insert(object_off, object_len);
+  return false;
+}
+
+template <typename I>
+void WriteAroundObjectDispatch<I>::unblock_overlapping_ios(
+    uint64_t object_no, uint64_t object_off, uint64_t object_len,
+    Contexts* unoptimized_io_dispatches) {
+  auto cct = m_image_ctx->cct;
+  ceph_assert(m_lock.is_locked());
+
+  auto in_flight_extents_it = m_in_flight_extents.find(object_no);
+  ceph_assert(in_flight_extents_it != m_in_flight_extents.end());
+
+  auto& in_flight_object_extents = in_flight_extents_it->second;
+  in_flight_object_extents.erase(object_off, object_len);
+
+  // handle unoptimized IOs that were blocked by in-flight IO
+  InFlightObjectExtents blocked_unoptimized_ios;
+  auto blocked_unoptimized_ios_it = m_blocked_unoptimized_ios.find(object_no);
+  if (blocked_unoptimized_ios_it != m_blocked_unoptimized_ios.end()) {
+    auto& blocked_unoptimized_object_ios = blocked_unoptimized_ios_it->second;
+    for (auto it = blocked_unoptimized_object_ios.begin();
+         it != blocked_unoptimized_object_ios.end();) {
+      auto& blocked_io = it->second;
+      if (!in_flight_object_extents.intersects(blocked_io.offset,
+                                               blocked_io.length)) {
+        unoptimized_io_dispatches->emplace(it->first, blocked_io.on_dispatched);
+        it = blocked_unoptimized_object_ios.erase(it);
+      } else {
+        blocked_unoptimized_ios.union_insert(blocked_io.offset,
+                                             blocked_io.length);
+        ++it;
+      }
+    }
+
+    if (blocked_unoptimized_object_ios.empty()) {
+      m_blocked_unoptimized_ios.erase(blocked_unoptimized_ios_it);
+    }
+  }
+
+  // handle optimized IOs that were blocked
+  auto blocked_io_it = m_blocked_ios.find(object_no);
+  if (blocked_io_it != m_blocked_ios.end()) {
+    auto& blocked_object_ios = blocked_io_it->second;
+
+    auto blocked_object_ios_it = blocked_object_ios.begin();
+    while (blocked_object_ios_it != blocked_object_ios.end()) {
+      auto next_blocked_object_ios_it = blocked_object_ios_it;
+      ++next_blocked_object_ios_it;
+
+      auto& blocked_io = blocked_object_ios_it->second;
+      if (blocked_unoptimized_ios.intersects(blocked_io.offset,
+                                             blocked_io.length) ||
+          block_overlapping_io(&in_flight_object_extents, blocked_io.offset,
+                               blocked_io.length)) {
+        break;
+      }
+
+      // move unblocked IO to the queued list, which will get processed when
+      // there is capacity
+      auto tid = blocked_object_ios_it->first;
+      ldout(cct, 20) << "queueing unblocked: tid=" << tid << dendl;
+      m_queued_ios.emplace(tid, blocked_io);
+
+      blocked_object_ios.erase(blocked_object_ios_it);
+      blocked_object_ios_it = next_blocked_object_ios_it;
+    }
+
+    if (blocked_object_ios.empty()) {
+      m_blocked_ios.erase(blocked_io_it);
+    }
+  }
+
+  if (in_flight_object_extents.empty()) {
+    m_in_flight_extents.erase(in_flight_extents_it);
+  }
+}
+
+template <typename I>
+bool WriteAroundObjectDispatch<I>::can_dispatch_io(
+    uint64_t tid, uint64_t length) {
+  ceph_assert(m_lock.is_locked());
+
+  if (m_in_flight_bytes == 0 || m_in_flight_bytes + length <= m_max_dirty) {
+    // no in-flight IO or still under max write-around in-flight limit.
+    // allow the dispatcher to proceed to send the IO but complete it back
+    // to the invoker.
+    m_in_flight_bytes += length;
+    m_in_flight_io_tids.insert(tid);
+    return true;
+  }
+
+  return false;
+}
+
+template <typename I>
+void WriteAroundObjectDispatch<I>::handle_in_flight_io_complete(
+    int r, uint64_t tid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "r=" << r << ", tid=" << tid << dendl;
+
+  m_lock.Lock();
+  m_in_flight_io_tids.erase(tid);
+  ceph_assert(m_in_flight_bytes >= object_len);
+  m_in_flight_bytes -= object_len;
+
+  if (r < 0) {
+    lderr(cct) << "IO error encountered: tid=" << tid << ": "
+               << cpp_strerror(r) << dendl;
+    if (m_pending_flush_error == 0) {
+      m_pending_flush_error = r;
+    }
+  }
+
+  // any overlapping blocked IOs can be queued now
+  Contexts unoptimized_io_dispatches;
+  unblock_overlapping_ios(object_no, object_off, object_len,
+                          &unoptimized_io_dispatches);
+
+  // collect any flushes that are ready for completion
+  int pending_flush_error = 0;
+  auto finished_flushes = collect_finished_flushes();
+  if (!finished_flushes.empty()) {
+    std::swap(pending_flush_error, m_pending_flush_error);
+  }
+
+  // collect any queued IOs that are ready for dispatch
+  auto ready_ios = collect_ready_ios();
+
+  // collect any queued flushes that were tied to queued IOs
+  auto ready_flushes = collect_ready_flushes();
+  m_lock.Unlock();
+
+  // dispatch any ready unoptimized IOs
+  for (auto& it : unoptimized_io_dispatches) {
+    ldout(cct, 20) << "dispatching unoptimized IO: tid=" << it.first << dendl;
+    it.second->complete(0);
+  }
+
+  // complete flushes that were waiting on in-flight IO
+  // (and propogate any IO error to first flush)
+  for (auto& it : finished_flushes) {
+    ldout(cct, 20) << "completing flush: tid=" << it.first << ", "
+                   << "r=" << pending_flush_error << dendl;
+    it.second->complete(pending_flush_error);
+  }
+
+  // dispatch any ready queued IOs
+  for (auto& it : ready_ios) {
+    ldout(cct, 20) << "dispatching IO: tid=" << it.first << dendl;
+    it.second.on_dispatched->complete(0);
+    it.second.on_finish->complete(0);
+  }
+
+  // dispatch any ready flushes
+  for (auto& it : ready_flushes) {
+    ldout(cct, 20) << "dispatching flush: tid=" << it.first << dendl;
+    it.second->complete(0);
+  }
+}
+
+template <typename I>
+void WriteAroundObjectDispatch<I>::handle_in_flight_flush_complete(
+    int r, uint64_t tid) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "r=" << r << ", tid=" << tid << dendl;
+
+  m_lock.Lock();
+
+  // move the in-flight flush to the pending completion list
+  auto it = m_in_flight_flushes.find(tid);
+  ceph_assert(it != m_in_flight_flushes.end());
+
+  m_pending_flushes.emplace(it->first, it->second);
+  m_in_flight_flushes.erase(it);
+
+  // collect any flushes that are ready for completion
+  int pending_flush_error = 0;
+  auto finished_flushes = collect_finished_flushes();
+  if (!finished_flushes.empty()) {
+    std::swap(pending_flush_error, m_pending_flush_error);
+  }
+  m_lock.Unlock();
+
+  // complete flushes that were waiting on in-flight IO
+  // (and propogate any IO errors)
+  for (auto& it : finished_flushes) {
+    ldout(cct, 20) << "completing flush: tid=" << it.first << dendl;
+    it.second->complete(pending_flush_error);
+    pending_flush_error = 0;
+  }
+}
+
+template <typename I>
+typename WriteAroundObjectDispatch<I>::QueuedIOs
+WriteAroundObjectDispatch<I>::collect_ready_ios() {
+  ceph_assert(m_lock.is_locked());
+
+  QueuedIOs queued_ios;
+
+  while (true) {
+    auto it = m_queued_ios.begin();
+    if (it == m_queued_ios.end() ||
+        !can_dispatch_io(it->first, it->second.length)) {
+      break;
+    }
+
+    queued_ios.emplace(it->first, it->second);
+    m_queued_or_blocked_io_tids.erase(it->first);
+    m_queued_ios.erase(it);
+  }
+  return queued_ios;
+}
+
+template <typename I>
+typename WriteAroundObjectDispatch<I>::Contexts
+WriteAroundObjectDispatch<I>::collect_ready_flushes() {
+  ceph_assert(m_lock.is_locked());
+
+  Contexts ready_flushes;
+  auto io_tid_it = m_queued_or_blocked_io_tids.begin();
+  while (true) {
+    auto it = m_queued_flushes.begin();
+    if (it == m_queued_flushes.end() ||
+        (io_tid_it != m_queued_or_blocked_io_tids.end() &&
+         *io_tid_it < it->first)) {
+      break;
+    }
+
+    m_in_flight_flushes.emplace(it->first, it->second.on_finish);
+    ready_flushes.emplace(it->first, it->second.on_dispatched);
+    m_queued_flushes.erase(it);
+  }
+
+  return ready_flushes;
+}
+
+template <typename I>
+typename WriteAroundObjectDispatch<I>::Contexts
+WriteAroundObjectDispatch<I>::collect_finished_flushes() {
+  ceph_assert(m_lock.is_locked());
+
+  Contexts finished_flushes;
+  auto io_tid_it = m_in_flight_io_tids.begin();
+  while (true) {
+    auto it = m_pending_flushes.begin();
+    if (it == m_pending_flushes.end() ||
+        (io_tid_it != m_in_flight_io_tids.end() && *io_tid_it < it->first)) {
+      break;
+    }
+
+    finished_flushes.emplace(it->first, it->second);
+    m_pending_flushes.erase(it);
+  }
+  return finished_flushes;
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::WriteAroundObjectDispatch<librbd::ImageCtx>;
diff --git a/src/librbd/cache/WriteAroundObjectDispatch.h b/src/librbd/cache/WriteAroundObjectDispatch.h
new file mode 100644 (file)
index 0000000..bafde05
--- /dev/null
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_WRITE_AROUND_OBJECT_DISPATCH_H
+#define CEPH_LIBRBD_CACHE_WRITE_AROUND_OBJECT_DISPATCH_H
+
+#include "librbd/io/ObjectDispatchInterface.h"
+#include "include/interval_set.h"
+#include "common/Mutex.h"
+#include "librbd/io/Types.h"
+#include <map>
+#include <set>
+#include <string>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+
+template <typename ImageCtxT = ImageCtx>
+class WriteAroundObjectDispatch : public io::ObjectDispatchInterface {
+public:
+  static WriteAroundObjectDispatch* create(ImageCtxT* image_ctx,
+                                           size_t max_dirty,
+                                          bool writethrough_until_flush) {
+    return new WriteAroundObjectDispatch(image_ctx, max_dirty,
+                                         writethrough_until_flush);
+  }
+
+  WriteAroundObjectDispatch(ImageCtxT* image_ctx, size_t max_dirty,
+                            bool writethrough_until_flush);
+  ~WriteAroundObjectDispatch() override;
+
+  io::ObjectDispatchLayer get_object_dispatch_layer() const override {
+    return io::OBJECT_DISPATCH_LAYER_CACHE;
+  }
+
+  void init();
+  void shut_down(Context* on_finish) override;
+
+  bool read(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, librados::snap_t snap_id, int op_flags,
+      const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+      io::ExtentMap* extent_map, int* object_dispatch_flags,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  bool discard(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context**on_finish, Context* on_dispatched) override;
+
+  bool write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context**on_finish, Context* on_dispatched) override;
+
+  bool write_same(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, io::Extents&& buffer_extents,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context**on_finish, Context* on_dispatched) override;
+
+  bool compare_and_write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+      const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+      int* object_dispatch_flags, uint64_t* journal_tid,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  bool flush(
+      io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override;
+
+  bool invalidate_cache(Context* on_finish) override {
+    return false;
+  }
+
+  bool reset_existence_cache(Context* on_finish) override {
+    return false;
+  }
+
+  void extent_overwritten(
+      uint64_t object_no, uint64_t object_off, uint64_t object_len,
+      uint64_t journal_tid, uint64_t new_journal_tid) override {
+  }
+
+private:
+  struct QueuedIO {
+    QueuedIO(uint64_t length, Context* on_finish, Context* on_dispatched)
+      : length(length), on_finish(on_finish), on_dispatched(on_dispatched) {
+    }
+
+    uint64_t length;
+    Context* on_finish;
+    Context* on_dispatched;
+  };
+
+  struct QueuedFlush {
+    QueuedFlush(Context* on_finish, Context* on_dispatched)
+      : on_finish(on_finish), on_dispatched(on_dispatched) {
+    }
+
+    Context* on_finish;
+    Context* on_dispatched;
+  };
+
+
+  struct BlockedIO : public QueuedIO {
+    BlockedIO(uint64_t offset, uint64_t length, Context* on_finish,
+              Context* on_dispatched)
+      : QueuedIO(length, on_finish, on_dispatched), offset(offset) {
+    }
+
+    uint64_t offset;
+  };
+
+  typedef std::map<uint64_t, QueuedIO> QueuedIOs;
+  typedef std::map<uint64_t, QueuedFlush> QueuedFlushes;
+
+  typedef std::map<uint64_t, BlockedIO> BlockedObjectIOs;
+  typedef std::map<uint64_t, BlockedObjectIOs> BlockedIOs;
+
+  typedef std::map<uint64_t, Context*> Contexts;
+  typedef std::set<uint64_t> Tids;
+  typedef interval_set<uint64_t> InFlightObjectExtents;
+  typedef std::map<uint64_t, InFlightObjectExtents> InFlightExtents;
+
+  ImageCtxT* m_image_ctx;
+  size_t m_init_max_dirty;
+  size_t m_max_dirty;
+
+  Mutex m_lock;
+  bool m_user_flushed = false;
+
+  uint64_t m_last_tid = 0;
+  uint64_t m_in_flight_bytes = 0;
+
+  Tids m_in_flight_io_tids;
+  InFlightExtents m_in_flight_extents;
+
+  BlockedIOs m_blocked_ios;
+  QueuedIOs m_queued_ios;
+  Tids m_queued_or_blocked_io_tids;
+
+  BlockedIOs m_blocked_unoptimized_ios;
+
+  QueuedFlushes m_queued_flushes;
+  Contexts m_in_flight_flushes;
+  Contexts m_pending_flushes;
+  int m_pending_flush_error = 0;
+
+  bool dispatch_unoptimized_io(uint64_t object_no, uint64_t object_off,
+                               uint64_t object_len,
+                               io::DispatchResult* dispatch_result,
+                               Context* on_dispatched);
+  bool dispatch_io(uint64_t object_no, uint64_t object_off,
+                   uint64_t object_len, io::DispatchResult* dispatch_result,
+                   Context** on_finish, Context* on_dispatch);
+
+  bool block_overlapping_io(InFlightObjectExtents* in_flight_object_extents,
+                            uint64_t object_off, uint64_t object_len);
+  void unblock_overlapping_ios(uint64_t object_no, uint64_t object_off,
+                               uint64_t object_len,
+                               Contexts* unoptimized_io_dispatches);
+
+  bool can_dispatch_io(uint64_t tid, uint64_t length);
+
+  void handle_in_flight_io_complete(int r, uint64_t tid, uint64_t object_no,
+                                    uint64_t object_off, uint64_t object_len);
+  void handle_in_flight_flush_complete(int r, uint64_t tid);
+
+  QueuedIOs collect_ready_ios();
+  Contexts collect_ready_flushes();
+  Contexts collect_finished_flushes();
+
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::WriteAroundObjectDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_WRITE_AROUND_OBJECT_DISPATCH_H
index 8f2ae4dc5413b16438f2bc7e7abcf16774da07b5..3024f03a610aed269ed670315bf78fb6979e1050 100644 (file)
@@ -8,6 +8,7 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/Utils.h"
 #include "librbd/cache/ObjectCacherObjectDispatch.h"
+#include "librbd/cache/WriteAroundObjectDispatch.h"
 #include "librbd/image/CloseRequest.h"
 #include "librbd/image/RefreshRequest.h"
 #include "librbd/image/SetSnapRequest.h"
@@ -528,16 +529,27 @@ Context *OpenRequest<I>::send_init_cache(int *result) {
     "rbd_cache_max_dirty");
   auto writethrough_until_flush = m_image_ctx->config.template get_val<bool>(
     "rbd_cache_writethrough_until_flush");
-  auto cache = cache::ObjectCacherObjectDispatch<I>::create(
-    m_image_ctx, max_dirty, writethrough_until_flush);
-  cache->init();
+  auto cache_policy = m_image_ctx->config.template get_val<std::string>(
+    "rbd_cache_policy");
+  if (cache_policy == "writearound") {
+    auto cache = cache::WriteAroundObjectDispatch<I>::create(
+      m_image_ctx, max_dirty, writethrough_until_flush);
+    cache->init();
+  } else if (cache_policy == "writethrough" || cache_policy == "writeback") {
+    if (cache_policy == "writethrough") {
+      max_dirty = 0;
+    }
 
-  // readahead requires the cache
-  m_image_ctx->readahead.set_trigger_requests(
-    m_image_ctx->config.template get_val<uint64_t>("rbd_readahead_trigger_requests"));
-  m_image_ctx->readahead.set_max_readahead_size(
-    m_image_ctx->config.template get_val<Option::size_t>("rbd_readahead_max_bytes"));
+    auto cache = cache::ObjectCacherObjectDispatch<I>::create(
+      m_image_ctx, max_dirty, writethrough_until_flush);
+    cache->init();
 
+    // readahead requires the object cacher cache
+    m_image_ctx->readahead.set_trigger_requests(
+      m_image_ctx->config.template get_val<uint64_t>("rbd_readahead_trigger_requests"));
+    m_image_ctx->readahead.set_max_readahead_size(
+      m_image_ctx->config.template get_val<Option::size_t>("rbd_readahead_max_bytes"));
+  }
   return send_register_watch(result);
 }
 
index 326511bccf8f45b86d4ace7441cdf7ba6da1c678..b0fb77f464a1ebc384bab7929482d5d705f5bdb7 100644 (file)
@@ -50,6 +50,7 @@ set(unittest_librbd_srcs
   test_mock_ObjectMap.cc
   test_mock_TrashWatcher.cc
   test_mock_Watcher.cc
+  cache/test_mock_WriteAroundObjectDispatch.cc
   deep_copy/test_mock_ImageCopyRequest.cc
   deep_copy/test_mock_MetadataCopyRequest.cc
   deep_copy/test_mock_ObjectCopyRequest.cc
diff --git a/src/test/librbd/cache/test_mock_WriteAroundObjectDispatch.cc b/src/test/librbd/cache/test_mock_WriteAroundObjectDispatch.cc
new file mode 100644 (file)
index 0000000..1fa346f
--- /dev/null
@@ -0,0 +1,636 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/librbd/test_mock_fixture.h"
+#include "test/librbd/test_support.h"
+#include "test/librbd/mock/MockImageCtx.h"
+#include "include/rbd/librbd.hpp"
+#include "librbd/cache/WriteAroundObjectDispatch.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+
+namespace librbd {
+namespace {
+
+struct MockTestImageCtx : public MockImageCtx {
+  MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) {
+  }
+};
+
+struct MockContext : public C_SaferCond  {
+  MOCK_METHOD1(complete, void(int));
+  MOCK_METHOD1(finish, void(int));
+
+  void do_complete(int r) {
+    C_SaferCond::complete(r);
+  }
+};
+
+} // anonymous namespace
+} // namespace librbd
+
+#include "librbd/cache/WriteAroundObjectDispatch.cc"
+
+namespace librbd {
+namespace cache {
+
+using ::testing::_;
+using ::testing::DoDefault;
+using ::testing::InSequence;
+using ::testing::Invoke;
+
+struct TestMockCacheWriteAroundObjectDispatch : public TestMockFixture {
+  typedef WriteAroundObjectDispatch<librbd::MockTestImageCtx> MockWriteAroundObjectDispatch;
+
+  void expect_op_work_queue(MockTestImageCtx& mock_image_ctx) {
+    EXPECT_CALL(*mock_image_ctx.op_work_queue, queue(_, _))
+      .WillRepeatedly(Invoke([](Context* ctx, int r) {
+                        ctx->complete(r);
+                      }));
+  }
+
+  void expect_context_complete(MockContext& mock_context, int r) {
+    EXPECT_CALL(mock_context, complete(r))
+      .WillOnce(Invoke([&mock_context](int r) {
+                  mock_context.do_complete(r);
+                }));
+  }
+};
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, WriteThrough) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 0, false);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx;
+  MockContext dispatch_ctx;
+  Context* finish_ctx_ptr = &finish_ctx;
+  ASSERT_FALSE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                     nullptr, nullptr, &dispatch_result,
+                                     &finish_ctx_ptr, &dispatch_ctx));
+  ASSERT_EQ(finish_ctx_ptr, &finish_ctx);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, WriteThroughUntilFlushed) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, true);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx;
+  MockContext dispatch_ctx;
+  Context* finish_ctx_ptr = &finish_ctx;
+  ASSERT_FALSE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                     nullptr, nullptr, &dispatch_result,
+                                     &finish_ctx_ptr, &dispatch_ctx));
+  ASSERT_EQ(finish_ctx_ptr, &finish_ctx);
+
+  ASSERT_FALSE(object_dispatch.flush(io::FLUSH_SOURCE_USER, {}, nullptr,
+                                     &dispatch_result, &finish_ctx_ptr,
+                                     &dispatch_ctx));
+
+  expect_context_complete(dispatch_ctx, 0);
+  expect_context_complete(finish_ctx, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                     nullptr, nullptr, &dispatch_result,
+                                     &finish_ctx_ptr, &dispatch_ctx));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr, &finish_ctx);
+  ASSERT_EQ(0, dispatch_ctx.wait());
+  ASSERT_EQ(0, finish_ctx.wait());
+  finish_ctx_ptr->complete(0);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, DispatchIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx;
+  MockContext dispatch_ctx;
+  Context* finish_ctx_ptr = &finish_ctx;
+
+  expect_context_complete(dispatch_ctx, 0);
+  expect_context_complete(finish_ctx, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr, &dispatch_ctx));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr, &finish_ctx);
+
+  ASSERT_EQ(0, dispatch_ctx.wait());
+  ASSERT_EQ(0, finish_ctx.wait());
+  finish_ctx_ptr->complete(0);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, BlockedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 16384, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+
+  expect_context_complete(dispatch_ctx2, 0);
+  expect_context_complete(finish_ctx2, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 4096, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr2, &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+
+  MockContext finish_ctx3;
+  MockContext dispatch_ctx3;
+  Context* finish_ctx_ptr3 = &finish_ctx3;
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 1024, std::move(data), {}, 0,
+                                    {}, nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr3, &dispatch_ctx3));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr3, &finish_ctx3);
+
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+  ASSERT_EQ(0, finish_ctx1.wait());
+  ASSERT_EQ(0, finish_ctx2.wait());
+  finish_ctx_ptr2->complete(0);
+
+  expect_context_complete(dispatch_ctx3, 0);
+  expect_context_complete(finish_ctx3, 0);
+  finish_ctx_ptr1->complete(0);
+
+  ASSERT_EQ(0, dispatch_ctx3.wait());
+  finish_ctx_ptr3->complete(0);
+  ASSERT_EQ(0, finish_ctx3.wait());
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, QueuedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4095, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 8192, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr2, &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+
+  expect_context_complete(dispatch_ctx2, 0);
+  expect_context_complete(finish_ctx2, 0);
+  finish_ctx_ptr1->complete(0);
+
+  ASSERT_EQ(0, finish_ctx1.wait());
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+  ASSERT_EQ(0, finish_ctx2.wait());
+  finish_ctx_ptr2->complete(0);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, BlockedAndQueuedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 8196, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+
+  expect_context_complete(dispatch_ctx2, 0);
+  expect_context_complete(finish_ctx2, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 4096, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr2, &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+
+  MockContext finish_ctx3;
+  MockContext dispatch_ctx3;
+  Context* finish_ctx_ptr3 = &finish_ctx3;
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0,
+                                    {}, nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr3, &dispatch_ctx3));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr3, &finish_ctx3);
+
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+  ASSERT_EQ(0, finish_ctx1.wait());
+  ASSERT_EQ(0, finish_ctx2.wait());
+  finish_ctx_ptr2->complete(0);
+
+  expect_context_complete(dispatch_ctx3, 0);
+  expect_context_complete(finish_ctx3, 0);
+  finish_ctx_ptr1->complete(0);
+
+  ASSERT_EQ(0, dispatch_ctx3.wait());
+  ASSERT_EQ(0, finish_ctx3.wait());
+  finish_ctx_ptr3->complete(0);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, Flush) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, true);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx;
+  MockContext dispatch_ctx;
+  Context* finish_ctx_ptr = &finish_ctx;
+  ASSERT_FALSE(object_dispatch.flush(io::FLUSH_SOURCE_USER, {}, nullptr,
+                                     &dispatch_result, &finish_ctx_ptr,
+                                     &dispatch_ctx));
+  ASSERT_EQ(finish_ctx_ptr, &finish_ctx);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, FlushQueuedOnInFlightIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+  ASSERT_FALSE(object_dispatch.flush(io::FLUSH_SOURCE_USER, {}, nullptr,
+                                     &dispatch_result, &finish_ctx_ptr2,
+                                     &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+
+  expect_context_complete(finish_ctx2, 0);
+  finish_ctx_ptr1->complete(0);
+  ASSERT_EQ(0, finish_ctx1.wait());
+
+  finish_ctx_ptr2->complete(0);
+  ASSERT_EQ(0, finish_ctx2.wait());
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, FlushQueuedOnQueuedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 8192, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr2, &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+
+  MockContext finish_ctx3;
+  MockContext dispatch_ctx3;
+  Context* finish_ctx_ptr3 = &finish_ctx3;
+  ASSERT_TRUE(object_dispatch.flush(io::FLUSH_SOURCE_USER, {}, nullptr,
+                                    &dispatch_result, &finish_ctx_ptr3,
+                                    &dispatch_ctx3));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr3, &finish_ctx3);
+
+  expect_context_complete(dispatch_ctx2, 0);
+  expect_context_complete(finish_ctx2, 0);
+  expect_context_complete(dispatch_ctx3, 0);
+  finish_ctx_ptr1->complete(0);
+
+  ASSERT_EQ(0, finish_ctx1.wait());
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+  ASSERT_EQ(0, finish_ctx2.wait());
+
+  expect_context_complete(finish_ctx3, 0);
+  finish_ctx_ptr2->complete(0);
+
+  finish_ctx_ptr3->complete(0);
+  ASSERT_EQ(0, finish_ctx3.wait());
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, FlushError) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+  ASSERT_EQ(0, finish_ctx1.wait());
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+  ASSERT_FALSE(object_dispatch.flush(io::FLUSH_SOURCE_USER, {}, nullptr,
+                                     &dispatch_result, &finish_ctx_ptr2,
+                                     &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+
+  expect_context_complete(finish_ctx2, -EPERM);
+  finish_ctx_ptr1->complete(-EPERM);
+  finish_ctx_ptr2->complete(0);
+  ASSERT_EQ(-EPERM, finish_ctx2.wait());
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, UnoptimizedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 16384, false);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx;
+  MockContext dispatch_ctx;
+  Context* finish_ctx_ptr = &finish_ctx;
+
+  ASSERT_FALSE(object_dispatch.compare_and_write("oid", 0, 0, std::move(data),
+                                                 std::move(data), {}, 0, {},
+                                                 nullptr, nullptr, nullptr,
+                                                 &dispatch_result,
+                                                 &finish_ctx_ptr,
+                                                 &dispatch_ctx));
+  ASSERT_EQ(finish_ctx_ptr, &finish_ctx);
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, UnoptimizedIOInFlightIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 16384, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+  ASSERT_EQ(0, finish_ctx1.wait());
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+  ASSERT_TRUE(object_dispatch.compare_and_write("oid", 0, 0, std::move(data),
+                                                std::move(data), {}, 0, {},
+                                                nullptr, nullptr, nullptr,
+                                                &dispatch_result,
+                                                &finish_ctx_ptr2,
+                                                &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_EQ(finish_ctx_ptr2, &finish_ctx2);
+
+  expect_context_complete(dispatch_ctx2, 0);
+  finish_ctx_ptr1->complete(0);
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+}
+
+TEST_F(TestMockCacheWriteAroundObjectDispatch, UnoptimizedIOBlockedIO) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockTestImageCtx mock_image_ctx(*ictx);
+  MockWriteAroundObjectDispatch object_dispatch(&mock_image_ctx, 4096, false);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  bufferlist data;
+  data.append(std::string(4096, '1'));
+
+  io::DispatchResult dispatch_result;
+  MockContext finish_ctx1;
+  MockContext dispatch_ctx1;
+  Context* finish_ctx_ptr1 = &finish_ctx1;
+
+  expect_context_complete(dispatch_ctx1, 0);
+  expect_context_complete(finish_ctx1, 0);
+
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 0, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr1, &dispatch_ctx1));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr1, &finish_ctx1);
+  ASSERT_EQ(0, dispatch_ctx1.wait());
+  ASSERT_EQ(0, finish_ctx1.wait());
+
+  MockContext finish_ctx2;
+  MockContext dispatch_ctx2;
+  Context* finish_ctx_ptr2 = &finish_ctx2;
+  ASSERT_TRUE(object_dispatch.write("oid", 0, 4096, std::move(data), {}, 0, {},
+                                    nullptr, nullptr, &dispatch_result,
+                                    &finish_ctx_ptr2, &dispatch_ctx2));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_NE(finish_ctx_ptr2, &finish_ctx2);
+
+  MockContext finish_ctx3;
+  MockContext dispatch_ctx3;
+  Context* finish_ctx_ptr3 = &finish_ctx3;
+  ASSERT_TRUE(object_dispatch.compare_and_write("oid", 0, 0, std::move(data),
+                                                std::move(data), {}, 0, {},
+                                                nullptr, nullptr, nullptr,
+                                                &dispatch_result,
+                                                &finish_ctx_ptr3,
+                                                &dispatch_ctx3));
+  ASSERT_EQ(io::DISPATCH_RESULT_CONTINUE, dispatch_result);
+  ASSERT_EQ(finish_ctx_ptr3, &finish_ctx3);
+
+  expect_context_complete(dispatch_ctx3, 0);
+  expect_context_complete(dispatch_ctx2, 0);
+  expect_context_complete(finish_ctx2, 0);
+  finish_ctx_ptr1->complete(0);
+  ASSERT_EQ(0, dispatch_ctx3.wait());
+  ASSERT_EQ(0, dispatch_ctx2.wait());
+  ASSERT_EQ(0, finish_ctx2.wait());
+  finish_ctx_ptr2->complete(0);
+}
+
+} // namespace cache
+} // namespace librbd
index d6c8fac62c0c798f1a2ec6be36e6ede07b4376e6..c220b2bee6eb19293a3b252660ec59f6e9aa3c79 100644 (file)
@@ -141,8 +141,8 @@ struct TestMigration : public TestFixture {
                   << std::endl;
         char *c = getenv("TEST_RBD_MIGRATION_VERBOSE");
         if (c != NULL && *c != '\0') {
-          std::cout << "src block: " << std::endl; src_bl.hexdump(std::cout);
-          std::cout << "dst block: " << std::endl; dst_bl.hexdump(std::cout);
+          std::cout << "src block: " << src_ictx->id << ": " << std::endl; src_bl.hexdump(std::cout);
+          std::cout << "dst block: " << dst_ictx->id << ": " << std::endl; dst_bl.hexdump(std::cout);
         }
       }
       EXPECT_TRUE(src_bl.contents_equal(dst_bl));