]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd: enable librbd hook for shared RO cache
authorshangdehao1 <dehao.shang@intel.com>
Wed, 17 Apr 2019 07:58:16 +0000 (15:58 +0800)
committerJason Dillaman <dillaman@redhat.com>
Mon, 24 Jun 2019 21:35:40 +0000 (17:35 -0400)
Signed-off-by: Dehao Shang <dehao.shang@intel.com>
Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
src/common/options.cc
src/doc/howtotestsharedrocache.txt [new file with mode: 0644]
src/librbd/CMakeLists.txt
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/cache/SharedPersistentObjectCacher.cc [new file with mode: 0644]
src/librbd/cache/SharedPersistentObjectCacher.h [new file with mode: 0644]
src/librbd/cache/SharedReadOnlyObjectDispatch.cc [new file with mode: 0644]
src/librbd/cache/SharedReadOnlyObjectDispatch.h [new file with mode: 0644]
src/librbd/image/OpenRequest.cc
src/librbd/io/Types.h

index fac1502429b8a441fe71986854934b57b7c5d1a4..304f513f72109c73add9bd55854ffd25e9291912 100644 (file)
@@ -7148,6 +7148,10 @@ static std::vector<Option> get_rbd_options() {
     .set_default(false)
     .set_description("whether to block writes to the cache before the aio_write call completes"),
 
+    Option("rbd_shared_cache_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description("whether to enable rbd shared ro cache"),
+
     Option("rbd_concurrent_management_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
     .set_default(10)
     .set_min(1)
diff --git a/src/doc/howtotestsharedrocache.txt b/src/doc/howtotestsharedrocache.txt
new file mode 100644 (file)
index 0000000..7355a0c
--- /dev/null
@@ -0,0 +1,26 @@
+0. Set up a parent image and a clone image
+
+```
+rbd create testimage --size 100M
+rbd snap create testimage@snap1
+rbd snap protect testimage@snap1
+rbd clone testimage@snap1 child_image
+```
+
+1. start the ceph-immutable-obj-cache daemon
+
+```
+ceph-immutable-obj-cache
+```
+By default, cached objects are promoted to /tmp/ceph_immutable_obj_cache/ and the total cache size is set to 1G.
+The shared cache feature itself is disabled by default; enable it and change these parameters by setting new values in ceph.conf, for example:
+```
+rbd shared cache enabled = true
+immutable object cache path = /mnt/cache # SSD based cache dir
+immutable object cache max size = 16G
+```
+For now the daemon simply cleans the whole cache on start, so if you have a lot of old cached data it may take a while to start.
+
+2. start randread or read jobs against child image
+Note that this is a shared read-only cache, so all writes go directly to RADOS and only reads benefit from the cache.
+
index af6217c06d69d522cf509045ee07cad6d43e4499..4150ef604d814f4cd00397671ac0dd9d28952344 100644 (file)
@@ -36,6 +36,8 @@ set(librbd_internal_srcs
   api/Trash.cc
   cache/ImageWriteback.cc
   cache/ObjectCacherObjectDispatch.cc
+  cache/SharedReadOnlyObjectDispatch.cc
+  cache/SharedPersistentObjectCacher.cc
   cache/ObjectCacherWriteback.cc
   cache/PassthroughImageCache.cc
   cache/WriteAroundObjectDispatch.cc
@@ -155,6 +157,7 @@ if(WITH_LTTNG AND WITH_EVENTTRACE)
   add_dependencies(rbd_internal eventtrace_tp)
 endif()
 target_link_libraries(rbd_internal PRIVATE
+  ceph_immutable_object_cache_lib
   osdc)
 
 add_library(librbd ${CEPH_SHARED}
index 5a8bf337745a7954f1d6bb8d27e530b87de23d47..4830aa4c82a34f0386f684bb4319fe8605925a27 100644 (file)
@@ -769,6 +769,7 @@ public:
     ASSIGN_OPTION(skip_partial_discard, bool);
     ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
     ASSIGN_OPTION(blkin_trace_all, bool);
+    ASSIGN_OPTION(shared_cache_enabled, bool);
 
 #undef ASSIGN_OPTION
 
index 32f5f5f2b965558427502dfa80adee1c9702c5ce..f0ae7a1c14057c5752ee9db45cc97148dc8aeab1 100644 (file)
@@ -195,6 +195,9 @@ namespace librbd {
     uint64_t mtime_update_interval;
     uint64_t atime_update_interval;
 
+    bool shared_cache_enabled;
+    std::string shared_cache_path;
+
     LibrbdAdminSocketHook *asok_hook;
 
     exclusive_lock::Policy *exclusive_lock_policy = nullptr;
diff --git a/src/librbd/cache/SharedPersistentObjectCacher.cc b/src/librbd/cache/SharedPersistentObjectCacher.cc
new file mode 100644 (file)
index 0000000..79a549a
--- /dev/null
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/cache/SharedPersistentObjectCacher.h"
+#include "include/buffer.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::SharedPersistentObjectCacher: " << this \
+                           << " " <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+// Stores the image context and cache path and creates the file-map lock;
+// no I/O is performed here.
+template <typename I>
+SharedPersistentObjectCacher<I>::SharedPersistentObjectCacher(
+    I *image_ctx, std::string cache_path)
+  : m_image_ctx(image_ctx),
+    m_cache_path(cache_path),
+    m_file_map_lock("librbd::cache::SharedObjectCacher::filemaplock") {
+  ldout(m_image_ctx->cct, 20) << dendl;
+}
+
+// Nothing to release explicitly: the image context is held as a raw pointer
+// that is not deleted here, and the remaining members clean up themselves.
+template <typename I>
+SharedPersistentObjectCacher<I>::~SharedPersistentObjectCacher() {
+}
+
+template <typename I>
+int SharedPersistentObjectCacher<I>::read_object(std::string file_path,
+        ceph::bufferlist* read_data, uint64_t offset, uint64_t length,
+        Context *on_finish) {
+
+  auto *cct = m_image_ctx->cct;
+  ldout(cct, 20) << "file path: " << file_path << dendl;
+
+  std::string error;
+  int ret = read_data->pread_file(file_path.c_str(), offset, length, &error);
+  if (ret < 0) {
+    ldout(cct, 5) << "read from file return error: " << error
+                  << "file path= " << file_path
+                  << dendl;
+    return ret;
+  }
+
+  if (read_data->length() != length) {
+    ceph_assert(ret < length);
+    read_data->append("0", length - read_data->length());
+  }
+
+  ceph_assert(read_data->length() == length);
+
+  return length;
+}
+
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::SharedPersistentObjectCacher<librbd::ImageCtx>;
diff --git a/src/librbd/cache/SharedPersistentObjectCacher.h b/src/librbd/cache/SharedPersistentObjectCacher.h
new file mode 100644 (file)
index 0000000..f9f7fc2
--- /dev/null
@@ -0,0 +1,42 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
+#define CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
+
+#include "include/buffer_fwd.h"
+#include "include/int_types.h"
+#include "common/Mutex.h"
+#include <vector>
+#include <unordered_map>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+
+// Helper that reads object data out of local cache files on behalf of an
+// image context.
+template <typename ImageCtxT>
+class SharedPersistentObjectCacher {
+public:
+
+  // Keeps a pointer to the image context and a copy of the cache path.
+  SharedPersistentObjectCacher(ImageCtxT *image_ctx, std::string cache_path);
+  ~SharedPersistentObjectCacher();
+
+  // Reads `length` bytes at `offset` from the file at `file_path` into
+  // `read_data`. Returns `length` on success or a negative error code on
+  // failure. `on_finish` is not used by the current implementation.
+  int read_object(std::string file_path, ceph::bufferlist* read_data,
+                 uint64_t offset, uint64_t length, Context *on_finish);
+
+private:
+  ImageCtxT *m_image_ctx;    // not owned
+  std::string m_cache_path;  // stored at construction; not read by read_object
+  Mutex m_file_map_lock;     // NOTE(review): never taken in the visible code
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::SharedPersistentObjectCacher<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
diff --git a/src/librbd/cache/SharedReadOnlyObjectDispatch.cc b/src/librbd/cache/SharedReadOnlyObjectDispatch.cc
new file mode 100644 (file)
index 0000000..24eb72e
--- /dev/null
@@ -0,0 +1,169 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/LibrbdWriteback.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+#include "librbd/io/ObjectDispatcher.h"
+#include "librbd/io/Utils.h"
+#include "librbd/cache/SharedReadOnlyObjectDispatch.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::SharedReadOnlyObjectDispatch: " \
+                           << this << " " << __func__ << ": "
+using namespace ceph::immutable_obj_cache;
+
+namespace librbd {
+namespace cache {
+
+// Only records the image context; the cache client and the object store are
+// created later, by init() and handle_register_client() respectively.
+template <typename I>
+SharedReadOnlyObjectDispatch<I>::SharedReadOnlyObjectDispatch(I* image_ctx)
+  : m_image_ctx(image_ctx) {
+}
+
+// Releases the object store and the cache client; both pointers may still be
+// null when init() or the client registration never completed.
+template <typename I>
+SharedReadOnlyObjectDispatch<I>::~SharedReadOnlyObjectDispatch() {
+  delete m_object_store;
+  delete m_cache_client;
+}
+
+// TODO if connect fails, init will return error to high layer.
+template <typename I>
+void SharedReadOnlyObjectDispatch<I>::init() {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  if (m_image_ctx->parent != nullptr) {
+    ldout(cct, 5) << "child image: skipping" << dendl;
+    return;
+  }
+
+  ldout(cct, 5) << "parent image: setup SRO cache client" << dendl;
+
+  std::string controller_path = ((CephContext*)cct)->_conf.get_val<std::string>("immutable_object_cache_sock");
+  m_cache_client = new ceph::immutable_obj_cache::CacheClient(controller_path.c_str(), m_image_ctx->cct);
+  m_cache_client->run();
+
+  int ret = m_cache_client->connect();
+  if (ret < 0) {
+    ldout(cct, 5) << "SRO cache client fail to connect with local controller: "
+                  << "please start ceph-immutable-object-cache daemon"
+                 << dendl;
+  } else {
+    ldout(cct, 5) << "SRO cache client to register volume "
+                  << "name = " << m_image_ctx->id
+                  << " on ceph-immutable-object-cache daemon"
+                  << dendl;
+
+    auto ctx = new FunctionContext([this](bool reg) {
+      handle_register_client(reg);
+    });
+    ret = m_cache_client->register_client(ctx);
+
+    if (ret >= 0) {
+      // add ourself to the IO object dispatcher chain
+      m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
+    }
+  }
+}
+
+template <typename I>
+bool SharedReadOnlyObjectDispatch<I>::read(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, librados::snap_t snap_id, int op_flags,
+    const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+    io::ExtentMap* extent_map, int* object_dispatch_flags,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  // if any session failed, reads will go to rados
+  if(!m_cache_client->is_session_work()) {
+    ldout(cct, 5) << "SRO cache client session failed " << dendl;
+    *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+    on_dispatched->complete(0);
+    return true;
+  }
+
+  auto ctx = new LambdaGenContext<std::function<void(ObjectCacheRequest*)>,
+      ObjectCacheRequest*>([this, snap_id, read_data, dispatch_result, on_dispatched,
+      oid, object_off, object_len](ObjectCacheRequest* ack) {
+
+    if (ack->type == RBDSC_READ_REPLY) {
+      std::string file_path = ((ObjectCacheReadReplyData*)ack)->cache_path;
+      ceph_assert(file_path != "");
+      handle_read_cache(file_path, object_off, object_len, read_data,
+                        dispatch_result, on_dispatched);
+    } else {
+      // go back to read rados
+      *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+      on_dispatched->complete(0);
+    }
+  });
+
+  if (m_cache_client && m_cache_client->is_session_work() && m_object_store) {
+
+    m_cache_client->lookup_object(m_image_ctx->data_ctx.get_namespace(),
+                                  m_image_ctx->data_ctx.get_id(),
+                                  (uint64_t)snap_id, oid, ctx);
+  }
+  return true;
+}
+
+template <typename I>
+int SharedReadOnlyObjectDispatch<I>::handle_read_cache(
+    const std::string file_path, uint64_t read_off,
+    uint64_t read_len, ceph::bufferlist* read_data,
+    io::DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  // try to read from parent image cache
+  int r = m_object_store->read_object(file_path, read_data, read_off, read_len, on_dispatched);
+  if (r == read_len) {
+    *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
+    //TODO(): complete in syncfile
+    on_dispatched->complete(r);
+    ldout(cct, 20) << "read cache: " << *dispatch_result <<dendl;
+    return true;
+  }
+
+  // cache read error, fall back to read rados
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  on_dispatched->complete(0);
+  return false;
+}
+
+// Callback for the client registration request issued by init().
+// When `reg` is true the object store used for cache-file reads is created;
+// when it is false nothing happens. Always returns 0.
+template <typename I>
+int SharedReadOnlyObjectDispatch<I>::handle_register_client(bool reg) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  if (!reg) {
+    return 0;
+  }
+
+  ldout(cct, 20) << "SRO cache client open cache handler" << dendl;
+  m_object_store = new SharedPersistentObjectCacher<I>(
+    m_image_ctx, m_image_ctx->shared_cache_path);
+  return 0;
+}
+
+// Placeholder: only emits a debug log line; `msg` is not inspected.
+template <typename I>
+void SharedReadOnlyObjectDispatch<I>::client_handle_request(std::string msg) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::SharedReadOnlyObjectDispatch<librbd::ImageCtx>;
diff --git a/src/librbd/cache/SharedReadOnlyObjectDispatch.h b/src/librbd/cache/SharedReadOnlyObjectDispatch.h
new file mode 100644 (file)
index 0000000..3195996
--- /dev/null
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
+#define CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
+
+#include "common/Mutex.h"
+#include "SharedPersistentObjectCacher.h"
+#include "librbd/io/ObjectDispatchInterface.h"
+#include "tools/immutable_object_cache/CacheClient.h"
+#include "tools/immutable_object_cache/Types.h"
+
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace cache {
+
+// Object dispatch layer that tries to serve object reads from the shared
+// read-only cache through a local cache daemon client. Only read() does real
+// work; every other request type returns false without touching the request.
+// NOTE(review): presumably returning false tells the dispatcher that this
+// layer did not handle the request -- confirm against ObjectDispatchInterface.
+template <typename ImageCtxT = ImageCtx>
+class SharedReadOnlyObjectDispatch : public io::ObjectDispatchInterface {
+public:
+  // Heap-allocates a new dispatch layer for `image_ctx`.
+  static SharedReadOnlyObjectDispatch* create(ImageCtxT* image_ctx) {
+    return new SharedReadOnlyObjectDispatch(image_ctx);
+  }
+
+  SharedReadOnlyObjectDispatch(ImageCtxT* image_ctx);
+  ~SharedReadOnlyObjectDispatch() override;
+
+  io::ObjectDispatchLayer get_object_dispatch_layer() const override {
+    return io::OBJECT_DISPATCH_LAYER_SHARED_PERSISTENT_CACHE;
+  }
+
+  // Connects to the cache daemon and, on success, registers this layer with
+  // the image's object dispatcher.
+  void init();
+  // No teardown work of its own: just queues `on_finish` with result 0 on
+  // the image's op work queue.
+  void shut_down(Context* on_finish) {
+    m_image_ctx->op_work_queue->queue(on_finish, 0);
+  }
+
+  // Looks the object up in the cache daemon and reads it from the cache file
+  // when possible; otherwise lets the request continue.
+  bool read(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, librados::snap_t snap_id, int op_flags,
+      const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+      io::ExtentMap* extent_map, int* object_dispatch_flags,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  // Not handled by this layer.
+  bool discard(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool write_same(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, io::Extents&& buffer_extents,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool compare_and_write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+      const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+      int* object_dispatch_flags, uint64_t* journal_tid,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool flush(
+      io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool invalidate_cache(Context* on_finish) {
+    return false;
+  }
+
+  // Not handled by this layer.
+  bool reset_existence_cache(Context* on_finish) {
+    return false;
+  }
+
+  // Intentionally empty.
+  void extent_overwritten(
+      uint64_t object_no, uint64_t object_off, uint64_t object_len,
+      uint64_t journal_tid, uint64_t new_journal_tid) {
+  }
+
+private:
+
+  // Reads the requested range from the cache file and completes the
+  // dispatch, falling back to DISPATCH_RESULT_CONTINUE on failure.
+  int handle_read_cache(
+      const std::string file_path, uint64_t read_off,
+      uint64_t read_len, ceph::bufferlist* read_data,
+      io::DispatchResult* dispatch_result,
+      Context* on_dispatched);
+  // Registration callback; creates m_object_store when `reg` is true.
+  int handle_register_client(bool reg);
+  // Currently a stub that only logs.
+  void client_handle_request(std::string msg);
+
+  ImageCtxT* m_image_ctx;
+
+  // Both are created lazily and deleted in the destructor.
+  ceph::immutable_obj_cache::CacheClient *m_cache_client = nullptr;
+  SharedPersistentObjectCacher<ImageCtxT> *m_object_store = nullptr;
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::SharedReadOnlyObjectDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
index 325107d9e2efa7884e0b58b56e5b6fd737c75d59..c50145587e986df29a0ff926e189508e0829e81f 100644 (file)
@@ -9,6 +9,7 @@
 #include "librbd/Utils.h"
 #include "librbd/cache/ObjectCacherObjectDispatch.h"
 #include "librbd/cache/WriteAroundObjectDispatch.h"
+#include "librbd/cache/SharedReadOnlyObjectDispatch.cc"
 #include "librbd/image/CloseRequest.h"
 #include "librbd/image/RefreshRequest.h"
 #include "librbd/image/SetSnapRequest.h"
@@ -518,11 +519,18 @@ Context *OpenRequest<I>::handle_refresh(int *result) {
 template <typename I>
 Context *OpenRequest<I>::send_init_cache(int *result) {
   // cache is disabled or parent image context
+  CephContext *cct = m_image_ctx->cct;
+
   if (!m_image_ctx->cache || m_image_ctx->child != nullptr) {
+     // enable Shared Read-only cache for parent image
+    if (m_image_ctx->child != nullptr && m_image_ctx->shared_cache_enabled ) {
+      ldout(cct, 10) << this << " " << "setting up parent cache"<< dendl;
+      auto sro_cache = cache::SharedReadOnlyObjectDispatch<I>::create(m_image_ctx);
+      sro_cache->init();
+    }
     return send_register_watch(result);
   }
 
-  CephContext *cct = m_image_ctx->cct;
   ldout(cct, 10) << this << " " << __func__ << dendl;
 
   size_t max_dirty = m_image_ctx->config.template get_val<Option::size_t>(
index 4847d11dfca40ae37447a71a1b1d327722bfd36a..69605433c5675ea42af61cfacb5300fab619a519 100644 (file)
@@ -61,6 +61,7 @@ enum ObjectDispatchLayer {
   OBJECT_DISPATCH_LAYER_NONE = 0,
   OBJECT_DISPATCH_LAYER_CACHE,
   OBJECT_DISPATCH_LAYER_JOURNAL,
+  OBJECT_DISPATCH_LAYER_SHARED_PERSISTENT_CACHE,
   OBJECT_DISPATCH_LAYER_SCHEDULER,
   OBJECT_DISPATCH_LAYER_CORE,
   OBJECT_DISPATCH_LAYER_LAST