.set_default(false)
.set_description("whether to block writes to the cache before the aio_write call completes"),
+ Option("rbd_shared_cache_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+ .set_default(false)
+ .set_description("whether to enable rbd shared ro cache"),
+
Option("rbd_concurrent_management_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(10)
.set_min(1)
--- /dev/null
+0. setup a parent image and a clone image
+
+```
+rbd create testimage --size 100M
+rbd snap create testimage@snap1
+rbd snap protect testimage@snap1
+rbd clone testimage@snap1 child_image
+```
+
+1. start the ceph-immutable-obj-cache daemon
+
+```
+ceph-immutable-obj-cache
+```
+The object cache files will be stored under /tmp/ceph_immutable_obj_cache/ and the total cache size is limited to 1G.
+By default this feature is disabled, so you may change these parameters by setting new values in ceph.conf, for example:
+```
+rbd shared cache enabled = true
+immutable object cache path = /mnt/cache # SSD based cache dir
+immutable object cache max size = 16G
+```
+For now the daemon simply cleans all cached data on start, so if you have lots of old cache it may take a while to start.
+
+2. start randread or read jobs against child image
+Note this is a shared read-only cache, so all writes go to RADOS directly and only reads can benefit from the cache.
+
api/Trash.cc
cache/ImageWriteback.cc
cache/ObjectCacherObjectDispatch.cc
+ cache/SharedReadOnlyObjectDispatch.cc
+ cache/SharedPersistentObjectCacher.cc
cache/ObjectCacherWriteback.cc
cache/PassthroughImageCache.cc
cache/WriteAroundObjectDispatch.cc
add_dependencies(rbd_internal eventtrace_tp)
endif()
target_link_libraries(rbd_internal PRIVATE
+ ceph_immutable_object_cache_lib
osdc)
add_library(librbd ${CEPH_SHARED}
ASSIGN_OPTION(skip_partial_discard, bool);
ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
ASSIGN_OPTION(blkin_trace_all, bool);
+ ASSIGN_OPTION(shared_cache_enabled, bool);
#undef ASSIGN_OPTION
uint64_t mtime_update_interval;
uint64_t atime_update_interval;
+ bool shared_cache_enabled;
+ std::string shared_cache_path;
+
LibrbdAdminSocketHook *asok_hook;
exclusive_lock::Policy *exclusive_lock_policy = nullptr;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/cache/SharedPersistentObjectCacher.h"
+#include "include/buffer.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::SharedPersistentObjectCacher: " << this \
+ << " " << __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+// Construct a cacher bound to one image; cache_path is the root directory
+// of the daemon-managed cache files (from rbd_shared_cache_path config).
+template <typename I>
+SharedPersistentObjectCacher<I>::SharedPersistentObjectCacher(I *image_ctx, std::string cache_path)
+  : m_image_ctx(image_ctx), m_cache_path(cache_path),
+    m_file_map_lock("librbd::cache::SharedObjectCacher::filemaplock") {
+  auto *cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+}
+
+// No owned resources to release: m_image_ctx is borrowed and the cache
+// files on disk belong to the ceph-immutable-obj-cache daemon.
+template <typename I>
+SharedPersistentObjectCacher<I>::~SharedPersistentObjectCacher() {
+}
+
+// Read [offset, offset+length) of the cache file at file_path into
+// *read_data.  A cache file may be shorter than the requested extent, in
+// which case the result is zero-padded up to `length`.  Returns `length`
+// on success or the negative error code from pread_file on failure.
+// `on_finish` is not completed here -- the caller owns its completion.
+template <typename I>
+int SharedPersistentObjectCacher<I>::read_object(std::string file_path,
+  ceph::bufferlist* read_data, uint64_t offset, uint64_t length,
+  Context *on_finish) {
+
+  auto *cct = m_image_ctx->cct;
+  ldout(cct, 20) << "file path: " << file_path << dendl;
+
+  std::string error;
+  int ret = read_data->pread_file(file_path.c_str(), offset, length, &error);
+  if (ret < 0) {
+    lderr(cct) << "read from file returned error: " << error
+               << ", file path: " << file_path
+               << dendl;
+    return ret;
+  }
+
+  if (read_data->length() != length) {
+    // short read: the cache file ends before the requested extent.
+    // append_zero() pads with zeroes; the previous append("0", n) copied
+    // n bytes out of a 1-byte string literal (buffer over-read + garbage
+    // padding).  Cast avoids the signed/unsigned comparison in the assert.
+    ceph_assert(static_cast<uint64_t>(ret) < length);
+    read_data->append_zero(length - read_data->length());
+  }
+
+  ceph_assert(read_data->length() == length);
+
+  return length;
+}
+
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::SharedPersistentObjectCacher<librbd::ImageCtx>;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
+#define CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
+
+#include "include/buffer_fwd.h"
+#include "include/int_types.h"
+#include "common/Mutex.h"
+#include <vector>
+#include <unordered_map>
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+
+// Thin reader over the object files kept on local disk by the
+// ceph-immutable-obj-cache daemon; one instance per parent image.
+template <typename ImageCtxT>
+class SharedPersistentObjectCacher {
+public:
+
+  SharedPersistentObjectCacher(ImageCtxT *image_ctx, std::string cache_path);
+  ~SharedPersistentObjectCacher();
+
+  // Read [offset, offset+length) of the cache file at file_path into
+  // *read_data; returns length on success, negative error code on failure.
+  int read_object(std::string file_path, ceph::bufferlist* read_data,
+    uint64_t offset, uint64_t length, Context *on_finish);
+
+private:
+  ImageCtxT *m_image_ctx;   // borrowed, not owned
+  std::string m_cache_path; // root directory of the local cache files
+  Mutex m_file_map_lock;    // NOTE(review): currently unused -- confirm intent
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::SharedPersistentObjectCacher<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/LibrbdWriteback.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+#include "librbd/io/ObjectDispatcher.h"
+#include "librbd/io/Utils.h"
+#include "librbd/cache/SharedReadOnlyObjectDispatch.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::SharedReadOnlyObjectDispatch: " \
+ << this << " " << __func__ << ": "
+using namespace ceph::immutable_obj_cache;
+
+namespace librbd {
+namespace cache {
+
+template <typename I>
+SharedReadOnlyObjectDispatch<I>::SharedReadOnlyObjectDispatch(
+  I* image_ctx) : m_image_ctx(image_ctx) {
+}
+
+// Owns the cache client and the local object store created in
+// init()/handle_register_client(); both may be nullptr if init() was
+// never run or registration failed.
+template <typename I>
+SharedReadOnlyObjectDispatch<I>::~SharedReadOnlyObjectDispatch() {
+  delete m_object_store;
+  delete m_cache_client;
+}
+
+// Connect to the local ceph-immutable-obj-cache daemon and, once the
+// client is registered, add this layer to the image's object dispatcher.
+// On connect failure the layer is simply not registered and reads fall
+// through to rados.
+// TODO if connect fails, init will return error to high layer.
+template <typename I>
+void SharedReadOnlyObjectDispatch<I>::init() {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  // NOTE(review): this skips images that themselves have a parent; the
+  // cache is attached only to the top-level (parent) image context --
+  // confirm this matches the child-side setup in OpenRequest.
+  if (m_image_ctx->parent != nullptr) {
+    ldout(cct, 5) << "child image: skipping" << dendl;
+    return;
+  }
+
+  ldout(cct, 5) << "parent image: setup SRO cache client" << dendl;
+
+  // unix-domain socket path of the daemon
+  std::string controller_path = ((CephContext*)cct)->_conf.get_val<std::string>("immutable_object_cache_sock");
+  m_cache_client = new ceph::immutable_obj_cache::CacheClient(controller_path.c_str(), m_image_ctx->cct);
+  m_cache_client->run();
+
+  int ret = m_cache_client->connect();
+  if (ret < 0) {
+    ldout(cct, 5) << "SRO cache client fail to connect with local controller: "
+                  << "please start ceph-immutable-object-cache daemon"
+                  << dendl;
+  } else {
+    ldout(cct, 5) << "SRO cache client to register volume "
+                  << "name = " << m_image_ctx->id
+                  << " on ceph-immutable-object-cache daemon"
+                  << dendl;
+
+    // registration completes asynchronously; the callback opens the
+    // local object store (see handle_register_client)
+    auto ctx = new FunctionContext([this](bool reg) {
+      handle_register_client(reg);
+    });
+    ret = m_cache_client->register_client(ctx);
+
+    if (ret >= 0) {
+      // add ourself to the IO object dispatcher chain
+      m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
+    }
+  }
+}
+
+// Dispatch a read: ask the daemon whether the object is cached; serve it
+// from the local cache file on a hit, otherwise continue to the next
+// dispatch layer (rados).  Always returns true -- the request is either
+// completed here or explicitly continued.
+template <typename I>
+bool SharedReadOnlyObjectDispatch<I>::read(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, librados::snap_t snap_id, int op_flags,
+    const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+    io::ExtentMap* extent_map, int* object_dispatch_flags,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  // if the session is unusable or the local object store has not been
+  // opened yet, reads go straight to rados.  this single up-front guard
+  // replaces the old post-hoc check that, when false, leaked the lookup
+  // context and never completed on_dispatched (hung I/O).
+  if (m_cache_client == nullptr || !m_cache_client->is_session_work() ||
+      m_object_store == nullptr) {
+    ldout(cct, 5) << "SRO cache client session failed " << dendl;
+    *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+    on_dispatched->complete(0);
+    return true;
+  }
+
+  auto ctx = new LambdaGenContext<std::function<void(ObjectCacheRequest*)>,
+       ObjectCacheRequest*>([this, snap_id, read_data, dispatch_result, on_dispatched,
+       oid, object_off, object_len](ObjectCacheRequest* ack) {
+
+    if (ack->type == RBDSC_READ_REPLY) {
+      // cache hit: read from the daemon-managed local file
+      std::string file_path = ((ObjectCacheReadReplyData*)ack)->cache_path;
+      ceph_assert(file_path != "");
+      handle_read_cache(file_path, object_off, object_len, read_data,
+                        dispatch_result, on_dispatched);
+    } else {
+      // cache miss or daemon error: go back to read rados
+      *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+      on_dispatched->complete(0);
+    }
+  });
+
+  m_cache_client->lookup_object(m_image_ctx->data_ctx.get_namespace(),
+                                m_image_ctx->data_ctx.get_id(),
+                                (uint64_t)snap_id, oid, ctx);
+  return true;
+}
+
+// Read the requested extent from the local cache file and complete the
+// dispatch.  Returns 0 when the read was fully served from the cache,
+// otherwise a negative error code after redirecting the I/O to rados.
+// (The old version returned bool literals from an int function, so the
+// failure path yielded 0 -- "success" under errno convention.)
+template <typename I>
+int SharedReadOnlyObjectDispatch<I>::handle_read_cache(
+    const std::string file_path, uint64_t read_off,
+    uint64_t read_len, ceph::bufferlist* read_data,
+    io::DispatchResult* dispatch_result, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  // try to read from parent image cache
+  int r = m_object_store->read_object(file_path, read_data, read_off,
+                                      read_len, on_dispatched);
+  if (r >= 0 && static_cast<uint64_t>(r) == read_len) {
+    *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
+    //TODO(): complete in syncfile
+    on_dispatched->complete(r);
+    ldout(cct, 20) << "read cache: " << *dispatch_result << dendl;
+    return 0;
+  }
+
+  // cache read error, fall back to read rados
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  on_dispatched->complete(0);
+  return (r < 0) ? r : -1;
+}
+
+// Completion callback for CacheClient::register_client(): on success,
+// open the local object store so reads can be served from cache files.
+// NOTE(review): reg == false is silently ignored (no error log, dispatch
+// layer stays registered but every read will fall through) -- confirm.
+template <typename I>
+int SharedReadOnlyObjectDispatch<I>::handle_register_client(bool reg) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  if (reg) {
+    ldout(cct, 20) << "SRO cache client open cache handler" << dendl;
+    m_object_store = new SharedPersistentObjectCacher<I>(m_image_ctx, m_image_ctx->shared_cache_path);
+  }
+  return 0;
+}
+
+// Handler for unsolicited messages from the cache daemon; currently a
+// stub that only logs (the message is ignored).
+template <typename I>
+void SharedReadOnlyObjectDispatch<I>::client_handle_request(std::string msg) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::SharedReadOnlyObjectDispatch<librbd::ImageCtx>;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
+#define CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
+
+#include "common/Mutex.h"
+#include "SharedPersistentObjectCacher.h"
+#include "librbd/io/ObjectDispatchInterface.h"
+#include "tools/immutable_object_cache/CacheClient.h"
+#include "tools/immutable_object_cache/Types.h"
+
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace cache {
+
+// Object-dispatch layer that serves reads of a parent (read-only) image
+// from the local cache files managed by the ceph-immutable-obj-cache
+// daemon.  All mutating operations pass straight through to lower layers.
+template <typename ImageCtxT = ImageCtx>
+class SharedReadOnlyObjectDispatch : public io::ObjectDispatchInterface {
+public:
+  static SharedReadOnlyObjectDispatch* create(ImageCtxT* image_ctx) {
+    return new SharedReadOnlyObjectDispatch(image_ctx);
+  }
+
+  SharedReadOnlyObjectDispatch(ImageCtxT* image_ctx);
+  ~SharedReadOnlyObjectDispatch() override;
+
+  io::ObjectDispatchLayer get_object_dispatch_layer() const override {
+    return io::OBJECT_DISPATCH_LAYER_SHARED_PERSISTENT_CACHE;
+  }
+
+  // connect to the cache daemon and register this layer on success
+  void init();
+  // `override` added below on every interface method (matching the style
+  // already used on read()/the destructor) so signature drift is caught
+  // at compile time
+  void shut_down(Context* on_finish) override {
+    m_image_ctx->op_work_queue->queue(on_finish, 0);
+  }
+
+  bool read(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, librados::snap_t snap_id, int op_flags,
+      const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+      io::ExtentMap* extent_map, int* object_dispatch_flags,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  // writes and other mutating ops are never cached: return false to pass
+  // the request to the next dispatch layer
+  bool discard(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override {
+    return false;
+  }
+
+  bool write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override {
+    return false;
+  }
+
+  bool write_same(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, io::Extents&& buffer_extents,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override {
+    return false;
+  }
+
+  bool compare_and_write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+      const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+      int* object_dispatch_flags, uint64_t* journal_tid,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override {
+    return false;
+  }
+
+  bool flush(
+      io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override {
+    return false;
+  }
+
+  bool invalidate_cache(Context* on_finish) override {
+    return false;
+  }
+
+  bool reset_existence_cache(Context* on_finish) override {
+    return false;
+  }
+
+  void extent_overwritten(
+      uint64_t object_no, uint64_t object_off, uint64_t object_len,
+      uint64_t journal_tid, uint64_t new_journal_tid) override {
+  }
+
+private:
+
+  // read from the local cache file; completes on_dispatched either way
+  int handle_read_cache(
+      const std::string file_path, uint64_t read_off,
+      uint64_t read_len, ceph::bufferlist* read_data,
+      io::DispatchResult* dispatch_result,
+      Context* on_dispatched);
+  // registration callback: opens m_object_store on success
+  int handle_register_client(bool reg);
+  // handler for unsolicited daemon messages (currently a stub)
+  void client_handle_request(std::string msg);
+
+  ImageCtxT* m_image_ctx;
+
+  ceph::immutable_obj_cache::CacheClient *m_cache_client = nullptr;
+  SharedPersistentObjectCacher<ImageCtxT> *m_object_store = nullptr;
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::SharedReadOnlyObjectDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_SHARED_PERSISTENT_OBJECT_CACHER_OBJECT_DISPATCH_H
#include "librbd/Utils.h"
#include "librbd/cache/ObjectCacherObjectDispatch.h"
+#include "librbd/cache/SharedReadOnlyObjectDispatch.h"
#include "librbd/cache/WriteAroundObjectDispatch.h"
#include "librbd/image/CloseRequest.h"
#include "librbd/image/RefreshRequest.h"
#include "librbd/image/SetSnapRequest.h"
template <typename I>
Context *OpenRequest<I>::send_init_cache(int *result) {
// cache is disabled or parent image context
+ CephContext *cct = m_image_ctx->cct;
+
if (!m_image_ctx->cache || m_image_ctx->child != nullptr) {
+ // enable Shared Read-only cache for parent image
+ if (m_image_ctx->child != nullptr && m_image_ctx->shared_cache_enabled ) {
+ ldout(cct, 10) << this << " " << "setting up parent cache"<< dendl;
+ auto sro_cache = cache::SharedReadOnlyObjectDispatch<I>::create(m_image_ctx);
+ sro_cache->init();
+ }
return send_register_watch(result);
}
- CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << dendl;
size_t max_dirty = m_image_ctx->config.template get_val<Option::size_t>(
OBJECT_DISPATCH_LAYER_NONE = 0,
OBJECT_DISPATCH_LAYER_CACHE,
OBJECT_DISPATCH_LAYER_JOURNAL,
+ OBJECT_DISPATCH_LAYER_SHARED_PERSISTENT_CACHE,
OBJECT_DISPATCH_LAYER_SCHEDULER,
OBJECT_DISPATCH_LAYER_CORE,
OBJECT_DISPATCH_LAYER_LAST