From: Jason Dillaman Date: Tue, 7 Jul 2015 19:56:35 +0000 (-0400) Subject: librbd: rename AioRequest classes to AioObjectRequest X-Git-Tag: v10.0.1~52^2~39 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b85a5fefda5669e1b83b4fd364fac9f5a0b37dbd;p=ceph.git librbd: rename AioRequest classes to AioObjectRequest This better reflects the fact that these represent requests against an object extent and helps differentiate it from the future AioImageRequest classes. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index c3cda28a5ef0..451898b6b2e9 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -7,7 +7,7 @@ #include "common/dout.h" #include "common/errno.h" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/internal.h" #include "librbd/AioCompletion.h" diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index a75599e6cf78..ba24c300bb1c 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -19,7 +19,7 @@ namespace librbd { - class AioRead; + class AioObjectRead; typedef enum { AIO_TYPE_READ = 0, @@ -31,7 +31,7 @@ namespace librbd { /** * AioCompletion is the overall completion for a single - * rbd I/O request. It may be composed of many AioRequests, + * rbd I/O request. It may be composed of many AioObjectRequests, * which each go to a single object. 
* * The retrying of individual requests is handled at a lower level, @@ -178,23 +178,23 @@ namespace librbd { } virtual ~C_AioRead() {} virtual void finish(int r); - void set_req(AioRead *req) { + void set_req(AioObjectRead *req) { m_req = req; } private: - AioRead *m_req; + AioObjectRead *m_req; }; class C_CacheRead : public Context { public: - explicit C_CacheRead(ImageCtx *ictx, AioRead *req) + explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req) : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {} virtual void complete(int r); protected: virtual void finish(int r); private: ImageCtx &m_image_ctx; - AioRead *m_req; + AioObjectRead *m_req; bool m_enqueued; }; } diff --git a/src/librbd/AioObjectRequest.cc b/src/librbd/AioObjectRequest.cc new file mode 100644 index 000000000000..d09c6ddf351a --- /dev/null +++ b/src/librbd/AioObjectRequest.cc @@ -0,0 +1,563 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_context.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Mutex.h" +#include "common/RWLock.h" + +#include "librbd/AioCompletion.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageWatcher.h" +#include "librbd/internal.h" + +#include "librbd/AioObjectRequest.h" +#include "librbd/CopyupRequest.h" + +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::AioObjectRequest: " + +namespace librbd { + + AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, + uint64_t len, librados::snap_t snap_id, + Context *completion, bool hide_enoent) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), + m_object_len(len), m_snap_id(snap_id), m_completion(completion), + m_hide_enoent(hide_enoent) { + + Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, + 0, m_ictx->layout.fl_object_size, m_parent_extents); + + 
RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + compute_parent_extents(); + } + + void AioObjectRequest::complete(int r) + { + if (should_complete(r)) { + ldout(m_ictx->cct, 20) << "complete " << this << dendl; + if (m_hide_enoent && r == -ENOENT) { + r = 0; + } + m_completion->complete(r); + delete this; + } + } + + bool AioObjectRequest::compute_parent_extents() { + assert(m_ictx->snap_lock.is_locked()); + assert(m_ictx->parent_lock.is_locked()); + + uint64_t parent_overlap; + int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); + if (r < 0) { + // NOTE: it's possible for a snapshot to be deleted while we are + // still reading from it + lderr(m_ictx->cct) << this << " compute_parent_extents: failed to " + << "retrieve parent overlap: " << cpp_strerror(r) + << dendl; + m_parent_extents.clear(); + return false; + } + + uint64_t object_overlap = + m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); + if (object_overlap > 0) { + ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " + << "overlap " << parent_overlap << " " + << "extents " << m_parent_extents << dendl; + return true; + } + return false; + } + + static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { + assert(ictx->snap_lock.is_locked()); + return (ictx->clone_copy_on_read) && + (!ictx->read_only) && (snap_id == CEPH_NOSNAP); + } + + /** read **/ + + AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + vector >& be, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) + : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion, + false), + m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), + m_op_flags(op_flags), m_parent_completion(NULL), + m_state(LIBRBD_AIO_READ_FLAT) { + + guard_read(); + } + + AioObjectRead::~AioObjectRead() + { + if (m_parent_completion) { + 
m_parent_completion->release(); + m_parent_completion = NULL; + } + } + + void AioObjectRead::guard_read() + { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + + if (has_parent()) { + ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; + m_state = LIBRBD_AIO_READ_GUARD; + } + } + + bool AioObjectRead::should_complete(int r) + { + ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " r = " << r << dendl; + + bool finished = true; + + switch (m_state) { + case LIBRBD_AIO_READ_GUARD: + ldout(m_ictx->cct, 20) << "should_complete " << this + << " READ_CHECK_GUARD" << dendl; + + // This is the step to read from parent + if (!m_tried_parent && r == -ENOENT) { + { + RWLock::RLocker l(m_ictx->snap_lock); + RWLock::RLocker l2(m_ictx->parent_lock); + if (m_ictx->parent == NULL) { + ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; + m_state = LIBRBD_AIO_READ_FLAT; + finished = false; + break; + } + + // calculate reverse mapping onto the image + vector > parent_extents; + Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, + m_object_off, m_object_len, parent_extents); + + uint64_t parent_overlap = 0; + uint64_t object_overlap = 0; + r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); + if (r == 0) { + object_overlap = m_ictx->prune_parent_extents(parent_extents, + parent_overlap); + } + + if (object_overlap > 0) { + m_tried_parent = true; + if (is_copy_on_read(m_ictx, m_snap_id)) { + m_state = LIBRBD_AIO_READ_COPYUP; + } + + read_from_parent(parent_extents); + finished = false; + } + } + + if (m_tried_parent) { + // release reference to the parent read completion. this request + // might be completed after unblock is invoked. 
+ AioCompletion *parent_completion = m_parent_completion; + parent_completion->unblock(m_ictx->cct); + parent_completion->put(); + } + } + break; + case LIBRBD_AIO_READ_COPYUP: + ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP" + << dendl; + // This is the extra step for copy-on-read: kick off an asynchronous copyup. + // It is different from copy-on-write as asynchronous copyup will finish + // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. + + assert(m_tried_parent); + if (r > 0) { + // If read entire object from parent success and CoR is possible, kick + // off a asynchronous copyup. This approach minimizes the latency + // impact. + send_copyup(); + } + break; + case LIBRBD_AIO_READ_FLAT: + ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" + << dendl; + // The read content should be deposit in m_read_data + break; + default: + lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; + assert(0); + } + + return finished; + } + + void AioObjectRead::send() { + ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + + // send read request to parent if the object doesn't exist locally + if (!m_ictx->object_map.object_may_exist(m_object_no)) { + complete(-ENOENT); + return; + } + + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(this, rados_req_cb, NULL); + int r; + librados::ObjectReadOperation op; + int flags = m_ictx->get_read_flags(m_snap_id); + if (m_sparse) { + op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data, + NULL); + } else { + op.read(m_object_off, m_object_len, &m_read_data, NULL); + } + op.set_op_flags2(m_op_flags); + + r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, NULL); + assert(r == 0); + + rados_completion->release(); + } + + void AioObjectRead::send_copyup() + { + { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker 
parent_locker(m_ictx->parent_lock); + if (!compute_parent_extents()) { + return; + } + } + + Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + // create and kick off a CopyupRequest + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no, + m_parent_extents); + m_ictx->copyup_list[m_object_no] = new_req; + new_req->queue_send(); + } + } + + void AioObjectRead::read_from_parent(const vector >& parent_extents) + { + assert(!m_parent_completion); + m_parent_completion = aio_create_completion_internal(this, rbd_req_cb); + + // prevent the parent image from being deleted while this + // request is still in-progress + m_parent_completion->get(); + m_parent_completion->block(); + + ldout(m_ictx->cct, 20) << "read_from_parent this = " << this + << " parent completion " << m_parent_completion + << " extents " << parent_extents + << dendl; + aio_read(m_ictx->parent, parent_extents, NULL, &m_read_data, + m_parent_completion, 0); + } + + /** write **/ + + AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + uint64_t len, + const ::SnapContext &snapc, + Context *completion, + bool hide_enoent) + : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, + completion, hide_enoent), + m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) + { + m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); + } + + void AbstractAioObjectWrite::guard_write() + { + if (has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + m_write.assert_exists(); + ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; + } + } + + bool AbstractAioObjectWrite::should_complete(int r) + { + ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid + << " " << m_object_off << "~" << m_object_len + << " should_complete: r = " << r << dendl; + + bool 
finished = true; + switch (m_state) { + case LIBRBD_AIO_WRITE_PRE: + ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; + if (r < 0) { + return true; + } + + send_write(); + finished = false; + break; + + case LIBRBD_AIO_WRITE_POST: + ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; + finished = true; + break; + + case LIBRBD_AIO_WRITE_GUARD: + ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; + + if (r == -ENOENT) { + handle_write_guard(); + finished = false; + break; + } else if (r < 0) { + // pass the error code to the finish context + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; + break; + } + + finished = send_post(); + break; + + case LIBRBD_AIO_WRITE_COPYUP: + ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; + if (r < 0) { + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; + } else { + finished = send_post(); + } + break; + + case LIBRBD_AIO_WRITE_FLAT: + ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; + + finished = send_post(); + break; + + case LIBRBD_AIO_WRITE_ERROR: + assert(r < 0); + lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) + << dendl; + break; + + default: + lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; + assert(0); + } + + return finished; + } + + void AbstractAioObjectWrite::send() { + assert(m_ictx->owner_lock.is_locked()); + ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " + << m_oid << " " << m_object_off << "~" + << m_object_len << dendl; + send_pre(); + } + + void AbstractAioObjectWrite::send_pre() { + assert(m_ictx->owner_lock.is_locked()); + + m_object_exist = m_ictx->object_map.object_may_exist(m_object_no); + bool write = false; + { + RWLock::RLocker snap_lock(m_ictx->snap_lock); + if (!m_ictx->object_map.enabled()) { + write = true; + } else { + // should have been flushed prior to releasing lock + assert(m_ictx->image_watcher->is_lock_owner()); + + ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " + << 
m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_PRE; + + uint8_t new_state; + boost::optional current_state; + pre_object_map_update(&new_state); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (m_ictx->object_map[m_object_no] != new_state) { + FunctionContext *ctx = new FunctionContext( + boost::bind(&AioObjectRequest::complete, this, _1)); + bool updated = m_ictx->object_map.aio_update(m_object_no, new_state, + current_state, ctx); + assert(updated); + } else { + write = true; + } + } + } + + // avoid possible recursive lock attempts + if (write) { + // no object map update required + send_write(); + } + } + + bool AbstractAioObjectWrite::send_post() { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (!m_ictx->object_map.enabled() || !post_object_map_update()) { + return true; + } + + // should have been flushed prior to releasing lock + assert(m_ictx->image_watcher->is_lock_owner()); + + ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_POST; + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + uint8_t current_state = m_ictx->object_map[m_object_no]; + if (current_state != OBJECT_PENDING || + current_state == OBJECT_NONEXISTENT) { + return true; + } + + FunctionContext *ctx = new FunctionContext( + boost::bind(&AioObjectRequest::complete, this, _1)); + bool updated = m_ictx->object_map.aio_update(m_object_no, + OBJECT_NONEXISTENT, + OBJECT_PENDING, ctx); + assert(updated); + return false; + } + + void AbstractAioObjectWrite::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist << dendl; + + if (!m_object_exist && has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + handle_write_guard(); + } else { + send_write_op(true); + } + } + + 
void AbstractAioObjectWrite::send_copyup() + { + ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_COPYUP; + + m_ictx->copyup_list_lock.Lock(); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, + m_parent_extents); + + // make sure to wait on this CopyupRequest + new_req->append_request(this); + m_ictx->copyup_list[m_object_no] = new_req; + + m_ictx->copyup_list_lock.Unlock(); + new_req->send(); + } else { + it->second->append_request(this); + m_ictx->copyup_list_lock.Unlock(); + } + } + void AbstractAioObjectWrite::send_write_op(bool write_guard) + { + m_state = LIBRBD_AIO_WRITE_FLAT; + if (write_guard) + guard_write(); + add_write_ops(&m_write); + assert(m_write.size() != 0); + + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(this, NULL, rados_req_cb); + int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, + m_snap_seq, m_snaps); + assert(r == 0); + rados_completion->release(); + } + void AbstractAioObjectWrite::handle_write_guard() + { + bool has_parent; + { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + has_parent = compute_parent_extents(); + } + // If parent still exists, overlap might also have changed. 
+ if (has_parent) { + send_copyup(); + } else { + // parent may have disappeared -- send original write again + ldout(m_ictx->cct, 20) << "should_complete(" << this + << "): parent overlap now 0" << dendl; + send_write(); + } + } + + void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) { + if (m_ictx->enable_alloc_hint && !m_ictx->object_map.object_may_exist(m_object_no)) + wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); + if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { + wr->write_full(m_write_data); + } else { + wr->write(m_object_off, m_write_data); + } + wr->set_op_flags2(m_op_flags); + } + + void AioObjectWrite::send_write() { + bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist + << " write_full " << write_full << dendl; + if (write_full) { + send_write_op(false); + } else { + AbstractAioObjectWrite::send_write(); + } + } + + void AioObjectRemove::guard_write() { + // do nothing to disable write guard only if deep-copyup not required + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (!m_ictx->snaps.empty()) { + AbstractAioObjectWrite::guard_write(); + } + } + void AioObjectRemove::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + send_write_op(true); + } +} diff --git a/src/librbd/AioObjectRequest.h b/src/librbd/AioObjectRequest.h new file mode 100644 index 000000000000..bcbaf6b50aec --- /dev/null +++ b/src/librbd/AioObjectRequest.h @@ -0,0 +1,349 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_AIOREQUEST_H +#define CEPH_LIBRBD_AIOREQUEST_H + +#include "include/int_types.h" + +#include + +#include "common/snap_types.h" +#include 
"include/buffer.h" +#include "include/Context.h" +#include "include/rados/librados.hpp" +#include "librbd/ObjectMap.h" + +namespace librbd { + + struct AioCompletion; + struct ImageCtx; + class CopyupRequest; + + /** + * This class represents an I/O operation to a single RBD data object. + * Its subclasses encapsulate logic for dealing with special cases + * for I/O due to layering. + */ + class AioObjectRequest + { + public: + AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, uint64_t len, + librados::snap_t snap_id, + Context *completion, bool hide_enoent); + virtual ~AioObjectRequest() {} + + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; + + void complete(int r); + + virtual bool should_complete(int r) = 0; + virtual void send() = 0; + + bool has_parent() const { + return !m_parent_extents.empty(); + } + + protected: + bool compute_parent_extents(); + + ImageCtx *m_ictx; + std::string m_oid; + uint64_t m_object_no, m_object_off, m_object_len; + librados::snap_t m_snap_id; + Context *m_completion; + std::vector > m_parent_extents; + bool m_hide_enoent; + }; + + class AioObjectRead : public AioObjectRequest { + public: + AioObjectRead(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + vector >& be, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags); + virtual ~AioObjectRead(); + + virtual bool should_complete(int r); + virtual void send(); + void guard_read(); + + ceph::bufferlist &data() { + return m_read_data; + } + + std::map m_ext_map; + + friend class C_AioRead; + + private: + vector > m_buffer_extents; + bool m_tried_parent; + bool m_sparse; + int m_op_flags; + ceph::bufferlist m_read_data; + AioCompletion *m_parent_completion; + + /** + * Reads go through the following state machine to deal with + * layering: + * + * need copyup + * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP + * | | + * v | + * done 
<------------------------------------/ + * ^ + * | + * LIBRBD_AIO_READ_FLAT + * + * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on + * whether there is a parent or not. + */ + enum read_state_d { + LIBRBD_AIO_READ_GUARD, + LIBRBD_AIO_READ_COPYUP, + LIBRBD_AIO_READ_FLAT + }; + + read_state_d m_state; + + void send_copyup(); + void read_from_parent(const vector >& image_extents); + }; + + class AbstractAioObjectWrite : public AioObjectRequest { + public: + AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t len, const ::SnapContext &snapc, + Context *completion, bool hide_enoent); + + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) + { + add_write_ops(wr); + } + + virtual bool should_complete(int r); + virtual void send(); + + /** + * Writes go through the following state machine to deal with + * layering and the object map: + * + * + * . | + * . | + * . \---> LIBRBD_AIO_WRITE_PRE + * . | | + * . . . . . . | . . . . | . . . . . . . . . . . + * . | -or- | . + * . | | v + * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . + * . | | . + * v v need copyup | . + * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . + * . | | . | . + * . | | . | . + * . | /-----/ . | . + * . | | . | . + * . \-------------------\ | /-------------------/ . + * . | | | . . + * . v v v . . + * . LIBRBD_AIO_WRITE_POST . . + * . | . . + * . | . . . . . . . . . + * . | . . + * . v v . + * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . + * + * The _PRE/_POST states are skipped if the object map is disabled. + * The write starts in _WRITE_GUARD or _FLAT depending on whether or not + * there is a parent overlap. 
+ */ + protected: + enum write_state_d { + LIBRBD_AIO_WRITE_GUARD, + LIBRBD_AIO_WRITE_COPYUP, + LIBRBD_AIO_WRITE_FLAT, + LIBRBD_AIO_WRITE_PRE, + LIBRBD_AIO_WRITE_POST, + LIBRBD_AIO_WRITE_ERROR + }; + + write_state_d m_state; + librados::ObjectWriteOperation m_write; + uint64_t m_snap_seq; + std::vector m_snaps; + bool m_object_exist; + + virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; + virtual const char* get_write_type() const = 0; + virtual void guard_write(); + virtual void pre_object_map_update(uint8_t *new_state) = 0; + virtual bool post_object_map_update() { + return false; + } + virtual void send_write(); + virtual void send_write_op(bool write_guard); + virtual void handle_write_guard(); + + private: + void send_pre(); + bool send_post(); + void send_copyup(); + }; + + class AioObjectWrite : public AbstractAioObjectWrite { + public: + AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, const ceph::bufferlist &data, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(), + snapc, completion, false), + m_write_data(data), m_op_flags(0) { + } + + void set_op_flags(int op_flags) { + m_op_flags = op_flags; + } + protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr); + + virtual const char* get_write_type() const { + return "write"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } + virtual void send_write(); + + private: + ceph::bufferlist m_write_data; + int m_op_flags; + }; + + class AioObjectRemove : public AbstractAioObjectWrite { + public: + AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true), + m_object_state(OBJECT_NONEXISTENT) { + } + + protected: + virtual void 
add_write_ops(librados::ObjectWriteOperation *wr) { + if (has_parent()) { + wr->truncate(0); + } else { + wr->remove(); + } + } + + virtual const char* get_write_type() const { + if (has_parent()) { + return "remove (trunc)"; + } + return "remove"; + } + virtual void pre_object_map_update(uint8_t *new_state) { + if (has_parent()) { + m_object_state = OBJECT_EXISTS; + } else { + m_object_state = OBJECT_PENDING; + } + *new_state = m_object_state; + } + + virtual bool post_object_map_update() { + if (m_object_state == OBJECT_EXISTS) { + return false; + } + return true; + } + + virtual void guard_write(); + virtual void send_write(); + + private: + uint8_t m_object_state; + }; + + class AioObjectTrim : public AbstractAioObjectWrite { + public: + AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true) { + } + + protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->remove(); + } + + virtual const char* get_write_type() const { + return "remove (trim)"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_PENDING; + } + + virtual bool post_object_map_update() { + return true; + } + }; + + class AioObjectTruncate : public AbstractAioObjectWrite { + public: + AioObjectTruncate(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc, + completion, true) { + } + + protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->truncate(m_object_off); + } + + virtual const char* get_write_type() const { + return "truncate"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } + }; + + class AioObjectZero : public AbstractAioObjectWrite { + public: + 
AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, uint64_t object_len, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len, + snapc, completion, true) { + } + + protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->zero(m_object_off, m_object_len); + } + + virtual const char* get_write_type() const { + return "zero"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } + }; + +} + +#endif diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc deleted file mode 100644 index 421209a1ae77..000000000000 --- a/src/librbd/AioRequest.cc +++ /dev/null @@ -1,558 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "common/ceph_context.h" -#include "common/dout.h" -#include "common/errno.h" -#include "common/Mutex.h" -#include "common/RWLock.h" - -#include "librbd/AioCompletion.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageWatcher.h" -#include "librbd/internal.h" - -#include "librbd/AioRequest.h" -#include "librbd/CopyupRequest.h" - -#include -#include - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::AioRequest: " - -namespace librbd { - - AioRequest::AioRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t off, uint64_t len, - librados::snap_t snap_id, - Context *completion, - bool hide_enoent) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), - m_object_len(len), m_snap_id(snap_id), m_completion(completion), - m_hide_enoent(hide_enoent) { - - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - 0, m_ictx->layout.fl_object_size, m_parent_extents); - - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - compute_parent_extents(); - } - - 
void AioRequest::complete(int r) - { - if (should_complete(r)) { - ldout(m_ictx->cct, 20) << "complete " << this << dendl; - if (m_hide_enoent && r == -ENOENT) { - r = 0; - } - m_completion->complete(r); - delete this; - } - } - - bool AioRequest::compute_parent_extents() { - assert(m_ictx->snap_lock.is_locked()); - assert(m_ictx->parent_lock.is_locked()); - - uint64_t parent_overlap; - int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r < 0) { - // NOTE: it's possible for a snapshot to be deleted while we are - // still reading from it - lderr(m_ictx->cct) << this << " compute_parent_extents: failed to " - << "retrieve parent overlap: " << cpp_strerror(r) - << dendl; - m_parent_extents.clear(); - return false; - } - - uint64_t object_overlap = - m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); - if (object_overlap > 0) { - ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " - << "overlap " << parent_overlap << " " - << "extents " << m_parent_extents << dendl; - return true; - } - return false; - } - - static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { - assert(ictx->snap_lock.is_locked()); - return (ictx->clone_copy_on_read) && - (!ictx->read_only) && (snap_id == CEPH_NOSNAP); - } - - /** read **/ - - AioRead::AioRead(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags) - : AioRequest(ictx, oid, objectno, offset, len, snap_id, completion, false), - m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), - m_op_flags(op_flags), m_parent_completion(NULL), - m_state(LIBRBD_AIO_READ_FLAT) { - - guard_read(); - } - - AioRead::~AioRead() - { - if (m_parent_completion) { - m_parent_completion->release(); - m_parent_completion = NULL; - } - } - - void AioRead::guard_read() - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker 
parent_locker(m_ictx->parent_lock); - - if (has_parent()) { - ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; - m_state = LIBRBD_AIO_READ_GUARD; - } - } - - bool AioRead::should_complete(int r) - { - ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " r = " << r << dendl; - - bool finished = true; - - switch (m_state) { - case LIBRBD_AIO_READ_GUARD: - ldout(m_ictx->cct, 20) << "should_complete " << this - << " READ_CHECK_GUARD" << dendl; - - // This is the step to read from parent - if (!m_tried_parent && r == -ENOENT) { - { - RWLock::RLocker l(m_ictx->snap_lock); - RWLock::RLocker l2(m_ictx->parent_lock); - if (m_ictx->parent == NULL) { - ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; - m_state = LIBRBD_AIO_READ_FLAT; - finished = false; - break; - } - - // calculate reverse mapping onto the image - vector > parent_extents; - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - m_object_off, m_object_len, parent_extents); - - uint64_t parent_overlap = 0; - uint64_t object_overlap = 0; - r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r == 0) { - object_overlap = m_ictx->prune_parent_extents(parent_extents, - parent_overlap); - } - - if (object_overlap > 0) { - m_tried_parent = true; - if (is_copy_on_read(m_ictx, m_snap_id)) { - m_state = LIBRBD_AIO_READ_COPYUP; - } - - read_from_parent(parent_extents); - finished = false; - } - } - - if (m_tried_parent) { - // release reference to the parent read completion. this request - // might be completed after unblock is invoked. - AioCompletion *parent_completion = m_parent_completion; - parent_completion->unblock(m_ictx->cct); - parent_completion->put(); - } - } - break; - case LIBRBD_AIO_READ_COPYUP: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP" - << dendl; - // This is the extra step for copy-on-read: kick off an asynchronous copyup. 
- // It is different from copy-on-write as asynchronous copyup will finish - // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. - - assert(m_tried_parent); - if (r > 0) { - // If read entire object from parent success and CoR is possible, kick - // off a asynchronous copyup. This approach minimizes the latency - // impact. - send_copyup(); - } - break; - case LIBRBD_AIO_READ_FLAT: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" - << dendl; - // The read content should be deposit in m_read_data - break; - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); - } - - return finished; - } - - void AioRead::send() { - ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - - // send read request to parent if the object doesn't exist locally - if (!m_ictx->object_map.object_may_exist(m_object_no)) { - complete(-ENOENT); - return; - } - - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(this, rados_req_cb, NULL); - int r; - librados::ObjectReadOperation op; - int flags = m_ictx->get_read_flags(m_snap_id); - if (m_sparse) { - op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data, - NULL); - } else { - op.read(m_object_off, m_object_len, &m_read_data, NULL); - } - op.set_op_flags2(m_op_flags); - - r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, NULL); - assert(r == 0); - - rados_completion->release(); - } - - void AioRead::send_copyup() - { - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (!compute_parent_extents()) { - return; - } - } - - Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - // create and kick off a CopyupRequest - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no, 
- m_parent_extents); - m_ictx->copyup_list[m_object_no] = new_req; - new_req->queue_send(); - } - } - - void AioRead::read_from_parent(const vector >& parent_extents) - { - assert(!m_parent_completion); - m_parent_completion = aio_create_completion_internal(this, rbd_req_cb); - - // prevent the parent image from being deleted while this - // request is still in-progress - m_parent_completion->get(); - m_parent_completion->block(); - - ldout(m_ictx->cct, 20) << "read_from_parent this = " << this - << " parent completion " << m_parent_completion - << " extents " << parent_extents - << dendl; - aio_read(m_ictx->parent, parent_extents, NULL, &m_read_data, - m_parent_completion, 0); - } - - /** write **/ - - AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid, - uint64_t object_no, uint64_t object_off, - uint64_t len, const ::SnapContext &snapc, - Context *completion, bool hide_enoent) - : AioRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, completion, - hide_enoent), - m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) - { - m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); - } - - void AbstractWrite::guard_write() - { - if (has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - m_write.assert_exists(); - ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; - } - } - - bool AbstractWrite::should_complete(int r) - { - ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid - << " " << m_object_off << "~" << m_object_len - << " should_complete: r = " << r << dendl; - - bool finished = true; - switch (m_state) { - case LIBRBD_AIO_WRITE_PRE: - ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; - if (r < 0) { - return true; - } - - send_write(); - finished = false; - break; - - case LIBRBD_AIO_WRITE_POST: - ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; - finished = true; - break; - - case LIBRBD_AIO_WRITE_GUARD: - ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; - - if (r == -ENOENT) 
{ - handle_write_guard(); - finished = false; - break; - } else if (r < 0) { - // pass the error code to the finish context - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - break; - } - - finished = send_post(); - break; - - case LIBRBD_AIO_WRITE_COPYUP: - ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; - if (r < 0) { - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - } else { - finished = send_post(); - } - break; - - case LIBRBD_AIO_WRITE_FLAT: - ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; - - finished = send_post(); - break; - - case LIBRBD_AIO_WRITE_ERROR: - assert(r < 0); - lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) - << dendl; - break; - - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); - } - - return finished; - } - - void AbstractWrite::send() { - assert(m_ictx->owner_lock.is_locked()); - ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " - << m_oid << " " << m_object_off << "~" - << m_object_len << dendl; - send_pre(); - } - - void AbstractWrite::send_pre() { - assert(m_ictx->owner_lock.is_locked()); - - m_object_exist = m_ictx->object_map.object_may_exist(m_object_no); - bool write = false; - { - RWLock::RLocker snap_lock(m_ictx->snap_lock); - if (!m_ictx->object_map.enabled()) { - write = true; - } else { - // should have been flushed prior to releasing lock - assert(m_ictx->image_watcher->is_lock_owner()); - - ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_PRE; - - uint8_t new_state; - boost::optional current_state; - pre_object_map_update(&new_state); - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (m_ictx->object_map[m_object_no] != new_state) { - FunctionContext *ctx = new FunctionContext( - boost::bind(&AioRequest::complete, this, _1)); - bool updated = 
m_ictx->object_map.aio_update(m_object_no, new_state, - current_state, ctx); - assert(updated); - } else { - write = true; - } - } - } - - // avoid possible recursive lock attempts - if (write) { - // no object map update required - send_write(); - } - } - - bool AbstractWrite::send_post() { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (!m_ictx->object_map.enabled() || !post_object_map_update()) { - return true; - } - - // should have been flushed prior to releasing lock - assert(m_ictx->image_watcher->is_lock_owner()); - - ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_POST; - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - uint8_t current_state = m_ictx->object_map[m_object_no]; - if (current_state != OBJECT_PENDING || - current_state == OBJECT_NONEXISTENT) { - return true; - } - - FunctionContext *ctx = new FunctionContext( - boost::bind(&AioRequest::complete, this, _1)); - bool updated = m_ictx->object_map.aio_update(m_object_no, - OBJECT_NONEXISTENT, - OBJECT_PENDING, ctx); - assert(updated); - return false; - } - - void AbstractWrite::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist << dendl; - - if (!m_object_exist && has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - handle_write_guard(); - } else { - send_write_op(true); - } - } - - void AbstractWrite::send_copyup() - { - ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_COPYUP; - - m_ictx->copyup_list_lock.Lock(); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, - 
m_parent_extents); - - // make sure to wait on this CopyupRequest - new_req->append_request(this); - m_ictx->copyup_list[m_object_no] = new_req; - - m_ictx->copyup_list_lock.Unlock(); - new_req->send(); - } else { - it->second->append_request(this); - m_ictx->copyup_list_lock.Unlock(); - } - } - void AbstractWrite::send_write_op(bool write_guard) - { - m_state = LIBRBD_AIO_WRITE_FLAT; - if (write_guard) - guard_write(); - add_write_ops(&m_write); - assert(m_write.size() != 0); - - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(this, NULL, rados_req_cb); - int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, - m_snap_seq, m_snaps); - assert(r == 0); - rados_completion->release(); - } - void AbstractWrite::handle_write_guard() - { - bool has_parent; - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - has_parent = compute_parent_extents(); - } - // If parent still exists, overlap might also have changed. 
- if (has_parent) { - send_copyup(); - } else { - // parent may have disappeared -- send original write again - ldout(m_ictx->cct, 20) << "should_complete(" << this - << "): parent overlap now 0" << dendl; - send_write(); - } - } - - void AioWrite::add_write_ops(librados::ObjectWriteOperation *wr) { - if (m_ictx->enable_alloc_hint && !m_ictx->object_map.object_may_exist(m_object_no)) - wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); - if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { - wr->write_full(m_write_data); - } else { - wr->write(m_object_off, m_write_data); - } - wr->set_op_flags2(m_op_flags); - } - void AioWrite::send_write() { - bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist - << " write_full " << write_full << dendl; - if (write_full) { - send_write_op(false); - } else { - AbstractWrite::send_write(); - } - } - - void AioRemove::guard_write() { - // do nothing to disable write guard only if deep-copyup not required - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (!m_ictx->snaps.empty()) { - AbstractWrite::guard_write(); - } - } - void AioRemove::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - send_write_op(true); - } -} diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h deleted file mode 100644 index 341b3d60b7d4..000000000000 --- a/src/librbd/AioRequest.h +++ /dev/null @@ -1,351 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_AIOREQUEST_H -#define CEPH_LIBRBD_AIOREQUEST_H - -#include "include/int_types.h" - -#include - -#include "common/snap_types.h" -#include "include/buffer.h" -#include "include/Context.h" -#include 
"include/rados/librados.hpp" -#include "librbd/ObjectMap.h" - -namespace librbd { - - struct AioCompletion; - struct ImageCtx; - class CopyupRequest; - - /** - * This class represents an I/O operation to a single RBD data object. - * Its subclasses encapsulate logic for dealing with special cases - * for I/O due to layering. - */ - class AioRequest - { - public: - AioRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t off, uint64_t len, - librados::snap_t snap_id, - Context *completion, bool hide_enoent); - virtual ~AioRequest() {} - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; - - void complete(int r); - - virtual bool should_complete(int r) = 0; - virtual void send() = 0; - - bool has_parent() const { - return !m_parent_extents.empty(); - } - - protected: - bool compute_parent_extents(); - - ImageCtx *m_ictx; - std::string m_oid; - uint64_t m_object_no, m_object_off, m_object_len; - librados::snap_t m_snap_id; - Context *m_completion; - std::vector > m_parent_extents; - bool m_hide_enoent; - }; - - class AioRead : public AioRequest { - public: - AioRead(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags); - virtual ~AioRead(); - - virtual bool should_complete(int r); - virtual void send(); - void guard_read(); - - ceph::bufferlist &data() { - return m_read_data; - } - - std::map m_ext_map; - - friend class C_AioRead; - - private: - vector > m_buffer_extents; - bool m_tried_parent; - bool m_sparse; - int m_op_flags; - ceph::bufferlist m_read_data; - AioCompletion *m_parent_completion; - - /** - * Reads go through the following state machine to deal with - * layering: - * - * need copyup - * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP - * | | - * v | - * done <------------------------------------/ - * ^ - * | - * LIBRBD_AIO_READ_FLAT - * - * Reads start in 
LIBRBD_AIO_READ_GUARD or _FLAT, depending on - * whether there is a parent or not. - */ - enum read_state_d { - LIBRBD_AIO_READ_GUARD, - LIBRBD_AIO_READ_COPYUP, - LIBRBD_AIO_READ_FLAT - }; - - read_state_d m_state; - - void send_copyup(); - void read_from_parent(const vector >& image_extents); - }; - - class AbstractWrite : public AioRequest { - public: - AbstractWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, uint64_t len, const ::SnapContext &snapc, - Context *completion, bool hide_enoent); - virtual ~AbstractWrite() {} - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) - { - add_write_ops(wr); - } - - virtual bool should_complete(int r); - virtual void send(); - - /** - * Writes go through the following state machine to deal with - * layering and the object map: - * - * - * . | - * . | - * . \---> LIBRBD_AIO_WRITE_PRE - * . | | - * . . . . . . | . . . . | . . . . . . . . . . . - * . | -or- | . - * . | | v - * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . - * . | | . - * v v need copyup | . - * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . - * . | | . | . - * . | | . | . - * . | /-----/ . | . - * . | | . | . - * . \-------------------\ | /-------------------/ . - * . | | | . . - * . v v v . . - * . LIBRBD_AIO_WRITE_POST . . - * . | . . - * . | . . . . . . . . . - * . | . . - * . v v . - * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . - * - * The _PRE/_POST states are skipped if the object map is disabled. - * The write starts in _WRITE_GUARD or _FLAT depending on whether or not - * there is a parent overlap. 
- */ - protected: - enum write_state_d { - LIBRBD_AIO_WRITE_GUARD, - LIBRBD_AIO_WRITE_COPYUP, - LIBRBD_AIO_WRITE_FLAT, - LIBRBD_AIO_WRITE_PRE, - LIBRBD_AIO_WRITE_POST, - LIBRBD_AIO_WRITE_ERROR - }; - - write_state_d m_state; - librados::ObjectWriteOperation m_write; - uint64_t m_snap_seq; - std::vector m_snaps; - bool m_object_exist; - - virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; - virtual const char* get_write_type() const = 0; - virtual void guard_write(); - virtual void pre_object_map_update(uint8_t *new_state) = 0; - virtual bool post_object_map_update() { - return false; - } - virtual void send_write(); - virtual void send_write_op(bool write_guard); - virtual void handle_write_guard(); - - private: - void send_pre(); - bool send_post(); - void send_copyup(); - }; - - class AioWrite : public AbstractWrite { - public: - AioWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, const ceph::bufferlist &data, - const ::SnapContext &snapc, Context *completion) - : AbstractWrite(ictx, oid, object_no, object_off, data.length(), snapc, - completion, false), - m_write_data(data), m_op_flags(0) { - } - virtual ~AioWrite() {} - - void set_op_flags(int op_flags) { - m_op_flags = op_flags; - } - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr); - - virtual const char* get_write_type() const { - return "write"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_EXISTS; - } - virtual void send_write(); - - private: - ceph::bufferlist m_write_data; - int m_op_flags; - }; - - class AioRemove : public AbstractWrite { - public: - AioRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - const ::SnapContext &snapc, Context *completion) - : AbstractWrite(ictx, oid, object_no, 0, 0, snapc, completion, true), - m_object_state(OBJECT_NONEXISTENT) { - } - virtual ~AioRemove() {} - - protected: - virtual void 
add_write_ops(librados::ObjectWriteOperation *wr) { - if (has_parent()) { - wr->truncate(0); - } else { - wr->remove(); - } - } - - virtual const char* get_write_type() const { - if (has_parent()) { - return "remove (trunc)"; - } - return "remove"; - } - virtual void pre_object_map_update(uint8_t *new_state) { - if (has_parent()) { - m_object_state = OBJECT_EXISTS; - } else { - m_object_state = OBJECT_PENDING; - } - *new_state = m_object_state; - } - - virtual bool post_object_map_update() { - if (m_object_state == OBJECT_EXISTS) { - return false; - } - return true; - } - - virtual void guard_write(); - virtual void send_write(); - - private: - uint8_t m_object_state; - }; - - class AioTrim : public AbstractWrite { - public: - AioTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - const ::SnapContext &snapc, Context *completion) - : AbstractWrite(ictx, oid, object_no, 0, 0, snapc, completion, true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->remove(); - } - - virtual const char* get_write_type() const { - return "remove (trim)"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_PENDING; - } - - virtual bool post_object_map_update() { - return true; - } - }; - - class AioTruncate : public AbstractWrite { - public: - AioTruncate(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, const ::SnapContext &snapc, - Context *completion) - : AbstractWrite(ictx, oid, object_no, object_off, 0, snapc, completion, - true) { - } - virtual ~AioTruncate() {} - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->truncate(m_object_off); - } - - virtual const char* get_write_type() const { - return "truncate"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_EXISTS; - } - }; - - class AioZero : public AbstractWrite { - public: - AioZero(ImageCtx *ictx, const std::string &oid, 
uint64_t object_no, - uint64_t object_off, uint64_t object_len, - const ::SnapContext &snapc, Context *completion) - : AbstractWrite(ictx, oid, object_no, object_off, object_len, snapc, - completion, true) { - } - virtual ~AioZero() {} - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->zero(m_object_off, m_object_len); - } - - virtual const char* get_write_type() const { - return "zero"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_EXISTS; - } - }; - -} - -#endif diff --git a/src/librbd/AsyncFlattenRequest.cc b/src/librbd/AsyncFlattenRequest.cc index 9136220e57e7..dbcf3340dcca 100644 --- a/src/librbd/AsyncFlattenRequest.cc +++ b/src/librbd/AsyncFlattenRequest.cc @@ -2,7 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "librbd/AsyncFlattenRequest.h" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/AsyncObjectThrottle.h" #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" @@ -40,8 +40,8 @@ public: bufferlist bl; string oid = m_image_ctx.get_object_name(m_object_no); - AioWrite *req = new AioWrite(&m_image_ctx, oid, m_object_no, 0, bl, m_snapc, - this); + AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0, + bl, m_snapc, this); if (!req->has_parent()) { // stop early if the parent went away - it just means // another flatten finished first or the image was resized diff --git a/src/librbd/AsyncTrimRequest.cc b/src/librbd/AsyncTrimRequest.cc index 90668ce43123..6159ef58bd3e 100644 --- a/src/librbd/AsyncTrimRequest.cc +++ b/src/librbd/AsyncTrimRequest.cc @@ -2,7 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "librbd/AsyncTrimRequest.h" #include "librbd/AsyncObjectThrottle.h" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" #include "librbd/internal.h" @@ -41,8 +41,8 @@ public: string oid = m_image_ctx.get_object_name(m_object_no); 
ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl; - AbstractWrite *req = new AioTrim(&m_image_ctx, oid, m_object_no, m_snapc, - this); + AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no, + m_snapc, this); req->send(); return 0; } @@ -340,13 +340,13 @@ void AsyncTrimRequest::send_clean_boundary() { ldout(cct, 20) << " ex " << *p << dendl; Context *req_comp = new C_ContextCompletion(*completion); - AbstractWrite *req; + AioObjectRequest *req; if (p->offset == 0) { - req = new AioTrim(&m_image_ctx, p->oid.name, p->objectno, snapc, - req_comp); + req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc, + req_comp); } else { - req = new AioTruncate(&m_image_ctx, p->oid.name, p->objectno, - p->offset, snapc, req_comp); + req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno, + p->offset, snapc, req_comp); } req->send(); } diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 667d19d89c4c..6be414f9067a 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -7,7 +7,7 @@ #include "common/Mutex.h" #include "librbd/AioCompletion.h" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/AsyncObjectThrottle.h" #include "librbd/CopyupRequest.h" #include "librbd/ImageCtx.h" @@ -84,15 +84,15 @@ private: m_async_op.finish_op(); } - void CopyupRequest::append_request(AioRequest *req) { + void CopyupRequest::append_request(AioObjectRequest *req) { ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; m_pending_requests.push_back(req); } void CopyupRequest::complete_requests(int r) { while (!m_pending_requests.empty()) { - vector<AioRequest *>::iterator it = m_pending_requests.begin(); - AioRequest *req = *it; + vector<AioObjectRequest *>::iterator it = m_pending_requests.begin(); + AioObjectRequest *req = *it; ldout(m_ictx->cct, 20) << __func__ << " completing request " << req << dendl; req->complete(r); @@ -156,7 +156,7 @@ private: // merge all
pending write ops into this single RADOS op for (size_t i=0; i<m_pending_requests.size(); ++i) { - AioRequest *req = m_pending_requests[i]; + AioObjectRequest *req = m_pending_requests[i]; ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req << dendl; req->add_copyup_ops(&write_op); diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index fd1fd87b241d..e3a7cdb9c695 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -20,7 +20,7 @@ namespace librbd { vector<pair<uint64_t,uint64_t> >& image_extents); ~CopyupRequest(); - void append_request(AioRequest *req); + void append_request(AioObjectRequest *req); void send(); void queue_send(); @@ -65,7 +65,7 @@ namespace librbd { vector<pair<uint64_t,uint64_t> > m_image_extents; State m_state; ceph::bufferlist m_copyup_data; - vector<AioRequest *> m_pending_requests; + vector<AioObjectRequest *> m_pending_requests; atomic_t m_pending_copyups; AsyncOperation m_async_op; diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc index ac778eec5455..d240c97f2437 100644 --- a/src/librbd/LibrbdWriteback.cc +++ b/src/librbd/LibrbdWriteback.cc @@ -11,7 +11,7 @@ #include "include/rados/librados.hpp" #include "include/rbd/librbd.hpp" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/ImageCtx.h" #include "librbd/internal.h" #include "librbd/LibrbdWriteback.h" @@ -164,13 +164,13 @@ namespace librbd { { assert(m_ictx->owner_lock.is_locked()); uint64_t object_no = oid_to_object_no(oid.name, m_ictx->object_prefix); - + write_result_d *result = new write_result_d(oid.name, oncommit); m_writes[oid.name].push(result); ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl; C_OrderedWrite *req_comp = new C_OrderedWrite(m_ictx->cct, result, this); - AioWrite *req = new AioWrite(m_ictx, oid.name, object_no, off, bl, snapc, - req_comp); + AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no, off, + bl, snapc, req_comp); req->send(); return ++m_tid; } diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am index 96390931141f..aa0fee52cca7 100644 --- a/src/librbd/Makefile.am +++ b/src/librbd/Makefile.am @@ -8,7 +8,7 @@ if
WITH_RBD librbd_internal_la_SOURCES = \ librbd/AioCompletion.cc \ - librbd/AioRequest.cc \ + librbd/AioObjectRequest.cc \ librbd/AsyncFlattenRequest.cc \ librbd/AsyncObjectThrottle.cc \ librbd/AsyncOperation.cc \ @@ -49,7 +49,7 @@ lib_LTLIBRARIES += librbd.la noinst_HEADERS += \ librbd/AioCompletion.h \ - librbd/AioRequest.h \ + librbd/AioObjectRequest.h \ librbd/AsyncFlattenRequest.h \ librbd/AsyncObjectThrottle.h \ librbd/AsyncOperation.h \ diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index e027f7577236..e1b3a723a6fe 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -17,7 +17,7 @@ #include "cls/rbd/cls_rbd_client.h" #include "librbd/AioCompletion.h" -#include "librbd/AioRequest.h" +#include "librbd/AioObjectRequest.h" #include "librbd/AsyncFlattenRequest.h" #include "librbd/AsyncResizeRequest.h" #include "librbd/AsyncTrimRequest.h" @@ -3626,7 +3626,7 @@ reprotect_and_return_err: void rados_req_cb(rados_completion_t c, void *arg) { - AioRequest *req = reinterpret_cast<AioRequest *>(arg); + AioObjectRequest *req = reinterpret_cast<AioObjectRequest *>(arg); req->complete(rados_aio_get_return_value(c)); } @@ -3819,8 +3819,9 @@ reprotect_and_return_err: if (ictx->object_cacher) { ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp, op_flags); } else { - AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset, - bl, snapc, req_comp); + AioObjectWrite *req = new AioObjectWrite(ictx, p->oid.name, p->objectno, + p->offset, bl, snapc, + req_comp); req->set_op_flags(op_flags); req->send(); @@ -3945,20 +3946,21 @@ reprotect_and_return_err: ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length << " from " << p->buffer_extents << dendl; C_AioRequest *req_comp = new C_AioRequest(cct, c); - AbstractWrite *req; + AioObjectRequest *req; if (p->length == ictx->layout.fl_object_size) { - req = new AioRemove(ictx, p->oid.name, p->objectno, snapc, req_comp); + req = new AioObjectRemove(ictx, p->oid.name, p->objectno, snapc, + req_comp);
} else if (p->offset + p->length == ictx->layout.fl_object_size) { - req = new AioTruncate(ictx, p->oid.name, p->objectno, p->offset, snapc, - req_comp); + req = new AioObjectTruncate(ictx, p->oid.name, p->objectno, p->offset, + snapc, req_comp); } else { if(ictx->cct->_conf->rbd_skip_partial_discard) { delete req_comp; continue; } - req = new AioZero(ictx, p->oid.name, p->objectno, p->offset, p->length, - snapc, req_comp); + req = new AioObjectZero(ictx, p->oid.name, p->objectno, p->offset, + p->length, snapc, req_comp); } req->send(); @@ -3978,7 +3980,7 @@ reprotect_and_return_err: void rbd_req_cb(completion_t cb, void *arg) { - AioRequest *req = reinterpret_cast<AioRequest *>(arg); + AioObjectRequest *req = reinterpret_cast<AioObjectRequest *>(arg); AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb); req->complete(comp->get_return_value()); } @@ -4114,9 +4116,10 @@ reprotect_and_return_err: << dendl; C_AioRead *req_comp = new C_AioRead(ictx->cct, c); - AioRead *req = new AioRead(ictx, q->oid.name, q->objectno, q->offset, - q->length, q->buffer_extents, snap_id, true, - req_comp, op_flags); + AioObjectRead *req = new AioObjectRead(ictx, q->oid.name, q->objectno, + q->offset, q->length, + q->buffer_extents, snap_id, true, + req_comp, op_flags); req_comp->set_req(req); if (ictx->object_cacher) {