From 7be3df67809925164237cc185f9f29e145f45768 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 16 Apr 2015 14:15:10 -0400 Subject: [PATCH] librbd: move copyup class method call to CopyupRequest Move AbstractWrite's invocation of copyup to the CopyupRequest class. The AioRequest write path will now always create a CopyupRequest, which will now append the actual write ops to the copyup. Signed-off-by: Jason Dillaman --- src/librbd/AioRequest.cc | 194 +++++++++++++++--------------------- src/librbd/AioRequest.h | 49 +++++---- src/librbd/CopyupRequest.cc | 104 +++++++++++-------- src/librbd/CopyupRequest.h | 27 ++--- 4 files changed, 189 insertions(+), 185 deletions(-) diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 7f1185a8a1992..fe97dc987db48 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -31,7 +31,7 @@ namespace librbd { bool hide_enoent) : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), m_object_len(len), m_snap_id(snap_id), m_completion(completion), - m_parent_completion(NULL), m_hide_enoent(hide_enoent) { + m_hide_enoent(hide_enoent) { Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0, m_ictx->layout.fl_object_size, m_parent_extents); @@ -41,13 +41,6 @@ namespace librbd { compute_parent_extents(); } - AioRequest::~AioRequest() { - if (m_parent_completion) { - m_parent_completion->release(); - m_parent_completion = NULL; - } - } - void AioRequest::complete(int r) { if (should_complete(r)) { @@ -87,33 +80,6 @@ namespace librbd { return false; } - void AioRequest::read_from_parent(const vector >& parent_extents, - bool block_completion) - { - assert(!m_parent_completion); - m_parent_completion = aio_create_completion_internal(this, rbd_req_cb); - if (block_completion) { - // prevent the parent image from being deleted while this - // request is still in-progress - m_parent_completion->get(); - m_parent_completion->block(); - } - - ldout(m_ictx->cct, 20) << "read_from_parent this = " << this - << " parent completion " << m_parent_completion - << " extents " << parent_extents - << dendl; - int r = aio_read(m_ictx->parent, parent_extents, NULL, &m_read_data, - m_parent_completion, 0); - if (r < 0) { - lderr(m_ictx->cct) << "read_from_parent " << this - << ": error reading from parent: " - << cpp_strerror(r) << dendl; - m_parent_completion->release(); - complete(r); - } - } - static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { assert(ictx->snap_lock.is_locked()); return (ictx->clone_copy_on_read) && @@ -129,11 +95,20 @@ namespace librbd { Context *completion, int op_flags) : AioRequest(ictx, oid, objectno, offset, len, snap_id, completion, false), m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), - m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) { + m_op_flags(op_flags), m_parent_completion(NULL), + m_state(LIBRBD_AIO_READ_FLAT) { guard_read(); } + AioRead::~AioRead() + { + if (m_parent_completion) { + m_parent_completion->release(); + m_parent_completion = NULL; + } + } + void AioRead::guard_read() { RWLock::RLocker snap_locker(m_ictx->snap_lock); @@ -189,7 +164,7 @@ namespace librbd { m_state = LIBRBD_AIO_READ_COPYUP; } - read_from_parent(parent_extents, true); + read_from_parent(parent_extents); finished = false; } } @@ -215,21 +190,7 @@ namespace librbd { // If read entire object from parent success and CoR is possible, kick // off a asynchronous copyup. This approach minimizes the latency // impact. - Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (compute_parent_extents()) { - // create and kick off a CopyupRequest - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, - m_parent_extents); - m_ictx->copyup_list[m_object_no] = new_req; - new_req->queue_send(); - } - } + send_copyup(); } break; case LIBRBD_AIO_READ_FLAT: @@ -273,6 +234,50 @@ namespace librbd { return r; } + void AioRead::send_copyup() + { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + if (compute_parent_extents()) { + // create and kick off a CopyupRequest + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, + m_parent_extents); + m_ictx->copyup_list[m_object_no] = new_req; + new_req->queue_send(); + } + } + } + + void AioRead::read_from_parent(const vector >& parent_extents) + { + assert(!m_parent_completion); + m_parent_completion = aio_create_completion_internal(this, rbd_req_cb); + + // prevent the parent image from being deleted while this + // request is still in-progress + m_parent_completion->get(); + m_parent_completion->block(); + + ldout(m_ictx->cct, 20) << "read_from_parent this = " << this + << " parent completion " << m_parent_completion + << " extents " << parent_extents + << dendl; + int r = aio_read(m_ictx->parent, parent_extents, NULL, &m_read_data, + m_parent_completion, 0); + if (r < 0) { + lderr(m_ictx->cct) << "read_from_parent " << this + << ": error reading from parent: " + << cpp_strerror(r) << dendl; + m_parent_completion->release(); + complete(r); + } + } + /** write **/ AbstractWrite::AbstractWrite(ImageCtx *ictx, const std::string &oid, @@ -281,8 +286,7 @@ namespace librbd { Context *completion, bool hide_enoent) : AioRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, completion, hide_enoent), - m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val), - m_entire_object(NULL) + m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) { m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); } @@ -302,7 +306,6 @@ namespace librbd { << m_object_off << "~" << m_object_len << " should_complete: r = " << r << dendl; - map::iterator it; bool finished = true; switch (m_state) { case LIBRBD_AIO_WRITE_PRE: @@ -329,44 +332,12 @@ namespace librbd { // If parent still exists, overlap might also have changed. if (compute_parent_extents()) { - m_state = LIBRBD_AIO_WRITE_COPYUP; - - if (is_copy_on_read(m_ictx, CEPH_NOSNAP)) { - m_ictx->copyup_list_lock.Lock(); - it = m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - // If it is not in the list, create a CopyupRequest and wait - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, - m_parent_extents); - // make sure to wait on this CopyupRequest - new_req->append_request(this); - m_ictx->copyup_list[m_object_no] = new_req; - - m_entire_object = &(new_req->get_copyup_data()); - m_ictx->copyup_list_lock.Unlock(); - new_req->send(); - } else { - it->second->append_request(this); - m_entire_object = &it->second->get_copyup_data(); - m_ictx->copyup_list_lock.Unlock(); - } - } else { - read_from_parent(m_parent_extents, false); - } + send_copyup(); } else { - /* - * Parent may have disappeared; if so, recover by using - * send_copyup() to send the original write req (the copyup - * operation itself will be a no-op, since someone must have - * populated the child object while we weren't looking). - * Move to WRITE_FLAT state as we'll be done with the - * operation once the null copyup completes. - */ + // parent may have disappeared -- send original write again ldout(m_ictx->cct, 20) << "should_complete(" << this << "): parent overlap now 0" << dendl; - m_state = LIBRBD_AIO_WRITE_FLAT; - send_copyup(); + send_write(); } finished = false; break; @@ -388,15 +359,7 @@ namespace librbd { return should_complete(r); } - // Read data from waiting list safely. If this AioWrite created a - // CopyupRequest, m_read_data should be empty. - if (m_entire_object != NULL) { - assert(m_read_data.length() == 0); - m_read_data.append(*m_entire_object); - } - - send_copyup(); - finished = false; + finished = send_post(); break; case LIBRBD_AIO_WRITE_FLAT: @@ -515,21 +478,30 @@ namespace librbd { rados_completion->release(); } - void AbstractWrite::send_copyup() { + void AbstractWrite::send_copyup() + { ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl; - librados::ObjectWriteOperation op; - if (!m_read_data.is_zero()) { - op.exec("rbd", "copyup", m_read_data); + m_state = LIBRBD_AIO_WRITE_COPYUP; + + m_ictx->copyup_list_lock.Lock(); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, + m_parent_extents); + + // make sure to wait on this CopyupRequest + new_req->append_request(this); + m_ictx->copyup_list[m_object_no] = new_req; + + m_ictx->copyup_list_lock.Unlock(); + new_req->send(); + } else { + it->second->append_request(this); + m_ictx->copyup_list_lock.Unlock(); } - add_write_ops(&op); - assert(op.size() != 0); - - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(this, NULL, rados_req_cb); - m_ictx->md_ctx.aio_operate(m_oid, rados_completion, &op, - m_snap_seq, m_snaps); - rados_completion->release(); } void AioWrite::add_write_ops(librados::ObjectWriteOperation *wr) { diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index 29bd880d7b7bb..ecfb08f9c219b 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -31,7 +31,9 @@ namespace librbd { uint64_t objectno, uint64_t off, uint64_t len, librados::snap_t snap_id, Context *completion, bool hide_enoent); - virtual ~AioRequest(); + virtual ~AioRequest() {} + + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; void complete(int r); @@ -44,8 +46,6 @@ namespace librbd { protected: bool compute_parent_extents(); - void read_from_parent(const vector >& image_extents, - bool block_completion); ImageCtx *m_ictx; std::string m_oid; @@ -53,8 +53,6 @@ namespace librbd { librados::snap_t m_snap_id; Context *m_completion; std::vector > m_parent_extents; - AioCompletion *m_parent_completion; - ceph::bufferlist m_read_data; bool m_hide_enoent; }; @@ -65,7 +63,8 @@ namespace librbd { vector >& be, librados::snap_t snap_id, bool sparse, Context *completion, int op_flags); - virtual ~AioRead() {} + virtual ~AioRead(); + virtual bool should_complete(int r); virtual int send(); void guard_read(); @@ -83,6 +82,8 @@ namespace librbd { bool m_tried_parent; bool m_sparse; int m_op_flags; + ceph::bufferlist m_read_data; + AioCompletion *m_parent_completion; /** * Reads go through the following state machine to deal with @@ -107,6 +108,9 @@ namespace librbd { }; read_state_d m_state; + + void send_copyup(); + void read_from_parent(const vector >& image_extents); }; class AbstractWrite : public AioRequest { @@ -116,6 +120,11 @@ namespace librbd { Context *completion, bool hide_enoent); virtual ~AbstractWrite() {} + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) + { + add_write_ops(wr); + } + virtual bool should_complete(int r); virtual int send(); @@ -136,20 +145,23 @@ namespace librbd { * . | | . * v v need copyup | . * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . - * . | ^ | | . - * . | | | | . - * . | \---------------------------/ | . - * . | | . - * . \-------------------\ /-------------------/ . - * . | | . - * . LIBRBD_AIO_WRITE_POST . - * . | . - * . v . + * . | | . | . + * . | | . | . + * . | /-----/ . | . + * . | | . | . + * . \-------------------\ | /-------------------/ . + * . | | | . . + * . v v v . . + * . LIBRBD_AIO_WRITE_POST . . + * . | . . + * . | . . . . . . . . . + * . | . . + * . v v . * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . * - * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map - * is disabled. The write starts in _WRITE_GUARD or _FLAT depending on - * whether or not there is a parent overlap. + * The _PRE/_POST states are skipped if the object map is disabled. + * The write starts in _WRITE_GUARD or _FLAT depending on whether or not + * there is a parent overlap. */ enum write_state_d { LIBRBD_AIO_WRITE_GUARD, @@ -165,7 +177,6 @@ namespace librbd { librados::ObjectWriteOperation m_write; uint64_t m_snap_seq; std::vector m_snaps; - ceph::bufferlist *m_entire_object; virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; virtual void guard_write(); diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 7abb475fa4dec..f96dfbbce96f4 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -35,20 +35,12 @@ namespace librbd { m_async_op.finish_op(); } - ceph::bufferlist& CopyupRequest::get_copyup_data() { - return m_copyup_data; - } - void CopyupRequest::append_request(AioRequest *req) { ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; m_pending_requests.push_back(req); } - bool CopyupRequest::complete_requests(int r) { - if (m_pending_requests.empty()) { - return false; - } - + void CopyupRequest::complete_requests(int r) { while (!m_pending_requests.empty()) { vector::iterator it = m_pending_requests.begin(); AioRequest *req = *it; @@ -57,13 +49,9 @@ namespace librbd { req->complete(r); m_pending_requests.erase(it); } - return true; } - void CopyupRequest::send_copyup() { - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid << dendl; - + bool CopyupRequest::send_copyup() { m_ictx->snap_lock.get_read(); ::SnapContext snapc = m_ictx->snapc; m_ictx->snap_lock.put_read(); @@ -72,12 +60,33 @@ namespace librbd { snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); librados::ObjectWriteOperation copyup_op; - copyup_op.exec("rbd", "copyup", m_copyup_data); + if (!m_copyup_data.is_zero()) { + copyup_op.exec("rbd", "copyup", m_copyup_data); + } + + // merge all pending write ops into this single RADOS op + for (size_t i=0; icct, 20) << __func__ << " add_copyup_ops " << req << dendl; + req->add_copyup_ops(©up_op); + } + + if (copyup_op.size() == 0) { + return true; + } + + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid << dendl; + m_state = STATE_COPYUP; librados::AioCompletion *comp = - librados::Rados::aio_create_completion(NULL, NULL, NULL); - m_ictx->md_ctx.aio_operate(m_oid, comp, ©up_op, snapc.seq.val, snaps); + librados::Rados::aio_create_completion(create_callback_context(), NULL, + rados_ctx_cb); + int r = m_ictx->md_ctx.aio_operate(m_oid, comp, ©up_op, snapc.seq.val, + snaps); + assert(r == 0); comp->release(); + return false; } void CopyupRequest::send() @@ -124,7 +133,7 @@ namespace librbd { bool CopyupRequest::should_complete(int r) { CephContext *cct = m_ictx->cct; - ldout(cct, 20) << __func__ << " " + ldout(cct, 20) << __func__ << " " << this << ": oid " << m_oid << ", extents " << m_image_extents << ", r " << r << dendl; @@ -133,22 +142,23 @@ namespace librbd { case STATE_READ_FROM_PARENT: ldout(cct, 20) << "READ_FROM_PARENT" << dendl; remove_from_list(); - if (complete_requests(r)) { - // pending write operation: it will handle object map / copyup - return true; - } else if (r < 0) { - // nothing to copyup - return true; - } else if (send_object_map()) { - return true; + if (r >= 0) { + return send_object_map(); + } else if (r == -ENOENT) { + return send_copyup(); } break; case STATE_OBJECT_MAP: ldout(cct, 20) << "OBJECT_MAP" << dendl; if (r == 0) { - send_copyup(); + return send_copyup(); } + break; + + case STATE_COPYUP: + ldout(cct, 20) << "COPYUP" << dendl; + complete_requests(r); return true; default: @@ -156,6 +166,11 @@ namespace librbd { assert(false); break; } + + if (r < 0) { + complete_requests(r); + return true; + } return false; } @@ -170,37 +185,40 @@ namespace librbd { } bool CopyupRequest::send_object_map() { - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid - << ", extents " << m_image_extents - << dendl; - bool copyup = false; { RWLock::RLocker l(m_ictx->owner_lock); if (!m_ictx->object_map.enabled()) { copyup = true; } else if (!m_ictx->image_watcher->is_lock_owner()) { - ldout(m_ictx->cct, 20) << "exclusive lock not held for copy-on-read" + ldout(m_ictx->cct, 20) << "exclusive lock not held for copyup request" << dendl; - return true; + assert(m_pending_requests.empty()); + return true; } else { - m_state = STATE_OBJECT_MAP; - Context *ctx = create_callback_context(); RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (!m_ictx->object_map.aio_update(m_object_no, OBJECT_EXISTS, - boost::optional(), ctx)) { - delete ctx; - copyup = true; - } + if (m_ictx->object_map[m_object_no] != OBJECT_EXISTS) { + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << ", extents " << m_image_extents + << dendl; + m_state = STATE_OBJECT_MAP; + + Context *ctx = create_callback_context(); + bool sent = m_ictx->object_map.aio_update(m_object_no, OBJECT_EXISTS, + boost::optional(), + ctx); + assert(sent); + } else { + copyup = true; + } } } // avoid possible recursive lock attempts if (copyup) { // no object map update required - send_copyup(); - return true; + return send_copyup(); } return false; } diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index 87a6b16efb4ce..667e14e7e8645 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -20,7 +20,6 @@ namespace librbd { vector >& image_extents); ~CopyupRequest(); - ceph::bufferlist& get_copyup_data(); void append_request(AioRequest *req); void send(); @@ -37,20 +36,24 @@ namespace librbd { * * | * v - * STATE_READ_FROM_PARENT ---> STATE_OBJECT_MAP - * . | - * . . . . . . . . . . . . . | - * . | - * v v - * + * STATE_READ_FROM_PARENT ----> STATE_OBJECT_MAP . . . + * . . | . + * . . v . + * . . . . . > STATE_COPYUP . + * . | . + * . v . + * . . . . . . . . . . . . > < . . . . . . * * @endverbatim * - * The _OBJECT_MAP state is skipped if the object map isn't enabled. + * The _OBJECT_MAP state is skipped if the object map isn't enabled or if + * an object map update isn't required. The _COPYUP state is skipped if + * no data was read from the parent *and* there are no additional ops. */ enum State { STATE_READ_FROM_PARENT, - STATE_OBJECT_MAP + STATE_OBJECT_MAP, + STATE_COPYUP }; ImageCtx *m_ictx; @@ -63,15 +66,15 @@ namespace librbd { AsyncOperation m_async_op; - bool complete_requests(int r); + void complete_requests(int r); void complete(int r); bool should_complete(int r); void remove_from_list(); - bool send_object_map(); - void send_copyup(); + bool send_object_map(); + bool send_copyup(); Context *create_callback_context(); }; -- 2.39.5