From: Cheng Cheng Date: Fri, 16 Jan 2015 19:34:59 +0000 (-0500) Subject: librbd: copy-on-read X-Git-Tag: v0.93~197^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=190c185b74b5959000d8dc79e8ee0a10ed0fc979;p=ceph.git librbd: copy-on-read Addressed Jason's review comments. Signed-off-by: Cheng Cheng --- diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index dc61c00f5910..14a6328d37c3 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -23,7 +23,7 @@ namespace librbd { m_ictx(NULL), m_ioctx(NULL), m_object_no(0), m_object_off(0), m_object_len(0), m_snap_id(CEPH_NOSNAP), m_completion(NULL), m_parent_completion(NULL), - m_hide_enoent(false), m_entire_object(NULL) {} + m_hide_enoent(false) {} AioRequest::AioRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, uint64_t off, uint64_t len, const ::SnapContext &snapc, librados::snap_t snap_id, @@ -32,7 +32,7 @@ namespace librbd { m_ictx(ictx), m_ioctx(&ictx->data_ctx), m_oid(oid), m_object_no(objectno), m_object_off(off), m_object_len(len), m_snap_id(snap_id), m_completion(completion), m_parent_completion(NULL), - m_hide_enoent(hide_enoent), m_entire_object(NULL) { + m_hide_enoent(hide_enoent) { for (std::vector::const_iterator it = snapc.snaps.begin(); it != snapc.snaps.end(); ++it) m_snaps.push_back(it->val); @@ -57,27 +57,42 @@ namespace librbd { m_parent_completion, 0); } - static inline bool is_cor(ImageCtx *ictx, librados::snap_t snap_id) { - return (ictx->cct->_conf->rbd_clone_copy_on_read) && - (!ictx->read_only) && (snap_id == CEPH_NOSNAP); + static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { + assert(ictx->snap_lock.is_locked()); + return (ictx->cct->_conf->rbd_clone_copy_on_read) && + (!ictx->read_only) && (snap_id == CEPH_NOSNAP); } /** read **/ - void AioRead::guard_read() - { + AioRead::AioRead(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + vector >& be, + const ::SnapContext &snapc, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) + : AioRequest(ictx, oid, objectno, offset, len, snapc, snap_id, completion, + false), + m_buffer_extents(be), m_tried_parent(false), + m_sparse(sparse), m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) { RWLock::RLocker l(m_ictx->snap_lock); RWLock::RLocker l2(m_ictx->parent_lock); - vector > image_extents; Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0, m_ictx->layout.fl_object_size, - image_extents); + m_image_extents); + + guard_read(); + } + + void AioRead::guard_read() + { + assert(m_ictx->snap_lock.is_locked()); uint64_t image_overlap = 0; m_ictx->get_parent_overlap(m_snap_id, &image_overlap); uint64_t object_overlap = - m_ictx->prune_parent_extents(image_extents, image_overlap); + m_ictx->prune_parent_extents(m_image_extents, image_overlap); if (object_overlap) { ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; m_state = LIBRBD_AIO_READ_GUARD; @@ -86,12 +101,8 @@ namespace librbd { bool AioRead::should_complete(int r) { - ldout(m_ictx->cct, 20) << "AioRead::should_complete " << this - << " " << m_oid << " " << m_object_off - << "~" << m_object_len << " r = " << r - << " cor = " << is_cor(m_ictx, m_snap_id) - << " readonly = " << m_ictx->read_only - << dendl; + ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len + << " r = " << r << dendl; bool finished = true; @@ -105,6 +116,13 @@ namespace librbd { RWLock::RLocker l(m_ictx->snap_lock); RWLock::RLocker l2(m_ictx->parent_lock); + if (m_ictx->parent == NULL) { + ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; + m_state = LIBRBD_AIO_READ_FLAT; + finished = false; + break; + } + // calculate reverse mapping onto the image vector > image_extents; Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, @@ -120,7 +138,7 @@ namespace librbd { if (object_overlap) { m_tried_parent = true; - if (is_cor(m_ictx, m_snap_id)) { + if (is_copy_on_read(m_ictx, m_snap_id)) { // If there is a valid object being coyping up, directly extract // content and finish up. Mutex::Locker l3(m_ictx->copyup_list_lock); @@ -142,7 +160,7 @@ namespace librbd { read_from_parent(image_extents); - if (is_cor(m_ictx, m_snap_id)) + if (is_copy_on_read(m_ictx, m_snap_id)) m_state = LIBRBD_AIO_READ_COPYUP; else m_state = LIBRBD_AIO_READ_GUARD; @@ -157,7 +175,8 @@ namespace librbd { // It is different from copy-on-write as asynchronous copyup will finish // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. - if (m_tried_parent && r > 0) { + assert(m_tried_parent); + if (r > 0) { // If read entire object from parent success and CoR is possible, kick // off a asynchronous copyup. This approach minimizes the latency // impact. @@ -165,30 +184,28 @@ namespace librbd { map::iterator it = m_ictx->copyup_list.find(m_object_no); if (it == m_ictx->copyup_list.end()) { - // create and kick off a CopyupRequest - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, true); - pair new_slot = - pair(m_object_no, new_req); - m_ictx->copyup_list.insert(new_slot); - m_ictx->copyup_list_lock.Unlock(); - RWLock::RLocker l(m_ictx->snap_lock); RWLock::RLocker l2(m_ictx->parent_lock); - vector > extend_image_extents; - //extend range to entire object - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - 0, m_ictx->layout.fl_object_size, - extend_image_extents); - uint64_t image_overlap = 0; - r = m_ictx->get_parent_overlap(m_snap_id, &image_overlap); - if (r < 0) { - assert(0 == "FIXME"); + if (m_ictx->parent == NULL) { + ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; + m_state = LIBRBD_AIO_READ_FLAT; + finished = true; + break; } - m_ictx->prune_parent_extents(extend_image_extents, image_overlap); - m_ictx->copyup_list[m_object_no]->read_from_parent(extend_image_extents); + // If parent still exists, overlap might also have changed. + uint64_t newlen = m_ictx->prune_parent_extents( + m_image_extents, m_ictx->parent_md.overlap); + if (newlen != 0) { + // create and kick off a CopyupRequest + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, true); + m_ictx->copyup_list[m_object_no] = new_req; + m_ictx->copyup_list_lock.Unlock(); + + new_req->read_from_parent(m_image_extents); + } } else { m_ictx->copyup_list_lock.Unlock(); } @@ -203,7 +220,7 @@ namespace librbd { break; case LIBRBD_AIO_READ_FLAT: ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" << dendl; - // The read contect should be deposit in m_read_data + // The read content should be deposit in m_read_data break; default: lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; @@ -250,7 +267,7 @@ namespace librbd { bool hide_enoent) : AioRequest(ictx, oid, object_no, object_off, len, snapc, snap_id, completion, hide_enoent), - m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) + m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val), m_entire_object(NULL) { m_object_image_extents = objectx; m_parent_overlap = object_overlap; @@ -311,7 +328,7 @@ namespace librbd { m_state = LIBRBD_AIO_WRITE_COPYUP; - if (is_cor(m_ictx, m_snap_id)) { + if (is_copy_on_read(m_ictx, m_snap_id)) { m_ictx->copyup_list_lock.Lock(); it = m_ictx->copyup_list.find(m_object_no); if (it == m_ictx->copyup_list.end()) { @@ -320,15 +337,13 @@ namespace librbd { m_object_no, false); // make sure to wait on this CopyupRequest new_req->append_request(this); - pair new_slot = - pair(m_object_no, new_req); - m_ictx->copyup_list.insert(new_slot); + m_ictx->copyup_list[m_object_no] = new_req; m_ictx->copyup_list_lock.Unlock(); - m_entire_object = &(m_ictx->copyup_list[m_object_no]->get_copyup_data()); + m_entire_object = &(new_req->get_copyup_data()); ldout(m_ictx->cct, 20) << __func__ << " creating new Copyup for AioWrite, obj-" << m_object_no << dendl; - m_ictx->copyup_list[m_object_no]->read_from_parent(m_object_image_extents); + new_req->read_from_parent(m_object_image_extents); ldout(m_ictx->cct, 20) << __func__ << " issuing read_from_parent" << dendl; } else { Mutex::Locker l3(it->second->get_lock()); @@ -345,13 +360,13 @@ namespace librbd { } else { ldout(m_ictx->cct, 20) << __func__ << " someone is reading back from parent" << dendl; it->second->append_request(this); - m_ictx->copyup_list_lock.Unlock(); - m_entire_object = &(m_ictx->copyup_list[m_object_no]->get_copyup_data()); + m_ictx->copyup_list_lock.Unlock(); } } - } else + } else { read_from_parent(m_object_image_extents); + } } else { ldout(m_ictx->cct, 20) << "should_complete(" << this << "): parent overlap now 0" << dendl; @@ -376,15 +391,19 @@ namespace librbd { // Read data from waiting list safely. If this AioWrite created a // CopyupRequest, m_read_data should be empty. - if (m_read_data.is_zero()) { - Mutex::Locker l(m_ictx->copyup_list_lock); - ldout(m_ictx->cct, 20) << __func__ << " releasing self pending, obj-" - << m_object_no << dendl; - it = m_ictx->copyup_list.find(m_object_no); - assert(it != m_ictx->copyup_list.end()); - assert(it->second->is_ready()); - assert(m_entire_object); - m_read_data.append(*m_entire_object); + { + RWLock::RLocker l3(m_ictx->snap_lock); + if (is_copy_on_read(m_ictx, m_snap_id) && + m_read_data.is_zero()) { + Mutex::Locker l(m_ictx->copyup_list_lock); + ldout(m_ictx->cct, 20) << __func__ << " releasing self pending, obj-" + << m_object_no << dendl; + it = m_ictx->copyup_list.find(m_object_no); + assert(it != m_ictx->copyup_list.end()); + assert(it->second->is_ready()); + assert(m_entire_object); + m_read_data.append(*m_entire_object); + } } send_copyup(); diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index 4368c1214d1e..4e29feb82aa6 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -59,7 +59,6 @@ namespace librbd { ceph::bufferlist m_read_data; bool m_hide_enoent; std::vector m_snaps; - ceph::bufferlist *m_entire_object; }; class AioRead : public AioRequest { @@ -68,13 +67,7 @@ namespace librbd { uint64_t objectno, uint64_t offset, uint64_t len, vector >& be, const ::SnapContext &snapc, librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags) - : AioRequest(ictx, oid, objectno, offset, len, snapc, snap_id, completion, - false), - m_buffer_extents(be), m_tried_parent(false), - m_sparse(sparse), m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) { - guard_read(); - } + Context *completion, int op_flags); virtual ~AioRead() {} virtual bool should_complete(int r); virtual int send(); @@ -92,6 +85,7 @@ namespace librbd { bool m_tried_parent; bool m_sparse; int m_op_flags; + vector > m_image_extents; /** * Reads go through the following state machine to deal with @@ -169,6 +163,7 @@ namespace librbd { librados::ObjectWriteOperation m_write; librados::ObjectWriteOperation m_copyup; uint64_t m_snap_seq; + ceph::bufferlist *m_entire_object; private: void send_copyup(); diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 1afe71f34993..c82e2e7e77a2 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -16,16 +16,12 @@ #define dout_prefix *_dout << "librbd::CopyupRequest: " namespace librbd { - CopyupRequest::CopyupRequest() - : m_ictx(NULL), m_object_no(0), m_lock("librbd::CopyupRequest::m_lock"), - m_ready(false), m_need_copyup(false), m_parent_completion(NULL), - m_copyup_completion(NULL) {} CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, bool need_copyup) + uint64_t objectno, bool send_copyup) : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_lock("librbd::CopyupRequest::m_lock"), m_ready(false), - m_need_copyup(need_copyup), m_parent_completion(NULL), + m_send_copyup(send_copyup), m_parent_completion(NULL), m_copyup_completion(NULL) {} CopyupRequest::~CopyupRequest() { @@ -57,8 +53,8 @@ namespace librbd { return m_ready; } - bool CopyupRequest::is_need_send_copyup() { - return m_need_copyup; + bool CopyupRequest::should_send_copyup() { + return m_send_copyup; } ceph::bufferlist& CopyupRequest::get_copyup_data() { @@ -117,30 +113,30 @@ namespace librbd { aio_read(m_ictx->parent, image_extents, NULL, &m_copyup_data, m_parent_completion, 0); } - void rbd_read_from_parent_cb(completion_t cb, void *arg) + void CopyupRequest::rbd_read_from_parent_cb(completion_t cb, void *arg) { CopyupRequest *req = reinterpret_cast(arg); AioCompletion *comp = reinterpret_cast(cb); ldout(req->m_ictx->cct, 20) << __func__ << dendl; - req->get_lock().Lock(); + req->m_lock.Lock(); req->set_ready(); req->complete_all(comp->get_return_value()); - req->get_lock().Unlock(); + req->m_lock.Unlock(); // If this entry is created by a read request, then copyup operation will // be performed asynchronously. Perform cleaning up from copyup callback. // If this entry is created by a write request, then copyup operation will // be performed synchronously by AioWrite. After extracting data, perform // cleaning up here - if (req->is_need_send_copyup()) + if (req->should_send_copyup()) req->send_copyup(comp->get_return_value()); else delete req; } - void rbd_copyup_cb(rados_completion_t c, void *arg) + void CopyupRequest::rbd_copyup_cb(rados_completion_t c, void *arg) { CopyupRequest *req = reinterpret_cast(arg); diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index 55813e2c0cab..246af27b7ac2 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -15,30 +15,30 @@ namespace librbd { class CopyupRequest { public: - CopyupRequest(); CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, - bool need_copyup); + bool send_copyup); ~CopyupRequest(); void set_ready(); bool is_ready(); - bool is_need_send_copyup(); + bool should_send_copyup(); ceph::bufferlist& get_copyup_data(); Mutex& get_lock(); void append_request(AioRequest *req); void complete_all(int r); void send_copyup(int r); void read_from_parent(vector >& image_extents); - AioCompletion *get_parent_completion() { return m_parent_completion; } - librados::AioCompletion *get_copyup_completion() { return m_copyup_completion; } - ImageCtx *m_ictx; + + static void rbd_read_from_parent_cb(completion_t cb, void *arg); + static void rbd_copyup_cb(completion_t aio_completion_impl, void *arg); private: + ImageCtx *m_ictx; std::string m_oid; uint64_t m_object_no; Mutex m_lock; bool m_ready; - bool m_need_copyup; + bool m_send_copyup; AioCompletion *m_parent_completion; librados::AioCompletion *m_copyup_completion; ceph::bufferlist m_copyup_data;