From: Jason Dillaman Date: Wed, 27 Jul 2016 12:49:44 +0000 (-0400) Subject: librbd: removed namespace indentation from legacy classes X-Git-Tag: v10.2.3~48^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=61f0acb82ba344139bc2aa486fb096e5576e2553;p=ceph.git librbd: removed namespace indentation from legacy classes Better follows the Ceph C++ style guide Signed-off-by: Jason Dillaman (cherry picked from commit f1e391982b43ddfb363ff913260460368a6d5834) --- diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 1e892acb79473..c5cb3a1bad0e1 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -28,239 +28,240 @@ namespace librbd { - int AioCompletion::wait_for_complete() { - tracepoint(librbd, aio_wait_for_complete_enter, this); - lock.Lock(); - while (state != STATE_COMPLETE) - cond.Wait(lock); - lock.Unlock(); - tracepoint(librbd, aio_wait_for_complete_exit, 0); - return 0; - } +int AioCompletion::wait_for_complete() { + tracepoint(librbd, aio_wait_for_complete_enter, this); + lock.Lock(); + while (state != STATE_COMPLETE) + cond.Wait(lock); + lock.Unlock(); + tracepoint(librbd, aio_wait_for_complete_exit, 0); + return 0; +} - void AioCompletion::finalize(ssize_t rval) - { - assert(lock.is_locked()); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", " - << "read_buf=" << reinterpret_cast(read_buf) << ", " - << "real_bl=" << reinterpret_cast(read_bl) << dendl; - if (rval >= 0 && aio_type == AIO_TYPE_READ) { - if (read_buf && !read_bl) { - destriper.assemble_result(cct, read_buf, read_buf_len); - } else { - // FIXME: make the destriper write directly into a buffer so - // that we avoid shuffling pointers and copying zeros around. 
- bufferlist bl; - destriper.assemble_result(cct, bl, true); - - if (read_buf) { - assert(bl.length() == read_buf_len); - bl.copy(0, read_buf_len, read_buf); - ldout(cct, 20) << "copied resulting " << bl.length() - << " bytes to " << (void*)read_buf << dendl; - } - if (read_bl) { - ldout(cct, 20) << " moving resulting " << bl.length() - << " bytes to bl " << (void*)read_bl << dendl; - read_bl->claim(bl); - } +void AioCompletion::finalize(ssize_t rval) +{ + assert(lock.is_locked()); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", " + << "read_buf=" << reinterpret_cast(read_buf) << ", " + << "real_bl=" << reinterpret_cast(read_bl) << dendl; + if (rval >= 0 && aio_type == AIO_TYPE_READ) { + if (read_buf && !read_bl) { + destriper.assemble_result(cct, read_buf, read_buf_len); + } else { + // FIXME: make the destriper write directly into a buffer so + // that we avoid shuffling pointers and copying zeros around. 
+ bufferlist bl; + destriper.assemble_result(cct, bl, true); + + if (read_buf) { + assert(bl.length() == read_buf_len); + bl.copy(0, read_buf_len, read_buf); + ldout(cct, 20) << "copied resulting " << bl.length() + << " bytes to " << (void*)read_buf << dendl; + } + if (read_bl) { + ldout(cct, 20) << " moving resulting " << bl.length() + << " bytes to bl " << (void*)read_bl << dendl; + read_bl->claim(bl); } } } +} - void AioCompletion::complete() { - assert(lock.is_locked()); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - tracepoint(librbd, aio_complete_enter, this, rval); - utime_t elapsed; - elapsed = ceph_clock_now(cct) - start_time; - switch (aio_type) { - case AIO_TYPE_OPEN: - case AIO_TYPE_CLOSE: - break; - case AIO_TYPE_READ: - ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break; - case AIO_TYPE_WRITE: - ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break; - case AIO_TYPE_DISCARD: - ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break; - case AIO_TYPE_FLUSH: - ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; - default: - lderr(cct) << "completed invalid aio_type: " << aio_type << dendl; - break; - } +void AioCompletion::complete() { + assert(lock.is_locked()); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + tracepoint(librbd, aio_complete_enter, this, rval); + utime_t elapsed; + elapsed = ceph_clock_now(cct) - start_time; + switch (aio_type) { + case AIO_TYPE_OPEN: + case AIO_TYPE_CLOSE: + break; + case AIO_TYPE_READ: + ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break; + case AIO_TYPE_WRITE: + ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break; + case AIO_TYPE_DISCARD: + ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break; + case AIO_TYPE_FLUSH: + ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break; + default: + lderr(cct) << "completed invalid aio_type: " << aio_type << dendl; + break; + } - // inform the journal that the op 
has successfully committed - if (journal_tid != 0) { - assert(ictx->journal != NULL); - ictx->journal->commit_io_event(journal_tid, rval); - } + // inform the journal that the op has successfully committed + if (journal_tid != 0) { + assert(ictx->journal != NULL); + ictx->journal->commit_io_event(journal_tid, rval); + } - state = STATE_CALLBACK; - if (complete_cb) { - lock.Unlock(); - complete_cb(rbd_comp, complete_arg); - lock.Lock(); - } + state = STATE_CALLBACK; + if (complete_cb) { + lock.Unlock(); + complete_cb(rbd_comp, complete_arg); + lock.Lock(); + } - if (ictx && event_notify && ictx->event_socket.is_valid()) { - ictx->completed_reqs_lock.Lock(); - ictx->completed_reqs.push_back(&m_xlist_item); - ictx->completed_reqs_lock.Unlock(); - ictx->event_socket.notify(); - } + if (ictx && event_notify && ictx->event_socket.is_valid()) { + ictx->completed_reqs_lock.Lock(); + ictx->completed_reqs.push_back(&m_xlist_item); + ictx->completed_reqs_lock.Unlock(); + ictx->event_socket.notify(); + } - state = STATE_COMPLETE; - cond.Signal(); + state = STATE_COMPLETE; + cond.Signal(); - // note: possible for image to be closed after op marked finished - if (async_op.started()) { - async_op.finish_op(); - } - tracepoint(librbd, aio_complete_exit); + // note: possible for image to be closed after op marked finished + if (async_op.started()) { + async_op.finish_op(); } + tracepoint(librbd, aio_complete_exit); +} - void AioCompletion::init_time(ImageCtx *i, aio_type_t t) { - Mutex::Locker locker(lock); - if (ictx == nullptr) { - ictx = i; - aio_type = t; - start_time = ceph_clock_now(ictx->cct); - } +void AioCompletion::init_time(ImageCtx *i, aio_type_t t) { + Mutex::Locker locker(lock); + if (ictx == nullptr) { + ictx = i; + aio_type = t; + start_time = ceph_clock_now(ictx->cct); } +} - void AioCompletion::start_op(bool ignore_type) { - Mutex::Locker locker(lock); - assert(ictx != nullptr); - assert(!async_op.started()); - if (state == STATE_PENDING && (ignore_type || 
aio_type != AIO_TYPE_FLUSH)) { - async_op.start_op(*ictx); - } +void AioCompletion::start_op(bool ignore_type) { + Mutex::Locker locker(lock); + assert(ictx != nullptr); + assert(!async_op.started()); + if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) { + async_op.start_op(*ictx); } +} - void AioCompletion::fail(int r) - { - lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; +void AioCompletion::fail(int r) +{ + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r) + << dendl; + assert(pending_count == 0); + rval = r; + complete(); + put_unlock(); +} - lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r) - << dendl; - assert(pending_count == 0); - rval = r; - complete(); - put_unlock(); - } +void AioCompletion::set_request_count(uint32_t count) { + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; - void AioCompletion::set_request_count(uint32_t count) { - lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; + ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl; + assert(pending_count == 0); + pending_count = count; + lock.Unlock(); - ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl; - assert(pending_count == 0); - pending_count = count; - lock.Unlock(); + // if no pending requests, completion will fire now + unblock(); +} - // if no pending requests, completion will fire now - unblock(); +void AioCompletion::complete_request(ssize_t r) +{ + lock.Lock(); + assert(ictx != nullptr); + CephContext *cct = ictx->cct; + + if (rval >= 0) { + if (r < 0 && r != -EEXIST) + rval = r; + else if (r > 0) + rval += r; } + assert(pending_count); + int count = --pending_count; - void AioCompletion::complete_request(ssize_t r) - { - lock.Lock(); - assert(ictx != nullptr); - CephContext *cct = ictx->cct; - - if (rval >= 0) { - if (r < 0 && r != 
-EEXIST) - rval = r; - else if (r > 0) - rval += r; - } - assert(pending_count); - int count = --pending_count; - - ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", " - << "pending=" << pending_count << dendl; - if (!count && blockers == 0) { - finalize(rval); - complete(); - } - put_unlock(); + ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", " + << "pending=" << pending_count << dendl; + if (!count && blockers == 0) { + finalize(rval); + complete(); } + put_unlock(); +} + +void AioCompletion::associate_journal_event(uint64_t tid) { + Mutex::Locker l(lock); + assert(state == STATE_PENDING); + journal_tid = tid; +} - void AioCompletion::associate_journal_event(uint64_t tid) { +bool AioCompletion::is_complete() { + tracepoint(librbd, aio_is_complete_enter, this); + bool done; + { Mutex::Locker l(lock); - assert(state == STATE_PENDING); - journal_tid = tid; + done = this->state == STATE_COMPLETE; } + tracepoint(librbd, aio_is_complete_exit, done); + return done; +} - bool AioCompletion::is_complete() { - tracepoint(librbd, aio_is_complete_enter, this); - bool done; - { - Mutex::Locker l(lock); - done = this->state == STATE_COMPLETE; - } - tracepoint(librbd, aio_is_complete_exit, done); - return done; - } +ssize_t AioCompletion::get_return_value() { + tracepoint(librbd, aio_get_return_value_enter, this); + lock.Lock(); + ssize_t r = rval; + lock.Unlock(); + tracepoint(librbd, aio_get_return_value_exit, r); + return r; +} - ssize_t AioCompletion::get_return_value() { - tracepoint(librbd, aio_get_return_value_enter, this); - lock.Lock(); - ssize_t r = rval; - lock.Unlock(); - tracepoint(librbd, aio_get_return_value_exit, r); - return r; +void C_AioRead::finish(int r) +{ + m_completion->lock.Lock(); + CephContext *cct = m_completion->ictx->cct; + ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl; + + if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation + ldout(cct, 10) << " got " << 
m_req->m_ext_map + << " for " << m_req->m_buffer_extents + << " bl " << m_req->data().length() << dendl; + // reads from the parent don't populate the m_ext_map and the overlap + // may not be the full buffer. compensate here by filling in m_ext_map + // with the read extent when it is empty. + if (m_req->m_ext_map.empty()) + m_req->m_ext_map[m_req->m_object_off] = m_req->data().length(); + + m_completion->destriper.add_partial_sparse_result( + cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off, + m_req->m_buffer_extents); + r = m_req->m_object_len; } + m_completion->lock.Unlock(); - void C_AioRead::finish(int r) - { - m_completion->lock.Lock(); - CephContext *cct = m_completion->ictx->cct; - ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl; - - if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation - ldout(cct, 10) << " got " << m_req->m_ext_map - << " for " << m_req->m_buffer_extents - << " bl " << m_req->data().length() << dendl; - // reads from the parent don't populate the m_ext_map and the overlap - // may not be the full buffer. compensate here by filling in m_ext_map - // with the read extent when it is empty. 
- if (m_req->m_ext_map.empty()) - m_req->m_ext_map[m_req->m_object_off] = m_req->data().length(); - - m_completion->destriper.add_partial_sparse_result( - cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off, - m_req->m_buffer_extents); - r = m_req->m_object_len; - } - m_completion->lock.Unlock(); - - C_AioRequest::finish(r); - } + C_AioRequest::finish(r); +} - void C_CacheRead::complete(int r) { - if (!m_enqueued) { - // cache_lock creates a lock ordering issue -- so re-execute this context - // outside the cache_lock - m_enqueued = true; - m_image_ctx.op_work_queue->queue(this, r); - return; - } - Context::complete(r); +void C_CacheRead::complete(int r) { + if (!m_enqueued) { + // cache_lock creates a lock ordering issue -- so re-execute this context + // outside the cache_lock + m_enqueued = true; + m_image_ctx.op_work_queue->queue(this, r); + return; } + Context::complete(r); +} - void C_CacheRead::finish(int r) - { - m_req->complete(r); - } +void C_CacheRead::finish(int r) +{ + m_req->complete(r); } + +} // namespace librbd diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index e259a484698a4..1ec87f4417cdf 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_AIOCOMPLETION_H -#define CEPH_LIBRBD_AIOCOMPLETION_H + +#ifndef CEPH_LIBRBD_AIO_COMPLETION_H +#define CEPH_LIBRBD_AIO_COMPLETION_H #include "common/Cond.h" #include "common/Mutex.h" @@ -18,248 +19,249 @@ class CephContext; namespace librbd { - class AioObjectRead; - - typedef enum { - AIO_TYPE_NONE = 0, - AIO_TYPE_OPEN, - AIO_TYPE_CLOSE, - AIO_TYPE_READ, - AIO_TYPE_WRITE, - AIO_TYPE_DISCARD, - AIO_TYPE_FLUSH, - } aio_type_t; - - typedef enum { - STATE_PENDING = 0, - STATE_CALLBACK, - STATE_COMPLETE, - } aio_state_t; - - /** - * AioCompletion is the overall completion for a single - * rbd I/O request. 
It may be composed of many AioObjectRequests, - * which each go to a single object. - * - * The retrying of individual requests is handled at a lower level, - * so all AioCompletion cares about is the count of outstanding - * requests. The number of expected individual requests should be - * set initially using set_request_count() prior to issuing the - * requests. This ensures that the completion will not be completed - * within the caller's thread of execution (instead via a librados - * context or via a thread pool context for cache read hits). - */ - struct AioCompletion { - mutable Mutex lock; - Cond cond; - aio_state_t state; - ssize_t rval; - callback_t complete_cb; - void *complete_arg; - rbd_completion_t rbd_comp; - uint32_t pending_count; ///< number of requests - uint32_t blockers; - int ref; - bool released; - ImageCtx *ictx; - utime_t start_time; - aio_type_t aio_type; - - Striper::StripedReadResult destriper; - bufferlist *read_bl; - char *read_buf; - size_t read_buf_len; - - AsyncOperation async_op; - - uint64_t journal_tid; - xlist::item m_xlist_item; - bool event_notify; - - template - static void callback_adapter(completion_t cb, void *arg) { - AioCompletion *comp = reinterpret_cast(cb); - T *t = reinterpret_cast(arg); - (t->*MF)(comp->get_return_value()); - comp->release(); - } - - static AioCompletion *create(void *cb_arg, callback_t cb_complete, - rbd_completion_t rbd_comp) { - AioCompletion *comp = new AioCompletion(); - comp->set_complete_cb(cb_arg, cb_complete); - comp->rbd_comp = (rbd_comp != nullptr ? 
rbd_comp : comp); - return comp; - } - - template - static AioCompletion *create(T *obj) { - AioCompletion *comp = new AioCompletion(); - comp->set_complete_cb(obj, &callback_adapter); - comp->rbd_comp = comp; - return comp; - } - - template - static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx, - aio_type_t type) { - AioCompletion *comp = create(obj); - comp->init_time(image_ctx, type); - comp->start_op(); - return comp; - } - - AioCompletion() : lock("AioCompletion::lock", true, false), - state(STATE_PENDING), rval(0), complete_cb(NULL), - complete_arg(NULL), rbd_comp(NULL), - pending_count(0), blockers(1), - ref(1), released(false), ictx(NULL), - aio_type(AIO_TYPE_NONE), - read_bl(NULL), read_buf(NULL), read_buf_len(0), - journal_tid(0), - m_xlist_item(this), event_notify(false) { - } - ~AioCompletion() { - } - - int wait_for_complete(); - - void finalize(ssize_t rval); - - inline bool is_initialized(aio_type_t type) const { - Mutex::Locker locker(lock); - return ((ictx != nullptr) && (aio_type == type)); - } - inline bool is_started() const { - Mutex::Locker locker(lock); - return async_op.started(); - } - - void init_time(ImageCtx *i, aio_type_t t); - void start_op(bool ignore_type = false); - void fail(int r); - - void complete(); - - void set_complete_cb(void *cb_arg, callback_t cb) { - complete_cb = cb; - complete_arg = cb_arg; - } - - void set_request_count(uint32_t num); - void add_request() { - lock.Lock(); - assert(pending_count > 0); - lock.Unlock(); - get(); - } - void complete_request(ssize_t r); - - void associate_journal_event(uint64_t tid); - - bool is_complete(); - - ssize_t get_return_value(); - - void get() { - lock.Lock(); - assert(ref > 0); - ref++; - lock.Unlock(); - } - void release() { - lock.Lock(); - assert(!released); - released = true; - put_unlock(); - } - void put() { - lock.Lock(); - put_unlock(); - } - void put_unlock() { - assert(ref > 0); - int n = --ref; - lock.Unlock(); - if (!n) { - if (ictx) { - if 
(event_notify) { - ictx->completed_reqs_lock.Lock(); - m_xlist_item.remove_myself(); - ictx->completed_reqs_lock.Unlock(); - } - if (aio_type == AIO_TYPE_CLOSE || (aio_type == AIO_TYPE_OPEN && - rval < 0)) { - delete ictx; - } - } - delete this; - } - } - - void block() { - Mutex::Locker l(lock); - ++blockers; - } - void unblock() { - Mutex::Locker l(lock); - assert(blockers > 0); - --blockers; - if (pending_count == 0 && blockers == 0) { - finalize(rval); - complete(); +class AioObjectRead; + +typedef enum { + AIO_TYPE_NONE = 0, + AIO_TYPE_OPEN, + AIO_TYPE_CLOSE, + AIO_TYPE_READ, + AIO_TYPE_WRITE, + AIO_TYPE_DISCARD, + AIO_TYPE_FLUSH, +} aio_type_t; + +typedef enum { + STATE_PENDING = 0, + STATE_CALLBACK, + STATE_COMPLETE, +} aio_state_t; + +/** + * AioCompletion is the overall completion for a single + * rbd I/O request. It may be composed of many AioObjectRequests, + * which each go to a single object. + * + * The retrying of individual requests is handled at a lower level, + * so all AioCompletion cares about is the count of outstanding + * requests. The number of expected individual requests should be + * set initially using set_request_count() prior to issuing the + * requests. This ensures that the completion will not be completed + * within the caller's thread of execution (instead via a librados + * context or via a thread pool context for cache read hits). 
+ */ +struct AioCompletion { + mutable Mutex lock; + Cond cond; + aio_state_t state; + ssize_t rval; + callback_t complete_cb; + void *complete_arg; + rbd_completion_t rbd_comp; + uint32_t pending_count; ///< number of requests + uint32_t blockers; + int ref; + bool released; + ImageCtx *ictx; + utime_t start_time; + aio_type_t aio_type; + + Striper::StripedReadResult destriper; + bufferlist *read_bl; + char *read_buf; + size_t read_buf_len; + + AsyncOperation async_op; + + uint64_t journal_tid; + xlist::item m_xlist_item; + bool event_notify; + + template + static void callback_adapter(completion_t cb, void *arg) { + AioCompletion *comp = reinterpret_cast(cb); + T *t = reinterpret_cast(arg); + (t->*MF)(comp->get_return_value()); + comp->release(); + } + + static AioCompletion *create(void *cb_arg, callback_t cb_complete, + rbd_completion_t rbd_comp) { + AioCompletion *comp = new AioCompletion(); + comp->set_complete_cb(cb_arg, cb_complete); + comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp); + return comp; + } + + template + static AioCompletion *create(T *obj) { + AioCompletion *comp = new AioCompletion(); + comp->set_complete_cb(obj, &callback_adapter); + comp->rbd_comp = comp; + return comp; + } + + template + static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx, + aio_type_t type) { + AioCompletion *comp = create(obj); + comp->init_time(image_ctx, type); + comp->start_op(); + return comp; + } + + AioCompletion() : lock("AioCompletion::lock", true, false), + state(STATE_PENDING), rval(0), complete_cb(NULL), + complete_arg(NULL), rbd_comp(NULL), + pending_count(0), blockers(1), + ref(1), released(false), ictx(NULL), + aio_type(AIO_TYPE_NONE), + read_bl(NULL), read_buf(NULL), read_buf_len(0), + journal_tid(0), m_xlist_item(this), event_notify(false) { + } + + ~AioCompletion() { + } + + int wait_for_complete(); + + void finalize(ssize_t rval); + + inline bool is_initialized(aio_type_t type) const { + Mutex::Locker locker(lock); + return 
((ictx != nullptr) && (aio_type == type)); + } + inline bool is_started() const { + Mutex::Locker locker(lock); + return async_op.started(); + } + + void init_time(ImageCtx *i, aio_type_t t); + void start_op(bool ignore_type = false); + void fail(int r); + + void complete(); + + void set_complete_cb(void *cb_arg, callback_t cb) { + complete_cb = cb; + complete_arg = cb_arg; + } + + void set_request_count(uint32_t num); + void add_request() { + lock.Lock(); + assert(pending_count > 0); + lock.Unlock(); + get(); + } + void complete_request(ssize_t r); + + void associate_journal_event(uint64_t tid); + + bool is_complete(); + + ssize_t get_return_value(); + + void get() { + lock.Lock(); + assert(ref > 0); + ref++; + lock.Unlock(); + } + void release() { + lock.Lock(); + assert(!released); + released = true; + put_unlock(); + } + void put() { + lock.Lock(); + put_unlock(); + } + void put_unlock() { + assert(ref > 0); + int n = --ref; + lock.Unlock(); + if (!n) { + if (ictx) { + if (event_notify) { + ictx->completed_reqs_lock.Lock(); + m_xlist_item.remove_myself(); + ictx->completed_reqs_lock.Unlock(); + } + if (aio_type == AIO_TYPE_CLOSE || + (aio_type == AIO_TYPE_OPEN && rval < 0)) { + delete ictx; + } } + delete this; } - - void set_event_notify(bool s) { - Mutex::Locker l(lock); - event_notify = s; - } - - void *get_arg() { - return complete_arg; - } - }; - - class C_AioRequest : public Context { - public: - C_AioRequest(AioCompletion *completion) : m_completion(completion) { - m_completion->add_request(); - } - virtual ~C_AioRequest() {} - virtual void finish(int r) { - m_completion->complete_request(r); - } - protected: - AioCompletion *m_completion; - }; - - class C_AioRead : public C_AioRequest { - public: - C_AioRead(AioCompletion *completion) - : C_AioRequest(completion), m_req(nullptr) { - } - virtual ~C_AioRead() {} - virtual void finish(int r); - void set_req(AioObjectRead *req) { - m_req = req; + } + + void block() { + Mutex::Locker l(lock); + ++blockers; + 
} + void unblock() { + Mutex::Locker l(lock); + assert(blockers > 0); + --blockers; + if (pending_count == 0 && blockers == 0) { + finalize(rval); + complete(); } - private: - AioObjectRead *m_req; - }; - - class C_CacheRead : public Context { - public: - explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req) - : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {} - virtual void complete(int r); - protected: - virtual void finish(int r); - private: - ImageCtx &m_image_ctx; - AioObjectRead *m_req; - bool m_enqueued; - }; -} - -#endif + } + + void set_event_notify(bool s) { + Mutex::Locker l(lock); + event_notify = s; + } + + void *get_arg() { + return complete_arg; + } +}; + +class C_AioRequest : public Context { +public: + C_AioRequest(AioCompletion *completion) : m_completion(completion) { + m_completion->add_request(); + } + virtual ~C_AioRequest() {} + virtual void finish(int r) { + m_completion->complete_request(r); + } +protected: + AioCompletion *m_completion; +}; + +class C_AioRead : public C_AioRequest { +public: + C_AioRead(AioCompletion *completion) + : C_AioRequest(completion), m_req(nullptr) { + } + virtual ~C_AioRead() {} + virtual void finish(int r); + void set_req(AioObjectRead *req) { + m_req = req; + } +private: + AioObjectRead *m_req; +}; + +class C_CacheRead : public Context { +public: + explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req) + : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {} + virtual void complete(int r); +protected: + virtual void finish(int r); +private: + ImageCtx &m_image_ctx; + AioObjectRead *m_req; + bool m_enqueued; +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_AIO_COMPLETION_H diff --git a/src/librbd/AioObjectRequest.cc b/src/librbd/AioObjectRequest.cc index 934375ec04029..799109b0ffbb3 100644 --- a/src/librbd/AioObjectRequest.cc +++ b/src/librbd/AioObjectRequest.cc @@ -27,560 +27,558 @@ namespace librbd { - AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, 
uint64_t off, - uint64_t len, librados::snap_t snap_id, - Context *completion, bool hide_enoent) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), - m_object_len(len), m_snap_id(snap_id), m_completion(completion), - m_hide_enoent(hide_enoent) { - - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - 0, m_ictx->layout.object_size, m_parent_extents); - - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - compute_parent_extents(); - } +AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, + uint64_t len, librados::snap_t snap_id, + Context *completion, bool hide_enoent) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), + m_object_len(len), m_snap_id(snap_id), m_completion(completion), + m_hide_enoent(hide_enoent) { + + Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, + 0, m_ictx->layout.object_size, m_parent_extents); + + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + compute_parent_extents(); +} - void AioObjectRequest::complete(int r) - { - if (should_complete(r)) { - ldout(m_ictx->cct, 20) << "complete " << this << dendl; - if (m_hide_enoent && r == -ENOENT) { - r = 0; - } - m_completion->complete(r); - delete this; +void AioObjectRequest::complete(int r) +{ + if (should_complete(r)) { + ldout(m_ictx->cct, 20) << "complete " << this << dendl; + if (m_hide_enoent && r == -ENOENT) { + r = 0; } + m_completion->complete(r); + delete this; } +} - bool AioObjectRequest::compute_parent_extents() { - assert(m_ictx->snap_lock.is_locked()); - assert(m_ictx->parent_lock.is_locked()); - - uint64_t parent_overlap; - int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r < 0) { - // NOTE: it's possible for a snapshot to be deleted while we are - // still reading from it - lderr(m_ictx->cct) << this << " 
compute_parent_extents: failed to " - << "retrieve parent overlap: " << cpp_strerror(r) - << dendl; - m_parent_extents.clear(); - return false; - } - - uint64_t object_overlap = - m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); - if (object_overlap > 0) { - ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " - << "overlap " << parent_overlap << " " - << "extents " << m_parent_extents << dendl; - return true; - } +bool AioObjectRequest::compute_parent_extents() { + assert(m_ictx->snap_lock.is_locked()); + assert(m_ictx->parent_lock.is_locked()); + + uint64_t parent_overlap; + int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); + if (r < 0) { + // NOTE: it's possible for a snapshot to be deleted while we are + // still reading from it + lderr(m_ictx->cct) << this << " compute_parent_extents: failed to " + << "retrieve parent overlap: " << cpp_strerror(r) + << dendl; + m_parent_extents.clear(); return false; } - static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { - assert(ictx->owner_lock.is_locked()); - assert(ictx->snap_lock.is_locked()); - return (ictx->clone_copy_on_read && - !ictx->read_only && snap_id == CEPH_NOSNAP && - (ictx->exclusive_lock == nullptr || - ictx->exclusive_lock->is_lock_owner())); + uint64_t object_overlap = + m_ictx->prune_parent_extents(m_parent_extents, parent_overlap); + if (object_overlap > 0) { + ldout(m_ictx->cct, 20) << this << " compute_parent_extents: " + << "overlap " << parent_overlap << " " + << "extents " << m_parent_extents << dendl; + return true; } + return false; +} - /** read **/ +static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { + assert(ictx->owner_lock.is_locked()); + assert(ictx->snap_lock.is_locked()); + return (ictx->clone_copy_on_read && + !ictx->read_only && snap_id == CEPH_NOSNAP && + (ictx->exclusive_lock == nullptr || + ictx->exclusive_lock->is_lock_owner())); +} - AioObjectRead::AioObjectRead(ImageCtx *ictx, const 
std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags) - : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion, - false), - m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), - m_op_flags(op_flags), m_parent_completion(NULL), - m_state(LIBRBD_AIO_READ_FLAT) { +/** read **/ - guard_read(); - } +AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + vector >& be, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags) + : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion, + false), + m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), + m_op_flags(op_flags), m_parent_completion(NULL), + m_state(LIBRBD_AIO_READ_FLAT) { - void AioObjectRead::guard_read() - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); + guard_read(); +} - if (has_parent()) { - ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; - m_state = LIBRBD_AIO_READ_GUARD; - } +void AioObjectRead::guard_read() +{ + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + + if (has_parent()) { + ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl; + m_state = LIBRBD_AIO_READ_GUARD; } +} - bool AioObjectRead::should_complete(int r) - { - ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " r = " << r << dendl; - - bool finished = true; - - switch (m_state) { - case LIBRBD_AIO_READ_GUARD: - ldout(m_ictx->cct, 20) << "should_complete " << this - << " READ_CHECK_GUARD" << dendl; - - // This is the step to read from parent - if (!m_tried_parent && r == -ENOENT) { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker 
snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (m_ictx->parent == NULL) { - ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; - m_state = LIBRBD_AIO_READ_FLAT; - finished = false; - break; - } - - // calculate reverse mapping onto the image - vector > parent_extents; - Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, - m_object_off, m_object_len, parent_extents); - - uint64_t parent_overlap = 0; - uint64_t object_overlap = 0; - r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); - if (r == 0) { - object_overlap = m_ictx->prune_parent_extents(parent_extents, - parent_overlap); - } +bool AioObjectRead::should_complete(int r) +{ + ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " r = " << r << dendl; + + bool finished = true; + + switch (m_state) { + case LIBRBD_AIO_READ_GUARD: + ldout(m_ictx->cct, 20) << "should_complete " << this + << " READ_CHECK_GUARD" << dendl; + + // This is the step to read from parent + if (!m_tried_parent && r == -ENOENT) { + { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + if (m_ictx->parent == NULL) { + ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; + m_state = LIBRBD_AIO_READ_FLAT; + finished = false; + break; + } - if (object_overlap > 0) { - m_tried_parent = true; - if (is_copy_on_read(m_ictx, m_snap_id)) { - m_state = LIBRBD_AIO_READ_COPYUP; - } + // calculate reverse mapping onto the image + vector > parent_extents; + Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, + m_object_off, m_object_len, parent_extents); + + uint64_t parent_overlap = 0; + uint64_t object_overlap = 0; + r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); + if (r == 0) { + object_overlap = m_ictx->prune_parent_extents(parent_extents, + parent_overlap); + 
} - read_from_parent(parent_extents); - finished = false; + if (object_overlap > 0) { + m_tried_parent = true; + if (is_copy_on_read(m_ictx, m_snap_id)) { + m_state = LIBRBD_AIO_READ_COPYUP; } - } - if (m_tried_parent) { - // release reference to the parent read completion. this request - // might be completed after unblock is invoked. - AioCompletion *parent_completion = m_parent_completion; - parent_completion->unblock(); - parent_completion->put(); + read_from_parent(parent_extents); + finished = false; } } - break; - case LIBRBD_AIO_READ_COPYUP: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP" - << dendl; - // This is the extra step for copy-on-read: kick off an asynchronous copyup. - // It is different from copy-on-write as asynchronous copyup will finish - // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. - - assert(m_tried_parent); - if (r > 0) { - // If read entire object from parent success and CoR is possible, kick - // off a asynchronous copyup. This approach minimizes the latency - // impact. - send_copyup(); + + if (m_tried_parent) { + // release reference to the parent read completion. this request + // might be completed after unblock is invoked. + AioCompletion *parent_completion = m_parent_completion; + parent_completion->unblock(); + parent_completion->put(); } - break; - case LIBRBD_AIO_READ_FLAT: - ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" - << dendl; - // The read content should be deposit in m_read_data - break; - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); } - - return finished; + break; + case LIBRBD_AIO_READ_COPYUP: + ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP" + << dendl; + // This is the extra step for copy-on-read: kick off an asynchronous copyup. + // It is different from copy-on-write as asynchronous copyup will finish + // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. 
+ + assert(m_tried_parent); + if (r > 0) { + // If read entire object from parent success and CoR is possible, kick + // off a asynchronous copyup. This approach minimizes the latency + // impact. + send_copyup(); + } + break; + case LIBRBD_AIO_READ_FLAT: + ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT" + << dendl; + // The read content should be deposit in m_read_data + break; + default: + lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; + assert(0); } - void AioObjectRead::send() { - ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; + return finished; +} - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); +void AioObjectRead::send() { + ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; - // send read request to parent if the object doesn't exist locally - if (m_ictx->object_map != nullptr && - !m_ictx->object_map->object_may_exist(m_object_no)) { - m_ictx->op_work_queue->queue(util::create_context_callback< - AioObjectRequest>(this), -ENOENT); - return; - } - } + { + RWLock::RLocker snap_locker(m_ictx->snap_lock); - librados::ObjectReadOperation op; - int flags = m_ictx->get_read_flags(m_snap_id); - if (m_sparse) { - op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data, - NULL); - } else { - op.read(m_object_off, m_object_len, &m_read_data, NULL); + // send read request to parent if the object doesn't exist locally + if (m_ictx->object_map != nullptr && + !m_ictx->object_map->object_may_exist(m_object_no)) { + m_ictx->op_work_queue->queue(util::create_context_callback< + AioObjectRequest>(this), -ENOENT); + return; } - op.set_op_flags2(m_op_flags); - - librados::AioCompletion *rados_completion = - util::create_rados_ack_callback(this); - int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, - NULL); - assert(r == 0); + } - rados_completion->release(); + 
librados::ObjectReadOperation op; + int flags = m_ictx->get_read_flags(m_snap_id); + if (m_sparse) { + op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data, + NULL); + } else { + op.read(m_object_off, m_object_len, &m_read_data, NULL); } + op.set_op_flags2(m_op_flags); - void AioObjectRead::send_copyup() - { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - if (!compute_parent_extents() || - (m_ictx->exclusive_lock != nullptr && - !m_ictx->exclusive_lock->is_lock_owner())) { - return; - } - } + librados::AioCompletion *rados_completion = + util::create_rados_ack_callback(this); + int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, + NULL); + assert(r == 0); - Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - // create and kick off a CopyupRequest - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no, - m_parent_extents); - m_ictx->copyup_list[m_object_no] = new_req; - new_req->send(); - } - } + rados_completion->release(); +} - void AioObjectRead::read_from_parent(const vector >& parent_extents) +void AioObjectRead::send_copyup() +{ { - assert(!m_parent_completion); - m_parent_completion = AioCompletion::create_and_start( - this, m_ictx, AIO_TYPE_READ); - - // prevent the parent image from being deleted while this - // request is still in-progress - m_parent_completion->get(); - m_parent_completion->block(); - - ldout(m_ictx->cct, 20) << "read_from_parent this = " << this - << " parent completion " << m_parent_completion - << " extents " << parent_extents - << dendl; - RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); - AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion, - parent_extents, NULL, &m_read_data, 0); + RWLock::RLocker owner_locker(m_ictx->owner_lock); + 
RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + if (!compute_parent_extents() || + (m_ictx->exclusive_lock != nullptr && + !m_ictx->exclusive_lock->is_lock_owner())) { + return; + } } - /** write **/ - - AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx, - const std::string &oid, - uint64_t object_no, - uint64_t object_off, - uint64_t len, - const ::SnapContext &snapc, - Context *completion, - bool hide_enoent) - : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, - completion, hide_enoent), - m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) - { - m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); + Mutex::Locker copyup_locker(m_ictx->copyup_list_lock); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + // create and kick off a CopyupRequest + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no, + m_parent_extents); + m_ictx->copyup_list[m_object_no] = new_req; + new_req->send(); } +} - void AbstractAioObjectWrite::guard_write() - { - if (has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - m_write.assert_exists(); - ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; - } +void AioObjectRead::read_from_parent(const vector >& parent_extents) +{ + assert(!m_parent_completion); + m_parent_completion = AioCompletion::create_and_start( + this, m_ictx, AIO_TYPE_READ); + + // prevent the parent image from being deleted while this + // request is still in-progress + m_parent_completion->get(); + m_parent_completion->block(); + + ldout(m_ictx->cct, 20) << "read_from_parent this = " << this + << " parent completion " << m_parent_completion + << " extents " << parent_extents << dendl; + RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); + AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion, + parent_extents, NULL, &m_read_data, 0); +} + +/** write **/ + 
+AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx, + const std::string &oid, + uint64_t object_no, + uint64_t object_off, + uint64_t len, + const ::SnapContext &snapc, + Context *completion, + bool hide_enoent) + : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, + completion, hide_enoent), + m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) +{ + m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); +} + +void AbstractAioObjectWrite::guard_write() +{ + if (has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + m_write.assert_exists(); + ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl; } +} - bool AbstractAioObjectWrite::should_complete(int r) - { - ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid - << " " << m_object_off << "~" << m_object_len - << " should_complete: r = " << r << dendl; - - bool finished = true; - switch (m_state) { - case LIBRBD_AIO_WRITE_PRE: - ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; - if (r < 0) { - return true; - } +bool AbstractAioObjectWrite::should_complete(int r) +{ + ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid + << " " << m_object_off << "~" << m_object_len + << " should_complete: r = " << r << dendl; - send_write(); - finished = false; - break; + bool finished = true; + switch (m_state) { + case LIBRBD_AIO_WRITE_PRE: + ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; + if (r < 0) { + return true; + } - case LIBRBD_AIO_WRITE_POST: - ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; - finished = true; - break; + send_write(); + finished = false; + break; - case LIBRBD_AIO_WRITE_GUARD: - ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; - - if (r == -ENOENT) { - handle_write_guard(); - finished = false; - break; - } else if (r < 0) { - // pass the error code to the finish context - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - break; - } + case LIBRBD_AIO_WRITE_POST: + 
ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; + finished = true; + break; - finished = send_post(); - break; + case LIBRBD_AIO_WRITE_GUARD: + ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; - case LIBRBD_AIO_WRITE_COPYUP: - ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; - if (r < 0) { - m_state = LIBRBD_AIO_WRITE_ERROR; - complete(r); - finished = false; - } else { - finished = send_post(); - } + if (r == -ENOENT) { + handle_write_guard(); + finished = false; break; + } else if (r < 0) { + // pass the error code to the finish context + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; + break; + } - case LIBRBD_AIO_WRITE_FLAT: - ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; + finished = send_post(); + break; + case LIBRBD_AIO_WRITE_COPYUP: + ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; + if (r < 0) { + m_state = LIBRBD_AIO_WRITE_ERROR; + complete(r); + finished = false; + } else { finished = send_post(); - break; + } + break; - case LIBRBD_AIO_WRITE_ERROR: - assert(r < 0); - lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) - << dendl; - break; + case LIBRBD_AIO_WRITE_FLAT: + ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; - default: - lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; - assert(0); - } + finished = send_post(); + break; - return finished; - } + case LIBRBD_AIO_WRITE_ERROR: + assert(r < 0); + lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl; + break; - void AbstractAioObjectWrite::send() { - assert(m_ictx->owner_lock.is_locked()); - ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " - << m_oid << " " << m_object_off << "~" - << m_object_len << dendl; - send_pre(); + default: + lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; + assert(0); } - void AbstractAioObjectWrite::send_pre() { - assert(m_ictx->owner_lock.is_locked()); + return finished; +} - bool write = false; - { - RWLock::RLocker snap_lock(m_ictx->snap_lock); - 
if (m_ictx->object_map == nullptr) { - m_object_exist = true; - write = true; +void AbstractAioObjectWrite::send() { + assert(m_ictx->owner_lock.is_locked()); + ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" " + << m_oid << " " << m_object_off << "~" + << m_object_len << dendl; + send_pre(); +} + +void AbstractAioObjectWrite::send_pre() { + assert(m_ictx->owner_lock.is_locked()); + + bool write = false; + { + RWLock::RLocker snap_lock(m_ictx->snap_lock); + if (m_ictx->object_map == nullptr) { + m_object_exist = true; + write = true; + } else { + // should have been flushed prior to releasing lock + assert(m_ictx->exclusive_lock->is_lock_owner()); + m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); + + uint8_t new_state; + pre_object_map_update(&new_state); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (m_ictx->object_map->update_required(m_object_no, new_state)) { + ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << dendl; + m_state = LIBRBD_AIO_WRITE_PRE; + + Context *ctx = util::create_context_callback(this); + bool updated = m_ictx->object_map->aio_update(m_object_no, new_state, + {}, ctx); + assert(updated); } else { - // should have been flushed prior to releasing lock - assert(m_ictx->exclusive_lock->is_lock_owner()); - m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); - - uint8_t new_state; - pre_object_map_update(&new_state); - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (m_ictx->object_map->update_required(m_object_no, new_state)) { - ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << dendl; - m_state = LIBRBD_AIO_WRITE_PRE; - - Context *ctx = util::create_context_callback(this); - bool updated = m_ictx->object_map->aio_update(m_object_no, new_state, - {}, ctx); - assert(updated); - } else { - write = true; - } + write = 
true; } } + } - // avoid possible recursive lock attempts - if (write) { - // no object map update required - send_write(); - } + // avoid possible recursive lock attempts + if (write) { + // no object map update required + send_write(); } +} - bool AbstractAioObjectWrite::send_post() { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->object_map == nullptr || !post_object_map_update()) { - return true; - } +bool AbstractAioObjectWrite::send_post() { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->object_map == nullptr || !post_object_map_update()) { + return true; + } - // should have been flushed prior to releasing lock - assert(m_ictx->exclusive_lock->is_lock_owner()); + // should have been flushed prior to releasing lock + assert(m_ictx->exclusive_lock->is_lock_owner()); - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) { - return true; - } + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) { + return true; + } - ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_POST; + ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_POST; - Context *ctx = util::create_context_callback(this); - bool updated = m_ictx->object_map->aio_update(m_object_no, - OBJECT_NONEXISTENT, - OBJECT_PENDING, ctx); - assert(updated); - return false; - } + Context *ctx = util::create_context_callback(this); + bool updated = m_ictx->object_map->aio_update(m_object_no, + OBJECT_NONEXISTENT, + OBJECT_PENDING, ctx); + assert(updated); + return false; +} - void AbstractAioObjectWrite::send_write() 
{ - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist << dendl; +void AbstractAioObjectWrite::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist << dendl; - if (!m_object_exist && has_parent()) { - m_state = LIBRBD_AIO_WRITE_GUARD; - handle_write_guard(); - } else { - send_write_op(true); - } + if (!m_object_exist && has_parent()) { + m_state = LIBRBD_AIO_WRITE_GUARD; + handle_write_guard(); + } else { + send_write_op(true); } +} - void AbstractAioObjectWrite::send_copyup() - { - ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - m_state = LIBRBD_AIO_WRITE_COPYUP; - - m_ictx->copyup_list_lock.Lock(); - map::iterator it = - m_ictx->copyup_list.find(m_object_no); - if (it == m_ictx->copyup_list.end()) { - CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, - m_parent_extents); - - // make sure to wait on this CopyupRequest - new_req->append_request(this); - m_ictx->copyup_list[m_object_no] = new_req; - - m_ictx->copyup_list_lock.Unlock(); - new_req->send(); - } else { - it->second->append_request(this); - m_ictx->copyup_list_lock.Unlock(); - } +void AbstractAioObjectWrite::send_copyup() +{ + ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + m_state = LIBRBD_AIO_WRITE_COPYUP; + + m_ictx->copyup_list_lock.Lock(); + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + if (it == m_ictx->copyup_list.end()) { + CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, + m_object_no, + m_parent_extents); + + // make sure to wait on this CopyupRequest + new_req->append_request(this); + m_ictx->copyup_list[m_object_no] = new_req; + + m_ictx->copyup_list_lock.Unlock(); + 
new_req->send(); + } else { + it->second->append_request(this); + m_ictx->copyup_list_lock.Unlock(); } - void AbstractAioObjectWrite::send_write_op(bool write_guard) +} +void AbstractAioObjectWrite::send_write_op(bool write_guard) +{ + m_state = LIBRBD_AIO_WRITE_FLAT; + if (write_guard) + guard_write(); + add_write_ops(&m_write); + assert(m_write.size() != 0); + + librados::AioCompletion *rados_completion = + util::create_rados_safe_callback(this); + int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, + m_snap_seq, m_snaps); + assert(r == 0); + rados_completion->release(); +} +void AbstractAioObjectWrite::handle_write_guard() +{ + bool has_parent; { - m_state = LIBRBD_AIO_WRITE_FLAT; - if (write_guard) - guard_write(); - add_write_ops(&m_write); - assert(m_write.size() != 0); - - librados::AioCompletion *rados_completion = - util::create_rados_safe_callback(this); - int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write, - m_snap_seq, m_snaps); - assert(r == 0); - rados_completion->release(); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); + has_parent = compute_parent_extents(); } - void AbstractAioObjectWrite::handle_write_guard() - { - bool has_parent; - { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - RWLock::RLocker parent_locker(m_ictx->parent_lock); - has_parent = compute_parent_extents(); - } - // If parent still exists, overlap might also have changed. - if (has_parent) { - send_copyup(); - } else { - // parent may have disappeared -- send original write again - ldout(m_ictx->cct, 20) << "should_complete(" << this - << "): parent overlap now 0" << dendl; - send_write(); - } + // If parent still exists, overlap might also have changed. 
+ if (has_parent) { + send_copyup(); + } else { + // parent may have disappeared -- send original write again + ldout(m_ictx->cct, 20) << "should_complete(" << this + << "): parent overlap now 0" << dendl; + send_write(); } +} - void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) { - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->enable_alloc_hint && - (m_ictx->object_map == nullptr || - !m_object_exist)) { - wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); - } - - if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { - wr->write_full(m_write_data); - } else { - wr->write(m_object_off, m_write_data); - } - wr->set_op_flags2(m_op_flags); +void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) { + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->enable_alloc_hint && + (m_ictx->object_map == nullptr || !m_object_exist)) { + wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); } - void AioObjectWrite::send_write() { - bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len - << " object exist " << m_object_exist - << " write_full " << write_full << dendl; - if (write_full && !has_parent()) { - send_write_op(false); - } else { - AbstractAioObjectWrite::send_write(); - } + if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { + wr->write_full(m_write_data); + } else { + wr->write(m_object_off, m_write_data); } + wr->set_op_flags2(m_op_flags); +} - void AioObjectRemove::guard_write() { - // do nothing to disable write guard only if deep-copyup not required - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (!m_ictx->snaps.empty()) { - AbstractAioObjectWrite::guard_write(); - } +void AioObjectWrite::send_write() { + bool write_full = (m_object_off == 0 && m_object_len == 
m_ictx->get_object_size()); + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len + << " object exist " << m_object_exist + << " write_full " << write_full << dendl; + if (write_full && !has_parent()) { + send_write_op(false); + } else { + AbstractAioObjectWrite::send_write(); } - void AioObjectRemove::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " - << m_object_off << "~" << m_object_len << dendl; - send_write_op(true); +} + +void AioObjectRemove::guard_write() { + // do nothing to disable write guard only if deep-copyup not required + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (!m_ictx->snaps.empty()) { + AbstractAioObjectWrite::guard_write(); } - void AioObjectTruncate::send_write() { - ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid - << " truncate " << m_object_off << dendl; - if (!m_object_exist && ! has_parent()) { - m_state = LIBRBD_AIO_WRITE_FLAT; - Context *ctx = util::create_context_callback(this); - m_ictx->op_work_queue->queue(ctx, 0); - } else { - AbstractAioObjectWrite::send_write(); - } +} +void AioObjectRemove::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " + << m_object_off << "~" << m_object_len << dendl; + send_write_op(true); +} +void AioObjectTruncate::send_write() { + ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid + << " truncate " << m_object_off << dendl; + if (!m_object_exist && ! 
has_parent()) { + m_state = LIBRBD_AIO_WRITE_FLAT; + Context *ctx = util::create_context_callback(this); + m_ictx->op_work_queue->queue(ctx, 0); + } else { + AbstractAioObjectWrite::send_write(); } } + +} // namespace librbd diff --git a/src/librbd/AioObjectRequest.h b/src/librbd/AioObjectRequest.h index 9647a3e698661..024142323295a 100644 --- a/src/librbd/AioObjectRequest.h +++ b/src/librbd/AioObjectRequest.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_AIOREQUEST_H -#define CEPH_LIBRBD_AIOREQUEST_H + +#ifndef CEPH_LIBRBD_AIO_OBJECT_REQUEST_H +#define CEPH_LIBRBD_AIO_OBJECT_REQUEST_H #include "include/int_types.h" @@ -16,339 +17,339 @@ class Context; namespace librbd { - struct AioCompletion; - struct ImageCtx; - class CopyupRequest; +struct AioCompletion; +struct ImageCtx; +class CopyupRequest; + +/** + * This class represents an I/O operation to a single RBD data object. + * Its subclasses encapsulate logic for dealing with special cases + * for I/O due to layering. 
+ */ +class AioObjectRequest +{ +public: + AioObjectRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t off, uint64_t len, + librados::snap_t snap_id, + Context *completion, bool hide_enoent); + virtual ~AioObjectRequest() {} + + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; + + void complete(int r); + + virtual bool should_complete(int r) = 0; + virtual void send() = 0; + + bool has_parent() const { + return !m_parent_extents.empty(); + } + +protected: + bool compute_parent_extents(); + + ImageCtx *m_ictx; + std::string m_oid; + uint64_t m_object_no, m_object_off, m_object_len; + librados::snap_t m_snap_id; + Context *m_completion; + std::vector > m_parent_extents; + bool m_hide_enoent; +}; + +class AioObjectRead : public AioObjectRequest { +public: + AioObjectRead(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, uint64_t offset, uint64_t len, + vector >& be, + librados::snap_t snap_id, bool sparse, + Context *completion, int op_flags); + + virtual bool should_complete(int r); + virtual void send(); + void guard_read(); + + ceph::bufferlist &data() { + return m_read_data; + } + + std::map m_ext_map; + + friend class C_AioRead; + +private: + vector > m_buffer_extents; + bool m_tried_parent; + bool m_sparse; + int m_op_flags; + ceph::bufferlist m_read_data; + AioCompletion *m_parent_completion; /** - * This class represents an I/O operation to a single RBD data object. - * Its subclasses encapsulate logic for dealing with special cases - * for I/O due to layering. + * Reads go through the following state machine to deal with + * layering: + * + * need copyup + * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP + * | | + * v | + * done <------------------------------------/ + * ^ + * | + * LIBRBD_AIO_READ_FLAT + * + * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on + * whether there is a parent or not. 
*/ - class AioObjectRequest - { - public: - AioObjectRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t off, uint64_t len, - librados::snap_t snap_id, - Context *completion, bool hide_enoent); - virtual ~AioObjectRequest() {} - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {}; - - void complete(int r); - - virtual bool should_complete(int r) = 0; - virtual void send() = 0; - - bool has_parent() const { - return !m_parent_extents.empty(); - } - - protected: - bool compute_parent_extents(); - - ImageCtx *m_ictx; - std::string m_oid; - uint64_t m_object_no, m_object_off, m_object_len; - librados::snap_t m_snap_id; - Context *m_completion; - std::vector > m_parent_extents; - bool m_hide_enoent; + enum read_state_d { + LIBRBD_AIO_READ_GUARD, + LIBRBD_AIO_READ_COPYUP, + LIBRBD_AIO_READ_FLAT }; - class AioObjectRead : public AioObjectRequest { - public: - AioObjectRead(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, uint64_t offset, uint64_t len, - vector >& be, - librados::snap_t snap_id, bool sparse, - Context *completion, int op_flags); - - virtual bool should_complete(int r); - virtual void send(); - void guard_read(); - - ceph::bufferlist &data() { - return m_read_data; - } - - std::map m_ext_map; - - friend class C_AioRead; - - private: - vector > m_buffer_extents; - bool m_tried_parent; - bool m_sparse; - int m_op_flags; - ceph::bufferlist m_read_data; - AioCompletion *m_parent_completion; - - /** - * Reads go through the following state machine to deal with - * layering: - * - * need copyup - * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP - * | | - * v | - * done <------------------------------------/ - * ^ - * | - * LIBRBD_AIO_READ_FLAT - * - * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on - * whether there is a parent or not. 
- */ - enum read_state_d { - LIBRBD_AIO_READ_GUARD, - LIBRBD_AIO_READ_COPYUP, - LIBRBD_AIO_READ_FLAT - }; - - read_state_d m_state; - - void send_copyup(); - - void read_from_parent(const vector >& image_extents); - }; - - class AbstractAioObjectWrite : public AioObjectRequest { - public: - AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid, - uint64_t object_no, uint64_t object_off, - uint64_t len, const ::SnapContext &snapc, - Context *completion, bool hide_enoent); - - virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) - { - add_write_ops(wr); - } + read_state_d m_state; - virtual bool should_complete(int r); - virtual void send(); - - /** - * Writes go through the following state machine to deal with - * layering and the object map: - * - * - * . | - * . | - * . \---> LIBRBD_AIO_WRITE_PRE - * . | | - * . . . . . . | . . . . | . . . . . . . . . . . - * . | -or- | . - * . | | v - * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . - * . | | . - * v v need copyup | . - * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . - * . | | . | . - * . | | . | . - * . | /-----/ . | . - * . | | . | . - * . \-------------------\ | /-------------------/ . - * . | | | . . - * . v v v . . - * . LIBRBD_AIO_WRITE_POST . . - * . | . . - * . | . . . . . . . . . - * . | . . - * . v v . - * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . - * - * The _PRE/_POST states are skipped if the object map is disabled. - * The write starts in _WRITE_GUARD or _FLAT depending on whether or not - * there is a parent overlap. 
- */ - protected: - enum write_state_d { - LIBRBD_AIO_WRITE_GUARD, - LIBRBD_AIO_WRITE_COPYUP, - LIBRBD_AIO_WRITE_FLAT, - LIBRBD_AIO_WRITE_PRE, - LIBRBD_AIO_WRITE_POST, - LIBRBD_AIO_WRITE_ERROR - }; - - write_state_d m_state; - librados::ObjectWriteOperation m_write; - uint64_t m_snap_seq; - std::vector m_snaps; - bool m_object_exist; - - virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; - virtual const char* get_write_type() const = 0; - virtual void guard_write(); - virtual void pre_object_map_update(uint8_t *new_state) = 0; - virtual bool post_object_map_update() { - return false; - } - virtual void send_write(); - virtual void send_write_op(bool write_guard); - virtual void handle_write_guard(); - - private: - void send_pre(); - bool send_post(); - void send_copyup(); - }; - - class AioObjectWrite : public AbstractAioObjectWrite { - public: - AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, const ceph::bufferlist &data, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(), - snapc, completion, false), - m_write_data(data), m_op_flags(0) { - } - - void set_op_flags(int op_flags) { - m_op_flags = op_flags; - } - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr); - - virtual const char* get_write_type() const { - return "write"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_EXISTS; - } - virtual void send_write(); - - private: - ceph::bufferlist m_write_data; - int m_op_flags; - }; - - class AioObjectRemove : public AbstractAioObjectWrite { - public: - AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, - true), - m_object_state(OBJECT_NONEXISTENT) { - } + void send_copyup(); - protected: - virtual void 
add_write_ops(librados::ObjectWriteOperation *wr) { - if (has_parent()) { - wr->truncate(0); - } else { - wr->remove(); - } - } + void read_from_parent(const vector >& image_extents); +}; - virtual const char* get_write_type() const { - if (has_parent()) { - return "remove (trunc)"; - } - return "remove"; - } - virtual void pre_object_map_update(uint8_t *new_state) { - if (has_parent()) { - m_object_state = OBJECT_EXISTS; - } else { - m_object_state = OBJECT_PENDING; - } - *new_state = m_object_state; - } +class AbstractAioObjectWrite : public AioObjectRequest { +public: + AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + uint64_t len, const ::SnapContext &snapc, + Context *completion, bool hide_enoent); - virtual bool post_object_map_update() { - if (m_object_state == OBJECT_EXISTS) { - return false; - } - return true; - } + virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) + { + add_write_ops(wr); + } - virtual void guard_write(); - virtual void send_write(); + virtual bool should_complete(int r); + virtual void send(); - private: - uint8_t m_object_state; + /** + * Writes go through the following state machine to deal with + * layering and the object map: + * + * + * . | + * . | + * . \---> LIBRBD_AIO_WRITE_PRE + * . | | + * . . . . . . | . . . . | . . . . . . . . . . . + * . | -or- | . + * . | | v + * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . . + * . | | . + * v v need copyup | . + * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . + * . | | . | . + * . | | . | . + * . | /-----/ . | . + * . | | . | . + * . \-------------------\ | /-------------------/ . + * . | | | . . + * . v v v . . + * . LIBRBD_AIO_WRITE_POST . . + * . | . . + * . | . . . . . . . . . + * . | . . + * . v v . + * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . + * + * The _PRE/_POST states are skipped if the object map is disabled. 
+ * The write starts in _WRITE_GUARD or _FLAT depending on whether or not + * there is a parent overlap. + */ +protected: + enum write_state_d { + LIBRBD_AIO_WRITE_GUARD, + LIBRBD_AIO_WRITE_COPYUP, + LIBRBD_AIO_WRITE_FLAT, + LIBRBD_AIO_WRITE_PRE, + LIBRBD_AIO_WRITE_POST, + LIBRBD_AIO_WRITE_ERROR }; - class AioObjectTrim : public AbstractAioObjectWrite { - public: - AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + write_state_d m_state; + librados::ObjectWriteOperation m_write; + uint64_t m_snap_seq; + std::vector m_snaps; + bool m_object_exist; + + virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; + virtual const char* get_write_type() const = 0; + virtual void guard_write(); + virtual void pre_object_map_update(uint8_t *new_state) = 0; + virtual bool post_object_map_update() { + return false; + } + virtual void send_write(); + virtual void send_write_op(bool write_guard); + virtual void handle_write_guard(); + +private: + void send_pre(); + bool send_post(); + void send_copyup(); +}; + +class AioObjectWrite : public AbstractAioObjectWrite { +public: + AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, const ceph::bufferlist &data, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(), + snapc, completion, false), + m_write_data(data), m_op_flags(0) { + } + + void set_op_flags(int op_flags) { + m_op_flags = op_flags; + } +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr); + + virtual const char* get_write_type() const { + return "write"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } + virtual void send_write(); + +private: + ceph::bufferlist m_write_data; + int m_op_flags; +}; + +class AioObjectRemove : public AbstractAioObjectWrite { +public: + AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no, const 
::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, - true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true), + m_object_state(OBJECT_NONEXISTENT) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + if (has_parent()) { + wr->truncate(0); + } else { wr->remove(); } + } - virtual const char* get_write_type() const { - return "remove (trim)"; + virtual const char* get_write_type() const { + if (has_parent()) { + return "remove (trunc)"; } - - virtual void pre_object_map_update(uint8_t *new_state) { - *new_state = OBJECT_PENDING; - } - - virtual bool post_object_map_update() { - return true; - } - }; - - class AioObjectTruncate : public AbstractAioObjectWrite { - public: - AioObjectTruncate(ImageCtx *ictx, const std::string &oid, - uint64_t object_no, uint64_t object_off, - const ::SnapContext &snapc, Context *completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc, - completion, true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->truncate(m_object_off); + return "remove"; + } + virtual void pre_object_map_update(uint8_t *new_state) { + if (has_parent()) { + m_object_state = OBJECT_EXISTS; + } else { + m_object_state = OBJECT_PENDING; } + *new_state = m_object_state; + } - virtual const char* get_write_type() const { - return "truncate"; - } - - virtual void pre_object_map_update(uint8_t *new_state) { - if (!m_object_exist && !has_parent()) - *new_state = OBJECT_NONEXISTENT; - else - *new_state = OBJECT_EXISTS; - } - virtual void send_write(); - }; - - class AioObjectZero : public AbstractAioObjectWrite { - public: - AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no, - uint64_t object_off, uint64_t object_len, - const ::SnapContext &snapc, Context 
*completion) - : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len, - snapc, completion, true) { - } - - protected: - virtual void add_write_ops(librados::ObjectWriteOperation *wr) { - wr->zero(m_object_off, m_object_len); - } - - virtual const char* get_write_type() const { - return "zero"; + virtual bool post_object_map_update() { + if (m_object_state == OBJECT_EXISTS) { + return false; } - - virtual void pre_object_map_update(uint8_t *new_state) { + return true; + } + + virtual void guard_write(); + virtual void send_write(); + +private: + uint8_t m_object_state; +}; + +class AioObjectTrim : public AbstractAioObjectWrite { +public: + AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion, + true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->remove(); + } + + virtual const char* get_write_type() const { + return "remove (trim)"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_PENDING; + } + + virtual bool post_object_map_update() { + return true; + } +}; + +class AioObjectTruncate : public AbstractAioObjectWrite { +public: + AioObjectTruncate(ImageCtx *ictx, const std::string &oid, + uint64_t object_no, uint64_t object_off, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc, + completion, true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->truncate(m_object_off); + } + + virtual const char* get_write_type() const { + return "truncate"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + if (!m_object_exist && !has_parent()) + *new_state = OBJECT_NONEXISTENT; + else *new_state = OBJECT_EXISTS; - } - }; - -} - -#endif + } + virtual void send_write(); +}; + +class AioObjectZero : public 
AbstractAioObjectWrite { +public: + AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no, + uint64_t object_off, uint64_t object_len, + const ::SnapContext &snapc, Context *completion) + : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len, + snapc, completion, true) { + } + +protected: + virtual void add_write_ops(librados::ObjectWriteOperation *wr) { + wr->zero(m_object_off, m_object_len); + } + + virtual const char* get_write_type() const { + return "zero"; + } + + virtual void pre_object_map_update(uint8_t *new_state) { + *new_state = OBJECT_EXISTS; + } +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_AIO_OBJECT_REQUEST_H diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index da7c43ae286a0..84068d19eeb6b 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -78,228 +78,229 @@ private: } // anonymous namespace - CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, - vector >& image_extents) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), - m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT) - { - m_async_op.start_op(*m_ictx); - } +CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, + vector >& image_extents) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), + m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT) +{ + m_async_op.start_op(*m_ictx); +} - CopyupRequest::~CopyupRequest() { - assert(m_pending_requests.empty()); - m_async_op.finish_op(); - } +CopyupRequest::~CopyupRequest() { + assert(m_pending_requests.empty()); + m_async_op.finish_op(); +} + +void CopyupRequest::append_request(AioObjectRequest *req) { + ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; + m_pending_requests.push_back(req); +} - void CopyupRequest::append_request(AioObjectRequest *req) { - ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; - 
m_pending_requests.push_back(req); +void CopyupRequest::complete_requests(int r) { + while (!m_pending_requests.empty()) { + vector<AioObjectRequest *>::iterator it = m_pending_requests.begin(); + AioObjectRequest *req = *it; + ldout(m_ictx->cct, 20) << __func__ << " completing request " << req + << dendl; + req->complete(r); + m_pending_requests.erase(it); } +} - void CopyupRequest::complete_requests(int r) { - while (!m_pending_requests.empty()) { - vector<AioObjectRequest *>::iterator it = m_pending_requests.begin(); - AioObjectRequest *req = *it; - ldout(m_ictx->cct, 20) << __func__ << " completing request " << req - << dendl; - req->complete(r); - m_pending_requests.erase(it); - } +bool CopyupRequest::send_copyup() { + bool add_copyup_op = !m_copyup_data.is_zero(); + bool copy_on_read = m_pending_requests.empty(); + if (!add_copyup_op && copy_on_read) { + // copyup empty object to prevent future CoR attempts + m_copyup_data.clear(); + add_copyup_op = true; } - bool CopyupRequest::send_copyup() { - bool add_copyup_op = !m_copyup_data.is_zero(); - bool copy_on_read = m_pending_requests.empty(); - if (!add_copyup_op && copy_on_read) { - // copyup empty object to prevent future CoR attempts - m_copyup_data.clear(); - add_copyup_op = true; - } + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid << dendl; + m_state = STATE_COPYUP; - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid << dendl; - m_state = STATE_COPYUP; + m_ictx->snap_lock.get_read(); + ::SnapContext snapc = m_ictx->snapc; + m_ictx->snap_lock.put_read(); - m_ictx->snap_lock.get_read(); - ::SnapContext snapc = m_ictx->snapc; - m_ictx->snap_lock.put_read(); + std::vector<librados::snap_t> snaps; - std::vector<librados::snap_t> snaps; + if (!copy_on_read) { + m_pending_copyups.inc(); + } - if (!copy_on_read) { - m_pending_copyups.inc(); - } + int r; + if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { + assert(add_copyup_op); + add_copyup_op = false; + + librados::ObjectWriteOperation copyup_op; + copyup_op.exec("rbd", 
"copyup", m_copyup_data); + + // send only the copyup request with a blank snapshot context so that + // all snapshots are detected from the parent for this object. If + // this is a CoW request, a second request will be created for the + // actual modification. + m_pending_copyups.inc(); + + ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with " + << "empty snapshot context" << dendl; + librados::AioCompletion *comp = util::create_rados_safe_callback(this); + r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps); + assert(r == 0); + comp->release(); + } - int r; - if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { - assert(add_copyup_op); - add_copyup_op = false; - - librados::ObjectWriteOperation copyup_op; - copyup_op.exec("rbd", "copyup", m_copyup_data); - - // send only the copyup request with a blank snapshot context so that - // all snapshots are detected from the parent for this object. If - // this is a CoW request, a second request will be created for the - // actual modification. 
- m_pending_copyups.inc(); - - ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with " - << "empty snapshot context" << dendl; - librados::AioCompletion *comp = util::create_rados_safe_callback(this); - r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps); - assert(r == 0); - comp->release(); + if (!copy_on_read) { + librados::ObjectWriteOperation write_op; + if (add_copyup_op) { + // CoW did not need to handle existing snapshots + write_op.exec("rbd", "copyup", m_copyup_data); } - if (!copy_on_read) { - librados::ObjectWriteOperation write_op; - if (add_copyup_op) { - // CoW did not need to handle existing snapshots - write_op.exec("rbd", "copyup", m_copyup_data); - } - - // merge all pending write ops into this single RADOS op - for (size_t i=0; i<m_pending_requests.size(); ++i) { - AioObjectRequest *req = m_pending_requests[i]; - ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req - << dendl; - req->add_copyup_ops(&write_op); - } - assert(write_op.size() != 0); - - snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); - librados::AioCompletion *comp = util::create_rados_safe_callback(this); - r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op); - assert(r == 0); - comp->release(); + // merge all pending write ops into this single RADOS op + for (size_t i=0; i<m_pending_requests.size(); ++i) { + AioObjectRequest *req = m_pending_requests[i]; + ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req + << dendl; + req->add_copyup_ops(&write_op); } - return false; + assert(write_op.size() != 0); + + snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); + librados::AioCompletion *comp = util::create_rados_safe_callback(this); + r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op); + assert(r == 0); + comp->release(); } + return false; +} - void CopyupRequest::send() - { - m_state = STATE_READ_FROM_PARENT; - AioCompletion *comp = AioCompletion::create_and_start( - this, m_ictx, AIO_TYPE_READ); +void CopyupRequest::send() +{ + m_state = STATE_READ_FROM_PARENT; + AioCompletion *comp = AioCompletion::create_and_start( + this, m_ictx, AIO_TYPE_READ); + + ldout(m_ictx->cct, 20) << __func__ << 
" " << this + << ": completion " << comp + << ", oid " << m_oid + << ", extents " << m_image_extents + << dendl; + RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); + AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL, + &m_copyup_data, 0); +} - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": completion " << comp - << ", oid " << m_oid - << ", extents " << m_image_extents - << dendl; - RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); - AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL, - &m_copyup_data, 0); +void CopyupRequest::complete(int r) +{ + if (should_complete(r)) { + complete_requests(r); + delete this; } +} - void CopyupRequest::complete(int r) - { - if (should_complete(r)) { - complete_requests(r); - delete this; +bool CopyupRequest::should_complete(int r) +{ + CephContext *cct = m_ictx->cct; + ldout(cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << ", extents " << m_image_extents + << ", r " << r << dendl; + + uint64_t pending_copyups; + switch (m_state) { + case STATE_READ_FROM_PARENT: + ldout(cct, 20) << "READ_FROM_PARENT" << dendl; + remove_from_list(); + if (r >= 0 || r == -ENOENT) { + return send_object_map(); } - } - - bool CopyupRequest::should_complete(int r) - { - CephContext *cct = m_ictx->cct; - ldout(cct, 20) << __func__ << " " << this - << ": oid " << m_oid - << ", extents " << m_image_extents - << ", r " << r << dendl; - - uint64_t pending_copyups; - switch (m_state) { - case STATE_READ_FROM_PARENT: - ldout(cct, 20) << "READ_FROM_PARENT" << dendl; - remove_from_list(); - if (r >= 0 || r == -ENOENT) { - return send_object_map(); - } - break; - - case STATE_OBJECT_MAP: - ldout(cct, 20) << "OBJECT_MAP" << dendl; - assert(r == 0); - return send_copyup(); - - case STATE_COPYUP: - // invoked via a finisher in librados, so thread safe - pending_copyups = m_pending_copyups.dec(); - ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" - << dendl; - if (r 
== -ENOENT) { - // hide the -ENOENT error if this is the last op - if (pending_copyups == 0) { - complete_requests(0); - } - } else if (r < 0) { - complete_requests(r); + break; + + case STATE_OBJECT_MAP: + ldout(cct, 20) << "OBJECT_MAP" << dendl; + assert(r == 0); + return send_copyup(); + + case STATE_COPYUP: + // invoked via a finisher in librados, so thread safe + pending_copyups = m_pending_copyups.dec(); + ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" + << dendl; + if (r == -ENOENT) { + // hide the -ENOENT error if this is the last op + if (pending_copyups == 0) { + complete_requests(0); } - return (pending_copyups == 0); - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; + } else if (r < 0) { + complete_requests(r); } - return (r < 0); + return (pending_copyups == 0); + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + assert(false); + break; } + return (r < 0); +} - void CopyupRequest::remove_from_list() - { - Mutex::Locker l(m_ictx->copyup_list_lock); +void CopyupRequest::remove_from_list() +{ + Mutex::Locker l(m_ictx->copyup_list_lock); - map<uint64_t, CopyupRequest*>::iterator it = - m_ictx->copyup_list.find(m_object_no); - assert(it != m_ictx->copyup_list.end()); - m_ictx->copyup_list.erase(it); - } + map<uint64_t, CopyupRequest*>::iterator it = + m_ictx->copyup_list.find(m_object_no); + assert(it != m_ictx->copyup_list.end()); + m_ictx->copyup_list.erase(it); +} - bool CopyupRequest::send_object_map() { - { - RWLock::RLocker owner_locker(m_ictx->owner_lock); - RWLock::RLocker snap_locker(m_ictx->snap_lock); - if (m_ictx->object_map != nullptr) { - bool copy_on_read = m_pending_requests.empty(); - assert(m_ictx->exclusive_lock->is_lock_owner()); - - RWLock::WLocker object_map_locker(m_ictx->object_map_lock); - if (copy_on_read && - (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { - // CoW already updates the HEAD object map - m_snap_ids.push_back(CEPH_NOSNAP); - } - if (!m_ictx->snaps.empty()) { - 
m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), - m_ictx->snaps.end()); - } +bool CopyupRequest::send_object_map() { + { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->object_map != nullptr) { + bool copy_on_read = m_pending_requests.empty(); + assert(m_ictx->exclusive_lock->is_lock_owner()); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (copy_on_read && + (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { + // CoW already updates the HEAD object map + m_snap_ids.push_back(CEPH_NOSNAP); + } + if (!m_ictx->snaps.empty()) { + m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), + m_ictx->snaps.end()); } } + } - // avoid possible recursive lock attempts - if (m_snap_ids.empty()) { - // no object map update required - return send_copyup(); - } else { - // update object maps for HEAD and all existing snapshots - ldout(m_ictx->cct, 20) << __func__ << " " << this - << ": oid " << m_oid - << dendl; - m_state = STATE_OBJECT_MAP; - - RWLock::RLocker owner_locker(m_ictx->owner_lock); - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr(), - boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, - boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - NULL, *m_ictx, context_factory, util::create_context_callback(this), - NULL, 0, m_snap_ids.size()); - throttle->start_ops(m_ictx->concurrent_management_ops); - } - return false; + // avoid possible recursive lock attempts + if (m_snap_ids.empty()) { + // no object map update required + return send_copyup(); + } else { + // update object maps for HEAD and all existing snapshots + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << dendl; + m_state = STATE_OBJECT_MAP; + + RWLock::RLocker owner_locker(m_ictx->owner_lock); + AsyncObjectThrottle<>::ContextFactory context_factory( + 
boost::lambda::bind(boost::lambda::new_ptr(), + boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, + boost::lambda::_2)); + AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( + NULL, *m_ictx, context_factory, util::create_context_callback(this), + NULL, 0, m_snap_ids.size()); + throttle->start_ops(m_ictx->concurrent_management_ops); } + return false; } + +} // namespace librbd diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index 81cdc5b117ad1..63b981fa0fc41 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -1,7 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_COPYUPREQUEST_H -#define CEPH_LIBRBD_COPYUPREQUEST_H + +#ifndef CEPH_LIBRBD_COPYUP_REQUEST_H +#define CEPH_LIBRBD_COPYUP_REQUEST_H #include "librbd/AsyncOperation.h" #include "include/int_types.h" @@ -9,76 +10,77 @@ namespace librbd { - struct AioCompletion; - - class CopyupRequest { - public: - CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, - vector >& image_extents); - ~CopyupRequest(); - - void append_request(AioObjectRequest *req); - - void send(); - - void complete(int r); - - private: - /** - * Copyup requests go through the following state machine to read from the - * parent image, update the object map, and copyup the object: - * - * - * @verbatim - * - * - * | - * v - * STATE_READ_FROM_PARENT - * . . | - * . . v - * . . STATE_OBJECT_MAP . . - * . . | . - * . . v . - * . . . > STATE_COPYUP . - * . | . - * . v . - * . . . . > < . . . . . - * - * @endverbatim - * - * The _OBJECT_MAP state is skipped if the object map isn't enabled or if - * an object map update isn't required. The _COPYUP state is skipped if - * no data was read from the parent *and* there are no additional ops. 
- */ - enum State { - STATE_READ_FROM_PARENT, - STATE_OBJECT_MAP, - STATE_COPYUP - }; - - ImageCtx *m_ictx; - std::string m_oid; - uint64_t m_object_no; - vector > m_image_extents; - State m_state; - ceph::bufferlist m_copyup_data; - vector m_pending_requests; - atomic_t m_pending_copyups; - - AsyncOperation m_async_op; - - std::vector m_snap_ids; - - void complete_requests(int r); - - bool should_complete(int r); - - void remove_from_list(); - - bool send_object_map(); - bool send_copyup(); +struct AioCompletion; + +class CopyupRequest { +public: + CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, + vector >& image_extents); + ~CopyupRequest(); + + void append_request(AioObjectRequest *req); + + void send(); + + void complete(int r); + +private: + /** + * Copyup requests go through the following state machine to read from the + * parent image, update the object map, and copyup the object: + * + * + * @verbatim + * + * + * | + * v + * STATE_READ_FROM_PARENT + * . . | + * . . v + * . . STATE_OBJECT_MAP . . + * . . | . + * . . v . + * . . . > STATE_COPYUP . + * . | . + * . v . + * . . . . > < . . . . . + * + * @endverbatim + * + * The _OBJECT_MAP state is skipped if the object map isn't enabled or if + * an object map update isn't required. The _COPYUP state is skipped if + * no data was read from the parent *and* there are no additional ops. + */ + enum State { + STATE_READ_FROM_PARENT, + STATE_OBJECT_MAP, + STATE_COPYUP }; -} -#endif + ImageCtx *m_ictx; + std::string m_oid; + uint64_t m_object_no; + vector > m_image_extents; + State m_state; + ceph::bufferlist m_copyup_data; + vector m_pending_requests; + atomic_t m_pending_copyups; + + AsyncOperation m_async_op; + + std::vector m_snap_ids; + + void complete_requests(int r); + + bool should_complete(int r); + + void remove_from_list(); + + bool send_object_map(); + bool send_copyup(); +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_COPYUP_REQUEST_H