namespace librbd {
- int AioCompletion::wait_for_complete() {
- tracepoint(librbd, aio_wait_for_complete_enter, this);
- lock.Lock();
- while (state != STATE_COMPLETE)
- cond.Wait(lock);
- lock.Unlock();
- tracepoint(librbd, aio_wait_for_complete_exit, 0);
- return 0;
- }
+/**
+ * Block the calling thread until this completion reaches STATE_COMPLETE.
+ *
+ * Takes the completion lock and waits on cond until state equals
+ * STATE_COMPLETE; the enter/exit tracepoints bracket the wait.
+ *
+ * @return always 0
+ */
+int AioCompletion::wait_for_complete() {
+ tracepoint(librbd, aio_wait_for_complete_enter, this);
+ lock.Lock();
+ // re-check the state after every wakeup; only STATE_COMPLETE ends the wait
+ while (state != STATE_COMPLETE)
+ cond.Wait(lock);
+ lock.Unlock();
+ tracepoint(librbd, aio_wait_for_complete_exit, 0);
+ return 0;
+}
- void AioCompletion::finalize(ssize_t rval)
- {
- assert(lock.is_locked());
- assert(ictx != nullptr);
- CephContext *cct = ictx->cct;
-
- ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", "
- << "read_buf=" << reinterpret_cast<void*>(read_buf) << ", "
- << "real_bl=" << reinterpret_cast<void*>(read_bl) << dendl;
- if (rval >= 0 && aio_type == AIO_TYPE_READ) {
- if (read_buf && !read_bl) {
- destriper.assemble_result(cct, read_buf, read_buf_len);
- } else {
- // FIXME: make the destriper write directly into a buffer so
- // that we avoid shuffling pointers and copying zeros around.
- bufferlist bl;
- destriper.assemble_result(cct, bl, true);
-
- if (read_buf) {
- assert(bl.length() == read_buf_len);
- bl.copy(0, read_buf_len, read_buf);
- ldout(cct, 20) << "copied resulting " << bl.length()
- << " bytes to " << (void*)read_buf << dendl;
- }
- if (read_bl) {
- ldout(cct, 20) << " moving resulting " << bl.length()
- << " bytes to bl " << (void*)read_bl << dendl;
- read_bl->claim(bl);
- }
+/**
+ * Assemble the destriped read data into the caller's destination(s).
+ *
+ * The caller must already hold lock and ictx must be set (both asserted).
+ * Nothing is assembled unless rval >= 0 and aio_type is AIO_TYPE_READ.
+ *
+ * @param rval result to inspect; note this parameter shadows the rval member
+ */
+void AioCompletion::finalize(ssize_t rval)
+{
+ assert(lock.is_locked());
+ assert(ictx != nullptr);
+ CephContext *cct = ictx->cct;
+
+ // NOTE(review): the "real_bl=" label below prints read_bl and looks like a
+ // typo for "read_bl=" -- confirm before changing the log text
+ ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", "
+ << "read_buf=" << reinterpret_cast<void*>(read_buf) << ", "
+ << "real_bl=" << reinterpret_cast<void*>(read_bl) << dendl;
+ if (rval >= 0 && aio_type == AIO_TYPE_READ) {
+ if (read_buf && !read_bl) {
+ // only a raw buffer was supplied: assemble straight into it
+ destriper.assemble_result(cct, read_buf, read_buf_len);
+ } else {
+ // FIXME: make the destriper write directly into a buffer so
+ // that we avoid shuffling pointers and copying zeros around.
+ bufferlist bl;
+ destriper.assemble_result(cct, bl, true);
+
+ if (read_buf) {
+ assert(bl.length() == read_buf_len);
+ bl.copy(0, read_buf_len, read_buf);
+ ldout(cct, 20) << "copied resulting " << bl.length()
+ << " bytes to " << (void*)read_buf << dendl;
+ }
+ if (read_bl) {
+ ldout(cct, 20) << " moving resulting " << bl.length()
+ << " bytes to bl " << (void*)read_bl << dendl;
+ read_bl->claim(bl);
}
}
}
+}
- void AioCompletion::complete() {
- assert(lock.is_locked());
- assert(ictx != nullptr);
- CephContext *cct = ictx->cct;
-
- tracepoint(librbd, aio_complete_enter, this, rval);
- utime_t elapsed;
- elapsed = ceph_clock_now(cct) - start_time;
- switch (aio_type) {
- case AIO_TYPE_OPEN:
- case AIO_TYPE_CLOSE:
- break;
- case AIO_TYPE_READ:
- ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break;
- case AIO_TYPE_WRITE:
- ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break;
- case AIO_TYPE_DISCARD:
- ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break;
- case AIO_TYPE_FLUSH:
- ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break;
- default:
- lderr(cct) << "completed invalid aio_type: " << aio_type << dendl;
- break;
- }
+/**
+ * Run the completion sequence for this request.
+ *
+ * The caller must hold lock and ictx must be set (both asserted). In order:
+ * records the elapsed time in the per-type latency counter, commits the
+ * journal event when journal_tid is non-zero, invokes complete_cb, queues
+ * the completion for event-socket consumers, moves to STATE_COMPLETE and
+ * signals cond, and finally finishes async_op if it was started.
+ */
+void AioCompletion::complete() {
+ assert(lock.is_locked());
+ assert(ictx != nullptr);
+ CephContext *cct = ictx->cct;
+
+ tracepoint(librbd, aio_complete_enter, this, rval);
+ utime_t elapsed;
+ elapsed = ceph_clock_now(cct) - start_time;
+ switch (aio_type) {
+ case AIO_TYPE_OPEN:
+ case AIO_TYPE_CLOSE:
+ break;
+ case AIO_TYPE_READ:
+ ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break;
+ case AIO_TYPE_WRITE:
+ ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break;
+ case AIO_TYPE_DISCARD:
+ ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break;
+ case AIO_TYPE_FLUSH:
+ ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break;
+ default:
+ lderr(cct) << "completed invalid aio_type: " << aio_type << dendl;
+ break;
+ }
- // inform the journal that the op has successfully committed
- if (journal_tid != 0) {
- assert(ictx->journal != NULL);
- ictx->journal->commit_io_event(journal_tid, rval);
- }
+ // inform the journal that the op has successfully committed
+ if (journal_tid != 0) {
+ assert(ictx->journal != NULL);
+ ictx->journal->commit_io_event(journal_tid, rval);
+ }
- state = STATE_CALLBACK;
- if (complete_cb) {
- lock.Unlock();
- complete_cb(rbd_comp, complete_arg);
- lock.Lock();
- }
+ // the lock is dropped while the user callback runs and re-taken afterwards
+ state = STATE_CALLBACK;
+ if (complete_cb) {
+ lock.Unlock();
+ complete_cb(rbd_comp, complete_arg);
+ lock.Lock();
+ }
- if (ictx && event_notify && ictx->event_socket.is_valid()) {
- ictx->completed_reqs_lock.Lock();
- ictx->completed_reqs.push_back(&m_xlist_item);
- ictx->completed_reqs_lock.Unlock();
- ictx->event_socket.notify();
- }
+ // append this completion to the image's completed_reqs list and notify
+ // the event socket
+ if (ictx && event_notify && ictx->event_socket.is_valid()) {
+ ictx->completed_reqs_lock.Lock();
+ ictx->completed_reqs.push_back(&m_xlist_item);
+ ictx->completed_reqs_lock.Unlock();
+ ictx->event_socket.notify();
+ }
- state = STATE_COMPLETE;
- cond.Signal();
+ // wakes a thread blocked in wait_for_complete()
+ state = STATE_COMPLETE;
+ cond.Signal();
- // note: possible for image to be closed after op marked finished
- if (async_op.started()) {
- async_op.finish_op();
- }
- tracepoint(librbd, aio_complete_exit);
+ // note: possible for image to be closed after op marked finished
+ if (async_op.started()) {
+ async_op.finish_op();
}
+ tracepoint(librbd, aio_complete_exit);
+}
- void AioCompletion::init_time(ImageCtx *i, aio_type_t t) {
- Mutex::Locker locker(lock);
- if (ictx == nullptr) {
- ictx = i;
- aio_type = t;
- start_time = ceph_clock_now(ictx->cct);
- }
+/**
+ * Bind this completion to an image context and operation type and record
+ * the start time used later for latency accounting in complete().
+ *
+ * Only the first call has an effect: when ictx is already set nothing is
+ * changed.
+ *
+ * @param i image context to attach; dereferenced for its cct
+ * @param t type of operation this completion tracks
+ */
+void AioCompletion::init_time(ImageCtx *i, aio_type_t t) {
+ Mutex::Locker locker(lock);
+ if (ictx == nullptr) {
+ ictx = i;
+ aio_type = t;
+ start_time = ceph_clock_now(ictx->cct);
}
+}
- void AioCompletion::start_op(bool ignore_type) {
- Mutex::Locker locker(lock);
- assert(ictx != nullptr);
- assert(!async_op.started());
- if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) {
- async_op.start_op(*ictx);
- }
+/**
+ * Start async_op against the attached image.
+ *
+ * Requires init_time() to have set ictx and async_op not to be started yet
+ * (both asserted). The op is only started while state is STATE_PENDING, and
+ * flush completions are skipped unless ignore_type is true.
+ *
+ * @param ignore_type when true, start the op even for AIO_TYPE_FLUSH
+ */
+void AioCompletion::start_op(bool ignore_type) {
+ Mutex::Locker locker(lock);
+ assert(ictx != nullptr);
+ assert(!async_op.started());
+ if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) {
+ async_op.start_op(*ictx);
}
+}
- void AioCompletion::fail(int r)
- {
- lock.Lock();
- assert(ictx != nullptr);
- CephContext *cct = ictx->cct;
+/**
+ * Complete the request immediately with error code r.
+ *
+ * Asserts that ictx is set and that no sub-requests are pending, stores r
+ * in rval and runs complete(). Note that finalize() is not called on this
+ * path.
+ *
+ * @param r error code to report; logged via cpp_strerror()
+ */
+void AioCompletion::fail(int r)
+{
+ lock.Lock();
+ assert(ictx != nullptr);
+ CephContext *cct = ictx->cct;
+
+ lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r)
+ << dendl;
+ assert(pending_count == 0);
+ rval = r;
+ complete();
+ // drops one reference and releases the lock taken above
+ put_unlock();
+}
- lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r)
- << dendl;
- assert(pending_count == 0);
- rval = r;
- complete();
- put_unlock();
- }
+/**
+ * Record how many sub-requests this completion is expected to receive.
+ *
+ * Asserts that ictx is set and that pending_count is still 0, stores count
+ * under the lock, then calls unblock() once the lock has been released.
+ *
+ * @param count number of sub-requests that will call complete_request()
+ */
+void AioCompletion::set_request_count(uint32_t count) {
+ lock.Lock();
+ assert(ictx != nullptr);
+ CephContext *cct = ictx->cct;
- void AioCompletion::set_request_count(uint32_t count) {
- lock.Lock();
- assert(ictx != nullptr);
- CephContext *cct = ictx->cct;
+ ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl;
+ assert(pending_count == 0);
+ pending_count = count;
+ lock.Unlock();
- ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl;
- assert(pending_count == 0);
- pending_count = count;
- lock.Unlock();
+ // if no pending requests, completion will fire now
+ unblock();
+}
- // if no pending requests, completion will fire now
- unblock();
+/**
+ * Account for the result of one finished sub-request.
+ *
+ * While rval is still non-negative, a negative r other than -EEXIST
+ * replaces rval and a positive r is added to it. pending_count is then
+ * decremented; when it reaches zero and blockers is zero the completion is
+ * finalized and completed. The call always ends with put_unlock().
+ *
+ * @param r result of the sub-request
+ */
+void AioCompletion::complete_request(ssize_t r)
+{
+ lock.Lock();
+ assert(ictx != nullptr);
+ CephContext *cct = ictx->cct;
+
+ // once rval holds an error it is no longer modified
+ if (rval >= 0) {
+ if (r < 0 && r != -EEXIST)
+ rval = r;
+ else if (r > 0)
+ rval += r;
}
+ assert(pending_count);
+ int count = --pending_count;
- void AioCompletion::complete_request(ssize_t r)
- {
- lock.Lock();
- assert(ictx != nullptr);
- CephContext *cct = ictx->cct;
-
- if (rval >= 0) {
- if (r < 0 && r != -EEXIST)
- rval = r;
- else if (r > 0)
- rval += r;
- }
- assert(pending_count);
- int count = --pending_count;
-
- ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", "
- << "pending=" << pending_count << dendl;
- if (!count && blockers == 0) {
- finalize(rval);
- complete();
- }
- put_unlock();
+ ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", "
+ << "pending=" << pending_count << dendl;
+ if (!count && blockers == 0) {
+ finalize(rval);
+ complete();
}
+ // drops one reference and releases the lock taken above
+ put_unlock();
+}
+
+/**
+ * Remember the journal event tid for this request so that complete() can
+ * commit it. Must be called while state is still STATE_PENDING (asserted).
+ *
+ * @param tid journal event id; complete() ignores a value of 0
+ */
+void AioCompletion::associate_journal_event(uint64_t tid) {
+ Mutex::Locker l(lock);
+ assert(state == STATE_PENDING);
+ journal_tid = tid;
+}
- void AioCompletion::associate_journal_event(uint64_t tid) {
+/**
+ * Non-blocking check of the completion state.
+ *
+ * @return true when state is STATE_COMPLETE, sampled while holding lock
+ */
+bool AioCompletion::is_complete() {
+ tracepoint(librbd, aio_is_complete_enter, this);
+ bool done;
+ {
Mutex::Locker l(lock);
- assert(state == STATE_PENDING);
- journal_tid = tid;
+ done = this->state == STATE_COMPLETE;
}
+ tracepoint(librbd, aio_is_complete_exit, done);
+ return done;
+}
- bool AioCompletion::is_complete() {
- tracepoint(librbd, aio_is_complete_enter, this);
- bool done;
- {
- Mutex::Locker l(lock);
- done = this->state == STATE_COMPLETE;
- }
- tracepoint(librbd, aio_is_complete_exit, done);
- return done;
- }
+/**
+ * Read the current result value.
+ *
+ * @return a copy of rval taken while holding lock; the value is returned
+ *         as-is whether or not the request has completed
+ */
+ssize_t AioCompletion::get_return_value() {
+ tracepoint(librbd, aio_get_return_value_enter, this);
+ lock.Lock();
+ ssize_t r = rval;
+ lock.Unlock();
+ tracepoint(librbd, aio_get_return_value_exit, r);
+ return r;
+}
- ssize_t AioCompletion::get_return_value() {
- tracepoint(librbd, aio_get_return_value_enter, this);
- lock.Lock();
- ssize_t r = rval;
- lock.Unlock();
- tracepoint(librbd, aio_get_return_value_exit, r);
- return r;
+/**
+ * Hand the data of one finished object read to the completion's destriper
+ * and then report the sub-request as complete.
+ *
+ * For r >= 0 or r == -ENOENT the read data is added to the destriper while
+ * holding the completion lock, and r is replaced by the request's
+ * m_object_len before it is forwarded to C_AioRequest::finish(). Any other
+ * error is forwarded unchanged.
+ *
+ * @param r result of the object read
+ */
+void C_AioRead::finish(int r)
+{
+ m_completion->lock.Lock();
+ CephContext *cct = m_completion->ictx->cct;
+ ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl;
+
+ if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation
+ ldout(cct, 10) << " got " << m_req->m_ext_map
+ << " for " << m_req->m_buffer_extents
+ << " bl " << m_req->data().length() << dendl;
+ // reads from the parent don't populate the m_ext_map and the overlap
+ // may not be the full buffer. compensate here by filling in m_ext_map
+ // with the read extent when it is empty.
+ if (m_req->m_ext_map.empty())
+ m_req->m_ext_map[m_req->m_object_off] = m_req->data().length();
+
+ m_completion->destriper.add_partial_sparse_result(
+ cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off,
+ m_req->m_buffer_extents);
+ r = m_req->m_object_len;
}
+ m_completion->lock.Unlock();
- void C_AioRead::finish(int r)
- {
- m_completion->lock.Lock();
- CephContext *cct = m_completion->ictx->cct;
- ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl;
-
- if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation
- ldout(cct, 10) << " got " << m_req->m_ext_map
- << " for " << m_req->m_buffer_extents
- << " bl " << m_req->data().length() << dendl;
- // reads from the parent don't populate the m_ext_map and the overlap
- // may not be the full buffer. compensate here by filling in m_ext_map
- // with the read extent when it is empty.
- if (m_req->m_ext_map.empty())
- m_req->m_ext_map[m_req->m_object_off] = m_req->data().length();
-
- m_completion->destriper.add_partial_sparse_result(
- cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off,
- m_req->m_buffer_extents);
- r = m_req->m_object_len;
- }
- m_completion->lock.Unlock();
-
- C_AioRequest::finish(r);
- }
+ // the completion lock is released before the base class takes it again
+ // in complete_request()
+ C_AioRequest::finish(r);
+}
- void C_CacheRead::complete(int r) {
- if (!m_enqueued) {
- // cache_lock creates a lock ordering issue -- so re-execute this context
- // outside the cache_lock
- m_enqueued = true;
- m_image_ctx.op_work_queue->queue(this, r);
- return;
- }
- Context::complete(r);
+/**
+ * Two-phase completion: the first invocation re-queues this context on the
+ * image's op_work_queue with the same result and returns; the second
+ * invocation (m_enqueued already set) falls through to Context::complete().
+ *
+ * @param r result passed on to the re-queued invocation / Context::complete()
+ */
+void C_CacheRead::complete(int r) {
+ if (!m_enqueued) {
+ // cache_lock creates a lock ordering issue -- so re-execute this context
+ // outside the cache_lock
+ m_enqueued = true;
+ m_image_ctx.op_work_queue->queue(this, r);
+ return;
}
+ Context::complete(r);
+}
- void C_CacheRead::finish(int r)
- {
- m_req->complete(r);
- }
+/**
+ * Forward the result to the wrapped object read request.
+ *
+ * @param r result handed to m_req->complete()
+ */
+void C_CacheRead::finish(int r)
+{
+ m_req->complete(r);
}
+
+} // namespace librbd
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_AIOCOMPLETION_H
-#define CEPH_LIBRBD_AIOCOMPLETION_H
+
+#ifndef CEPH_LIBRBD_AIO_COMPLETION_H
+#define CEPH_LIBRBD_AIO_COMPLETION_H
#include "common/Cond.h"
#include "common/Mutex.h"
namespace librbd {
- class AioObjectRead;
-
- typedef enum {
- AIO_TYPE_NONE = 0,
- AIO_TYPE_OPEN,
- AIO_TYPE_CLOSE,
- AIO_TYPE_READ,
- AIO_TYPE_WRITE,
- AIO_TYPE_DISCARD,
- AIO_TYPE_FLUSH,
- } aio_type_t;
-
- typedef enum {
- STATE_PENDING = 0,
- STATE_CALLBACK,
- STATE_COMPLETE,
- } aio_state_t;
-
- /**
- * AioCompletion is the overall completion for a single
- * rbd I/O request. It may be composed of many AioObjectRequests,
- * which each go to a single object.
- *
- * The retrying of individual requests is handled at a lower level,
- * so all AioCompletion cares about is the count of outstanding
- * requests. The number of expected individual requests should be
- * set initially using set_request_count() prior to issuing the
- * requests. This ensures that the completion will not be completed
- * within the caller's thread of execution (instead via a librados
- * context or via a thread pool context for cache read hits).
- */
- struct AioCompletion {
- mutable Mutex lock;
- Cond cond;
- aio_state_t state;
- ssize_t rval;
- callback_t complete_cb;
- void *complete_arg;
- rbd_completion_t rbd_comp;
- uint32_t pending_count; ///< number of requests
- uint32_t blockers;
- int ref;
- bool released;
- ImageCtx *ictx;
- utime_t start_time;
- aio_type_t aio_type;
-
- Striper::StripedReadResult destriper;
- bufferlist *read_bl;
- char *read_buf;
- size_t read_buf_len;
-
- AsyncOperation async_op;
-
- uint64_t journal_tid;
- xlist<AioCompletion*>::item m_xlist_item;
- bool event_notify;
-
- template <typename T, void (T::*MF)(int)>
- static void callback_adapter(completion_t cb, void *arg) {
- AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
- T *t = reinterpret_cast<T *>(arg);
- (t->*MF)(comp->get_return_value());
- comp->release();
- }
-
- static AioCompletion *create(void *cb_arg, callback_t cb_complete,
- rbd_completion_t rbd_comp) {
- AioCompletion *comp = new AioCompletion();
- comp->set_complete_cb(cb_arg, cb_complete);
- comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp);
- return comp;
- }
-
- template <typename T, void (T::*MF)(int) = &T::complete>
- static AioCompletion *create(T *obj) {
- AioCompletion *comp = new AioCompletion();
- comp->set_complete_cb(obj, &callback_adapter<T, MF>);
- comp->rbd_comp = comp;
- return comp;
- }
-
- template <typename T, void (T::*MF)(int) = &T::complete>
- static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx,
- aio_type_t type) {
- AioCompletion *comp = create<T, MF>(obj);
- comp->init_time(image_ctx, type);
- comp->start_op();
- return comp;
- }
-
- AioCompletion() : lock("AioCompletion::lock", true, false),
- state(STATE_PENDING), rval(0), complete_cb(NULL),
- complete_arg(NULL), rbd_comp(NULL),
- pending_count(0), blockers(1),
- ref(1), released(false), ictx(NULL),
- aio_type(AIO_TYPE_NONE),
- read_bl(NULL), read_buf(NULL), read_buf_len(0),
- journal_tid(0),
- m_xlist_item(this), event_notify(false) {
- }
- ~AioCompletion() {
- }
-
- int wait_for_complete();
-
- void finalize(ssize_t rval);
-
- inline bool is_initialized(aio_type_t type) const {
- Mutex::Locker locker(lock);
- return ((ictx != nullptr) && (aio_type == type));
- }
- inline bool is_started() const {
- Mutex::Locker locker(lock);
- return async_op.started();
- }
-
- void init_time(ImageCtx *i, aio_type_t t);
- void start_op(bool ignore_type = false);
- void fail(int r);
-
- void complete();
-
- void set_complete_cb(void *cb_arg, callback_t cb) {
- complete_cb = cb;
- complete_arg = cb_arg;
- }
-
- void set_request_count(uint32_t num);
- void add_request() {
- lock.Lock();
- assert(pending_count > 0);
- lock.Unlock();
- get();
- }
- void complete_request(ssize_t r);
-
- void associate_journal_event(uint64_t tid);
-
- bool is_complete();
-
- ssize_t get_return_value();
-
- void get() {
- lock.Lock();
- assert(ref > 0);
- ref++;
- lock.Unlock();
- }
- void release() {
- lock.Lock();
- assert(!released);
- released = true;
- put_unlock();
- }
- void put() {
- lock.Lock();
- put_unlock();
- }
- void put_unlock() {
- assert(ref > 0);
- int n = --ref;
- lock.Unlock();
- if (!n) {
- if (ictx) {
- if (event_notify) {
- ictx->completed_reqs_lock.Lock();
- m_xlist_item.remove_myself();
- ictx->completed_reqs_lock.Unlock();
- }
- if (aio_type == AIO_TYPE_CLOSE || (aio_type == AIO_TYPE_OPEN &&
- rval < 0)) {
- delete ictx;
- }
- }
- delete this;
- }
- }
-
- void block() {
- Mutex::Locker l(lock);
- ++blockers;
- }
- void unblock() {
- Mutex::Locker l(lock);
- assert(blockers > 0);
- --blockers;
- if (pending_count == 0 && blockers == 0) {
- finalize(rval);
- complete();
+class AioObjectRead;
+
+/**
+ * Kind of operation an AioCompletion tracks. AIO_TYPE_NONE is the value a
+ * freshly constructed AioCompletion starts with.
+ */
+typedef enum {
+ AIO_TYPE_NONE = 0,
+ AIO_TYPE_OPEN,
+ AIO_TYPE_CLOSE,
+ AIO_TYPE_READ,
+ AIO_TYPE_WRITE,
+ AIO_TYPE_DISCARD,
+ AIO_TYPE_FLUSH,
+} aio_type_t;
+
+/**
+ * Life-cycle state of an AioCompletion: STATE_PENDING is the constructor's
+ * initial value, AioCompletion::complete() sets STATE_CALLBACK before it
+ * invokes the user callback and STATE_COMPLETE afterwards.
+ */
+typedef enum {
+ STATE_PENDING = 0,
+ STATE_CALLBACK,
+ STATE_COMPLETE,
+} aio_state_t;
+
+/**
+ * AioCompletion is the overall completion for a single
+ * rbd I/O request. It may be composed of many AioObjectRequests,
+ * which each go to a single object.
+ *
+ * The retrying of individual requests is handled at a lower level,
+ * so all AioCompletion cares about is the count of outstanding
+ * requests. The number of expected individual requests should be
+ * set initially using set_request_count() prior to issuing the
+ * requests. This ensures that the completion will not be completed
+ * within the caller's thread of execution (instead via a librados
+ * context or via a thread pool context for cache read hits).
+ */
+struct AioCompletion {
+ // lock is taken by most member functions before they touch the fields
+ // below; cond is what wait_for_complete() waits on
+ mutable Mutex lock;
+ Cond cond;
+ aio_state_t state;
+ ssize_t rval;
+ callback_t complete_cb;
+ void *complete_arg;
+ rbd_completion_t rbd_comp;
+ uint32_t pending_count; ///< number of requests
+ // starts at 1 (see constructor); unblock() only lets the completion fire
+ // once this has dropped to 0
+ uint32_t blockers;
+ // reference count, starts at 1; put_unlock() deletes the object at 0
+ int ref;
+ bool released;
+ ImageCtx *ictx;
+ utime_t start_time;
+ aio_type_t aio_type;
+
+ Striper::StripedReadResult destriper;
+ bufferlist *read_bl;
+ char *read_buf;
+ size_t read_buf_len;
+
+ AsyncOperation async_op;
+
+ uint64_t journal_tid;
+ xlist<AioCompletion*>::item m_xlist_item;
+ bool event_notify;
+
+ // Callback trampoline: treats cb as the AioCompletion and arg as a T*,
+ // calls (t->*MF)(return value) and then releases the completion.
+ template <typename T, void (T::*MF)(int)>
+ static void callback_adapter(completion_t cb, void *arg) {
+ AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
+ T *t = reinterpret_cast<T *>(arg);
+ (t->*MF)(comp->get_return_value());
+ comp->release();
+ }
+
+ // Allocate a completion with a raw callback; rbd_comp falls back to the
+ // new completion itself when the caller passes nullptr.
+ static AioCompletion *create(void *cb_arg, callback_t cb_complete,
+ rbd_completion_t rbd_comp) {
+ AioCompletion *comp = new AioCompletion();
+ comp->set_complete_cb(cb_arg, cb_complete);
+ comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp);
+ return comp;
+ }
+
+ // Allocate a completion that invokes obj->MF(r) (default: T::complete)
+ // through callback_adapter.
+ template <typename T, void (T::*MF)(int) = &T::complete>
+ static AioCompletion *create(T *obj) {
+ AioCompletion *comp = new AioCompletion();
+ comp->set_complete_cb(obj, &callback_adapter<T, MF>);
+ comp->rbd_comp = comp;
+ return comp;
+ }
+
+ // Same as create(obj), then init_time(image_ctx, type) and start_op().
+ template <typename T, void (T::*MF)(int) = &T::complete>
+ static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx,
+ aio_type_t type) {
+ AioCompletion *comp = create<T, MF>(obj);
+ comp->init_time(image_ctx, type);
+ comp->start_op();
+ return comp;
+ }
+
+ AioCompletion() : lock("AioCompletion::lock", true, false),
+ state(STATE_PENDING), rval(0), complete_cb(NULL),
+ complete_arg(NULL), rbd_comp(NULL),
+ pending_count(0), blockers(1),
+ ref(1), released(false), ictx(NULL),
+ aio_type(AIO_TYPE_NONE),
+ read_bl(NULL), read_buf(NULL), read_buf_len(0),
+ journal_tid(0), m_xlist_item(this), event_notify(false) {
+ }
+
+ ~AioCompletion() {
+ }
+
+ int wait_for_complete();
+
+ void finalize(ssize_t rval);
+
+ // true once init_time() has attached an image and the type matches
+ inline bool is_initialized(aio_type_t type) const {
+ Mutex::Locker locker(lock);
+ return ((ictx != nullptr) && (aio_type == type));
+ }
+ inline bool is_started() const {
+ Mutex::Locker locker(lock);
+ return async_op.started();
+ }
+
+ void init_time(ImageCtx *i, aio_type_t t);
+ void start_op(bool ignore_type = false);
+ void fail(int r);
+
+ void complete();
+
+ // NOTE(review): stores the callback without taking lock -- presumably
+ // only called before the completion is shared; confirm against callers
+ void set_complete_cb(void *cb_arg, callback_t cb) {
+ complete_cb = cb;
+ complete_arg = cb_arg;
+ }
+
+ void set_request_count(uint32_t num);
+ // asserts pending_count is already non-zero, then takes a reference
+ void add_request() {
+ lock.Lock();
+ assert(pending_count > 0);
+ lock.Unlock();
+ get();
+ }
+ void complete_request(ssize_t r);
+
+ void associate_journal_event(uint64_t tid);
+
+ bool is_complete();
+
+ ssize_t get_return_value();
+
+ // take an additional reference
+ void get() {
+ lock.Lock();
+ assert(ref > 0);
+ ref++;
+ lock.Unlock();
+ }
+ // may be called only once (asserted via released); drops a reference
+ void release() {
+ lock.Lock();
+ assert(!released);
+ released = true;
+ put_unlock();
+ }
+ // drop a reference
+ void put() {
+ lock.Lock();
+ put_unlock();
+ }
+ // Drop a reference. The caller must already hold lock: it is released
+ // here without being acquired. When the last reference goes away the
+ // completion unlinks m_xlist_item (if event_notify is set), deletes ictx
+ // for a CLOSE or a failed OPEN, and then deletes itself.
+ void put_unlock() {
+ assert(ref > 0);
+ int n = --ref;
+ lock.Unlock();
+ if (!n) {
+ if (ictx) {
+ if (event_notify) {
+ ictx->completed_reqs_lock.Lock();
+ m_xlist_item.remove_myself();
+ ictx->completed_reqs_lock.Unlock();
+ }
+ if (aio_type == AIO_TYPE_CLOSE ||
+ (aio_type == AIO_TYPE_OPEN && rval < 0)) {
+ delete ictx;
+ }
}
+ delete this;
}
-
- void set_event_notify(bool s) {
- Mutex::Locker l(lock);
- event_notify = s;
- }
-
- void *get_arg() {
- return complete_arg;
- }
- };
-
- class C_AioRequest : public Context {
- public:
- C_AioRequest(AioCompletion *completion) : m_completion(completion) {
- m_completion->add_request();
- }
- virtual ~C_AioRequest() {}
- virtual void finish(int r) {
- m_completion->complete_request(r);
- }
- protected:
- AioCompletion *m_completion;
- };
-
- class C_AioRead : public C_AioRequest {
- public:
- C_AioRead(AioCompletion *completion)
- : C_AioRequest(completion), m_req(nullptr) {
- }
- virtual ~C_AioRead() {}
- virtual void finish(int r);
- void set_req(AioObjectRead *req) {
- m_req = req;
+ }
+
+ // add one more blocker that must be removed before the completion fires
+ void block() {
+ Mutex::Locker l(lock);
+ ++blockers;
+ }
+ // remove one blocker; finalizes and completes the request when neither
+ // blockers nor pending sub-requests remain
+ void unblock() {
+ Mutex::Locker l(lock);
+ assert(blockers > 0);
+ --blockers;
+ if (pending_count == 0 && blockers == 0) {
+ finalize(rval);
+ complete();
}
- private:
- AioObjectRead *m_req;
- };
-
- class C_CacheRead : public Context {
- public:
- explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req)
- : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {}
- virtual void complete(int r);
- protected:
- virtual void finish(int r);
- private:
- ImageCtx &m_image_ctx;
- AioObjectRead *m_req;
- bool m_enqueued;
- };
-}
-
-#endif
+ }
+
+ // when set, complete() queues this completion on the image's
+ // completed_reqs list and notifies the event socket
+ void set_event_notify(bool s) {
+ Mutex::Locker l(lock);
+ event_notify = s;
+ }
+
+ // returns the cb_arg stored by set_complete_cb()
+ void *get_arg() {
+ return complete_arg;
+ }
+};
+
+/**
+ * Context for one sub-request of an AioCompletion: construction registers
+ * with the completion via add_request() and finish() reports the result
+ * via complete_request().
+ */
+class C_AioRequest : public Context {
+public:
+ C_AioRequest(AioCompletion *completion) : m_completion(completion) {
+ m_completion->add_request();
+ }
+ virtual ~C_AioRequest() {}
+ virtual void finish(int r) {
+ m_completion->complete_request(r);
+ }
+protected:
+ AioCompletion *m_completion;
+};
+
+/**
+ * Sub-request context for an object read. finish() is defined out of line
+ * and reads through m_req, which starts as nullptr, so set_req() has to be
+ * called before the context is finished.
+ */
+class C_AioRead : public C_AioRequest {
+public:
+ C_AioRead(AioCompletion *completion)
+ : C_AioRequest(completion), m_req(nullptr) {
+ }
+ virtual ~C_AioRead() {}
+ virtual void finish(int r);
+ void set_req(AioObjectRead *req) {
+ m_req = req;
+ }
+private:
+ AioObjectRead *m_req;
+};
+
+/**
+ * Context wrapping an AioObjectRead: complete() is overridden and
+ * m_enqueued records whether the context has already been re-queued;
+ * finish() passes the result on to the wrapped request.
+ */
+class C_CacheRead : public Context {
+public:
+ // ictx is dereferenced immediately, so it must not be null
+ explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req)
+ : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {}
+ virtual void complete(int r);
+protected:
+ virtual void finish(int r);
+private:
+ ImageCtx &m_image_ctx;
+ AioObjectRead *m_req;
+ bool m_enqueued;
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_AIO_COMPLETION_H
namespace librbd {
- AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid,
- uint64_t objectno, uint64_t off,
- uint64_t len, librados::snap_t snap_id,
- Context *completion, bool hide_enoent)
- : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off),
- m_object_len(len), m_snap_id(snap_id), m_completion(completion),
- m_hide_enoent(hide_enoent) {
-
- Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
- 0, m_ictx->layout.object_size, m_parent_extents);
-
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- RWLock::RLocker parent_locker(m_ictx->parent_lock);
- compute_parent_extents();
- }
+/**
+ * Store the request parameters and pre-compute m_parent_extents.
+ *
+ * m_parent_extents is first filled by Striper::extent_to_file() for the
+ * range [0, layout.object_size) of object objectno, then pruned by
+ * compute_parent_extents() while holding the image's snap_lock and
+ * parent_lock for reading.
+ *
+ * @param ictx image context; dereferenced here, must not be null
+ * @param oid object name
+ * @param objectno object number within the image
+ * @param off offset within the object
+ * @param len length within the object
+ * @param snap_id snapshot id used for the parent overlap lookup
+ * @param completion context completed when the request finishes
+ * @param hide_enoent when true, complete() reports -ENOENT as 0
+ */
+AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t off,
+ uint64_t len, librados::snap_t snap_id,
+ Context *completion, bool hide_enoent)
+ : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off),
+ m_object_len(len), m_snap_id(snap_id), m_completion(completion),
+ m_hide_enoent(hide_enoent) {
+
+ Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
+ 0, m_ictx->layout.object_size, m_parent_extents);
+
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ RWLock::RLocker parent_locker(m_ictx->parent_lock);
+ compute_parent_extents();
+}
- void AioObjectRequest::complete(int r)
- {
- if (should_complete(r)) {
- ldout(m_ictx->cct, 20) << "complete " << this << dendl;
- if (m_hide_enoent && r == -ENOENT) {
- r = 0;
- }
- m_completion->complete(r);
- delete this;
+/**
+ * Offer a result to the request's state machine.
+ *
+ * When should_complete(r) returns true the result is forwarded to
+ * m_completion (with -ENOENT mapped to 0 if m_hide_enoent is set) and the
+ * request deletes itself. When it returns false nothing else happens here.
+ *
+ * @param r result of the step that just finished
+ */
+void AioObjectRequest::complete(int r)
+{
+ if (should_complete(r)) {
+ ldout(m_ictx->cct, 20) << "complete " << this << dendl;
+ if (m_hide_enoent && r == -ENOENT) {
+ r = 0;
}
+ m_completion->complete(r);
+ // the request owns itself; it must not be touched after this point
+ delete this;
}
+}
- bool AioObjectRequest::compute_parent_extents() {
- assert(m_ictx->snap_lock.is_locked());
- assert(m_ictx->parent_lock.is_locked());
-
- uint64_t parent_overlap;
- int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
- if (r < 0) {
- // NOTE: it's possible for a snapshot to be deleted while we are
- // still reading from it
- lderr(m_ictx->cct) << this << " compute_parent_extents: failed to "
- << "retrieve parent overlap: " << cpp_strerror(r)
- << dendl;
- m_parent_extents.clear();
- return false;
- }
-
- uint64_t object_overlap =
- m_ictx->prune_parent_extents(m_parent_extents, parent_overlap);
- if (object_overlap > 0) {
- ldout(m_ictx->cct, 20) << this << " compute_parent_extents: "
- << "overlap " << parent_overlap << " "
- << "extents " << m_parent_extents << dendl;
- return true;
- }
+/**
+ * Prune m_parent_extents against the parent overlap for m_snap_id.
+ *
+ * The caller must hold the image's snap_lock and parent_lock (asserted).
+ * If the overlap lookup fails, m_parent_extents is cleared.
+ *
+ * @return true when the pruned extents overlap the parent (object_overlap
+ *         > 0); false otherwise, including when the lookup fails
+ */
+bool AioObjectRequest::compute_parent_extents() {
+ assert(m_ictx->snap_lock.is_locked());
+ assert(m_ictx->parent_lock.is_locked());
+
+ uint64_t parent_overlap;
+ int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
+ if (r < 0) {
+ // NOTE: it's possible for a snapshot to be deleted while we are
+ // still reading from it
+ lderr(m_ictx->cct) << this << " compute_parent_extents: failed to "
+ << "retrieve parent overlap: " << cpp_strerror(r)
+ << dendl;
+ m_parent_extents.clear();
return false;
}
- static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) {
- assert(ictx->owner_lock.is_locked());
- assert(ictx->snap_lock.is_locked());
- return (ictx->clone_copy_on_read &&
- !ictx->read_only && snap_id == CEPH_NOSNAP &&
- (ictx->exclusive_lock == nullptr ||
- ictx->exclusive_lock->is_lock_owner()));
+ uint64_t object_overlap =
+ m_ictx->prune_parent_extents(m_parent_extents, parent_overlap);
+ if (object_overlap > 0) {
+ ldout(m_ictx->cct, 20) << this << " compute_parent_extents: "
+ << "overlap " << parent_overlap << " "
+ << "extents " << m_parent_extents << dendl;
+ return true;
}
+ return false;
+}
- /** read **/
+/**
+ * Decide whether a read may trigger copy-on-read.
+ *
+ * The caller must hold the image's owner_lock and snap_lock (asserted).
+ *
+ * @param ictx image context to inspect
+ * @param snap_id snapshot the read targets
+ * @return true when clone_copy_on_read is enabled, the image is not
+ *         read_only, snap_id is CEPH_NOSNAP, and either no exclusive_lock
+ *         exists or exclusive_lock->is_lock_owner() is true
+ */
+static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) {
+ assert(ictx->owner_lock.is_locked());
+ assert(ictx->snap_lock.is_locked());
+ return (ictx->clone_copy_on_read &&
+ !ictx->read_only && snap_id == CEPH_NOSNAP &&
+ (ictx->exclusive_lock == nullptr ||
+ ictx->exclusive_lock->is_lock_owner()));
+}
- AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid,
- uint64_t objectno, uint64_t offset, uint64_t len,
- vector<pair<uint64_t,uint64_t> >& be,
- librados::snap_t snap_id, bool sparse,
- Context *completion, int op_flags)
- : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion,
- false),
- m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse),
- m_op_flags(op_flags), m_parent_completion(NULL),
- m_state(LIBRBD_AIO_READ_FLAT) {
+/** read **/
- guard_read();
- }
+/**
+ * Build an object read request.
+ *
+ * The base class is constructed with hide_enoent = false. The request
+ * starts in LIBRBD_AIO_READ_FLAT with no parent completion, and
+ * guard_read() may then move it to LIBRBD_AIO_READ_GUARD.
+ *
+ * @param be buffer extents, copied into m_buffer_extents
+ * @param sparse stored in m_sparse
+ * @param op_flags stored in m_op_flags
+ * (the remaining parameters are passed through to AioObjectRequest)
+ */
+AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t offset, uint64_t len,
+ vector<pair<uint64_t,uint64_t> >& be,
+ librados::snap_t snap_id, bool sparse,
+ Context *completion, int op_flags)
+ : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion,
+ false),
+ m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse),
+ m_op_flags(op_flags), m_parent_completion(NULL),
+ m_state(LIBRBD_AIO_READ_FLAT) {
- void AioObjectRead::guard_read()
- {
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- RWLock::RLocker parent_locker(m_ictx->parent_lock);
+ guard_read();
+}
- if (has_parent()) {
- ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl;
- m_state = LIBRBD_AIO_READ_GUARD;
- }
+/**
+ * Switch the request to LIBRBD_AIO_READ_GUARD when has_parent() is true.
+ * The check runs while holding the image's snap_lock and parent_lock for
+ * reading; otherwise m_state is left unchanged.
+ */
+void AioObjectRead::guard_read()
+{
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ RWLock::RLocker parent_locker(m_ictx->parent_lock);
+
+ if (has_parent()) {
+ ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl;
+ m_state = LIBRBD_AIO_READ_GUARD;
}
+}
- bool AioObjectRead::should_complete(int r)
- {
- ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len
- << " r = " << r << dendl;
-
- bool finished = true;
-
- switch (m_state) {
- case LIBRBD_AIO_READ_GUARD:
- ldout(m_ictx->cct, 20) << "should_complete " << this
- << " READ_CHECK_GUARD" << dendl;
-
- // This is the step to read from parent
- if (!m_tried_parent && r == -ENOENT) {
- {
- RWLock::RLocker owner_locker(m_ictx->owner_lock);
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- RWLock::RLocker parent_locker(m_ictx->parent_lock);
- if (m_ictx->parent == NULL) {
- ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl;
- m_state = LIBRBD_AIO_READ_FLAT;
- finished = false;
- break;
- }
-
- // calculate reverse mapping onto the image
- vector<pair<uint64_t,uint64_t> > parent_extents;
- Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
- m_object_off, m_object_len, parent_extents);
-
- uint64_t parent_overlap = 0;
- uint64_t object_overlap = 0;
- r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
- if (r == 0) {
- object_overlap = m_ictx->prune_parent_extents(parent_extents,
- parent_overlap);
- }
+bool AioObjectRead::should_complete(int r)
+{
+ ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len
+ << " r = " << r << dendl;
+
+ bool finished = true;
+
+ switch (m_state) {
+ case LIBRBD_AIO_READ_GUARD:
+ ldout(m_ictx->cct, 20) << "should_complete " << this
+ << " READ_CHECK_GUARD" << dendl;
+
+ // This is the step to read from parent
+ if (!m_tried_parent && r == -ENOENT) {
+ {
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ RWLock::RLocker parent_locker(m_ictx->parent_lock);
+ if (m_ictx->parent == NULL) {
+ ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl;
+ m_state = LIBRBD_AIO_READ_FLAT;
+ finished = false;
+ break;
+ }
- if (object_overlap > 0) {
- m_tried_parent = true;
- if (is_copy_on_read(m_ictx, m_snap_id)) {
- m_state = LIBRBD_AIO_READ_COPYUP;
- }
+ // calculate reverse mapping onto the image
+ vector<pair<uint64_t,uint64_t> > parent_extents;
+ Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
+ m_object_off, m_object_len, parent_extents);
+
+ uint64_t parent_overlap = 0;
+ uint64_t object_overlap = 0;
+ r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
+ if (r == 0) {
+ object_overlap = m_ictx->prune_parent_extents(parent_extents,
+ parent_overlap);
+ }
- read_from_parent(parent_extents);
- finished = false;
+ if (object_overlap > 0) {
+ m_tried_parent = true;
+ if (is_copy_on_read(m_ictx, m_snap_id)) {
+ m_state = LIBRBD_AIO_READ_COPYUP;
}
- }
- if (m_tried_parent) {
- // release reference to the parent read completion. this request
- // might be completed after unblock is invoked.
- AioCompletion *parent_completion = m_parent_completion;
- parent_completion->unblock();
- parent_completion->put();
+ read_from_parent(parent_extents);
+ finished = false;
}
}
- break;
- case LIBRBD_AIO_READ_COPYUP:
- ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP"
- << dendl;
- // This is the extra step for copy-on-read: kick off an asynchronous copyup.
- // It is different from copy-on-write as asynchronous copyup will finish
- // by itself so state won't go back to LIBRBD_AIO_READ_GUARD.
-
- assert(m_tried_parent);
- if (r > 0) {
- // If read entire object from parent success and CoR is possible, kick
- // off a asynchronous copyup. This approach minimizes the latency
- // impact.
- send_copyup();
+
+ if (m_tried_parent) {
+ // release reference to the parent read completion. this request
+ // might be completed after unblock is invoked.
+ AioCompletion *parent_completion = m_parent_completion;
+ parent_completion->unblock();
+ parent_completion->put();
}
- break;
- case LIBRBD_AIO_READ_FLAT:
- ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT"
- << dendl;
- // The read content should be deposit in m_read_data
- break;
- default:
- lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
- assert(0);
}
-
- return finished;
+ break;
+ case LIBRBD_AIO_READ_COPYUP:
+ ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP"
+ << dendl;
+ // This is the extra step for copy-on-read: kick off an asynchronous copyup.
+ // It is different from copy-on-write as asynchronous copyup will finish
+ // by itself so state won't go back to LIBRBD_AIO_READ_GUARD.
+
+ assert(m_tried_parent);
+ if (r > 0) {
+ // If read entire object from parent success and CoR is possible, kick
+ // off a asynchronous copyup. This approach minimizes the latency
+ // impact.
+ send_copyup();
+ }
+ break;
+ case LIBRBD_AIO_READ_FLAT:
+ ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT"
+ << dendl;
+ // The read content should be deposit in m_read_data
+ break;
+ default:
+ lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
+ assert(0);
}
- void AioObjectRead::send() {
- ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len << dendl;
+ return finished;
+}
- {
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
+void AioObjectRead::send() {
+ ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
- // send read request to parent if the object doesn't exist locally
- if (m_ictx->object_map != nullptr &&
- !m_ictx->object_map->object_may_exist(m_object_no)) {
- m_ictx->op_work_queue->queue(util::create_context_callback<
- AioObjectRequest>(this), -ENOENT);
- return;
- }
- }
+ {
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
- librados::ObjectReadOperation op;
- int flags = m_ictx->get_read_flags(m_snap_id);
- if (m_sparse) {
- op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data,
- NULL);
- } else {
- op.read(m_object_off, m_object_len, &m_read_data, NULL);
+ // send read request to parent if the object doesn't exist locally
+ if (m_ictx->object_map != nullptr &&
+ !m_ictx->object_map->object_may_exist(m_object_no)) {
+ m_ictx->op_work_queue->queue(util::create_context_callback<
+ AioObjectRequest>(this), -ENOENT);
+ return;
}
- op.set_op_flags2(m_op_flags);
-
- librados::AioCompletion *rados_completion =
- util::create_rados_ack_callback(this);
- int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags,
- NULL);
- assert(r == 0);
+ }
- rados_completion->release();
+ librados::ObjectReadOperation op;
+ int flags = m_ictx->get_read_flags(m_snap_id);
+ if (m_sparse) {
+ op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data,
+ NULL);
+ } else {
+ op.read(m_object_off, m_object_len, &m_read_data, NULL);
}
+ op.set_op_flags2(m_op_flags);
- void AioObjectRead::send_copyup()
- {
- {
- RWLock::RLocker owner_locker(m_ictx->owner_lock);
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- RWLock::RLocker parent_locker(m_ictx->parent_lock);
- if (!compute_parent_extents() ||
- (m_ictx->exclusive_lock != nullptr &&
- !m_ictx->exclusive_lock->is_lock_owner())) {
- return;
- }
- }
+ librados::AioCompletion *rados_completion =
+ util::create_rados_ack_callback(this);
+ int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags,
+ NULL);
+ assert(r == 0);
- Mutex::Locker copyup_locker(m_ictx->copyup_list_lock);
- map<uint64_t, CopyupRequest*>::iterator it =
- m_ictx->copyup_list.find(m_object_no);
- if (it == m_ictx->copyup_list.end()) {
- // create and kick off a CopyupRequest
- CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no,
- m_parent_extents);
- m_ictx->copyup_list[m_object_no] = new_req;
- new_req->send();
- }
- }
+ rados_completion->release();
+}
- void AioObjectRead::read_from_parent(const vector<pair<uint64_t,uint64_t> >& parent_extents)
+void AioObjectRead::send_copyup()
+{
{
- assert(!m_parent_completion);
- m_parent_completion = AioCompletion::create_and_start<AioObjectRequest>(
- this, m_ictx, AIO_TYPE_READ);
-
- // prevent the parent image from being deleted while this
- // request is still in-progress
- m_parent_completion->get();
- m_parent_completion->block();
-
- ldout(m_ictx->cct, 20) << "read_from_parent this = " << this
- << " parent completion " << m_parent_completion
- << " extents " << parent_extents
- << dendl;
- RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
- AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion,
- parent_extents, NULL, &m_read_data, 0);
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ RWLock::RLocker parent_locker(m_ictx->parent_lock);
+ if (!compute_parent_extents() ||
+ (m_ictx->exclusive_lock != nullptr &&
+ !m_ictx->exclusive_lock->is_lock_owner())) {
+ return;
+ }
}
- /** write **/
-
- AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx,
- const std::string &oid,
- uint64_t object_no,
- uint64_t object_off,
- uint64_t len,
- const ::SnapContext &snapc,
- Context *completion,
- bool hide_enoent)
- : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP,
- completion, hide_enoent),
- m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val)
- {
- m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
+ Mutex::Locker copyup_locker(m_ictx->copyup_list_lock);
+ map<uint64_t, CopyupRequest*>::iterator it =
+ m_ictx->copyup_list.find(m_object_no);
+ if (it == m_ictx->copyup_list.end()) {
+ // create and kick off a CopyupRequest
+ CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no,
+ m_parent_extents);
+ m_ictx->copyup_list[m_object_no] = new_req;
+ new_req->send();
}
+}
- void AbstractAioObjectWrite::guard_write()
- {
- if (has_parent()) {
- m_state = LIBRBD_AIO_WRITE_GUARD;
- m_write.assert_exists();
- ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
- }
+void AioObjectRead::read_from_parent(const vector<pair<uint64_t,uint64_t> >& parent_extents)
+{
+ assert(!m_parent_completion);
+ m_parent_completion = AioCompletion::create_and_start<AioObjectRequest>(
+ this, m_ictx, AIO_TYPE_READ);
+
+ // prevent the parent image from being deleted while this
+ // request is still in-progress
+ m_parent_completion->get();
+ m_parent_completion->block();
+
+ ldout(m_ictx->cct, 20) << "read_from_parent this = " << this
+ << " parent completion " << m_parent_completion
+ << " extents " << parent_extents << dendl;
+ RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
+ AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion,
+ parent_extents, NULL, &m_read_data, 0);
+}
+
+/** write **/
+
+AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx,
+ const std::string &oid,
+ uint64_t object_no,
+ uint64_t object_off,
+ uint64_t len,
+ const ::SnapContext &snapc,
+ Context *completion,
+ bool hide_enoent)
+ : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP,
+ completion, hide_enoent),
+ m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val)
+{
+ m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
+}
+
+void AbstractAioObjectWrite::guard_write()
+{
+ if (has_parent()) {
+ m_state = LIBRBD_AIO_WRITE_GUARD;
+ m_write.assert_exists();
+ ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
}
+}
- bool AbstractAioObjectWrite::should_complete(int r)
- {
- ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid
- << " " << m_object_off << "~" << m_object_len
- << " should_complete: r = " << r << dendl;
-
- bool finished = true;
- switch (m_state) {
- case LIBRBD_AIO_WRITE_PRE:
- ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
- if (r < 0) {
- return true;
- }
+bool AbstractAioObjectWrite::should_complete(int r)
+{
+ ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid
+ << " " << m_object_off << "~" << m_object_len
+ << " should_complete: r = " << r << dendl;
- send_write();
- finished = false;
- break;
+ bool finished = true;
+ switch (m_state) {
+ case LIBRBD_AIO_WRITE_PRE:
+ ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
+ if (r < 0) {
+ return true;
+ }
- case LIBRBD_AIO_WRITE_POST:
- ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
- finished = true;
- break;
+ send_write();
+ finished = false;
+ break;
- case LIBRBD_AIO_WRITE_GUARD:
- ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
-
- if (r == -ENOENT) {
- handle_write_guard();
- finished = false;
- break;
- } else if (r < 0) {
- // pass the error code to the finish context
- m_state = LIBRBD_AIO_WRITE_ERROR;
- complete(r);
- finished = false;
- break;
- }
+ case LIBRBD_AIO_WRITE_POST:
+ ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
+ finished = true;
+ break;
- finished = send_post();
- break;
+ case LIBRBD_AIO_WRITE_GUARD:
+ ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
- case LIBRBD_AIO_WRITE_COPYUP:
- ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
- if (r < 0) {
- m_state = LIBRBD_AIO_WRITE_ERROR;
- complete(r);
- finished = false;
- } else {
- finished = send_post();
- }
+ if (r == -ENOENT) {
+ handle_write_guard();
+ finished = false;
break;
+ } else if (r < 0) {
+ // pass the error code to the finish context
+ m_state = LIBRBD_AIO_WRITE_ERROR;
+ complete(r);
+ finished = false;
+ break;
+ }
- case LIBRBD_AIO_WRITE_FLAT:
- ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
+ finished = send_post();
+ break;
+ case LIBRBD_AIO_WRITE_COPYUP:
+ ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
+ if (r < 0) {
+ m_state = LIBRBD_AIO_WRITE_ERROR;
+ complete(r);
+ finished = false;
+ } else {
finished = send_post();
- break;
+ }
+ break;
- case LIBRBD_AIO_WRITE_ERROR:
- assert(r < 0);
- lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r)
- << dendl;
- break;
+ case LIBRBD_AIO_WRITE_FLAT:
+ ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
- default:
- lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
- assert(0);
- }
+ finished = send_post();
+ break;
- return finished;
- }
+ case LIBRBD_AIO_WRITE_ERROR:
+ assert(r < 0);
+ lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl;
+ break;
- void AbstractAioObjectWrite::send() {
- assert(m_ictx->owner_lock.is_locked());
- ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" "
- << m_oid << " " << m_object_off << "~"
- << m_object_len << dendl;
- send_pre();
+ default:
+ lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
+ assert(0);
}
- void AbstractAioObjectWrite::send_pre() {
- assert(m_ictx->owner_lock.is_locked());
+ return finished;
+}
- bool write = false;
- {
- RWLock::RLocker snap_lock(m_ictx->snap_lock);
- if (m_ictx->object_map == nullptr) {
- m_object_exist = true;
- write = true;
+void AbstractAioObjectWrite::send() {
+ assert(m_ictx->owner_lock.is_locked());
+ ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" "
+ << m_oid << " " << m_object_off << "~"
+ << m_object_len << dendl;
+ send_pre();
+}
+
+void AbstractAioObjectWrite::send_pre() {
+ assert(m_ictx->owner_lock.is_locked());
+
+ bool write = false;
+ {
+ RWLock::RLocker snap_lock(m_ictx->snap_lock);
+ if (m_ictx->object_map == nullptr) {
+ m_object_exist = true;
+ write = true;
+ } else {
+ // should have been flushed prior to releasing lock
+ assert(m_ictx->exclusive_lock->is_lock_owner());
+ m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);
+
+ uint8_t new_state;
+ pre_object_map_update(&new_state);
+
+ RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+ if (m_ictx->object_map->update_required(m_object_no, new_state)) {
+ ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len
+ << dendl;
+ m_state = LIBRBD_AIO_WRITE_PRE;
+
+ Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+ bool updated = m_ictx->object_map->aio_update(m_object_no, new_state,
+ {}, ctx);
+ assert(updated);
} else {
- // should have been flushed prior to releasing lock
- assert(m_ictx->exclusive_lock->is_lock_owner());
- m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);
-
- uint8_t new_state;
- pre_object_map_update(&new_state);
-
- RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- if (m_ictx->object_map->update_required(m_object_no, new_state)) {
- ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len
- << dendl;
- m_state = LIBRBD_AIO_WRITE_PRE;
-
- Context *ctx = util::create_context_callback<AioObjectRequest>(this);
- bool updated = m_ictx->object_map->aio_update(m_object_no, new_state,
- {}, ctx);
- assert(updated);
- } else {
- write = true;
- }
+ write = true;
}
}
+ }
- // avoid possible recursive lock attempts
- if (write) {
- // no object map update required
- send_write();
- }
+ // avoid possible recursive lock attempts
+ if (write) {
+ // no object map update required
+ send_write();
}
+}
- bool AbstractAioObjectWrite::send_post() {
- RWLock::RLocker owner_locker(m_ictx->owner_lock);
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- if (m_ictx->object_map == nullptr || !post_object_map_update()) {
- return true;
- }
+bool AbstractAioObjectWrite::send_post() {
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ if (m_ictx->object_map == nullptr || !post_object_map_update()) {
+ return true;
+ }
- // should have been flushed prior to releasing lock
- assert(m_ictx->exclusive_lock->is_lock_owner());
+ // should have been flushed prior to releasing lock
+ assert(m_ictx->exclusive_lock->is_lock_owner());
- RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) {
- return true;
- }
+ RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+ if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) {
+ return true;
+ }
- ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len << dendl;
- m_state = LIBRBD_AIO_WRITE_POST;
+ ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+ m_state = LIBRBD_AIO_WRITE_POST;
- Context *ctx = util::create_context_callback<AioObjectRequest>(this);
- bool updated = m_ictx->object_map->aio_update(m_object_no,
- OBJECT_NONEXISTENT,
- OBJECT_PENDING, ctx);
- assert(updated);
- return false;
- }
+ Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+ bool updated = m_ictx->object_map->aio_update(m_object_no,
+ OBJECT_NONEXISTENT,
+ OBJECT_PENDING, ctx);
+ assert(updated);
+ return false;
+}
- void AbstractAioObjectWrite::send_write() {
- ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len
- << " object exist " << m_object_exist << dendl;
+void AbstractAioObjectWrite::send_write() {
+ ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len
+ << " object exist " << m_object_exist << dendl;
- if (!m_object_exist && has_parent()) {
- m_state = LIBRBD_AIO_WRITE_GUARD;
- handle_write_guard();
- } else {
- send_write_op(true);
- }
+ if (!m_object_exist && has_parent()) {
+ m_state = LIBRBD_AIO_WRITE_GUARD;
+ handle_write_guard();
+ } else {
+ send_write_op(true);
}
+}
- void AbstractAioObjectWrite::send_copyup()
- {
- ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len << dendl;
- m_state = LIBRBD_AIO_WRITE_COPYUP;
-
- m_ictx->copyup_list_lock.Lock();
- map<uint64_t, CopyupRequest*>::iterator it =
- m_ictx->copyup_list.find(m_object_no);
- if (it == m_ictx->copyup_list.end()) {
- CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid,
- m_object_no,
- m_parent_extents);
-
- // make sure to wait on this CopyupRequest
- new_req->append_request(this);
- m_ictx->copyup_list[m_object_no] = new_req;
-
- m_ictx->copyup_list_lock.Unlock();
- new_req->send();
- } else {
- it->second->append_request(this);
- m_ictx->copyup_list_lock.Unlock();
- }
+void AbstractAioObjectWrite::send_copyup()
+{
+ ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+ m_state = LIBRBD_AIO_WRITE_COPYUP;
+
+ m_ictx->copyup_list_lock.Lock();
+ map<uint64_t, CopyupRequest*>::iterator it =
+ m_ictx->copyup_list.find(m_object_no);
+ if (it == m_ictx->copyup_list.end()) {
+ CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid,
+ m_object_no,
+ m_parent_extents);
+
+ // make sure to wait on this CopyupRequest
+ new_req->append_request(this);
+ m_ictx->copyup_list[m_object_no] = new_req;
+
+ m_ictx->copyup_list_lock.Unlock();
+ new_req->send();
+ } else {
+ it->second->append_request(this);
+ m_ictx->copyup_list_lock.Unlock();
}
- void AbstractAioObjectWrite::send_write_op(bool write_guard)
+}
+void AbstractAioObjectWrite::send_write_op(bool write_guard)
+{
+ m_state = LIBRBD_AIO_WRITE_FLAT;
+ if (write_guard)
+ guard_write();
+ add_write_ops(&m_write);
+ assert(m_write.size() != 0);
+
+ librados::AioCompletion *rados_completion =
+ util::create_rados_safe_callback(this);
+ int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
+ m_snap_seq, m_snaps);
+ assert(r == 0);
+ rados_completion->release();
+}
+void AbstractAioObjectWrite::handle_write_guard()
+{
+ bool has_parent;
{
- m_state = LIBRBD_AIO_WRITE_FLAT;
- if (write_guard)
- guard_write();
- add_write_ops(&m_write);
- assert(m_write.size() != 0);
-
- librados::AioCompletion *rados_completion =
- util::create_rados_safe_callback(this);
- int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
- m_snap_seq, m_snaps);
- assert(r == 0);
- rados_completion->release();
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ RWLock::RLocker parent_locker(m_ictx->parent_lock);
+ has_parent = compute_parent_extents();
}
- void AbstractAioObjectWrite::handle_write_guard()
- {
- bool has_parent;
- {
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- RWLock::RLocker parent_locker(m_ictx->parent_lock);
- has_parent = compute_parent_extents();
- }
- // If parent still exists, overlap might also have changed.
- if (has_parent) {
- send_copyup();
- } else {
- // parent may have disappeared -- send original write again
- ldout(m_ictx->cct, 20) << "should_complete(" << this
- << "): parent overlap now 0" << dendl;
- send_write();
- }
+ // If parent still exists, overlap might also have changed.
+ if (has_parent) {
+ send_copyup();
+ } else {
+ // parent may have disappeared -- send original write again
+ ldout(m_ictx->cct, 20) << "should_complete(" << this
+ << "): parent overlap now 0" << dendl;
+ send_write();
}
+}
- void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- if (m_ictx->enable_alloc_hint &&
- (m_ictx->object_map == nullptr ||
- !m_object_exist)) {
- wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
- }
-
- if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) {
- wr->write_full(m_write_data);
- } else {
- wr->write(m_object_off, m_write_data);
- }
- wr->set_op_flags2(m_op_flags);
+void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ if (m_ictx->enable_alloc_hint &&
+ (m_ictx->object_map == nullptr || !m_object_exist)) {
+ wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
}
- void AioObjectWrite::send_write() {
- bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size());
- ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len
- << " object exist " << m_object_exist
- << " write_full " << write_full << dendl;
- if (write_full && !has_parent()) {
- send_write_op(false);
- } else {
- AbstractAioObjectWrite::send_write();
- }
+ if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) {
+ wr->write_full(m_write_data);
+ } else {
+ wr->write(m_object_off, m_write_data);
}
+ wr->set_op_flags2(m_op_flags);
+}
- void AioObjectRemove::guard_write() {
- // do nothing to disable write guard only if deep-copyup not required
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- if (!m_ictx->snaps.empty()) {
- AbstractAioObjectWrite::guard_write();
- }
+void AioObjectWrite::send_write() {
+ bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size());
+ ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len
+ << " object exist " << m_object_exist
+ << " write_full " << write_full << dendl;
+ if (write_full && !has_parent()) {
+ send_write_op(false);
+ } else {
+ AbstractAioObjectWrite::send_write();
}
- void AioObjectRemove::send_write() {
- ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
- << m_object_off << "~" << m_object_len << dendl;
- send_write_op(true);
+}
+
+void AioObjectRemove::guard_write() {
+ // do nothing to disable write guard only if deep-copyup not required
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ if (!m_ictx->snaps.empty()) {
+ AbstractAioObjectWrite::guard_write();
}
- void AioObjectTruncate::send_write() {
- ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid
- << " truncate " << m_object_off << dendl;
- if (!m_object_exist && ! has_parent()) {
- m_state = LIBRBD_AIO_WRITE_FLAT;
- Context *ctx = util::create_context_callback<AioObjectRequest>(this);
- m_ictx->op_work_queue->queue(ctx, 0);
- } else {
- AbstractAioObjectWrite::send_write();
- }
+}
+void AioObjectRemove::send_write() {
+ ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+ << m_object_off << "~" << m_object_len << dendl;
+ send_write_op(true);
+}
+void AioObjectTruncate::send_write() {
+ ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid
+ << " truncate " << m_object_off << dendl;
+ if (!m_object_exist && ! has_parent()) {
+ m_state = LIBRBD_AIO_WRITE_FLAT;
+ Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+ m_ictx->op_work_queue->queue(ctx, 0);
+ } else {
+ AbstractAioObjectWrite::send_write();
}
}
+
+} // namespace librbd
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_AIOREQUEST_H
-#define CEPH_LIBRBD_AIOREQUEST_H
+
+#ifndef CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
+#define CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
#include "include/int_types.h"
namespace librbd {
- struct AioCompletion;
- struct ImageCtx;
- class CopyupRequest;
+struct AioCompletion;
+struct ImageCtx;
+class CopyupRequest;
+
+/**
+ * This class represents an I/O operation to a single RBD data object.
+ * Its subclasses encapsulate logic for dealing with special cases
+ * for I/O due to layering.
+ */
+class AioObjectRequest
+{
+public:
+ AioObjectRequest(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t off, uint64_t len,
+ librados::snap_t snap_id,
+ Context *completion, bool hide_enoent);
+ virtual ~AioObjectRequest() {}
+
+ virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {};
+
+ void complete(int r);
+
+ virtual bool should_complete(int r) = 0;
+ virtual void send() = 0;
+
+ bool has_parent() const {
+ return !m_parent_extents.empty();
+ }
+
+protected:
+ bool compute_parent_extents();
+
+ ImageCtx *m_ictx;
+ std::string m_oid;
+ uint64_t m_object_no, m_object_off, m_object_len;
+ librados::snap_t m_snap_id;
+ Context *m_completion;
+ std::vector<std::pair<uint64_t,uint64_t> > m_parent_extents;
+ bool m_hide_enoent;
+};
+
+class AioObjectRead : public AioObjectRequest {
+public:
+ AioObjectRead(ImageCtx *ictx, const std::string &oid,
+ uint64_t objectno, uint64_t offset, uint64_t len,
+ vector<pair<uint64_t,uint64_t> >& be,
+ librados::snap_t snap_id, bool sparse,
+ Context *completion, int op_flags);
+
+ virtual bool should_complete(int r);
+ virtual void send();
+ void guard_read();
+
+ ceph::bufferlist &data() {
+ return m_read_data;
+ }
+
+ std::map<uint64_t, uint64_t> m_ext_map;
+
+ friend class C_AioRead;
+
+private:
+ vector<pair<uint64_t,uint64_t> > m_buffer_extents;
+ bool m_tried_parent;
+ bool m_sparse;
+ int m_op_flags;
+ ceph::bufferlist m_read_data;
+ AioCompletion *m_parent_completion;
/**
- * This class represents an I/O operation to a single RBD data object.
- * Its subclasses encapsulate logic for dealing with special cases
- * for I/O due to layering.
+ * Reads go through the following state machine to deal with
+ * layering:
+ *
+ * need copyup
+ * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP
+ * | |
+ * v |
+ * done <------------------------------------/
+ * ^
+ * |
+ * LIBRBD_AIO_READ_FLAT
+ *
+ * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on
+ * whether there is a parent or not.
*/
- class AioObjectRequest
- {
- public:
- AioObjectRequest(ImageCtx *ictx, const std::string &oid,
- uint64_t objectno, uint64_t off, uint64_t len,
- librados::snap_t snap_id,
- Context *completion, bool hide_enoent);
- virtual ~AioObjectRequest() {}
-
- virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {};
-
- void complete(int r);
-
- virtual bool should_complete(int r) = 0;
- virtual void send() = 0;
-
- bool has_parent() const {
- return !m_parent_extents.empty();
- }
-
- protected:
- bool compute_parent_extents();
-
- ImageCtx *m_ictx;
- std::string m_oid;
- uint64_t m_object_no, m_object_off, m_object_len;
- librados::snap_t m_snap_id;
- Context *m_completion;
- std::vector<std::pair<uint64_t,uint64_t> > m_parent_extents;
- bool m_hide_enoent;
+ enum read_state_d {
+ LIBRBD_AIO_READ_GUARD,
+ LIBRBD_AIO_READ_COPYUP,
+ LIBRBD_AIO_READ_FLAT
};
- class AioObjectRead : public AioObjectRequest {
- public:
- AioObjectRead(ImageCtx *ictx, const std::string &oid,
- uint64_t objectno, uint64_t offset, uint64_t len,
- vector<pair<uint64_t,uint64_t> >& be,
- librados::snap_t snap_id, bool sparse,
- Context *completion, int op_flags);
-
- virtual bool should_complete(int r);
- virtual void send();
- void guard_read();
-
- ceph::bufferlist &data() {
- return m_read_data;
- }
-
- std::map<uint64_t, uint64_t> m_ext_map;
-
- friend class C_AioRead;
-
- private:
- vector<pair<uint64_t,uint64_t> > m_buffer_extents;
- bool m_tried_parent;
- bool m_sparse;
- int m_op_flags;
- ceph::bufferlist m_read_data;
- AioCompletion *m_parent_completion;
-
- /**
- * Reads go through the following state machine to deal with
- * layering:
- *
- * need copyup
- * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP
- * | |
- * v |
- * done <------------------------------------/
- * ^
- * |
- * LIBRBD_AIO_READ_FLAT
- *
- * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on
- * whether there is a parent or not.
- */
- enum read_state_d {
- LIBRBD_AIO_READ_GUARD,
- LIBRBD_AIO_READ_COPYUP,
- LIBRBD_AIO_READ_FLAT
- };
-
- read_state_d m_state;
-
- void send_copyup();
-
- void read_from_parent(const vector<pair<uint64_t,uint64_t> >& image_extents);
- };
-
- class AbstractAioObjectWrite : public AioObjectRequest {
- public:
- AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid,
- uint64_t object_no, uint64_t object_off,
- uint64_t len, const ::SnapContext &snapc,
- Context *completion, bool hide_enoent);
-
- virtual void add_copyup_ops(librados::ObjectWriteOperation *wr)
- {
- add_write_ops(wr);
- }
+ read_state_d m_state;
- virtual bool should_complete(int r);
- virtual void send();
-
- /**
- * Writes go through the following state machine to deal with
- * layering and the object map:
- *
- * <start>
- * . |
- * . |
- * . \---> LIBRBD_AIO_WRITE_PRE
- * . | |
- * . . . . . . | . . . . | . . . . . . . . . . .
- * . | -or- | .
- * . | | v
- * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . .
- * . | | .
- * v v need copyup | .
- * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | .
- * . | | . | .
- * . | | . | .
- * . | /-----/ . | .
- * . | | . | .
- * . \-------------------\ | /-------------------/ .
- * . | | | . .
- * . v v v . .
- * . LIBRBD_AIO_WRITE_POST . .
- * . | . .
- * . | . . . . . . . . .
- * . | . .
- * . v v .
- * . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
- *
- * The _PRE/_POST states are skipped if the object map is disabled.
- * The write starts in _WRITE_GUARD or _FLAT depending on whether or not
- * there is a parent overlap.
- */
- protected:
- enum write_state_d {
- LIBRBD_AIO_WRITE_GUARD,
- LIBRBD_AIO_WRITE_COPYUP,
- LIBRBD_AIO_WRITE_FLAT,
- LIBRBD_AIO_WRITE_PRE,
- LIBRBD_AIO_WRITE_POST,
- LIBRBD_AIO_WRITE_ERROR
- };
-
- write_state_d m_state;
- librados::ObjectWriteOperation m_write;
- uint64_t m_snap_seq;
- std::vector<librados::snap_t> m_snaps;
- bool m_object_exist;
-
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
- virtual const char* get_write_type() const = 0;
- virtual void guard_write();
- virtual void pre_object_map_update(uint8_t *new_state) = 0;
- virtual bool post_object_map_update() {
- return false;
- }
- virtual void send_write();
- virtual void send_write_op(bool write_guard);
- virtual void handle_write_guard();
-
- private:
- void send_pre();
- bool send_post();
- void send_copyup();
- };
-
- class AioObjectWrite : public AbstractAioObjectWrite {
- public:
- AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
- uint64_t object_off, const ceph::bufferlist &data,
- const ::SnapContext &snapc, Context *completion)
- : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(),
- snapc, completion, false),
- m_write_data(data), m_op_flags(0) {
- }
-
- void set_op_flags(int op_flags) {
- m_op_flags = op_flags;
- }
- protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr);
-
- virtual const char* get_write_type() const {
- return "write";
- }
-
- virtual void pre_object_map_update(uint8_t *new_state) {
- *new_state = OBJECT_EXISTS;
- }
- virtual void send_write();
-
- private:
- ceph::bufferlist m_write_data;
- int m_op_flags;
- };
-
- class AioObjectRemove : public AbstractAioObjectWrite {
- public:
- AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
- const ::SnapContext &snapc, Context *completion)
- : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
- true),
- m_object_state(OBJECT_NONEXISTENT) {
- }
+ void send_copyup();
- protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- if (has_parent()) {
- wr->truncate(0);
- } else {
- wr->remove();
- }
- }
+ void read_from_parent(const vector<pair<uint64_t,uint64_t> >& image_extents);
+};
- virtual const char* get_write_type() const {
- if (has_parent()) {
- return "remove (trunc)";
- }
- return "remove";
- }
- virtual void pre_object_map_update(uint8_t *new_state) {
- if (has_parent()) {
- m_object_state = OBJECT_EXISTS;
- } else {
- m_object_state = OBJECT_PENDING;
- }
- *new_state = m_object_state;
- }
+class AbstractAioObjectWrite : public AioObjectRequest {
+public:
+ AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off,
+ uint64_t len, const ::SnapContext &snapc,
+ Context *completion, bool hide_enoent);
- virtual bool post_object_map_update() {
- if (m_object_state == OBJECT_EXISTS) {
- return false;
- }
- return true;
- }
+ virtual void add_copyup_ops(librados::ObjectWriteOperation *wr)
+ {
+ add_write_ops(wr);
+ }
- virtual void guard_write();
- virtual void send_write();
+ virtual bool should_complete(int r);
+ virtual void send();
- private:
- uint8_t m_object_state;
+ /**
+ * Writes go through the following state machine to deal with
+ * layering and the object map:
+ *
+ * <start>
+ * . |
+ * . |
+ * . \---> LIBRBD_AIO_WRITE_PRE
+ * . | |
+ * . . . . . . | . . . . | . . . . . . . . . . .
+ * . | -or- | .
+ * . | | v
+ * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . .
+ * . | | .
+ * v v need copyup | .
+ * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | .
+ * . | | . | .
+ * . | | . | .
+ * . | /-----/ . | .
+ * . | | . | .
+ * . \-------------------\ | /-------------------/ .
+ * . | | | . .
+ * . v v v . .
+ * . LIBRBD_AIO_WRITE_POST . .
+ * . | . .
+ * . | . . . . . . . . .
+ * . | . .
+ * . v v .
+ * . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
+ *
+ * The _PRE/_POST states are skipped if the object map is disabled.
+ * The write starts in _WRITE_GUARD or _FLAT depending on whether or not
+ * there is a parent overlap.
+ */
+protected:
+ enum write_state_d {
+ LIBRBD_AIO_WRITE_GUARD,
+ LIBRBD_AIO_WRITE_COPYUP,
+ LIBRBD_AIO_WRITE_FLAT,
+ LIBRBD_AIO_WRITE_PRE,
+ LIBRBD_AIO_WRITE_POST,
+ LIBRBD_AIO_WRITE_ERROR
};
- class AioObjectTrim : public AbstractAioObjectWrite {
- public:
- AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+ write_state_d m_state;
+ librados::ObjectWriteOperation m_write;
+ uint64_t m_snap_seq;
+ std::vector<librados::snap_t> m_snaps;
+ bool m_object_exist;
+
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
+ virtual const char* get_write_type() const = 0;
+ virtual void guard_write();
+ virtual void pre_object_map_update(uint8_t *new_state) = 0;
+ virtual bool post_object_map_update() {
+ return false;
+ }
+ virtual void send_write();
+ virtual void send_write_op(bool write_guard);
+ virtual void handle_write_guard();
+
+private:
+ void send_pre();
+ bool send_post();
+ void send_copyup();
+};
+
+class AioObjectWrite : public AbstractAioObjectWrite {
+public:
+ AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+ uint64_t object_off, const ceph::bufferlist &data,
+ const ::SnapContext &snapc, Context *completion)
+ : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(),
+ snapc, completion, false),
+ m_write_data(data), m_op_flags(0) {
+ }
+
+ void set_op_flags(int op_flags) {
+ m_op_flags = op_flags;
+ }
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+
+ virtual const char* get_write_type() const {
+ return "write";
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {
+ *new_state = OBJECT_EXISTS;
+ }
+ virtual void send_write();
+
+private:
+ ceph::bufferlist m_write_data;
+ int m_op_flags;
+};
+
+class AioObjectRemove : public AbstractAioObjectWrite {  // removal request; truncates to zero instead of removing when has_parent() is true
+public:
+ AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
const ::SnapContext &snapc, Context *completion)
- : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
- true) {
- }
-
- protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,  // offset and length are both passed as 0
+ true),
+ m_object_state(OBJECT_NONEXISTENT) {  // placeholder until pre_object_map_update() picks the real state
+ }
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {  // truncate(0) with a parent, remove() without one
+ if (has_parent()) {
+ wr->truncate(0);
+ } else {
wr->remove();
}
+ }
- virtual const char* get_write_type() const {
- return "remove (trim)";
+ virtual const char* get_write_type() const {  // label mirrors the op chosen in add_write_ops()
+ if (has_parent()) {
+ return "remove (trunc)";
}
-
- virtual void pre_object_map_update(uint8_t *new_state) {
- *new_state = OBJECT_PENDING;
- }
-
- virtual bool post_object_map_update() {
- return true;
- }
- };
-
- class AioObjectTruncate : public AbstractAioObjectWrite {
- public:
- AioObjectTruncate(ImageCtx *ictx, const std::string &oid,
- uint64_t object_no, uint64_t object_off,
- const ::SnapContext &snapc, Context *completion)
- : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc,
- completion, true) {
- }
-
- protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- wr->truncate(m_object_off);
+ return "remove";
+ }
+ virtual void pre_object_map_update(uint8_t *new_state) {  // remembers the chosen state so post_object_map_update() can consult it
+ if (has_parent()) {
+ m_object_state = OBJECT_EXISTS;
+ } else {
+ m_object_state = OBJECT_PENDING;
}
+ *new_state = m_object_state;
+ }
- virtual const char* get_write_type() const {
- return "truncate";
- }
-
- virtual void pre_object_map_update(uint8_t *new_state) {
- if (!m_object_exist && !has_parent())
- *new_state = OBJECT_NONEXISTENT;
- else
- *new_state = OBJECT_EXISTS;
- }
- virtual void send_write();
- };
-
- class AioObjectZero : public AbstractAioObjectWrite {
- public:
- AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
- uint64_t object_off, uint64_t object_len,
- const ::SnapContext &snapc, Context *completion)
- : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len,
- snapc, completion, true) {
- }
-
- protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- wr->zero(m_object_off, m_object_len);
- }
-
- virtual const char* get_write_type() const {
- return "zero";
+ virtual bool post_object_map_update() {  // false when the recorded state is OBJECT_EXISTS, true otherwise
+ if (m_object_state == OBJECT_EXISTS) {
+ return false;
}
-
- virtual void pre_object_map_update(uint8_t *new_state) {
+ return true;
+ }
+
+ virtual void guard_write();  // defined out of line
+ virtual void send_write();  // defined out of line
+
+private:
+ uint8_t m_object_state;  // state last reported by pre_object_map_update()
+};
+
+class AioObjectTrim : public AbstractAioObjectWrite {  // removal request that always issues remove(), with no has_parent() check
+public:
+ AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+ const ::SnapContext &snapc, Context *completion)
+ : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,  // offset and length are both passed as 0
+ true) {
+ }
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {  // the only op added is an object remove
+ wr->remove();
+ }
+
+ virtual const char* get_write_type() const {  // label identifying this request type
+ return "remove (trim)";
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {  // reports OBJECT_PENDING through *new_state
+ *new_state = OBJECT_PENDING;
+ }
+
+ virtual bool post_object_map_update() {  // always true, unlike the base default which returns false
+ return true;
+ }
+};
+
+class AioObjectTruncate : public AbstractAioObjectWrite {  // truncates the object at object_off
+public:
+ AioObjectTruncate(ImageCtx *ictx, const std::string &oid,
+ uint64_t object_no, uint64_t object_off,
+ const ::SnapContext &snapc, Context *completion)
+ : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc,  // length is passed as 0; only the offset matters here
+ completion, true) {
+ }
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {  // the only op added is truncate(m_object_off)
+ wr->truncate(m_object_off);
+ }
+
+ virtual const char* get_write_type() const {  // label identifying this request type
+ return "truncate";
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {  // NONEXISTENT only when the object does not exist and there is no parent
+ if (!m_object_exist && !has_parent())
+ *new_state = OBJECT_NONEXISTENT;
+ else
*new_state = OBJECT_EXISTS;
- }
- };
-
-}
-
-#endif
+ }
+ virtual void send_write();  // defined out of line
+};
+
+class AioObjectZero : public AbstractAioObjectWrite {  // zeroes the byte range [object_off, object_off + object_len) of one object
+public:
+ AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+ uint64_t object_off, uint64_t object_len,
+ const ::SnapContext &snapc, Context *completion)
+ : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len,
+ snapc, completion, true) {
+ }
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {  // the only op added is zero(offset, length)
+ wr->zero(m_object_off, m_object_len);
+ }
+
+ virtual const char* get_write_type() const {  // label identifying this request type
+ return "zero";
+ }
+
+ virtual void pre_object_map_update(uint8_t *new_state) {  // reports OBJECT_EXISTS through *new_state
+ *new_state = OBJECT_EXISTS;
+ }
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
} // anonymous namespace
- CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid,
- uint64_t objectno,
- vector<pair<uint64_t,uint64_t> >& image_extents)
- : m_ictx(ictx), m_oid(oid), m_object_no(objectno),
- m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)
- {
- m_async_op.start_op(*m_ictx);
- }
+CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid,  // builds a request for object 'objectno' (named 'oid') of image 'ictx'
+ uint64_t objectno,
+ vector<pair<uint64_t,uint64_t> >& image_extents)
+ : m_ictx(ictx), m_oid(oid), m_object_no(objectno),
+ m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)  // extents are copied into the member; state starts at the parent read
+{
+ m_async_op.start_op(*m_ictx);  // paired with finish_op() in the destructor
+}
- CopyupRequest::~CopyupRequest() {
- assert(m_pending_requests.empty());
- m_async_op.finish_op();
- }
+CopyupRequest::~CopyupRequest() {
+ assert(m_pending_requests.empty());  // every queued request must already have been completed and removed
+ m_async_op.finish_op();  // closes the op opened by start_op() in the constructor
+}
+
+void CopyupRequest::append_request(AioObjectRequest *req) {  // queues 'req'; complete_requests() later completes it
+ ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl;
+ m_pending_requests.push_back(req);  // no locking here - NOTE(review): confirm callers serialise access to this vector
+}
- void CopyupRequest::append_request(AioObjectRequest *req) {
- ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl;
- m_pending_requests.push_back(req);
+void CopyupRequest::complete_requests(int r) {  // completes every queued request with result 'r', front to back, emptying the queue
+ while (!m_pending_requests.empty()) {
+ vector<AioObjectRequest *>::iterator it = m_pending_requests.begin();
+ AioObjectRequest *req = *it;
+ ldout(m_ictx->cct, 20) << __func__ << " completing request " << req
+ << dendl;
+ req->complete(r);  // 'it' is reused below - assumes complete() does not modify m_pending_requests (TODO confirm)
+ m_pending_requests.erase(it);  // erased only after the request has been completed
}
+}
- void CopyupRequest::complete_requests(int r) {
- while (!m_pending_requests.empty()) {
- vector<AioObjectRequest *>::iterator it = m_pending_requests.begin();
- AioObjectRequest *req = *it;
- ldout(m_ictx->cct, 20) << __func__ << " completing request " << req
- << dendl;
- req->complete(r);
- m_pending_requests.erase(it);
- }
+bool CopyupRequest::send_copyup() {  // issues the copyup and/or the merged pending writes; always returns false
+ bool add_copyup_op = !m_copyup_data.is_zero();  // true when the data read from the parent is not all zeros
+ bool copy_on_read = m_pending_requests.empty();  // no queued requests: this copyup is treated as copy-on-read
+ if (!add_copyup_op && copy_on_read) {
+ // copyup empty object to prevent future CoR attempts
+ m_copyup_data.clear();
+ add_copyup_op = true;
}
- bool CopyupRequest::send_copyup() {
- bool add_copyup_op = !m_copyup_data.is_zero();
- bool copy_on_read = m_pending_requests.empty();
- if (!add_copyup_op && copy_on_read) {
- // copyup empty object to prevent future CoR attempts
- m_copyup_data.clear();
- add_copyup_op = true;
- }
+ ldout(m_ictx->cct, 20) << __func__ << " " << this
+ << ": oid " << m_oid << dendl;
+ m_state = STATE_COPYUP;
- ldout(m_ictx->cct, 20) << __func__ << " " << this
- << ": oid " << m_oid << dendl;
- m_state = STATE_COPYUP;
+ m_ictx->snap_lock.get_read();  // snap_lock is held only long enough to copy the snapshot context
+ ::SnapContext snapc = m_ictx->snapc;
+ m_ictx->snap_lock.put_read();
- m_ictx->snap_lock.get_read();
- ::SnapContext snapc = m_ictx->snapc;
- m_ictx->snap_lock.put_read();
+ std::vector<librados::snap_t> snaps;  // still empty when the first aio_operate() below uses it
- std::vector<librados::snap_t> snaps;
+ if (!copy_on_read) {
+ m_pending_copyups.inc();  // accounts for the merged write op sent in the second branch below
+ }
- if (!copy_on_read) {
- m_pending_copyups.inc();
- }
+ int r;
+ if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) {
+ assert(add_copyup_op);
+ add_copyup_op = false;  // the copyup goes out on its own here, so it is not added to write_op below
+
+ librados::ObjectWriteOperation copyup_op;
+ copyup_op.exec("rbd", "copyup", m_copyup_data);
+
+ // send only the copyup request with a blank snapshot context so that
+ // all snapshots are detected from the parent for this object. If
+ // this is a CoW request, a second request will be created for the
+ // actual modification.
+ m_pending_copyups.inc();  // accounts for this standalone copyup op
+
+ ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with "
+ << "empty snapshot context" << dendl;
+ librados::AioCompletion *comp = util::create_rados_safe_callback(this);
+ r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps);  // snap seq 0 with the empty 'snaps' vector
+ assert(r == 0);
+ comp->release();
+ }
- int r;
- if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) {
- assert(add_copyup_op);
- add_copyup_op = false;
-
- librados::ObjectWriteOperation copyup_op;
- copyup_op.exec("rbd", "copyup", m_copyup_data);
-
- // send only the copyup request with a blank snapshot context so that
- // all snapshots are detected from the parent for this object. If
- // this is a CoW request, a second request will be created for the
- // actual modification.
- m_pending_copyups.inc();
-
- ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with "
- << "empty snapshot context" << dendl;
- librados::AioCompletion *comp = util::create_rados_safe_callback(this);
- r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps);
- assert(r == 0);
- comp->release();
+ if (!copy_on_read) {
+ librados::ObjectWriteOperation write_op;
+ if (add_copyup_op) {
+ // CoW did not need to handle existing snapshots
+ write_op.exec("rbd", "copyup", m_copyup_data);
}
- if (!copy_on_read) {
- librados::ObjectWriteOperation write_op;
- if (add_copyup_op) {
- // CoW did not need to handle existing snapshots
- write_op.exec("rbd", "copyup", m_copyup_data);
- }
-
- // merge all pending write ops into this single RADOS op
- for (size_t i=0; i<m_pending_requests.size(); ++i) {
- AioObjectRequest *req = m_pending_requests[i];
- ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req
- << dendl;
- req->add_copyup_ops(&write_op);
- }
- assert(write_op.size() != 0);
-
- snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
- librados::AioCompletion *comp = util::create_rados_safe_callback(this);
- r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op);
- assert(r == 0);
- comp->release();
+ // merge all pending write ops into this single RADOS op
+ for (size_t i=0; i<m_pending_requests.size(); ++i) {
+ AioObjectRequest *req = m_pending_requests[i];
+ ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req
+ << dendl;
+ req->add_copyup_ops(&write_op);
}
- return false;
+ assert(write_op.size() != 0);
+
+ snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());  // NOTE(review): 'snaps' is filled here but not passed to aio_operate() below - confirm intent
+ librados::AioCompletion *comp = util::create_rados_safe_callback(this);
+ r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op);
+ assert(r == 0);
+ comp->release();
}
+ return false;  // results arrive later; should_complete() handles them in STATE_COPYUP
+}
- void CopyupRequest::send()
- {
- m_state = STATE_READ_FROM_PARENT;
- AioCompletion *comp = AioCompletion::create_and_start(
- this, m_ictx, AIO_TYPE_READ);
+void CopyupRequest::send()  // starts the state machine by reading m_image_extents from the parent image
+{
+ m_state = STATE_READ_FROM_PARENT;
+ AioCompletion *comp = AioCompletion::create_and_start(
+ this, m_ictx, AIO_TYPE_READ);
+
+ ldout(m_ictx->cct, 20) << __func__ << " " << this
+ << ": completion " << comp
+ << ", oid " << m_oid
+ << ", extents " << m_image_extents
+ << dendl;
+ RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);  // NOTE(review): m_ictx->parent is dereferenced unchecked - confirm callers guarantee a parent
+ AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL,  // the data read lands in m_copyup_data
+ &m_copyup_data, 0);
+}
- ldout(m_ictx->cct, 20) << __func__ << " " << this
- << ": completion " << comp
- << ", oid " << m_oid
- << ", extents " << m_image_extents
- << dendl;
- RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
- AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL,
- &m_copyup_data, 0);
+void CopyupRequest::complete(int r)  // takes the result of one step; finishes and destroys the request when should_complete() says so
+{
+ if (should_complete(r)) {
+ complete_requests(r);  // hands 'r' to any requests that are still queued
+ delete this;  // self-deleting: the object must not be used after this point
}
+}
- void CopyupRequest::complete(int r)
- {
- if (should_complete(r)) {
- complete_requests(r);
- delete this;
+bool CopyupRequest::should_complete(int r)  // advances the state machine; returns true when complete() may finish the request
+{
+ CephContext *cct = m_ictx->cct;
+ ldout(cct, 20) << __func__ << " " << this
+ << ": oid " << m_oid
+ << ", extents " << m_image_extents
+ << ", r " << r << dendl;
+
+ uint64_t pending_copyups;
+ switch (m_state) {
+ case STATE_READ_FROM_PARENT:
+ ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
+ remove_from_list();  // drops this request from m_ictx->copyup_list once the parent read has reported
+ if (r >= 0 || r == -ENOENT) {  // -ENOENT from the parent read is handled like success
+ return send_object_map();
}
- }
-
- bool CopyupRequest::should_complete(int r)
- {
- CephContext *cct = m_ictx->cct;
- ldout(cct, 20) << __func__ << " " << this
- << ": oid " << m_oid
- << ", extents " << m_image_extents
- << ", r " << r << dendl;
-
- uint64_t pending_copyups;
- switch (m_state) {
- case STATE_READ_FROM_PARENT:
- ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
- remove_from_list();
- if (r >= 0 || r == -ENOENT) {
- return send_object_map();
- }
- break;
-
- case STATE_OBJECT_MAP:
- ldout(cct, 20) << "OBJECT_MAP" << dendl;
- assert(r == 0);
- return send_copyup();
-
- case STATE_COPYUP:
- // invoked via a finisher in librados, so thread safe
- pending_copyups = m_pending_copyups.dec();
- ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)"
- << dendl;
- if (r == -ENOENT) {
- // hide the -ENOENT error if this is the last op
- if (pending_copyups == 0) {
- complete_requests(0);
- }
- } else if (r < 0) {
- complete_requests(r);
+ break;  // any other read error falls through to 'return (r < 0)' at the bottom
+
+ case STATE_OBJECT_MAP:
+ ldout(cct, 20) << "OBJECT_MAP" << dendl;
+ assert(r == 0);  // the object map update is expected to succeed
+ return send_copyup();
+
+ case STATE_COPYUP:
+ // invoked via a finisher in librados, so thread safe
+ pending_copyups = m_pending_copyups.dec();  // one fewer op in flight; the counter is incremented in send_copyup()
+ ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)"
+ << dendl;
+ if (r == -ENOENT) {
+ // hide the -ENOENT error if this is the last op
+ if (pending_copyups == 0) {
+ complete_requests(0);
}
- return (pending_copyups == 0);
-
- default:
- lderr(cct) << "invalid state: " << m_state << dendl;
- assert(false);
- break;
+ } else if (r < 0) {
+ complete_requests(r);  // the error reaches the queued requests at once, even if another op is still pending
}
- return (r < 0);
+ return (pending_copyups == 0);  // finish only after the last in-flight op has reported
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
}
+ return (r < 0);  // reached after a 'break': a failed parent read, or the invalid-state default
+}
- void CopyupRequest::remove_from_list()
- {
- Mutex::Locker l(m_ictx->copyup_list_lock);
+void CopyupRequest::remove_from_list()  // removes this request's entry, keyed by m_object_no, from m_ictx->copyup_list
+{
+ Mutex::Locker l(m_ictx->copyup_list_lock);  // held for the whole lookup and erase
- map<uint64_t, CopyupRequest*>::iterator it =
- m_ictx->copyup_list.find(m_object_no);
- assert(it != m_ictx->copyup_list.end());
- m_ictx->copyup_list.erase(it);
- }
+ map<uint64_t, CopyupRequest*>::iterator it =
+ m_ictx->copyup_list.find(m_object_no);
+ assert(it != m_ictx->copyup_list.end());  // the entry is expected to exist when this is called
+ m_ictx->copyup_list.erase(it);
+}
- bool CopyupRequest::send_object_map() {
- {
- RWLock::RLocker owner_locker(m_ictx->owner_lock);
- RWLock::RLocker snap_locker(m_ictx->snap_lock);
- if (m_ictx->object_map != nullptr) {
- bool copy_on_read = m_pending_requests.empty();
- assert(m_ictx->exclusive_lock->is_lock_owner());
-
- RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- if (copy_on_read &&
- (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
- // CoW already updates the HEAD object map
- m_snap_ids.push_back(CEPH_NOSNAP);
- }
- if (!m_ictx->snaps.empty()) {
- m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
- m_ictx->snaps.end());
- }
+bool CopyupRequest::send_object_map() {  // collects the snapshot ids needing an object map update, then updates them or goes straight to copyup
+ {
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
+ RWLock::RLocker snap_locker(m_ictx->snap_lock);
+ if (m_ictx->object_map != nullptr) {  // nothing is collected when the image has no object map
+ bool copy_on_read = m_pending_requests.empty();
+ assert(m_ictx->exclusive_lock->is_lock_owner());
+
+ RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+ if (copy_on_read &&  // HEAD (CEPH_NOSNAP) is added only for copy-on-read, and only if not already OBJECT_EXISTS
+ (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
+ // CoW already updates the HEAD object map
+ m_snap_ids.push_back(CEPH_NOSNAP);
+ }
+ if (!m_ictx->snaps.empty()) {  // every existing snapshot id is appended as well
+ m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
+ m_ictx->snaps.end());
}
}
+ }
- // avoid possible recursive lock attempts
- if (m_snap_ids.empty()) {
- // no object map update required
- return send_copyup();
- } else {
- // update object maps for HEAD and all existing snapshots
- ldout(m_ictx->cct, 20) << __func__ << " " << this
- << ": oid " << m_oid
- << dendl;
- m_state = STATE_OBJECT_MAP;
-
- RWLock::RLocker owner_locker(m_ictx->owner_lock);
- AsyncObjectThrottle<>::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(),
- boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids,
- boost::lambda::_2));
- AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
- NULL, *m_ictx, context_factory, util::create_context_callback(this),
- NULL, 0, m_snap_ids.size());
- throttle->start_ops(m_ictx->concurrent_management_ops);
- }
- return false;
+ // avoid possible recursive lock attempts: the scoped locks above are
+ // released before send_copyup() (which takes snap_lock itself) runs
+ if (m_snap_ids.empty()) {
+ // no object map update required
+ return send_copyup();
+ } else {
+ // update object maps for HEAD and all existing snapshots
+ ldout(m_ictx->cct, 20) << __func__ << " " << this
+ << ": oid " << m_oid
+ << dendl;
+ m_state = STATE_OBJECT_MAP;
+
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
+ AsyncObjectThrottle<>::ContextFactory context_factory(  // builds one UpdateObjectMap per index into m_snap_ids
+ boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(),
+ boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids,
+ boost::lambda::_2));
+ AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(  // NOTE(review): no delete in this function - presumably the throttle frees itself; confirm
+ NULL, *m_ictx, context_factory, util::create_context_callback(this),
+ NULL, 0, m_snap_ids.size());
+ throttle->start_ops(m_ictx->concurrent_management_ops);
}
+ return false;  // the update result is presumably delivered via the context callback built from 'this'
}
+
+} // namespace librbd
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_COPYUPREQUEST_H
-#define CEPH_LIBRBD_COPYUPREQUEST_H
+
+#ifndef CEPH_LIBRBD_COPYUP_REQUEST_H
+#define CEPH_LIBRBD_COPYUP_REQUEST_H
#include "librbd/AsyncOperation.h"
#include "include/int_types.h"
namespace librbd {
- struct AioCompletion;
-
- class CopyupRequest {
- public:
- CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno,
- vector<pair<uint64_t,uint64_t> >& image_extents);
- ~CopyupRequest();
-
- void append_request(AioObjectRequest *req);
-
- void send();
-
- void complete(int r);
-
- private:
- /**
- * Copyup requests go through the following state machine to read from the
- * parent image, update the object map, and copyup the object:
- *
- *
- * @verbatim
- *
- * <start>
- * |
- * v
- * STATE_READ_FROM_PARENT
- * . . |
- * . . v
- * . . STATE_OBJECT_MAP . .
- * . . | .
- * . . v .
- * . . . > STATE_COPYUP .
- * . | .
- * . v .
- * . . . . > <finish> < . . . . .
- *
- * @endverbatim
- *
- * The _OBJECT_MAP state is skipped if the object map isn't enabled or if
- * an object map update isn't required. The _COPYUP state is skipped if
- * no data was read from the parent *and* there are no additional ops.
- */
- enum State {
- STATE_READ_FROM_PARENT,
- STATE_OBJECT_MAP,
- STATE_COPYUP
- };
-
- ImageCtx *m_ictx;
- std::string m_oid;
- uint64_t m_object_no;
- vector<pair<uint64_t,uint64_t> > m_image_extents;
- State m_state;
- ceph::bufferlist m_copyup_data;
- vector<AioObjectRequest *> m_pending_requests;
- atomic_t m_pending_copyups;
-
- AsyncOperation m_async_op;
-
- std::vector<uint64_t> m_snap_ids;
-
- void complete_requests(int r);
-
- bool should_complete(int r);
-
- void remove_from_list();
-
- bool send_object_map();
- bool send_copyup();
+struct AioCompletion;
+
+class CopyupRequest {  // reads one object's extents from the parent image, copies them up, and completes the requests queued on it
+public:
+ CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno,
+ vector<pair<uint64_t,uint64_t> >& image_extents);
+ ~CopyupRequest();
+
+ void append_request(AioObjectRequest *req);  // queues a request to be completed when this copyup finishes
+
+ void send();  // starts the state machine at STATE_READ_FROM_PARENT
+
+ void complete(int r);  // takes the result of a step; deletes this object once the state machine is done
+
+private:
+ /**
+ * Copyup requests go through the following state machine to read from the
+ * parent image, update the object map, and copyup the object:
+ *
+ *
+ * @verbatim
+ *
+ * <start>
+ *    |
+ *    v
+ * STATE_READ_FROM_PARENT
+ *    .   .        |
+ *    .   .        v
+ *    .   .     STATE_OBJECT_MAP . .
+ *    .   .        |               .
+ *    .   .        v               .
+ *    .   . . > STATE_COPYUP       .
+ *    .            |               .
+ *    .            v               .
+ *    . . . . > <finish> < . . . . .
+ *
+ * @endverbatim
+ *
+ * The _OBJECT_MAP state is skipped if the object map isn't enabled or if
+ * an object map update isn't required. The _COPYUP state is skipped if
+ * no data was read from the parent *and* there are no additional ops.
+ *
+ * NOTE(review): send_copyup() issues at least one op on every path (an
+ * empty copyup for copy-on-read), so confirm whether the _COPYUP skip
+ * described above still applies.
+ */
+ enum State {
+ STATE_READ_FROM_PARENT,
+ STATE_OBJECT_MAP,
+ STATE_COPYUP
};
-}
-#endif
+ ImageCtx *m_ictx;
+ std::string m_oid;
+ uint64_t m_object_no;
+ vector<pair<uint64_t,uint64_t> > m_image_extents;  // extents read from the parent image by send()
+ State m_state;
+ ceph::bufferlist m_copyup_data;  // receives the data read from the parent
+ vector<AioObjectRequest *> m_pending_requests;  // filled by append_request(); emptied by complete_requests()
+ atomic_t m_pending_copyups;  // number of copyup/write ops still in flight
+
+ AsyncOperation m_async_op;  // started in the constructor, finished in the destructor
+
+ std::vector<uint64_t> m_snap_ids;  // snapshot ids whose object maps send_object_map() updates
+
+ void complete_requests(int r);
+
+ bool should_complete(int r);
+
+ void remove_from_list();
+
+ bool send_object_map();
+ bool send_copyup();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_COPYUP_REQUEST_H