From: Jason Dillaman
Date: Thu, 17 Dec 2015 04:17:22 +0000 (-0500)
Subject: librbd: properly order maintenance ops journal events
X-Git-Tag: v10.0.3~24^2~17
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bf7a1f62a67aaffef72f346a886c9a11711c2b13;p=ceph.git

librbd: properly order maintenance ops journal events

In general, the op should only start after the journal event is safely
committed. Snapshot create and resize need special treatment to record
the event while no IO is in-progress.

Signed-off-by: Jason Dillaman
---
diff --git a/src/librbd/AioImageRequest.cc b/src/librbd/AioImageRequest.cc index 975096f3fbcd..ed93b34eb750 100644 --- a/src/librbd/AioImageRequest.cc +++ b/src/librbd/AioImageRequest.cc @@ -311,7 +311,8 @@ uint64_t AioImageWrite::append_journal_event( bl.append(m_buf, m_len); journal::EventEntry event_entry(journal::AioWriteEvent(m_off, m_len, bl)); - uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry, + uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, + std::move(event_entry), requests, m_off, m_len, synchronous); if (m_image_ctx.object_cacher == NULL) { @@ -371,7 +372,8 @@ void AioImageWrite::update_stats(size_t length) { uint64_t AioImageDiscard::append_journal_event( const AioObjectRequests &requests, bool synchronous) { journal::EventEntry event_entry(journal::AioDiscardEvent(m_off, m_len)); - uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry, + uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, + std::move(event_entry), requests, m_off, m_len, synchronous); m_aio_comp->associate_journal_event(tid); diff --git a/src/librbd/AsyncRequest.h b/src/librbd/AsyncRequest.h index 96802f8114a2..8ca84f534c8f 100644 --- a/src/librbd/AsyncRequest.h +++ b/src/librbd/AsyncRequest.h @@ -24,8 +24,6 @@ public: if (should_complete(r)) { r = filter_return_code(r); finish(r); - finish_request(); - m_on_finish->complete(r); delete this; } } @@ -41,7 +39,6 @@ public: protected: ImageCtxT &m_image_ctx; - Context *m_on_finish; librados::AioCompletion *create_callback_completion(); Context *create_callback_context(); @@ -55,8 +52,12 @@ protected: } virtual void finish(int r) { + finish_request(); + m_on_finish->complete(r); } + private: + Context *m_on_finish; bool m_canceled; typename xlist *>::item m_xlist_item; diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index eb0de9f128cf..6b050556e749 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -239,7 +239,7 @@ bool Journal::is_journal_replaying() const { template void Journal::wait_for_journal_ready(Context *on_ready) { - on_ready = util::create_async_context_callback(m_image_ctx, on_ready); + on_ready = create_async_context_callback(m_image_ctx, on_ready); Mutex::Locker locker(m_lock); if (m_state == STATE_READY) { @@ -254,7 +254,7 @@ void Journal::open(Context *on_finish) { CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << dendl; - on_finish = util::create_async_context_callback(m_image_ctx, on_finish); + on_finish = create_async_context_callback(m_image_ctx, on_finish); Mutex::Locker locker(m_lock); assert(m_state == STATE_UNINITIALIZED); @@ -267,7 +267,7 @@ void Journal::close(Context *on_finish) { CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << dendl; - on_finish = util::create_async_context_callback(m_image_ctx, on_finish); + on_finish = create_async_context_callback(m_image_ctx, on_finish); Mutex::Locker locker(m_lock); assert(m_state !=
STATE_UNINITIALIZED); @@ -286,7 +286,7 @@ void Journal::close(Context *on_finish) { template uint64_t Journal::append_io_event(AioCompletion *aio_comp, - const journal::EventEntry &event_entry, + journal::EventEntry &&event_entry, const AioObjectRequests &requests, uint64_t offset, size_t length, bool flush_entry) { @@ -301,12 +301,11 @@ uint64_t Journal::append_io_event(AioCompletion *aio_comp, Mutex::Locker locker(m_lock); assert(m_state == STATE_READY); - future = m_journaler->append("", bl); - Mutex::Locker event_locker(m_event_lock); tid = ++m_event_tid; assert(tid != 0); + future = m_journaler->append("", bl); m_events[tid] = Event(future, aio_comp, requests, offset, length); } @@ -318,7 +317,7 @@ uint64_t Journal::append_io_event(AioCompletion *aio_comp, << "length=" << length << ", " << "flush=" << flush_entry << ", tid=" << tid << dendl; - Context *on_safe = new C_EventSafe(this, tid); + Context *on_safe = new C_IOEventSafe(this, tid); if (flush_entry) { future.flush(on_safe); } else { @@ -379,17 +378,23 @@ void Journal::commit_io_event_extent(uint64_t tid, uint64_t offset, template void Journal::append_op_event(uint64_t op_tid, - journal::EventEntry &event_entry) { + journal::EventEntry &&event_entry, + Context *on_safe) { assert(m_image_ctx.owner_lock.is_locked()); bufferlist bl; ::encode(event_entry, bl); + + Future future; { Mutex::Locker locker(m_lock); assert(m_state == STATE_READY); - m_journaler->committed(m_journaler->append("", bl)); + future = m_journaler->append("", bl); } + on_safe = create_async_context_callback(m_image_ctx, on_safe); + future.flush(new C_OpEventSafe(this, op_tid, future, on_safe)); + CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << this << " " << __func__ << ": " << "op_tid=" << op_tid << ", " @@ -397,21 +402,24 @@ void Journal::append_op_event(uint64_t op_tid, } template -void Journal::commit_op_event(uint64_t tid, int r) { +void Journal::commit_op_event(uint64_t op_tid, int r) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << this << " " << __func__ << ": tid=" << tid << ", " + ldout(cct, 10) << this << " " << __func__ << ": op_tid=" << op_tid << ", " << "r=" << r << dendl; - journal::EventEntry event_entry((journal::OpFinishEvent(tid, r))); + journal::EventEntry event_entry((journal::OpFinishEvent(op_tid, r))); bufferlist bl; ::encode(event_entry, bl); + Future future; { Mutex::Locker locker(m_lock); assert(m_state == STATE_READY); - m_journaler->committed(m_journaler->append("", bl)); + future = m_journaler->append("", bl); } + + future.flush(new C_OpEventSafe(this, op_tid, future, nullptr)); } template @@ -458,7 +466,8 @@ typename Journal::Future Journal::wait_event(Mutex &lock, uint64_t tid, return Future(); } - event.on_safe_contexts.push_back(on_safe); + event.on_safe_contexts.push_back(create_async_context_callback(m_image_ctx, + on_safe)); return event.future; } @@ -705,11 +714,17 @@ void Journal::handle_journal_destroyed(int r) { } template -void Journal::handle_event_safe(int r, uint64_t tid) { +void Journal::handle_io_event_safe(int r, uint64_t tid) { CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", " << "tid=" << tid << dendl; + // journal will be flushed before closing + assert(m_state == STATE_READY); + if (r < 0) { + lderr(cct) << "failed to commit IO event: " << cpp_strerror(r) << dendl; + } + AioCompletion *aio_comp; AioObjectRequests aio_object_requests; Contexts on_safe_contexts; @@ -758,6 +773,25 @@ void Journal::handle_event_safe(int r, uint64_t tid) { } } 
+template +void Journal::handle_op_event_safe(int r, uint64_t tid, const Future &future, + Context *on_safe) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", " + << "tid=" << tid << dendl; + + // journal will be flushed before closing + assert(m_state == STATE_READY); + if (r < 0) { + lderr(cct) << "failed to commit op event: " << cpp_strerror(r) << dendl; + } + + m_journaler->committed(future); + if (on_safe != nullptr) { + on_safe->complete(r); + } +} + template void Journal::stop_recording() { assert(m_lock.is_locked()); diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index bee4dbe98667..1740bc6a6761 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -113,7 +113,7 @@ public: void close(Context *on_finish); uint64_t append_io_event(AioCompletion *aio_comp, - const journal::EventEntry &event_entry, + journal::EventEntry &&event_entry, const AioObjectRequests &requests, uint64_t offset, size_t length, bool flush_entry); @@ -121,8 +121,9 @@ public: void commit_io_event_extent(uint64_t tid, uint64_t offset, uint64_t length, int r); - void append_op_event(uint64_t op_tid, journal::EventEntry &event_entry); - void commit_op_event(uint64_t op_tid, int r); + void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry, + Context *on_safe); + void commit_op_event(uint64_t tid, int r); void flush_event(uint64_t tid, Context *on_safe); void wait_event(uint64_t tid, Context *on_safe); @@ -168,16 +169,32 @@ private: typedef ceph::unordered_map Events; - struct C_EventSafe : public Context { + struct C_IOEventSafe : public Context { Journal *journal; uint64_t tid; - C_EventSafe(Journal *_journal, uint64_t _tid) + C_IOEventSafe(Journal *_journal, uint64_t _tid) : journal(_journal), tid(_tid) { } virtual void finish(int r) { - journal->handle_event_safe(r, tid); + journal->handle_io_event_safe(r, tid); + } + }; + + struct C_OpEventSafe : public Context { + Journal *journal; + uint64_t tid; + Future future; + Context *on_safe; + + C_OpEventSafe(Journal *journal, uint64_t tid, const Future &future, + Context *on_safe) + : journal(journal), tid(tid), future(future), on_safe(on_safe) { + } + + virtual void finish(int r) { + journal->handle_op_event_safe(r, tid, future, on_safe); } }; @@ -241,7 +258,9 @@ private: void handle_journal_destroyed(int r); - void handle_event_safe(int r, uint64_t tid); + void handle_io_event_safe(int r, uint64_t tid); + void handle_op_event_safe(int r, uint64_t tid, const Future &future, + Context *on_safe); void stop_recording(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 4858df06a554..6f034f45352e 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -351,7 +351,7 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) { C_SaferCond ctx; ictx->snap_lock.get_read(); - operation::TrimRequest<> *req = new operation::TrimRequest<>( + operation::TrimRequest<> *req = operation::TrimRequest<>::create( *ictx, &ctx, ictx->size, newsize, prog_ctx); ictx->snap_lock.put_read(); req->send(); diff --git a/src/librbd/object_map/SnapshotRemoveRequest.h b/src/librbd/object_map/SnapshotRemoveRequest.h index 3f2a2412a1df..64696784130e 100644 --- a/src/librbd/object_map/SnapshotRemoveRequest.h +++ b/src/librbd/object_map/SnapshotRemoveRequest.h @@ -64,10 +64,6 @@ protected: return r; } - virtual void finish() { - } - using AsyncRequest<>::finish; - private: State m_state; ceph::BitVector<2> &m_object_map; diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc 
b/src/librbd/operation/RebuildObjectMapRequest.cc index ce7f911b99ea..cfb1c113faaa 100644 --- a/src/librbd/operation/RebuildObjectMapRequest.cc +++ b/src/librbd/operation/RebuildObjectMapRequest.cc @@ -283,9 +283,9 @@ void RebuildObjectMapRequest::send_trim_image() { orig_size = m_image_ctx.get_object_size() * m_image_ctx.object_map->size(); } - TrimRequest *req = new TrimRequest(m_image_ctx, - this->create_callback_context(), - orig_size, new_size, m_prog_ctx); + TrimRequest *req = TrimRequest::create(m_image_ctx, + this->create_callback_context(), + orig_size, new_size, m_prog_ctx); req->send(); } diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc index e751360838e0..67eb950abfd6 100644 --- a/src/librbd/operation/Request.cc +++ b/src/librbd/operation/Request.cc @@ -2,8 +2,15 @@ // vim: ts=8 sw=2 smarttab #include "librbd/operation/Request.h" +#include "common/dout.h" +#include "common/errno.h" #include "librbd/ImageCtx.h" #include "librbd/Journal.h" +#include "librbd/Utils.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::Request: " namespace librbd { namespace operation { @@ -18,48 +25,97 @@ void Request::send() { I &image_ctx = this->m_image_ctx; assert(image_ctx.owner_lock.is_locked()); - { - RWLock::RLocker snap_locker(image_ctx.snap_lock); - if (image_ctx.journal != NULL && - !image_ctx.journal->is_journal_replaying()) { - // journal might be replaying -- wait for it to complete - if (!image_ctx.journal->is_journal_ready()) { - image_ctx.journal->wait_for_journal_ready( - new C_WaitForJournalReady(this)); - return; - } - - m_op_tid = image_ctx.journal->allocate_op_tid(); - journal::EventEntry event_entry(create_event(m_op_tid)); - image_ctx.journal->append_op_event(m_op_tid, event_entry); - } + // automatically create the event if we don't need to worry + // about affecting concurrent IO ops + if (can_affect_io() || !append_op_event()) { + send_op(); } - - send_op(); } template void Request::finish(int r) { - { - I &image_ctx = this->m_image_ctx; - RWLock::RLocker snap_locker(image_ctx.snap_lock); - if (m_op_tid != 0 && image_ctx.journal != NULL && - !image_ctx.journal->is_journal_replaying()) { - // ops will be canceled / completed before closing journal - assert(image_ctx.journal->is_journal_ready()); - - image_ctx.journal->commit_op_event(m_op_tid, r); - } + // automatically commit the event if we don't need to worry + // about affecting concurrent IO ops + if (r < 0 || !can_affect_io()) { + commit_op_event(r); } + assert(!m_appended_op_event || m_committed_op_event); AsyncRequest::finish(r); } template -void Request::handle_journal_ready() { +bool Request::append_op_event() { I &image_ctx = this->m_image_ctx; - RWLock::RLocker owner_locker(image_ctx.owner_lock); - send(); + + assert(image_ctx.owner_lock.is_locked()); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.journal != NULL && + !image_ctx.journal->is_journal_replaying()) { + append_op_event(util::create_context_callback< + Request, &Request::handle_op_event_safe>(this)); + return true; + } + return false; +} + +template +void Request::commit_op_event(int r) { + I &image_ctx = this->m_image_ctx; + RWLock::RLocker snap_locker(image_ctx.snap_lock); + + if (!m_appended_op_event) { + return; + } + + assert(m_op_tid != 0); + assert(!m_committed_op_event); + m_committed_op_event = true; + + if (image_ctx.journal != NULL && + !image_ctx.journal->is_journal_replaying()) { + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this 
<< " " << __func__ << ": r=" << r << dendl; + + // ops will be canceled / completed before closing journal + assert(image_ctx.journal->is_journal_ready()); + image_ctx.journal->commit_op_event(m_op_tid, r); + } +} + +template +void Request::append_op_event(Context *on_safe) { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.owner_lock.is_locked()); + assert(image_ctx.snap_lock.is_locked()); + + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + m_op_tid = image_ctx.journal->allocate_op_tid(); + image_ctx.journal->append_op_event( + m_op_tid, journal::EventEntry{create_event(m_op_tid)}, + new C_OpEventSafe(this, on_safe)); +} + +template +void Request::handle_op_event_safe(int r) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r) + << dendl; + this->finish(r); + delete this; + } else { + assert(!can_affect_io()); + + // haven't started the request state machine yet + RWLock::RLocker owner_locker(image_ctx.owner_lock); + send_op(); + } } } // namespace operation diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h index 9432b26f0f4b..83a8009b4f47 100644 --- a/src/librbd/operation/Request.h +++ b/src/librbd/operation/Request.h @@ -6,6 +6,9 @@ #include "librbd/AsyncRequest.h" #include "include/Context.h" +#include "common/RWLock.h" +#include "librbd/Utils.h" +#include "librbd/Journal.h" #include "librbd/journal/Entries.h" namespace librbd { @@ -22,26 +25,59 @@ public: virtual void send(); protected: - virtual void finish(int r); + virtual void finish(int r) override; virtual void send_op() = 0; + virtual bool can_affect_io() const { + return false; + } virtual journal::Event create_event(uint64_t op_tid) const = 0; -private: - struct C_WaitForJournalReady : public Context { - Request *request; + template + bool append_op_event(T *request) { + ImageCtxT &image_ctx = this->m_image_ctx; - C_WaitForJournalReady(Request *_request) : request(_request) { + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.journal != NULL && + !image_ctx.journal->is_journal_replaying()) { + append_op_event(util::create_context_callback(request)); + return true; } + return false; + } + + bool append_op_event(); + void commit_op_event(int r); + + // NOTE: temporary until converted to new state machine format + Context *create_context_finisher() { + return util::create_context_callback< + Request, &Request::finish>(this); + } - virtual void finish(int r) { - request->handle_journal_ready(); +private: + struct C_OpEventSafe : public Context { + Request *request; + Context *on_safe; + C_OpEventSafe(Request *request, Context *on_safe) + : request(request), on_safe(on_safe) { + } + virtual void finish(int r) override { + if (r >= 0) { + request->m_appended_op_event = true; + } + on_safe->complete(r); } }; uint64_t m_op_tid = 0; + bool m_appended_op_event = false; + bool m_committed_op_event = false; + + void append_op_event(Context *on_safe); + void handle_op_event_safe(int r); - void handle_journal_ready(); }; } // namespace operation diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc index dc92dbeef34b..f6459dbe40f5 100644 --- a/src/librbd/operation/ResizeRequest.cc +++ b/src/librbd/operation/ResizeRequest.cc @@ -2,11 +2,13 @@ // vim: ts=8 sw=2 smarttab #include 
"librbd/operation/ResizeRequest.h" +#include "librbd/AioImageRequestWQ.h" #include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" #include "librbd/internal.h" #include "librbd/ObjectMap.h" +#include "librbd/Utils.h" #include "librbd/operation/TrimRequest.h" #include "common/dout.h" #include "common/errno.h" @@ -18,6 +20,10 @@ namespace librbd { namespace operation { +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_safe_callback; + template ResizeRequest::ResizeRequest(I &image_ctx, Context *on_finish, uint64_t new_size, ProgressContext &prog_ctx) @@ -45,64 +51,6 @@ ResizeRequest::~ResizeRequest() { } } -template -bool ResizeRequest::should_complete(int r) { - I &image_ctx = this->m_image_ctx; - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; - - if (r < 0) { - lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl; - return true; - } - if (m_state == STATE_FINISHED) { - ldout(cct, 5) << "FINISHED" << dendl; - return true; - } - - RWLock::RLocker owner_lock(image_ctx.owner_lock); - switch (m_state) { - case STATE_FLUSH: - ldout(cct, 5) << "FLUSH" << dendl; - send_invalidate_cache(); - break; - - case STATE_INVALIDATE_CACHE: - ldout(cct, 5) << "INVALIDATE_CACHE" << dendl; - send_trim_image(); - break; - - case STATE_TRIM_IMAGE: - ldout(cct, 5) << "TRIM_IMAGE" << dendl; - send_update_header(); - break; - - case STATE_GROW_OBJECT_MAP: - ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl; - send_update_header(); - break; - - case STATE_UPDATE_HEADER: - ldout(cct, 5) << "UPDATE_HEADER" << dendl; - if (send_shrink_object_map()) { - update_size_and_overlap(); - return true; - } - break; - - case STATE_SHRINK_OBJECT_MAP: - ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl; - update_size_and_overlap(); - return true; - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; - } - return false; -} - template void ResizeRequest::send() { I &image_ctx = this->m_image_ctx; @@ -130,132 +78,257 @@ void ResizeRequest::send_op() { I &image_ctx = this->m_image_ctx; assert(image_ctx.owner_lock.is_locked()); - CephContext *cct = image_ctx.cct; if (this->is_canceled()) { this->async_complete(-ERESTART); - } else if (m_original_size == m_new_size) { - ldout(cct, 2) << this << " no change in size (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - m_state = STATE_FINISHED; - this->async_complete(0); - } else if (m_new_size > m_original_size) { - ldout(cct, 2) << this << " expanding image (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - send_grow_object_map(); } else { - ldout(cct, 2) << this << " shrinking image (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - send_flush(); + send_pre_block_writes(); } } template -void ResizeRequest::send_flush() { +void ResizeRequest::send_pre_block_writes() { I &image_ctx = this->m_image_ctx; - ldout(image_ctx.cct, 5) << this << " send_flush: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_FLUSH; - - // with clipping adjusted, ensure that write / copy-on-read operations won't - // (re-)create objects that we just removed. 
need async callback to ensure - // we don't have cache_lock already held - image_ctx.flush_async_operations(this->create_async_callback_context()); + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + image_ctx.aio_work_queue->block_writes(create_context_callback< + ResizeRequest, &ResizeRequest::handle_pre_block_writes>(this)); +} + +template +Context *ResizeRequest::handle_pre_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(); + } + + return send_append_op_event(); +} + +template +Context *ResizeRequest::send_append_op_event() { + I &image_ctx = this->m_image_ctx; + if (!this->template append_op_event< + ResizeRequest, &ResizeRequest::handle_append_op_event>(this)) { + m_shrink_size_visible = true; + return send_grow_object_map(); + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + return nullptr; +} + +template +Context *ResizeRequest::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + m_shrink_size_visible = true; + if (*result < 0) { + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(); + } + + return send_grow_object_map(); +} + +template +void ResizeRequest::send_trim_image() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + TrimRequest *req = TrimRequest::create( + image_ctx, create_context_callback< + ResizeRequest, &ResizeRequest::handle_trim_image>(this), + m_original_size, m_new_size, m_prog_ctx); + req->send(); +} + +template +Context *ResizeRequest::handle_trim_image(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl; + return this->create_context_finisher(); + } + + send_invalidate_cache(); + return nullptr; } template void ResizeRequest::send_invalidate_cache() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); - ldout(image_ctx.cct, 5) << this << " send_invalidate_cache: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_INVALIDATE_CACHE; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; // need to invalidate since we're deleting objects, and // ObjectCacher doesn't track non-existent objects - image_ctx.invalidate_cache(this->create_callback_context()); + RWLock::RLocker owner_locker(image_ctx.owner_lock); + image_ctx.invalidate_cache(create_async_context_callback( + image_ctx, create_context_callback< + ResizeRequest, &ResizeRequest::handle_invalidate_cache>(this))); } template -void ResizeRequest::send_trim_image() { +Context *ResizeRequest::handle_invalidate_cache(int *result) { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); - 
ldout(image_ctx.cct, 5) << this << " send_trim_image: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_TRIM_IMAGE; - - TrimRequest *req = new TrimRequest(image_ctx, - this->create_callback_context(), - m_original_size, m_new_size, - m_prog_ctx); - req->send(); + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(); + } + + send_post_block_writes(); + return nullptr; } template -void ResizeRequest::send_grow_object_map() { +Context *ResizeRequest::send_grow_object_map() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + image_ctx.aio_work_queue->unblock_writes(); + if (m_original_size == m_new_size) { + this->commit_op_event(0); + return this->create_context_finisher(); + } else if (m_new_size < m_original_size) { + send_trim_image(); + return nullptr; + } + + image_ctx.owner_lock.get_read(); image_ctx.snap_lock.get_read(); if (image_ctx.object_map == nullptr) { image_ctx.snap_lock.put_read(); - send_update_header(); - return; + image_ctx.owner_lock.put_read(); + + send_post_block_writes(); + return nullptr; } - ldout(image_ctx.cct, 5) << this << " send_grow_object_map: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_GROW_OBJECT_MAP; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; // should have been canceled prior to releasing lock assert(image_ctx.exclusive_lock == nullptr || image_ctx.exclusive_lock->is_lock_owner()); - image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT, - this->create_callback_context()); + image_ctx.object_map->aio_resize( + m_new_size, OBJECT_NONEXISTENT, create_context_callback< + ResizeRequest, &ResizeRequest::handle_grow_object_map>(this)); image_ctx.snap_lock.put_read(); + image_ctx.owner_lock.put_read(); + return nullptr; } template -bool ResizeRequest::send_shrink_object_map() { +Context *ResizeRequest::handle_grow_object_map(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + assert(*result == 0); + send_post_block_writes(); + return nullptr; +} + +template +Context *ResizeRequest::send_shrink_object_map() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + image_ctx.owner_lock.get_read(); image_ctx.snap_lock.get_read(); if (image_ctx.object_map == nullptr || m_new_size > m_original_size) { image_ctx.snap_lock.put_read(); - return true; + image_ctx.owner_lock.put_read(); + + update_size_and_overlap(); + return this->create_context_finisher(); } - ldout(image_ctx.cct, 5) << this << " send_shrink_object_map: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_SHRINK_OBJECT_MAP; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << " " + << "original_size=" << m_original_size << ", " + << "new_size=" << m_new_size << dendl; // should have been canceled prior to releasing lock assert(image_ctx.exclusive_lock == nullptr || image_ctx.exclusive_lock->is_lock_owner()); - image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT, - this->create_callback_context()); + image_ctx.object_map->aio_resize( + m_new_size, OBJECT_NONEXISTENT, 
create_context_callback< + ResizeRequest, &ResizeRequest::handle_shrink_object_map>(this)); image_ctx.snap_lock.put_read(); - return false; + image_ctx.owner_lock.put_read(); + return nullptr; } template -void ResizeRequest::send_update_header() { +Context *ResizeRequest::handle_shrink_object_map(int *result) { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; - ldout(image_ctx.cct, 5) << this << " send_update_header: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_UPDATE_HEADER; + update_size_and_overlap(); + assert(*result == 0); + return this->create_context_finisher(); +} + +template +void ResizeRequest::send_post_block_writes() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + image_ctx.aio_work_queue->block_writes(create_context_callback< + ResizeRequest, &ResizeRequest::handle_post_block_writes>(this)); +} + +template +Context *ResizeRequest::handle_post_block_writes(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + image_ctx.aio_work_queue->unblock_writes(); + lderr(cct) << "failed to block writes prior to header update: " + << cpp_strerror(*result) << dendl; + return this->create_context_finisher(); + } + + send_update_header(); + return nullptr; +} + +template +void ResizeRequest::send_update_header() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << " " + << "original_size=" << m_original_size << ", " + << "new_size=" << m_new_size << dendl;; // should have been canceled prior to releasing lock + RWLock::RLocker owner_locker(image_ctx.owner_lock); assert(image_ctx.exclusive_lock == nullptr || image_ctx.exclusive_lock->is_lock_owner()); @@ -273,14 +346,31 @@ void ResizeRequest::send_update_header() { cls_client::set_size(&op, m_new_size); } - librados::AioCompletion *rados_completion = - this->create_callback_completion(); + librados::AioCompletion *rados_completion = create_rados_safe_callback< + ResizeRequest, &ResizeRequest::handle_update_header>(this); int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, rados_completion, &op); assert(r == 0); rados_completion->release(); } +template +Context *ResizeRequest::handle_update_header(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + lderr(cct) << "failed to update image header: " << cpp_strerror(*result) + << dendl; + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(); + } + + this->commit_op_event(0); + return send_shrink_object_map(); +} + template void ResizeRequest::compute_parent_overlap() { I &image_ctx = this->m_image_ctx; @@ -295,13 +385,18 @@ void ResizeRequest::compute_parent_overlap() { template void ResizeRequest::update_size_and_overlap() { I &image_ctx = this->m_image_ctx; - RWLock::WLocker snap_locker(image_ctx.snap_lock); - image_ctx.size = m_new_size; + { + RWLock::WLocker snap_locker(image_ctx.snap_lock); + image_ctx.size = m_new_size; - RWLock::WLocker parent_locker(image_ctx.parent_lock); - if (image_ctx.parent != 
NULL && m_new_size < m_original_size) { - image_ctx.parent_md.overlap = m_new_parent_overlap; + RWLock::WLocker parent_locker(image_ctx.parent_lock); + if (image_ctx.parent != NULL && m_new_size < m_original_size) { + image_ctx.parent_md.overlap = m_new_parent_overlap; + } } + + // blocked by POST_BLOCK_WRITES state + image_ctx.aio_work_queue->unblock_writes(); } } // namespace operation diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h index 0ec1fe018621..a34a7cf3ef3e 100644 --- a/src/librbd/operation/ResizeRequest.h +++ b/src/librbd/operation/ResizeRequest.h @@ -22,7 +22,7 @@ public: virtual ~ResizeRequest(); inline bool shrinking() const { - return m_new_size < m_original_size; + return (m_shrink_size_visible && m_new_size < m_original_size); } inline uint64_t get_image_size() const { @@ -33,8 +33,12 @@ public: protected: virtual void send_op(); - virtual bool should_complete(int r); - + virtual bool should_complete(int r) { + return true; + } + virtual bool can_affect_io() const override { + return true; + } virtual journal::Event create_event(uint64_t op_tid) const { return journal::ResizeEvent(op_tid, m_new_size); } @@ -46,27 +50,44 @@ private: * * @verbatim * - * -------------> STATE_FINISHED -----------------------------\ - * | . (no change) | - * | . | - * | . . . . . . . . . . . . . . . . . . . . . | - * | . | - * | v | - * |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------| - * | (grow) | - * | | - * | | - * \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE | - * (shrink) | | - * | | - * /----------------------/ | - * | | - * v | - * STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . . | - * | . | - * | . | - * v v v - * STATE_SHRINK_OBJECT_MAP ---> + * + * | + * v + * STATE_PRE_BLOCK_WRITES + * | + * v + * STATE_APPEND_OP_EVENT (skip if journaling + * | disabled) + * | (unblock writes) + * | + * | + * | (grow) + * |\--------> STATE_GROW_OBJECT_MAP (skip if object map + * | | disabled) + * | v + * | STATE_POST_BLOCK_WRITES + * | | + * | v + * | STATE_UPDATE_HEADER ----------------------------\ + * | | + * | (shrink) | + * |\--------> STATE_TRIM_IMAGE | + * | | | + * | v | + * | STATE_INVALIDATE_CACHE | + * | | | + * | v | + * | STATE_POST_BLOCK_WRITES | + * | | | + * | v | + * | STATE_UPDATE_HEADER | + * | | | + * | v | + * | STATE_SHRINK_OBJECT_MAP (skip if object map | + * | | disabled) | + * | | (unblock writes) | + * | (no change) v | + * \------------> <-----------------------------------/ * * @endverbatim * @@ -74,30 +95,38 @@ private: * The state machine will immediately transition to _FINISHED if there * are no objects to trim. 
*/ - enum State { - STATE_FLUSH, - STATE_INVALIDATE_CACHE, - STATE_TRIM_IMAGE, - STATE_GROW_OBJECT_MAP, - STATE_UPDATE_HEADER, - STATE_SHRINK_OBJECT_MAP, - STATE_FINISHED - }; - - State m_state; + uint64_t m_original_size; uint64_t m_new_size; ProgressContext &m_prog_ctx; uint64_t m_new_parent_overlap; + bool m_shrink_size_visible = false; typename xlist*>::item m_xlist_item; - void send_flush(); + void send_pre_block_writes(); + Context *handle_pre_block_writes(int *result); + + Context *send_append_op_event(); + Context *handle_append_op_event(int *result); + void send_invalidate_cache(); + Context *handle_invalidate_cache(int *result); + void send_trim_image(); - void send_grow_object_map(); - bool send_shrink_object_map(); + Context *handle_trim_image(int *result); + + Context *send_grow_object_map(); + Context *handle_grow_object_map(int *result); + + Context *send_shrink_object_map(); + Context *handle_shrink_object_map(int *result); + + void send_post_block_writes(); + Context *handle_post_block_writes(int *result); + void send_update_header(); + Context *handle_update_header(int *result); void compute_parent_overlap(); void update_size_and_overlap(); diff --git a/src/librbd/operation/SnapshotCreateRequest.cc b/src/librbd/operation/SnapshotCreateRequest.cc index 63f19d48385b..49f5825f386a 100644 --- a/src/librbd/operation/SnapshotCreateRequest.cc +++ b/src/librbd/operation/SnapshotCreateRequest.cc @@ -9,6 +9,7 @@ #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" #include "librbd/ObjectMap.h" +#include "librbd/Utils.h" #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -17,36 +18,43 @@ namespace librbd { namespace operation { +using util::create_async_context_callback; +using util::create_context_callback; +using util::create_rados_safe_callback; + namespace { -template -std::ostream& operator<<(std::ostream& os, - const typename SnapshotCreateRequest::State& state) { - switch(state) { - case SnapshotCreateRequest::STATE_SUSPEND_REQUESTS: - os << "SUSPEND_REQUESTS"; - break; - case SnapshotCreateRequest::STATE_SUSPEND_AIO: - os << "SUSPEND_AIO"; - break; - case SnapshotCreateRequest::STATE_ALLOCATE_SNAP_ID: - os << "ALLOCATE_SNAP_ID"; - break; - case SnapshotCreateRequest::STATE_CREATE_SNAP: - os << "CREATE_SNAP"; - break; - case SnapshotCreateRequest::STATE_CREATE_OBJECT_MAP: - os << "CREATE_OBJECT_MAP"; - break; - case SnapshotCreateRequest::STATE_RELEASE_SNAP_ID: - os << "RELEASE_SNAP_ID"; - break; - default: - os << "UNKNOWN (" << static_cast(state) << ")"; - break; +template +struct C_CreateSnapId: public Context { + ImageCtxT &image_ctx; + uint64_t *snap_id; + Context *on_finish; + + C_CreateSnapId(ImageCtxT &image_ctx, uint64_t *snap_id, Context *on_finish) + : image_ctx(image_ctx), snap_id(snap_id), on_finish(on_finish) { } - return os; -} + + virtual void finish(int r) { + r = image_ctx.md_ctx.selfmanaged_snap_create(snap_id); + on_finish->complete(r); + } +}; + +template +struct C_RemoveSnapId: public Context { + ImageCtxT &image_ctx; + uint64_t snap_id; + Context *on_finish; + + C_RemoveSnapId(ImageCtxT &image_ctx, uint64_t snap_id, Context *on_finish) + : image_ctx(image_ctx), snap_id(snap_id), on_finish(on_finish) { + } + + virtual void finish(int r) { + r = image_ctx.md_ctx.selfmanaged_snap_remove(snap_id); + on_finish->complete(r); + } +}; } // anonymous namespace @@ -55,8 +63,7 @@ SnapshotCreateRequest::SnapshotCreateRequest(I &image_ctx, Context *on_finish, const std::string &snap_name) : Request(image_ctx, on_finish), m_snap_name(snap_name), 
m_ret_val(0), - m_aio_suspended(false), m_requests_suspended(false), - m_snap_id(CEPH_NOSNAP), m_snap_created(false) { + m_snap_id(CEPH_NOSNAP) { } template @@ -65,128 +72,126 @@ void SnapshotCreateRequest::send_op() { } template -bool SnapshotCreateRequest::should_complete(int r) { +void SnapshotCreateRequest::send_suspend_requests() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", " - << "r=" << r << dendl; - int orig_result = r; - r = filter_state_return_code(r); - if (r < 0) { - lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl; - if (m_ret_val == 0) { - m_ret_val = r; - } - } + ldout(cct, 5) << this << " " << __func__ << dendl; - if (m_ret_val < 0) { - return should_complete_error(); - } + // TODO suspend (shrink) resize to ensure consistent RBD mirror + send_suspend_aio(); +} - RWLock::RLocker owner_lock(image_ctx.owner_lock); - bool finished = false; - switch (m_state) { - case STATE_SUSPEND_REQUESTS: - send_suspend_aio(); - break; - case STATE_SUSPEND_AIO: - send_allocate_snap_id(); - break; - case STATE_ALLOCATE_SNAP_ID: - send_create_snap(); - break; - case STATE_CREATE_SNAP: - if (orig_result == 0) { - update_snap_context(); - finished = send_create_object_map(); - } else { - assert(orig_result == -ESTALE); - send_allocate_snap_id(); - } - break; - case STATE_CREATE_OBJECT_MAP: - finished = true; - break; - default: - assert(false); - break; - } +template +Context *SnapshotCreateRequest::handle_suspend_requests(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; - if (finished) { - resume_aio(); - resume_requests(); - } - return finished; + // TODO + send_suspend_aio(); + return nullptr; } template -bool SnapshotCreateRequest::should_complete_error() { +void SnapshotCreateRequest::send_suspend_aio() { I &image_ctx = this->m_image_ctx; - CephContext *cct = image_ctx.cct; - lderr(cct) << this << " " << __func__ << ": " - << "ret_val=" << m_ret_val << dendl; + assert(image_ctx.owner_lock.is_locked()); - // only valid exit points during error recovery - bool finished = true; - if (m_state != STATE_RELEASE_SNAP_ID) { - finished = send_release_snap_id(); - } + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; - if (finished) { - resume_aio(); - resume_requests(); - } - return finished; + image_ctx.aio_work_queue->block_writes(create_context_callback< + SnapshotCreateRequest, + &SnapshotCreateRequest::handle_suspend_aio>(this)); } template -void SnapshotCreateRequest::send_suspend_requests() { +Context *SnapshotCreateRequest::handle_suspend_aio(int *result) { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; - // TODO suspend (shrink) resize to ensure consistent RBD mirror - send_suspend_aio(); + if (*result < 0) { + lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; + image_ctx.aio_work_queue->unblock_writes(); + return this->create_context_finisher(); + } + + send_append_op_event(); + return nullptr; } template -void SnapshotCreateRequest::send_suspend_aio() { +void SnapshotCreateRequest::send_append_op_event() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + if (!this->template append_op_event< + SnapshotCreateRequest, + 
&SnapshotCreateRequest::handle_append_op_event>(this)) { + send_allocate_snap_id(); + return; + } CephContext *cct = image_ctx.cct; ldout(cct, 5) << this << " " << __func__ << dendl; +} + +template +Context *SnapshotCreateRequest::handle_append_op_event(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; - m_state = STATE_SUSPEND_AIO; - m_aio_suspended = true; + if (*result < 0) { + image_ctx.aio_work_queue->unblock_writes(); + lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(); + } - // can issue a re-entrant callback if no IO in-progress - image_ctx.aio_work_queue->block_writes(this->create_async_callback_context()); + send_allocate_snap_id(); + return nullptr; } template void SnapshotCreateRequest::send_allocate_snap_id() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); - CephContext *cct = image_ctx.cct; ldout(cct, 5) << this << " " << __func__ << dendl; - m_state = STATE_ALLOCATE_SNAP_ID; // TODO create an async version of selfmanaged_snap_create - int r = image_ctx.md_ctx.selfmanaged_snap_create(&m_snap_id); - this->async_complete(r); + image_ctx.op_work_queue->queue(new C_CreateSnapId( + image_ctx, &m_snap_id, create_context_callback< + SnapshotCreateRequest, + &SnapshotCreateRequest::handle_allocate_snap_id>(this)), 0); } template -void SnapshotCreateRequest::send_create_snap() { +Context *SnapshotCreateRequest::handle_allocate_snap_id(int *result) { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); - RWLock::RLocker snap_locker(image_ctx.snap_lock); - RWLock::RLocker parent_locker(image_ctx.parent_lock); + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result < 0) { + save_result(result); + finalize(*result); + lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result) + << dendl; + return this->create_context_finisher(); + } + + send_create_snap(); + return nullptr; +} +template +void SnapshotCreateRequest::send_create_snap() { + I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; ldout(cct, 5) << this << " " << __func__ << dendl; - m_state = STATE_CREATE_SNAP; + + RWLock::RLocker owner_locker(image_ctx.owner_lock); + RWLock::RLocker snap_locker(image_ctx.snap_lock); + RWLock::RLocker parent_locker(image_ctx.parent_lock); // should have been canceled prior to releasing lock assert(image_ctx.exclusive_lock == nullptr || @@ -206,84 +211,118 @@ void SnapshotCreateRequest::send_create_snap() { cls_client::snapshot_add(&op, m_snap_id, m_snap_name); } - librados::AioCompletion *rados_completion = - this->create_callback_completion(); + librados::AioCompletion *rados_completion = create_rados_safe_callback< + SnapshotCreateRequest, + &SnapshotCreateRequest::handle_create_snap>(this); int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid, - rados_completion, &op); + rados_completion, &op); assert(r == 0); rados_completion->release(); } template -bool SnapshotCreateRequest::send_create_object_map() { +Context *SnapshotCreateRequest::handle_create_snap(int *result) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + if (*result == -ESTALE) { + send_allocate_snap_id(); + return nullptr; + } else if (*result < 0) { + save_result(result); + 
send_release_snap_id(); + return nullptr; + } + + return send_create_object_map(); +} + +template +Context *SnapshotCreateRequest::send_create_object_map() { I &image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); + + update_snap_context(); + + image_ctx.snap_lock.get_read(); + if (image_ctx.object_map == nullptr) { + image_ctx.snap_lock.put_read(); + + finalize(0); + return this->create_context_finisher(); + } + + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; { - RWLock::RLocker snap_locker(image_ctx.snap_lock); RWLock::RLocker object_map_lock(image_ctx.object_map_lock); - if (image_ctx.object_map != nullptr) { - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; - m_state = STATE_CREATE_OBJECT_MAP; - - image_ctx.object_map->snapshot_add(m_snap_id, - this->create_callback_context()); - return false; - } + image_ctx.object_map->snapshot_add( + m_snap_id, create_context_callback< + SnapshotCreateRequest, + &SnapshotCreateRequest::handle_create_object_map>(this)); } - return true; + image_ctx.snap_lock.put_read(); + return nullptr; } template -bool SnapshotCreateRequest::send_release_snap_id() { +Context *SnapshotCreateRequest::handle_create_object_map(int *result) { I &image_ctx = this->m_image_ctx; - if (m_snap_id != CEPH_NOSNAP && !m_snap_created) { - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": snap_id=" << m_snap_id - << dendl; - m_state = STATE_RELEASE_SNAP_ID; - - // TODO add async version of selfmanaged_snap_remove - int r = image_ctx.md_ctx.selfmanaged_snap_remove(m_snap_id); - m_snap_id = CEPH_NOSNAP; - - this->async_complete(r); - return false; - } - return true; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + + assert(*result == 0); + + finalize(0); + return this->create_context_finisher(); +} + +template +void SnapshotCreateRequest::send_release_snap_id() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << dendl; + + assert(m_snap_id != CEPH_NOSNAP); + + // TODO create an async version of selfmanaged_snap_remove + image_ctx.op_work_queue->queue(new C_RemoveSnapId( + image_ctx, m_snap_id, create_context_callback< + SnapshotCreateRequest, + &SnapshotCreateRequest::handle_release_snap_id>(this)), 0); } template -void SnapshotCreateRequest::resume_aio() { +Context *SnapshotCreateRequest::handle_release_snap_id(int *result) { I &image_ctx = this->m_image_ctx; - if (m_aio_suspended) { - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; - image_ctx.aio_work_queue->unblock_writes(); - m_aio_suspended = false; - } + assert(m_ret_val < 0); + *result = m_ret_val; + + finalize(m_ret_val); + return this->create_context_finisher(); } template -void SnapshotCreateRequest::resume_requests() { +void SnapshotCreateRequest::finalize(int r) { I &image_ctx = this->m_image_ctx; - if (m_requests_suspended) { - CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + CephContext *cct = image_ctx.cct; + ldout(cct, 5) << this << " " << __func__ << ": r=" << r << dendl; - // TODO - m_requests_suspended = false; + if (r == 0) { + this->commit_op_event(0); } + image_ctx.aio_work_queue->unblock_writes(); } template void SnapshotCreateRequest::update_snap_context() { I 
&image_ctx = this->m_image_ctx; - assert(image_ctx.owner_lock.is_locked()); - m_snap_created = true; + RWLock::RLocker owner_locker(image_ctx.owner_lock); RWLock::WLocker snap_locker(image_ctx.snap_lock); if (image_ctx.old_format) { return; diff --git a/src/librbd/operation/SnapshotCreateRequest.h b/src/librbd/operation/SnapshotCreateRequest.h index abe5556c1c7d..b809a1a09341 100644 --- a/src/librbd/operation/SnapshotCreateRequest.h +++ b/src/librbd/operation/SnapshotCreateRequest.h @@ -33,17 +33,20 @@ public: * v * STATE_SUSPEND_AIO * * * * * * * * * * * * * * | * + * v * + * STATE_APPEND_OP_EVENT (skip if journal * + * | disabled) * * (retry) v * - * . . . > STATE_ALLOCATE_SNAP_ID * * * - * . | * * - * . v * * - * . . . . STATE_CREATE_SNAP * * * * * * - * | * * - * v * * - * STATE_CREATE_OBJECT_MAP * * - * | * * - * | * * - * | v * + * . . . > STATE_ALLOCATE_SNAP_ID * + * . | * + * . v * + * . . . . STATE_CREATE_SNAP * * * * * * * * * * * + * | * * + * v * * + * STATE_CREATE_OBJECT_MAP (skip if * * + * | disabled) * * + * | * * + * | v * * | STATE_RELEASE_SNAP_ID * * | | * * | v * @@ -56,67 +59,59 @@ public: * to abort, the error path is followed to record the result in the journal * (if enabled) and bubble the originating error code back to the client. */ - enum State { - STATE_SUSPEND_REQUESTS, - STATE_SUSPEND_AIO, - STATE_ALLOCATE_SNAP_ID, - STATE_CREATE_SNAP, - STATE_CREATE_OBJECT_MAP, - STATE_RELEASE_SNAP_ID - }; - SnapshotCreateRequest(ImageCtxT &image_ctx, Context *on_finish, const std::string &snap_name); protected: virtual void send_op(); - virtual bool should_complete(int r); - - virtual int filter_return_code(int r) const { - if (m_ret_val < 0) { - return m_ret_val; - } - return r; + virtual bool should_complete(int r) { + return true; + } + virtual bool can_affect_io() const override { + return true; } - virtual journal::Event create_event(uint64_t op_tid) const { return journal::SnapCreateEvent(op_tid, m_snap_name); } private: std::string m_snap_name; - State m_state; int m_ret_val; - bool m_aio_suspended; - bool m_requests_suspended; - uint64_t m_snap_id; - bool m_snap_created; - uint64_t m_size; parent_info m_parent_info; - int filter_state_return_code(int r) const { - if (m_state == STATE_CREATE_SNAP && r == -ESTALE) { - return 0; - } - return r; - } - - bool should_complete_error(); - void send_suspend_requests(); + Context *handle_suspend_requests(int *result); + void send_suspend_aio(); + Context *handle_suspend_aio(int *result); + + void send_append_op_event(); + Context *handle_append_op_event(int *result); + void send_allocate_snap_id(); + Context *handle_allocate_snap_id(int *result); + void send_create_snap(); - bool send_create_object_map(); - bool send_release_snap_id(); + Context *handle_create_snap(int *result); - void resume_aio(); - void resume_requests(); + Context *send_create_object_map(); + Context *handle_create_object_map(int *result); + + void send_release_snap_id(); + Context *handle_release_snap_id(int *result); + + void finalize(int r); void update_snap_context(); + + void save_result(int *result) { + if (m_ret_val == 0 && *result < 0) { + m_ret_val = *result; + } + } }; } // namespace operation diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h index 6e6c50c9f4fc..58312813dfc4 100644 --- a/src/librbd/operation/TrimRequest.h +++ b/src/librbd/operation/TrimRequest.h @@ -17,9 +17,12 @@ template class TrimRequest : public AsyncRequest { public: - TrimRequest(ImageCtxT &image_ctx, Context *on_finish, - uint64_t 
original_size, uint64_t new_size, - ProgressContext &prog_ctx); + static TrimRequest *create(ImageCtxT &image_ctx, Context *on_finish, + uint64_t original_size, uint64_t new_size, + ProgressContext &prog_ctx) { + return new TrimRequest(image_ctx, on_finish, original_size, new_size, + prog_ctx); + } virtual void send(); @@ -80,6 +83,10 @@ private: uint64_t m_new_size; ProgressContext &m_prog_ctx; + TrimRequest(ImageCtxT &image_ctx, Context *on_finish, + uint64_t original_size, uint64_t new_size, + ProgressContext &prog_ctx); + void send_copyup_objects(); void send_remove_objects(); void send_pre_remove();
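
The ordering this patch enforces can be illustrated outside the diff. What follows is a minimal, self-contained C++ sketch of the pattern described in the commit message: writes are quiesced, the op event is appended to the journal, the maintenance op runs only once that event is safely committed, and the op's result is recorded before IO resumes. Journal, WriteQueue, Context, and run_maintenance_op below are simplified stand-ins invented for illustration -- they are not the librbd types -- and the sketch elides the locking, replay checks, and error paths present in the real patch.

// Sketch only: stand-in types, synchronous "callbacks" for clarity.
#include <cstdint>
#include <functional>
#include <iostream>

struct Context {
  std::function<void(int)> fn;
  explicit Context(std::function<void(int)> f) : fn(std::move(f)) {}
  void complete(int r) { fn(r); delete this; }
};

struct Journal {
  // Invoke on_safe only after the appended entry is durably committed;
  // here the "flush" succeeds immediately for illustration.
  void append_op_event(uint64_t op_tid, Context *on_safe) {
    std::cout << "journal: op event " << op_tid << " safely committed\n";
    on_safe->complete(0);
  }
  void commit_op_event(uint64_t op_tid, int r) {
    std::cout << "journal: op " << op_tid << " finished (r=" << r << ")\n";
  }
};

struct WriteQueue {
  // Fires on_blocked once all in-flight writes have drained.
  void block_writes(Context *on_blocked) {
    std::cout << "io: writes blocked\n";
    on_blocked->complete(0);
  }
  void unblock_writes() { std::cout << "io: writes unblocked\n"; }
};

// Ordering for ops that can affect in-flight IO (resize, snap create):
// block writes -> append op event -> run op -> commit result -> unblock.
void run_maintenance_op(Journal &journal, WriteQueue &wq, uint64_t op_tid,
                        std::function<int()> op) {
  wq.block_writes(new Context([&journal, &wq, op_tid, op](int r) {
    if (r < 0) {
      wq.unblock_writes();
      return;
    }
    journal.append_op_event(op_tid, new Context(
        [&journal, &wq, op_tid, op](int r2) {
      if (r2 < 0) {
        wq.unblock_writes();
        return;
      }
      int result = op();  // safe: event is durable and no IO is in flight
      journal.commit_op_event(op_tid, result);
      wq.unblock_writes();
    }));
  }));
}

int main() {
  Journal journal;
  WriteQueue wq;
  run_maintenance_op(journal, wq, 1, []() {
    std::cout << "op: resize executed\n";
    return 0;
  });
  return 0;
}

In the patch itself this same ordering is spread across several pieces: Request<I>::append_op_event() (whose C_OpEventSafe marks m_appended_op_event), Journal<I>::append_op_event() flushing the future before invoking on_safe, and the block_writes()/unblock_writes() calls added to ResizeRequest and SnapshotCreateRequest.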