From ca83e4b958d38d0efab626ea1533439de9749000 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 7 Jul 2015 22:07:47 -0400 Subject: [PATCH] librbd: consistent owner_lock handling for AIO paths Required moving non-AIO read/write/discard methods to AioImageRequestWQ to avoid deadlock on lock request. Signed-off-by: Jason Dillaman --- src/librbd/AioImageRequest.cc | 9 ++- src/librbd/AioImageRequestWQ.cc | 109 ++++++++++++++++++++++++++++- src/librbd/AioImageRequestWQ.h | 5 ++ src/librbd/AioObjectRequest.cc | 5 +- src/librbd/CopyupRequest.cc | 1 + src/librbd/ImageWatcher.cc | 2 + src/librbd/internal.cc | 117 +------------------------------- src/librbd/internal.h | 5 -- src/librbd/librbd.cc | 20 +++--- 9 files changed, 134 insertions(+), 139 deletions(-) diff --git a/src/librbd/AioImageRequest.cc b/src/librbd/AioImageRequest.cc index ed8a9dc20c6ac..379dd535d8538 100644 --- a/src/librbd/AioImageRequest.cc +++ b/src/librbd/AioImageRequest.cc @@ -49,21 +49,20 @@ void AioImageRequest::flush(ImageCtx *ictx, AioCompletion *c) { } void AioImageRequest::send() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << get_request_type() << ": ictx=" << &m_image_ctx << ", " << "completion=" << m_aio_comp << dendl; m_aio_comp->get(); - int r = ictx_check(&m_image_ctx); + int r = ictx_check(&m_image_ctx, m_image_ctx.owner_lock); if (r < 0) { m_aio_comp->fail(cct, r); return; } - { - RWLock::RLocker owner_locker(m_image_ctx.owner_lock); - execute_request(); - } + execute_request(); } diff --git a/src/librbd/AioImageRequestWQ.cc b/src/librbd/AioImageRequestWQ.cc index c5952bad207b6..d8f6e7778af0c 100644 --- a/src/librbd/AioImageRequestWQ.cc +++ b/src/librbd/AioImageRequestWQ.cc @@ -8,11 +8,83 @@ #include "librbd/ImageWatcher.h" #include "librbd/internal.h" +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::AioImageRequestWQ: " + namespace librbd { +ssize_t AioImageRequestWQ::read(uint64_t off, size_t len, char *buf, + int op_flags) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "read " << &m_image_ctx << " off = " << off << " len = " + << len << dendl; + + std::vector > image_extents; + image_extents.push_back(make_pair(off, len)); + + C_SaferCond cond; + AioCompletion *c = aio_create_completion_internal(&cond, rbd_ctx_cb); + aio_read(c, off, len, buf, NULL, op_flags); + return cond.wait(); +} + +ssize_t AioImageRequestWQ::write(uint64_t off, size_t len, const char *buf, + int op_flags) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "write " << &m_image_ctx << " off = " << off << " len = " + << len << dendl; + + m_image_ctx.snap_lock.get_read(); + int r = clip_io(&m_image_ctx, off, &len); + m_image_ctx.snap_lock.put_read(); + if (r < 0) { + return r; + } + + C_SaferCond cond; + AioCompletion *c = aio_create_completion_internal(&cond, rbd_ctx_cb); + aio_write(c, off, len, buf, op_flags); + + r = cond.wait(); + if (r < 0) { + return r; + } + return len; +} + +int AioImageRequestWQ::discard(uint64_t off, uint64_t len) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "discard " << &m_image_ctx << " off = " << off << " len = " + << len << dendl; + + m_image_ctx.snap_lock.get_read(); + int r = clip_io(&m_image_ctx, off, &len); + m_image_ctx.snap_lock.put_read(); + if (r < 0) { + return r; + } + + C_SaferCond cond; + AioCompletion *c = aio_create_completion_internal(&cond, rbd_ctx_cb); + aio_discard(c, off, len); + + r = cond.wait(); + if (r < 0) { + return r; + } + return len; +} + void AioImageRequestWQ::aio_read(AioCompletion *c, uint64_t off, size_t len, char *buf, bufferlist *pbl, int op_flags) { c->init_time(&m_image_ctx, librbd::AIO_TYPE_READ); + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "aio_read: ictx=" << &m_image_ctx << ", " + << "completion=" << c << ", off=" << off << ", len=" << len + << "flags=" << op_flags << dendl; + + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.non_blocking_aio) { queue(new AioImageRead(m_image_ctx, c, off, len, buf, pbl, op_flags)); } else { @@ -23,7 +95,14 @@ void AioImageRequestWQ::aio_read(AioCompletion *c, uint64_t off, size_t len, void AioImageRequestWQ::aio_write(AioCompletion *c, uint64_t off, size_t len, const char *buf, int op_flags) { c->init_time(&m_image_ctx, librbd::AIO_TYPE_WRITE); - if (m_image_ctx.non_blocking_aio) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "aio_write: ictx=" << &m_image_ctx << ", " + << "completion=" << c << ", off=" << off << ", len=" << len + << "flags=" << op_flags << dendl; + + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + bool lock_required = is_lock_required(); + if (m_image_ctx.non_blocking_aio || lock_required) { queue(new AioImageWrite(m_image_ctx, c, off, len, buf, op_flags)); } else { AioImageRequest::write(&m_image_ctx, c, off, len, buf, op_flags); @@ -33,7 +112,14 @@ void AioImageRequestWQ::aio_write(AioCompletion *c, uint64_t off, size_t len, void AioImageRequestWQ::aio_discard(AioCompletion *c, uint64_t off, uint64_t len) { c->init_time(&m_image_ctx, librbd::AIO_TYPE_DISCARD); - if (m_image_ctx.non_blocking_aio) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "aio_discard: ictx=" << &m_image_ctx << ", " + << "completion=" << c << ", off=" << off << ", len=" << len + << dendl; + + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + bool lock_required = is_lock_required(); + if (m_image_ctx.non_blocking_aio || lock_required) { queue(new AioImageDiscard(m_image_ctx, c, off, len)); } else { AioImageRequest::discard(&m_image_ctx, c, off, len); @@ -42,6 +128,11 @@ void AioImageRequestWQ::aio_discard(AioCompletion *c, uint64_t off, void AioImageRequestWQ::aio_flush(AioCompletion *c) { c->init_time(&m_image_ctx, librbd::AIO_TYPE_FLUSH); + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << "aio_flush: ictx=" << &m_image_ctx << ", " + << "completion=" << c << dendl; + + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.non_blocking_aio) { queue(new AioImageFlush(m_image_ctx, c)); } else { @@ -77,7 +168,10 @@ void *AioImageRequestWQ::_void_dequeue() { } void AioImageRequestWQ::process(AioImageRequest *req) { - req->send(); + { + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + req->send(); + } { Mutex::Locker locker(m_lock); @@ -94,6 +188,15 @@ void AioImageRequestWQ::process(AioImageRequest *req) { delete req; } +bool AioImageRequestWQ::is_lock_required() { + assert(m_image_ctx.owner_lock.is_locked()); + if (m_image_ctx.image_watcher == NULL) { + return false; + } + return (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()); +} + void AioImageRequestWQ::queue(AioImageRequest *req) { { Mutex::Locker locker(m_lock); diff --git a/src/librbd/AioImageRequestWQ.h b/src/librbd/AioImageRequestWQ.h index 82a35ff053152..034026c82223a 100644 --- a/src/librbd/AioImageRequestWQ.h +++ b/src/librbd/AioImageRequestWQ.h @@ -22,6 +22,10 @@ public: m_writes_suspended(false), m_in_progress_writes(0), m_queued_writes(0) { } + ssize_t read(uint64_t off, size_t len, char *buf, int op_flags); + ssize_t write(uint64_t off, size_t len, const char *buf, int op_flags); + int discard(uint64_t off, uint64_t len); + void aio_read(AioCompletion *c, uint64_t off, size_t len, char *buf, bufferlist *pbl, int op_flags); void aio_write(AioCompletion *c, uint64_t off, size_t len, const char *buf, @@ -62,6 +66,7 @@ private: uint32_t m_in_progress_writes; uint32_t m_queued_writes; + bool is_lock_required(); void queue(AioImageRequest *req); }; diff --git a/src/librbd/AioObjectRequest.cc b/src/librbd/AioObjectRequest.cc index dbaee3a8523e9..95dda4b012eb1 100644 --- a/src/librbd/AioObjectRequest.cc +++ b/src/librbd/AioObjectRequest.cc @@ -137,8 +137,8 @@ namespace librbd { // This is the step to read from parent if (!m_tried_parent && r == -ENOENT) { { - RWLock::RLocker l(m_ictx->snap_lock); - RWLock::RLocker l2(m_ictx->parent_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + RWLock::RLocker parent_locker(m_ictx->parent_lock); if (m_ictx->parent == NULL) { ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl; m_state = LIBRBD_AIO_READ_FLAT; @@ -272,6 +272,7 @@ namespace librbd { << " parent completion " << m_parent_completion << " extents " << parent_extents << dendl; + RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); AioImageRequest::read(m_ictx->parent, m_parent_completion, parent_extents, NULL, &m_read_data, 0); } diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 4f162bb253d38..9ac6ca6f853c4 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -186,6 +186,7 @@ private: << ", oid " << m_oid << ", extents " << m_image_extents << dendl; + RWLock::RLocker owner_locker(m_ictx->parent->owner_lock); AioImageRequest::read(m_ictx->parent, comp, m_image_extents, NULL, &m_copyup_data, 0); } diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index d9d4a08d120f6..ba2765695eb15 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -570,6 +570,7 @@ void ImageWatcher::schedule_retry_aio_requests(bool use_timer) { void ImageWatcher::retry_aio_requests() { m_task_finisher->cancel(TASK_CODE_RETRY_AIO_REQUESTS); + std::vector lock_request_restarts; { Mutex::Locker l(m_aio_request_lock); @@ -578,6 +579,7 @@ void ImageWatcher::retry_aio_requests() { ldout(m_image_ctx.cct, 15) << this << " retrying pending aio requests" << dendl; + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); for (std::vector::iterator iter = lock_request_restarts.begin(); iter != lock_request_restarts.end(); ++iter) { ldout(m_image_ctx.cct, 20) << this << " retrying aio request: " diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index c4e982831a077..8b8b25b2cc391 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2796,17 +2796,6 @@ reprotect_and_return_err: ProgressContext &prog_ctx; }; - int do_copy_extent(uint64_t offset, size_t len, const char *buf, void *data) - { - CopyProgressCtx *cp = reinterpret_cast(data); - cp->prog_ctx.update_progress(offset, cp->src_size); - int ret = 0; - if (buf) { - ret = write(cp->destictx, offset, len, buf, 0); - } - return ret; - } - int copy(ImageCtx *src, IoCtx& dest_md_ctx, const char *destname, ImageOptions& opts, ProgressContext &prog_ctx) { @@ -2895,6 +2884,7 @@ reprotect_and_return_err: return; } + RWLock::RLocker owner_lock(m_dest->owner_lock); Context *ctx = new C_CopyWrite(m_throttle, m_bl); AioCompletion *comp = aio_create_completion_internal(ctx, rbd_ctx_cb); AioImageRequest::write(m_dest, comp, m_offset, m_bl->length(), @@ -2938,6 +2928,7 @@ reprotect_and_return_err: } } + RWLock::RLocker owner_lock(src->owner_lock); SimpleThrottle throttle(src->concurrent_management_ops, false); uint64_t period = src->get_stripe_period(); unsigned fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | LIBRADOS_OP_FLAG_FADVISE_NOCACHE; @@ -3457,6 +3448,7 @@ reprotect_and_return_err: uint64_t period = ictx->get_stripe_period(); uint64_t left = mylen; + RWLock::RLocker owner_locker(ictx->owner_lock); start_time = ceph_clock_now(ictx->cct); while (left > 0) { uint64_t period_off = off - (off % period); @@ -3528,109 +3520,6 @@ reprotect_and_return_err: return r; } - ssize_t read(ImageCtx *ictx, uint64_t ofs, size_t len, char *buf, int op_flags) - { - ssize_t ret; - ldout(ictx->cct, 20) << "read " << ictx << " off = " << ofs << " len = " - << len << dendl; - - vector > extents; - extents.push_back(make_pair(ofs, len)); - ret = read(ictx, extents, buf, NULL, op_flags); - if (ret < 0) - return ret; - - return ret; - } - - ssize_t read(ImageCtx *ictx, const vector >& image_extents, - char *buf, bufferlist *pbl, int op_flags) - { - Mutex mylock("librbd::read::mylock"); - Cond cond; - bool done; - int ret; - - Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); - AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); - AioImageRequest::read(ictx, c, image_extents, buf, pbl, op_flags); - - mylock.Lock(); - while (!done) - cond.Wait(mylock); - mylock.Unlock(); - - return ret; - } - - ssize_t write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, int op_flags) - { - ldout(ictx->cct, 20) << "write " << ictx << " off = " << off << " len = " - << len << dendl; - - Mutex mylock("librbd::write::mylock"); - Cond cond; - bool done; - int ret; - - uint64_t mylen = len; - ictx->snap_lock.get_read(); - int r = clip_io(ictx, off, &mylen); - ictx->snap_lock.put_read(); - if (r < 0) { - return r; - } - - Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); - AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); - AioImageRequest::write(ictx, c, off, mylen, buf, op_flags); - - mylock.Lock(); - while (!done) - cond.Wait(mylock); - mylock.Unlock(); - - if (ret < 0) { - return ret; - } - - return mylen; - } - - int discard(ImageCtx *ictx, uint64_t off, uint64_t len) - { - ldout(ictx->cct, 20) << "discard " << ictx << " off = " << off << " len = " - << len << dendl; - - Mutex mylock("librbd::discard::mylock"); - Cond cond; - bool done; - int ret; - - uint64_t mylen = len; - ictx->snap_lock.get_read(); - int r = clip_io(ictx, off, &mylen); - ictx->snap_lock.put_read(); - if (r < 0) { - return r; - } - - Context *ctx = new C_SafeCond(&mylock, &cond, &done, &ret); - AioCompletion *c = aio_create_completion_internal(ctx, rbd_ctx_cb); - AioImageRequest::discard(ictx, c, off, mylen); - - mylock.Lock(); - while (!done) - cond.Wait(mylock); - mylock.Unlock(); - - if (ret < 0) { - return ret; - } - - return mylen; - } - void rados_req_cb(rados_completion_t c, void *arg) { AioObjectRequest *req = reinterpret_cast(arg); diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 7965c9db85405..634d2b3375502 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -201,11 +201,6 @@ namespace librbd { void *arg); void readahead(ImageCtx *ictx, const vector >& image_extents); - ssize_t read(ImageCtx *ictx, uint64_t off, size_t len, char *buf, int op_flags); - ssize_t read(ImageCtx *ictx, const vector >& image_extents, - char *buf, bufferlist *pbl, int op_flags); - ssize_t write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf, int op_flags); - int discard(ImageCtx *ictx, uint64_t off, uint64_t len); int async_flatten(ImageCtx *ictx, Context *ctx, ProgressContext &prog_ctx); int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size, diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index fe387c63044dd..e322ad5d167ab 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -782,7 +782,7 @@ namespace librbd { tracepoint(librbd, read_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len); bufferptr ptr(len); bl.push_back(ptr); - int r = librbd::read(ictx, ofs, len, bl.c_str(), 0); + int r = ictx->aio_work_queue->read(ofs, len, bl.c_str(), 0); tracepoint(librbd, read_exit, r); return r; } @@ -794,7 +794,7 @@ namespace librbd { ictx->read_only, ofs, len, op_flags); bufferptr ptr(len); bl.push_back(ptr); - int r = librbd::read(ictx, ofs, len, bl.c_str(), op_flags); + int r = ictx->aio_work_queue->read(ofs, len, bl.c_str(), op_flags); tracepoint(librbd, read_exit, r); return r; } @@ -860,7 +860,7 @@ namespace librbd { tracepoint(librbd, write_exit, -EINVAL); return -EINVAL; } - int r = librbd::write(ictx, ofs, len, bl.c_str(), 0); + int r = ictx->aio_work_queue->write(ofs, len, bl.c_str(), 0); tracepoint(librbd, write_exit, r); return r; } @@ -874,7 +874,7 @@ namespace librbd { tracepoint(librbd, write_exit, -EINVAL); return -EINVAL; } - int r = librbd::write(ictx, ofs, len, bl.c_str(), op_flags); + int r = ictx->aio_work_queue->write(ofs, len, bl.c_str(), op_flags); tracepoint(librbd, write_exit, r); return r; } @@ -883,7 +883,7 @@ namespace librbd { { ImageCtx *ictx = (ImageCtx *)ctx; tracepoint(librbd, discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len); - int r = librbd::discard(ictx, ofs, len); + int r = ictx->aio_work_queue->discard(ofs, len); tracepoint(librbd, discard_exit, r); return r; } @@ -1909,7 +1909,7 @@ extern "C" ssize_t rbd_read(rbd_image_t image, uint64_t ofs, size_t len, { librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, read_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len); - int r = librbd::read(ictx, ofs, len, buf, 0); + int r = ictx->aio_work_queue->read(ofs, len, buf, 0); tracepoint(librbd, read_exit, r); return r; } @@ -1920,7 +1920,7 @@ extern "C" ssize_t rbd_read2(rbd_image_t image, uint64_t ofs, size_t len, librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, read2_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len, op_flags); - int r = librbd::read(ictx, ofs, len, buf, op_flags); + int r = ictx->aio_work_queue->read(ofs, len, buf, op_flags); tracepoint(librbd, read_exit, r); return r; } @@ -1987,7 +1987,7 @@ extern "C" ssize_t rbd_write(rbd_image_t image, uint64_t ofs, size_t len, { librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, write_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len, buf); - int r = librbd::write(ictx, ofs, len, buf, 0); + int r = ictx->aio_work_queue->write(ofs, len, buf, 0); tracepoint(librbd, write_exit, r); return r; } @@ -1998,7 +1998,7 @@ extern "C" ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len, librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, write2_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len, buf, op_flags); - int r = librbd::write(ictx, ofs, len, buf, op_flags); + int r = ictx->aio_work_queue->write(ofs, len, buf, op_flags); tracepoint(librbd, write_exit, r); return r; } @@ -2008,7 +2008,7 @@ extern "C" int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len) { librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, ofs, len); - int r = librbd::discard(ictx, ofs, len); + int r = ictx->aio_work_queue->discard(ofs, len); tracepoint(librbd, discard_exit, r); return r; } -- 2.39.5