From 37a99694b1b4637b3b41b312809660d5139e8a23 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 15 Dec 2014 10:53:53 -0500 Subject: [PATCH] librbd: complete all pending aio ops prior to closing image It was possible for an image to be closed while aio operations were still outstanding. Now all aio operations are tracked and completed before the image is closed. Fixes: #10299 Backport: giant, firefly, dumpling Signed-off-by: Jason Dillaman --- src/librbd/AioCompletion.cc | 8 ++++++ src/librbd/AioCompletion.h | 4 +++ src/librbd/ImageCtx.cc | 14 ++++++++-- src/librbd/ImageCtx.h | 10 +++++-- src/librbd/internal.cc | 7 +++-- src/test/librbd/test_librbd.cc | 51 ++++++++++++++++++++++++++++++++++ 6 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 3ff12233b61cb..32a91333e1fed 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -86,6 +86,14 @@ namespace librbd { lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; break; } + + { + Mutex::Locker l(ictx->aio_lock); + assert(ictx->pending_aio != 0); + --ictx->pending_aio; + ictx->pending_aio_cond.Signal(); + } + if (complete_cb) { complete_cb(rbd_comp, complete_arg); } diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index 58eec66b8513d..3aa5bd5422a87 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -87,6 +87,10 @@ namespace librbd { void init_time(ImageCtx *i, aio_type_t t) { ictx = i; + { + Mutex::Locker l(ictx->aio_lock); + ++ictx->pending_aio; + } aio_type = t; start_time = ceph_clock_now(ictx->cct); } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index c9b74f232a8e1..d444492ae5570 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -45,6 +45,7 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + aio_lock("librbd::ImageCtx::aio_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), @@ -53,7 +54,8 @@ namespace librbd { stripe_unit(0), stripe_count(0), object_cacher(NULL), writeback_handler(NULL), object_set(NULL), readahead(), - total_bytes_read(0) + total_bytes_read(0), + pending_aio(0) { md_ctx.dup(p); data_ctx.dup(p); @@ -583,6 +585,7 @@ namespace librbd { } else if (r) { lderr(cct) << "flush_cache returned " << r << dendl; } + wait_for_pending_aio(); cache_lock.Lock(); loff_t unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -652,5 +655,12 @@ namespace librbd { << ", object overlap " << len << " from image extents " << objectx << dendl; return len; - } + } + + void ImageCtx::wait_for_pending_aio() { + Mutex::Locker l(aio_lock); + while (pending_aio > 0) { + pending_aio_cond.Wait(aio_lock); + } + } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 1a90f0c4be72b..75d176de0b5a0 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -10,6 +10,7 @@ #include #include +#include "common/Cond.h" #include "common/Mutex.h" #include "common/Readahead.h" #include "common/RWLock.h" @@ -61,7 +62,8 @@ namespace librbd { /** * Lock ordering: - * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock + * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, + * aio_lock */ RWLock md_lock; // protects access to the mutable image metadata that // isn't guarded by other locks below @@ -70,6 +72,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex aio_lock; // protects pending_aio and pending_aio_cond unsigned extra_read_flags; @@ -94,6 +97,9 @@ namespace librbd { Readahead readahead; uint64_t total_bytes_read; + Cond pending_aio_cond; + uint64_t pending_aio; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -157,7 +163,7 @@ namespace librbd { librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - + void wait_for_pending_aio(); }; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index d84361721c082..6452e4c46babb 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2156,11 +2156,12 @@ reprotect_and_return_err: ldout(ictx->cct, 20) << "close_image " << ictx << dendl; ictx->readahead.wait_for_pending(); - - if (ictx->object_cacher) + if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes - else + } else { flush(ictx); + ictx->wait_for_pending_aio(); + } if (ictx->parent) { close_image(ictx->parent); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 919de2af7448e..a2372dd28b7f0 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -2044,6 +2044,57 @@ TEST_F(TestLibRBD, LargeCacheRead) rados_ioctx_destroy(ioctx); } +TEST_F(TestLibRBD, TestPendingAio) +{ + rados_ioctx_t ioctx; + rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); + + int features = RBD_FEATURE_LAYERING; + rbd_image_t image; + int order = 0; + + std::string name = get_temp_image_name(); + + uint64_t size = 4 << 20; + ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order, + false, features)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + char test_data[TEST_IO_SIZE]; + for (size_t i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + + size_t num_aios = 256; + rbd_completion_t comps[num_aios]; + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i])); + rbd_aio_release(comps[i]); + } + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + + ASSERT_EQ(0, rbd_close(image)); + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(1, rbd_aio_is_complete(comps[i])); + rbd_aio_release(comps[i]); + } + + rados_ioctx_destroy(ioctx); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); -- 2.39.5