From 081f49b47ca8d7583211f546ab5699b14f773bfc Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 15 Dec 2014 10:53:53 -0500 Subject: [PATCH] librbd: complete all pending aio ops prior to closing image It was possible for an image to be closed while aio operations were still outstanding. Now all aio operations are tracked and completed before the image is closed. Fixes: #10299 Backport: giant, firefly, dumpling Signed-off-by: Jason Dillaman --- src/librbd/AioCompletion.cc | 8 +++++ src/librbd/AioCompletion.h | 4 +++ src/librbd/ImageCtx.cc | 14 +++++++-- src/librbd/ImageCtx.h | 10 +++++-- src/librbd/internal.cc | 6 ++-- src/test/librbd/test_librbd.cc | 55 ++++++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 3ff12233b61cb..32a91333e1fed 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -86,6 +86,14 @@ namespace librbd { lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; break; } + + { + Mutex::Locker l(ictx->aio_lock); + assert(ictx->pending_aio != 0); + --ictx->pending_aio; + ictx->pending_aio_cond.Signal(); + } + if (complete_cb) { complete_cb(rbd_comp, complete_arg); } diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index 58eec66b8513d..3aa5bd5422a87 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -87,6 +87,10 @@ namespace librbd { void init_time(ImageCtx *i, aio_type_t t) { ictx = i; + { + Mutex::Locker l(ictx->aio_lock); + ++ictx->pending_aio; + } aio_type = t; start_time = ceph_clock_now(ictx->cct); } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 77211d7c9eaee..50d81e447e148 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -45,13 +45,15 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + aio_lock("librbd::ImageCtx::aio_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), format_string(NULL), id(image_id), parent(NULL), stripe_unit(0), stripe_count(0), - object_cacher(NULL), writeback_handler(NULL), object_set(NULL) + object_cacher(NULL), writeback_handler(NULL), object_set(NULL), + pending_aio(0) { md_ctx.dup(p); data_ctx.dup(p); @@ -576,6 +578,7 @@ namespace librbd { } else if (r) { lderr(cct) << "flush_cache returned " << r << dendl; } + wait_for_pending_aio(); cache_lock.Lock(); loff_t unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -645,5 +648,12 @@ namespace librbd { << ", object overlap " << len << " from image extents " << objectx << dendl; return len; - } + } + + void ImageCtx::wait_for_pending_aio() { + Mutex::Locker l(aio_lock); + while (pending_aio > 0) { + pending_aio_cond.Wait(aio_lock); + } + } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 2c56461ab41aa..00cb9502202e9 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -10,6 +10,7 @@ #include #include +#include "common/Cond.h" #include "common/Mutex.h" #include "common/RWLock.h" #include "common/snap_types.h" @@ -60,7 +61,8 @@ namespace librbd { /** * Lock ordering: - * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock + * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, + * aio_lock */ RWLock md_lock; // protects access to the mutable image metadata that // isn't guarded by other locks below @@ -69,6 +71,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex aio_lock; // protects pending_aio and pending_aio_cond unsigned extra_read_flags; @@ -90,6 +93,9 @@ namespace librbd { LibrbdWriteback *writeback_handler; ObjectCacher::ObjectSet *object_set; + Cond pending_aio_cond; + uint64_t pending_aio; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -154,7 +160,7 @@ namespace librbd { librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - + void wait_for_pending_aio(); }; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 9f1c943a91487..7b26c54a37a08 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2134,10 +2134,12 @@ reprotect_and_return_err: void close_image(ImageCtx *ictx) { ldout(ictx->cct, 20) << "close_image " << ictx << dendl; - if (ictx->object_cacher) + if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes - else + } else { flush(ictx); + ictx->wait_for_pending_aio(); + } if (ictx->parent) { close_image(ictx->parent); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 2f0e5d26be888..6905c460b49e6 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -1855,6 +1855,61 @@ TEST(LibRBD, ZeroLengthRead) ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); } +TEST(LibRBD, TestPendingAio) +{ + rados_t cluster; + rados_ioctx_t ioctx; + string pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool(pool_name, &cluster)); + rados_ioctx_create(cluster, pool_name.c_str(), &ioctx); + + int features = RBD_FEATURE_LAYERING; + rbd_image_t image; + int order = 0; + + std::string name = "testimg"; + + uint64_t size = 4 << 20; + ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order, + false, features)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + char test_data[TEST_IO_SIZE]; + for (size_t i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + + size_t num_aios = 256; + rbd_completion_t comps[num_aios]; + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i])); + rbd_aio_release(comps[i]); + } + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + + ASSERT_EQ(0, rbd_close(image)); + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(1, rbd_aio_is_complete(comps[i])); + rbd_aio_release(comps[i]); + } + + rados_ioctx_destroy(ioctx); + ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); -- 2.39.5