From 08c2fda12cf46937a09a59bb032379c3c5321292 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 15 Dec 2014 10:53:53 -0500 Subject: [PATCH] librbd: complete all pending aio ops prior to closing image It was possible for an image to be closed while aio operations were still outstanding. Now all aio operations are tracked and completed before the image is closed. Fixes: #10299 Backport: giant, firefly, dumpling Signed-off-by: Jason Dillaman --- src/librbd/AioCompletion.h | 12 ++++++++ src/librbd/ImageCtx.cc | 14 +++++++-- src/librbd/ImageCtx.h | 10 +++++-- src/librbd/internal.cc | 6 ++-- src/test/librbd/test_librbd.cc | 55 ++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index aaccefef9fce..e28cd6a06054 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -93,6 +93,10 @@ namespace librbd { void init_time(ImageCtx *i, aio_type_t t) { ictx = i; + { + Mutex::Locker l(ictx->aio_lock); + ++ictx->pending_aio; + } aio_type = t; start_time = ceph_clock_now(ictx->cct); } @@ -114,6 +118,14 @@ namespace librbd { lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; break; } + + { + Mutex::Locker l(ictx->aio_lock); + assert(ictx->pending_aio != 0); + --ictx->pending_aio; + ictx->pending_aio_cond.Signal(); + } + if (complete_cb) { complete_cb(rbd_comp, complete_arg); } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index b5c2db6e7895..8fb8e37129c9 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -45,13 +45,15 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + aio_lock("librbd::ImageCtx::aio_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), format_string(NULL), id(image_id), parent(NULL), stripe_unit(0), stripe_count(0), - object_cacher(NULL), writeback_handler(NULL), object_set(NULL) + object_cacher(NULL), writeback_handler(NULL), object_set(NULL), + pending_aio(0) { md_ctx.dup(p); data_ctx.dup(p); @@ -586,6 +588,7 @@ namespace librbd { int r = flush_cache(); if (r) lderr(cct) << "flush_cache returned " << r << dendl; + wait_for_pending_aio(); cache_lock.Lock(); bool unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -655,5 +658,12 @@ namespace librbd { << ", object overlap " << len << " from image extents " << objectx << dendl; return len; - } + } + + void ImageCtx::wait_for_pending_aio() { + Mutex::Locker l(aio_lock); + while (pending_aio > 0) { + pending_aio_cond.Wait(aio_lock); + } + } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 83ed0449904a..5a0d63752e61 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -10,6 +10,7 @@ #include #include +#include "common/Cond.h" #include "common/Mutex.h" #include "common/RWLock.h" #include "common/snap_types.h" @@ -59,7 +60,8 @@ namespace librbd { /** * Lock ordering: - * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock + * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, + * aio_lock */ RWLock md_lock; // protects access to the mutable image metadata that // isn't guarded by other locks below @@ -68,6 +70,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex aio_lock; // protects pending_aio and pending_aio_cond unsigned extra_read_flags; @@ -89,6 +92,9 @@ namespace librbd { LibrbdWriteback *writeback_handler; ObjectCacher::ObjectSet *object_set; + Cond pending_aio_cond; + uint64_t pending_aio; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -147,7 +153,7 @@ namespace librbd { librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - + void wait_for_pending_aio(); }; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index f054037405c5..598d515cde4a 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2139,10 +2139,12 @@ reprotect_and_return_err: void close_image(ImageCtx *ictx) { ldout(ictx->cct, 20) << "close_image " << ictx << dendl; - if (ictx->object_cacher) + if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes - else + } else { flush(ictx); + ictx->wait_for_pending_aio(); + } if (ictx->parent) { close_image(ictx->parent); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 8997e8f12147..c37d884624d3 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -1910,6 +1910,61 @@ TEST(LibRBD, LargeCacheRead) ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); } +TEST(LibRBD, TestPendingAio) +{ + rados_t cluster; + rados_ioctx_t ioctx; + string pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool(pool_name, &cluster)); + rados_ioctx_create(cluster, pool_name.c_str(), &ioctx); + + int features = RBD_FEATURE_LAYERING; + rbd_image_t image; + int order = 0; + + std::string name = "testimg"; + + uint64_t size = 4 << 20; + ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order, + false, features)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + char test_data[TEST_IO_SIZE]; + for (size_t i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + + size_t num_aios = 256; + rbd_completion_t comps[num_aios]; + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i])); + rbd_aio_release(comps[i]); + } + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + + ASSERT_EQ(0, rbd_close(image)); + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(1, rbd_aio_is_complete(comps[i])); + rbd_aio_release(comps[i]); + } + + rados_ioctx_destroy(ioctx); + ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); -- 2.47.3