From: Jason Dillaman Date: Mon, 15 Dec 2014 15:53:53 +0000 (-0500) Subject: librbd: complete all pending aio ops prior to closing image X-Git-Tag: v0.91~55^2~1^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F3181%2Fhead;p=ceph.git librbd: complete all pending aio ops prior to closing image It was possible for an image to be closed while aio operations were still outstanding. Now all aio operations are tracked and completed before the image is closed. Fixes: #10299 Backport: giant, firefly, dumpling Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 3ff12233b61c..32a91333e1fe 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -86,6 +86,14 @@ namespace librbd { lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; break; } + + { + Mutex::Locker l(ictx->aio_lock); + assert(ictx->pending_aio != 0); + --ictx->pending_aio; + ictx->pending_aio_cond.Signal(); + } + if (complete_cb) { complete_cb(rbd_comp, complete_arg); } diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index 58eec66b8513..3aa5bd5422a8 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -87,6 +87,10 @@ namespace librbd { void init_time(ImageCtx *i, aio_type_t t) { ictx = i; + { + Mutex::Locker l(ictx->aio_lock); + ++ictx->pending_aio; + } aio_type = t; start_time = ceph_clock_now(ictx->cct); } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index c9b74f232a8e..d444492ae557 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -45,6 +45,7 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + aio_lock("librbd::ImageCtx::aio_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), @@ -53,7 +54,8 @@ namespace librbd { stripe_unit(0), stripe_count(0), object_cacher(NULL), writeback_handler(NULL), object_set(NULL), readahead(), - total_bytes_read(0) + total_bytes_read(0), + pending_aio(0) { md_ctx.dup(p); data_ctx.dup(p); @@ -583,6 +585,7 @@ namespace librbd { } else if (r) { lderr(cct) << "flush_cache returned " << r << dendl; } + wait_for_pending_aio(); cache_lock.Lock(); loff_t unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -652,5 +655,12 @@ namespace librbd { << ", object overlap " << len << " from image extents " << objectx << dendl; return len; - } + } + + void ImageCtx::wait_for_pending_aio() { + Mutex::Locker l(aio_lock); + while (pending_aio > 0) { + pending_aio_cond.Wait(aio_lock); + } + } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 1a90f0c4be72..75d176de0b5a 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -10,6 +10,7 @@ #include #include +#include "common/Cond.h" #include "common/Mutex.h" #include "common/Readahead.h" #include "common/RWLock.h" @@ -61,7 +62,8 @@ namespace librbd { /** * Lock ordering: - * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock + * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, + * aio_lock */ RWLock md_lock; // protects access to the mutable image metadata that // isn't guarded by other locks below @@ -70,6 +72,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex aio_lock; // protects pending_aio and pending_aio_cond unsigned extra_read_flags; @@ -94,6 +97,9 @@ namespace librbd { Readahead readahead; uint64_t total_bytes_read; + Cond pending_aio_cond; + uint64_t pending_aio; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -157,7 +163,7 @@ namespace librbd { librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - + void wait_for_pending_aio(); }; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index d84361721c08..6452e4c46bab 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2156,11 +2156,12 @@ reprotect_and_return_err: ldout(ictx->cct, 20) << "close_image " << ictx << dendl; ictx->readahead.wait_for_pending(); - - if (ictx->object_cacher) + if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes - else + } else { flush(ictx); + ictx->wait_for_pending_aio(); + } if (ictx->parent) { close_image(ictx->parent); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 919de2af7448..a2372dd28b7f 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -2044,6 +2044,57 @@ TEST_F(TestLibRBD, LargeCacheRead) rados_ioctx_destroy(ioctx); } +TEST_F(TestLibRBD, TestPendingAio) +{ + rados_ioctx_t ioctx; + rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); + + int features = RBD_FEATURE_LAYERING; + rbd_image_t image; + int order = 0; + + std::string name = get_temp_image_name(); + + uint64_t size = 4 << 20; + ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order, + false, features)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + char test_data[TEST_IO_SIZE]; + for (size_t i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + + size_t num_aios = 256; + rbd_completion_t comps[num_aios]; + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i])); + rbd_aio_release(comps[i]); + } + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + + ASSERT_EQ(0, rbd_close(image)); + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(1, rbd_aio_is_complete(comps[i])); + rbd_aio_release(comps[i]); + } + + rados_ioctx_destroy(ioctx); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv);