From: Sage Weil Date: Fri, 26 Oct 2012 18:30:06 +0000 (-0700) Subject: librbd: fix race in AioCompletion that are still being built X-Git-Tag: v0.55~199 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=57a4cbbfb23ab10b466bf66ca56cfebe5225f894;p=ceph.git librbd: fix race in AioCompletion that are still being built When caching is enabled, it is possible for the I/O completion to happen before we call ->finish_adding_requests() (e.g., on cache read). When that happens, the final read request completion doesn't see pending_count == 0 and thus doesn't do all the final buffer construction that is necessary to return correct data. In particular, users will see zeroed buffers. test_librbd_fsx is turning this up consistently after several thousand ops with an image size of ~100MB and cloning disabled. This race was introduced with the extra logic added here for striping. Fix this by making a separate flag to indicate the completion is under construction, and make sure we call complete() when both pending_count==0 and building==false. 
Signed-off-by: Sage Weil Reviewed-by: Josh Durgin --- diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc index 3d1662ea5483..384d9bff023b 100644 --- a/src/librbd/AioCompletion.cc +++ b/src/librbd/AioCompletion.cc @@ -52,7 +52,8 @@ namespace librbd { } } - complete(); + if (!building) + complete(); } put_unlock(); } diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h index b3d73ea7fedc..03ebde6d2905 100644 --- a/src/librbd/AioCompletion.h +++ b/src/librbd/AioCompletion.h @@ -47,7 +47,8 @@ namespace librbd { callback_t complete_cb; void *complete_arg; rbd_completion_t rbd_comp; - int pending_count; + int pending_count; ///< number of requests + bool building; ///< true if we are still building this completion int ref; bool released; ImageCtx *ictx; @@ -61,7 +62,8 @@ namespace librbd { AioCompletion() : lock("AioCompletion::lock", true), done(false), rval(0), complete_cb(NULL), - complete_arg(NULL), rbd_comp(NULL), pending_count(1), + complete_arg(NULL), rbd_comp(NULL), + pending_count(0), building(true), ref(1), released(false), ictx(NULL), aio_type(AIO_TYPE_NONE), read_bl(NULL), read_buf(NULL), read_buf_len(0) { @@ -86,9 +88,9 @@ namespace librbd { void finish_adding_requests() { lock.Lock(); - assert(pending_count); - int count = --pending_count; - if (!count) { + assert(building); + building = false; + if (!pending_count) { complete(); } lock.Unlock();