From 6ffaa54c9b32575d9d633c47f7fe2c3dcf072000 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 28 Nov 2016 15:56:29 -0500 Subject: [PATCH] os/bluestore/KernelDevice: fix race in aio_thread vs aio_wait The caller is free to destroy the aio vector contents once aio_wait completes. This is exactly what BlueFS::_fsync() does. Delay the num_running dec (which is what aio_wait waits for) until after we've examined the aios. Fixes: http://tracker.ceph.com/issues/17824 Signed-off-by: Sage Weil --- src/os/bluestore/KernelDevice.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc index b49032b2405..56173bb8e4a 100644 --- a/src/os/bluestore/KernelDevice.cc +++ b/src/os/bluestore/KernelDevice.cc @@ -262,12 +262,14 @@ void KernelDevice::_aio_thread() std::lock_guard l(debug_queue_lock); debug_aio_unlink(*aio[i]); } - int left = --ioc->num_running; int r = aio[i]->get_return_value(); dout(10) << __func__ << " finished aio " << aio[i] << " r " << r << " ioc " << ioc << " with " << left << " aios left" << dendl; assert(r >= 0); + int left = --ioc->num_running; + // NOTE: once num_running is decremented we can no longer + // trust aio[] values; they may be freed (e.g., by BlueFS::_fsync) if (left == 0) { // check waiting count before doing callback (which may // destroy this ioc). -- 2.39.5