os/bluestore/KernelDevice: make flush() thread safe

author Sage Weil <sage@redhat.com>

Thu, 9 Mar 2017 21:51:05 +0000 (16:51 -0500)

committer Nathan Cutler <ncutler@suse.com>

Tue, 4 Jul 2017 14:08:57 +0000 (16:08 +0200)
author Sage Weil <sage@redhat.com>
Thu, 9 Mar 2017 21:51:05 +0000 (16:51 -0500)
committer Nathan Cutler <ncutler@suse.com>
Tue, 4 Jul 2017 14:08:57 +0000 (16:08 +0200)
diff --git a/src/os/bluestore/KernelDevice.cc b/src/os/bluestore/KernelDevice.cc

index 4ed431fe0a4a4b393d12a0f4c8e5876d47386a90..5bc6768fded62385f669bb7748982cfcd901cd0e 100644 (file)
--- a/src/os/bluestore/KernelDevice.cc
+++ b/src/os/bluestore/KernelDevice.cc
@@ -28,7 +28,7 @@
  
  #define dout_subsys ceph_subsys_bdev
  #undef dout_prefix
-#define dout_prefix *_dout << "bdev(" << path << ") "
+#define dout_prefix *_dout << "bdev(" << this << " " << path << ") "
  
  KernelDevice::KernelDevice(aio_callback_t cb, void *cbpriv)
    : fd_direct(-1),
@@ -185,11 +185,23 @@ void KernelDevice::close()
  
  int KernelDevice::flush()
  {
-  bool ret = io_since_flush.compare_and_swap(1, 0);
-  if (!ret) {
-    dout(10) << __func__ << " no-op (no ios since last flush)" << dendl;
+  // protect flush with a mutex.  not that we are not really protected
+  // data here.  instead, we're ensuring that if any flush() caller
+  // sees that io_since_flush is true, they block any racing callers
+  // until the flush is observed.  that allows racing threads to be
+  // calling flush while still ensuring that *any* of them that got an
+  // aio completion notification will not return before that aio is
+  // stable on disk: whichever thread sees the flag first will block
+  // followers until the aio is stable.
+  std::lock_guard<std::mutex> l(flush_mutex);
+
+  bool expect = true;
+  if (!io_since_flush.compare_exchange_strong(expect, false)) {
+    dout(10) << __func__ << " no-op (no ios since last flush), flag is "
+            << (int)io_since_flush.load() << dendl;
      return 0;
    }
+
    dout(10) << __func__ << " start" << dendl;
    if (g_conf->bdev_inject_crash) {
      ++injecting_crash;
@@ -261,11 +273,21 @@ void KernelDevice::_aio_thread()
           std::lock_guard<std::mutex> l(debug_queue_lock);
           debug_aio_unlink(*aio[i]);
         }
+
+       // set flag indicating new ios have completed.  we do this *before*
+       // any completion or notifications so that any user flush() that
+       // follows the observed io completion will include this io.  Note
+       // that an earlier, racing flush() could observe and clear this
+       // flag, but that also ensures that the IO will be stable before the
+       // later flush() occurs.
+       io_since_flush.store(true);
+
         int r = aio[i]->get_return_value();
         dout(10) << __func__ << " finished aio " << aio[i] << " r " << r
                  << " ioc " << ioc
                  << " with " << left << " aios left" << dendl;
         assert(r >= 0);
+
         int left = --ioc->num_running;
         // NOTE: once num_running is decremented we can no longer
         // trust aio[] values; they my be freed (e.g., by BlueFS::_fsync)
@@ -510,8 +532,6 @@ int KernelDevice::aio_write(
        }
      }
    }
-
-  io_since_flush.set(1);
    return 0;
  }
  
diff --git a/src/os/bluestore/KernelDevice.h b/src/os/bluestore/KernelDevice.h

index 54e315f7f257840441eb9d0ec1cbee6e56d850d3..a9e4c8e9e54ec98de9ba62183e0959cfb5c9581d 100644 (file)
--- a/src/os/bluestore/KernelDevice.h
+++ b/src/os/bluestore/KernelDevice.h
@@ -35,7 +35,8 @@ class KernelDevice : public BlockDevice {
    interval_set<uint64_t> debug_inflight;
  
    Mutex flush_lock;
-  atomic_t io_since_flush;
+  std::atomic<bool> io_since_flush;
+  std::mutex flush_mutex;
  
    FS::aio_queue_t aio_queue;
    aio_callback_t aio_callback;
author	Sage Weil <sage@redhat.com>
	Thu, 9 Mar 2017 21:51:05 +0000 (16:51 -0500)
committer	Nathan Cutler <ncutler@suse.com>
	Tue, 4 Jul 2017 14:08:57 +0000 (16:08 +0200)
src/os/bluestore/KernelDevice.cc		patch \| blob \| history
src/os/bluestore/KernelDevice.h		patch \| blob \| history