From 10d055d65727e47deae4e459bc21aaa243c24a7d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 24 Jun 2016 09:23:21 -0400 Subject: [PATCH] os/bluestore: drop lock while we flush the log Handle cases where we have multiple racing threads trying to flush the log by only allowing one concurrent log flush to be in progress at a time, and behave correctly if, after flushing, there are no more dirty records to flush. Signed-off-by: Sage Weil --- src/os/bluestore/BlueFS.cc | 46 ++++++++++++++++++++++++++++---------- src/os/bluestore/BlueFS.h | 2 ++ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 5afc5d260f7c..55c7df226c35 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -1025,6 +1025,14 @@ void BlueFS::_pad_bl(bufferlist& bl) int BlueFS::_flush_and_sync_log(std::unique_lock& l) { + while (log_flushing) { + dout(10) << __func__ << " log is currently flushing, waiting" << dendl; + log_cond.wait(l); + } + if (log_t.empty()) { + dout(10) << __func__ << " " << log_t << " not dirty, no-op" << dendl; + return 0; + } uint64_t seq = log_t.seq = ++log_seq; log_t.uuid = super.uuid; dout(10) << __func__ << " " << log_t << dendl; @@ -1053,27 +1061,41 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l) log_t.clear(); log_t.seq = 0; // just so debug output is less confusing + log_flushing = true; flush_bdev(); int r = _flush(log_writer, true); assert(r == 0); + + // drop lock while we wait for io + l.unlock(); wait_for_aio(log_writer); flush_bdev(); + l.lock(); + + log_flushing = false; + log_cond.notify_all(); // clean dirty files - dout(20) << __func__ << " log_seq_stable " << seq << dendl; - log_seq_stable = seq; - dirty_file_list_t::iterator p = dirty_files.begin(); - while (p != dirty_files.end()) { - File *file = &(*p); - assert(file->dirty_seq > 0); - if (file->dirty_seq <= log_seq_stable) { - dout(20) << __func__ << " cleaned file " << file->fnode << dendl; - file->dirty_seq = 
0; - dirty_files.erase(p++); - } else { - ++p; + if (seq > log_seq_stable) { + log_seq_stable = seq; + dout(20) << __func__ << " log_seq_stable " << log_seq_stable << dendl; + dirty_file_list_t::iterator p = dirty_files.begin(); + while (p != dirty_files.end()) { + File *file = &(*p); + assert(file->dirty_seq > 0); + if (file->dirty_seq <= log_seq_stable) { + dout(20) << __func__ << " cleaned file " << file->fnode << dendl; + file->dirty_seq = 0; + dirty_files.erase(p++); + } else { + ++p; + } } + } else { + dout(20) << __func__ << " log_seq_stable " << log_seq_stable + << " already > out seq " << seq + << ", we lost a race against another log flush, done" << dendl; } _update_logger_stats(); diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 905f1e384007..5feb68329ddd 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -204,6 +204,8 @@ private: uint64_t log_seq_stable; ///< last stable/synced log seq FileWriter *log_writer; ///< writer for the log bluefs_transaction_t log_t; ///< pending, unwritten log transaction + bool log_flushing = false; ///< true while flushing the log + std::condition_variable log_cond; /* * There are up to 3 block devices: -- 2.47.3