From: Sage Weil Date: Fri, 20 Oct 2017 13:51:17 +0000 (-0500) Subject: os/bluestore/BlueFS: fix race with log flush during async log compaction X-Git-Tag: v12.2.2~123^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9f49698fab5de9ade79d3aba957afe82daa4fecc;p=ceph.git os/bluestore/BlueFS: fix race with log flush during async log compaction During async log compaction we rely on _flush-and_sync_log to update the log_writer to jump_to. However, if racing threads are also trying to flush the log and manage to flush our new log events for us, then our flush will turn into a no-op, and we won't update jump_to correctly at all. This results in a corrupted log size a bit later one. Fix by ensuring that there are no in-progress flushes before we add our log entries. Also, add asserts to _flush_and_sync_log to make sure we never bail out early if jump_to is set (which would indicate this or another similar bug is still present). Fixes: http://tracker.ceph.com/issues/21878 Signed-off-by: Sage Weil (cherry picked from commit 4324c8bc7e66633035c15995e3f82ef91d3a5e8c) --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 0f570893ba0a..4d07b48c20ed 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -1201,6 +1201,14 @@ void BlueFS::_compact_log_async(std::unique_lock& l) new_log = new File; new_log->fnode.ino = 0; // so that _flush_range won't try to log the fnode + // 0. wait for any racing flushes to complete. (We do not want to block + // in _flush_sync_log with jump_to set or else a racing thread might flush + // our entries and our jump_to update won't be correct.) + while (log_flushing) { + dout(10) << __func__ << " log is currently flushing, waiting" << dendl; + log_cond.wait(l); + } + // 1. allocate new log space and jump to it. old_log_jump_to = log_file->fnode.get_allocated(); uint64_t need = old_log_jump_to + cct->_conf->bluefs_max_log_runway; @@ -1361,16 +1369,19 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, while (log_flushing) { dout(10) << __func__ << " want_seq " << want_seq << " log is currently flushing, waiting" << dendl; + assert(!jump_to); log_cond.wait(l); } if (want_seq && want_seq <= log_seq_stable) { dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " << log_seq_stable << ", done" << dendl; + assert(!jump_to); return 0; } if (log_t.empty() && dirty_files.empty()) { dout(10) << __func__ << " want_seq " << want_seq << " " << log_t << " not dirty, dirty_files empty, no-op" << dendl; + assert(!jump_to); return 0; }