From 39a734fbf34ccd121f17023bcec814e61c8bdaab Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 3 Jan 2013 17:15:07 -0800 Subject: [PATCH] os/FileStore: fix non-btrfs op_seq commit order The op_seq file is the starting point for journal replay. For stable btrfs commit mode, which is using a snapshot as a reference, we should write this file before we take the snap. We normally ignore current/ contents anyway. On non-btrfs file systems, however, we should only write this file *after* we do a full sync, and we should then fsync(2) it before we continue (and potentially trim anything from the journal). This fixes a serious bug that could cause data loss and corruption after a power loss event. For a 'kill -9' or crash, however, there was little risk, since the writes were still captured by the host's cache. Fixes: #3721 Signed-off-by: Sage Weil Reviewed-by: Samuel Just (cherry picked from commit 28d59d374b28629a230d36b93e60a8474c902aa5) --- src/os/FileStore.cc | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 4ad69bbaa622f..9ab0e74b9c0fb 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -3660,11 +3660,6 @@ void FileStore::sync_entry() sync_epoch++; dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl; - int err = write_op_seq(op_fd, cp); - if (err < 0) { - derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; - assert(0); - } stringstream errstream; if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) { derr << errstream.str() << dendl; @@ -3672,6 +3667,11 @@ void FileStore::sync_entry() } if (btrfs_stable_commits) { + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } if (btrfs_snap_create_v2) { // be smart! @@ -3740,6 +3740,17 @@ void FileStore::sync_entry() dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl; sync_filesystem(basedir_fd); } + + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } + err = ::fsync(op_fd); + if (err < 0) { + derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during fsync of op_seq"); + } } utime_t done = ceph_clock_now(g_ceph_context); -- 2.39.5