From: Sage Weil
Date: Fri, 4 Jan 2013 01:15:07 +0000 (-0800)
Subject: os/FileStore: fix non-btrfs op_seq commit order
X-Git-Tag: v0.48.3argonaut~5
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=39a734fbf34ccd121f17023bcec814e61c8bdaab;p=ceph.git

os/FileStore: fix non-btrfs op_seq commit order

The op_seq file is the starting point for journal replay. For stable btrfs commit mode, which is using a snapshot as a reference, we should write this file before we take the snap. We normally ignore current/ contents anyway.

On non-btrfs file systems, however, we should only write this file *after* we do a full sync, and we should then fsync(2) it before we continue (and potentially trim anything from the journal).

This fixes a serious bug that could cause data loss and corruption after a power loss event. For a 'kill -9' or crash, however, there was little risk, since the writes were still captured by the host's cache.

Fixes: #3721
Signed-off-by: Sage Weil
Reviewed-by: Samuel Just
(cherry picked from commit 28d59d374b28629a230d36b93e60a8474c902aa5)
---

diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 4ad69bbaa622..9ab0e74b9c0f 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -3660,11 +3660,6 @@ void FileStore::sync_entry()
 sync_epoch++;
 dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl;
- int err = write_op_seq(op_fd, cp);
- if (err < 0) {
- derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
- assert(0);
- }
 stringstream errstream;
 if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) {
 derr << errstream.str() << dendl;
@@ -3672,6 +3667,11 @@ void FileStore::sync_entry()
 }
 if (btrfs_stable_commits) {
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
 if (btrfs_snap_create_v2) {
 // be smart!
@@ -3740,6 +3740,17 @@ void FileStore::sync_entry()
 dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl;
 sync_filesystem(basedir_fd);
 }
+
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
+ err = ::fsync(op_fd);
+ if (err < 0) {
+ derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during fsync of op_seq");
+ }
 }
 utime_t done = ceph_clock_now(g_ceph_context);