From: Kotresh HR Date: Sun, 15 Feb 2026 19:38:00 +0000 (+0530) Subject: tools/cephfs_mirror: Do remote fs sync once instead of fsync on each fd X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dcd6a5985d96f329e46d7cf297463ab805c119e2;p=ceph-ci.git tools/cephfs_mirror: Do remote fs sync once instead of fsync on each fd Doing a remote fs sync once, just before taking the snapshot, is faster than doing an fsync on each fd. Moreover, all the datasync threads use the same single libcephfs connection, and doing ceph_fsync concurrently on different fds on a single libcephfs connection could cause a hang, as observed in testing (see the stack trace below). ----- Thread 2 (Thread 0xffff644cc400 (LWP 74020) "d_replayer-0"): 0 0x0000ffff8e82656c in __futex_abstimed_wait_cancelable64 () from /lib64/libc.so.6 1 0x0000ffff8e828ff0 [PAC] in pthread_cond_wait@@GLIBC_2.17 () from /lib64/libc.so.6 2 0x0000ffff8fc90fd4 [PAC] in ceph::condition_variable_debug::wait ... 3 0x0000ffff9080fc9c in ceph::condition_variable_debug::wait --- diff --git a/src/tools/cephfs_mirror/PeerReplayer.cc b/src/tools/cephfs_mirror/PeerReplayer.cc index 9ce6ed3288f..81f998193ef 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.cc +++ b/src/tools/cephfs_mirror/PeerReplayer.cc @@ -806,11 +806,6 @@ int PeerReplayer::copy_to_remote(const std::string &dir_root, const std::string << cpp_strerror(r) << dendl; goto freeptr; } - r = ceph_fsync(m_remote_mount, r_fd, 0); - if (r < 0) { - derr << ": failed to sync data for file path=" << epath << ": " - << cpp_strerror(r) << dendl; - } } freeptr: @@ -2016,7 +2011,14 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu bool datasync_err = syncm->wait_until_safe_to_snapshot(); if (r == 0 && !datasync_err) { - // All good, take the snapshot + // All good, fsync remote fs and take the snapshot + dout(20) << ": syncing remote filesystem, dir_root=" << dir_root << dendl; + r = ceph_sync_fs(m_remote_mount); + if (r < 0) { + derr << ": failed to 
sync remote filesystem, dir_root=" << dir_root + << ": " << cpp_strerror(r) << dendl; + return r; + } auto cur_snap_id_str{stringify(current.second)}; snap_metadata snap_meta[] = {{PRIMARY_SNAP_ID_KEY.c_str(), cur_snap_id_str.c_str()}}; r = ceph_mksnap(m_remote_mount, dir_root.c_str(), current.first.c_str(), 0755, @@ -2024,6 +2026,7 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": failed to snap remote directory dir_root=" << dir_root << ": " << cpp_strerror(r) << dendl; + return r; } } else if (datasync_err) { r = syncm->get_datasync_errno();