From 7a747bc55381b840d76202c9897f7a0ab8d143b3 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Thu, 7 Nov 2024 19:00:27 +0300 Subject: [PATCH] cephfs-mirror: remove redundant ceph_close() calls. https://github.com/ceph/ceph/pull/55619 eliminates the need for ceph_close() call after successful ceph_fdopendir() one. And introduces automatic file descriptor's close when corresponding ceph_closedir() is called. That hasty ceph_close() call makes file descriptor available for a new allocation which might conflict with the automatic fd close in the above ceph_closedir(). Full PeerReplayer::do_synchronize() has been reworked to close fds properly, depending on whether ceph_fdopendir() has been already applied to them. Additionally for the sake of uniformity this reworks incremental do_synchronize() in a way to do final fd closings similar to full implementation. Plus this effectively reverts https://github.com/ceph/ceph/pull/56118/commits/bd78bdca3d7d7659b7ec0f12b77a2002282fec13 as it looks like a wrong approach to fight broken file descriptor references. No much sense in reopening of the current snapshot's root folder on each new entry processing. Instead this patch just doesn't close it from the beginning. Fixes: https://tracker.ceph.com/issues/68853 Signed-off-by: Igor Fedotov --- src/tools/cephfs_mirror/PeerReplayer.cc | 70 +++++++++++++++---------- src/tools/cephfs_mirror/PeerReplayer.h | 1 - 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/tools/cephfs_mirror/PeerReplayer.cc b/src/tools/cephfs_mirror/PeerReplayer.cc index 91117cf5f2b68..77e93ef6a9935 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.cc +++ b/src/tools/cephfs_mirror/PeerReplayer.cc @@ -120,7 +120,9 @@ int opendirat(MountRef mnt, int dirfd, const std::string &relpath, int flags, int fd = r; r = ceph_fdopendir(mnt, fd, dirp); - ceph_close(mnt, fd); + if (r < 0) { + ceph_close(mnt, fd); + } return r; } @@ -1222,15 +1224,6 @@ int PeerReplayer::sync_perms(const std::string& path) { return 0; } -void PeerReplayer::post_sync_close_handles(const FHandles &fh) { - dout(20) << dendl; - - // @FHandles.r_fd_dir_root is closed in @unregister_directory since - // its used to acquire an exclusive lock on remote dir_root. - ceph_close(m_local_mount, fh.c_fd); - ceph_close(fh.p_mnt, fh.p_fd); -} - int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot ¤t) { dout(20) << ": dir_root=" << dir_root << ", current=" << current << dendl; FHandles fh; @@ -1240,10 +1233,6 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu return r; } - BOOST_SCOPE_EXIT_ALL( (this)(&fh) ) { - post_sync_close_handles(fh); - }; - // record that we are going to "dirty" the data under this // directory root auto snap_id_str{stringify(current.second)}; @@ -1252,6 +1241,8 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": error setting \"ceph.mirror.dirty_snap_id\" on dir_root=" << dir_root << ": " << cpp_strerror(r) << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } @@ -1263,6 +1254,8 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": failed to stat snap=" << current.first << ": " << cpp_strerror(r) << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } @@ -1271,8 +1264,12 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": failed to open local snap=" << current.first << ": " << cpp_strerror(r) << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } + // starting from this point we shouldn't care about manual closing of fh.c_fd, + // it will be closed automatically when bound tdirp is closed. std::stack sync_stack; sync_stack.emplace(SyncEntry(".", tdirp, tstx)); @@ -1282,12 +1279,6 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu break; } - r = pre_sync_check_and_open_handles(dir_root, current, boost::none, &fh); - if (r < 0) { - dout(5) << ": cannot proceed with sync: " << cpp_strerror(r) << dendl; - return r; - } - dout(20) << ": " << sync_stack.size() << " entries in stack" << dendl; std::string e_name; auto &entry = sync_stack.top(); @@ -1390,6 +1381,18 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu sync_stack.pop(); } + dout(20) << " cur:" << fh.c_fd + << " prev:" << fh.p_fd + << " ret = " << r + << dendl; + + // @FHandles.r_fd_dir_root is closed in @unregister_directory since + // its used to acquire an exclusive lock on remote dir_root. + + // c_fd has been used in ceph_fdopendir call so + // there is no need to close this fd manually. + ceph_close(fh.p_mnt, fh.p_fd); + return r; } @@ -1409,9 +1412,6 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu return r; } - BOOST_SCOPE_EXIT_ALL( (this)(&fh) ) { - post_sync_close_handles(fh); - }; // record that we are going to "dirty" the data under this directory root auto snap_id_str{stringify(current.second)}; @@ -1420,6 +1420,8 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": error setting \"ceph.mirror.dirty_snap_id\" on dir_root=" << dir_root << ": " << cpp_strerror(r) << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } @@ -1431,6 +1433,8 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu if (r < 0) { derr << ": failed to stat snap=" << current.first << ": " << cpp_strerror(r) << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } @@ -1450,11 +1454,6 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu dout(0) << ": backing off r=" << r << dendl; break; } - r = pre_sync_check_and_open_handles(dir_root, current, prev, &fh); - if (r < 0) { - dout(5) << ": cannot proceed with sync: " << cpp_strerror(r) << dendl; - return r; - } dout(20) << ": " << sync_queue.size() << " entries in queue" << dendl; const auto &queue_entry = sync_queue.front(); @@ -1464,12 +1463,16 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu stringify((*prev).first).c_str(), current.first.c_str(), &sd_info); if (r != 0) { derr << ": failed to open snapdiff, r=" << r << dendl; + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } while (0 < (r = ceph_readdir_snapdiff(&sd_info, &sd_entry))) { if (r < 0) { derr << ": failed to read directory=" << epath << dendl; ceph_close_snapdiff(&sd_info); + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } @@ -1561,6 +1564,17 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu } sync_queue.pop(); } + + dout(20) << " current:" << fh.c_fd + << " prev:" << fh.p_fd + << " ret = " << r + << dendl; + + // @FHandles.r_fd_dir_root is closed in @unregister_directory since + // its used to acquire an exclusive lock on remote dir_root. + + ceph_close(m_local_mount, fh.c_fd); + ceph_close(fh.p_mnt, fh.p_fd); return r; } diff --git a/src/tools/cephfs_mirror/PeerReplayer.h b/src/tools/cephfs_mirror/PeerReplayer.h index 933cb182635ba..32c71301f0068 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.h +++ b/src/tools/cephfs_mirror/PeerReplayer.h @@ -307,7 +307,6 @@ private: int open_dir(MountRef mnt, const std::string &dir_path, boost::optional snap_id); int pre_sync_check_and_open_handles(const std::string &dir_root, const Snapshot ¤t, boost::optional prev, FHandles *fh); - void post_sync_close_handles(const FHandles &fh); int do_synchronize(const std::string &dir_root, const Snapshot ¤t, boost::optional prev); -- 2.39.5