From: Kotresh HR Date: Fri, 8 May 2026 00:22:59 +0000 (+0530) Subject: tools/cephfs_mirror: Add datasync_queue_wait_duration metric X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=66c05f080b222897e3d28703de008e71f9721053;p=ceph.git tools/cephfs_mirror: Add datasync_queue_wait_duration metric Add the metric which measures the time spent by the snapshot in the data queue waiting for the datasync threads. Sample output: When still 'waiting' in queue { "/d1": { "state": "syncing", "current_syncing_snap": { "id": 18, "name": "d1_snap5", "sync-mode": "delta", "avg_read_throughput_bytes": "0.00 B/s", "avg_write_throughput_bytes": "0.00 B/s", "crawl": { "state": "in-progress", "duration": "13s" }, "datasync_queue_wait": { "state": "waiting", "duration": "12s" }, "bytes": { "sync_bytes": "0.00 B", "total_bytes": "110.99 MiB", "sync_percent": "0.00%" }, "files": { "sync_files": 0, "total_files": 3719, "sync_percent": "0.00%" }, "eta": "calculating..." }, "last_synced_snap": { "id": 15, "name": "d1_snap4" }, "snaps_synced": 0, "snaps_deleted": 0, "snaps_renamed": 0 }, } --------------- After 'complete' { "/d1": { "state": "syncing", "current_syncing_snap": { "id": 18, "name": "d1_snap5", "sync-mode": "delta", "avg_read_throughput_bytes": "11.66 MiB/s", "avg_write_throughput_bytes": "34.55 MiB/s", "crawl": { "state": "completed", "duration": "17s" }, "datasync_queue_wait": { "state": "completed", "duration": "19s" }, "bytes": { "sync_bytes": "149.94 MiB", "total_bytes": "149.94 MiB", "sync_percent": "100.00%" }, "files": { "sync_files": 5000, "total_files": 5000, "sync_percent": "100.00%" }, "eta": "0s" }, "last_synced_snap": { "id": 15, "name": "d1_snap4" }, "snaps_synced": 0, "snaps_deleted": 0, "snaps_renamed": 0 } } ----- Also stored in last_sync_snap section { "/d1": { "state": "idle", "last_synced_snap": { "id": 18, "name": "d1_snap5", "crawl_duration": "17s", "datasync_queue_wait_duration": "19s", "sync_duration": "44s", "sync_time_stamp": "8172.009480s", "sync_bytes": "149.94 MiB", "sync_files": 5000 }, "snaps_synced": 1, "snaps_deleted": 0, "snaps_renamed": 0 } } Fixes: https://tracker.ceph.com/issues/73453 Signed-off-by: Kotresh HR --- diff --git a/src/tools/cephfs_mirror/PeerReplayer.cc b/src/tools/cephfs_mirror/PeerReplayer.cc index 334567b0c9d..11d4c7f67fa 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.cc +++ b/src/tools/cephfs_mirror/PeerReplayer.cc @@ -1353,10 +1353,17 @@ PeerReplayer::SyncMechanism::~SyncMechanism() { void PeerReplayer::SyncMechanism::push_dataq_entry(SyncEntry e) { dout(10) << ": snapshot data replayer dataq pushed" << " syncm=" << this << " epath=" << e.epath << dendl; - m_peer_replayer.inc_total_bytes_files(std::string(m_dir_root), e.stx.stx_size); + const uint64_t entry_bytes = e.stx.stx_size; std::unique_lock lock(sdq_lock); + if (!m_first_dataq_push_time) { + m_first_dataq_push_time = clock::now(); + m_peer_replayer.set_datasync_queue_wait_start_time(std::string(m_dir_root), + *m_first_dataq_push_time); + } m_sync_dataq.push(std::move(e)); sdq_cv.notify_all(); + lock.unlock(); + m_peer_replayer.inc_total_bytes_files(std::string(m_dir_root), entry_bytes); } bool PeerReplayer::SyncMechanism::pop_dataq_entry(SyncEntry &out_entry) { @@ -1402,6 +1409,12 @@ bool PeerReplayer::SyncMechanism::pop_dataq_entry(SyncEntry &out_entry) { return false; // no more work } + if (!m_datasync_queue_wait_reported && m_first_dataq_push_time) { + sec_duration dq_wait{0}; + dq_wait = sec_duration(clock::now() - *m_first_dataq_push_time); + m_peer_replayer.set_datasync_queue_wait_duration(std::string(m_dir_root), dq_wait.count()); + m_datasync_queue_wait_reported = true; + } out_entry = std::move(m_sync_dataq.front()); m_sync_dataq.pop(); dout(10) << ": snapshot data replayer dataq popped" << " syncm=" << this @@ -2749,6 +2762,22 @@ void PeerReplayer::peer_status(Formatter *f) { f->dump_string("duration", format_time(crawl_duration_till_now.count())); } f->close_section(); //crawl + if (sync_stat.datasync_queue_wait_duration || + sync_stat.datasync_queue_wait_start_time) { + f->open_object_section("datasync_queue_wait"); + if (sync_stat.datasync_queue_wait_duration) { + f->dump_string("state", "completed"); + f->dump_string("duration", + format_time(*sync_stat.datasync_queue_wait_duration)); + } else { + f->dump_string("state", "waiting"); + auto cur_time = clock::now(); + sec_duration dq_wait{0}; + dq_wait = sec_duration(cur_time - *sync_stat.datasync_queue_wait_start_time); + f->dump_string("duration", format_time(dq_wait.count())); + } + f->close_section(); + } f->open_object_section("bytes"); f->dump_string("sync_bytes", format_bytes(sync_stat.sync_bytes)); f->dump_string("total_bytes", format_bytes(sync_stat.total_bytes)); @@ -2783,6 +2812,10 @@ void PeerReplayer::peer_status(Formatter *f) { if (sync_stat.last_sync_crawl_duration) { f->dump_string("crawl_duration", format_time(*sync_stat.last_sync_crawl_duration)); } + if (sync_stat.last_sync_datasync_queue_wait_duration) { + f->dump_string("datasync_queue_wait_duration", + format_time(*sync_stat.last_sync_datasync_queue_wait_duration)); + } if (sync_stat.last_sync_duration) { f->dump_string("sync_duration", format_time(*sync_stat.last_sync_duration)); f->dump_stream("sync_time_stamp") << sync_stat.last_synced; diff --git a/src/tools/cephfs_mirror/PeerReplayer.h b/src/tools/cephfs_mirror/PeerReplayer.h index 9cf3df07150..6bfc84a5835 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.h +++ b/src/tools/cephfs_mirror/PeerReplayer.h @@ -319,6 +319,8 @@ private: bool m_datasync_error = false; int m_datasync_errno = 0; bool m_backoff = false; + boost::optional m_first_dataq_push_time; + bool m_datasync_queue_wait_reported = false; }; class RemoteSync : public SyncMechanism { @@ -385,6 +387,7 @@ private: monotime last_synced = clock::zero(); boost::optional last_sync_duration; boost::optional last_sync_crawl_duration; + boost::optional last_sync_datasync_queue_wait_duration; boost::optional last_sync_bytes; //last sync bytes for display in status boost::optional last_sync_files; //last num of sync files for display in status uint64_t sync_bytes = 0; //sync bytes counter, independently for each directory sync. @@ -407,6 +410,8 @@ private: uint64_t discovered_delta_bytes = 0; //discovered delta bytes counter, independently for each directory sync. uint64_t blockdiff_sync_bytes = 0; //actual bytes synced using SnapDiff/blockdiff counter double blockdiff_time_sec = 0.0; //actual sync time using SnapDiff/blockdiff counter + boost::optional datasync_queue_wait_duration; // first data_q push to first pop (final) + boost::optional datasync_queue_wait_start_time; // until first pop; for in-progress display }; void _inc_failed_count(const std::string &dir_root) { @@ -455,6 +460,8 @@ private: sync_stat.discovered_delta_bytes = 0; sync_stat.blockdiff_sync_bytes = 0; sync_stat.blockdiff_time_sec = 0.0; + sync_stat.datasync_queue_wait_duration = boost::none; + sync_stat.datasync_queue_wait_start_time = boost::none; } void _set_last_synced_snap(const std::string &dir_root, uint64_t snap_id, const std::string &snap_name) { @@ -497,6 +504,9 @@ private: sync_stat.last_synced = clock::now(); sync_stat.last_sync_duration = duration; sync_stat.last_sync_crawl_duration = sync_stat.crawl_duration; + //For empty snapshot sync, datasync_queue_wait_duration is 0 + sync_stat.last_sync_datasync_queue_wait_duration = + sync_stat.datasync_queue_wait_duration.value_or(0.0); sync_stat.last_sync_bytes = sync_stat.sync_bytes; sync_stat.last_sync_files = sync_stat.sync_files; ++sync_stat.synced_snap_count; @@ -513,6 +523,21 @@ private: sync_stat.crawl_finished = state; sync_stat.crawl_duration = seconds; } + void set_datasync_queue_wait_start_time(const std::string &dir_root, monotime t) { + std::scoped_lock locker(m_lock); + auto &sync_stat = m_snap_sync_stats.at(dir_root); + if (!sync_stat.datasync_queue_wait_start_time && !sync_stat.datasync_queue_wait_duration) { + sync_stat.datasync_queue_wait_start_time = t; + } + } + void set_datasync_queue_wait_duration(const std::string &dir_root, double seconds) { + std::scoped_lock locker(m_lock); + auto &sync_stat = m_snap_sync_stats.at(dir_root); + if (!sync_stat.datasync_queue_wait_duration) { + sync_stat.datasync_queue_wait_duration = seconds; + sync_stat.datasync_queue_wait_start_time = boost::none; + } + } void set_blockdiff_metrics(const std::string &dir_root, const uint64_t bd_syncbytes, const double bd_time) { std::scoped_lock locker(m_lock); auto &sync_stat = m_snap_sync_stats.at(dir_root);