]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
tools/cephfs_mirror: Add crawl-state and sync-mode metric
authorKotresh HR <khiremat@redhat.com>
Sat, 28 Mar 2026 10:12:43 +0000 (15:42 +0530)
committerKotresh HR <khiremat@redhat.com>
Fri, 12 Jun 2026 19:10:57 +0000 (00:40 +0530)
The 'crawl' and 'sync-mode' metrics are added.

sync-mode: full/delta,
"crawl": {
           "state": "completed",
           "duration": "37s"
       }

sync-mode:
---------
The 'sync-mode: full/delta' field is added to the peer status.
'delta' means that blockdiff, along with snapdiff, is
being used to sync the files, whereas 'full' means
the full directory is crawled and each file is synced
entirely.

crawl:
-----
The state can be in-progress/completed. This
identifies whether the crawler thread is done
queuing the files for data sync threads.

The crawl duration is also shown.
If the crawl is in-progress, the duration
shows the time elapsed so far since the
start of the crawl. If the crawl state is
completed, the duration indicates the total
time taken for the crawl.

The crawl duration is shown in "d h m s" format.
The existing 'sync_duration' in last_synced_snap
is now formatted the same way.

The values are as below. When crawl state is
completed, the 'total_files' metric doesn't
grow anymore.

crawl_duration:
--------------
The crawl_duration of the last synced snapshot is saved in the
last_synced_snap section as well.

Sample outputs:
---------------
{
    "/d0": {
        "state": "syncing",
        "current_syncing_snap": {
            "id": 2,
            "name": "d0_snap0",
            "sync-mode": "full",
            "crawl": {
                "state": "in-progress",
                "duration": "21s"
            },
            "bytes": {
                "sync_bytes": "149.25 MiB",
                "total_bytes": "176.47 MiB",
                "sync_percent": "84.57%"
            },
            "files": {
                "sync_files": 4931,
                "total_files": 5845,
                "sync_percent": "84.36%"
            }
        },
        "snaps_synced": 0,
        "snaps_deleted": 0,
        "snaps_renamed": 0
    }
}
------------------------------------------
{
    "/d0": {
        "state": "syncing",
        "current_syncing_snap": {
            "id": 2,
            "name": "d0_snap0",
            "sync-mode": "full",
            "crawl": {
                "state": "completed",
                "duration": "37s"
            },
            "bytes": {
                "sync_bytes": "891.39 MiB",
                "total_bytes": "901.52 MiB",
                "sync_percent": "98.88%"
            },
            "files": {
                "sync_files": 29656,
                "total_files": 30000,
                "sync_percent": "98.85%"
            }
        },
        "snaps_synced": 0,
        "snaps_deleted": 0,
        "snaps_renamed": 0
    }
}
---------
  {
        "/d0": {
            "state": "syncing",
            "current_syncing_snap": {
                "id": 3,
                "name": "d0_snap1",
                "sync-mode": "delta",
                "crawl": {
                    "state": "completed",
                    "duration": "15s"
                },
                "bytes": {
                    "sync_bytes": "120.20 MiB",
                    "total_bytes": "149.94 MiB",
                    "sync_percent": "80.16%"
                },
                "files": {
                    "sync_files": 4032,
                    "total_files": 5000,
                    "sync_percent": "80.64%"
                }
            },
            "last_synced_snap": {
                "id": 2,
                "name": "d0_snap0",
                "crawl_duration": "17s",
                "sync_duration": "45s",
                "sync_time_stamp": "5642.805770s",
                "sync_bytes": "300.85 MiB",
                "sync_files": 10000
            },
            "snaps_synced": 1,
            "snaps_deleted": 0,
            "snaps_renamed": 0
        }
    }
-------------
{
    "/d0": {
        "state": "idle",
        "last_synced_snap": {
            "id": 2,
            "name": "d0_snap0",
            "crawl_duration": "17s",
            "sync_duration": "2m 38s",
            "sync_time_stamp": "9259.225009s",
            "sync_bytes": "901.52 MiB",
            "sync_files": 30000
        },
        "snaps_synced": 1,
        "snaps_deleted": 0,
        "snaps_renamed": 0
    }
}

Fixes: https://tracker.ceph.com/issues/73453
Signed-off-by: Kotresh HR <khiremat@redhat.com>
src/tools/cephfs_mirror/PeerReplayer.cc
src/tools/cephfs_mirror/PeerReplayer.h

index ebadde544f873c011104d7d39549065d917ce59b..57a6cd7b75d52dba41da872f50396d1eb3535b41 100644 (file)
@@ -30,6 +30,7 @@
                            << m_peer.uuid << ") " << __func__
 
 using namespace std;
+using sec_duration = std::chrono::duration<double>;
 
 // Performance Counters
 enum {
@@ -1421,12 +1422,16 @@ bool PeerReplayer::SyncMechanism::has_pending_work() const {
   return true;
 }
 
-void PeerReplayer::SyncMechanism::mark_crawl_finished(int ret) {
-  std::unique_lock lock(sdq_lock);
-  m_crawl_finished = true;
-  if (ret < 0)
-    m_crawl_error = true;
-  sdq_cv.notify_all();
+void PeerReplayer::SyncMechanism::mark_crawl_finished(int ret, double crawl_duration_secs) {
+  { // inner scope: sdq_lock is released before set_crawl_finished() takes m_lock
+    std::unique_lock lock(sdq_lock);
+    m_crawl_finished = true;
+    if (ret < 0) // a negative ret flags the crawl as errored
+      m_crawl_error = true;
+    sdq_cv.notify_all(); // wake every waiter on sdq_cv so it can observe the finished flag
+  }
+  // for crawl-state metrics: mark the crawl completed and store its duration (seconds)
+  m_peer_replayer.set_crawl_finished(m_dir_root, true, crawl_duration_secs);
 }
 
 // Returns false if there is any error during data sync
@@ -1752,7 +1757,7 @@ int PeerReplayer::SnapDiffSync::get_changed_blocks(const std::string &epath,
   return r;
 }
 
-void PeerReplayer::SnapDiffSync::finish_crawl(int ret) {
+void PeerReplayer::SnapDiffSync::finish_crawl(int ret, double crawl_duration_secs) {
   dout(20) << dendl;
 
   while (!m_sync_stack.empty()) {
@@ -1768,7 +1773,7 @@ void PeerReplayer::SnapDiffSync::finish_crawl(int ret) {
   }
 
   // Crawl and entry operations are done syncing here. So mark crawl finished here
-  mark_crawl_finished(ret);
+  mark_crawl_finished(ret, crawl_duration_secs);
 }
 
 PeerReplayer::RemoteSync::RemoteSync(PeerReplayer& peer_replayer, std::string_view dir_root,
@@ -1905,7 +1910,7 @@ int PeerReplayer::RemoteSync::get_entry(std::string *epath, struct ceph_statx *s
   return 0;
 }
 
-void PeerReplayer::RemoteSync::finish_crawl(int ret) {
+void PeerReplayer::RemoteSync::finish_crawl(int ret, double crawl_duration_secs) {
   dout(20) << dendl;
 
   while (!m_sync_stack.empty()) {
@@ -1921,7 +1926,7 @@ void PeerReplayer::RemoteSync::finish_crawl(int ret) {
   }
 
   // Crawl and entry operations are done syncing here. So mark stack finished here
-  mark_crawl_finished(ret);
+  mark_crawl_finished(ret, crawl_duration_secs);
 }
 
 int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &current,
@@ -1951,9 +1956,11 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu
   if (fh.p_mnt == m_local_mount) {
     syncm = std::make_shared<SnapDiffSync>(*this, dir_root, m_local_mount, m_remote_mount,
                                            &fh, m_peer, current, prev);
+    set_snapdiff(dir_root, true); //for stats
   } else {
     syncm = std::make_shared<RemoteSync>(*this, dir_root, m_local_mount, m_remote_mount,
                                          &fh, m_peer, current, boost::none);
+    set_snapdiff(dir_root, false); //for stats
   }
 
   r = syncm->init_sync();
@@ -1968,6 +1975,9 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu
 
   // starting from this point we shouldn't care about manual closing of fh.c_fd,
   // it will be closed automatically when bound tdirp is closed.
+  sec_duration crawl_duration_time{0};
+  auto crawl_start_time = clock::now();
+  set_crawl_start_time(dir_root);
   while (true) {
     if (should_backoff(dir_root, &r)) {
       dout(0) << ": backing off r=" << r << dendl;
@@ -1996,7 +2006,9 @@ int PeerReplayer::do_synchronize(const std::string &dir_root, const Snapshot &cu
     }
   }
 
-  syncm->finish_crawl(r);
+  auto crawl_end_time = clock::now();
+  crawl_duration_time = sec_duration(crawl_end_time - crawl_start_time);
+  syncm->finish_crawl(r, crawl_duration_time.count());
 
   dout(20) << " cur:" << fh.c_fd
            << " prev:" << fh.p_fd
@@ -2524,6 +2536,40 @@ void PeerReplayer::run_datasync(SnapshotDataSyncThread *data_replayer) {
   } // outer while
 }
 
+std::string PeerReplayer::format_time(double total_seconds_d) { // renders a duration in seconds as "Nd HHh MMm SSs", omitting leading zero-valued units
+    // Round to nearest second. NOTE(review): a negative value wraps to a huge number in the unsigned cast and NaN is not handled — confirm callers only pass elapsed times
+    uint64_t total_seconds = static_cast<uint64_t>(std::llround(total_seconds_d));
+
+    uint64_t days = total_seconds / 86400; // 86400 seconds per day
+    total_seconds %= 86400;
+
+    uint64_t hours = total_seconds / 3600; // 3600 seconds per hour
+    total_seconds %= 3600;
+
+    uint64_t minutes = total_seconds / 60;
+    uint64_t seconds = total_seconds % 60;
+
+    std::ostringstream oss;
+
+    if (days > 0) { // the largest non-zero unit is printed unpadded; every smaller unit is zero-padded to two digits
+        oss << days << "d "
+            << std::setw(2) << std::setfill('0') << hours   << "h "
+            << std::setw(2) << std::setfill('0') << minutes << "m "
+            << std::setw(2) << std::setfill('0') << seconds << "s";
+    } else if (hours > 0) {
+        oss << hours << "h "
+            << std::setw(2) << std::setfill('0') << minutes << "m "
+            << std::setw(2) << std::setfill('0') << seconds << "s";
+    } else if (minutes > 0) {
+        oss << minutes << "m "
+            << std::setw(2) << std::setfill('0') << seconds << "s";
+    } else {
+        oss << seconds << "s"; // under a minute: seconds only, e.g. "37s" (and "0s" for zero)
+    }
+
+    return oss.str();
+}
+
 std::string PeerReplayer::format_bytes(double bytes) {
   static constexpr double KiB = 1024.0;
   static constexpr double MiB = KiB * 1024.0;
@@ -2567,6 +2613,22 @@ void PeerReplayer::peer_status(Formatter *f) {
       f->open_object_section("current_syncing_snap");
       f->dump_unsigned("id", (*sync_stat.current_syncing_snap).first);
       f->dump_string("name", (*sync_stat.current_syncing_snap).second);
+      if (sync_stat.snapdiff)
+        f->dump_string("sync-mode", "delta");
+      else
+        f->dump_string("sync-mode", "full");
+      f->open_object_section("crawl");
+      if (sync_stat.crawl_finished) {
+        f->dump_string("state", "completed");
+        f->dump_string("duration", format_time(sync_stat.crawl_duration));
+      } else {
+        f->dump_string("state", "in-progress");
+        auto cur_time = clock::now();
+        sec_duration crawl_duration_till_now{0};
+        crawl_duration_till_now = sec_duration(cur_time - sync_stat.crawl_start_time);
+        f->dump_string("duration", format_time(crawl_duration_till_now.count()));
+      }
+      f->close_section(); //crawl
       f->open_object_section("bytes");
       f->dump_string("sync_bytes", format_bytes(sync_stat.sync_bytes));
       f->dump_string("total_bytes", format_bytes(sync_stat.total_bytes));
@@ -2593,8 +2655,11 @@ void PeerReplayer::peer_status(Formatter *f) {
       f->open_object_section("last_synced_snap");
       f->dump_unsigned("id", (*sync_stat.last_synced_snap).first);
       f->dump_string("name", (*sync_stat.last_synced_snap).second);
+      if (sync_stat.last_sync_crawl_duration) {
+        f->dump_string("crawl_duration", format_time(*sync_stat.last_sync_crawl_duration));
+      }
       if (sync_stat.last_sync_duration) {
-        f->dump_float("sync_duration", *sync_stat.last_sync_duration);
+        f->dump_string("sync_duration", format_time(*sync_stat.last_sync_duration));
         f->dump_stream("sync_time_stamp") << sync_stat.last_synced;
       }
       if (sync_stat.last_sync_bytes) {
index bb5e467da53c90afcdbbf69611d3f5e7b135a684..34786c08f33c37efbe9f91ba669a4bc7a27b7add 100644 (file)
@@ -221,12 +221,12 @@ private:
                                    const struct ceph_statx &stx, bool sync_check,
                                    const std::function<int (uint64_t, struct cblock *)> &callback);
 
-    virtual void finish_crawl(int ret) = 0;
+    virtual void finish_crawl(int ret, double crawl_duration_secs) = 0;
 
     void push_dataq_entry(PeerReplayer::SyncEntry e);
     bool pop_dataq_entry(PeerReplayer::SyncEntry &out);
     bool has_pending_work() const;
-    void mark_crawl_finished(int ret);
+    void mark_crawl_finished(int ret, double crawl_duration_secs);
     bool is_dataq_empty_unlocked() const {
       return m_sync_dataq.empty();
     }
@@ -335,7 +335,7 @@ private:
                   const std::function<int (const std::string&)> &dirsync_func,
                   const std::function<int (const std::string&)> &purge_func);
 
-    void finish_crawl(int ret);
+    void finish_crawl(int ret, double crawl_duration_secs);
   };
 
   class SnapDiffSync : public SyncMechanism {
@@ -355,7 +355,7 @@ private:
                            const struct ceph_statx &stx, bool sync_check,
                            const std::function<int (uint64_t, struct cblock *)> &callback);
 
-    void finish_crawl(int ret);
+    void finish_crawl(int ret, double crawl_duration_secs);
 
   private:
     int init_directory(const std::string &epath,
@@ -384,12 +384,17 @@ private:
     uint64_t renamed_snap_count = 0;
     monotime last_synced = clock::zero();
     boost::optional<double> last_sync_duration;
+    boost::optional<double> last_sync_crawl_duration;
     boost::optional<uint64_t> last_sync_bytes; //last sync bytes for display in status
     boost::optional<uint64_t> last_sync_files; //last num of sync files for display in status
     uint64_t sync_bytes = 0; //sync bytes counter, independently for each directory sync.
     uint64_t total_bytes = 0; //total bytes counter, independently for each directory sync.
     uint64_t sync_files = 0; //sync files counter, independently for each directory sync.
     uint64_t total_files = 0; //total files counter, independently for each directory sync.
+    bool snapdiff = false; // RemoteSync/Snapdiff aka full/delta
+    bool crawl_finished = false; // crawl_state - in-progress/completed
+    clock::time_point crawl_start_time; // to show current crawl duration if crawl is in progress
+    double crawl_duration = 0.0; // time taken to complete the crawl, includes a few entry operation like mkdir as well
   };
 
   void _inc_failed_count(const std::string &dir_root) {
@@ -425,6 +430,10 @@ private:
     sync_stat.total_bytes = 0;
     sync_stat.sync_files = 0;
     sync_stat.total_files = 0;
+    sync_stat.snapdiff = false;
+    sync_stat.crawl_finished = false;
+    sync_stat.crawl_start_time = clock::now();
+    sync_stat.crawl_duration = 0.0;
   }
   void _set_last_synced_snap(const std::string &dir_root, uint64_t snap_id,
                             const std::string &snap_name) {
@@ -466,11 +475,23 @@ private:
     auto &sync_stat = m_snap_sync_stats.at(dir_root);
     sync_stat.last_synced = clock::now();
     sync_stat.last_sync_duration = duration;
+    sync_stat.last_sync_crawl_duration = sync_stat.crawl_duration;
     sync_stat.last_sync_bytes = sync_stat.sync_bytes;
     sync_stat.last_sync_files = sync_stat.sync_files;
     ++sync_stat.synced_snap_count;
     _reset_sync_stat(dir_root);
   }
+  void set_snapdiff(const std::string &dir_root, bool snapdiff) { // records the sync mode for dir_root; peer_status reports true as "delta" and false as "full"
+    std::scoped_lock locker(m_lock); // stats are updated under m_lock
+    auto &sync_stat = m_snap_sync_stats.at(dir_root);
+    sync_stat.snapdiff = snapdiff;
+  }
+  void set_crawl_finished(const std::string &dir_root, bool state, double seconds) { // stores the crawl completion flag and the crawl duration (in seconds) for dir_root
+    std::scoped_lock locker(m_lock); // stats are updated under m_lock
+    auto &sync_stat = m_snap_sync_stats.at(dir_root);
+    sync_stat.crawl_finished = state; // peer_status shows "completed" when true, "in-progress" otherwise
+    sync_stat.crawl_duration = seconds; // later copied to last_sync_crawl_duration when the snapshot sync is recorded
+  }
   void inc_sync_bytes(const std::string &dir_root, const uint64_t& b) {
     std::scoped_lock locker(m_lock);
     auto &sync_stat = m_snap_sync_stats.at(dir_root);
@@ -481,6 +502,11 @@ private:
     auto &sync_stat = m_snap_sync_stats.at(dir_root);
     sync_stat.sync_files++;
   }
+  void set_crawl_start_time(const std::string &dir_root) { // stamps the crawl start for dir_root with the current clock time
+    std::scoped_lock locker(m_lock); // stats are updated under m_lock
+    auto &sync_stat = m_snap_sync_stats.at(dir_root);
+    sync_stat.crawl_start_time = clock::now(); // peer_status subtracts this from "now" to report elapsed crawl time while in progress
+  }
   void inc_total_bytes_files(const std::string &dir_root, const uint64_t& b) {
     std::scoped_lock locker(m_lock);
     auto &sync_stat = m_snap_sync_stats.at(dir_root);
@@ -634,6 +660,7 @@ private:
 
   // format routines for peer_status
   static std::string format_bytes(double bytes);
+  static std::string format_time(double total_seconds);
 };
 
 } // namespace mirror