]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
cephfs_mirror: add labeled replication performance metrics
authorVenky Shankar <vshankar@redhat.com>
Wed, 14 Feb 2024 12:28:58 +0000 (07:28 -0500)
committerVenky Shankar <vshankar@redhat.com>
Sat, 17 Feb 2024 01:06:34 +0000 (20:06 -0500)
Fixes: http://tracker.ceph.com/issues/63945
Signed-off-by: Jos Collin <jcollin@redhat.com>
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/common/options/cephfs-mirror.yaml.in
src/pybind/mgr/mgr_module.py
src/tools/cephfs_mirror/FSMirror.cc
src/tools/cephfs_mirror/FSMirror.h
src/tools/cephfs_mirror/Mirror.cc
src/tools/cephfs_mirror/Mirror.h
src/tools/cephfs_mirror/PeerReplayer.cc
src/tools/cephfs_mirror/PeerReplayer.h

index 78f86dfb1a762087ac34271a6bcb8d69f5324d65..f826161872b8800b81cc1028af5885588e7203a5 100644 (file)
@@ -91,4 +91,15 @@ options:
   default: 10
   services:
   - cephfs-mirror
-  min: 0
\ No newline at end of file
+  min: 0
+- name: cephfs_mirror_perf_stats_prio
+  type: int
+  level: advanced
+  desc: Priority level for mirror daemon replication perf counters
+  long_desc: The daemon will send perf counter data to the manager daemon if the priority
+    is not lower than mgr_stats_threshold.
+  default: 5
+  services:
+  - cephfs-mirror
+  min: 0
+  max: 11
index 28fd69738eb9a2b511a72ea67466d5441e8dc225..13cf9386bcdb581d1dcd2cb2411ed9c31b7ea198 100644 (file)
@@ -2074,7 +2074,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
     @profile_method()
     def get_unlabeled_perf_counters(self, prio_limit: int = PRIO_USEFUL,
                               services: Sequence[str] = ("mds", "mon", "osd",
-                                                         "rbd-mirror", "rgw",
+                                                         "rbd-mirror", "cephfs-mirror", "rgw",
                                                          "tcmu-runner")) -> Dict[str, dict]:
         """
         Return the perf counters currently known to this ceph-mgr
index 7ea798e6bec2bd130cc0711f8b728c2e8e97b26c..3d5bf2d1c724244b19cf99f753cfeb27cce55307 100644 (file)
@@ -8,6 +8,8 @@
 #include "common/debug.h"
 #include "common/errno.h"
 #include "common/WorkQueue.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
 #include "include/stringify.h"
 #include "msg/Messenger.h"
 #include "FSMirror.h"
 
 using namespace std;
 
+// Performance Counters
+enum {
+  l_cephfs_mirror_fs_mirror_first = 5000,
+  l_cephfs_mirror_fs_mirror_peers,
+  l_cephfs_mirror_fs_mirror_dir_count,
+  l_cephfs_mirror_fs_mirror_last,
+};
+
 namespace cephfs {
 namespace mirror {
 
@@ -107,6 +117,18 @@ FSMirror::FSMirror(CephContext *cct, const Filesystem &filesystem, uint64_t pool
     m_asok_hook(new MirrorAdminSocketHook(cct, filesystem, this)) {
   m_service_daemon->add_or_update_fs_attribute(m_filesystem.fscid, SERVICE_DAEMON_DIR_COUNT_KEY,
                                                (uint64_t)0);
+
+  std::string labels = ceph::perf_counters::key_create("cephfs_mirror_mirrored_filesystems",
+                                                      {{"filesystem", m_filesystem.fs_name}});
+  PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_fs_mirror_first,
+                         l_cephfs_mirror_fs_mirror_last);
+  auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
+  plb.add_u64(l_cephfs_mirror_fs_mirror_peers,
+             "mirroring_peers", "Mirroring Peers", "mpee", prio);
+  plb.add_u64(l_cephfs_mirror_fs_mirror_dir_count,
+             "directory_count", "Directory Count", "dirc", prio);
+  m_perf_counters = plb.create_perf_counters();
+  m_cct->get_perfcounters_collection()->add(m_perf_counters);
 }
 
 FSMirror::~FSMirror() {
@@ -120,6 +142,12 @@ FSMirror::~FSMirror() {
   // outside the lock so that in-progress commands can acquire
   // lock and finish executing.
   delete m_asok_hook;
+  PerfCounters *perf_counters = nullptr;
+  std::swap(perf_counters, m_perf_counters);
+  if (perf_counters != nullptr) {
+    m_cct->get_perfcounters_collection()->remove(perf_counters);
+    delete perf_counters;
+  }
 }
 
 int FSMirror::init_replayer(PeerReplayer *peer_replayer) {
@@ -355,6 +383,9 @@ void FSMirror::handle_acquire_directory(string_view dir_path) {
       peer_replayer->add_directory(dir_path);
     }
   }
+  if (m_perf_counters) {
+    m_perf_counters->set(l_cephfs_mirror_fs_mirror_dir_count, m_directories.size());
+  }
 }
 
 void FSMirror::handle_release_directory(string_view dir_path) {
@@ -372,6 +403,9 @@ void FSMirror::handle_release_directory(string_view dir_path) {
         peer_replayer->remove_directory(dir_path);
       }
     }
+    if (m_perf_counters) {
+      m_perf_counters->set(l_cephfs_mirror_fs_mirror_dir_count, m_directories.size());
+    }
   }
 }
 
@@ -395,6 +429,9 @@ void FSMirror::add_peer(const Peer &peer) {
   }
   m_peer_replayers.emplace(peer, std::move(replayer));
   ceph_assert(m_peer_replayers.size() == 1); // support only a single peer
+  if (m_perf_counters) {
+    m_perf_counters->inc(l_cephfs_mirror_fs_mirror_peers);
+  }
 }
 
 void FSMirror::remove_peer(const Peer &peer) {
@@ -415,6 +452,9 @@ void FSMirror::remove_peer(const Peer &peer) {
     dout(5) << ": shutting down replayers for peer=" << peer << dendl;
     shutdown_replayer(replayer.get());
   }
+  if (m_perf_counters) {
+    m_perf_counters->dec(l_cephfs_mirror_fs_mirror_peers);
+  }
 }
 
 void FSMirror::mirror_status(Formatter *f) {
index a9c1fab1025d9f42ee29bba6289ba1f7df744f7c..75fca758520d809fb77ca30458376a1522308fc3 100644 (file)
@@ -154,6 +154,8 @@ private:
 
   MountRef m_mount;
 
+  PerfCounters *m_perf_counters;
+
   int init_replayer(PeerReplayer *peer_replayer);
   void shutdown_replayer(PeerReplayer *peer_replayer);
   void cleanup();
index cc811f0a1c7fb3dd26f3067b43d610312574a4fa..0ad9f101e37107c03b2a088087e1ceebce03c91c 100644 (file)
@@ -9,6 +9,8 @@
 #include "common/errno.h"
 #include "common/Timer.h"
 #include "common/WorkQueue.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
 #include "include/types.h"
 #include "mon/MonClient.h"
 #include "msg/Messenger.h"
 #undef dout_prefix
 #define dout_prefix *_dout << "cephfs::mirror::Mirror " << __func__
 
+// Performance Counters
+enum {
+  l_cephfs_mirror_first = 4000,
+  l_cephfs_mirror_file_systems_mirrorred,
+  l_cephfs_mirror_file_systems_mirror_enable_failures,
+  l_cephfs_mirror_last,
+};
+
 namespace cephfs {
 namespace mirror {
 
@@ -277,6 +287,17 @@ int Mirror::init(std::string &reason) {
     return r;
   }
 
+  std::string labels = ceph::perf_counters::key_create("cephfs_mirror");
+  PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_first, l_cephfs_mirror_last);
+
+  auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
+  plb.add_u64(l_cephfs_mirror_file_systems_mirrorred,
+             "mirrored_filesystems", "Filesystems mirrored", "mir", prio);
+  plb.add_u64_counter(l_cephfs_mirror_file_systems_mirror_enable_failures,
+                     "mirror_enable_failures", "Mirroring enable failures", "mirf", prio);
+  m_perf_counters = plb.create_perf_counters();
+  m_cct->get_perfcounters_collection()->add(m_perf_counters);
+
   return 0;
 }
 
@@ -285,6 +306,13 @@ void Mirror::shutdown() {
   m_stopping = true;
   m_cluster_watcher->shutdown();
   m_cond.notify_all();
+
+  PerfCounters *perf_counters = nullptr;
+  std::swap(perf_counters, m_perf_counters);
+  if (perf_counters != nullptr) {
+    m_cct->get_perfcounters_collection()->remove(perf_counters);
+    delete perf_counters;
+  }
 }
 
 void Mirror::reopen_logs() {
@@ -328,6 +356,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem,
     m_service_daemon->add_or_update_fs_attribute(filesystem.fscid,
                                                  SERVICE_DAEMON_MIRROR_ENABLE_FAILED_KEY,
                                                  true);
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_file_systems_mirror_enable_failures);
+    }
     return;
   }
 
@@ -341,6 +372,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem,
   }
 
   dout(10) << ": Initialized FSMirror for filesystem=" << filesystem << dendl;
+  if (m_perf_counters) {
+    m_perf_counters->inc(l_cephfs_mirror_file_systems_mirrorred);
+  }
 }
 
 void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
@@ -358,6 +392,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
     m_service_daemon->add_or_update_fs_attribute(filesystem.fscid,
                                                  SERVICE_DAEMON_MIRROR_ENABLE_FAILED_KEY,
                                                  true);
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_file_systems_mirror_enable_failures);
+    }
     return;
   }
 
@@ -367,6 +404,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
   m_cond.notify_all();
 
   dout(10) << ": Initialized FSMirror for filesystem=" << filesystem << dendl;
+  if (m_perf_counters) {
+    m_perf_counters->inc(l_cephfs_mirror_file_systems_mirrorred);
+  }
 }
 
 void Mirror::enable_mirroring(const Filesystem &filesystem, uint64_t local_pool_id,
@@ -422,6 +462,10 @@ void Mirror::handle_disable_mirroring(const Filesystem &filesystem, int r) {
       m_mirror_actions.erase(filesystem);
     }
   }
+
+  if (m_perf_counters) {
+    m_perf_counters->dec(l_cephfs_mirror_file_systems_mirrorred);
+  }
 }
 
 void Mirror::disable_mirroring(const Filesystem &filesystem, Context *on_finish) {
index 2081b5b53050f0e08f098aa1cf6f18a2b4f9f191..5e37b5df4580b2338bba63e2f4c579acf42563bf 100644 (file)
@@ -104,6 +104,8 @@ private:
   RadosRef m_local;
   std::unique_ptr<ServiceDaemon> m_service_daemon;
 
+  PerfCounters *m_perf_counters;
+
   int init_mon_client();
 
   // called via listener
index bd47046bb12184da66f2c9a74aeccf4bcd91f6f2..6f3fb724a659e2b4654976049baeb5fdb947a64b 100644 (file)
@@ -12,6 +12,8 @@
 #include "common/ceph_context.h"
 #include "common/debug.h"
 #include "common/errno.h"
+#include "common/perf_counters.h"
+#include "common/perf_counters_key.h"
 #include "FSMirror.h"
 #include "PeerReplayer.h"
 #include "Utils.h"
 
 using namespace std;
 
+// Performance Counters
+enum {
+  l_cephfs_mirror_peer_replayer_first = 6000,
+  l_cephfs_mirror_peer_replayer_snaps_synced,
+  l_cephfs_mirror_peer_replayer_snaps_deleted,
+  l_cephfs_mirror_peer_replayer_snaps_renamed,
+  l_cephfs_mirror_peer_replayer_snap_sync_failures,
+  l_cephfs_mirror_peer_replayer_avg_sync_time,
+  l_cephfs_mirror_peer_replayer_sync_bytes,
+  l_cephfs_mirror_peer_replayer_last,
+};
+
 namespace cephfs {
 namespace mirror {
 
@@ -161,10 +175,39 @@ PeerReplayer::PeerReplayer(CephContext *cct, FSMirror *fs_mirror,
                                                  SERVICE_DAEMON_FAILED_DIR_COUNT_KEY, (uint64_t)0);
   m_service_daemon->add_or_update_peer_attribute(m_filesystem.fscid, m_peer,
                                                  SERVICE_DAEMON_RECOVERED_DIR_COUNT_KEY, (uint64_t)0);
+
+  std::string labels = ceph::perf_counters::key_create("cephfs_mirror_peers",
+                                                      {{"source_fscid", stringify(m_filesystem.fscid)},
+                                                       {"source_filesystem", m_filesystem.fs_name},
+                                                       {"peer_cluster_name", m_peer.remote.cluster_name},
+                                                       {"peer_cluster_filesystem", m_peer.remote.fs_name}});
+  PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_peer_replayer_first,
+                         l_cephfs_mirror_peer_replayer_last);
+  auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
+  plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_synced,
+                     "snaps_synced", "Snapshots Synchronized", "sync", prio);
+  plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_deleted,
+                     "snaps_deleted", "Snapshots Deleted", "del", prio);
+  plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_renamed,
+                     "snaps_renamed", "Snapshots Renamed", "ren", prio);
+  plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snap_sync_failures,
+                     "sync_failures", "Snapshot Sync Failures", "fail", prio);
+  plb.add_time_avg(l_cephfs_mirror_peer_replayer_avg_sync_time,
+                  "avg_sync_time", "Average Sync Time", "asyn", prio);
+  plb.add_u64_counter(l_cephfs_mirror_peer_replayer_sync_bytes,
+                     "sync_bytes", "Sync Bytes", "sbye", prio);
+  m_perf_counters = plb.create_perf_counters();
+  m_cct->get_perfcounters_collection()->add(m_perf_counters);
 }
 
 PeerReplayer::~PeerReplayer() {
   delete m_asok_hook;
+  PerfCounters *perf_counters = nullptr;
+  std::swap(perf_counters, m_perf_counters);
+  if (perf_counters != nullptr) {
+    m_cct->get_perfcounters_collection()->remove(perf_counters);
+    delete perf_counters;
+  }
 }
 
 int PeerReplayer::init() {
@@ -516,6 +559,9 @@ int PeerReplayer::propagate_snap_deletes(const std::string &dir_root,
       return r;
     }
     inc_deleted_snap(dir_root);
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_deleted);
+    }
   }
 
   return 0;
@@ -539,6 +585,9 @@ int PeerReplayer::propagate_snap_renames(
       return r;
     }
     inc_renamed_snap(dir_root);
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_renamed);
+    }
   }
 
   return 0;
@@ -694,6 +743,9 @@ int PeerReplayer::remote_file_op(const std::string &dir_root, const std::string
         derr << ": failed to copy path=" << epath << ": " << cpp_strerror(r) << dendl;
         return r;
       }
+      if (m_perf_counters) {
+       m_perf_counters->inc(l_cephfs_mirror_peer_replayer_sync_bytes, stx.stx_size);
+      }
     } else if (S_ISLNK(stx.stx_mode)) {
       // free the remote link before relinking
       r = ceph_unlinkat(m_remote_mount, fh.r_fd_dir_root, epath.c_str(), 0);
@@ -1457,7 +1509,17 @@ int PeerReplayer::do_sync_snaps(const std::string &dir_root) {
       clear_current_syncing_snap(dir_root);
       return r;
     }
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_synced);
+    }
     std::chrono::duration<double> duration = clock::now() - start;
+
+    utime_t d;
+    d.set_from_double(duration.count());
+    if (m_perf_counters) {
+      m_perf_counters->tinc(l_cephfs_mirror_peer_replayer_avg_sync_time, d);
+    }
+
     set_last_synced_stat(dir_root, it->first, it->second, duration.count());
     if (--snaps_per_cycle == 0) {
       break;
@@ -1481,6 +1543,9 @@ void PeerReplayer::sync_snaps(const std::string &dir_root,
   locker.lock();
   if (r < 0) {
     _inc_failed_count(dir_root);
+    if (m_perf_counters) {
+      m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snap_sync_failures);
+    }
   } else {
     _reset_failed_count(dir_root);
   }
index 0511d154a759d068fcc87bd5a8db890b5c87bb7e..63e1bd9e8a77701c5ca30772d031653d91430f80 100644 (file)
@@ -269,6 +269,8 @@ private:
 
   ServiceDaemonStats m_service_daemon_stats;
 
+  PerfCounters *m_perf_counters;
+
   void run(SnapshotReplayerThread *replayer);
 
   boost::optional<std::string> pick_directory();