]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: add new APIs to fetch latest perf counter values
authorNaveen Naidu <naveennaidu479@gmail.com>
Wed, 19 Mar 2025 13:55:34 +0000 (19:25 +0530)
committerNaveen Naidu <naveennaidu479@gmail.com>
Tue, 8 Apr 2025 02:26:46 +0000 (07:56 +0530)
New APIs to fetch the latest values of performance counters have been
added. These APIs also support fetching the values of labeled performance
counters.

We also change the `insert` and `create` APIs of perf_counters_key so
that they remove any empty labels. This makes it possible to construct a
perf_counters_key when the key labels are built dynamically.

Signed-off-by: Naveen Naidu <naveen.naidu@ibm.com>
doc/mgr/modules.rst
src/mgr/ActivePyModules.cc
src/mgr/ActivePyModules.h
src/mgr/BaseMgrModule.cc
src/pybind/ceph_argparse.py
src/pybind/mgr/ceph_module.pyi
src/pybind/mgr/mgr_module.py

index b07f6ad335e8bda4b72be705c5c76aa57e208c9c..31a502cc6f273c9df936d63462348a338ca4a337 100644 (file)
@@ -508,7 +508,8 @@ function. This will result in a circular locking exception.
 .. automethod:: MgrModule.get_unlabeled_perf_schema
 .. automethod:: MgrModule.get_unlabeled_counter
 .. automethod:: MgrModule.get_latest_unlabeled_counter
-.. automethod:: MgrModule.get_perf_schema   
+.. automethod:: MgrModule.get_perf_schema
+.. automethod:: MgrModule.get_latest_counter
 .. automethod:: MgrModule.get_mgr_id
 .. automethod:: MgrModule.get_daemon_health_metrics
 
index c197692c8c95d24a4d433e053b8c39127663aee5..5d2a751538ec7981bb3c348c735ff4965fac6ee5 100644 (file)
@@ -877,6 +877,76 @@ PyObject* ActivePyModules::with_unlabled_perf_counters(
   return f.get();
 }
 
+// Holds a list of label pairs for a counter, [(level, shallow), (pooltype, replicated)]
+typedef std::vector<pair<std::string_view, std::string_view>> perf_counter_label_pairs;
+
+PyObject* ActivePyModules::with_perf_counters(
+    std::function<void(
+       PerfCounterInstance &counter_instance,
+       PerfCounterType &counter_type,
+       PyFormatter& f)> fct,
+    const std::string& svc_name,
+    const std::string& svc_id,
+    std::string_view counter_name,
+    std::string_view sub_counter_name,
+    const perf_counter_label_pairs& labels) const
+{
+  PyFormatter f;
+  /*
+    The resolved counter path, they are of the format
+    <counter_name>.<sub_counter_name> If the counter name has labels, then they
+    are segregated via NULL delimters.
+
+    Eg:
+      - labeled counter:
+        "osd_scrub_sh_repl^@level^@shallow^@pooltype^@replicated^@.successful_scrubs_elapsed"
+      - unlabeled counter: "osd.stat_bytes"
+  */
+  std::string resolved_path;
+  Formatter::ArraySection perf_counter_value_section(f, counter_name);
+
+  // Construct the resolved path
+  if (labels.empty()) {
+    resolved_path =
+       std::string(counter_name) + "." + std::string(sub_counter_name);
+  } else {
+    perf_counter_label_pairs perf_counter_labels = labels;
+    std::string counter_name_with_labels = ceph::perf_counters::detail::create(
+       counter_name.data(), perf_counter_labels.data(),
+       perf_counter_labels.data() + perf_counter_labels.size());
+    resolved_path = std::string(counter_name_with_labels) + "." +
+                   std::string(sub_counter_name);
+  }
+
+  {
+    without_gil_t no_gil;
+    std::lock_guard l(lock);
+    auto metadata = daemon_state.get(DaemonKey{svc_name, svc_id});
+    if (metadata) {
+      std::lock_guard l2(metadata->lock);
+      if (metadata->perf_counters.instances.count(resolved_path)) {
+       auto counter_instance =
+           metadata->perf_counters.instances.at(resolved_path);
+       auto counter_type = metadata->perf_counters.types.at(resolved_path);
+       with_gil(no_gil, [&] { fct(counter_instance, counter_type, f); });
+      } else {
+       dout(4) << fmt::format(
+                      "Missing counter: '{}' ({}.{})", resolved_path, svc_name,
+                      svc_id)
+               << dendl;
+       dout(20) << "Paths are:" << dendl;
+       for (const auto& i : metadata->perf_counters.instances) {
+         dout(20) << i.first << dendl;
+       }
+      }
+    } else {
+      dout(4) << fmt::format("No daemon state for {}.{}", svc_name, svc_id)
+             << dendl;
+    }
+  }
+  return f.get();
+}
+
 PyObject* ActivePyModules::get_unlabeled_counter_python(
     const std::string &svc_name,
     const std::string &svc_id,
@@ -933,6 +1003,32 @@ PyObject* ActivePyModules::get_latest_unlabeled_counter_python(
   return with_unlabled_perf_counters(extract_latest_counters, svc_name, svc_id, path);
 }
 
+PyObject* ActivePyModules::get_latest_counter_python(
+    const std::string& svc_name,
+    const std::string& svc_id,
+    std::string_view counter_name,
+    std::string_view sub_counter_name,
+    const perf_counter_label_pairs& labels)
+{
+  auto extract_latest_counters = [](PerfCounterInstance& counter_instance,
+                                   PerfCounterType& counter_type,
+                                   PyFormatter& f) {
+    if (counter_type.type & PERFCOUNTER_LONGRUNAVG) {
+      const auto& datapoint = counter_instance.get_latest_data_avg();
+      f.dump_float("t", datapoint.t);
+      f.dump_unsigned("s", datapoint.s);
+      f.dump_unsigned("c", datapoint.c);
+    } else {
+      const auto& datapoint = counter_instance.get_latest_data();
+      f.dump_float("t", datapoint.t);
+      f.dump_unsigned("v", datapoint.v);
+    }
+  };
+  return with_perf_counters(
+      extract_latest_counters, svc_name, svc_id, counter_name, sub_counter_name,
+      labels);
+}
+
 PyObject* ActivePyModules::get_unlabeled_perf_schema_python(
     const std::string &svc_type,
     const std::string &svc_id)
@@ -995,9 +1091,6 @@ PyObject* ActivePyModules::get_unlabeled_perf_schema_python(
   return f.get();
 }
 
-// Holds a list of label pairs for a counter, [(level, shallow), (pooltype, replicated)]
-typedef std::vector<pair<std::string_view, std::string_view>> perf_counter_label_pairs;
-
 PyObject* ActivePyModules::get_perf_schema_python(
     const std::string& svc_type,
     const std::string& svc_id)
index 4e6caf3ee1bce4e02a9fa52b73e16f39c0f8e3c1..31ebb0c71e637681aee39230158d47a7d86bdc61 100644 (file)
@@ -99,6 +99,12 @@ public:
       const std::string &svc_type,
       const std::string &svc_id,
       const std::string &path);
+  PyObject *get_latest_counter_python(
+      const std::string &svc_type,
+      const std::string &svc_id,
+      std::string_view counter_name,
+      std::string_view sub_counter_name,
+      const std::vector<std::pair<std::string_view, std::string_view>> &labels);
   PyObject *get_unlabeled_perf_schema_python(
       const std::string &svc_type,
       const std::string &svc_id);
@@ -117,6 +123,18 @@ public:
       const std::string &svc_name,
       const std::string &svc_id,
       const std::string &path) const;
+  /// @note @c fct is not allowed to acquire locks when holding GIL
+  PyObject *with_perf_counters(
+      std::function<void(
+         PerfCounterInstance &counter_instance,
+         PerfCounterType &counter_type,
+         PyFormatter &f)> fct,
+      const std::string &svc_name,
+      const std::string &svc_id,
+      std::string_view counter_name,
+      std::string_view sub_counter_name,
+      const std::vector<std::pair<std::string_view, std::string_view>> &labels)
+      const;
 
   MetricQueryID add_osd_perf_query(
       const OSDPerfMetricQuery &query,
index 395cda35292301144a2dff3e3818791234c4dc08..7a57246db30f71ad2ca8f2369348b081f33ccaba 100644 (file)
@@ -667,6 +667,43 @@ get_latest_unlabeled_counter(BaseMgrModule *self, PyObject *args)
       svc_name, svc_id, counter_path);
 }
 
+static PyObject*
+get_latest_counter(BaseMgrModule *self, PyObject *args)
+{
+  char *svc_name = nullptr;
+  char *svc_id = nullptr;
+  char *counter_name = nullptr;
+  char *sub_counter_name = nullptr;
+  PyObject *labels_list = nullptr; //labels = [("level", "deep"), ("pooltype", "ec")]
+  if (!PyArg_ParseTuple(args, "ssssO:get_latest_counter", &svc_name,
+                                                  &svc_id, &counter_name, &sub_counter_name,
+                                                  &labels_list)) {
+    return nullptr;
+  }
+
+  if (!PyList_Check(labels_list)) {
+    derr << __func__ << " labels_list not a list" << dendl;
+    Py_RETURN_FALSE;
+  }
+
+  std::vector<std::pair<std::string_view, std::string_view>> labels;
+  for (int i = 0; i < PyList_Size(labels_list); ++i) {
+    // Get the tuple element of labels list ("level", "deep")
+    PyObject *label_key_value = PyList_GET_ITEM(labels_list, i);
+
+    char *label_key = nullptr;
+    char *label_value = nullptr;
+    if (!PyArg_ParseTuple(label_key_value, "ss:label_pair", &label_key, &label_value)) {
+      derr << fmt::format("{} list item {} not a size 2 tuple", __func__, i) << dendl;
+      continue;
+    }
+    labels.push_back(std::make_pair<std::string_view, std::string_view>(label_key, label_value));
+  }
+
+  return self->py_modules->get_latest_counter_python(
+      svc_name, svc_id, counter_name, sub_counter_name, labels);
+}
+
 static PyObject*
 get_unlabeled_perf_schema(BaseMgrModule *self, PyObject *args)
 {
@@ -1493,16 +1530,19 @@ PyMethodDef BaseMgrModule_methods[] = {
    "Set a stored field"},
 
   {"_ceph_get_unlabeled_counter", (PyCFunction)get_unlabeled_counter, METH_VARARGS,
-    "Get a performance counter"},
+   "Get a performance counter"},
 
   {"_ceph_get_latest_unlabeled_counter", (PyCFunction)get_latest_unlabeled_counter, METH_VARARGS,
-    "Fetch (or get) the latest (or updated) value of an unlabeled counter"},
+   "Fetch (or get) the latest (or updated) value of an unlabeled counter"},
+
+  {"_ceph_get_latest_counter", (PyCFunction)get_latest_counter, METH_VARARGS,
+   "Fetch (or get) the latest (or updated) value of a performance counter"},
 
   {"_ceph_get_unlabeled_perf_schema", (PyCFunction)get_unlabeled_perf_schema, METH_VARARGS,
-    "Get the unlabeled performance counter schema"},
+   "Get the unlabeled performance counter schema"},
 
   {"_ceph_get_perf_schema", (PyCFunction)get_perf_schema, METH_VARARGS,
-    "Get the performance counter schema"},
+   "Get the performance counter schema"},
 
   {"_ceph_get_rocksdb_version", (PyCFunction)ceph_get_rocksdb_version, METH_NOARGS,
     "Get the current RocksDB version number"},
index 7377c3143e8c93d668a55085310d613d4c624a53..a2792c12e9523c2ad407a05a2bd5eecf7ea96b19 100644 (file)
@@ -173,7 +173,7 @@ class CephArgtype(object):
             assert len(type_args) == 1
             attrs['n'] = 'N'
             return CephArgtype.to_argdesc(type_args[0], attrs, positional=positional)
-        elif orig_type is Tuple:
+        elif orig_type in (Tuple, tuple):
             assert len(type_args) >= 1
             inner_tp = type_args[0]
             assert type_args.count(inner_tp) == len(type_args), \
index 91419461faad2f41cb85499186419e40d373a250..9c9e4c0d339a8c94ae724c9124a55dd7fcf34614 100644 (file)
@@ -77,7 +77,7 @@ class BaseMgrModule(object):
     def _ceph_get_rocksdb_version(self) -> str: ...
     def _ceph_get_unlabeled_counter(self, svc_type: str, svc_name: str, path: str) -> Dict[str, List[Tuple[float, int]]]: ...
     def _ceph_get_latest_unlabeled_counter(self, svc_type, svc_name, path): ...
-    def _ceph_get_latest_counter(self, svc_type, svc_name, path): ...
+    def _ceph_get_latest_counter(self, svc_type: str, svc_name: str, counter_name: str, sub_counter_name: str, labels: List[Tuple[str, str]]): ...
     def _ceph_get_metadata(self, svc_type, svc_id): ...
     def _ceph_get_daemon_status(self, svc_type, svc_id): ...
     def _ceph_send_command(self,
index 990343c31b0b20eda8a12f0b626e17d66b434425..f57b5fc3f6c6bcb3d5e35812e8e880f5255f592c 100644 (file)
@@ -1641,7 +1641,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         :return: list of dicts describing the counters requested
         """
         return self._ceph_get_unlabeled_perf_schema(svc_type, svc_name)
-    
+
     @API.expose
     def get_perf_schema(self,
                         svc_type: str,
@@ -1689,8 +1689,8 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         self, svc_type: str, svc_name: str, path: str
     ) -> Dict[str, Union[Tuple[float, int], Tuple[float, int, int]]]:
         """
-        Called by the plugin to fetch only the newest performance counter data
-        point for a particular counter on a particular service.
+        Called by the plugin to fetch only the newest unlabeled performance counter
+        data point for a particular counter on a particular service.
 
         :param str svc_type:
         :param str svc_name:
@@ -1702,6 +1702,32 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         """
         return self._ceph_get_latest_unlabeled_counter(svc_type, svc_name, path)
 
+    @API.expose
+    def get_latest_counter(self,
+                           svc_type: str,
+                           svc_name: str,
+                           counter_name: str,
+                           sub_counter_name: str,
+                           labels: List[Tuple[str, str]]) -> Dict[str, Union[Tuple[float, int],
+                                                                             Tuple[float, int, int]]]:
+        """
+        Called by the plugin to fetch only the newest performance counter data
+        point for a particular (optionally labeled) counter on a particular
+        service.
+
+        :param str svc_type:
+        :param str svc_name:
+        :param str counter_name: the key_name of the counter, for example
+            "osd_scrub_sh_repl"
+        :param str sub_counter_name: the counters present under the key_name,
+            for example "successful_scrubs_elapsed"
+        :param list[(str, str)] labels: the labels associated with the counter,
+            for example "[("level", "deep"), ("pooltype", "ec")]"
+        :return: A dict with a single entry keyed by ``counter_name``. Its
+            value is a two-tuple of (timestamp, value) or, for long-running
+            average counters, a three-tuple of (timestamp, value, count).
+            The value may be empty if no data is available.
+        """
+        return self._ceph_get_latest_counter(svc_type, svc_name, counter_name, sub_counter_name, labels)
+
     @API.expose
     def list_servers(self) -> List[ServerInfoT]:
         """
@@ -2214,6 +2240,16 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         else:
             return 0
 
+    @API.expose
+    def get_counter_latest(self, daemon_type: str, daemon_name: str, counter_name: str,
+                           sub_counter_name: str, labels: List[Tuple[str, str]]) -> int:
+        data = self.get_latest_counter(
+            daemon_type, daemon_name, counter_name, sub_counter_name, labels)[counter_name]
+        if data:
+            return data[1]
+        else:
+            return 0
+
     @API.expose
     def get_unlabeled_counter_latest_avg(self, daemon_type: str, daemon_name: str, counter: str) -> Tuple[int, int]:
         data = self.get_latest_unlabeled_counter(
@@ -2225,6 +2261,18 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         else:
             return 0, 0
 
+    @API.expose
+    def get_counter_latest_avg(self, daemon_type: str, daemon_name: str, counter_name: str,
+                               sub_counter_name: str, labels: List[Tuple[str, str]]) -> Tuple[int, int]:
+        data = self.get_latest_counter(
+            daemon_type, daemon_name, counter_name, sub_counter_name, labels)[counter_name]
+        if data:
+            # https://github.com/python/mypy/issues/1178
+            _, value, count = cast(Tuple[float, int, int], data)
+            return value, count
+        else:
+            return 0, 0
+
     @API.expose
     @profile_method()
     def get_unlabeled_perf_counters(