From: Naveen Naidu Date: Wed, 19 Mar 2025 13:55:34 +0000 (+0530) Subject: mgr: add new API's to fetch latest perf counter values X-Git-Tag: v20.3.0~45^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6d3cababa19361d2573207bb480552bf52d50271;p=ceph.git mgr: add new API's to fetch latest perf counter values New APIs to fetch the latest values of performance counters have been added. These APIs support fetching the values for labeled performance counters. We also make changes to the `insert` and `create` APIs of perf_counters_key to remove any empty labels. This change helps us construct perf_counters_key when the key labels are constructed dynamically. Signed-off-by: Naveen Naidu --- diff --git a/doc/mgr/modules.rst b/doc/mgr/modules.rst index b07f6ad335e..31a502cc6f2 100644 --- a/doc/mgr/modules.rst +++ b/doc/mgr/modules.rst @@ -508,7 +508,8 @@ function. This will result in a circular locking exception. .. automethod:: MgrModule.get_unlabeled_perf_schema .. automethod:: MgrModule.get_unlabeled_counter .. automethod:: MgrModule.get_latest_unlabeled_counter -.. automethod:: MgrModule.get_perf_schema +.. automethod:: MgrModule.get_perf_schema +.. automethod:: MgrModule.get_latest_counter .. automethod:: MgrModule.get_mgr_id .. 
automethod:: MgrModule.get_daemon_health_metrics diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index c197692c8c9..5d2a751538e 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -877,6 +877,76 @@ PyObject* ActivePyModules::with_unlabled_perf_counters( return f.get(); } +// Holds a list of label pairs for a counter, [(level, shallow), (pooltype, replicated)] +typedef std::vector> perf_counter_label_pairs; + +PyObject* ActivePyModules::with_perf_counters( + std::function fct, + const std::string& svc_name, + const std::string& svc_id, + std::string_view counter_name, + std::string_view sub_counter_name, + const perf_counter_label_pairs& labels) const +{ + PyFormatter f; + /* + The resolved counter path, they are of the format + . If the counter name has labels, then they + are segregated via NULL delimters. + + Eg: + - labeled counter: + "osd_scrub_sh_repl^@level^@shallow^@pooltype^@replicated^@.successful_scrubs_elapsed" + - unlabeled counter: "osd.stat_bytes" + */ + std::string resolved_path; + Formatter::ArraySection perf_counter_value_section(f, counter_name); + + // Construct the resolved path + if (labels.empty()) { + resolved_path = + std::string(counter_name) + "." + std::string(sub_counter_name); + } else { + perf_counter_label_pairs perf_counter_labels = labels; + std::string counter_name_with_labels = ceph::perf_counters::detail::create( + counter_name.data(), perf_counter_labels.data(), + perf_counter_labels.data() + perf_counter_labels.size()); + resolved_path = std::string(counter_name_with_labels) + "." 
+ + std::string(sub_counter_name); + } + + { + without_gil_t no_gil; + std::lock_guard l(lock); + auto metadata = daemon_state.get(DaemonKey{svc_name, svc_id}); + if (metadata) { + std::lock_guard l2(metadata->lock); + if (metadata->perf_counters.instances.count(resolved_path)) { + auto counter_instance = + metadata->perf_counters.instances.at(resolved_path); + auto counter_type = metadata->perf_counters.types.at(resolved_path); + with_gil(no_gil, [&] { fct(counter_instance, counter_type, f); }); + } else { + dout(4) << fmt::format( + "Missing counter: '{}' ({}.{})", resolved_path, svc_name, + svc_id) + << dendl; + dout(20) << "Paths are:" << dendl; + for (const auto& i : metadata->perf_counters.instances) { + dout(20) << i.first << dendl; + } + } + } else { + dout(4) << fmt::format("No daemon state for {}.{}", svc_name, svc_id) + << dendl; + } + } + return f.get(); +} + PyObject* ActivePyModules::get_unlabeled_counter_python( const std::string &svc_name, const std::string &svc_id, @@ -933,6 +1003,32 @@ PyObject* ActivePyModules::get_latest_unlabeled_counter_python( return with_unlabled_perf_counters(extract_latest_counters, svc_name, svc_id, path); } +PyObject* ActivePyModules::get_latest_counter_python( + const std::string& svc_name, + const std::string& svc_id, + std::string_view counter_name, + std::string_view sub_counter_name, + const perf_counter_label_pairs& labels) +{ + auto extract_latest_counters = [](PerfCounterInstance& counter_instance, + PerfCounterType& counter_type, + PyFormatter& f) { + if (counter_type.type & PERFCOUNTER_LONGRUNAVG) { + const auto& datapoint = counter_instance.get_latest_data_avg(); + f.dump_float("t", datapoint.t); + f.dump_unsigned("s", datapoint.s); + f.dump_unsigned("c", datapoint.c); + } else { + const auto& datapoint = counter_instance.get_latest_data(); + f.dump_float("t", datapoint.t); + f.dump_unsigned("v", datapoint.v); + } + }; + return with_perf_counters( + extract_latest_counters, svc_name, svc_id, counter_name, 
sub_counter_name, + labels); +} + PyObject* ActivePyModules::get_unlabeled_perf_schema_python( const std::string &svc_type, const std::string &svc_id) @@ -995,9 +1091,6 @@ PyObject* ActivePyModules::get_unlabeled_perf_schema_python( return f.get(); } -// Holds a list of label pairs for a counter, [(level, shallow), (pooltype, replicated)] -typedef std::vector> perf_counter_label_pairs; - PyObject* ActivePyModules::get_perf_schema_python( const std::string& svc_type, const std::string& svc_id) diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index 4e6caf3ee1b..31ebb0c71e6 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -99,6 +99,12 @@ public: const std::string &svc_type, const std::string &svc_id, const std::string &path); + PyObject *get_latest_counter_python( + const std::string &svc_type, + const std::string &svc_id, + std::string_view counter_name, + std::string_view sub_counter_name, + const std::vector> &labels); PyObject *get_unlabeled_perf_schema_python( const std::string &svc_type, const std::string &svc_id); @@ -117,6 +123,18 @@ public: const std::string &svc_name, const std::string &svc_id, const std::string &path) const; + /// @note @c fct is not allowed to acquire locks when holding GIL + PyObject *with_perf_counters( + std::function fct, + const std::string &svc_name, + const std::string &svc_id, + std::string_view counter_name, + std::string_view sub_counter_name, + const std::vector> &labels) + const; MetricQueryID add_osd_perf_query( const OSDPerfMetricQuery &query, diff --git a/src/mgr/BaseMgrModule.cc b/src/mgr/BaseMgrModule.cc index 395cda35292..7a57246db30 100644 --- a/src/mgr/BaseMgrModule.cc +++ b/src/mgr/BaseMgrModule.cc @@ -667,6 +667,43 @@ get_latest_unlabeled_counter(BaseMgrModule *self, PyObject *args) svc_name, svc_id, counter_path); } +static PyObject* +get_latest_counter(BaseMgrModule *self, PyObject *args) +{ + char *svc_name = nullptr; + char *svc_id = nullptr; + char *counter_name = 
nullptr; + char *sub_counter_name = nullptr; + PyObject *labels_list = nullptr; //labels = [("level", "deep"), ("pooltype", "ec")] + if (!PyArg_ParseTuple(args, "ssssO:get_latest_counter", &svc_name, + &svc_id, &counter_name, &sub_counter_name, + &labels_list)) { + return nullptr; + } + + if (!PyList_Check(labels_list)) { + derr << __func__ << " labels_list not a list" << dendl; + Py_RETURN_FALSE; + } + + std::vector> labels; + for (int i = 0; i < PyList_Size(labels_list); ++i) { + // Get the tuple element of labels list ("level", "deep") + PyObject *label_key_value = PyList_GET_ITEM(labels_list, i); + + char *label_key = nullptr; + char *label_value = nullptr; + if (!PyArg_ParseTuple(label_key_value, "ss:label_pair", &label_key, &label_value)) { + derr << fmt::format("{} list item {} not a size 2 tuple", __func__, i) << dendl; + continue; + } + labels.push_back(std::make_pair(label_key, label_value)); + } + + return self->py_modules->get_latest_counter_python( + svc_name, svc_id, counter_name, sub_counter_name, labels); +} + static PyObject* get_unlabeled_perf_schema(BaseMgrModule *self, PyObject *args) { @@ -1493,16 +1530,19 @@ PyMethodDef BaseMgrModule_methods[] = { "Set a stored field"}, {"_ceph_get_unlabeled_counter", (PyCFunction)get_unlabeled_counter, METH_VARARGS, - "Get a performance counter"}, + "Get a performance counter"}, {"_ceph_get_latest_unlabeled_counter", (PyCFunction)get_latest_unlabeled_counter, METH_VARARGS, - "Fetch (or get) the latest (or updated) value of an unlabeled counter"}, + "Fetch (or get) the latest (or updated) value of an unlabeled counter"}, + + {"_ceph_get_latest_counter", (PyCFunction)get_latest_counter, METH_VARARGS, + "Fetch (or get) the latest (or updated) value of a performance counter"}, {"_ceph_get_unlabeled_perf_schema", (PyCFunction)get_unlabeled_perf_schema, METH_VARARGS, - "Get the unlabeled performance counter schema"}, + "Get the unlabeled performance counter schema"}, {"_ceph_get_perf_schema", 
(PyCFunction)get_perf_schema, METH_VARARGS, - "Get the performance counter schema"}, + "Get the performance counter schema"}, {"_ceph_get_rocksdb_version", (PyCFunction)ceph_get_rocksdb_version, METH_NOARGS, "Get the current RocksDB version number"}, diff --git a/src/pybind/ceph_argparse.py b/src/pybind/ceph_argparse.py index 7377c3143e8..a2792c12e95 100644 --- a/src/pybind/ceph_argparse.py +++ b/src/pybind/ceph_argparse.py @@ -173,7 +173,7 @@ class CephArgtype(object): assert len(type_args) == 1 attrs['n'] = 'N' return CephArgtype.to_argdesc(type_args[0], attrs, positional=positional) - elif orig_type is Tuple: + elif orig_type in (Tuple, tuple): assert len(type_args) >= 1 inner_tp = type_args[0] assert type_args.count(inner_tp) == len(type_args), \ diff --git a/src/pybind/mgr/ceph_module.pyi b/src/pybind/mgr/ceph_module.pyi index 91419461faa..9c9e4c0d339 100644 --- a/src/pybind/mgr/ceph_module.pyi +++ b/src/pybind/mgr/ceph_module.pyi @@ -77,7 +77,7 @@ class BaseMgrModule(object): def _ceph_get_rocksdb_version(self) -> str: ... def _ceph_get_unlabeled_counter(self, svc_type: str, svc_name: str, path: str) -> Dict[str, List[Tuple[float, int]]]: ... def _ceph_get_latest_unlabeled_counter(self, svc_type, svc_name, path): ... - def _ceph_get_latest_counter(self, svc_type, svc_name, path): ... + def _ceph_get_latest_counter(self, svc_type: str, svc_name: str, counter_name: str, sub_counter_name: str, labels: List[Tuple[str, str]]): ... def _ceph_get_metadata(self, svc_type, svc_id): ... def _ceph_get_daemon_status(self, svc_type, svc_id): ... 
def _ceph_send_command(self, diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index 990343c31b0..f57b5fc3f6c 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -1641,7 +1641,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): :return: list of dicts describing the counters requested """ return self._ceph_get_unlabeled_perf_schema(svc_type, svc_name) - + @API.expose def get_perf_schema(self, svc_type: str, @@ -1689,8 +1689,8 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): self, svc_type: str, svc_name: str, path: str ) -> Dict[str, Union[Tuple[float, int], Tuple[float, int, int]]]: """ - Called by the plugin to fetch only the newest performance counter data - point for a particular counter on a particular service. + Called by the plugin to fetch only the newest performance unlabeled counter + data point for a particular counter on a particular service. :param str svc_type: :param str svc_name: @@ -1702,6 +1702,32 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): """ return self._ceph_get_latest_unlabeled_counter(svc_type, svc_name, path) + @API.expose + def get_latest_counter(self, + svc_type: str, + svc_name: str, + counter_name: str, + sub_counter_name: str, + labels: List[Tuple[str, str]]) -> Dict[str, Union[Tuple[float, int], + Tuple[float, int, int]]]: + """ + Called by the plugin to fetch only the newest performance counter data + point for a particular counter on a particular service. 
+ + :param str svc_type: + :param str svc_name: + :param str counter_name: the key_name of the counter, for example + "osd_scrub_sh_repl" + :param str sub_counter_name: the counters present under the key_name, + for example "successful_scrubs_elapsed" + :param list[(str, str)] labels: the labels associated with the counter, + for example "[("level", "deep"), ("pooltype", "ec")]" + :return: A list of two-tuples of (timestamp, value) or three-tuple of + (timestamp, value, count) is returned. This may be empty if no + data is available. + """ + return self._ceph_get_latest_counter(svc_type, svc_name, counter_name, sub_counter_name, labels) + @API.expose def list_servers(self) -> List[ServerInfoT]: """ @@ -2214,6 +2240,16 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): else: return 0 + @API.expose + def get_counter_latest(self, daemon_type: str, daemon_name: str, counter_name: str, + sub_counter_name: str, labels: List[Tuple[str, str]]) -> int: + data = self.get_latest_counter( + daemon_type, daemon_name, counter_name, sub_counter_name, labels)[counter_name] + if data: + return data[1] + else: + return 0 + @API.expose def get_unlabeled_counter_latest_avg(self, daemon_type: str, daemon_name: str, counter: str) -> Tuple[int, int]: data = self.get_latest_unlabeled_counter( @@ -2225,6 +2261,18 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): else: return 0, 0 + @API.expose + def get_counter_latest_avg(self, daemon_type: str, daemon_name: str, counter_name: str, + sub_counter_name: str, labels: List[Tuple[str, str]]) -> Tuple[int, int]: + data = self.get_latest_counter( + daemon_type, daemon_name, counter_name, sub_counter_name, labels)[counter_name] + if data: + # https://github.com/python/mypy/issues/1178 + _, value, count = cast(Tuple[float, int, int], data) + return value, count + else: + return 0, 0 + @API.expose @profile_method() def get_unlabeled_perf_counters(