telemetry/module.py: update gather_perf_counters to emit values of labeled counters

author Naveen Naidu <naveennaidu479@gmail.com>

Tue, 25 Feb 2025 17:36:26 +0000 (17:36 +0000)

committer Naveen Naidu <naveennaidu479@gmail.com>

Tue, 8 Apr 2025 02:26:46 +0000 (07:56 +0530)
author Naveen Naidu <naveennaidu479@gmail.com>
Tue, 25 Feb 2025 17:36:26 +0000 (17:36 +0000)
committer Naveen Naidu <naveennaidu479@gmail.com>
Tue, 8 Apr 2025 02:26:46 +0000 (07:56 +0530)
diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py

index cd13dd79643191a3153784300c1ed5ce0fbbde9a..c9aa3b2792d0cff7d067678294bccc99af4579d7 100644 (file)
--- a/src/pybind/mgr/telemetry/module.py
+++ b/src/pybind/mgr/telemetry/module.py
@@ -808,27 +808,36 @@ class Module(MgrModule):
          return crashlist
  
      def gather_perf_counters(self, mode: str = 'separated') -> Dict[str, dict]:
-        # Extract perf counter data with get_unlabeled_perf_counters(), a method
-        # from mgr/mgr_module.py. This method returns a nested dictionary that
-        # looks a lot like perf schema, except with some additional fields.
-        #
-        # Example of output, a snapshot of a mon daemon:
-        #   "mon.b": {
-        #       "bluestore.kv_flush_lat": {
-        #           "count": 2431,
-        #           "description": "Average kv_thread flush latency",
-        #           "nick": "fl_l",
-        #           "priority": 8,
-        #           "type": 5,
-        #           "units": 1,
-        #           "value": 88814109
-        #       },
-        #   },
-        perf_counters = self.get_unlabeled_perf_counters()
+        """
+        Extract perf counter data with get_perf_counters(), a method from
+        mgr/mgr_module.py. This method returns a nested dictionary that looks a
+        lot like perf schema, except with some additional fields.
+
+        Example of output, a snapshot of a mon daemon:
+            "mon.b":{
+                "bluestore": [
+                    {
+                        "labels": {},
+                        "counters": {
+                            "kv_flush_lat": {
+                                "description": "bluestore.kv_flush_lat",
+                                "nick": "kfsl",
+                                "type": 5,
+                                "priority": 8,
+                                "units": 1,
+                                "value": 14814406948,
+                                "count": 141
+                            },
+                        }
+                    },
+                ]
+            }
+
+        """
+        perf_counters = self.get_perf_counters()
  
          # Initialize 'result' dict
-        result: Dict[str, dict] = defaultdict(lambda: defaultdict(
-            lambda: defaultdict(lambda: defaultdict(int))))
+        result: Dict[str, dict] = defaultdict(lambda: defaultdict(list))
  
          # 'separated' mode
          anonymized_daemon_dict = {}
@@ -850,11 +859,7 @@ class Module(MgrModule):
                  else:
                      result[daemon_type]['num_combined_daemons'] += 1
  
-            for collection in perf_counters_by_daemon:
-                # Split the collection to avoid redundancy in final report; i.e.:
-                #   bluestore.kv_flush_lat, bluestore.kv_final_lat -->
-                #   bluestore: kv_flush_lat, kv_final_lat
-                col_0, col_1 = collection.split('.')
+            for collection, sub_collection_list in perf_counters_by_daemon.items():
  
                  # Debug log for empty keys. This initially was a problem for prioritycache
                  # perf counters, where the col_0 was empty for certain mon counters:
@@ -864,42 +869,52 @@ class Module(MgrModule):
                  #        "cache_bytes": {...},                          "cache_bytes": {...},
                  #
                  # This log is here to detect any future instances of a similar issue.
-                if (daemon == "") or (col_0 == "") or (col_1 == ""):
+                if (daemon == "") or (collection == ""):
                      self.log.debug("Instance of an empty key: {}{}".format(daemon, collection))
+                    continue
  
-                if mode == 'separated':
-                    # Add value to result
-                    result[daemon][col_0][col_1]['value'] = \
-                            perf_counters_by_daemon[collection]['value']
-
-                    # Check that 'count' exists, as not all counters have a count field.
-                    if 'count' in perf_counters_by_daemon[collection]:
-                        result[daemon][col_0][col_1]['count'] = \
-                                perf_counters_by_daemon[collection]['count']
-                elif mode == 'aggregated':
-                    # Not every rgw daemon has the same schema. Specifically, each rgw daemon
-                    # has a uniquely-named collection that starts off identically (i.e.
-                    # "objecter-0x...") then diverges (i.e. "...55f4e778e140.op_rmw").
-                    # This bit of code combines these unique counters all under one rgw instance.
-                    # Without this check, the schema would remain separeted out in the final report.
-                    if col_0[0:11] == "objecter-0x":
-                        col_0 = "objecter-0x"
-
-                    # Check that the value can be incremented. In some cases,
-                    # the files are of type 'pair' (real-integer-pair, integer-integer pair).
-                    # In those cases, the value is a dictionary, and not a number.
-                    #   i.e. throttle-msgr_dispatch_throttler-hbserver["wait"]
-                    if isinstance(perf_counters_by_daemon[collection]['value'], numbers.Number):
-                        result[daemon_type][col_0][col_1]['value'] += \
-                                perf_counters_by_daemon[collection]['value']
-
-                    # Check that 'count' exists, as not all counters have a count field.
-                    if 'count' in perf_counters_by_daemon[collection]:
-                        result[daemon_type][col_0][col_1]['count'] += \
-                                perf_counters_by_daemon[collection]['count']
-                else:
-                    self.log.error('Incorrect mode specified in gather_perf_counters: {}'.format(mode))
-                    return {}
+                result[daemon][collection] = []
+
+                for sub_collection in sub_collection_list:
+                    sub_collection_result: Dict[str, dict] = defaultdict(lambda: defaultdict(dict))
+                    sub_collection_result['labels'] = sub_collection['labels']
+                    for sub_collection_counter_name, sub_collection_counter_info in sub_collection['counters'].items():
+                        if mode == 'separated':
+                            # Add value to result
+                            sub_collection_result['counters'][sub_collection_counter_name]['value'] = \
+                                sub_collection_counter_info['value']
+
+                            # Check that 'count' exists, as not all counters have a count field.
+                            if 'count' in sub_collection_counter_info:
+                                sub_collection_result['counters'][sub_collection_counter_name]['count'] = \
+                                        sub_collection_counter_info['count']
+                        elif mode == 'aggregated':
+                            self.log.debug("telemetry in mode: agregated")
+                            # Not every rgw daemon has the same schema. Specifically, each rgw daemon
+                            # has a uniquely-named collection that starts off identically (i.e.
+                            # "objecter-0x...") then diverges (i.e. "...55f4e778e140.op_rmw").
+                            # This bit of code combines these unique counters all under one rgw instance.
+                            # Without this check, the schema would remain separated out in the final report.
+                            if collection[0:11] == "objecter-0x":
+                                collection = "objecter-0x"
+
+                            # Check that the value can be incremented. In some cases,
+                            # the files are of type 'pair' (real-integer-pair, integer-integer pair).
+                            # In those cases, the value is a dictionary, and not a number.
+                            #   i.e. throttle-msgr_dispatch_throttler-hbserver["wait"]
+                            if isinstance(sub_collection_counter_info['value'], numbers.Number):
+                                sub_collection_result['counters'][sub_collection_counter_name]['value'] += \
+                                        sub_collection_counter_info['value']
+
+                            # Check that 'count' exists, as not all counters have a count field.
+                            if 'count' in sub_collection_counter_info:
+                                sub_collection_result['counters'][sub_collection_counter_name]['count'] += \
+                                        sub_collection_counter_info['count']
+                        else:
+                            self.log.error('Incorrect mode specified in gather_perf_counters: {}'.format(mode))
+                            return {}
+
+                    result[daemon][collection].append(sub_collection_result)
  
          if mode == 'separated':
              # for debugging purposes only, this data is never reported
author	Naveen Naidu <naveennaidu479@gmail.com>
	Tue, 25 Feb 2025 17:36:26 +0000 (17:36 +0000)
committer	Naveen Naidu <naveennaidu479@gmail.com>
	Tue, 8 Apr 2025 02:26:46 +0000 (07:56 +0530)