mgr/cephadm: additional debug logging for autotuner

author Adam King <adking@redhat.com>

Tue, 9 Apr 2024 16:10:14 +0000 (12:10 -0400)

committer Adam King <adking@redhat.com>

Wed, 10 Apr 2024 18:01:02 +0000 (14:01 -0400)
author Adam King <adking@redhat.com>
Tue, 9 Apr 2024 16:10:14 +0000 (12:10 -0400)
committer Adam King <adking@redhat.com>
Wed, 10 Apr 2024 18:01:02 +0000 (14:01 -0400)
diff --git a/src/pybind/mgr/cephadm/autotune.py b/src/pybind/mgr/cephadm/autotune.py

index 72ebcd66064c09cb74eede3b388b9245e1958e59..0365c76a868c3b4cdf0f330ed2f4ead61cc86d96 100644 (file)
--- a/src/pybind/mgr/cephadm/autotune.py
+++ b/src/pybind/mgr/cephadm/autotune.py
@@ -32,24 +32,38 @@ class MemoryAutotuner(object):
      def tune(self) -> Tuple[Optional[int], List[str]]:
          tuned_osds: List[str] = []
          total = self.total_mem
+        logger.debug('Autotuning OSD memory with given parameters:\n'
+                     f'Total memory: {total}\nDaemons: {self.daemons}')
          for d in self.daemons:
              if d.daemon_type == 'mds':
-                total -= self.config_get(d.name(), 'mds_cache_memory_limit')
+                mds_mem = self.config_get(d.name(), 'mds_cache_memory_limit')
+                logger.debug(f'Subtracting {mds_mem} from total for mds daemon')
+                total -= mds_mem
+                logger.debug(f'new total: {total}')
                  continue
              if d.daemon_type != 'osd':
                  assert d.daemon_type
-                total -= max(
+                daemon_mem = max(
                      self.min_size_by_type.get(d.daemon_type, self.default_size),
                      d.memory_usage or 0
                  )
+                logger.debug(f'Subtracting {daemon_mem} from total for {d.daemon_type} daemon')
+                total -= daemon_mem
+                logger.debug(f'new total: {total}')
                  continue
              if not self.config_get(d.name(), 'osd_memory_target_autotune'):
-                total -= self.config_get(d.name(), 'osd_memory_target')
+                osd_mem = self.config_get(d.name(), 'osd_memory_target')
+                logger.debug('osd_memory_target_autotune disabled. '
+                             f'Subtracting {osd_mem} from total for osd daemon')
+                total -= osd_mem
+                logger.debug(f'new total: {total}')
                  continue
              tuned_osds.append(d.name())
          if total < 0:
              return None, []
          if not tuned_osds:
              return None, []
+        logger.debug(f'Final total is {total} to be split among {len(tuned_osds)} OSDs')
          per = total // len(tuned_osds)
+        logger.debug(f'Result is {per} per OSD')
          return int(per), tuned_osds
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py

index 4c7889bd18fe0528611b9194180285f1460b7d2a..b8a8d4e5208247456826adc78b5436ae5a9eda2e 100644 (file)
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -195,6 +195,9 @@ class CephadmServe:
              val = None
          else:
              total_mem *= 1024   # kb -> bytes
+            self.log.debug(f'Autotuning memory for host {host} with '
+                           f'{total_mem} total bytes of memory and '
+                           f'{self.mgr.autotune_memory_target_ratio} target ratio')
              total_mem *= self.mgr.autotune_memory_target_ratio
              a = MemoryAutotuner(
                  daemons=self.mgr.cache.get_daemons_by_host(host),
@@ -231,6 +234,9 @@ class CephadmServe:
              # options as users may be using them. Since there is no way to set autotuning
              # on/off at a host level, best we can do is check if it is globally on.
              if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'):
+                self.mgr.log.debug(f'Removing osd_memory_target for OSDs on {host}'
+                                   ' as either there were no OSDs to tune or the '
+                                   ' per OSD memory calculation result was <= 0')
                  self.mgr.check_mon_command({
                      'prefix': 'config rm',
                      'who': f'osd/host:{host.split(".")[0]}',
diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py

index 7994c390a7e8ae8a15e95e485c1b2f815976b7d5..bf6f3d5ef59595d68c0e523c05ffb49fa58256e3 100644 (file)
--- a/src/pybind/mgr/cephadm/tests/test_autotune.py
+++ b/src/pybind/mgr/cephadm/tests/test_autotune.py
@@ -57,7 +57,31 @@ from orchestrator import DaemonDescription
              ],
              {},
              60 * 1024 * 1024 * 1024,
-        )
+        ),
+        (  # Taken from an actual user case
+            int(32827840 * 1024 * 0.7),
+            [
+                DaemonDescription('crash', 'a', 'host1'),
+                DaemonDescription('grafana', 'a', 'host1'),
+                DaemonDescription('mds', 'a', 'host1'),
+                DaemonDescription('mds', 'b', 'host1'),
+                DaemonDescription('mds', 'c', 'host1'),
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mon', 'a', 'host1'),
+                DaemonDescription('node-exporter', 'a', 'host1'),
+                DaemonDescription('osd', '1', 'host1'),
+                DaemonDescription('osd', '2', 'host1'),
+                DaemonDescription('osd', '3', 'host1'),
+                DaemonDescription('osd', '4', 'host1'),
+                DaemonDescription('prometheus', 'a', 'host1'),
+            ],
+            {
+                'mds.a': 4 * 1024 * 1024 * 1024,  # 4294967296
+                'mds.b': 4 * 1024 * 1024 * 1024,
+                'mds.c': 4 * 1024 * 1024 * 1024,
+            },
+            480485376,
+        ),
      ])
  def test_autotune(total, daemons, config, result):
      def fake_getter(who, opt):
@@ -69,6 +93,8 @@ def test_autotune(total, daemons, config, result):
          if opt == 'osd_memory_target':
              return config.get(who, 4 * 1024 * 1024 * 1024)
          if opt == 'mds_cache_memory_limit':
+            if who in config:
+                return config.get(who, 16 * 1024 * 1024 * 1024)
              return 16 * 1024 * 1024 * 1024
  
      a = MemoryAutotuner(
author	Adam King <adking@redhat.com>
	Tue, 9 Apr 2024 16:10:14 +0000 (12:10 -0400)
committer	Adam King <adking@redhat.com>
	Wed, 10 Apr 2024 18:01:02 +0000 (14:01 -0400)
src/pybind/mgr/cephadm/autotune.py		patch | blob | history
src/pybind/mgr/cephadm/serve.py		patch | blob | history
src/pybind/mgr/cephadm/tests/test_autotune.py		patch | blob | history