]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: make logging refresh metadata to debug logs configurable
authorAdam King <adking@redhat.com>
Thu, 20 Oct 2022 22:53:18 +0000 (18:53 -0400)
committerAdam King <adking@redhat.com>
Thu, 20 Oct 2022 22:53:18 +0000 (18:53 -0400)
This output is so verbose it actually makes the debug
logs difficult to use. The info logged from one host
refresh can take up over 1000 lines in the logs. I also
find I rarely actually need this info for debugging, so
having it be something you can toggle would be nice.

Fixes: https://tracker.ceph.com/issues/57909
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/offline_watcher.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/ssh.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
src/pybind/mgr/cephadm/tuned_profiles.py

index a226e808502cde4c5b913dc3bec8d492463cd432..fefb56a3efd89de0c61f414e6b6947f9d39beb52 100644 (file)
@@ -435,6 +435,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             default=True,
             desc='Pass --cgroups=split when cephadm creates containers (currently podman only)'
         ),
+        Option(
+            'log_refresh_metadata',
+            type='bool',
+            default=False,
+            desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level'
+        ),
     ]
 
     def __init__(self, *args: Any, **kwargs: Any):
@@ -511,6 +517,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.max_osd_draining_count = 10
             self.device_enhanced_scan = False
             self.cgroups_split = True
+            self.log_refresh_metadata = False
 
         self.notify(NotifyType.mon_map, None)
         self.config_notify()
index b80f5104eca4ceb3e750179779696fd1fa27136b..2b7751dfc34d8118833ef1c597f1cc5cb370a3c2 100644 (file)
@@ -38,7 +38,7 @@ class OfflineHostWatcher(threading.Thread):
     def check_host(self, host: str) -> None:
         if host not in self.mgr.offline_hosts:
             try:
-                self.mgr.ssh.check_execute_command(host, ['true'])
+                self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata)
             except Exception:
                 logger.debug(f'OfflineHostDetector: detected {host} to be offline')
                 # kick serve loop in case corrective action must be taken for offline host
index 9f0bbb40d17a47ed3459331e142b358e355a3eb9..3960402a46316ced20516d8976057955b7d06429 100644 (file)
@@ -333,7 +333,7 @@ class CephadmServe:
             addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host
             out, err, code = self.mgr.wait_async(self._run_cephadm(
                 host, cephadmNoImage, 'check-host', [],
-                error_ok=True, no_fsid=True))
+                error_ok=True, no_fsid=True, log_output=self.mgr.log_refresh_metadata))
             self.mgr.cache.update_last_host_check(host)
             self.mgr.cache.save_host(host)
             if code:
@@ -349,7 +349,8 @@ class CephadmServe:
 
     def _refresh_host_daemons(self, host: str) -> Optional[str]:
         try:
-            ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True))
+            ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [],
+                                                            no_fsid=True, log_output=self.mgr.log_refresh_metadata))
         except OrchestratorError as e:
             return str(e)
         self.mgr._process_ls_output(host, ls)
@@ -358,7 +359,7 @@ class CephadmServe:
     def _refresh_facts(self, host: str) -> Optional[str]:
         try:
             val = self.mgr.wait_async(self._run_cephadm_json(
-                host, cephadmNoImage, 'gather-facts', [], no_fsid=True))
+                host, cephadmNoImage, 'gather-facts', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
         except OrchestratorError as e:
             return str(e)
 
@@ -377,13 +378,13 @@ class CephadmServe:
         try:
             try:
                 devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
-                                                                     inventory_args))
+                                                                     inventory_args, log_output=self.mgr.log_refresh_metadata))
             except OrchestratorError as e:
                 if 'unrecognized arguments: --filter-for-batch' in str(e):
                     rerun_args = inventory_args.copy()
                     rerun_args.remove('--filter-for-batch')
                     devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
-                                                                         rerun_args))
+                                                                         rerun_args, log_output=self.mgr.log_refresh_metadata))
                 else:
                     raise
 
@@ -401,7 +402,7 @@ class CephadmServe:
     def _refresh_host_networks(self, host: str) -> Optional[str]:
         try:
             networks = self.mgr.wait_async(self._run_cephadm_json(
-                host, 'mon', 'list-networks', [], no_fsid=True))
+                host, 'mon', 'list-networks', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
         except OrchestratorError as e:
             return str(e)
 
@@ -1297,10 +1298,11 @@ class CephadmServe:
                                 args: List[str],
                                 no_fsid: Optional[bool] = False,
                                 image: Optional[str] = "",
+                                log_output: Optional[bool] = True,
                                 ) -> Any:
         try:
             out, err, code = await self._run_cephadm(
-                host, entity, command, args, no_fsid=no_fsid, image=image)
+                host, entity, command, args, no_fsid=no_fsid, image=image, log_output=log_output)
             if code:
                 raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}')
         except Exception as e:
@@ -1323,6 +1325,7 @@ class CephadmServe:
                            error_ok: Optional[bool] = False,
                            image: Optional[str] = "",
                            env_vars: Optional[List[str]] = None,
+                           log_output: Optional[bool] = True,
                            ) -> Tuple[List[str], List[str], int]:
         """
         Run cephadm on the remote host with the given command + args
@@ -1387,7 +1390,8 @@ class CephadmServe:
                     host, cmd, stdin=stdin, addr=addr)
                 if code == 2:
                     ls_cmd = ['ls', self.mgr.cephadm_binary_path]
-                    out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr)
+                    out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr,
+                                                                                  log_command=log_output)
                     if code_ls == 2:
                         await self._deploy_cephadm_binary(host, addr)
                         out, err, code = await self.mgr.ssh._execute_command(
@@ -1417,11 +1421,12 @@ class CephadmServe:
         else:
             assert False, 'unsupported mode'
 
-        self.log.debug(f'code: {code}')
-        if out:
-            self.log.debug(f'out: {out}')
-        if err:
-            self.log.debug(f'err: {err}')
+        if log_output:
+            self.log.debug(f'code: {code}')
+            if out:
+                self.log.debug(f'out: {out}')
+            if err:
+                self.log.debug(f'err: {err}')
         if code and not error_ok:
             raise OrchestratorError(
                 f'cephadm exited with an error code: {code}, stderr: {err}')
index bdd3ae046ddaead2e5f22853fee56a8ecbbd255a..dcfdd0a79b266b2bac3fe9325d67fa5eed50c900 100644 (file)
@@ -134,11 +134,13 @@ class SSHManager:
                                cmd: List[str],
                                stdin: Optional[str] = None,
                                addr: Optional[str] = None,
+                               log_command: Optional[bool] = True,
                                ) -> Tuple[str, str, int]:
         conn = await self._remote_connection(host, addr)
         sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else ""
         cmd = sudo_prefix + " ".join(quote(x) for x in cmd)
-        logger.debug(f'Running command: {cmd}')
+        if log_command:
+            logger.debug(f'Running command: {cmd}')
         try:
             r = await conn.run(f'{sudo_prefix}true', check=True, timeout=5)
             r = await conn.run(cmd, input=stdin)
@@ -171,16 +173,18 @@ class SSHManager:
                         cmd: List[str],
                         stdin: Optional[str] = None,
                         addr: Optional[str] = None,
+                        log_command: Optional[bool] = True
                         ) -> Tuple[str, str, int]:
-        return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr))
+        return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr, log_command))
 
     async def _check_execute_command(self,
                                      host: str,
                                      cmd: List[str],
                                      stdin: Optional[str] = None,
                                      addr: Optional[str] = None,
+                                     log_command: Optional[bool] = True
                                      ) -> str:
-        out, err, code = await self._execute_command(host, cmd, stdin, addr)
+        out, err, code = await self._execute_command(host, cmd, stdin, addr, log_command)
         if code != 0:
             msg = f'Command {cmd} failed. {err}'
             logger.debug(msg)
@@ -192,8 +196,9 @@ class SSHManager:
                               cmd: List[str],
                               stdin: Optional[str] = None,
                               addr: Optional[str] = None,
+                              log_command: Optional[bool] = True,
                               ) -> str:
-        return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr))
+        return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr, log_command))
 
     async def _write_remote_file(self,
                                  host: str,
index 76a1073fc4aa88c480e967361cf0ff6bf2bd3a55..83879c387c39f89baa9777bb9196a31558d5adfd 100644 (file)
@@ -117,13 +117,13 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str
         [host]).stdout == f"Created osd(s) 1 on host '{host}'"
     assert _run_cephadm.mock_calls == [
         mock.call(host, 'osd', 'ceph-volume',
-                  ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''),
+                  ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
         mock.call(host, f'osd.{osd_id}', 'deploy',
                   ['--name', f'osd.{osd_id}', '--meta-json', mock.ANY,
                    '--config-json', '-', '--osd-fsid', 'uuid'],
                   stdin=mock.ANY, image=''),
         mock.call(host, 'osd', 'ceph-volume',
-                  ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''),
+                  ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
     ]
     dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
     assert dd.name() == f'osd.{osd_id}'
@@ -790,11 +790,12 @@ class TestCephadm(object):
                 'test', 'osd', 'ceph-volume',
                 ['--config-json', '-', '--', 'lvm', 'batch',
                     '--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
-                env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}')
+                env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True,
+                stdin='{"config": "", "keyring": ""}')
             _run_cephadm.assert_any_call(
-                'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
+                'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
             _run_cephadm.assert_any_call(
-                'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
+                'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
@@ -834,9 +835,9 @@ class TestCephadm(object):
                 env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
                 error_ok=True, stdin='{"config": "", "keyring": ""}')
             _run_cephadm.assert_any_call(
-                'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
+                'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
             _run_cephadm.assert_any_call(
-                'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
+                'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     @mock.patch("cephadm.module.SpecStore.save")
@@ -1886,10 +1887,10 @@ Traceback (most recent call last):
             assert _run_cephadm.mock_calls == [
                 mock.call('test', 'osd', 'ceph-volume',
                           ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
-                          no_fsid=False),
+                          no_fsid=False, log_output=False),
                 mock.call('test', 'osd', 'ceph-volume',
                           ['--', 'inventory', '--format=json-pretty'], image='',
-                          no_fsid=False),
+                          no_fsid=False, log_output=False),
             ]
 
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
index 1521b01105231d6b92cbc4404f5e91f076c11832..38a3a390745e1c73d94f2d1619fdb14504d4a00f 100644 (file)
@@ -56,6 +56,7 @@ class FakeMgr:
         self.tuned_profiles.profiles = profiles
         self.ssh = SSHManager(self)
         self.offline_hosts = []
+        self.log_refresh_metadata = False
 
     def set_store(self, what: str, value: str):
         raise SaveError(f'{what}: {value}')
@@ -138,7 +139,7 @@ class TestTunedProfiles:
         tp = TunedProfileUtils(mgr)
         tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
         calls = [
-            mock.call('a', ['ls', SYSCTL_DIR]),
+            mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
             mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
             mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
             mock.call('a', ['sysctl', '--system'])
index 5e5189f629f5b651e11836741842ae7e3529a183..19d97f42133f0b9d8e5c723c73fdef8b7a22bff5 100644 (file)
@@ -70,7 +70,7 @@ class TunedProfileUtils():
         if host in self.mgr.offline_hosts:
             return
         cmd = ['ls', SYSCTL_DIR]
-        found_files = self.mgr.ssh.check_execute_command(host, cmd).split('\n')
+        found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n')
         found_files = [s.strip() for s in found_files]
         profile_names: List[str] = sum([[*p] for p in profiles], [])  # extract all profiles names
         profile_names = list(set(profile_names))  # remove duplicates