From: Adam King
Date: Thu, 20 Oct 2022 22:53:18 +0000 (-0400)
Subject: mgr/cephadm: make logging refresh metadata to debug logs configurable
X-Git-Tag: v18.1.0~528^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9ff19e6705b584efe8bfb35cd29fcc20d1caa638;p=ceph-ci.git

mgr/cephadm: make logging refresh metadata to debug logs configurable

This output is so verbose it actually makes the debug logs difficult
to use. The info logged from one host refresh can take up over 1000
lines in the logs. I also find I rarely actually need this info for
debugging, so having it be something you can toggle would be nice.

Fixes: https://tracker.ceph.com/issues/57909
Signed-off-by: Adam King
---
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index a226e808502..fefb56a3efd 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -435,6 +435,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, default=True, desc='Pass --cgroups=split when cephadm creates containers (currently podman only)' ), + Option( + 'log_refresh_metadata', + type='bool', + default=False, + desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. 
Only has effect if logging at debug level' + ), ] def __init__(self, *args: Any, **kwargs: Any): @@ -511,6 +517,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.max_osd_draining_count = 10 self.device_enhanced_scan = False self.cgroups_split = True + self.log_refresh_metadata = False self.notify(NotifyType.mon_map, None) self.config_notify() diff --git a/src/pybind/mgr/cephadm/offline_watcher.py b/src/pybind/mgr/cephadm/offline_watcher.py index b80f5104eca..2b7751dfc34 100644 --- a/src/pybind/mgr/cephadm/offline_watcher.py +++ b/src/pybind/mgr/cephadm/offline_watcher.py @@ -38,7 +38,7 @@ class OfflineHostWatcher(threading.Thread): def check_host(self, host: str) -> None: if host not in self.mgr.offline_hosts: try: - self.mgr.ssh.check_execute_command(host, ['true']) + self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata) except Exception: logger.debug(f'OfflineHostDetector: detected {host} to be offline') # kick serve loop in case corrective action must be taken for offline host diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 9f0bbb40d17..3960402a463 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -333,7 +333,7 @@ class CephadmServe: addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host out, err, code = self.mgr.wait_async(self._run_cephadm( host, cephadmNoImage, 'check-host', [], - error_ok=True, no_fsid=True)) + error_ok=True, no_fsid=True, log_output=self.mgr.log_refresh_metadata)) self.mgr.cache.update_last_host_check(host) self.mgr.cache.save_host(host) if code: @@ -349,7 +349,8 @@ class CephadmServe: def _refresh_host_daemons(self, host: str) -> Optional[str]: try: - ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True)) + ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [], + no_fsid=True, log_output=self.mgr.log_refresh_metadata)) except 
OrchestratorError as e: return str(e) self.mgr._process_ls_output(host, ls) @@ -358,7 +359,7 @@ class CephadmServe: def _refresh_facts(self, host: str) -> Optional[str]: try: val = self.mgr.wait_async(self._run_cephadm_json( - host, cephadmNoImage, 'gather-facts', [], no_fsid=True)) + host, cephadmNoImage, 'gather-facts', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata)) except OrchestratorError as e: return str(e) @@ -377,13 +378,13 @@ class CephadmServe: try: try: devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume', - inventory_args)) + inventory_args, log_output=self.mgr.log_refresh_metadata)) except OrchestratorError as e: if 'unrecognized arguments: --filter-for-batch' in str(e): rerun_args = inventory_args.copy() rerun_args.remove('--filter-for-batch') devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume', - rerun_args)) + rerun_args, log_output=self.mgr.log_refresh_metadata)) else: raise @@ -401,7 +402,7 @@ class CephadmServe: def _refresh_host_networks(self, host: str) -> Optional[str]: try: networks = self.mgr.wait_async(self._run_cephadm_json( - host, 'mon', 'list-networks', [], no_fsid=True)) + host, 'mon', 'list-networks', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata)) except OrchestratorError as e: return str(e) @@ -1297,10 +1298,11 @@ class CephadmServe: args: List[str], no_fsid: Optional[bool] = False, image: Optional[str] = "", + log_output: Optional[bool] = True, ) -> Any: try: out, err, code = await self._run_cephadm( - host, entity, command, args, no_fsid=no_fsid, image=image) + host, entity, command, args, no_fsid=no_fsid, image=image, log_output=log_output) if code: raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}') except Exception as e: @@ -1323,6 +1325,7 @@ class CephadmServe: error_ok: Optional[bool] = False, image: Optional[str] = "", env_vars: Optional[List[str]] = None, + log_output: Optional[bool] = True, ) -> Tuple[List[str], 
List[str], int]: """ Run cephadm on the remote host with the given command + args @@ -1387,7 +1390,8 @@ class CephadmServe: host, cmd, stdin=stdin, addr=addr) if code == 2: ls_cmd = ['ls', self.mgr.cephadm_binary_path] - out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr) + out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr, + log_command=log_output) if code_ls == 2: await self._deploy_cephadm_binary(host, addr) out, err, code = await self.mgr.ssh._execute_command( @@ -1417,11 +1421,12 @@ class CephadmServe: else: assert False, 'unsupported mode' - self.log.debug(f'code: {code}') - if out: - self.log.debug(f'out: {out}') - if err: - self.log.debug(f'err: {err}') + if log_output: + self.log.debug(f'code: {code}') + if out: + self.log.debug(f'out: {out}') + if err: + self.log.debug(f'err: {err}') if code and not error_ok: raise OrchestratorError( f'cephadm exited with an error code: {code}, stderr: {err}') diff --git a/src/pybind/mgr/cephadm/ssh.py b/src/pybind/mgr/cephadm/ssh.py index bdd3ae046dd..dcfdd0a79b2 100644 --- a/src/pybind/mgr/cephadm/ssh.py +++ b/src/pybind/mgr/cephadm/ssh.py @@ -134,11 +134,13 @@ class SSHManager: cmd: List[str], stdin: Optional[str] = None, addr: Optional[str] = None, + log_command: Optional[bool] = True, ) -> Tuple[str, str, int]: conn = await self._remote_connection(host, addr) sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else "" cmd = sudo_prefix + " ".join(quote(x) for x in cmd) - logger.debug(f'Running command: {cmd}') + if log_command: + logger.debug(f'Running command: {cmd}') try: r = await conn.run(f'{sudo_prefix}true', check=True, timeout=5) r = await conn.run(cmd, input=stdin) @@ -171,16 +173,18 @@ class SSHManager: cmd: List[str], stdin: Optional[str] = None, addr: Optional[str] = None, + log_command: Optional[bool] = True ) -> Tuple[str, str, int]: - return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr)) + return 
self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr, log_command)) async def _check_execute_command(self, host: str, cmd: List[str], stdin: Optional[str] = None, addr: Optional[str] = None, + log_command: Optional[bool] = True ) -> str: - out, err, code = await self._execute_command(host, cmd, stdin, addr) + out, err, code = await self._execute_command(host, cmd, stdin, addr, log_command) if code != 0: msg = f'Command {cmd} failed. {err}' logger.debug(msg) @@ -192,8 +196,9 @@ class SSHManager: cmd: List[str], stdin: Optional[str] = None, addr: Optional[str] = None, + log_command: Optional[bool] = True, ) -> str: - return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr)) + return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr, log_command)) async def _write_remote_file(self, host: str, diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 76a1073fc4a..83879c387c3 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -117,13 +117,13 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str [host]).stdout == f"Created osd(s) 1 on host '{host}'" assert _run_cephadm.mock_calls == [ mock.call(host, 'osd', 'ceph-volume', - ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''), + ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True), mock.call(host, f'osd.{osd_id}', 'deploy', ['--name', f'osd.{osd_id}', '--meta-json', mock.ANY, '--config-json', '-', '--osd-fsid', 'uuid'], stdin=mock.ANY, image=''), mock.call(host, 'osd', 'ceph-volume', - ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''), + ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True), ] dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host) assert dd.name() == f'osd.{osd_id}' @@ -790,11 +790,12 @@ class 
TestCephadm(object): 'test', 'osd', 'ceph-volume', ['--config-json', '-', '--', 'lvm', 'batch', '--no-auto', '/dev/sdb', '--yes', '--no-systemd'], - env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}') + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, + stdin='{"config": "", "keyring": ""}') _run_cephadm.assert_any_call( - 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False) + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True) _run_cephadm.assert_any_call( - 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False) + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True) @mock.patch("cephadm.serve.CephadmServe._run_cephadm") def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator): @@ -834,9 +835,9 @@ class TestCephadm(object): env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'], error_ok=True, stdin='{"config": "", "keyring": ""}') _run_cephadm.assert_any_call( - 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False) + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True) _run_cephadm.assert_any_call( - 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False) + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True) @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.module.SpecStore.save") @@ -1886,10 +1887,10 @@ Traceback (most recent call last): assert _run_cephadm.mock_calls == [ mock.call('test', 'osd', 'ceph-volume', ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='', - 
no_fsid=False), + no_fsid=False, log_output=False), mock.call('test', 'osd', 'ceph-volume', ['--', 'inventory', '--format=json-pretty'], image='', - no_fsid=False), + no_fsid=False, log_output=False), ] @mock.patch("cephadm.serve.CephadmServe._run_cephadm") diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py index 1521b011052..38a3a390745 100644 --- a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py +++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py @@ -56,6 +56,7 @@ class FakeMgr: self.tuned_profiles.profiles = profiles self.ssh = SSHManager(self) self.offline_hosts = [] + self.log_refresh_metadata = False def set_store(self, what: str, value: str): raise SaveError(f'{what}: {value}') @@ -138,7 +139,7 @@ class TestTunedProfiles: tp = TunedProfileUtils(mgr) tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) calls = [ - mock.call('a', ['ls', SYSCTL_DIR]), + mock.call('a', ['ls', SYSCTL_DIR], log_command=False), mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']), mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']), mock.call('a', ['sysctl', '--system']) diff --git a/src/pybind/mgr/cephadm/tuned_profiles.py b/src/pybind/mgr/cephadm/tuned_profiles.py index 5e5189f629f..19d97f42133 100644 --- a/src/pybind/mgr/cephadm/tuned_profiles.py +++ b/src/pybind/mgr/cephadm/tuned_profiles.py @@ -70,7 +70,7 @@ class TunedProfileUtils(): if host in self.mgr.offline_hosts: return cmd = ['ls', SYSCTL_DIR] - found_files = self.mgr.ssh.check_execute_command(host, cmd).split('\n') + found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n') found_files = [s.strip() for s in found_files] profile_names: List[str] = sum([[*p] for p in profiles], []) # extract all profiles names profile_names = list(set(profile_names)) # remove duplicates