From: Aashish Sharma
Date: Fri, 11 Feb 2022 09:10:17 +0000 (+0530)
Subject: mgr/dashboard: fixing cephadm errors
X-Git-Tag: v18.0.0~1249^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ba6efa121ab0e7b6949cf41219b68c38d1a77641;p=ceph-ci.git

mgr/dashboard: fixing cephadm errors

Signed-off-by: Aashish Sharma
---

diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm
index 47a4a6f15ca..88765c37311 100755
--- a/src/cephadm/cephadm
+++ b/src/cephadm/cephadm
@@ -507,7 +507,7 @@ class Monitoring(object):
     def get_version(ctx, container_id, daemon_type):
         # type: (CephadmContext, str, str) -> str
         """
-        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
+        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
         """
         assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
         cmd = daemon_type.replace('-', '_')
@@ -523,6 +523,14 @@ class Monitoring(object):
                 if code == 0:
                     break
             cmd = 'alertmanager'  # reset cmd for version extraction
+        elif daemon_type == 'loki':
+            _, err, code = call(ctx, [
+                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
+            ], verbosity=CallVerbosity.DEBUG)
+            if code == 0:
+                ver = err.index('version')
+                brnch = err.index('branch')
+                version = (err[ver + 7:brnch - 2])
         else:
             _, err, code = call(ctx, [
                 ctx.container_engine.path, 'exec', container_id, cmd, '--version'
@@ -2359,7 +2367,7 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
         metadata = Monitoring.components[daemon_type]
         r += metadata.get('args', list())
         # set ip and port to bind to for nodeexporter,alertmanager,prometheus
-        if daemon_type != 'grafana' and daemon_type != 'loki' and daemon_type != 'promtail':
+        if daemon_type not in ['grafana', 'loki', 'promtail']:
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            if 'meta_json' in ctx and ctx.meta_json:
@@ -4749,7 +4757,7 @@ def prepare_ssh(
         cli(['orch', 'apply', 'crash'])
 
     if not ctx.skip_monitoring_stack:
-        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager', 'loki', 'promtail']:
+        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
             logger.info('Deploying %s service with default placement...'
                         % t)
             cli(['orch', 'apply', t])
@@ -6069,10 +6077,6 @@ def command_adopt(ctx):
         command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
     elif daemon_type == 'prometheus':
         command_adopt_prometheus(ctx, daemon_id, fsid)
-    elif daemon_type == 'loki':
-        command_adopt_loki(ctx, daemon_id, fsid)
-    elif daemon_type == 'promtail':
-        command_adopt_promtail(ctx, daemon_id, fsid)
     elif daemon_type == 'grafana':
         command_adopt_grafana(ctx, daemon_id, fsid)
     elif daemon_type == 'node-exporter':
@@ -6308,64 +6312,6 @@ def command_adopt_prometheus(ctx, daemon_id, fsid):
     update_firewalld(ctx, daemon_type)
 
 
-def command_adopt_loki(ctx, daemon_id, fsid):
-    # type: (CephadmContext, str, str) -> None
-    daemon_type = 'loki'
-    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
-
-    _stop_and_disable(ctx, 'loki')
-
-    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
-                                 uid=uid, gid=gid)
-
-    # config
-    config_src = '/etc/loki/loki.yml'
-    config_src = os.path.abspath(ctx.legacy_dir + config_src)
-    config_dst = os.path.join(data_dir_dst, 'etc/loki')
-    makedirs(config_dst, uid, gid, 0o755)
-    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
-
-    # data
-    data_src = '/var/lib/loki'
-    data_src = os.path.abspath(ctx.legacy_dir + data_src)
-    data_dst = os.path.join(data_dir_dst, 'data')
-    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
-
-    make_var_run(ctx, fsid, uid, gid)
-    c = get_container(ctx, fsid, daemon_type, daemon_id)
-    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
-    update_firewalld(ctx, daemon_type)
-
-
-def command_adopt_promtail(ctx, daemon_id, fsid):
-    # type: (CephadmContext, str, str) -> None
-    daemon_type = 'promtail'
-    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
-
-    _stop_and_disable(ctx, 'promtail')
-
-    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
-                                 uid=uid, gid=gid)
-
-    # config
-    config_src = '/etc/promtail/promtail.yml'
-    config_src = os.path.abspath(ctx.legacy_dir + config_src)
-    config_dst = os.path.join(data_dir_dst, 'etc/promtail')
-    makedirs(config_dst, uid, gid, 0o755)
-    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
-
-    # data
-    data_src = '/var/lib/promtail'
-    data_src = os.path.abspath(ctx.legacy_dir + data_src)
-    data_dst = os.path.join(data_dir_dst, 'data')
-    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
-
-    make_var_run(ctx, fsid, uid, gid)
-    c = get_container(ctx, fsid, daemon_type, daemon_id)
-    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
-    update_firewalld(ctx, daemon_type)
-
-
 def command_adopt_grafana(ctx, daemon_id, fsid):
     # type: (CephadmContext, str, str) -> None
 
@@ -8444,7 +8390,7 @@ def _get_parser():
     parser_bootstrap.add_argument(
         '--skip-monitoring-stack',
         action='store_true',
-        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter, loki, promtail)')
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
     parser_bootstrap.add_argument(
         '--apply-spec',
         help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini
index cf76cfa31d0..324e725394b 100644
--- a/src/cephadm/tox.ini
+++ b/src/cephadm/tox.ini
@@ -62,4 +62,4 @@ deps =
     flake8-quotes
 commands =
     flake8 --config=tox.ini {posargs:cephadm}
-    bash -c "test $(grep 'docker.io' cephadm | wc -l) == 11"
+    bash -c "test $(grep 'docker.io' cephadm | wc -l) == 13"
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index bce2bd98b54..ab988637149 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -423,6 +423,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.container_image_alertmanager = ''
             self.container_image_node_exporter = ''
             self.container_image_loki = ''
+            self.container_image_promtail = ''
             self.container_image_haproxy = ''
             self.container_image_keepalived = ''
             self.container_image_snmp_gateway = ''
@@ -2463,7 +2464,7 @@ Then run the following:
                 'prometheus': PlacementSpec(count=1),
                 'node-exporter': PlacementSpec(host_pattern='*'),
                 'loki': PlacementSpec(count=1),
-                'promtail': PlacementSpec(host_pattern='*'),
+                'promtail': PlacementSpec(count=1),
                 'crash': PlacementSpec(host_pattern='*'),
                 'container': PlacementSpec(count=1),
                 'snmp-gateway': PlacementSpec(count=1),
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 7f57cbac2de..61bd1662feb 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -37,12 +37,16 @@ class GrafanaService(CephadmService):
 
             deps.append(dd.name())
 
-        loki_services = []  # type: List[str]
-        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            loki_services.append(build_url(scheme='http', host=addr, port=3100))
+        daemons = self.mgr.cache.get_daemons_by_service('mgr')
+        loki_host = ''
+        assert daemons is not None
+        if daemons != []:
+            assert daemons[0].hostname is not None
+            addr = daemons[0].ip if daemons[0].ip else self._inventory_get_addr(daemons[0].hostname)
+            loki_host = build_url(scheme='http', host=addr, port=3100)
+
         grafana_data_sources = self.mgr.template.render(
-            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_services[0]})
+            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_host})
 
         cert = self.mgr.get_store('grafana_crt')
         pkey = self.mgr.get_store('grafana_key')
@@ -395,10 +399,6 @@ class LokiService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
         deps: List[str] = []
-        hostnames: List[str] = []
-        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            hostnames.append(addr)
 
         yml = self.mgr.template.render('services/loki.yml.j2')
         return {
@@ -422,6 +422,7 @@ class PromtailService(CephadmService):
         deps: List[str] = []
         hostnames: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            assert dd.hostname is not None
             addr = self.mgr.inventory.get_addr(dd.hostname)
             hostnames.append(addr)
         context = {
diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
index 663a712c509..7e5ffe5eaa8 100644
--- a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
@@ -6,7 +6,7 @@ deleteDatasources:
 {% endfor %}
 
   - name: 'Loki'
-    orgId: 1
+    orgId: 2
 
 datasources:
 {% for host in hosts %}
@@ -23,7 +23,7 @@ datasources:
   - name: 'Loki'
     type: 'loki'
     access: 'proxy'
-    orgId: 1
+    orgId: 2
     url: '{{ loki_host }}'
     basicAuth: false
     isDefault: true
diff --git a/src/pybind/mgr/cephadm/templates/services/loki.yml.j2 b/src/pybind/mgr/cephadm/templates/services/loki.yml.j2
index ce29445f05d..27143723113 100644
--- a/src/pybind/mgr/cephadm/templates/services/loki.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/loki.yml.j2
@@ -1,3 +1,4 @@
+# {{ cephadm_managed }}
 auth_enabled: false
 
 server:
@@ -24,4 +25,4 @@ schema_config:
       schema: v11
       index:
         prefix: index_
-        period: 24h
\ No newline at end of file
+        period: 24h
diff --git a/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 b/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2
index 651ee43e511..f500f5d22ee 100644
--- a/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2
@@ -1,3 +1,4 @@
+# {{ cephadm_managed }}
 server:
   http_listen_port: 9080
   grpc_listen_port: 0
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 742da7a2259..afe2c2e3447 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1003,8 +1003,6 @@ class TestCephadm(object):
             ServiceSpec('grafana'),
             ServiceSpec('node-exporter'),
             ServiceSpec('alertmanager'),
-            ServiceSpec('loki'),
-            ServiceSpec('promtail'),
             ServiceSpec('rbd-mirror'),
             ServiceSpec('cephfs-mirror'),
             ServiceSpec('mds', service_id='fsname'),
@@ -1219,8 +1217,6 @@ class TestCephadm(object):
         (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr),
         (ServiceSpec('crash'), CephadmOrchestrator.apply_crash),
         (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus),
-        (ServiceSpec('loki'), CephadmOrchestrator.apply_loki),
-        (ServiceSpec('promtail'), CephadmOrchestrator.apply_promtail),
         (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana),
         (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter),
         (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager),
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 30b1ae93bbe..199e1fc7656 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -325,7 +325,7 @@ class TestMonitoring:
                 ],
                 stdin=json.dumps({"files": {"prometheus.yml": y}}),
                 image='')
-
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
@@ -338,32 +338,30 @@ class TestMonitoring:
                 auth_enabled: false
 
                 server:
-                  http_listen_port: 3100
-                  grpc_listen_port: 8080
+                  http_listen_port: 3100
+                  grpc_listen_port: 8080
                 common:
-                  path_prefix: /tmp/loki
-                  storage:
+                  path_prefix: /tmp/loki
+                  storage:
                     filesystem:
-                      chunks_directory: /tmp/loki/chunks
-                      rules_directory: /tmp/loki/rules
-                  replication_factor: 1
-                  ring:
+                      chunks_directory: /tmp/loki/chunks
+                      rules_directory: /tmp/loki/rules
+                  replication_factor: 1
+                  ring:
                     instance_addr: 127.0.0.1
                     kvstore:
-                      store: inmemory
+                      store: inmemory
 
                 schema_config:
-                  configs:
+                  configs:
                     - from: 2020-10-24
-                      store: boltdb-shipper
-                      object_store: filesystem
-                      schema: v11
-                      index:
+                      store: boltdb-shipper
+                      object_store: filesystem
+                      schema: v11
+                      index:
                         prefix: index_
-                        period: 24h
-
-                """).lstrip()
+                        period: 24h""").lstrip()
 
             _run_cephadm.assert_called_with(
                 'test',
                 'loki.test',
                 'deploy', [
                     '--name', 'loki.test',
+                    '--meta-json', '{"service_name": "loki", "ports": [3100], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                     '--config-json', '-',
                     '--tcp-ports', '3100'
                 ],
                 stdin=json.dumps({"files": {"loki.yml": y}}),
                 image='')
-
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
 
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, MonitoringSpec('promtail')) as _:
+            with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+                    with_service(cephadm_module, MonitoringSpec('promtail')) as _:
                 y = dedent("""
                 # This file is generated by cephadm.
                 server:
                   http_listen_port: 9080
                   grpc_listen_port: 0
 
                 positions:
                   filename: /tmp/positions.yaml
 
                 clients:
-                  - url: http://192.168.1.1:3100/loki/api/v1/push
+                  - url: http://1::4:3100/loki/api/v1/push
 
                 scrape_configs:
-                - job_name: system
-                  static_configs:
-                  - targets:
-                    - 192.168.1.1
-                    labels:
-                      job: Cluster Logs
-                      __path__: /var/log/ceph/**/*.log
-
-                """).lstrip()
+                - job_name: system
+                  static_configs:
+                  - targets:
+                    - 1::4
+                    labels:
+                      job: Cluster Logs
+                      __path__: /var/log/ceph/**/*.log""").lstrip()
 
             _run_cephadm.assert_called_with(
                 'test',
                 'promtail.test',
                 'deploy', [
                     '--name', 'promtail.test',
+                    '--meta-json', '{"service_name": "promtail", "ports": [9080], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                     '--config-json', '-',
                     '--tcp-ports', '9080'
                 ],
@@ -425,13 +424,16 @@ class TestMonitoring:
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
     @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
     def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
-        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
-
-        with with_host(cephadm_module, 'test'):
-            cephadm_module.set_store('grafana_crt', 'c')
-            cephadm_module.set_store('grafana_key', 'k')
-            with with_service(cephadm_module, MonitoringSpec('prometheus')) as _, \
-                    with_service(cephadm_module, GrafanaSpec('grafana')) as _:
+        _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+
+        with with_host(cephadm_module, "test"):
+            cephadm_module.set_store("grafana_crt", "c")
+            cephadm_module.set_store("grafana_key", "k")
+            with with_service(
+                cephadm_module, MonitoringSpec("prometheus")
+            ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service(
+                cephadm_module, GrafanaSpec("grafana")
+            ) as _:
                 files = {
                     'grafana.ini': dedent("""
                         # This file is generated by cephadm.
@@ -459,6 +461,9 @@ class TestMonitoring:
                           - name: 'Dashboard1'
                             orgId: 1
 
+                          - name: 'Loki'
+                            orgId: 2
+
                         datasources:
                           - name: 'Dashboard1'
                             type: 'prometheus'
@@ -468,7 +473,15 @@ class TestMonitoring:
                             basicAuth: false
                             isDefault: true
                             editable: false
-                        """).lstrip(),
+
+                          - name: 'Loki'
+                            type: 'loki'
+                            access: 'proxy'
+                            orgId: 2
+                            url: 'http://[1::4]:3100'
+                            basicAuth: false
+                            isDefault: true
+                            editable: false""").lstrip(),
                     'certs/cert_file': dedent("""
                         # generated by cephadm
                         c""").lstrip(),
@@ -492,15 +505,14 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
+            with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+                    with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
                 out = cephadm_module.cephadm_services['grafana'].generate_config(
                     CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
 
                 assert out == (
                     {
                         'files': {
-                            'certs/cert_file': ANY,
-                            'certs/cert_key': ANY,
                             'grafana.ini':
                                 '# This file is generated by cephadm.\n'
                                 '[users]\n'
@@ -523,14 +535,21 @@ class TestMonitoring:
                                 '  cookie_samesite = none\n'
                                 '  allow_embedding = true',
                             'provisioning/datasources/ceph-dashboard.yml':
-                                '# This file is generated by cephadm.\n'
-                                'deleteDatasources:\n'
-                                '\n'
-                                'datasources:\n'
-                        }
-                    },
-                    [],
-                )
+                                "# This file is generated by cephadm.\n"
+                                'deleteDatasources:\n\n'
+                                "  - name: 'Loki'\n"
+                                '    orgId: 2\n\n'
+                                'datasources:\n\n'
+                                "  - name: 'Loki'\n"
+                                "    type: 'loki'\n"
+                                "    access: 'proxy'\n"
+                                '    orgId: 2\n'
+                                "    url: 'http://[1::4]:3100'\n"
+                                '    basicAuth: false\n'
+                                '    isDefault: true\n'
+                                '    editable: false',
+                            'certs/cert_file': ANY,
+                            'certs/cert_key': ANY}}, [])
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
diff --git a/src/pybind/mgr/tox.ini b/src/pybind/mgr/tox.ini
index 7d172847f15..ea69889e318 100644
--- a/src/pybind/mgr/tox.ini
+++ b/src/pybind/mgr/tox.ini
@@ -183,7 +183,7 @@ modules =
 commands =
     flake8 --config=tox.ini {posargs} \
            {posargs:{[testenv:flake8]modules}}
-    bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 13'
+    bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 15'
 
 [testenv:jinjalint]
 basepython = python3