git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: fixing cephadm errors
authorAashish Sharma <aashishsharma@localhost.localdomain>
Fri, 11 Feb 2022 09:10:17 +0000 (14:40 +0530)
committerAdam King <adking@redhat.com>
Tue, 3 May 2022 00:48:34 +0000 (20:48 -0400)
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
(cherry picked from commit ba6efa121ab0e7b6949cf41219b68c38d1a77641)

src/cephadm/cephadm
src/cephadm/tox.ini
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
src/pybind/mgr/cephadm/templates/services/loki.yml.j2
src/pybind/mgr/cephadm/templates/services/promtail.yml.j2
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_services.py
src/pybind/mgr/tox.ini

index f8966b26de74cffde5ff6c1bf1d6ac950af8afd5..fd84ec4b9936a1870f0b12d0168b3004bc5605e5 100755 (executable)
@@ -604,7 +604,7 @@ class Monitoring(object):
     def get_version(ctx, container_id, daemon_type):
         # type: (CephadmContext, str, str) -> str
         """
-        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
+        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
         """
         assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
         cmd = daemon_type.replace('-', '_')
@@ -620,6 +620,14 @@ class Monitoring(object):
                 if code == 0:
                     break
             cmd = 'alertmanager'  # reset cmd for version extraction
+        elif daemon_type == 'loki':
+            _, err, code = call(ctx, [
+                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
+            ], verbosity=CallVerbosity.DEBUG)
+            if code == 0:
+                ver = err.index('version')
+                brnch = err.index('branch')
+                version = (err[ver + 7:brnch - 2])
         else:
             _, err, code = call(ctx, [
                 ctx.container_engine.path, 'exec', container_id, cmd, '--version'
@@ -2534,7 +2542,7 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
         metadata = Monitoring.components[daemon_type]
         r += metadata.get('args', list())
         # set ip and port to bind to for nodeexporter,alertmanager,prometheus
-        if daemon_type != 'grafana' and daemon_type != 'loki' and daemon_type != 'promtail':
+        if daemon_type not in ['grafana', 'loki', 'promtail']:
             ip = ''
             port = Monitoring.port_map[daemon_type][0]
             if 'meta_json' in ctx and ctx.meta_json:
@@ -5035,7 +5043,7 @@ def prepare_ssh(
         cli(['orch', 'apply', 'crash'])
 
     if not ctx.skip_monitoring_stack:
-        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager', 'loki', 'promtail']:
+        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
             logger.info('Deploying %s service with default placement...' % t)
             cli(['orch', 'apply', t])
 
@@ -6407,10 +6415,6 @@ def command_adopt(ctx):
         command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
     elif daemon_type == 'prometheus':
         command_adopt_prometheus(ctx, daemon_id, fsid)
-    elif daemon_type == 'loki':
-        command_adopt_loki(ctx, daemon_id, fsid)
-    elif daemon_type == 'promtail':
-        command_adopt_promtail(ctx, daemon_id, fsid)
     elif daemon_type == 'grafana':
         command_adopt_grafana(ctx, daemon_id, fsid)
     elif daemon_type == 'node-exporter':
@@ -6646,64 +6650,6 @@ def command_adopt_prometheus(ctx, daemon_id, fsid):
     update_firewalld(ctx, daemon_type)
 
 
-def command_adopt_loki(ctx, daemon_id, fsid):
-    # type: (CephadmContext, str, str) -> None
-    daemon_type = 'loki'
-    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
-
-    _stop_and_disable(ctx, 'loki')
-
-    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
-                                 uid=uid, gid=gid)
-
-    # config
-    config_src = '/etc/loki/loki.yml'
-    config_src = os.path.abspath(ctx.legacy_dir + config_src)
-    config_dst = os.path.join(data_dir_dst, 'etc/loki')
-    makedirs(config_dst, uid, gid, 0o755)
-    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
-
-    # data
-    data_src = '/var/lib/loki'
-    data_src = os.path.abspath(ctx.legacy_dir + data_src)
-    data_dst = os.path.join(data_dir_dst, 'data')
-    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
-
-    make_var_run(ctx, fsid, uid, gid)
-    c = get_container(ctx, fsid, daemon_type, daemon_id)
-    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
-    update_firewalld(ctx, daemon_type)
-
-
-def command_adopt_promtail(ctx, daemon_id, fsid):
-    # type: (CephadmContext, str, str) -> None
-    daemon_type = 'promtail'
-    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
-
-    _stop_and_disable(ctx, 'promtail')
-
-    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
-                                 uid=uid, gid=gid)
-
-    # config
-    config_src = '/etc/promtail/promtail.yml'
-    config_src = os.path.abspath(ctx.legacy_dir + config_src)
-    config_dst = os.path.join(data_dir_dst, 'etc/promtail')
-    makedirs(config_dst, uid, gid, 0o755)
-    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
-
-    # data
-    data_src = '/var/lib/promtail'
-    data_src = os.path.abspath(ctx.legacy_dir + data_src)
-    data_dst = os.path.join(data_dir_dst, 'data')
-    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
-
-    make_var_run(ctx, fsid, uid, gid)
-    c = get_container(ctx, fsid, daemon_type, daemon_id)
-    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
-    update_firewalld(ctx, daemon_type)
-
-
 def command_adopt_grafana(ctx, daemon_id, fsid):
     # type: (CephadmContext, str, str) -> None
 
@@ -8803,7 +8749,7 @@ def _get_parser():
     parser_bootstrap.add_argument(
         '--skip-monitoring-stack',
         action='store_true',
-        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter, loki, promtail)')
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
     parser_bootstrap.add_argument(
         '--apply-spec',
         help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
index cf76cfa31d0589feae5554084ba962790ac920af..324e725394b342420a936014b3f9cd493a34a7ec 100644 (file)
@@ -62,4 +62,4 @@ deps =
     flake8-quotes
 commands =
     flake8 --config=tox.ini {posargs:cephadm}
-    bash -c "test $(grep 'docker.io' cephadm | wc -l) == 11"
+    bash -c "test $(grep 'docker.io' cephadm | wc -l) == 13"
index ef4c9de761a93adf3bbf1f6ce1fa0a1a717158e6..3b58d9251010c2c1b02b8acf9f19f580fe0a6d17 100644 (file)
@@ -423,6 +423,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.container_image_alertmanager = ''
             self.container_image_node_exporter = ''
             self.container_image_loki = ''
+            self.container_image_promtail = ''
             self.container_image_haproxy = ''
             self.container_image_keepalived = ''
             self.container_image_snmp_gateway = ''
@@ -2486,7 +2487,7 @@ Then run the following:
                 'prometheus': PlacementSpec(count=1),
                 'node-exporter': PlacementSpec(host_pattern='*'),
                 'loki': PlacementSpec(count=1),
-                'promtail': PlacementSpec(host_pattern='*'),
+                'promtail': PlacementSpec(count=1),
                 'crash': PlacementSpec(host_pattern='*'),
                 'container': PlacementSpec(count=1),
                 'snmp-gateway': PlacementSpec(count=1),
index be759c3e75dfa83929a73b16d3593d0935269ef3..6bde0cc3e8c18773d4fa665707c4f1bbc40d9d4c 100644 (file)
@@ -37,12 +37,16 @@ class GrafanaService(CephadmService):
 
             deps.append(dd.name())
 
-        loki_services = []  # type: List[str]
-        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            loki_services.append(build_url(scheme='http', host=addr, port=3100))
+        daemons = self.mgr.cache.get_daemons_by_service('mgr')
+        loki_host = ''
+        assert daemons is not None
+        if daemons != []:
+            assert daemons[0].hostname is not None
+            addr = daemons[0].ip if daemons[0].ip else self._inventory_get_fqdn(daemons[0].hostname)
+            loki_host = build_url(scheme='http', host=addr, port=3100)
+
         grafana_data_sources = self.mgr.template.render(
-            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_services[0]})
+            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_host})
 
         cert = self.mgr.get_store('grafana_crt')
         pkey = self.mgr.get_store('grafana_key')
@@ -407,10 +411,6 @@ class LokiService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
         deps: List[str] = []
-        hostnames: List[str] = []
-        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            hostnames.append(addr)
 
         yml = self.mgr.template.render('services/loki.yml.j2')
         return {
@@ -434,6 +434,7 @@ class PromtailService(CephadmService):
         deps: List[str] = []
         hostnames: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            assert dd.hostname is not None
             addr = self.mgr.inventory.get_addr(dd.hostname)
             hostnames.append(addr)
         context = {
index 663a712c5091fdea0dc56fa97f9e83fa9037cc09..7e5ffe5eaa88da1f6cc9142608010287a8e4fb7d 100644 (file)
@@ -6,7 +6,7 @@ deleteDatasources:
 {% endfor %}
 
   - name: 'Loki'
-    orgId: 1
+    orgId: 2
 
 datasources:
 {% for host in hosts %}
@@ -23,7 +23,7 @@ datasources:
   - name: 'Loki'
     type: 'loki'
     access: 'proxy'
-    orgId: 1
+    orgId: 2
     url: '{{ loki_host }}'
     basicAuth: false
     isDefault: true
index ce29445f05dc5e83c5df49004f9c65a23ee318e2..27143723113d897089a1cd60aab2647658b0e7d0 100644 (file)
@@ -1,3 +1,4 @@
+# {{ cephadm_managed }}
 auth_enabled: false
 
 server:
@@ -24,4 +25,4 @@ schema_config:
       schema: v11
       index:
         prefix: index_
-        period: 24h
\ No newline at end of file
+        period: 24h
index 651ee43e51113cdf83990f9e96972c9d773922e2..f500f5d22eef2d815d5012a138251aa8ae39f417 100644 (file)
@@ -1,3 +1,4 @@
+# {{ cephadm_managed }}
 server:
   http_listen_port: 9080
   grpc_listen_port: 0
index ce588791ef965b4f28ef6cc10dd3cc540a896115..4d60e6611b25bc8529635d6e18f23702afc54cf3 100644 (file)
@@ -1033,8 +1033,6 @@ class TestCephadm(object):
             ServiceSpec('grafana'),
             ServiceSpec('node-exporter'),
             ServiceSpec('alertmanager'),
-            ServiceSpec('loki'),
-            ServiceSpec('promtail'),
             ServiceSpec('rbd-mirror'),
             ServiceSpec('cephfs-mirror'),
             ServiceSpec('mds', service_id='fsname'),
@@ -1249,8 +1247,6 @@ class TestCephadm(object):
             (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr),
             (ServiceSpec('crash'), CephadmOrchestrator.apply_crash),
             (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus),
-            (ServiceSpec('loki'), CephadmOrchestrator.apply_loki),
-            (ServiceSpec('promtail'), CephadmOrchestrator.apply_promtail),
             (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana),
             (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter),
             (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager),
index 765e0c659939a4d7244d70e619f80ca2bbe299c7..e401c5b93d024ee8ee0d726435246bf6676b86e9 100644 (file)
@@ -328,7 +328,7 @@ class TestMonitoring:
                     ],
                     stdin=json.dumps({"files": {"prometheus.yml": y}}),
                     image='')
-    
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
@@ -341,32 +341,30 @@ class TestMonitoring:
                 auth_enabled: false
 
                 server:
-                http_listen_port: 3100
-                grpc_listen_port: 8080
+                  http_listen_port: 3100
+                  grpc_listen_port: 8080
 
                 common:
-                path_prefix: /tmp/loki
-                storage:
+                  path_prefix: /tmp/loki
+                  storage:
                     filesystem:
-                    chunks_directory: /tmp/loki/chunks
-                    rules_directory: /tmp/loki/rules
-                replication_factor: 1
-                ring:
+                      chunks_directory: /tmp/loki/chunks
+                      rules_directory: /tmp/loki/rules
+                  replication_factor: 1
+                  ring:
                     instance_addr: 127.0.0.1
                     kvstore:
-                    store: inmemory
+                      store: inmemory
 
                 schema_config:
-                configs:
+                  configs:
                     - from: 2020-10-24
-                    store: boltdb-shipper
-                    object_store: filesystem
-                    schema: v11
-                    index:
+                      store: boltdb-shipper
+                      object_store: filesystem
+                      schema: v11
+                      index:
                         prefix: index_
-                        period: 24h
-
-                """).lstrip()
+                        period: 24h""").lstrip()
 
                 _run_cephadm.assert_called_with(
                     'test',
@@ -374,19 +372,21 @@ class TestMonitoring:
                     'deploy',
                     [
                         '--name', 'loki.test',
+                        '--meta-json',
                         '{"service_name": "loki", "ports": [3100], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                         '--config-json', '-',
                         '--tcp-ports', '3100'
                     ],
                     stdin=json.dumps({"files": {"loki.yml": y}}),
                     image='')
-    
+
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
 
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, MonitoringSpec('promtail')) as _:
+            with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+                    with_service(cephadm_module, MonitoringSpec('promtail')) as _:
 
                 y = dedent("""
                 # This file is generated by cephadm.
@@ -398,18 +398,16 @@ class TestMonitoring:
                   filename: /tmp/positions.yaml
 
                 clients:
-                  - url: http://192.168.1.1:3100/loki/api/v1/push
+                  - url: http://1::4:3100/loki/api/v1/push
 
                 scrape_configs:
-                  - job_name: system
-                    static_configs:
-                    - targets:
-                      - 192.168.1.1
-                      labels:
-                        job: Cluster Logs
-                        __path__: /var/log/ceph/**/*.log
-
-                """).lstrip()
+                - job_name: system
+                  static_configs:
+                  - targets:
+                    - 1::4
+                    labels:
+                      job: Cluster Logs
+                      __path__: /var/log/ceph/**/*.log""").lstrip()
 
                 _run_cephadm.assert_called_with(
                     'test',
@@ -417,6 +415,7 @@ class TestMonitoring:
                     'deploy',
                     [
                         '--name', 'promtail.test',
+                        '--meta-json',
                         '{"service_name": "promtail", "ports": [9080], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                         '--config-json', '-',
                         '--tcp-ports', '9080'
@@ -428,13 +427,16 @@ class TestMonitoring:
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
     @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
     def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
-        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
-
-        with with_host(cephadm_module, 'test'):
-            cephadm_module.set_store('grafana_crt', 'c')
-            cephadm_module.set_store('grafana_key', 'k')
-            with with_service(cephadm_module, MonitoringSpec('prometheus')) as _, \
-                    with_service(cephadm_module, GrafanaSpec('grafana')) as _:
+        _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
+
+        with with_host(cephadm_module, "test"):
+            cephadm_module.set_store("grafana_crt", "c")
+            cephadm_module.set_store("grafana_key", "k")
+            with with_service(
+                cephadm_module, MonitoringSpec("prometheus")
+            ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service(
+                cephadm_module, GrafanaSpec("grafana")
+            ) as _:
                 files = {
                     'grafana.ini': dedent("""
                         # This file is generated by cephadm.
@@ -462,6 +464,9 @@ class TestMonitoring:
                           - name: 'Dashboard1'
                             orgId: 1
 
+                          - name: 'Loki'
+                            orgId: 2
+
                         datasources:
                           - name: 'Dashboard1'
                             type: 'prometheus'
@@ -471,7 +476,15 @@ class TestMonitoring:
                             basicAuth: false
                             isDefault: true
                             editable: false
-                        """).lstrip(),
+
+                          - name: 'Loki'
+                            type: 'loki'
+                            access: 'proxy'
+                            orgId: 2
+                            url: 'http://[1::4]:3100'
+                            basicAuth: false
+                            isDefault: true
+                            editable: false""").lstrip(),
                     'certs/cert_file': dedent("""
                         # generated by cephadm
                         c""").lstrip(),
@@ -495,15 +508,14 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
+            with with_service(cephadm_module, ServiceSpec('mgr')) as _, \
+                    with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
                 out = cephadm_module.cephadm_services['grafana'].generate_config(
                     CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
                 assert out == (
                     {
                         'files':
                             {
-                                'certs/cert_file': ANY,
-                                'certs/cert_key': ANY,
                                 'grafana.ini':
                                     '# This file is generated by cephadm.\n'
                                     '[users]\n'
@@ -526,14 +538,21 @@ class TestMonitoring:
                                     '  cookie_samesite = none\n'
                                     '  allow_embedding = true',
                                 'provisioning/datasources/ceph-dashboard.yml':
-                                    '# This file is generated by cephadm.\n'
-                                    'deleteDatasources:\n'
-                                    '\n'
-                                    'datasources:\n'
-                            }
-                    },
-                    [],
-                )
+                                    "# This file is generated by cephadm.\n"
+                                    'deleteDatasources:\n\n'
+                                    "  - name: 'Loki'\n"
+                                    '    orgId: 2\n\n'
+                                    'datasources:\n\n'
+                                    "  - name: 'Loki'\n"
+                                    "    type: 'loki'\n"
+                                    "    access: 'proxy'\n"
+                                    '    orgId: 2\n'
+                                    "    url: 'http://[1::4]:3100'\n"
+                                    '    basicAuth: false\n'
+                                    '    isDefault: true\n'
+                                    '    editable: false',
+                                'certs/cert_file': ANY,
+                                'certs/cert_key': ANY}}, [])
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
index 7d172847f159a9b7f228df0045e03fe2b4f0f128..ea69889e31800ca3eb89cff17c8e28ae1fb1cf17 100644 (file)
@@ -183,7 +183,7 @@ modules =
 commands =
     flake8 --config=tox.ini {posargs} \
       {posargs:{[testenv:flake8]modules}}
-    bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 13'
+    bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 15'
 
 [testenv:jinjalint]
 basepython = python3