git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: introducing orch apply daemon loki
author Avan Thakkar <athakkar@redhat.com>
Mon, 24 Jan 2022 22:37:45 +0000 (04:07 +0530)
committer Adam King <adking@redhat.com>
Tue, 3 May 2022 00:48:34 +0000 (20:48 -0400)
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
(cherry picked from commit 5dc96cb62165109338e6e13a259e5e6dc2b5a8ce)

Conflicts:
src/cephadm/cephadm
src/pybind/mgr/cephadm/module.py

src/cephadm/cephadm
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/loki.yml.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 [new file with mode: 0644]
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/python-common/ceph/deployment/service_spec.py

index d87cb1d0026a68fea89dcd3d697e90df7ebe78a3..0ea604c35dd6756795a4985ed155104743460b52 100755 (executable)
@@ -50,8 +50,8 @@ DEFAULT_IMAGE_IS_MASTER = False
 DEFAULT_IMAGE_RELEASE = 'quincy'
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
 DEFAULT_CADVISOR_IMAGE = 'gcr.io/cadvisor/cadvisor:v0.39.3'
-DEFAULT_LOKI_IMAGE = 'docker.io/r/grafana/loki:2.4.0'
-DEFAULT_PROMTAIL_IMAGE = 'docker.io/r/grafana/promtail:2.4.0'
+DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
+DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
 DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
@@ -612,7 +612,7 @@ class Monitoring(object):
         """
         :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
         """
-        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
+        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki')
         cmd = daemon_type.replace('-', '_')
         code = -1
         err = ''
@@ -2540,7 +2540,7 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
         metadata = Monitoring.components[daemon_type]
         r += metadata.get('args', list())
         # set ip and port to bind to for nodeexporter,alertmanager,prometheus
-        if daemon_type != 'grafana':
+        if daemon_type != 'grafana' and daemon_type != 'loki':
             ip = ''
             port = Monitoring.port_map[daemon_type][0]
             if 'meta_json' in ctx and ctx.meta_json:
@@ -2557,6 +2557,8 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
                 r += ['--cluster.peer={}'.format(peer)]
             # some alertmanager, by default, look elsewhere for a config
             r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+        if daemon_type == 'loki':
+            r += ['--config.file=/etc/loki/loki.yml']
     elif daemon_type == NFSGanesha.daemon_type:
         nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
         r += nfs_ganesha.get_daemon_args()
@@ -2637,6 +2639,7 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                                          daemon_type, daemon_id)
             config_dir = 'etc/loki'
             makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
 
         # populate the config directory for the component from the config-json
         if 'files' in config_json:
@@ -2832,8 +2835,10 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
             mounts['/var/lib/docker'] = '/var/lib/docker:ro'
         elif daemon_type == 'loki':
             mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
+            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
         elif daemon_type == 'promtail':
             mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
+            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
         elif daemon_type == 'node-exporter':
             mounts['/proc'] = '/host/proc:ro'
             mounts['/sys'] = '/host/sys:ro'
@@ -5036,7 +5041,7 @@ def prepare_ssh(
         cli(['orch', 'apply', 'crash'])
 
     if not ctx.skip_monitoring_stack:
-        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
+        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager', 'loki']:
             logger.info('Deploying %s service with default placement...' % t)
             cli(['orch', 'apply', t])
 
@@ -5600,6 +5605,10 @@ def extract_uid_gid_monitoring(ctx, daemon_type):
         uid, gid = 65534, 65534
     elif daemon_type == 'grafana':
         uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
+    elif daemon_type == 'loki':
+        uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
+    elif daemon_type == 'promtail':
+        uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
     elif daemon_type == 'alertmanager':
         uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
     else:
@@ -6250,7 +6259,8 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
                                         seen_versions[image_id] = version
                                 elif daemon_type in ['prometheus',
                                                      'alertmanager',
-                                                     'node-exporter']:
+                                                     'node-exporter',
+                                                     'loki']:
                                     version = Monitoring.get_version(ctx, container_id, daemon_type)
                                     seen_versions[image_id] = version
                                 elif daemon_type == 'haproxy':
@@ -6402,6 +6412,8 @@ def command_adopt(ctx):
         command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
     elif daemon_type == 'prometheus':
         command_adopt_prometheus(ctx, daemon_id, fsid)
+    elif daemon_type == 'loki':
+        command_adopt_loki(ctx, daemon_id, fsid)
     elif daemon_type == 'grafana':
         command_adopt_grafana(ctx, daemon_id, fsid)
     elif daemon_type == 'node-exporter':
@@ -6637,6 +6649,35 @@ def command_adopt_prometheus(ctx, daemon_id, fsid):
     update_firewalld(ctx, daemon_type)
 
 
+def command_adopt_loki(ctx, daemon_id, fsid):
+    # type: (CephadmContext, str, str) -> None
+    daemon_type = 'loki'
+    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
+
+    _stop_and_disable(ctx, 'loki')
+
+    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
+                                 uid=uid, gid=gid)
+
+    # config
+    config_src = '/etc/loki/loki.yml'
+    config_src = os.path.abspath(ctx.legacy_dir + config_src)
+    config_dst = os.path.join(data_dir_dst, 'etc/loki')
+    makedirs(config_dst, uid, gid, 0o755)
+    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
+
+    # data
+    data_src = '/var/lib/loki'
+    data_src = os.path.abspath(ctx.legacy_dir + data_src)
+    data_dst = os.path.join(data_dir_dst, 'data')
+    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
+
+    make_var_run(ctx, fsid, uid, gid)
+    c = get_container(ctx, fsid, daemon_type, daemon_id)
+    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
+    update_firewalld(ctx, daemon_type)
+
+
 def command_adopt_grafana(ctx, daemon_id, fsid):
     # type: (CephadmContext, str, str) -> None
 
@@ -8736,7 +8777,7 @@ def _get_parser():
     parser_bootstrap.add_argument(
         '--skip-monitoring-stack',
         action='store_true',
-        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter, loki)')
     parser_bootstrap.add_argument(
         '--apply-spec',
         help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
index d5777a6876da54bae2aee3e3b907c4037b001d22..3355a701cfc294dca29b667229cc7a7fa1f8af52 100644 (file)
@@ -52,7 +52,7 @@ from .services.iscsi import IscsiService
 from .services.nfs import NFSService
 from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError
 from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
-    NodeExporterService, SNMPGatewayService
+    NodeExporterService, SNMPGatewayService, LokiService
 from .schedule import HostAssignment
 from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore, \
     ClientKeyringStore, ClientKeyringSpec
@@ -93,6 +93,8 @@ os._exit = os_exit_noop   # type: ignore
 DEFAULT_IMAGE = 'quay.io/ceph/ceph'
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
 DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
+DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
+DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
 DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
 DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3'
@@ -197,6 +199,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             default=DEFAULT_NODE_EXPORTER_IMAGE,
             desc='Prometheus container image',
         ),
+        Option(
+            'container_image_loki',
+            default=DEFAULT_LOKI_IMAGE,
+            desc='Loki container image',
+        ),
         Option(
             'container_image_haproxy',
             default=DEFAULT_HAPROXY_IMAGE,
@@ -410,6 +417,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.container_image_grafana = ''
             self.container_image_alertmanager = ''
             self.container_image_node_exporter = ''
+            self.container_image_loki = ''
             self.container_image_haproxy = ''
             self.container_image_keepalived = ''
             self.container_image_snmp_gateway = ''
@@ -505,7 +513,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         _service_clses: Sequence[Type[CephadmService]] = [
             OSDService, NFSService, MonService, MgrService, MdsService,
             RgwService, RbdMirrorService, GrafanaService, AlertmanagerService,
-            PrometheusService, NodeExporterService, CrashService, IscsiService,
+            PrometheusService, NodeExporterService, LokiService, CrashService, IscsiService,
             IngressService, CustomContainerService, CephfsMirrorService,
             CephadmAgent, SNMPGatewayService
         ]
@@ -657,7 +665,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         suffix = daemon_type not in [
             'mon', 'crash',
             'prometheus', 'node-exporter', 'grafana', 'alertmanager',
-            'container', 'agent', 'snmp-gateway'
+            'container', 'agent', 'snmp-gateway', 'loki'
         ]
         if forcename:
             if len([d for d in existing if d.daemon_id == forcename]):
@@ -1296,6 +1304,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             image = self.container_image_alertmanager
         elif daemon_type == 'node-exporter':
             image = self.container_image_node_exporter
+        elif daemon_type == 'loki':
+            image = self.container_image_loki
         elif daemon_type == 'haproxy':
             image = self.container_image_haproxy
         elif daemon_type == 'keepalived':
@@ -2468,6 +2478,7 @@ Then run the following:
                 'alertmanager': PlacementSpec(count=1),
                 'prometheus': PlacementSpec(count=1),
                 'node-exporter': PlacementSpec(host_pattern='*'),
+                'loki': PlacementSpec(count=1),
                 'crash': PlacementSpec(host_pattern='*'),
                 'container': PlacementSpec(count=1),
                 'snmp-gateway': PlacementSpec(count=1),
@@ -2563,6 +2574,10 @@ Then run the following:
     def apply_prometheus(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
 
+    @handle_orch_error
+    def apply_loki(self, spec: ServiceSpec) -> str:
+        return self._apply(spec)
+
     @handle_orch_error
     def apply_node_exporter(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
index 239c24bab096b13a131ffe887a8cab1fb82befba..b2174be1d4286b78c84087b263f6844cad5357ff 100644 (file)
@@ -390,6 +390,30 @@ class NodeExporterService(CephadmService):
         return HandleCommandResult(0, out, '')
 
 
+class LokiService(CephadmService):
+    TYPE = 'loki'
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        deps: List[str] = []
+        hostnames: List[str] = []
+        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            addr = self.mgr.inventory.get_addr(dd.hostname)
+            hostnames.append(addr)
+
+        yml = self.mgr.template.render('services/loki.yml.j2')
+        return {
+            "files": {
+                "loki.yml": yml
+            }
+        }, sorted(deps)
+
+
 class SNMPGatewayService(CephadmService):
     TYPE = 'snmp-gateway'
 
diff --git a/src/pybind/mgr/cephadm/templates/services/loki.yml.j2 b/src/pybind/mgr/cephadm/templates/services/loki.yml.j2
new file mode 100644 (file)
index 0000000..d1849cb
--- /dev/null
@@ -0,0 +1,26 @@
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+
+common:
+  path_prefix: /tmp/loki
+  storage:
+    filesystem:
+      chunks_directory: /tmp/loki/chunks
+      rules_directory: /tmp/loki/rules
+  replication_factor: 1
+  ring:
+    instance_addr: 127.0.0.1
+    kvstore:
+      store: inmemory
+
+schema_config:
+  configs:
+    - from: 2020-10-24
+      store: boltdb-shipper
+      object_store: filesystem
+      schema: v11
+      index:
+        prefix: index_
+        period: 24h
\ No newline at end of file
diff --git a/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2 b/src/pybind/mgr/cephadm/templates/services/promtail.yml.j2
new file mode 100644 (file)
index 0000000..33e5664
--- /dev/null
@@ -0,0 +1,18 @@
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+
+positions:
+  filename: /tmp/positions.yaml
+
+client:
+  url: http://localhost:3100/loki/api/v1/push
+
+scrape_configs:
+- job_name: docker
+  pipeline_stages:
+    - docker: {}
+  static_configs:
+    - labels:
+        job: docker
+        __path__: /var/lib/docker/containers/*/*-json.log
\ No newline at end of file
index 912040f9e16cfb083cff37ed8f077e223c31947a..fb7e87f6704dfdf06440508182b85babf398f614 100644 (file)
@@ -466,6 +466,7 @@ class Orchestrator(object):
             'node-exporter': self.apply_node_exporter,
             'osd': lambda dg: self.apply_drivegroups([dg]),  # type: ignore
             'prometheus': self.apply_prometheus,
+            'loki': self.apply_loki,
             'rbd-mirror': self.apply_rbd_mirror,
             'rgw': self.apply_rgw,
             'ingress': self.apply_ingress,
@@ -641,6 +642,10 @@ class Orchestrator(object):
     def apply_node_exporter(self, spec: ServiceSpec) -> OrchResult[str]:
         """Update existing a Node-Exporter daemon(s)"""
         raise NotImplementedError()
+    
+    def apply_loki(self, spec: ServiceSpec) -> OrchResult[str]:
+        """Update existing a Loki daemon(s)"""
+        raise NotImplementedError()
 
     def apply_crash(self, spec: ServiceSpec) -> OrchResult[str]:
         """Update existing a crash daemon(s)"""
@@ -722,6 +727,7 @@ def daemon_type_to_service(dtype: str) -> str:
         'alertmanager': 'alertmanager',
         'prometheus': 'prometheus',
         'node-exporter': 'node-exporter',
+        'loki': 'loki',
         'crash': 'crash',
         'crashcollector': 'crash',  # Specific Rook Daemon
         'container': 'container',
@@ -746,6 +752,7 @@ def service_to_daemon_types(stype: str) -> List[str]:
         'grafana': ['grafana'],
         'alertmanager': ['alertmanager'],
         'prometheus': ['prometheus'],
+        'loki': ['loki'],
         'node-exporter': ['node-exporter'],
         'crash': ['crash'],
         'container': ['container'],
index c7631df749297d96ec84e7b645a7f3d90ce63760..ef7ab03450748e7f1f65e0dae289472bead5c943 100644 (file)
@@ -57,6 +57,7 @@ class ServiceType(enum.Enum):
     grafana = 'grafana'
     node_exporter = 'node-exporter'
     prometheus = 'prometheus'
+    loki = 'loki'
     mds = 'mds'
     rgw = 'rgw'
     nfs = 'nfs'
index 854b1ae4acf30ee8f6a10fa1ae58cd54cb0b9ebf..544b9ac93f0008bd4544b9939a5673cc9fea187d 100644 (file)
@@ -443,7 +443,7 @@ class ServiceSpec(object):
     This structure is supposed to be enough information to
     start the services.
     """
-    KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi mds mgr mon nfs ' \
+    KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi loki mds mgr mon nfs ' \
                           'node-exporter osd prometheus rbd-mirror rgw agent ' \
                           'container ingress cephfs-mirror snmp-gateway'.split()
     REQUIRES_SERVICE_ID = 'iscsi mds nfs rgw container ingress '.split()
@@ -467,6 +467,7 @@ class ServiceSpec(object):
             'grafana': GrafanaSpec,
             'node-exporter': MonitoringSpec,
             'prometheus': MonitoringSpec,
+            'loki': MonitoringSpec,
             'snmp-gateway': SNMPGatewaySpec,
         }.get(service_type, cls)
         if ret == ServiceSpec and not service_type:
@@ -1069,7 +1070,7 @@ class MonitoringSpec(ServiceSpec):
                  port: Optional[int] = None,
                  extra_container_args: Optional[List[str]] = None,
                  ):
-        assert service_type in ['grafana', 'node-exporter', 'prometheus', 'alertmanager']
+        assert service_type in ['grafana', 'node-exporter', 'prometheus', 'alertmanager', 'loki']
 
         super(MonitoringSpec, self).__init__(
             service_type, service_id,
@@ -1090,7 +1091,8 @@ class MonitoringSpec(ServiceSpec):
             return {'prometheus': 9095,
                     'node-exporter': 9100,
                     'alertmanager': 9093,
-                    'grafana': 3000}[self.service_type]
+                    'grafana': 3000,
+                    'loki': 3100}[self.service_type]
 
 
 yaml.add_representer(MonitoringSpec, ServiceSpec.yaml_representer)