From f6021bd4829fe2bf3fbc63900a8f69143f7dd444 Mon Sep 17 00:00:00 2001
From: Aashish Sharma
Date: Fri, 25 Apr 2025 11:58:37 +0530
Subject: [PATCH] mgr/dashboard: Migrate from promtail to grafana alloy

Promtail is now deprecated, so switch the centralized logging setup to
Grafana Alloy.

Fixes: https://tracker.ceph.com/issues/71072
Signed-off-by: Aashish Sharma
---
 doc/cephadm/services/monitoring.rst           | 11 ++-
 src/cephadm/cephadm.py                        |  7 +-
 src/cephadm/cephadmlib/daemons/monitoring.py  | 25 ++++++-
 src/cephadm/cephadmlib/listing_updaters.py    |  1 +
 src/pybind/mgr/cephadm/migrations.py          | 73 ++++++++++++++++++-
 src/pybind/mgr/cephadm/module.py              |  9 ++-
 src/pybind/mgr/cephadm/services/monitoring.py | 37 ++++++++++
 .../mgr/cephadm/templates/services/alloy.j2   | 20 +++++
 src/pybind/mgr/cephadm/tests/test_services.py |  2 +-
 src/pybind/mgr/cephadm/utils.py               |  2 +-
 .../app/ceph/cluster/logs/logs.component.html |  6 +-
 .../app/ceph/cluster/logs/logs.component.ts   |  4 +-
 .../service-form/service-form.component.ts    |  3 +-
 src/pybind/mgr/orchestrator/_interface.py     |  7 ++
 src/pybind/mgr/orchestrator/module.py         |  1 +
 src/python-common/ceph/cephadm/images.py      |  4 +-
 .../ceph/deployment/service_spec.py           |  5 +-
 17 files changed, 194 insertions(+), 23 deletions(-)
 create mode 100644 src/pybind/mgr/cephadm/templates/services/alloy.j2

diff --git a/doc/cephadm/services/monitoring.rst b/doc/cephadm/services/monitoring.rst
index 6f06799f79d..ef29860854f 100644
--- a/doc/cephadm/services/monitoring.rst
+++ b/doc/cephadm/services/monitoring.rst
@@ -119,7 +119,7 @@ retrieve the current credentials.
 Centralized Logging in Ceph
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Ceph now provides centralized logging with Loki & Promtail. Centralized Log Management (CLM) consolidates all log data and pushes it to a central repository,
+Ceph now provides centralized logging with Loki and Alloy. Centralized Log Management (CLM) consolidates all log data and pushes it to a central repository,
 with an accessible and easy-to-use interface. Centralized logging is designed to make your life easier.
 Some of the advantages are:
 
@@ -128,13 +128,12 @@ Some of the advantages are:
 #. **Flexible retention policies**: with per-daemon logs, log rotation is usually set to a short interval (1-2 weeks) to save disk usage.
 #. **Increased security & backup**: logs can contain sensitive information and expose usage patterns. Additionally, centralized logging allows for HA, etc.
 
-Centralized Logging in Ceph is implemented using two new services - ``loki`` & ``promtail``.
+Centralized Logging in Ceph is implemented using two services: ``loki`` and ``alloy``.
 
-Loki: It is basically a log aggregation system and is used to query logs. It can be configured as a datasource in Grafana.
+* Loki is a log aggregation system and is used to query logs. It can be configured as a ``datasource`` in Grafana.
+* Alloy acts as an agent that gathers logs from each node and forwards them to Loki.
 
-Promtail: It acts as an agent that gathers logs from the system and makes them available to Loki.
-
-These two services are not deployed by default in a Ceph cluster. To enable the centralized logging you can follow the steps mentioned here :ref:`centralized-logging`.
+These two services are not deployed by default in a Ceph cluster. To enable centralized logging you can follow the steps mentioned here :ref:`centralized-logging`.
 
 .. _cephadm-monitoring-networks-ports:
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
index f75aaa86dac..95cdcc28eae 100755
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -655,6 +655,9 @@ def create_daemon_dirs(
         elif daemon_type == 'promtail':
             data_dir_root = ident.data_dir(ctx.data_dir)
             config_dir = 'etc/promtail'
+        elif daemon_type == 'alloy':
+            data_dir_root = ident.data_dir(ctx.data_dir)
+            config_dir = 'etc/alloy'
             makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
             makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
         elif daemon_type == 'loki':
@@ -2250,7 +2253,7 @@ def prepare_ssh(
             'Perhaps the ceph version being bootstrapped does not support it')
 
     if ctx.with_centralized_logging:
-        for t in ['loki', 'promtail']:
+        for t in ['loki', 'alloy']:
             logger.info('Deploying %s service with default placement...' % t)
             try:
                 cli(['orch', 'apply', t])
@@ -5009,7 +5012,7 @@ def _get_parser():
     parser_bootstrap.add_argument(
         '--with-centralized-logging',
         action='store_true',
-        help='Automatically provision centralized logging (promtail, loki)')
+        help='Automatically provision centralized logging (alloy, loki)')
     parser_bootstrap.add_argument(
         '--apply-spec',
         help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
index 4ba00daaefb..d93fc6c68ec 100644
--- a/src/cephadm/cephadmlib/daemons/monitoring.py
+++ b/src/cephadm/cephadmlib/daemons/monitoring.py
@@ -38,6 +38,7 @@ class Monitoring(ContainerDaemonForm):
         'alertmanager': [9093, 9094],
         'loki': [3100],
         'promtail': [9080],
+        'alloy': [9080],
     }
 
     components = {
@@ -73,6 +74,17 @@ class Monitoring(ContainerDaemonForm):
                 'promtail.yml',
             ],
         },
+        'alloy': {
+            'image': DefaultImages.ALLOY.image_ref,
+            'cpus': '1',
+            'memory': '1GB',
+            'args': [
+                'run',
+                '/etc/alloy/config.alloy',
+                '--storage.path=/var/lib/alloy/data',
+            ],
+            'config-json-files': ['config.alloy'],
+        },
         'node-exporter': {
             'image': DefaultImages.NODE_EXPORTER.image_ref,
             'cpus': '1',
@@ -112,7 +124,7 @@ class Monitoring(ContainerDaemonForm):
     def get_version(ctx, container_id, daemon_type):
         # type: (CephadmContext, str, str) -> str
         """
-        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
+        :param: daemon_type Either "prometheus", "alertmanager", "loki", "alloy" or "node-exporter"
         """
         assert daemon_type in (
             'prometheus',
@@ -120,6 +132,7 @@ class Monitoring(ContainerDaemonForm):
             'node-exporter',
             'loki',
             'promtail',
+            'alloy',
         )
         cmd = daemon_type.replace('-', '_')
         code = -1
@@ -175,6 +188,8 @@ class Monitoring(ContainerDaemonForm):
             uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
         elif daemon_type == 'promtail':
             uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
+        elif daemon_type == 'alloy':
+            uid, gid = extract_uid_gid(ctx, file_path='/etc/alloy')
         elif daemon_type == 'alertmanager':
             uid, gid = extract_uid_gid(
                 ctx, file_path=['/etc/alertmanager', '/etc/prometheus']
             )
@@ -240,7 +255,7 @@ class Monitoring(ContainerDaemonForm):
         metadata = self.components[daemon_type]
         r = list(metadata.get('args', []))
         # set ip and port to bind to for nodeexporter,alertmanager,prometheus
-        if daemon_type not in ['grafana', 'loki', 'promtail']:
+        if daemon_type not in ['grafana', 'loki', 'promtail', 'alloy']:
             ip = ''
             port = self.port_map[daemon_type][0]
             meta = fetch_meta(ctx)
@@ -333,6 +348,10 @@ class Monitoring(ContainerDaemonForm):
             mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
             mounts[log_dir] = '/var/log/ceph:z'
             mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
+        elif daemon_type == 'alloy':
+            mounts[os.path.join(data_dir, 'etc/alloy')] = '/etc/alloy:Z'
+            mounts[log_dir] = '/var/log/ceph:z'
+            mounts[os.path.join(data_dir, 'data')] = '/var/lib/alloy/data:Z'
         elif daemon_type == 'node-exporter':
             mounts[
                 os.path.join(data_dir, 'etc/node-exporter')
@@ -379,6 +398,8 @@ class Monitoring(ContainerDaemonForm):
             # by ubuntu 18.04 kernel!)
         ]
         args.extend(monitoring_args)
+        if self.identity.daemon_type == 'alloy':
+            args.extend(['--user=root'])
         if self.identity.daemon_type == 'node-exporter':
             # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
             # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
diff --git a/src/cephadm/cephadmlib/listing_updaters.py b/src/cephadm/cephadmlib/listing_updaters.py
index 8344fb9b03d..910505891c7 100644
--- a/src/cephadm/cephadmlib/listing_updaters.py
+++ b/src/cephadm/cephadmlib/listing_updaters.py
@@ -240,6 +240,7 @@ class VersionStatusUpdater(DaemonStatusUpdater):
             'node-exporter',
             'loki',
             'promtail',
+            'alloy',
         ]:
             version = Monitoring.get_version(
                 ctx, container_id, daemon_type
             )
diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py
index a475e2a7efe..f664897f35f 100644
--- a/src/pybind/mgr/cephadm/migrations.py
+++ b/src/pybind/mgr/cephadm/migrations.py
@@ -16,7 +16,7 @@ from orchestrator import OrchestratorError, DaemonDescription
 if TYPE_CHECKING:
     from .module import CephadmOrchestrator
 
-LAST_MIGRATION = 8
+LAST_MIGRATION = 9
 
 logger = logging.getLogger(__name__)
@@ -86,7 +86,6 @@ class Migrations:
                 "cephadm migration still ongoing. Please wait, until the migration is complete.")
 
     def migrate(self, startup: bool = False) -> None:
-
         logger.info('running migrations')
 
         if self.mgr.migration_current == 0:
@@ -121,6 +120,10 @@ class Migrations:
             if self.migrate_7_8():
                 self.set(8)
 
+        if self.mgr.migration_current == 8:
+            if self.migrate_8_9():
+                self.set(9)
+
     def migrate_0_1(self) -> bool:
         """
         Migration 0 -> 1
@@ -498,6 +501,72 @@ class Migrations:
             self.rgw_ssl_migration_queue = []
         return True
 
+    def migrate_8_9(self) -> bool:
+        """
+        Replace Promtail with Alloy.
+
+        - If mgr daemons are still being upgraded, return False so the migration is retried later.
+        - Mark the Promtail service unmanaged so cephadm won't redeploy it.
+        - Remove Promtail daemons to free their ports.
+        - Deploy Alloy with Promtail's placement.
+        - After the Alloy spec has been applied, remove the Promtail service spec.
+        """
+        try:
+            target_digests = getattr(self.mgr.upgrade.upgrade_state, "target_digests", [])
+            active_mgr_digests = self.mgr.get_active_mgr_digests()
+
+            if target_digests:
+                if not any(d in target_digests for d in active_mgr_digests):
+                    logger.info(
+                        "Promtail -> Alloy migration: mgr daemons still upgrading, "
+                        "deferring the migration until the upgrade completes."
+                    )
+                    return False
+
+            promtail_spec = self.mgr.spec_store.active_specs.get("promtail")
+            if not promtail_spec:
+                logger.info("Promtail -> Alloy migration: no Promtail "
+                            "service found, nothing to do.")
+                return True
+
+            if not promtail_spec.unmanaged:
+                logger.info("Promtail -> Alloy migration: marking promtail unmanaged")
+                self.mgr.spec_store.set_unmanaged("promtail", True)
+
+            daemons = self.mgr.cache.get_daemons()
+            promtail_daemons = [d for d in daemons if d.daemon_type == "promtail"]
+            if promtail_daemons:
+                promtail_names = [d.name() for d in promtail_daemons]
+                logger.info(f"Promtail -> Alloy migration: removing daemons {promtail_names}")
+                self.mgr.remove_daemons(promtail_names)
+
+            daemons = self.mgr.cache.get_daemons()
+            if any(d.daemon_type == "promtail" for d in daemons):
+                logger.info(
+                    "Promtail -> Alloy migration: promtail daemons still present, "
+                    "skipping Alloy deployment until next run."
+                )
+                return False
+
+            alloy_spec = ServiceSpec(
+                service_type="alloy",
+                placement=promtail_spec.placement
+            )
+
+            logger.info("Promtail -> Alloy migration: deploying Alloy service")
+            self.mgr.apply_alloy(alloy_spec)
+
+            logger.info("Promtail -> Alloy migration: removing promtail service spec")
+            self.mgr.remove_service("promtail")
+
+            logger.info("Promtail -> Alloy migration completed successfully.")
+            return True
+
+        except Exception as e:
+            logger.error(f"Promtail -> Alloy migration failed: {e}")
+            return False
+
 
 def queue_migrate_rgw_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
     """
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index e1983e47db0..e8db61fd232 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -537,6 +537,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         self.container_image_node_exporter = ''
         self.container_image_loki = ''
         self.container_image_promtail = ''
+        self.container_image_alloy = ''
         self.container_image_haproxy = ''
         self.container_image_keepalived = ''
         self.container_image_snmp_gateway = ''
@@ -931,7 +932,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         suffix = daemon_type not in [
             'mon', 'crash', 'ceph-exporter', 'node-proxy',
             'prometheus', 'node-exporter', 'grafana', 'alertmanager',
-            'container', 'agent', 'snmp-gateway', 'loki', 'promtail',
+            'container', 'agent', 'snmp-gateway', 'loki', 'promtail', 'alloy',
            'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query',
             'mgmt-gateway', 'oauth2-proxy'
         ]
         if forcename:
@@ -1747,6 +1748,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             'nvmeof': self.container_image_nvmeof,
             'prometheus': self.container_image_prometheus,
             'promtail': self.container_image_promtail,
+            'alloy': self.container_image_alloy,
             'snmp-gateway': self.container_image_snmp_gateway,
             'mgmt-gateway': self.container_image_nginx,
             'oauth2-proxy': self.container_image_oauth2_proxy,
@@ -3756,6 +3758,7 @@ Then run the following:
             'ceph-exporter': PlacementSpec(host_pattern='*'),
             'loki': PlacementSpec(count=1),
             'promtail': PlacementSpec(host_pattern='*'),
+            'alloy': PlacementSpec(host_pattern='*'),
             'crash': PlacementSpec(host_pattern='*'),
             'container': PlacementSpec(count=1),
             'snmp-gateway': PlacementSpec(count=1),
@@ -3901,6 +3904,10 @@ Then run the following:
     def apply_promtail(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
 
+    @handle_orch_error
+    def apply_alloy(self, spec: ServiceSpec) -> str:
+        return self._apply(spec)
+
     @handle_orch_error
     def apply_node_exporter(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 667b5eb211a..5bfe52b7835 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -826,6 +826,43 @@ class LokiService(CephadmService):
         }, sorted(deps)
 
 
+@register_cephadm_service
+class AlloyService(CephadmService):
+    TYPE = 'alloy'
+    DEFAULT_SERVICE_PORT = 9080
+
+    @classmethod
+    def get_dependencies(cls, mgr: "CephadmOrchestrator",
+                         spec: Optional[ServiceSpec] = None,
+                         daemon_type: Optional[str] = None) -> List[str]:
+        return sorted(d.name() for d in mgr.cache.get_daemons_by_types(['loki']))
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemons = self.mgr.cache.get_daemons_by_service('loki')
+        loki_host = ''
+        for i, dd in enumerate(daemons):
+            assert dd.hostname is not None
+            if i == 0:
+                loki_host = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
+
+        context = {
+            'client_hostname': loki_host,
+        }
+
+        alloy_config = self.mgr.template.render('services/alloy.j2', context)
+        return {
+            "files": {
+                "config.alloy": alloy_config
+            }
+        }, self.get_dependencies(self.mgr)
+
+
 @register_cephadm_service
 class PromtailService(CephadmService):
     TYPE = 'promtail'
diff --git a/src/pybind/mgr/cephadm/templates/services/alloy.j2 b/src/pybind/mgr/cephadm/templates/services/alloy.j2
new file mode 100644
index 00000000000..240b81e6ae6
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/alloy.j2
@@ -0,0 +1,20 @@
+local.file_match "system" {
+  path_targets = [{
+    __address__ = "localhost",
+    __path__ = "/var/log/ceph/**/*.log",
+    job = "Cluster Logs",
+  }]
+}
+
+loki.source.file "system" {
+  targets = local.file_match.system.targets
+  forward_to = [loki.write.default.receiver]
+  legacy_positions_file = "/var/lib/alloy/data/positions.yaml"
+}
+
+loki.write "default" {
+  endpoint {
+    url = "http://{{ client_hostname }}:3100/loki/api/v1/push"
+  }
+  external_labels = {}
+}
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 3849bf4671e..0c44108056b 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -143,7 +143,7 @@ class TestCephadmService:
 
         # services based on CephadmService shouldn't have get_auth_entity
         with pytest.raises(AttributeError):
-            for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']:
+            for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail', 'alloy']:
                 service_registry.get_service(daemon_type).get_auth_entity("id1", "host")
                 service_registry.get_service(daemon_type).get_auth_entity("id1", "")
                 service_registry.get_service(daemon_type).get_auth_entity("id1")
diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py
index 5d09518da4c..bc509904c24 100644
--- a/src/pybind/mgr/cephadm/utils.py
+++ b/src/pybind/mgr/cephadm/utils.py
@@ -26,7 +26,7 @@ CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror',
               'cephfs-mirror', 'ceph-exporter']
 GATEWAY_TYPES = ['iscsi', 'nfs', 'nvmeof', 'smb']
 MONITORING_STACK_TYPES = ['node-exporter', 'prometheus',
-                          'alertmanager', 'grafana', 'loki', 'promtail']
+                          'alertmanager', 'grafana', 'loki', 'promtail', 'alloy']
 RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['haproxy', 'nfs']
 
 CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.html
index d869e15062c..f5cf5ddb0d0 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.html
@@ -79,7 +79,7 @@ i18n>Daemon Logs
-
+
-            Please start the loki and promtail service to see these logs.
+            Please start the loki and alloy services to see these logs.
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.ts
index 4c381eab037..a40aeccb676 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/logs/logs.component.ts
@@ -39,7 +39,7 @@ export class LogsComponent implements OnInit, OnDestroy {
   clogText: string;
   auditLogText: string;
   lokiServiceStatus$: Observable<boolean>;
-  promtailServiceStatus$: Observable<boolean>;
+  alloyServiceStatus$: Observable<boolean>;
   interval: number;
   priorities: Array<{ name: string; value: string }> = [
@@ -89,7 +89,7 @@ export class LogsComponent implements OnInit, OnDestroy {
         return data.length > 0 && data[0].status === 1;
       })
     );
-    this.promtailServiceStatus$ = this.cephService.getDaemons('promtail').pipe(
+    this.alloyServiceStatus$ = this.cephService.getDaemons('alloy').pipe(
       map((data: any) => {
         return data.length > 0 && data[0].status === 1;
       })
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts
index b010ba39905..e1228f5ef90 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts
@@ -633,7 +633,8 @@ export class ServiceFormComponent extends CdForm implements OnInit {
       // Remove service types:
       // osd - This is deployed a different way.
       // container - This should only be used in the CLI.
-      this.hiddenServices.push('osd', 'container');
+      // promtail - This is deprecated and replaced by alloy.
+      this.hiddenServices.push('osd', 'container', 'promtail');
 
       this.serviceTypes = _.difference(resp, this.hiddenServices).sort();
     });
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index 8fff2919d8f..8842b40f74b 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -695,6 +695,7 @@ class Orchestrator(object):
             'prometheus': self.apply_prometheus,
             'loki': self.apply_loki,
             'promtail': self.apply_promtail,
+            'alloy': self.apply_alloy,
             'rbd-mirror': self.apply_rbd_mirror,
             'rgw': self.apply_rgw,
             'ingress': self.apply_ingress,
@@ -946,6 +947,10 @@ class Orchestrator(object):
         """Update existing a Promtail daemon(s)"""
         raise NotImplementedError()
 
+    def apply_alloy(self, spec: ServiceSpec) -> OrchResult[str]:
+        """Update existing Alloy daemon(s)"""
+        raise NotImplementedError()
+
     def apply_crash(self, spec: ServiceSpec) -> OrchResult[str]:
         """Update existing a crash daemon(s)"""
         raise NotImplementedError()
@@ -1076,6 +1081,7 @@ def daemon_type_to_service(dtype: str) -> str:
         'ceph-exporter': 'ceph-exporter',
         'loki': 'loki',
         'promtail': 'promtail',
+        'alloy': 'alloy',
         'crash': 'crash',
         'crashcollector': 'crash',  # Specific Rook Daemon
         'container': 'container',
@@ -1111,6 +1117,7 @@ def service_to_daemon_types(stype: str) -> List[str]:
         'prometheus': ['prometheus'],
         'loki': ['loki'],
         'promtail': ['promtail'],
+        'alloy': ['alloy'],
         'node-exporter': ['node-exporter'],
         'ceph-exporter': ['ceph-exporter'],
         'crash': ['crash'],
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index 14319a7d894..b570e8ced27 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -186,6 +186,7 @@ class ServiceType(enum.Enum):
     prometheus = 'prometheus'
     loki = 'loki'
     promtail = 'promtail'
+    alloy = 'alloy'
     mds = 'mds'
     rgw = 'rgw'
     nfs = 'nfs'
diff --git a/src/python-common/ceph/cephadm/images.py b/src/python-common/ceph/cephadm/images.py
index fbe1fe572eb..ce4fbc78f71 100644
--- a/src/python-common/ceph/cephadm/images.py
+++ b/src/python-common/ceph/cephadm/images.py
@@ -27,6 +27,7 @@ class DefaultImages(Enum):
     PROMETHEUS = _create_image('quay.io/prometheus/prometheus:v2.51.0', 'prometheus')
     LOKI = _create_image('docker.io/grafana/loki:3.0.0', 'loki')
     PROMTAIL = _create_image('docker.io/grafana/promtail:3.0.0', 'promtail')
+    ALLOY = _create_image('docker.io/grafana/alloy:latest', 'alloy')
     NODE_EXPORTER = _create_image('quay.io/prometheus/node-exporter:v1.7.0', 'node_exporter')
     ALERTMANAGER = _create_image('quay.io/prometheus/alertmanager:v0.27.0', 'alertmanager')
     GRAFANA = _create_image('quay.io/ceph/grafana:11.6.0', 'grafana')
@@ -61,10 +62,11 @@ class DefaultImages(Enum):
 class NonCephImageServiceTypes(Enum):
     prometheus = 'prometheus'
     loki = 'loki'
-    promtail = 'promtail'
+    alloy = 'alloy'
     node_exporter = 'node-exporter'
     alertmanager = 'alertmanager'
     grafana = 'grafana'
+    promtail = 'promtail'
     nvmeof = 'nvmeof'
     snmp_gateway = 'snmp-gateway'
     elasticsearch = 'elasticsearch'
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index cd1df722fcc..10b1d391f8f 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -822,6 +822,7 @@ class ServiceSpec(object):
         'osd',
         'prometheus',
         'promtail',
+        'alloy',
         'rbd-mirror',
         'rgw',
         'smb',
@@ -892,6 +893,7 @@ class ServiceSpec(object):
             'prometheus': PrometheusSpec,
             'loki': MonitoringSpec,
             'promtail': MonitoringSpec,
+            'alloy': MonitoringSpec,
             'snmp-gateway': SNMPGatewaySpec,
             'elasticsearch': TracingSpec,
             'jaeger-agent': TracingSpec,
@@ -2717,7 +2719,7 @@ class MonitoringSpec(ServiceSpec):
                  custom_configs: Optional[List[CustomConfig]] = None,
                  ):
         assert service_type in ['grafana', 'node-exporter', 'prometheus', 'alertmanager',
-                                'loki', 'promtail']
+                                'loki', 'alloy', 'promtail']
         super(MonitoringSpec, self).__init__(
             service_type, service_id,
@@ -2743,6 +2745,7 @@ class MonitoringSpec(ServiceSpec):
                 'alertmanager': 9093,
                 'grafana': 3000,
                 'loki': 3100,
+                'alloy': 9080,
                 'promtail': 9080}[self.service_type]
-- 
2.39.5
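
Not part of the patch: a minimal sketch of how the new 'alloy' service type
can be exercised from Python once this change is in place, using only APIs
the patch touches or uses (MonitoringSpec and PlacementSpec from
src/python-common/ceph/deployment/service_spec.py). The host_pattern mirrors
the default placement added in module.py; names and values here are
illustrative assumptions, not part of the change itself.

    # Sketch: describe an 'alloy' service the way cephadm would model it after
    # `ceph orch apply alloy` (or the --with-centralized-logging bootstrap
    # flag, which now applies 'loki' and 'alloy').
    from ceph.deployment.service_spec import MonitoringSpec, PlacementSpec

    spec = MonitoringSpec(
        service_type='alloy',
        placement=PlacementSpec(host_pattern='*'),  # one Alloy daemon per host
    )
    spec.validate()
    print(spec.service_name())    # 'alloy' - monitoring specs carry no service id
    print(spec.get_port_start())  # [9080], from the port mapping extended above

Operationally this is the same thing prepare_ssh() does during bootstrap:
`ceph orch apply loki` followed by `ceph orch apply alloy`.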