Since Promtail is now deprecated, switch the centralized logging setup to Grafana Alloy.
Fixes: https://tracker.ceph.com/issues/71072
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
Centralized Logging in Ceph
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Ceph now provides centralized logging with Loki & Promtail. Centralized Log Management (CLM) consolidates all log data and pushes it to a central repository,
+Ceph now provides centralized logging with Loki and Alloy. Centralized Log Management (CLM) consolidates all log data and pushes it to a central repository,
with an accessible and easy-to-use interface. Centralized logging is designed to make your life easier.
Some of the advantages are:
#. **Flexible retention policies**: with per-daemon logs, log rotation is usually set to a short interval (1-2 weeks) to save disk usage.
#. **Increased security & backup**: logs can contain sensitive information and expose usage patterns. Additionally, centralized logging allows for HA, etc.
-Centralized Logging in Ceph is implemented using two new services - ``loki`` & ``promtail``.
+Centralized Logging in Ceph is implemented using two services: ``loki`` and ``alloy``.
-Loki: It is basically a log aggregation system and is used to query logs. It can be configured as a datasource in Grafana.
+* Loki is a log aggregation system used to store and query logs. It can be configured as a data source in Grafana.
+* Alloy acts as an agent that gathers logs from each node and forwards them to Loki.
-Promtail: It acts as an agent that gathers logs from the system and makes them available to Loki.
-
-These two services are not deployed by default in a Ceph cluster. To enable the centralized logging you can follow the steps mentioned here :ref:`centralized-logging`.
+These two services are not deployed by default in a Ceph cluster. To enable centralized logging, follow the steps in :ref:`centralized-logging`.
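+
+For example, centralized logging can be enabled at bootstrap time or, on an
+existing cluster, by deploying the two services through the orchestrator (a
+minimal sketch; see :ref:`centralized-logging` for the complete procedure)::
+
+    # at bootstrap time
+    cephadm bootstrap --mon-ip <mon-ip> --with-centralized-logging
+
+    # or on a running cluster
+    ceph orch apply loki
+    ceph orch apply alloy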
.. _cephadm-monitoring-networks-ports:
elif daemon_type == 'promtail':
data_dir_root = ident.data_dir(ctx.data_dir)
config_dir = 'etc/promtail'
+ elif daemon_type == 'alloy':
+ data_dir_root = ident.data_dir(ctx.data_dir)
+ config_dir = 'etc/alloy'
makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
elif daemon_type == 'loki':
'Perhaps the ceph version being bootstrapped does not support it')
if ctx.with_centralized_logging:
- for t in ['loki', 'promtail']:
+ for t in ['loki', 'alloy']:
logger.info('Deploying %s service with default placement...' % t)
try:
cli(['orch', 'apply', t])
parser_bootstrap.add_argument(
'--with-centralized-logging',
action='store_true',
- help='Automatically provision centralized logging (promtail, loki)')
+ help='Automatically provision centralized logging (alloy, loki)')
parser_bootstrap.add_argument(
'--apply-spec',
help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
'alertmanager': [9093, 9094],
'loki': [3100],
'promtail': [9080],
+ 'alloy': [9080],
}
components = {
'promtail.yml',
],
},
+ 'alloy': {
+ 'image': DefaultImages.ALLOY.image_ref,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ 'run',
+ '/etc/alloy/config.alloy',
+ '--storage.path=/var/lib/alloy/data',
+ ],
+ 'config-json-files': ['config.alloy'],
+ },
'node-exporter': {
'image': DefaultImages.NODE_EXPORTER.image_ref,
'cpus': '1',
def get_version(ctx, container_id, daemon_type):
# type: (CephadmContext, str, str) -> str
"""
- :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
+ :param: daemon_type Either "prometheus", "alertmanager", "loki", "alloy" or "node-exporter"
"""
assert daemon_type in (
'prometheus',
'node-exporter',
'loki',
'promtail',
+ 'alloy',
)
cmd = daemon_type.replace('-', '_')
code = -1
uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
elif daemon_type == 'promtail':
uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
+ elif daemon_type == 'alloy':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/alloy')
elif daemon_type == 'alertmanager':
uid, gid = extract_uid_gid(
ctx, file_path=['/etc/alertmanager', '/etc/prometheus']
metadata = self.components[daemon_type]
r = list(metadata.get('args', []))
# set ip and port to bind to for nodeexporter,alertmanager,prometheus
- if daemon_type not in ['grafana', 'loki', 'promtail']:
+ if daemon_type not in ['grafana', 'loki', 'promtail', 'alloy']:
ip = ''
port = self.port_map[daemon_type][0]
meta = fetch_meta(ctx)
mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
mounts[log_dir] = '/var/log/ceph:z'
mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
+ elif daemon_type == 'alloy':
+ mounts[os.path.join(data_dir, 'etc/alloy')] = '/etc/alloy:Z'
+ mounts[log_dir] = '/var/log/ceph:z'
+ mounts[os.path.join(data_dir, 'data')] = '/var/lib/alloy/data:Z'
elif daemon_type == 'node-exporter':
mounts[
os.path.join(data_dir, 'etc/node-exporter')
# by ubuntu 18.04 kernel!)
]
args.extend(monitoring_args)
+ if self.identity.daemon_type == 'alloy':
+ args.extend(['--user=root'])
if self.identity.daemon_type == 'node-exporter':
# in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
# '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
'node-exporter',
'loki',
'promtail',
+ 'alloy',
]:
version = Monitoring.get_version(
ctx, container_id, daemon_type
if TYPE_CHECKING:
from .module import CephadmOrchestrator
-LAST_MIGRATION = 8
+LAST_MIGRATION = 9
logger = logging.getLogger(__name__)
"cephadm migration still ongoing. Please wait, until the migration is complete.")
def migrate(self, startup: bool = False) -> None:
-
logger.info('running migrations')
if self.mgr.migration_current == 0:
if self.migrate_7_8():
self.set(8)
+ if self.mgr.migration_current == 8:
+ if self.migrate_8_9():
+ self.set(9)
+
def migrate_0_1(self) -> bool:
"""
Migration 0 -> 1
self.rgw_ssl_migration_queue = []
return True
+ def migrate_8_9(self) -> bool:
+ """
+ Replace Promtail with Alloy.
+
+        - If mgr daemons are still being upgraded, return False so the migration is retried later.
+ - Mark Promtail service unmanaged so cephadm won't redeploy it.
+ - Remove Promtail daemons to free ports.
+ - Deploy Alloy with Promtail's placement.
+ - Once Alloy is confirmed deployed, remove Promtail service spec.
+ """
+ try:
+ target_digests = getattr(self.mgr.upgrade.upgrade_state, "target_digests", [])
+ active_mgr_digests = self.mgr.get_active_mgr_digests()
+
+ if target_digests:
+ if not any(d in target_digests for d in active_mgr_digests):
+ logger.info(
+                        "Promtail -> Alloy migration: mgr daemons still upgrading; "
+                        "deferring migration until the next run."
+ )
+ return False
+
+ promtail_spec = self.mgr.spec_store.active_specs.get("promtail")
+ if not promtail_spec:
+                logger.info(
+                    "Promtail -> Alloy migration: no promtail service found, nothing to do."
+                )
+ return True
+
+ if not promtail_spec.unmanaged:
+ logger.info("Promtail -> Alloy migration: marking promtail unmanaged")
+ self.mgr.spec_store.set_unmanaged("promtail", True)
+
+ daemons = self.mgr.cache.get_daemons()
+ promtail_daemons = [d for d in daemons if d.daemon_type == "promtail"]
+ if promtail_daemons:
+ promtail_names = [d.name() for d in promtail_daemons]
+ logger.info(f"Promtail -> Alloy migration: removing daemons {promtail_names}")
+ self.mgr.remove_daemons(promtail_names)
+
+ daemons = self.mgr.cache.get_daemons()
+ if any(d.daemon_type == "promtail" for d in daemons):
+ logger.info(
+ "Promtail -> Alloy migration: promtail daemons still present, "
+ "skipping Alloy deployment until next run."
+ )
+ return False
+
+ alloy_spec = ServiceSpec(
+ service_type="alloy",
+ service_id="alloy",
+ placement=promtail_spec.placement
+ )
+
+ logger.info("Promtail -> Alloy migration: deploying Alloy service")
+ self.mgr.apply_alloy(alloy_spec)
+
+ logger.info("Promtail -> Alloy migration: removing promtail service spec")
+ self.mgr.remove_service("promtail")
+
+ logger.info("Promtail -> Alloy migration completed successfully.")
+ return True
+
+ except Exception as e:
+ logger.error(f"Promtail -> Alloy migration failed: {e}")
+ return False
+
def queue_migrate_rgw_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
"""
self.container_image_node_exporter = ''
self.container_image_loki = ''
self.container_image_promtail = ''
+ self.container_image_alloy = ''
self.container_image_haproxy = ''
self.container_image_keepalived = ''
self.container_image_snmp_gateway = ''
suffix = daemon_type not in [
'mon', 'crash', 'ceph-exporter', 'node-proxy',
'prometheus', 'node-exporter', 'grafana', 'alertmanager',
- 'container', 'agent', 'snmp-gateway', 'loki', 'promtail',
+ 'container', 'agent', 'snmp-gateway', 'loki', 'promtail', 'alloy',
'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query', 'mgmt-gateway', 'oauth2-proxy'
]
if forcename:
'nvmeof': self.container_image_nvmeof,
'prometheus': self.container_image_prometheus,
'promtail': self.container_image_promtail,
+ 'alloy': self.container_image_alloy,
'snmp-gateway': self.container_image_snmp_gateway,
'mgmt-gateway': self.container_image_nginx,
'oauth2-proxy': self.container_image_oauth2_proxy,
'ceph-exporter': PlacementSpec(host_pattern='*'),
'loki': PlacementSpec(count=1),
'promtail': PlacementSpec(host_pattern='*'),
+ 'alloy': PlacementSpec(host_pattern='*'),
'crash': PlacementSpec(host_pattern='*'),
'container': PlacementSpec(count=1),
'snmp-gateway': PlacementSpec(count=1),
def apply_promtail(self, spec: ServiceSpec) -> str:
return self._apply(spec)
+ @handle_orch_error
+ def apply_alloy(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
@handle_orch_error
def apply_node_exporter(self, spec: ServiceSpec) -> str:
return self._apply(spec)
}, sorted(deps)
+@register_cephadm_service
+class AlloyService(CephadmService):
+ TYPE = 'alloy'
+ DEFAULT_SERVICE_PORT = 9080
+
+ @classmethod
+ def get_dependencies(cls, mgr: "CephadmOrchestrator",
+ spec: Optional[ServiceSpec] = None,
+ daemon_type: Optional[str] = None) -> List[str]:
+        return sorted(d.name() for d in mgr.cache.get_daemons_by_types(['loki']))
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemons = self.mgr.cache.get_daemons_by_service('loki')
+ loki_host = ''
+ for i, dd in enumerate(daemons):
+ assert dd.hostname is not None
+ if i == 0:
+ loki_host = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
+
+ context = {
+ 'client_hostname': loki_host,
+ }
+
+ alloy_config = self.mgr.template.render('services/alloy.j2', context)
+ return {
+ "files": {
+ "config.alloy": alloy_config
+ }
+ }, self.get_dependencies(self.mgr)
+
+
@register_cephadm_service
class PromtailService(CephadmService):
TYPE = 'promtail'
--- /dev/null
+// Discover the Ceph daemon log files present on this node.
+local.file_match "system" {
+  path_targets = [{
+    __address__ = "localhost",
+    __path__ = "/var/log/ceph/**/*.log",
+    job = "Cluster Logs",
+  }]
+}
+
+// Tail the matched files and forward each entry to the Loki writer below.
+// legacy_positions_file points at a Promtail-format positions file so that,
+// if one exists, tailing resumes from the last recorded offsets.
+loki.source.file "system" {
+  targets = local.file_match.system.targets
+  forward_to = [loki.write.default.receiver]
+  legacy_positions_file = "/var/lib/alloy/data/positions.yaml"
+}
+
+// Push the collected log entries to the Loki endpoint rendered by cephadm.
+loki.write "default" {
+  endpoint {
+    url = "http://{{ client_hostname }}:3100/loki/api/v1/push"
+  }
+  external_labels = {}
+}
# services based on CephadmService shouldn't have get_auth_entity
with pytest.raises(AttributeError):
- for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail']:
+ for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'loki', 'promtail', 'alloy']:
service_registry.get_service(daemon_type).get_auth_entity("id1", "host")
service_registry.get_service(daemon_type).get_auth_entity("id1", "")
service_registry.get_service(daemon_type).get_auth_entity("id1")
'rbd-mirror', 'cephfs-mirror', 'ceph-exporter']
GATEWAY_TYPES = ['iscsi', 'nfs', 'nvmeof', 'smb']
MONITORING_STACK_TYPES = ['node-exporter', 'prometheus',
- 'alertmanager', 'grafana', 'loki', 'promtail']
+ 'alertmanager', 'grafana', 'loki', 'promtail', 'alloy']
RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['haproxy', 'nfs']
CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES
i18n>Daemon Logs</a>
<ng-template ngbNavContent>
<ng-container *ngIf="showDaemonLogs && lokiServiceStatus$ | async as lokiServiceStatus ; else daemonLogsTpl ">
- <div *ngIf="promtailServiceStatus$ | async as promtailServiceStatus; else daemonLogsTpl">
+ <div *ngIf="alloyServiceStatus$ | async as alloyServiceStatus; else daemonLogsTpl">
<cd-grafana i18n-title
title="Daemon logs"
[grafanaPath]="'explore?'"
<ng-template #daemonLogsTpl>
<cd-alert-panel type="info"
- title="Loki/Promtail service not running"
+ title="Loki/Alloy service not running"
i18n-title>
- <ng-container i18n>Please start the loki and promtail service to see these logs.</ng-container>
+ <ng-container i18n>Please start the loki and alloy services to see these logs.</ng-container>
</cd-alert-panel>
</ng-template>
clogText: string;
auditLogText: string;
lokiServiceStatus$: Observable<boolean>;
- promtailServiceStatus$: Observable<boolean>;
+ alloyServiceStatus$: Observable<boolean>;
interval: number;
priorities: Array<{ name: string; value: string }> = [
return data.length > 0 && data[0].status === 1;
})
);
- this.promtailServiceStatus$ = this.cephService.getDaemons('promtail').pipe(
+ this.alloyServiceStatus$ = this.cephService.getDaemons('alloy').pipe(
map((data: any) => {
return data.length > 0 && data[0].status === 1;
})
// Remove service types:
// osd - This is deployed a different way.
// container - This should only be used in the CLI.
- this.hiddenServices.push('osd', 'container');
+ // promtail - This is deprecated and replaced by alloy.
+ this.hiddenServices.push('osd', 'container', 'promtail');
this.serviceTypes = _.difference(resp, this.hiddenServices).sort();
});
'prometheus': self.apply_prometheus,
'loki': self.apply_loki,
'promtail': self.apply_promtail,
+ 'alloy': self.apply_alloy,
'rbd-mirror': self.apply_rbd_mirror,
'rgw': self.apply_rgw,
'ingress': self.apply_ingress,
"""Update existing a Promtail daemon(s)"""
raise NotImplementedError()
+ def apply_alloy(self, spec: ServiceSpec) -> OrchResult[str]:
+ """Update existing a alloy daemon(s)"""
+ raise NotImplementedError()
+
def apply_crash(self, spec: ServiceSpec) -> OrchResult[str]:
"""Update existing a crash daemon(s)"""
raise NotImplementedError()
'ceph-exporter': 'ceph-exporter',
'loki': 'loki',
'promtail': 'promtail',
+ 'alloy': 'alloy',
'crash': 'crash',
'crashcollector': 'crash', # Specific Rook Daemon
'container': 'container',
'prometheus': ['prometheus'],
'loki': ['loki'],
'promtail': ['promtail'],
+ 'alloy': ['alloy'],
'node-exporter': ['node-exporter'],
'ceph-exporter': ['ceph-exporter'],
'crash': ['crash'],
prometheus = 'prometheus'
loki = 'loki'
promtail = 'promtail'
+ alloy = 'alloy'
mds = 'mds'
rgw = 'rgw'
nfs = 'nfs'
PROMETHEUS = _create_image('quay.io/prometheus/prometheus:v2.51.0', 'prometheus')
LOKI = _create_image('docker.io/grafana/loki:3.0.0', 'loki')
PROMTAIL = _create_image('docker.io/grafana/promtail:3.0.0', 'promtail')
+ ALLOY = _create_image('docker.io/grafana/alloy:latest', 'alloy')
NODE_EXPORTER = _create_image('quay.io/prometheus/node-exporter:v1.7.0', 'node_exporter')
ALERTMANAGER = _create_image('quay.io/prometheus/alertmanager:v0.27.0', 'alertmanager')
GRAFANA = _create_image('quay.io/ceph/grafana:11.6.0', 'grafana')
class NonCephImageServiceTypes(Enum):
prometheus = 'prometheus'
loki = 'loki'
- promtail = 'promtail'
+ alloy = 'alloy'
node_exporter = 'node-exporter'
alertmanager = 'alertmanager'
grafana = 'grafana'
+ promtail = 'promtail'
nvmeof = 'nvmeof'
snmp_gateway = 'snmp-gateway'
elasticsearch = 'elasticsearch'
'osd',
'prometheus',
'promtail',
+ 'alloy',
'rbd-mirror',
'rgw',
'smb',
'prometheus': PrometheusSpec,
'loki': MonitoringSpec,
'promtail': MonitoringSpec,
+ 'alloy': MonitoringSpec,
'snmp-gateway': SNMPGatewaySpec,
'elasticsearch': TracingSpec,
'jaeger-agent': TracingSpec,
custom_configs: Optional[List[CustomConfig]] = None,
):
assert service_type in ['grafana', 'node-exporter', 'prometheus', 'alertmanager',
- 'loki', 'promtail']
+ 'loki', 'alloy', 'promtail']
super(MonitoringSpec, self).__init__(
service_type, service_id,
'alertmanager': 9093,
'grafana': 3000,
'loki': 3100,
+ 'alloy': 9080,
'promtail': 9080}[self.service_type]