git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: adding ceph exporter container
author    Avan Thakkar <athakkar@redhat.com>
          Mon, 4 Apr 2022 19:28:43 +0000 (00:58 +0530)
committer Avan Thakkar <athakkar@redhat.com>
          Mon, 20 Jun 2022 18:20:46 +0000 (23:50 +0530)
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
src/ceph_exporter.cc
src/cephadm/cephadm
src/exporter/DaemonMetricCollector.cc
src/exporter/http_server.cc
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/python-common/ceph/deployment/service_spec.py

src/ceph_exporter.cc
index 4406828b3bb056540d6f36ced260e987864a1bc6..67af60f5b508a33bb6e6a41a1cbf1a28803e79ca 100644 (file)
@@ -13,12 +13,7 @@ DaemonMetricCollector collector;
 
 
 int main(int argc, char** argv) {
-  // TODO: daemonize
-  std::cout << "inside exporter" << std::endl;
-
-  std::cout << "Starting http server thread..." << std::endl;
   boost::thread server_thread(http_server_thread_entrypoint);
-  std::cout << "Starting collector..." << std::endl;
   DaemonMetricCollector &collector = collector_instance();
   collector.main();
   server_thread.join();
src/cephadm/cephadm
index 069dd23d16f0ae2be6630bd8c13426959d646cd9..1ae09f0561b20f59a8eddbb88498b985532c694e 100755 (executable)
@@ -44,7 +44,7 @@ from pathlib import Path
 FuncT = TypeVar('FuncT', bound=Callable)
 
 # Default container images -----------------------------------------------------
-DEFAULT_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
+DEFAULT_IMAGE = 'docker.io/rhcsdashboard/ceph-exporter'
 DEFAULT_IMAGE_IS_MAIN = True
 DEFAULT_IMAGE_RELEASE = 'quincy'
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
@@ -914,6 +914,53 @@ class CephIscsi(object):
 ##################################
 
 
+class CephExporter(object):
+    """Defines a Ceph-exporter container"""
+
+    daemon_type = 'exporter'
+    entrypoint = '/usr/bin/ceph-exporter'
+
+    port_map = {
+        'exporter': 9085,
+    }
+
+    def __init__(self,
+                 ctx,
+                 fsid,
+                 daemon_id,
+                 image=DEFAULT_IMAGE):
+        # type: (CephadmContext, str, Union[int, str], str) -> None
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+
+    @classmethod
+    def init(cls, ctx, fsid, daemon_id):
+        # type: (CephadmContext, str, Union[int, str]) -> CephExporter
+        return cls(ctx, fsid, daemon_id,
+                   ctx.image)
+
+    @staticmethod
+    def get_container_mounts():
+        mounts = dict()
+        mounts['/var/run/ceph'] = '/var/run/ceph:z'
+        return mounts
+
+    def get_daemon_name(self):
+        # type: () -> str
+        return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+    def get_container_name(self, desc=None):
+        # type: (Optional[str]) -> str
+        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+        if desc:
+            cname = '%s-%s' % (cname, desc)
+        return cname
+
+##################################
+
+
 class HAproxy(object):
     """Defines an HAproxy container"""
     daemon_type = 'haproxy'
@@ -1267,6 +1314,7 @@ def get_supported_daemons():
     supported_daemons.extend(Monitoring.components)
     supported_daemons.append(NFSGanesha.daemon_type)
     supported_daemons.append(CephIscsi.daemon_type)
+    supported_daemons.append(CephExporter.daemon_type)
     supported_daemons.append(CustomContainer.daemon_type)
     supported_daemons.append(HAproxy.daemon_type)
     supported_daemons.append(Keepalived.daemon_type)
@@ -2874,6 +2922,10 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
         log_dir = get_log_dir(fsid, ctx.log_dir)
         mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
 
+    if daemon_type == CephExporter.daemon_type:
+        assert daemon_id
+        mounts.update(CephExporter.get_container_mounts())
+
     if daemon_type == Keepalived.daemon_type:
         assert daemon_id
         data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
@@ -2966,6 +3018,9 @@ def get_container(ctx: CephadmContext,
         entrypoint = NFSGanesha.entrypoint
         name = '%s.%s' % (daemon_type, daemon_id)
         envs.extend(NFSGanesha.get_container_envs())
+    elif daemon_type == CephExporter.daemon_type:
+        entrypoint = CephExporter.entrypoint
+        name = '%s.%s' % (daemon_type, daemon_id)
     elif daemon_type == HAproxy.daemon_type:
         name = '%s.%s' % (daemon_type, daemon_id)
         container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
@@ -5794,6 +5849,17 @@ def command_deploy(ctx):
                       config=config, keyring=keyring,
                       reconfig=ctx.reconfig,
                       ports=daemon_ports)
+
+    elif daemon_type == CephExporter.daemon_type:
+        if not ctx.reconfig and not redeploy and not daemon_ports:
+            daemon_ports = list(CephExporter.port_map.values())
+
+        uid = 0
+        gid = 0
+        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
+        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
+                      reconfig=ctx.reconfig,
+                      ports=daemon_ports)
 
     elif daemon_type == CephIscsi.daemon_type:
         config, keyring = get_config_and_keyring(ctx)
src/exporter/DaemonMetricCollector.cc
index fa88834198b932ad5f7ecc249fb5323d54b71945..754b7d27f6ef46d4e319cfa434b17b3db6436b3d 100644 (file)
@@ -17,7 +17,6 @@ void DaemonMetricCollector::request_loop(boost::asio::deadline_timer &timer) {
   timer.async_wait([&](const boost::system::error_code& e) {
     std::cerr << e << std::endl;
     update_sockets();
-    std::cout << "updating metrics" << std::endl;
     send_requests();
     timer.expires_from_now(boost::posix_time::seconds(stats_period));
     request_loop(timer);
@@ -128,7 +127,7 @@ void DaemonMetricCollector::send_requests() {
           } else {
             add_double_or_int_metric(ss, perf_values, name, description, mtype, labels);
           }
-          result += ss.str() + "\n";
+          result += ss.str();
         }
       }
     }
@@ -138,10 +137,9 @@ void DaemonMetricCollector::send_requests() {
 
 void DaemonMetricCollector::update_sockets() {
   std::string path = "/var/run/ceph/";
-  for (const auto & entry : std::filesystem::directory_iterator(path)) {
+  for (const auto & entry : std::filesystem::recursive_directory_iterator(path)) {
     if (entry.path().extension() == ".asok") {
       std::string daemon_socket_name = entry.path().filename().string();
-      std::cout << "Got socket: " << daemon_socket_name << std::endl;
       // remove .asok
       std::string daemon_name = daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
       if (clients.find(daemon_name) == clients.end()) {
src/exporter/http_server.cc
index b3db4d30934ec0c23f0686eaf42575f554dce413..efe164d14c76edfd30de75a018fe183dcd7cfd07 100644 (file)
@@ -111,13 +111,12 @@ private:
     // Construct a response message based on the program state.
     void create_response()
     {
-        std::cout << "Got request on " << request_.target() << std::endl;
         if(request_.target() == "/metrics")
         {
             response_.set(http::field::content_type, "text/plain");
             DaemonMetricCollector &collector = collector_instance();
             std::string metrics = collector.get_metrics();
-            beast::ostream(response_.body()) << "Perf Counters\n" << metrics << std::endl;
+            beast::ostream(response_.body()) << metrics << std::endl;
         }
         else
         {
@@ -167,7 +166,6 @@ void http_server(tcp::acceptor& acceptor, tcp::socket& socket)
   acceptor.async_accept(socket,
       [&](beast::error_code ec)
       {
-          std::cout << "async accept" << std::endl;
           if(!ec)
               std::make_shared<http_connection>(std::move(socket))->start();
           http_server(acceptor, socket);
src/pybind/mgr/cephadm/module.py
index d8cf7cee5c1744af0dc2dadd5a6ca6eb4b2dde5b..a2b724992d834299239fa8b0044563bfbb2cbdac 100644 (file)
@@ -46,7 +46,8 @@ from . import utils
 from . import ssh
 from .migrations import Migrations
 from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
-    RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent
+    RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent, \
+    CephExporterService
 from .services.ingress import IngressService
 from .services.container import CustomContainerService
 from .services.iscsi import IscsiService
@@ -93,9 +94,9 @@ os._exit = os_exit_noop   # type: ignore
 
 
 # Default container images -----------------------------------------------------
-DEFAULT_IMAGE = 'quay.io/ceph/ceph'
-DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
-DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
+DEFAULT_IMAGE = 'docker.io/rhcsdashboard/ceph-exporter'
+DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.18.1'
+DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v0.18.1'
 DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
 DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
@@ -524,7 +525,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             RgwService, RbdMirrorService, GrafanaService, AlertmanagerService,
             PrometheusService, NodeExporterService, LokiService, PromtailService, CrashService, IscsiService,
             IngressService, CustomContainerService, CephfsMirrorService,
-            CephadmAgent, SNMPGatewayService
+            CephadmAgent, SNMPGatewayService, CephExporterService
         ]
 
         # https://github.com/python/mypy/issues/8993
@@ -1332,7 +1333,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             image = self.container_image_haproxy
         elif daemon_type == 'keepalived':
             image = self.container_image_keepalived
-        elif daemon_type == CustomContainerService.TYPE:
+        elif daemon_type == CustomContainerService.TYPE or daemon_type == 'exporter':
             # The image can't be resolved, the necessary information
             # is only available when a container is deployed (given
             # via spec).
@@ -2507,6 +2508,7 @@ Then run the following:
                 'alertmanager': PlacementSpec(count=1),
                 'prometheus': PlacementSpec(count=1),
                 'node-exporter': PlacementSpec(host_pattern='*'),
+                'exporter': PlacementSpec(host_pattern='*'),
                 'loki': PlacementSpec(count=1),
                 'promtail': PlacementSpec(host_pattern='*'),
                 'crash': PlacementSpec(host_pattern='*'),
@@ -2616,6 +2618,10 @@ Then run the following:
     def apply_node_exporter(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
 
+    @handle_orch_error
+    def apply_exporter(self, spec: ServiceSpec) -> str:
+        return self._apply(spec)
+
     @handle_orch_error
     def apply_crash(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
src/pybind/mgr/cephadm/services/cephadmservice.py
index 8abb0e63a2c102181d7937c3cf8f99c357a4fbb9..b3e01650599785521aea3b56a63849076ae9259f 100644 (file)
@@ -1012,6 +1012,19 @@ class CrashService(CephService):
         return daemon_spec
 
 
+class CephExporterService(CephService):
+    TYPE = 'exporter'
+
+    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        return {}, []
+
+
 class CephfsMirrorService(CephService):
     TYPE = 'cephfs-mirror'
 
src/pybind/mgr/orchestrator/_interface.py
index c9414d797bb25d4eafdb9edfa4bd0e8c2b9d89ba..83f2bca78be69141055080d010ed8fbcd63558d8 100644 (file)
@@ -464,6 +464,7 @@ class Orchestrator(object):
             'mon': self.apply_mon,
             'nfs': self.apply_nfs,
             'node-exporter': self.apply_node_exporter,
+            'exporter': self.apply_exporter,
             'osd': lambda dg: self.apply_drivegroups([dg]),  # type: ignore
             'prometheus': self.apply_prometheus,
             'loki': self.apply_loki,
@@ -644,6 +645,10 @@ class Orchestrator(object):
         """Update existing a Node-Exporter daemon(s)"""
         raise NotImplementedError()
 
+    def apply_exporter(self, spec: ServiceSpec) -> OrchResult[str]:
+        """Update existing a exporter daemon(s)"""
+        raise NotImplementedError()
+
     def apply_loki(self, spec: ServiceSpec) -> OrchResult[str]:
         """Update existing a Loki daemon(s)"""
         raise NotImplementedError()
@@ -733,6 +738,7 @@ def daemon_type_to_service(dtype: str) -> str:
         'alertmanager': 'alertmanager',
         'prometheus': 'prometheus',
         'node-exporter': 'node-exporter',
+        'exporter': 'exporter',
         'loki': 'loki',
         'promtail': 'promtail',
         'crash': 'crash',
@@ -762,6 +768,7 @@ def service_to_daemon_types(stype: str) -> List[str]:
         'loki': ['loki'],
         'promtail': ['promtail'],
         'node-exporter': ['node-exporter'],
+        'exporter': ['exporter'],
         'crash': ['crash'],
         'container': ['container'],
         'agent': ['agent'],
src/pybind/mgr/orchestrator/module.py
index 0dd5c23f71248394ea39ec8d9ad3bd819acaaf6d..e17905ea4e33092303e1ef3f02b9a5d0361cf346 100644 (file)
@@ -54,6 +54,7 @@ class ServiceType(enum.Enum):
     alertmanager = 'alertmanager'
     grafana = 'grafana'
     node_exporter = 'node-exporter'
+    exporter = 'exporter'
     prometheus = 'prometheus'
     loki = 'loki'
     promtail = 'promtail'
src/python-common/ceph/deployment/service_spec.py
index c23783c5da0e8a4415ea2e1a48d980c54a2ab2b9..0c83efc36496339bac59242e7e70531ec49559df 100644 (file)
@@ -444,7 +444,7 @@ class ServiceSpec(object):
     start the services.
     """
     KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi loki promtail mds mgr mon nfs ' \
-                          'node-exporter osd prometheus rbd-mirror rgw agent ' \
+                          'node-exporter exporter osd prometheus rbd-mirror rgw agent ' \
                           'container ingress cephfs-mirror snmp-gateway'.split()
     REQUIRES_SERVICE_ID = 'iscsi mds nfs rgw container ingress '.split()
     MANAGED_CONFIG_OPTIONS = [
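
With the service_spec.py and orchestrator changes above in place, an 'exporter' service can be described by an ordinary ServiceSpec. A minimal sketch (illustrative only, not part of this commit) of driving it through the mgr module:

    # Assumes the module.py default placement shown above (host_pattern='*').
    from ceph.deployment.service_spec import PlacementSpec, ServiceSpec

    spec = ServiceSpec(
        service_type='exporter',                    # now in KNOWN_SERVICE_TYPES
        placement=PlacementSpec(host_pattern='*'),
    )
    # CephadmOrchestrator.apply_exporter(spec) routes through _apply(), and
    # CephExporterService.prepare_create() then builds the daemon deploy spec.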