From ceeb0ceadefb83707bc2d5ef6c8d5f543069f6e9 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Tue, 5 Apr 2022 00:58:43 +0530 Subject: [PATCH] mgr/cephadm: adding ceph exporter container Signed-off-by: Avan Thakkar --- src/ceph_exporter.cc | 5 -- src/cephadm/cephadm | 68 ++++++++++++++++++- src/exporter/DaemonMetricCollector.cc | 6 +- src/exporter/http_server.cc | 4 +- src/pybind/mgr/cephadm/module.py | 18 +++-- .../mgr/cephadm/services/cephadmservice.py | 13 ++++ src/pybind/mgr/orchestrator/_interface.py | 7 ++ src/pybind/mgr/orchestrator/module.py | 1 + .../ceph/deployment/service_spec.py | 2 +- 9 files changed, 104 insertions(+), 20 deletions(-) diff --git a/src/ceph_exporter.cc b/src/ceph_exporter.cc index 4406828b3bb05..67af60f5b508a 100644 --- a/src/ceph_exporter.cc +++ b/src/ceph_exporter.cc @@ -13,12 +13,7 @@ DaemonMetricCollector collector; int main(int argc, char** argv) { - // TODO: daemonize - std::cout << "inside exporter" << std::endl; - - std::cout << "Starting http server thread..." << std::endl; boost::thread server_thread(http_server_thread_entrypoint); - std::cout << "Starting collector..." 
<< std::endl; DaemonMetricCollector &collector = collector_instance(); collector.main(); server_thread.join(); diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 069dd23d16f0a..1ae09f0561b20 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -44,7 +44,7 @@ from pathlib import Path FuncT = TypeVar('FuncT', bound=Callable) # Default container images ----------------------------------------------------- -DEFAULT_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main' +DEFAULT_IMAGE = 'docker.io/rhcsdashboard/ceph-exporter' DEFAULT_IMAGE_IS_MAIN = True DEFAULT_IMAGE_RELEASE = 'quincy' DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4' @@ -914,6 +914,53 @@ class CephIscsi(object): ################################## +class CephExporter(object): + """Defines a Ceph-exporter container""" + + daemon_type = 'exporter' + entrypoint = '/usr/bin/ceph-exporter' + + port_map = { + 'exporter': 9085, + } + + def __init__(self, + ctx, + fsid, + daemon_id, + image=DEFAULT_IMAGE): + # type: (CephadmContext, str, Union[int, str], str) -> None + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + @classmethod + def init(cls, ctx, fsid, daemon_id): + # type: (CephadmContext, str, Union[int, str]) -> CephExporter + return cls(ctx, fsid, daemon_id, + image=ctx.image) + + @staticmethod + def get_container_mounts(): + mounts = dict() + mounts['/var/run/ceph'] = '/var/run/ceph:z' + return mounts + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + +################################## + + class HAproxy(object): """Defines an HAproxy container""" daemon_type = 'haproxy' @@ -1267,6 +1314,7 @@ def get_supported_daemons(): supported_daemons.extend(Monitoring.components) 
supported_daemons.append(NFSGanesha.daemon_type) supported_daemons.append(CephIscsi.daemon_type) + supported_daemons.append(CephExporter.daemon_type) supported_daemons.append(CustomContainer.daemon_type) supported_daemons.append(HAproxy.daemon_type) supported_daemons.append(Keepalived.daemon_type) @@ -2874,6 +2922,10 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id, log_dir = get_log_dir(fsid, ctx.log_dir) mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir)) + if daemon_type == CephExporter.daemon_type: + assert daemon_id + mounts.update(CephExporter.get_container_mounts()) + if daemon_type == Keepalived.daemon_type: assert daemon_id data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) @@ -2966,6 +3018,9 @@ def get_container(ctx: CephadmContext, entrypoint = NFSGanesha.entrypoint name = '%s.%s' % (daemon_type, daemon_id) envs.extend(NFSGanesha.get_container_envs()) + elif daemon_type == CephExporter.daemon_type: + entrypoint = CephExporter.entrypoint + name = '%s.%s' % (daemon_type, daemon_id) elif daemon_type == HAproxy.daemon_type: name = '%s.%s' % (daemon_type, daemon_id) container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user @@ -5794,6 +5849,17 @@ def command_deploy(ctx): config=config, keyring=keyring, reconfig=ctx.reconfig, ports=daemon_ports) + + elif daemon_type == CephExporter.daemon_type: + if not ctx.reconfig and not redeploy and not daemon_ports: + daemon_ports = list(CephExporter.port_map.values()) + + uid = 0 + gid = 0 + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) + deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + reconfig=ctx.reconfig, + ports=daemon_ports) elif daemon_type == CephIscsi.daemon_type: config, keyring = get_config_and_keyring(ctx) diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index fa88834198b93..754b7d27f6ef4 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ 
b/src/exporter/DaemonMetricCollector.cc @@ -17,7 +17,6 @@ void DaemonMetricCollector::request_loop(boost::asio::deadline_timer &timer) { timer.async_wait([&](const boost::system::error_code& e) { std::cerr << e << std::endl; update_sockets(); - std::cout << "updating metrics" << std::endl; send_requests(); timer.expires_from_now(boost::posix_time::seconds(stats_period)); request_loop(timer); @@ -128,7 +127,7 @@ void DaemonMetricCollector::send_requests() { } else { add_double_or_int_metric(ss, perf_values, name, description, mtype, labels); } - result += ss.str() + "\n"; + result += ss.str(); } } } @@ -138,10 +137,9 @@ void DaemonMetricCollector::send_requests() { void DaemonMetricCollector::update_sockets() { std::string path = "/var/run/ceph/"; - for (const auto & entry : std::filesystem::directory_iterator(path)) { + for (const auto & entry : std::filesystem::recursive_directory_iterator(path)) { if (entry.path().extension() == ".asok") { std::string daemon_socket_name = entry.path().filename().string(); - std::cout << "Got socket: " << daemon_socket_name << std::endl; // remove .asok std::string daemon_name = daemon_socket_name.substr(0, daemon_socket_name.size() - 5); if (clients.find(daemon_name) == clients.end()) { diff --git a/src/exporter/http_server.cc b/src/exporter/http_server.cc index b3db4d30934ec..efe164d14c76e 100644 --- a/src/exporter/http_server.cc +++ b/src/exporter/http_server.cc @@ -111,13 +111,12 @@ private: // Construct a response message based on the program state. 
void create_response() { - std::cout << "Got request on " << request_.target() << std::endl; if(request_.target() == "/metrics") { response_.set(http::field::content_type, "text/plain"); DaemonMetricCollector &collector = collector_instance(); std::string metrics = collector.get_metrics(); - beast::ostream(response_.body()) << "Perf Counters\n" << metrics << std::endl; + beast::ostream(response_.body()) << metrics << std::endl; } else { @@ -167,7 +166,6 @@ void http_server(tcp::acceptor& acceptor, tcp::socket& socket) acceptor.async_accept(socket, [&](beast::error_code ec) { - std::cout << "async accept" << std::endl; if(!ec) std::make_shared(std::move(socket))->start(); http_server(acceptor, socket); diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index d8cf7cee5c174..a2b724992d834 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -46,7 +46,8 @@ from . import utils from . import ssh from .migrations import Migrations from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ - RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent + RbdMirrorService, CrashService, CephadmService, CephfsMirrorService, CephadmAgent, \ + CephExporterService from .services.ingress import IngressService from .services.container import CustomContainerService from .services.iscsi import IscsiService @@ -93,9 +94,9 @@ os._exit = os_exit_noop # type: ignore # Default container images ----------------------------------------------------- -DEFAULT_IMAGE = 'quay.io/ceph/ceph' -DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4' -DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1' +DEFAULT_IMAGE = 'docker.io/rhcsdashboard/ceph-exporter' +DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.18.1' +DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v0.18.1' DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0' 
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0' DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0' @@ -524,7 +525,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, RgwService, RbdMirrorService, GrafanaService, AlertmanagerService, PrometheusService, NodeExporterService, LokiService, PromtailService, CrashService, IscsiService, IngressService, CustomContainerService, CephfsMirrorService, - CephadmAgent, SNMPGatewayService + CephadmAgent, SNMPGatewayService, CephExporterService ] # https://github.com/python/mypy/issues/8993 @@ -1332,7 +1333,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, image = self.container_image_haproxy elif daemon_type == 'keepalived': image = self.container_image_keepalived - elif daemon_type == CustomContainerService.TYPE: + elif daemon_type == CustomContainerService.TYPE or daemon_type == 'exporter': # The image can't be resolved, the necessary information # is only available when a container is deployed (given # via spec). 
@@ -2507,6 +2508,7 @@ Then run the following: 'alertmanager': PlacementSpec(count=1), 'prometheus': PlacementSpec(count=1), 'node-exporter': PlacementSpec(host_pattern='*'), + 'exporter': PlacementSpec(host_pattern='*'), 'loki': PlacementSpec(count=1), 'promtail': PlacementSpec(host_pattern='*'), 'crash': PlacementSpec(host_pattern='*'), @@ -2616,6 +2618,10 @@ Then run the following: def apply_node_exporter(self, spec: ServiceSpec) -> str: return self._apply(spec) + @handle_orch_error + def apply_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + @handle_orch_error def apply_crash(self, spec: ServiceSpec) -> str: return self._apply(spec) diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 8abb0e63a2c10..b3e0165059978 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1012,6 +1012,19 @@ class CrashService(CephService): return daemon_spec +class CephExporterService(CephService): + TYPE = 'exporter' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + return {}, [] + + class CephfsMirrorService(CephService): TYPE = 'cephfs-mirror' diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index c9414d797bb25..83f2bca78be69 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -464,6 +464,7 @@ class Orchestrator(object): 'mon': self.apply_mon, 'nfs': self.apply_nfs, 'node-exporter': self.apply_node_exporter, + 'exporter': self.apply_exporter, 'osd': lambda dg: 
self.apply_drivegroups([dg]), # type: ignore 'prometheus': self.apply_prometheus, 'loki': self.apply_loki, @@ -644,6 +645,10 @@ class Orchestrator(object): """Update existing a Node-Exporter daemon(s)""" raise NotImplementedError() + def apply_exporter(self, spec: ServiceSpec) -> OrchResult[str]: + """Update existing exporter daemon(s)""" + raise NotImplementedError() + def apply_loki(self, spec: ServiceSpec) -> OrchResult[str]: """Update existing a Loki daemon(s)""" raise NotImplementedError() @@ -733,6 +738,7 @@ def daemon_type_to_service(dtype: str) -> str: 'alertmanager': 'alertmanager', 'prometheus': 'prometheus', 'node-exporter': 'node-exporter', + 'exporter': 'exporter', 'loki': 'loki', 'promtail': 'promtail', 'crash': 'crash', @@ -762,6 +768,7 @@ def service_to_daemon_types(stype: str) -> List[str]: 'loki': ['loki'], 'promtail': ['promtail'], 'node-exporter': ['node-exporter'], + 'exporter': ['exporter'], 'crash': ['crash'], 'container': ['container'], 'agent': ['agent'], diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 0dd5c23f71248..e17905ea4e330 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -54,6 +54,7 @@ class ServiceType(enum.Enum): alertmanager = 'alertmanager' grafana = 'grafana' node_exporter = 'node-exporter' + exporter = 'exporter' prometheus = 'prometheus' loki = 'loki' promtail = 'promtail' diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index c23783c5da0e8..0c83efc364963 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -444,7 +444,7 @@ class ServiceSpec(object): start the services. 
""" KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi loki promtail mds mgr mon nfs ' \ - 'node-exporter osd prometheus rbd-mirror rgw agent ' \ + 'node-exporter exporter osd prometheus rbd-mirror rgw agent ' \ 'container ingress cephfs-mirror snmp-gateway'.split() REQUIRES_SERVICE_ID = 'iscsi mds nfs rgw container ingress '.split() MANAGED_CONFIG_OPTIONS = [ -- 2.39.5