From 79576552279b68b9402f6ee798211c4ddb54403d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 12 Mar 2020 18:33:53 -0500 Subject: [PATCH] cephadm: bootstrap: deploy monitoring stack by default Signed-off-by: Sage Weil --- doc/cephadm/monitoring.rst | 21 ++++++++++++++++++-- qa/suites/rados/cephadm/upgrade/1-start.yaml | 4 ++-- qa/tasks/cephadm.py | 1 + src/cephadm/cephadm | 11 ++++++++++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst index 0d4a16e0a5df..31c93a3cce59 100644 --- a/doc/cephadm/monitoring.rst +++ b/doc/cephadm/monitoring.rst @@ -5,7 +5,9 @@ The Ceph dashboard makes use of prometheus, grafana, and related tools to store and visualize detailed metrics on cluster utilization and performance. Ceph users have three options: -#. Have cephadm deploy and configure these services. +#. Have cephadm deploy and configure these services. This is the default + when bootstrapping a new cluster unless the ``--skip-monitoring-stack`` + option is used. #. Deploy and configure these services manually. This is recommended for users with existing prometheus services in their environment (and in cases where Ceph is running in Kubernetes with Rook). @@ -15,7 +17,10 @@ performance. Ceph users have three options: Deploying monitoring with cephadm --------------------------------- -To deploy a basic monitoring stack: +By default, bootstrap will deploy a basic monitoring stack. If you +did not do this (by passing ``--skip-monitoring-stack``), or if you +converted an existing cluster to cephadm management, you can set up +monitoring by following the steps below. #. Enable the prometheus module in the ceph-mgr daemon. 
This exposes the internal Ceph metrics so that prometheus can scrape them.:: @@ -52,6 +57,18 @@ completed, you should see something like this from ``ceph orch ls``:: node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present +Disabling monitoring +-------------------- + +If you have deployed monitoring and would like to remove it, you can do +so with:: + + ceph orch rm grafana + ceph orch rm prometheus --force # this will delete metrics data collected so far + ceph orch rm node-exporter + ceph orch rm alertmanager + ceph mgr module disable prometheus + Deploying monitoring manually ----------------------------- diff --git a/qa/suites/rados/cephadm/upgrade/1-start.yaml b/qa/suites/rados/cephadm/upgrade/1-start.yaml index 25e6ee90e850..99cc19b77f3b 100644 --- a/qa/suites/rados/cephadm/upgrade/1-start.yaml +++ b/qa/suites/rados/cephadm/upgrade/1-start.yaml @@ -1,4 +1,4 @@ tasks: - cephadm: - image: quay.io/ceph-ci/ceph:wip-sage3-testing-2020-03-14-0747 - cephadm_branch: wip-sage3-testing-2020-03-14-0747 + image: quay.io/ceph-ci/ceph:wip-sage4-testing-2020-03-14-1141 + cephadm_branch: wip-sage4-testing-2020-03-14-1141 diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index 8f7947571fc6..05911c7bd70a 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -358,6 +358,7 @@ def ceph_bootstrap(ctx, config): '--mon-id', first_mon, '--mgr-id', first_mgr, '--orphan-initial-daemons', # we will do it explicitly! + '--skip-monitoring-stack', # we'll provision these explicitly '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), '--output-keyring', diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 850a4d35463a..e92e1bc34730 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -2317,6 +2317,13 @@ def command_bootstrap(): logger.info('Deploying %s service with default placement...' 
% t) cli(['orch', 'apply', t]) + if not args.skip_monitoring_stack: + logger.info('Enabling mgr prometheus module...') + cli(['mgr', 'module', 'enable', 'prometheus']) + for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']: + logger.info('Deploying %s service with default placement...' % t) + cli(['orch', 'apply', t]) + if not args.skip_dashboard: logger.info('Enabling the dashboard module...') cli(['mgr', 'module', 'enable', 'dashboard']) @@ -3955,6 +3962,10 @@ def _get_parser(): '--orphan-initial-daemons', action='store_true', help='Do not create initial mon, mgr, and crash service specs') + parser_bootstrap.add_argument( + '--skip-monitoring-stack', + action='store_true', + help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)') parser_deploy = subparsers.add_parser( 'deploy', help='deploy a daemon') -- 2.47.3