From 79576552279b68b9402f6ee798211c4ddb54403d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Thu, 12 Mar 2020 18:33:53 -0500
Subject: [PATCH] cephadm: bootstrap: deploy monitoring stack by default

Signed-off-by: Sage Weil <sage@redhat.com>
---
 doc/cephadm/monitoring.rst                   | 21 ++++++++++++++++++--
 qa/suites/rados/cephadm/upgrade/1-start.yaml |  4 ++--
 qa/tasks/cephadm.py                          |  1 +
 src/cephadm/cephadm                          | 11 ++++++++++
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst
index 0d4a16e0a5df..31c93a3cce59 100644
--- a/doc/cephadm/monitoring.rst
+++ b/doc/cephadm/monitoring.rst
@@ -5,7 +5,9 @@ The Ceph dashboard makes use of prometheus, grafana, and related tools
 to store and visualize detailed metrics on cluster utilization and
 performance.  Ceph users have three options:
 
-#. Have cephadm deploy and configure these services.
+#. Have cephadm deploy and configure these services.  This is the default
+   when bootstrapping a new cluster unless the ``--skip-monitoring-stack``
+   option is used.
 #. Deploy and configure these services manually.  This is recommended for users
    with existing prometheus services in their environment (and in cases where
    Ceph is running in Kubernetes with Rook).
@@ -15,7 +17,10 @@ performance.  Ceph users have three options:
 Deploying monitoring with cephadm
 ---------------------------------
 
-To deploy a basic monitoring stack:
+By default, bootstrap will deploy a basic monitoring stack.  If you
+did not do this (by passing ``--skip-monitoring-stack``), or if you
+converted an existing cluster to cephadm management, you can set up
+monitoring by following the steps below.
 
 #. Enable the prometheus module in the ceph-mgr daemon.  This exposes the internal Ceph metrics so that prometheus can scrape them.::
 
@@ -52,6 +57,18 @@ completed, you should see something like this from ``ceph orch ls``::
   node-exporter      2/2  6s ago     docker.io/prom/node-exporter:latest             e5a616e4b9cf  present
   prometheus         1/1  6s ago     docker.io/prom/prometheus:latest                e935122ab143  present
 
+Disabling monitoring
+--------------------
+
+If you have deployed monitoring and would like to remove it, you can do
+so with::
+
+  ceph orch rm grafana
+  ceph orch rm prometheus --force   # this will delete metrics data collected so far
+  ceph orch rm node-exporter
+  ceph orch rm alertmanager
+  ceph mgr module disable prometheus
+
 
 Deploying monitoring manually
 -----------------------------
diff --git a/qa/suites/rados/cephadm/upgrade/1-start.yaml b/qa/suites/rados/cephadm/upgrade/1-start.yaml
index 25e6ee90e850..99cc19b77f3b 100644
--- a/qa/suites/rados/cephadm/upgrade/1-start.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start.yaml
@@ -1,4 +1,4 @@
 tasks:
 - cephadm:
-    image: quay.io/ceph-ci/ceph:wip-sage3-testing-2020-03-14-0747
-    cephadm_branch: wip-sage3-testing-2020-03-14-0747
+    image: quay.io/ceph-ci/ceph:wip-sage4-testing-2020-03-14-1141
+    cephadm_branch: wip-sage4-testing-2020-03-14-1141
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index 8f7947571fc6..05911c7bd70a 100644
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -358,6 +358,7 @@ def ceph_bootstrap(ctx, config):
             '--mon-id', first_mon,
             '--mgr-id', first_mgr,
             '--orphan-initial-daemons',   # we will do it explicitly!
+            '--skip-monitoring-stack',    # we'll provision these explicitly
             '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
             '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
             '--output-keyring',
diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm
index 850a4d35463a..e92e1bc34730 100755
--- a/src/cephadm/cephadm
+++ b/src/cephadm/cephadm
@@ -2317,6 +2317,13 @@ def command_bootstrap():
                 logger.info('Deploying %s service with default placement...' % t)
                 cli(['orch', 'apply', t])
 
+        if not args.skip_monitoring_stack:
+            logger.info('Enabling mgr prometheus module...')
+            cli(['mgr', 'module', 'enable', 'prometheus'])
+            for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
+                logger.info('Deploying %s service with default placement...' % t)
+                cli(['orch', 'apply', t])
+
     if not args.skip_dashboard:
         logger.info('Enabling the dashboard module...')
         cli(['mgr', 'module', 'enable', 'dashboard'])
@@ -3955,6 +3962,10 @@ def _get_parser():
         '--orphan-initial-daemons',
         action='store_true',
         help='Do not create initial mon, mgr, and crash service specs')
+    parser_bootstrap.add_argument(
+        '--skip-monitoring-stack',
+        action='store_true',
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
 
     parser_deploy = subparsers.add_parser(
         'deploy', help='deploy a daemon')
-- 
2.47.3