cephadm: bootstrap: deploy monitoring stack by default

author Sage Weil <sage@redhat.com>

Thu, 12 Mar 2020 23:33:53 +0000 (18:33 -0500)

committer Sage Weil <sage@redhat.com>

Mon, 16 Mar 2020 17:39:44 +0000 (12:39 -0500)
author Sage Weil <sage@redhat.com>
Thu, 12 Mar 2020 23:33:53 +0000 (18:33 -0500)
committer Sage Weil <sage@redhat.com>
Mon, 16 Mar 2020 17:39:44 +0000 (12:39 -0500)
diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst

index 0d4a16e0a5df0a14906744242d217994c44d952c..31c93a3cce59b4e3b112a3fe48b0d8297e2ca624 100644 (file)
--- a/doc/cephadm/monitoring.rst
+++ b/doc/cephadm/monitoring.rst
@@ -5,7 +5,9 @@ The Ceph dashboard makes use of prometheus, grafana, and related tools
  to store and visualize detailed metrics on cluster utilization and
  performance.  Ceph users have three options:
  
-#. Have cephadm deploy and configure these services.
+#. Have cephadm deploy and configure these services.  This is the default
+   when bootstrapping a new cluster unless the ``--skip-monitoring-stack``
+   option is used.
  #. Deploy and configure these services manually.  This is recommended for users
     with existing prometheus services in their environment (and in cases where
     Ceph is running in Kubernetes with Rook).
@@ -15,7 +17,10 @@ performance.  Ceph users have three options:
  Deploying monitoring with cephadm
  ---------------------------------
  
-To deploy a basic monitoring stack:
+By default, bootstrap will deploy a basic monitoring stack.  If you
+did not do this (by passing ``--skip-monitoring-stack``), or if you
+converted an existing cluster to cephadm management, you can set up
+monitoring by following the steps below.
  
  #. Enable the prometheus module in the ceph-mgr daemon.  This exposes the internal Ceph metrics so that prometheus can scrape them.::
  
@@ -52,6 +57,18 @@ completed, you should see something like this from ``ceph orch ls``::
    node-exporter      2/2  6s ago     docker.io/prom/node-exporter:latest             e5a616e4b9cf  present
    prometheus         1/1  6s ago     docker.io/prom/prometheus:latest                e935122ab143  present
  
+Disabling monitoring
+--------------------
+
+If you have deployed monitoring and would like to remove it, you can do
+so with::
+
+  ceph orch rm grafana
+  ceph orch rm prometheus --force   # this will delete metrics data collected so far
+  ceph orch rm node-exporter
+  ceph orch rm alertmanager
+  ceph mgr module disable prometheus
+
  
  Deploying monitoring manually
  -----------------------------
diff --git a/qa/suites/rados/cephadm/upgrade/1-start.yaml b/qa/suites/rados/cephadm/upgrade/1-start.yaml

index 25e6ee90e85020e80d89c51658878e3adbc3a9b9..99cc19b77f3bdba385133b5d930e6c52206ae32c 100644 (file)
--- a/qa/suites/rados/cephadm/upgrade/1-start.yaml
+++ b/qa/suites/rados/cephadm/upgrade/1-start.yaml
@@ -1,4 +1,4 @@
  tasks:
  - cephadm:
-    image: quay.io/ceph-ci/ceph:wip-sage3-testing-2020-03-14-0747
-    cephadm_branch: wip-sage3-testing-2020-03-14-0747
+    image: quay.io/ceph-ci/ceph:wip-sage4-testing-2020-03-14-1141
+    cephadm_branch: wip-sage4-testing-2020-03-14-1141
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py

index 8f7947571fc6ed4354dbfb30879011a0315ab93b..05911c7bd70ada011e93b54a21082357c45b242e 100644 (file)
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -358,6 +358,7 @@ def ceph_bootstrap(ctx, config):
              '--mon-id', first_mon,
              '--mgr-id', first_mgr,
              '--orphan-initial-daemons',   # we will do it explicitly!
+            '--skip-monitoring-stack',    # we'll provision these explicitly
              '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
              '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
              '--output-keyring',
diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm

index 850a4d35463a37a8302a807b9f7a9d5f2263d1e5..e92e1bc347305c8c5a1360660cea6869c9fa0e29 100755 (executable)
--- a/src/cephadm/cephadm
+++ b/src/cephadm/cephadm
@@ -2317,6 +2317,13 @@ def command_bootstrap():
                  logger.info('Deploying %s service with default placement...' % t)
                  cli(['orch', 'apply', t])
  
+        if not args.skip_monitoring_stack:
+            logger.info('Enabling mgr prometheus module...')
+            cli(['mgr', 'module', 'enable', 'prometheus'])
+            for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
+                logger.info('Deploying %s service with default placement...' % t)
+                cli(['orch', 'apply', t])
+
      if not args.skip_dashboard:
          logger.info('Enabling the dashboard module...')
          cli(['mgr', 'module', 'enable', 'dashboard'])
@@ -3955,6 +3962,10 @@ def _get_parser():
          '--orphan-initial-daemons',
          action='store_true',
          help='Do not create initial mon, mgr, and crash service specs')
+    parser_bootstrap.add_argument(
+        '--skip-monitoring-stack',
+        action='store_true',
+        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
  
      parser_deploy = subparsers.add_parser(
          'deploy', help='deploy a daemon')
author	Sage Weil <sage@redhat.com>
	Thu, 12 Mar 2020 23:33:53 +0000 (18:33 -0500)
committer	Sage Weil <sage@redhat.com>
	Mon, 16 Mar 2020 17:39:44 +0000 (12:39 -0500)
doc/cephadm/monitoring.rst		patch \| blob \| history
qa/suites/rados/cephadm/upgrade/1-start.yaml		patch \| blob \| history
qa/tasks/cephadm.py		patch \| blob \| history
src/cephadm/cephadm		patch \| blob \| history