git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
Merge cephmetrics/dashboard-ansible repo
author Boris Ranto <branto@redhat.com>
Wed, 5 Dec 2018 18:59:47 +0000 (19:59 +0100)
committer Guillaume Abrioux <gabrioux@redhat.com>
Thu, 16 May 2019 14:39:13 +0000 (16:39 +0200)
This commit merges the dashboard-ansible installation scripts into
ceph-ansible. It adds several new roles to set up ceph-dashboard and the
underlying technologies, such as the prometheus and grafana servers.

Signed-off-by: Boris Ranto & Zack Cerza <team-gmeno@redhat.com>
Co-authored-by: Zack Cerza <zcerza@redhat.com>
Co-authored-by: Guillaume Abrioux <gabrioux@redhat.com>
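
For reference, a minimal sketch of how a deployment might opt into the new
roles, based on the dashboard variables and the grafana-server group
introduced in this change (host name and values are illustrative only):

    # inventory (excerpt) -- grafana-server is the new group consumed by
    # the ceph-prometheus/ceph-grafana plays
    [grafana-server]
    metrics-node-1

    # group_vars/all.yml (excerpt) -- the dashboard is disabled by default
    dashboard_enabled: True
    dashboard_protocol: http   # for https, also provide dashboard.crt/key and grafana.crt/key
    dashboard_port: 8234
    dashboard_admin_user: admin
    dashboard_admin_password: admin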
42 files changed:
group_vars/all.yml.sample
group_vars/rhcs.yml.sample
infrastructure-playbooks/purge-cluster.yml
infrastructure-playbooks/purge-docker-cluster.yml
roles/ceph-container-common/tasks/main.yml
roles/ceph-dashboard/defaults/main.yml [new file with mode: 0644]
roles/ceph-dashboard/meta/main.yml [new file with mode: 0644]
roles/ceph-dashboard/tasks/configure_dashboard.yml [new file with mode: 0644]
roles/ceph-dashboard/tasks/main.yml [new file with mode: 0644]
roles/ceph-defaults/defaults/main.yml
roles/ceph-grafana/defaults/main.yml [new file with mode: 0644]
roles/ceph-grafana/files/grafana-server.service [new file with mode: 0644]
roles/ceph-grafana/files/grafana.list [new file with mode: 0644]
roles/ceph-grafana/handlers/main.yml [new file with mode: 0644]
roles/ceph-grafana/meta/main.yml [new file with mode: 0644]
roles/ceph-grafana/tasks/configure_grafana.yml [new file with mode: 0644]
roles/ceph-grafana/tasks/main.yml [new file with mode: 0644]
roles/ceph-grafana/tasks/setup_container.yml [new file with mode: 0644]
roles/ceph-grafana/templates/dashboards-ceph-dashboard.yml [new file with mode: 0644]
roles/ceph-grafana/templates/datasources-ceph-dashboard.yml [new file with mode: 0644]
roles/ceph-grafana/templates/grafana.ini [new file with mode: 0644]
roles/ceph-grafana/templates/grafana.repo [new file with mode: 0644]
roles/ceph-handler/handlers/main.yml
roles/ceph-infra/tasks/configure_firewall.yml
roles/ceph-mgr/tasks/main.yml
roles/ceph-mgr/tasks/pre_requisite.yml
roles/ceph-node-exporter/defaults/main.yml [new file with mode: 0644]
roles/ceph-node-exporter/files/node_exporter.service [new file with mode: 0644]
roles/ceph-node-exporter/meta/main.yml [new file with mode: 0644]
roles/ceph-node-exporter/tasks/main.yml [new file with mode: 0644]
roles/ceph-node-exporter/tasks/setup_container.yml [new file with mode: 0644]
roles/ceph-prometheus/defaults/main.yml [new file with mode: 0644]
roles/ceph-prometheus/files/alertmanager.service [new file with mode: 0644]
roles/ceph-prometheus/files/prometheus.service [new file with mode: 0644]
roles/ceph-prometheus/handlers/main.yml [new file with mode: 0644]
roles/ceph-prometheus/meta/main.yml [new file with mode: 0644]
roles/ceph-prometheus/tasks/main.yml [new file with mode: 0644]
roles/ceph-prometheus/tasks/setup_container.yml [new file with mode: 0644]
roles/ceph-prometheus/templates/alertmanager.yml [new file with mode: 0644]
roles/ceph-prometheus/templates/prometheus.yml [new file with mode: 0644]
site-container.yml.sample
site.yml.sample

index c27941592a1a4bb2cf10129b63d21d6bf632b721..f20886e18339321298955f75d51828778a1a76d6 100644 (file)
@@ -69,6 +69,7 @@ dummy:
 #ceph_nfs_firewall_zone: public
 #ceph_rbdmirror_firewall_zone: public
 #ceph_iscsi_firewall_zone: public
+#ceph_dashboard_firewall_zone: public
 
 # Generate local ceph.conf in fetch directory
 #ceph_conf_local: false
@@ -687,6 +688,17 @@ dummy:
 #  - { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
 
 
+#############
+# DASHBOARD #
+#############
+#dashboard_enabled: False
+#dashboard_network_name: ceph-dashboard
+# Choose http or https
+# For https, you should set dashboard.crt/key and grafana.crt/key
+#dashboard_protocol: http
+#dashboard_port: 8234
+
+
 ###############
 # DEPRECATION #
 ###############
index d431ca11269529b7b70066c6866adb51fead41cd..c4a3ce23c5de78fbd316f794cf4ade73496db826 100644 (file)
@@ -69,6 +69,7 @@ fetch_directory: ~/ceph-ansible-keys
 #ceph_nfs_firewall_zone: public
 #ceph_rbdmirror_firewall_zone: public
 #ceph_iscsi_firewall_zone: public
+#ceph_dashboard_firewall_zone: public
 
 # Generate local ceph.conf in fetch directory
 #ceph_conf_local: false
@@ -687,6 +688,17 @@ ceph_docker_registry: "registry.access.redhat.com"
 #  - { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
 
 
+#############
+# DASHBOARD #
+#############
+#dashboard_enabled: False
+#dashboard_network_name: ceph-dashboard
+# Choose http or https
+# For https, you should set dashboard.crt/key and grafana.crt/key
+#dashboard_protocol: http
+#dashboard_port: 8234
+
+
 ###############
 # DEPRECATION #
 ###############
index 66700cf06972dde9cab161fce6cfc790c7bac031..005486e51c2196fd4b584cd616bb8e152a7ffb49 100644 (file)
     - "{{ nfs_group_name|default('nfss') }}"
     - "{{ client_group_name|default('clients') }}"
     - "{{ mgr_group_name|default('mgrs') }}"
+    - grafana-server
 
   become: true
 
   tasks:
     - debug: msg="gather facts on all Ceph hosts for following reference"
 
+
+- name: purge node-exporter
+  hosts:
+    - "{{ mon_group_name|default('mons') }}"
+    - "{{ osd_group_name|default('osds') }}"
+    - "{{ mds_group_name|default('mdss') }}"
+    - "{{ rgw_group_name|default('rgws') }}"
+    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
+    - "{{ nfs_group_name|default('nfss') }}"
+    - "{{ client_group_name|default('clients') }}"
+    - "{{ mgr_group_name|default('mgrs') }}"
+    - agents
+    - grafana-server
+    - clients
+    - iscsigws
+    - iscsi-gws # for backward compatibility only!
+
+  become: true
+
+  tasks:
+    - name: set ceph_docker_registry value if not set
+      set_fact:
+        ceph_docker_registry: "docker.io"
+      when: ceph_docker_registry is not defined
+
+    - name: disable node_exporter service
+      service:
+        name: node_exporter
+        state: stopped
+        enabled: no
+      failed_when: false
+
+    - name: remove node-exporter container
+      docker_container:
+        name: node_exporter
+        state: absent
+      failed_when: false
+
+    - name: remove node_exporter service file
+      file:
+        name: /etc/systemd/system/node_exporter.service
+        state: absent
+
+    - name: remove node-exporter image
+      docker_image:
+        image: "{{ ceph_docker_registry }}/prom/node-exporter"
+        state: absent
+        force: yes
+      tags:
+        - remove_img
+      failed_when: false
+
+
+- name: purge ceph grafana-server
+  hosts: grafana-server
+  become: true
+  vars:
+    grafana_services:
+      - grafana-server
+      - prometheus
+      - alertmanager
+
+  tasks:
+    - name: set ceph_docker_registry value if not set
+      set_fact:
+        ceph_docker_registry: "docker.io"
+      when: ceph_docker_registry is not defined
+
+    - name: stop services
+      service:
+        name: "{{ item }}"
+        state: stopped
+        enabled: no
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove containers
+      docker_container:
+        name: "{{ item }}"
+        state: absent
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove service files
+      file:
+        name: "/etc/systemd/system/{{ item }}.service"
+        state: absent
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove images
+      docker_image:
+        name: "{{ item }}"
+        state: absent
+        force: yes
+      with_items:
+        - "{{ ceph_docker_registry }}/prom/prometheus"
+        - "{{ ceph_docker_registry }}/grafana/grafana"
+        - "{{ ceph_docker_registry }}/prom/alertmanager"
+      failed_when: false
+
+    - name: remove data
+      file:
+        name: "{{ item }}"
+        state: absent
+      with_items:
+        - /etc/grafana/dashboards
+        - /etc/grafana/grafana.ini
+        - /etc/grafana/provisioning
+        - /var/lib/grafana
+        - /etc/alertmanager
+        - /var/lib/alertmanager
+        - /var/lib/prometheus
+        - /etc/prometheus
+      failed_when: false
+
+
 - name: purge ceph mds cluster
 
   vars:
       - ceph-release
       - ceph-radosgw
       - calamari-server
+      - ceph-grafana-dashboards
 
     ceph_remaining_packages:
       - libcephfs1
     - "{{ nfs_group_name|default('nfss') }}"
     - "{{ client_group_name|default('clients') }}"
     - "{{ mgr_group_name|default('mgrs') }}"
+    - grafana-server
 
   gather_facts: false # Already gathered previously
 
index 3c948961cb26b0379a555ef7becebd098b9a49a5..447c9474a0f7a8a7e2490713f2e72184c273a0c6 100644 (file)
     tags: remove_img
     ignore_errors: true
 
+
+- name: purge node-exporter
+
+  hosts:
+    - "{{ mon_group_name|default('mons') }}"
+    - "{{ osd_group_name|default('osds') }}"
+    - "{{ mds_group_name|default('mdss') }}"
+    - "{{ rgw_group_name|default('rgws') }}"
+    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
+    - "{{ nfs_group_name|default('nfss') }}"
+    - "{{ mgr_group_name|default('mgrs') }}"
+    - agents
+    - grafana-server
+    - iscsigws
+    - iscsi-gws # for backward compatibility only!
+    - clients
+
+  gather_facts: false
+
+  become: true
+
+  tasks:
+    - name: set ceph_docker_registry value if not set
+      set_fact:
+        ceph_docker_registry: "docker.io"
+      when: ceph_docker_registry is not defined
+
+    - name: disable node_exporter service
+      service:
+        name: node_exporter
+        state: stopped
+        enabled: no
+      failed_when: false
+
+    - name: remove node-exporter container
+      docker_container:
+        name: node_exporter
+        state: absent
+      failed_when: false
+
+    - name: remove node_exporter service file
+      file:
+        name: /etc/systemd/system/node_exporter.service
+        state: absent
+
+    - name: remove node-exporter image
+      docker_image:
+        image: "{{ ceph_docker_registry }}/prom/node-exporter"
+        state: absent
+        force: yes
+      tags:
+        - remove_img
+      failed_when: false
+
+
+- name: purge ceph-grafana
+
+  hosts: grafana-server
+
+  gather_facts: false
+
+  become: true
+
+  vars:
+    grafana_services:
+      - grafana-server
+      - prometheus
+      - alertmanager
+
+  tasks:
+    - name: set ceph_docker_registry value if not set
+      set_fact:
+        ceph_docker_registry: "docker.io"
+      when: ceph_docker_registry is not defined
+
+    - name: stop services
+      service:
+        name: "{{ item }}"
+        state: stopped
+        enabled: no
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove containers
+      docker_container:
+        name: "{{ item }}"
+        state: absent
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove service files
+      file:
+        name: "/etc/systemd/system/{{ item }}.service"
+        state: absent
+      with_items: "{{ grafana_services }}"
+      failed_when: false
+
+    - name: remove images
+      docker_image:
+        name: "{{ item }}"
+        state: absent
+        force: yes
+      with_items:
+        - "{{ ceph_docker_registry }}/prom/prometheus"
+        - "{{ ceph_docker_registry }}/grafana/grafana"
+        - "{{ ceph_docker_registry }}/prom/alertmanager"
+      failed_when: false
+
+    - name: remove data
+      file:
+        name: "{{ item }}"
+        state: absent
+      with_items:
+        - /etc/grafana/grafana.ini
+        - /etc/grafana/provisioning
+        - /var/lib/grafana
+        - /etc/alertmanager
+        - /var/lib/alertmanager
+        - /var/lib/prometheus
+        - /etc/prometheus
+      failed_when: false
+
 - name: check container hosts
 
   hosts:
index 9062c072bd63bf1217cfe366b3d9c7dd06d4279a..f6d1334e854703d21c266001ca4299e18cd64f3f 100644 (file)
@@ -18,6 +18,7 @@
 - name: include fetch_image.yml
   include_tasks: fetch_image.yml
   tags: fetch_container_image
+  when: containerized_deployment
 
 - name: get ceph version
   command: >
   changed_when: false
   check_mode: no
   register: ceph_version
+  when: containerized_deployment
 
 - name: set_fact ceph_version ceph_version.stdout.split
   set_fact:
     ceph_version: "{{ ceph_version.stdout.split(' ')[2] }}"
+  when: containerized_deployment
 
 - name: include release.yml
   include_tasks: release.yml
+  when: containerized_deployment
diff --git a/roles/ceph-dashboard/defaults/main.yml b/roles/ceph-dashboard/defaults/main.yml
new file mode 100644 (file)
index 0000000..c3838e0
--- /dev/null
@@ -0,0 +1,12 @@
+---
+dashboard_admin_user: admin
+dashboard_admin_password: admin
+# We only need this for SSL (https) connections
+dashboard_crt: ''
+dashboard_key: ''
+dashboard_rgw_api_user_id: ceph-dashboard
+dashboard_rgw_api_host: ''
+dashboard_rgw_api_port: ''
+dashboard_rgw_api_scheme: ''
+dashboard_rgw_api_admin_resource: ''
+dashboard_rgw_api_no_ssl_verify: ''
diff --git a/roles/ceph-dashboard/meta/main.yml b/roles/ceph-dashboard/meta/main.yml
new file mode 100644 (file)
index 0000000..464f131
--- /dev/null
@@ -0,0 +1,14 @@
+---
+galaxy_info:
+  company: Red Hat
+  author: Boris Ranto
+  description: Configures Ceph Dashboard
+  license: Apache
+  min_ansible_version: 2.4
+  platforms:
+    - name: EL
+      versions:
+        - 7
+  galaxy_tags:
+    - system
+dependencies: []
diff --git a/roles/ceph-dashboard/tasks/configure_dashboard.yml b/roles/ceph-dashboard/tasks/configure_dashboard.yml
new file mode 100644 (file)
index 0000000..4780e03
--- /dev/null
@@ -0,0 +1,162 @@
+---
+- name: set mgr_prefix default
+  set_fact:
+    mgr_prefix: ""
+
+- block:
+  - name: check to see if the mgr is containerized
+    command: "{{ container_binary }} inspect ceph-mgr-{{ ansible_hostname }}"
+    register: mgr_container
+    failed_when: false
+    changed_when: false
+
+  - name: choose the correct container name
+    set_fact:
+      container_name: "{% if mgr_container.rc == 0 %}ceph-mgr-{{ ansible_hostname }}{% endif %}"
+
+  - name: prefix the mgr command with a {{ container_binary }} command
+    set_fact:
+      mgr_prefix: "{{ container_binary }} exec {{ container_name }}"
+    when: container_name != ""
+  when: container_binary != ""
+
+- name: disable SSL for dashboard
+  shell: |
+    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/ssl false || \
+    {{ mgr_prefix }} ceph config-key set mgr/dashboard/ssl false
+  when: dashboard_protocol != "https"
+
+- name: enable SSL for dashboard
+  shell: |
+    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/ssl true || \
+    {{ mgr_prefix }} ceph config-key set mgr/dashboard/ssl true
+  when: dashboard_protocol == "https"
+
+- name: copy dashboard SSL certificate file
+  copy:
+    src: "{{ dashboard_crt }}"
+    dest: "/etc/ceph/ceph-dashboard.crt"
+    owner: root
+    group: root
+    mode: 0644
+  when:
+    - dashboard_crt
+    - dashboard_protocol == "https"
+
+- name: copy dashboard SSL certificate key
+  copy:
+    src: "{{ dashboard_key }}"
+    dest: "/etc/ceph/ceph-dashboard.key"
+    owner: root
+    group: root
+    mode: 0644
+  when:
+    - dashboard_key
+    - dashboard_protocol == "https"
+
+- name: generate a Self Signed OpenSSL certificate for dashboard
+  shell: |
+    test -f /etc/ceph/ceph-dashboard.key -a -f /etc/ceph/ceph-dashboard.crt || \
+    openssl req -new -nodes -x509 -subj '/O=IT/CN=ceph-dashboard' -days 3650 -keyout /etc/ceph/ceph-dashboard.key -out /etc/ceph/ceph-dashboard.crt -extensions v3_ca
+  when:
+    - dashboard_protocol == "https"
+    - not dashboard_key or not dashboard_crt
+
+- name: import dashboard certificate file
+  command: "{{ mgr_prefix }} ceph config-key set mgr/dashboard/crt -i /etc/ceph/ceph-dashboard.crt"
+  changed_when: false
+  when: dashboard_protocol == "https"
+
+- name: import dashboard certificate key
+  command: "{{ mgr_prefix }} ceph config-key set mgr/dashboard/key -i /etc/ceph/ceph-dashboard.key"
+  changed_when: false
+  when: dashboard_protocol == "https"
+
+- name: "set the dashboard port ({{ dashboard_port }})"
+  shell: |
+    {{ mgr_prefix }} ceph config set mgr mgr/dashboard/server_port {{ dashboard_port }} || \
+    {{ mgr_prefix }} ceph config-key set mgr/dashboard/server_port {{ dashboard_port }}
+
+- name: disable mgr dashboard module (restart)
+  command: "{{ mgr_prefix }} ceph mgr module disable dashboard"
+  changed_when: false
+
+- name: enable mgr dashboard module (restart)
+  command: "{{ mgr_prefix }} ceph mgr module enable dashboard"
+  changed_when: false
+
+- name: set or update dashboard admin username and password
+  shell: |
+    if {{ mgr_prefix }} ceph dashboard ac-user-show {{ dashboard_admin_user }}; then
+      {{ mgr_prefix }} ceph dashboard ac-user-set-password {{ dashboard_admin_user }} {{ dashboard_admin_password }}
+    else
+      {{ mgr_prefix }} ceph dashboard ac-user-create {{ dashboard_admin_user }} {{ dashboard_admin_password }} administrator
+    fi
+  retries: 6
+  delay: 5
+  register: ac_result
+  until: ac_result.rc == 0
+
+- name: set grafana url
+  command: "{{ mgr_prefix }} ceph dashboard set-grafana-api-url {{ dashboard_protocol }}://{{ groups['grafana-server'][0] }}:3000/"
+  changed_when: false
+
+- name: set alertmanager host
+  command: "{{ mgr_prefix }} ceph dashboard set-alertmanager-api-host {{ dashboard_protocol }}://{{ groups['grafana-server'][0] }}:9093/"
+  changed_when: false
+
+- name: create radosgw system user
+  shell: "timeout 20 {{ mgr_prefix }} radosgw-admin user create --uid={{ dashboard_rgw_api_user_id }} --display-name='Ceph dashboard' --system"
+  register: rgw_user_output
+  until: rgw_user_output.rc == 0
+  retries: 3
+
+- name: get the rgw access and secret keys
+  set_fact:
+    rgw_access_key: "{{ (rgw_user_output.stdout | from_json)['keys'][0]['access_key'] }}"
+    rgw_secret_key: "{{ (rgw_user_output.stdout | from_json)['keys'][0]['secret_key'] }}"
+
+- name: set the rgw user
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-user-id {{ dashboard_rgw_api_user_id }}"
+  changed_when: false
+
+- name: set the rgw access key
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-access-key {{ rgw_access_key }}"
+  changed_when: false
+
+- name: set the rgw secret key
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-secret-key {{ rgw_secret_key }}"
+  changed_when: false
+
+- name: set the rgw host
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-host {{ dashboard_rgw_api_host }}"
+  changed_when: false
+  when: dashboard_rgw_api_host
+
+- name: set the rgw port
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-port {{ dashboard_rgw_api_port }}"
+  changed_when: false
+  when: dashboard_rgw_api_port
+
+- name: set the rgw scheme
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-scheme {{ dashboard_rgw_api_scheme }}"
+  changed_when: false
+  when: dashboard_rgw_api_scheme
+
+- name: set the rgw admin resource
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-admin-resource {{ dashboard_rgw_api_admin_resource }}"
+  changed_when: false
+  when: dashboard_rgw_api_admin_resource
+
+- name: disable ssl verification for rgw
+  command: "{{ mgr_prefix }} ceph dashboard set-rgw-api-ssl-verify False"
+  changed_when: false
+  when: dashboard_rgw_api_no_ssl_verify
+
+- name: disable mgr dashboard module (restart)
+  command: "{{ mgr_prefix }} ceph mgr module disable dashboard"
+  changed_when: false
+
+- name: enable mgr dashboard module (restart)
+  command: "{{ mgr_prefix }} ceph mgr module enable dashboard"
+  changed_when: false
diff --git a/roles/ceph-dashboard/tasks/main.yml b/roles/ceph-dashboard/tasks/main.yml
new file mode 100644 (file)
index 0000000..e92cd7a
--- /dev/null
@@ -0,0 +1,7 @@
+---
+- name: include configure_dashboard.yml
+  include_tasks: configure_dashboard.yml
+
+- name: print dashboard URL
+  debug:
+    msg: "The dashboard has been deployed! You can access your dashboard web UI at {{ dashboard_protocol }}://{{ ansible_fqdn }}:{{ dashboard_port }}/ as an '{{ dashboard_admin_user }}' user with '{{ dashboard_admin_password }}' password."
index adb468297c16ccf295d7ad24b1f0240eab9f7ae8..4236d01b2c04a42dddc35091571cee4abaa6a036 100644 (file)
@@ -61,6 +61,7 @@ ceph_mds_firewall_zone: public
 ceph_nfs_firewall_zone: public
 ceph_rbdmirror_firewall_zone: public
 ceph_iscsi_firewall_zone: public
+ceph_dashboard_firewall_zone: public
 
 # Generate local ceph.conf in fetch directory
 ceph_conf_local: false
@@ -679,6 +680,17 @@ openstack_keys:
   - { name: client.openstack, caps: { mon: "profile rbd", osd: "profile rbd pool={{ openstack_glance_pool.name }}, profile rbd pool={{ openstack_nova_pool.name }}, profile rbd pool={{ openstack_cinder_pool.name }}, profile rbd pool={{ openstack_cinder_backup_pool.name }}"}, mode: "0600" }
 
 
+#############
+# DASHBOARD #
+#############
+dashboard_enabled: False
+dashboard_network_name: ceph-dashboard
+# Choose http or https
+# For https, you should set dashboard.crt/key and grafana.crt/key
+dashboard_protocol: http
+dashboard_port: 8234
+
+
 ###############
 # DEPRECATION #
 ###############
diff --git a/roles/ceph-grafana/defaults/main.yml b/roles/ceph-grafana/defaults/main.yml
new file mode 100644 (file)
index 0000000..687c31e
--- /dev/null
@@ -0,0 +1,17 @@
+---
+grafana_admin_user: admin
+grafana_admin_password: admin
+# We only need this for SSL (https) connections
+grafana_crt: ''
+grafana_key: ''
+grafana_container_image: "grafana/grafana:latest"
+grafana_container_cpu_period: 100000
+grafana_container_cpu_cores: 2
+# container_memory is in GB
+grafana_container_memory: 4
+grafana_uid: 472
+grafana_datasource: Dashboard
+grafana_dashboards_path: "/etc/grafana/dashboards/ceph-dashboard"
+grafana_plugins:
+  - vonage-status-panel
+  - grafana-piechart-panel
diff --git a/roles/ceph-grafana/files/grafana-server.service b/roles/ceph-grafana/files/grafana-server.service
new file mode 100644 (file)
index 0000000..fab8f51
--- /dev/null
@@ -0,0 +1,17 @@
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+[Unit]
+Description=grafana-server
+After=docker.service
+
+[Service]
+EnvironmentFile=-/etc/environment
+ExecStart=/usr/bin/docker start --attach grafana-server
+ExecStop=-/usr/bin/docker stop grafana-server
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/ceph-grafana/files/grafana.list b/roles/ceph-grafana/files/grafana.list
new file mode 100644 (file)
index 0000000..886da8d
--- /dev/null
@@ -0,0 +1 @@
+deb https://packagecloud.io/grafana/stable/debian/ jessie main
diff --git a/roles/ceph-grafana/handlers/main.yml b/roles/ceph-grafana/handlers/main.yml
new file mode 100644 (file)
index 0000000..fd1bbd4
--- /dev/null
@@ -0,0 +1,8 @@
+---
+- name: enable service
+  # We use the systemd module here so we can use the daemon_reload feature,
+  # since we're shipping the .service file ourselves
+  systemd:
+    name: grafana-server
+    daemon_reload: true
+    enabled: true
diff --git a/roles/ceph-grafana/meta/main.yml b/roles/ceph-grafana/meta/main.yml
new file mode 100644 (file)
index 0000000..76a6bd6
--- /dev/null
@@ -0,0 +1,14 @@
+---
+galaxy_info:
+  company: Red Hat
+  author: Boris Ranto
+  description: Configures Grafana for Ceph Dashboard
+  license: Apache
+  min_ansible_version: 2.4
+  platforms:
+    - name: EL
+      versions:
+        - 7
+  galaxy_tags:
+    - system
+dependencies: []
diff --git a/roles/ceph-grafana/tasks/configure_grafana.yml b/roles/ceph-grafana/tasks/configure_grafana.yml
new file mode 100644 (file)
index 0000000..e031e17
--- /dev/null
@@ -0,0 +1,83 @@
+---
+- name: make sure grafana is down
+  service:
+    name: grafana-server
+    state: stopped
+
+- name: wait for grafana to be stopped
+  wait_for:
+    port: 3000
+    state: stopped
+
+- name: make sure grafana configuration directories exist
+  file:
+    path: "{{ item }}"
+    state: directory
+    recurse: yes
+  with_items:
+    - "/etc/grafana/provisioning/datasources"
+    - "/etc/grafana/provisioning/dashboards"
+
+- name: write grafana.ini
+  template:
+    src: grafana.ini
+    dest: /etc/grafana/grafana.ini
+    mode: 0640
+
+- name: write datasources provisioning config file
+  template:
+    src: datasources-ceph-dashboard.yml
+    dest: /etc/grafana/provisioning/datasources/ceph-dashboard.yml
+    mode: 0640
+
+- name: Write dashboards provisioning config file
+  template:
+    src: dashboards-ceph-dashboard.yml
+    dest: /etc/grafana/provisioning/dashboards/ceph-dashboard.yml
+    mode: 0640
+
+- name: copy grafana SSL certificate file
+  copy:
+    src: "{{ grafana_crt }}"
+    dest: "/etc/grafana/ceph-dashboard.crt"
+    mode: 0640
+  when:
+    - grafana_crt
+    - dashboard_protocol == "https"
+
+- name: copy grafana SSL certificate key
+  copy:
+    src: "{{ grafana_key }}"
+    dest: "/etc/grafana/ceph-dashboard.key"
+    mode: 0640
+  when:
+    - grafana_key
+    - dashboard_protocol == "https"
+
+- name: generate a Self Signed OpenSSL certificate for dashboard
+  shell: |
+    test -f /etc/grafana/ceph-dashboard.key -a -f /etc/grafana/ceph-dashboard.crt || \
+    openssl req -new -nodes -x509 -subj '/O=IT/CN=ceph-grafana' -days 3650 -keyout /etc/grafana/ceph-dashboard.key -out /etc/grafana/ceph-dashboard.crt -extensions v3_ca
+  when:
+    - dashboard_protocol == "https"
+    - not grafana_key or not grafana_crt
+
+- name: set owner/group on /etc/grafana
+  file:
+    path: /etc/grafana
+    state: directory
+    # This is the UID used by the grafana container
+    owner: "{{ grafana_uid }}"
+    # This group is used by the grafana rpm
+    group: "grafana"
+    recurse: true
+
+- name: enable and start grafana
+  service:
+    name: grafana-server
+    state: restarted
+    enabled: true
+
+- name: wait for grafana to start
+  wait_for:
+    port: 3000
diff --git a/roles/ceph-grafana/tasks/main.yml b/roles/ceph-grafana/tasks/main.yml
new file mode 100644 (file)
index 0000000..d125a51
--- /dev/null
@@ -0,0 +1,6 @@
+---
+- name: include setup_container.yml
+  include_tasks: setup_container.yml
+
+- name: include configure_grafana.yml
+  include_tasks: configure_grafana.yml
diff --git a/roles/ceph-grafana/tasks/setup_container.yml b/roles/ceph-grafana/tasks/setup_container.yml
new file mode 100644 (file)
index 0000000..f0c1640
--- /dev/null
@@ -0,0 +1,64 @@
+---
+- name: include ceph-container-common
+  include_role:
+    name: ceph-container-common
+    allow_duplicates: false
+
+- name: create grafana user
+  user:
+    name: grafana
+    shell: '/bin/false'
+    createhome: false
+    system: true
+
+- name: create /etc/grafana and /var/lib/grafana
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ grafana_uid }}"
+    recurse: true
+  with_items:
+    - /etc/grafana
+    - /var/lib/grafana
+
+- name: make sure the grafana-server service is down
+  service:
+    name: grafana-server
+    state: stopped
+  failed_when: false
+
+- name: create docker container
+  docker_container:
+    name: grafana-server
+    image: "{{ grafana_container_image }}"
+    state: present
+    # restart to allow updates
+    restart: true
+    restart_policy: no
+    force_kill: yes
+    published_ports: '3000:3000'
+    detach: true
+    volumes:
+      - "/etc/grafana:/etc/grafana:Z"
+      - "/var/lib/grafana:/var/lib/grafana:Z"
+    networks:
+      - name: "{{ dashboard_network_name }}"
+    keep_volumes: true
+    pull: true
+    cpu_period: "{{ grafana_container_cpu_period }}"
+    # As of ansible-2.5.2, this module doesn't support the equivalent of the
+    # --cpus flag, so we must use period/quota for now
+    cpu_quota: "{{ grafana_container_cpu_period * grafana_container_cpu_cores }}"
+    memory: "{{ grafana_container_memory }}GB"
+    memory_swap: "{{ grafana_container_memory * 2 }}GB"
+    env:
+      GF_INSTALL_PLUGINS: "{{ grafana_plugins|join(',') }}"
+
+- name: ship systemd service
+  copy:
+    src: grafana-server.service
+    dest: "/etc/systemd/system/"
+    owner: root
+    group: root
+    mode: 0644
+  notify: enable service
diff --git a/roles/ceph-grafana/templates/dashboards-ceph-dashboard.yml b/roles/ceph-grafana/templates/dashboards-ceph-dashboard.yml
new file mode 100644 (file)
index 0000000..64dbf1d
--- /dev/null
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+- name: 'Ceph Dashboard'
+  orgId: 1
+  folder: 'ceph-dashboard'
+  type: file
+  disableDeletion: false
+  updateIntervalSeconds: 3
+  editable: false
+  options:
+    path: '{{ grafana_dashboards_path }}'
diff --git a/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml b/roles/ceph-grafana/templates/datasources-ceph-dashboard.yml
new file mode 100644 (file)
index 0000000..b2947b7
--- /dev/null
@@ -0,0 +1,26 @@
+apiVersion: 1
+
+# list of datasources that should be deleted from the database
+deleteDatasources:
+  - name: '{{ grafana_datasource }}'
+    orgId: 1
+
+# list of datasources to insert/update depending
+# what's available in the database
+datasources:
+  # <string, required> name of the datasource. Required
+- name: '{{ grafana_datasource }}'
+  # <string, required> datasource type. Required
+  type: 'prometheus'
+  # <string, required> access mode. proxy or direct (Server or Browser in the UI). Required
+  access: 'proxy'
+  # <int> org id. will default to orgId 1 if not specified
+  orgId: 1
+  # <string> url
+  url: 'http://prometheus:9090'
+  # <bool> enable/disable basic auth
+  basicAuth: false
+  # <bool> mark as default datasource. Max one per org
+  isDefault: true
+  # <bool> allow users to edit datasources from the UI.
+  editable: false
diff --git a/roles/ceph-grafana/templates/grafana.ini b/roles/ceph-grafana/templates/grafana.ini
new file mode 100644 (file)
index 0000000..0ea67e9
--- /dev/null
@@ -0,0 +1,26 @@
+# [server]
+# root_url = %(protocol)s://%(domain)s:%(http_port)s/api/grafana/proxy
+
+[users]
+default_theme = light
+
+#################################### Anonymous Auth ##########################
+[auth.anonymous]
+# enable anonymous access
+enabled = true
+
+# specify organization name that should be used for unauthenticated users
+org_name = Main Org.
+
+# specify role for unauthenticated users
+org_role = Viewer
+
+[server]
+cert_file = /etc/grafana/ceph-dashboard.crt
+cert_key = /etc/grafana/ceph-dashboard.key
+domain = {{ ansible_fqdn }}
+protocol = {{ dashboard_protocol }}
+
+[security]
+admin_user = {{ grafana_admin_user }}
+admin_password = {{ grafana_admin_password }}
diff --git a/roles/ceph-grafana/templates/grafana.repo b/roles/ceph-grafana/templates/grafana.repo
new file mode 100644 (file)
index 0000000..1ba7fb6
--- /dev/null
@@ -0,0 +1,9 @@
+[grafana]
+name=grafana
+baseurl=https://packagecloud.io/grafana/stable/el/{{ ansible_distribution_major_version }}/$basearch
+repo_gpgcheck=1
+enabled=1
+gpgcheck=1
+gpgkey=https://packagecloud.io/gpg.key https://grafanarel.s3.amazonaws.com/RPM-GPG-KEY-grafana
+sslverify=1
+sslcacert=/etc/pki/tls/certs/ca-bundle.crt
index 9333b9cc2daa859d3c9b6b92491c58f72ebaf8e6..2a9f16dc7dabb93d697d090c5b06243c37c1bca3 100644 (file)
       set_fact:
          _rbd_target_api_handler_called: False
       listen: "restart ceph rbd-target-api"
+
+    - name: restart node-exporter service
+      listen: "restart node-exporter service"
+      # We use the systemd module here so we can use the daemon_reload feature,
+      # since we're shipping the .service file ourselves
+      systemd:
+        name: 'node_exporter'
+        daemon_reload: true
+        enabled: true
+        state: restarted
index dc89664e43f0ab9058ca5850e4f90fd779c11c82..3f1dbb6b413652cae3a43659415d7bd00a442e93 100644 (file)
       - iscsi_gw_group_name in group_names
     tags: firewall
 
+  - block:
+      - name: open grafana port
+        firewalld:
+          port: "3000/tcp"
+          zone: "{{ ceph_dashboard_firewall_zone }}"
+          permanent: true
+          immediate: true
+          state: enabled
+
+      - name: open node_exporter port
+        firewalld:
+          port: "9100/tcp"
+          zone: "{{ ceph_dashboard_firewall_zone }}"
+          permanent: true
+          immediate: true
+          state: enabled
+
+      - name: open mgr/prometheus port
+        firewalld:
+          port: "9283/tcp"
+          zone: "{{ ceph_dashboard_firewall_zone }}"
+          permanent: true
+          immediate: true
+          state: enabled
+
+      - name: open dashboard port
+        firewalld:
+          port: "{{ dashboard_port }}/tcp"
+          zone: "{{ ceph_dashboard_firewall_zone }}"
+          permanent: true
+          immediate: true
+          state: enabled
+    when: dashboard_enabled
+
 - meta: flush_handlers
index 1acda2f56dbe997ef877531edddc4f81f6189443..29444d0b8fa2b2a8be4e70e0a12e40af447f219a 100644 (file)
@@ -17,6 +17,6 @@
 - name: include mgr_modules.yml
   include_tasks: mgr_modules.yml
   when:
-    - ceph_mgr_modules | length > 0
+    - ceph_mgr_modules | length > 0 or dashboard_enabled
     - ((groups[mgr_group_name] | default([]) | length == 0 and inventory_hostname == groups[mon_group_name] | last) or
       (groups[mgr_group_name] | default([]) | length > 0 and inventory_hostname == groups[mgr_group_name] | last))
index 81bc623fa7b79b756d7418fced52f27447745d8e..b3bc689bf55cf2ac6b2b0e42b93c1900382fa5b0 100644 (file)
@@ -7,6 +7,15 @@
   until: result is succeeded
   when: ansible_os_family in ['RedHat', 'Suse']
 
+- name: install ceph-grafana-dashboards package on RedHat or SUSE
+  package:
+    name: ceph-grafana-dashboards
+    state: "{{ (upgrade_ceph_packages|bool) | ternary('latest','present') }}"
+  register: result
+  until: result is succeeded
+  when:
+    - ansible_os_family in ['RedHat', 'Suse']
+
 - name: install ceph-mgr packages for debian
   apt:
     name: '{{ ceph_mgr_packages }}'
diff --git a/roles/ceph-node-exporter/defaults/main.yml b/roles/ceph-node-exporter/defaults/main.yml
new file mode 100644 (file)
index 0000000..1f150b9
--- /dev/null
@@ -0,0 +1,2 @@
+---
+node_exporter_container_image: prom/node-exporter:latest
diff --git a/roles/ceph-node-exporter/files/node_exporter.service b/roles/ceph-node-exporter/files/node_exporter.service
new file mode 100644 (file)
index 0000000..ebf57b1
--- /dev/null
@@ -0,0 +1,20 @@
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+[Unit]
+Description=Node Exporter
+After=docker.service
+
+[Service]
+EnvironmentFile=-/etc/environment
+ExecStart=/usr/bin/docker start --attach node-exporter
+# Make sure the cfg80211 module is loaded before running the container; the
+# node exporter needs it to test for the presence of wi-fi devices
+ExecStartPre=/usr/sbin/modprobe cfg80211
+ExecStop=-/usr/bin/docker stop node-exporter
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/ceph-node-exporter/meta/main.yml b/roles/ceph-node-exporter/meta/main.yml
new file mode 100644 (file)
index 0000000..633df08
--- /dev/null
@@ -0,0 +1,14 @@
+---
+galaxy_info:
+  company: Red Hat
+  author: Boris Ranto
+  description: Configures Prometheus Node Exporter
+  license: Apache
+  min_ansible_version: 2.4
+  platforms:
+    - name: EL
+      versions:
+        - 7
+  galaxy_tags:
+    - system
+dependencies: []
diff --git a/roles/ceph-node-exporter/tasks/main.yml b/roles/ceph-node-exporter/tasks/main.yml
new file mode 100644 (file)
index 0000000..c187076
--- /dev/null
@@ -0,0 +1,3 @@
+---
+- name: include setup_container.yml
+  include_tasks: setup_container.yml
diff --git a/roles/ceph-node-exporter/tasks/setup_container.yml b/roles/ceph-node-exporter/tasks/setup_container.yml
new file mode 100644 (file)
index 0000000..09035e2
--- /dev/null
@@ -0,0 +1,42 @@
+---
+- name: include ceph-container-common
+  include_role:
+    name: ceph-container-common
+    allow_duplicates: false
+
+- name: make sure the node_exporter service is down
+  service:
+    name: node_exporter
+    state: stopped
+  failed_when: false
+
+- name: start docker container
+  docker_container:
+    name: node-exporter
+    image: "{{ node_exporter_container_image }}"
+    state: started
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--no-collector.timex'
+    # restart to allow updates
+    restart: true
+    restart_policy: no
+    force_kill: yes
+    detach: true
+    volumes:
+      - '/proc:/host/proc:ro'
+      - '/sys:/host/sys:ro'
+    network_mode: host
+    keep_volumes: true
+    pull: true
+  notify: restart node-exporter service
+
+- name: ship systemd service
+  copy:
+    src: node_exporter.service
+    dest: "/etc/systemd/system/"
+    owner: root
+    group: root
+    mode: 0644
+  notify: restart node-exporter service
diff --git a/roles/ceph-prometheus/defaults/main.yml b/roles/ceph-prometheus/defaults/main.yml
new file mode 100644 (file)
index 0000000..4e92142
--- /dev/null
@@ -0,0 +1,17 @@
+---
+prometheus_container_image: prom/prometheus:latest
+prometheus_container_cpu_period: 100000
+prometheus_container_cpu_cores: 2
+# container_memory is in GB
+prometheus_container_memory: 4
+prometheus_data_dir: /var/lib/prometheus
+prometheus_conf_dir: /etc/prometheus
+prometheus_user_id: '65534'  # This is the UID used by the prom/prometheus docker image
+
+alertmanager_container_image: prom/alertmanager:latest
+alertmanager_container_cpu_period: 100000
+alertmanager_container_cpu_cores: 2
+# container_memory is in GB
+alertmanager_container_memory: 4
+alertmanager_data_dir: /var/lib/alertmanager
+alertmanager_conf_dir: /etc/alertmanager
diff --git a/roles/ceph-prometheus/files/alertmanager.service b/roles/ceph-prometheus/files/alertmanager.service
new file mode 100644 (file)
index 0000000..2683c23
--- /dev/null
@@ -0,0 +1,17 @@
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+[Unit]
+Description=alertmanager
+After=docker.service
+
+[Service]
+EnvironmentFile=-/etc/environment
+ExecStart=/usr/bin/docker start --attach alertmanager
+ExecStop=/usr/bin/docker stop alertmanager
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/ceph-prometheus/files/prometheus.service b/roles/ceph-prometheus/files/prometheus.service
new file mode 100644 (file)
index 0000000..1470935
--- /dev/null
@@ -0,0 +1,17 @@
+# This file is managed by ansible, don't make changes here - they will be
+# overwritten.
+[Unit]
+Description=prometheus
+After=docker.service
+
+[Service]
+EnvironmentFile=-/etc/environment
+ExecStart=/usr/bin/docker start --attach prometheus
+ExecStop=/usr/bin/docker stop prometheus
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/ceph-prometheus/handlers/main.yml b/roles/ceph-prometheus/handlers/main.yml
new file mode 100644 (file)
index 0000000..1e84e56
--- /dev/null
@@ -0,0 +1,12 @@
+---
+- name: service handler
+  # We use the systemd module here so we can use the daemon_reload feature,
+  # since we're shipping the .service file ourselves
+  systemd:
+    name: "{{ item }}"
+    daemon_reload: true
+    enabled: true
+    state: restarted
+  with_items:
+    - 'alertmanager'
+    - 'prometheus'
diff --git a/roles/ceph-prometheus/meta/main.yml b/roles/ceph-prometheus/meta/main.yml
new file mode 100644 (file)
index 0000000..e97ea33
--- /dev/null
@@ -0,0 +1,3 @@
+---
+dependencies:
+  - role: ceph-defaults
diff --git a/roles/ceph-prometheus/tasks/main.yml b/roles/ceph-prometheus/tasks/main.yml
new file mode 100644 (file)
index 0000000..aaa0309
--- /dev/null
@@ -0,0 +1,35 @@
+---
+- name: create prometheus directories
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ prometheus_user_id }}"
+  with_items:
+   - "{{ prometheus_conf_dir }}"
+   - "{{ prometheus_data_dir }}"
+
+- name: write prometheus config file
+  template:
+    src: prometheus.yml
+    dest: "{{ prometheus_conf_dir }}/"
+    owner: "{{ prometheus_user_id }}"
+  notify: service handler
+
+- name: create alertmanager directories
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "root"
+  with_items:
+   - "{{ alertmanager_conf_dir }}"
+   - "{{ alertmanager_data_dir }}"
+
+- name: write alertmanager config file
+  template:
+    src: alertmanager.yml
+    dest: "{{ alertmanager_conf_dir }}/"
+    owner: "root"
+  notify: service handler
+
+- name: include setup_container.yml
+  include_tasks: setup_container.yml
diff --git a/roles/ceph-prometheus/tasks/setup_container.yml b/roles/ceph-prometheus/tasks/setup_container.yml
new file mode 100644 (file)
index 0000000..2574ab4
--- /dev/null
@@ -0,0 +1,93 @@
+---
+- name: include ceph-container-common
+  include_role:
+    name: ceph-container-common
+    allow_duplicates: false
+
+- name: make sure the alertmanager service is down
+  service:
+    name: alertmanager
+    state: stopped
+  failed_when: false
+
+- name: start alertmanager container
+  docker_container:
+    name: alertmanager
+    image: "{{ alertmanager_container_image }}"
+    state: started
+    command:
+      - '--config.file=/etc/alertmanager/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+    # restart to allow updates
+    restart: true
+    restart_policy: no
+    force_kill: yes
+    published_ports: '9093:9093'
+    detach: true
+    volumes:
+      - "{{ alertmanager_conf_dir }}:/etc/alertmanager:Z"
+      - "{{ alertmanager_data_dir }}:/alertmanager:Z"
+    networks:
+      - name: "{{ dashboard_network_name }}"
+    keep_volumes: true
+    pull: true
+    cpu_period: "{{ alertmanager_container_cpu_period }}"
+    # As of ansible-2.5.2, this module doesn't support the equivalent of the
+    # --cpus flag, so we must use period/quota for now
+    cpu_quota: "{{ alertmanager_container_cpu_period * alertmanager_container_cpu_cores }}"
+    #memory: 0
+    #memory_swap: 0
+    memory: "{{ alertmanager_container_memory }}GB"
+    memory_swap: "{{ alertmanager_container_memory * 2 }}GB"
+  notify: service handler
+
+- name: make sure the prometheus service is down
+  service:
+    name: prometheus
+    state: stopped
+  failed_when: false
+
+- name: start prometheus docker container
+  docker_container:
+    name: prometheus
+    image: "{{ prometheus_container_image }}"
+    state: started
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.external-url=http://{{ inventory_hostname }}:9090/'
+    # restart to allow updates
+    restart: true
+    restart_policy: no
+    force_kill: yes
+    published_ports: '9090:9090'
+    detach: true
+    volumes:
+      - "{{ prometheus_conf_dir }}:/etc/prometheus:Z"
+      - "{{ prometheus_data_dir }}:/prometheus:Z"
+    networks:
+      - name: "{{ dashboard_network_name }}"
+    user: "{{ prometheus_user_id }}"
+    keep_volumes: true
+    pull: true
+    cpu_period: "{{ prometheus_container_cpu_period }}"
+    # As of ansible-2.5.2, this module doesn't support the equivalent of the
+    # --cpus flag, so we must use period/quota for now
+    cpu_quota: "{{ prometheus_container_cpu_period * prometheus_container_cpu_cores }}"
+    #memory: 0
+    #memory_swap: 0
+    memory: "{{ prometheus_container_memory }}GB"
+    memory_swap: "{{ prometheus_container_memory * 2 }}GB"
+  notify: service handler
+
+- name: ship systemd services
+  copy:
+    src: "{{ item }}"
+    dest: "/etc/systemd/system/"
+    owner: root
+    group: root
+    mode: 0644
+  with_items:
+    - 'alertmanager.service'
+    - 'prometheus.service'
+  notify: service handler
diff --git a/roles/ceph-prometheus/templates/alertmanager.yml b/roles/ceph-prometheus/templates/alertmanager.yml
new file mode 100644 (file)
index 0000000..4408de0
--- /dev/null
@@ -0,0 +1,15 @@
+global:
+  resolve_timeout: 5m
+
+route:
+  group_by: ['alertname']
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 1h
+  receiver: 'ceph-dashboard'
+receivers:
+- name: 'ceph-dashboard'
+  webhook_configs:
+{% for host in groups['mgrs'] | default(groups['mons']) %}
+  - url: '{{ dashboard_protocol }}://{{ host }}:{{ dashboard_port }}/api/prometheus_receiver'
+{% endfor %}
diff --git a/roles/ceph-prometheus/templates/prometheus.yml b/roles/ceph-prometheus/templates/prometheus.yml
new file mode 100644 (file)
index 0000000..860eb5e
--- /dev/null
@@ -0,0 +1,47 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+rule_files:
+  - '/etc/prometheus/alerts/*'
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+  - job_name: 'ceph'
+    honor_labels: true
+    static_configs:
+{% for host in groups['mgrs'] | default(groups['mons']) %}
+      - targets: ['{{ host }}:9283']
+        labels:
+          instance: 'ceph_cluster'
+{% endfor %}
+  - job_name: 'node'
+    static_configs:
+{% for host in (groups['all'] | difference(groups['grafana-server'])) %}
+      - targets: ['{{ host }}:9100']
+        labels:
+          instance: "{{ hostvars[host]['ansible_nodename'] }}"
+{% endfor %}
+  - job_name: 'grafana'
+    static_configs:
+{% for host in groups['grafana-server'] %}
+      - targets: ['{{ host }}:9100']
+        labels:
+          instance: "{{ hostvars[host]['ansible_nodename'] }}"
+{% endfor %}
+{% if 'iscsigws' in groups %}
+  - job_name: 'iscsi-gws'
+    static_configs:
+{% for host in groups['iscsigws'] %}
+      - targets: ['{{ host }}:9287']
+        labels:
+          instance: "{{ hostvars[host]['ansible_nodename'] }}"
+{% endfor %}
+{% endif %}
+alerting:
+  alertmanagers:
+  - scheme: http
+    static_configs:
+    - targets: ['alertmanager:9093']
index 516116951db97fd219759ca7f0b8a00650b2ecb4..a76394f5f2ea4401c1800b51a196df144866fc2d 100644 (file)
@@ -13,6 +13,7 @@
   - iscsigws
   - iscsi-gws # for backward compatibility only!
   - mgrs
+  - grafana-server
 
   gather_facts: false
   become: True
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
       when: inventory_hostname == groups.get('clients', ['']) | first
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-container-common
     - import_role:
       delegate_to: "{{ groups[mon_group_name][0] }}"
       run_once: true
       when: not ceph_status.failed
+
+- hosts: grafana-server
+  become: true
+  tasks:
+    - import_role:
+        name: ceph-defaults
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-facts
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-handler
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-common
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-config
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-prometheus
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-grafana
+      when: dashboard_enabled
+
+- hosts: '{{ (groups["mgrs"] | default(groups["mons"]))[0] }}'
+  become: true
+  tasks:
+    - import_role:
+        name: ceph-defaults
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-dashboard
+      when: dashboard_enabled
index 9aa1923c831bee2c67aa500cbf1a0de1babf650a..23250a79a3609ef2a1e9eb43f8da0b591c9813c1 100644 (file)
@@ -13,6 +13,7 @@
   - mgrs
   - iscsigws
   - iscsi-gws # for backward compatibility only!
+  - grafana-server
 
   gather_facts: false
   any_errors_fatal: true
@@ -92,6 +93,9 @@
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       tags: ['ceph_update_config']
     - import_role:
         name: ceph-handler
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
     - import_role:
         name: ceph-common
     - import_role:
       delegate_to: "{{ groups[mon_group_name][0] }}"
       run_once: true
       when: not ceph_status.failed
+
+- hosts: grafana-server
+  become: true
+  tasks:
+    - import_role:
+        name: ceph-defaults
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-facts
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-handler
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-node-exporter
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-common
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-config
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-prometheus
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-grafana
+      when: dashboard_enabled
+
+- hosts: '{{ (groups["mgrs"] | default(groups["mons"]))[0] }}'
+  become: true
+  tasks:
+    - import_role:
+        name: ceph-defaults
+      tags: ['ceph_update_config']
+      when: dashboard_enabled
+    - import_role:
+        name: ceph-dashboard
+      when: dashboard_enabled