git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
add ceph-handler role
author Sébastien Han <seb@redhat.com>
Fri, 27 Jul 2018 14:56:09 +0000 (16:56 +0200)
committer mergify[bot] <mergify[bot]@users.noreply.github.com>
Fri, 28 Sep 2018 15:15:49 +0000 (15:15 +0000)
The role contains all the handlers for Ceph services. We decided to
leave the ceph-defaults role with variables and a few facts only. This
is useful for organizing the site.yml files and also for adding the
known variables to infrastructure-playbooks.

Signed-off-by: Sébastien Han <seb@redhat.com>
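
As a hedged illustration of the resulting layout (not text from the commit): a play in site.yml can now apply ceph-defaults for variables and facts and ceph-handler for the restart handlers ahead of the service roles. The role names below match the files added in this commit; the host group and the exact list of surrounding roles are assumptions.

# hypothetical site.yml play, for illustration only
- hosts: mons
  become: true
  roles:
    - ceph-defaults   # variables and a few facts only (per this commit)
    - ceph-handler    # restart handlers now live here
    - ceph-config
    - ceph-mon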
38 files changed:
roles/ceph-config/tasks/create_ceph_initial_dirs.yml [new file with mode: 0644]
roles/ceph-config/tasks/main.yml
roles/ceph-defaults/handlers/main.yml [deleted file]
roles/ceph-defaults/tasks/check_running_cluster.yml [deleted file]
roles/ceph-defaults/tasks/check_running_containers.yml [deleted file]
roles/ceph-defaults/tasks/check_socket_non_container.yml [deleted file]
roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml [deleted file]
roles/ceph-defaults/tasks/main.yml
roles/ceph-defaults/templates/restart_mds_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_mon_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2 [deleted file]
roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2 [deleted file]
roles/ceph-handler/LICENSE [new file with mode: 0644]
roles/ceph-handler/README.md [new file with mode: 0644]
roles/ceph-handler/handlers/main.yml [new file with mode: 0644]
roles/ceph-handler/meta/main.yml [new file with mode: 0644]
roles/ceph-handler/tasks/check_running_cluster.yml [new file with mode: 0644]
roles/ceph-handler/tasks/check_running_containers.yml [new file with mode: 0644]
roles/ceph-handler/tasks/check_socket_non_container.yml [new file with mode: 0644]
roles/ceph-handler/tasks/main.yml [new file with mode: 0644]
roles/ceph-handler/templates/restart_mds_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_mon_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_osd_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_rbd_target_api.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 [new file with mode: 0644]
roles/ceph-handler/templates/restart_tcmu_runner.sh.j2 [new file with mode: 0644]
site-docker.yml.sample
site.yml.sample
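
Before the diff itself, a short hedged sketch of how these handlers are reached: tasks in other roles notify the listen topics defined in roles/ceph-handler/handlers/main.yml, and the handlers then decide whether a restart is safe. The task below is hypothetical; only the notify string "restart ceph mons" and the {{ cluster }} variable come from this commit's content.

# hypothetical task in another role, shown only to illustrate the notify flow
- name: generate ceph configuration file
  template:
    src: ceph.conf.j2            # template name is an assumption
    dest: /etc/ceph/{{ cluster }}.conf
  notify: restart ceph mons      # matches `listen: "restart ceph mons"` below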

diff --git a/roles/ceph-config/tasks/create_ceph_initial_dirs.yml b/roles/ceph-config/tasks/create_ceph_initial_dirs.yml
new file mode 100644 (file)
index 0000000..a20f9a4
--- /dev/null
@@ -0,0 +1,25 @@
+---
+- name: set_fact ceph_directories
+  set_fact:
+    ceph_directories:
+      - /etc/ceph
+      - /var/lib/ceph/
+      - /var/lib/ceph/mon
+      - /var/lib/ceph/osd
+      - /var/lib/ceph/mds
+      - /var/lib/ceph/tmp
+      - /var/lib/ceph/radosgw
+      - /var/lib/ceph/bootstrap-rgw
+      - /var/lib/ceph/bootstrap-mds
+      - /var/lib/ceph/bootstrap-osd
+      - /var/lib/ceph/bootstrap-rbd
+      - /var/run/ceph
+
+- name: create ceph initial directories
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ ceph_uid }}"
+    group: "{{ ceph_uid }}"
+    mode: 0755
+  with_items: "{{ ceph_directories }}"
index 0e1e4389ca6227b85efa1ed78dc6cc24deea292c..5e5b3526e1fad1ac67882edde73a7b35450519e9 100644 (file)
@@ -1,4 +1,7 @@
 ---
+- name: include create_ceph_initial_dirs.yml
+  include: create_ceph_initial_dirs.yml
+
 # ceph-common
 - block:
   - name: create ceph conf directory
diff --git a/roles/ceph-defaults/handlers/main.yml b/roles/ceph-defaults/handlers/main.yml
deleted file mode 100644 (file)
index bc6732e..0000000
+++ /dev/null
@@ -1,459 +0,0 @@
----
-- name: update apt cache
-  apt:
-    update-cache: yes
-  when:
-    - ansible_os_family == 'Debian'
-
-# We only want to restart on hosts that have called the handler.
-# This var is set when the handler is called, and unset after the
-# restart to ensure only the correct hosts are restarted.
-- name: set _mon_handler_called before restart
-  set_fact:
-     _mon_handler_called: True
-  listen: "restart ceph mons"
-
-- name: copy mon restart script
-  template:
-    src: restart_mon_daemon.sh.j2
-    dest: /tmp/restart_mon_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph mons"
-  when:
-    - mon_group_name in group_names
-
-- name: restart ceph mon daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_mon_daemon.sh
-  listen: "restart ceph mons"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mon_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_mon_handler_called'] | default(False)
-    - mon_socket_stat.rc == 0
-  with_items: "{{ groups[mon_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph mon daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_mon_daemon.sh
-  listen: "restart ceph mons"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mon_group_name in group_names
-    - containerized_deployment
-    - ceph_mon_container_stat.get('rc') == 0
-    - hostvars[item]['_mon_handler_called'] | default(False)
-    - ceph_mon_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[mon_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _mon_handler_called after restart
-  set_fact:
-     _mon_handler_called: False
-  listen: "restart ceph mons"
-
-- name: set _osd_handler_called before restart
-  set_fact:
-     _osd_handler_called: True
-  listen: "restart ceph osds"
-
-# This does not just restart OSDs but everything else too. Unfortunately
-# at this time the ansible role does not have an OSD id list to use
-# for restarting them specifically.
-# This does not need to run during a rolling update as the playbook will
-# restart all OSDs using the tasks "start ceph osd" or
-# "restart containerized ceph osd"
-- name: copy osd restart script
-  template:
-    src: restart_osd_daemon.sh.j2
-    dest: /tmp/restart_osd_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph osds"
-  when:
-    - osd_group_name in group_names
-    - not rolling_update
-
-- name: restart ceph osds daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
-  listen: "restart ceph osds"
-  when:
-    - osd_group_name in group_names
-    - not containerized_deployment
-    - not rolling_update
-    # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
-    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
-    - osd_socket_stat.rc == 0
-    - ceph_current_status.fsid is defined
-    - handler_health_osd_check
-    - hostvars[item]['_osd_handler_called'] | default(False)
-  with_items: "{{ groups[osd_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph osds daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
-  listen: "restart ceph osds"
-  when:
-    # We do not want to run these checks on initial deployment (`socket_osd_container_stat.results[n].rc == 0`)
-    # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
-    - osd_group_name in group_names
-    - containerized_deployment
-    - not rolling_update
-    - ceph_osd_container_stat.get('rc') == 0
-    - inventory_hostname == groups.get(osd_group_name) | last
-    - ceph_osd_container_stat.get('stdout_lines', [])|length != 0
-    - handler_health_osd_check
-    - hostvars[item]['_osd_handler_called'] | default(False)
-  with_items: "{{ groups[osd_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _osd_handler_called after restart
-  set_fact:
-     _osd_handler_called: False
-  listen: "restart ceph osds"
-
-- name: set _mds_handler_called before restart
-  set_fact:
-     _mds_handler_called: True
-  listen: "restart ceph mdss"
-
-- name: copy mds restart script
-  template:
-    src: restart_mds_daemon.sh.j2
-    dest: /tmp/restart_mds_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph mdss"
-  when:
-    - mds_group_name in group_names
-
-- name: restart ceph mds daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_mds_daemon.sh
-  listen: "restart ceph mdss"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mds_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_mds_handler_called'] | default(False)
-    - mds_socket_stat.rc == 0
-  with_items: "{{ groups[mds_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph mds daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_mds_daemon.sh
-  listen: "restart ceph mdss"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mds_group_name in group_names
-    - containerized_deployment
-    - ceph_mds_container_stat.get('rc') == 0
-    - hostvars[item]['_mds_handler_called'] | default(False)
-    - ceph_mds_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[mds_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _mds_handler_called after restart
-  set_fact:
-     _mds_handler_called: False
-  listen: "restart ceph mdss"
-
-- name: set _rgw_handler_called before restart
-  set_fact:
-     _rgw_handler_called: True
-  listen: "restart ceph rgws"
-
-- name: copy rgw restart script
-  template:
-    src: restart_rgw_daemon.sh.j2
-    dest: /tmp/restart_rgw_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph rgws"
-  when:
-    - rgw_group_name in group_names
-
-- name: restart ceph rgw daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh
-  listen: "restart ceph rgws"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - rgw_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_rgw_handler_called'] | default(False)
-    - rgw_socket_stat.rc == 0
-  with_items: "{{ groups[rgw_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph rgw daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh
-  listen: "restart ceph rgws"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - rgw_group_name in group_names
-    - containerized_deployment
-    - ceph_rgw_container_stat.get('rc') == 0
-    - hostvars[item]['_rgw_handler_called'] | default(False)
-    - ceph_rgw_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[rgw_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _rgw_handler_called after restart
-  set_fact:
-     _rgw_handler_called: False
-  listen: "restart ceph rgws"
-
-- name: set _nfs_handler_called before restart
-  set_fact:
-     _nfs_handler_called: True
-  listen: "restart ceph nfss"
-
-- name: copy nfs restart script
-  template:
-    src: restart_nfs_daemon.sh.j2
-    dest: /tmp/restart_nfs_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph nfss"
-  when:
-    - nfs_group_name in group_names
-
-- name: restart ceph nfs daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh
-  listen: "restart ceph nfss"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - nfs_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_nfs_handler_called'] | default(False)
-    - nfs_socket_stat.rc == 0
-  with_items: "{{ groups[nfs_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph nfs daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh
-  listen: "restart ceph nfss"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - nfs_group_name in group_names
-    - containerized_deployment
-    - ceph_nfs_container_stat.get('rc') == 0
-    - hostvars[item]['_nfs_handler_called'] | default(False)
-    - ceph_nfs_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[nfs_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _nfs_handler_called after restart
-  set_fact:
-     _nfs_handler_called: False
-  listen: "restart ceph nfss"
-
-- name: set _rbdmirror_handler_called before restart
-  set_fact:
-     _rbdmirror_handler_called: True
-  listen: "restart ceph rbdmirrors"
-
-- name: copy rbd mirror restart script
-  template:
-    src: restart_rbd_mirror_daemon.sh.j2
-    dest: /tmp/restart_rbd_mirror_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph rbdmirrors"
-  when:
-    - rbdmirror_group_name in group_names
-
-- name: restart ceph rbd mirror daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh
-  listen: "restart ceph rbdmirrors"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - rbdmirror_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_rbdmirror_handler_called'] | default(False)
-    - rbd_mirror_socket_stat.rc == 0
-  with_items: "{{ groups[rbdmirror_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph rbd mirror daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh
-  listen: "restart ceph rbdmirrors"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - rbdmirror_group_name in group_names
-    - containerized_deployment
-    - ceph_rbd_mirror_container_stat.get('rc') == 0
-    - hostvars[item]['_rbdmirror_handler_called'] | default(False)
-    - ceph_rbd_mirror_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[rbdmirror_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _rbdmirror_handler_called after restart
-  set_fact:
-     _rbdmirror_handler_called: False
-  listen: "restart ceph rbdmirrors"
-
-- name: set _mgr_handler_called before restart
-  set_fact:
-     _mgr_handler_called: True
-  listen: "restart ceph mgrs"
-
-- name: copy mgr restart script
-  template:
-    src: restart_mgr_daemon.sh.j2
-    dest: /tmp/restart_mgr_daemon.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph mgrs"
-  when:
-    - mgr_group_name in group_names
-
-- name: restart ceph mgr daemon(s) - non container
-  command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh
-  listen: "restart ceph mgrs"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mgr_group_name in group_names
-    - not containerized_deployment
-    - hostvars[item]['_mgr_handler_called'] | default(False)
-    - mgr_socket_stat.rc == 0
-  with_items: "{{ groups[mgr_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: restart ceph mgr daemon(s) - container
-  command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh
-  listen: "restart ceph mgrs"
-  when:
-    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
-    - mgr_group_name in group_names
-    - containerized_deployment
-    - ceph_mgr_container_stat.get('rc') == 0
-    - hostvars[item]['_mgr_handler_called'] | default(False)
-    - ceph_mgr_container_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[mgr_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _mgr_handler_called after restart
-  set_fact:
-     _mgr_handler_called: False
-  listen: "restart ceph mgrs"
-
-- name: set _tcmu_runner_handler_called before restart
-  set_fact:
-     _tcmu_runner_handler_called: True
-  listen: "restart ceph tcmu-runner"
-
-- name: copy tcmu-runner restart script
-  template:
-    src: restart_tcmu_runner.sh.j2
-    dest: /tmp/restart_tcmu_runner.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph tcmu-runner"
-  when:
-    - iscsi_gw_group_name in group_names
-
-- name: restart tcmu-runner
-  command: /usr/bin/env bash /tmp/restart_tcmu_runner.sh
-  listen: "restart ceph tcmu-runner"
-  when:
-    - iscsi_gw_group_name in group_names
-    - ceph_tcmu_runner_stat.get('rc') == 0
-    - hostvars[item]['_tcmu_runner_handler_called'] | default(False)
-    - ceph_tcmu_runner_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[iscsi_gw_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _tcmu_runner_handler_called after restart
-  set_fact:
-     _tcmu_runner_handler_called: False
-  listen: "restart ceph tcmu-runner"
-
-- name: set _rbd_target_gw_handler_called before restart
-  set_fact:
-     _rbd_target_gw_handler_called: True
-  listen: "restart ceph rbd-target-gw"
-
-- name: copy rbd-target-gw restart script
-  template:
-    src: restart_rbd_target_gw.sh.j2
-    dest: /tmp/restart_rbd_target_gw.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph rbd-target-gw"
-  when:
-    - iscsi_gw_group_name in group_names
-
-- name: restart rbd-target-gw
-  command: /usr/bin/env bash /tmp/restart_rbd_target_gw.sh
-  listen: "restart ceph rbd-target-gw"
-  when:
-    - iscsi_gw_group_name in group_names
-    - ceph_rbd_target_gw_stat.get('rc') == 0
-    - hostvars[item]['_rbd_target_gw_handler_called'] | default(False)
-    - ceph_rbd_target_gw_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[iscsi_gw_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _rbd_target_gw_handler_called after restart
-  set_fact:
-     _rbd_target_gw_handler_called: False
-  listen: "restart ceph rbd-target-gw"
-
-- name: set _rbd_target_api_handler_called before restart
-  set_fact:
-     _rbd_target_api_handler_called: True
-  listen: "restart ceph rbd-target-api"
-
-- name: copy rbd-target-api restart script
-  template:
-    src: restart_rbd_target_api.sh.j2
-    dest: /tmp/restart_rbd_target_api.sh
-    owner: root
-    group: root
-    mode: 0750
-  listen: "restart ceph rbd-target-api"
-  when:
-    - iscsi_gw_group_name in group_names
-
-- name: restart rbd-target-api
-  command: /usr/bin/env bash /tmp/restart_rbd_target_api.sh
-  listen: "restart ceph rbd-target-api"
-  when:
-    - iscsi_gw_group_name in group_names
-    - ceph_rbd_target_api_stat.get('rc') == 0
-    - hostvars[item]['_rbd_target_api_handler_called'] | default(False)
-    - ceph_rbd_target_api_stat.get('stdout_lines', [])|length != 0
-  with_items: "{{ groups[iscsi_gw_group_name] }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _rbd_target_api_handler_called after restart
-  set_fact:
-     _rbd_target_api_handler_called: False
-  listen: "restart ceph rbd-target-api"
diff --git a/roles/ceph-defaults/tasks/check_running_cluster.yml b/roles/ceph-defaults/tasks/check_running_cluster.yml
deleted file mode 100644 (file)
index 0418d2f..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
----
-- name: include check_running_containers.yml
-  include_tasks: check_running_containers.yml
-  when:
-    - containerized_deployment
-
-- name: include check_socket_non_container.yml
-  include_tasks: check_socket_non_container.yml
-  when:
-    - not containerized_deployment
diff --git a/roles/ceph-defaults/tasks/check_running_containers.yml b/roles/ceph-defaults/tasks/check_running_containers.yml
deleted file mode 100644 (file)
index 111d112..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
----
-- name: check for a mon container
-  command: "docker ps -q --filter='name=ceph-mon-{{ ansible_hostname }}'"
-  register: ceph_mon_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(mon_group_name, [])
-
-- name: check for an osd container
-  command: "docker ps -q --filter='name=ceph-osd-{{ ansible_hostname }}'"
-  register: ceph_osd_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(osd_group_name, [])
-
-- name: check for a mds container
-  command: "docker ps -q --filter='name=ceph-mds-{{ ansible_hostname }}'"
-  register: ceph_mds_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(mds_group_name, [])
-
-- name: check for a rgw container
-  command: "docker ps -q --filter='name=ceph-rgw-{{ ansible_hostname }}'"
-  register: ceph_rgw_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(rgw_group_name, [])
-
-- name: check for a mgr container
-  command: "docker ps -q --filter='name=ceph-mgr-{{ ansible_hostname }}'"
-  register: ceph_mgr_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(mgr_group_name, [])
-
-- name: check for a rbd mirror container
-  command: "docker ps -q --filter='name=ceph-rbd-mirror-{{ ansible_hostname }}'"
-  register: ceph_rbd_mirror_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(rbdmirror_group_name, [])
-
-- name: check for a nfs container
-  command: "docker ps -q --filter='name=ceph-nfs-{{ ansible_hostname }}'"
-  register: ceph_nfs_container_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(nfs_group_name, [])
-
-- name: check for a tcmu-runner container
-  command: "docker ps -q --filter='name=tcmu-runner'"
-  register: ceph_tcmu_runner_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
-
-- name: check for a rbd-target-api container
-  command: "docker ps -q --filter='name=rbd-target-api'"
-  register: ceph_rbd_target_api_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
-
-- name: check for a rbd-target-gw container
-  command: "docker ps -q --filter='name=rbd-target-gw'"
-  register: ceph_rbd_target_gw_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
diff --git a/roles/ceph-defaults/tasks/check_socket_non_container.yml b/roles/ceph-defaults/tasks/check_socket_non_container.yml
deleted file mode 100644 (file)
index 0afe3ea..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
----
-- name: check for a ceph mon socket
-  shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mon_socket_stat
-  when:
-    - inventory_hostname in groups.get(mon_group_name, [])
-
-- name: check if the ceph mon socket is in-use
-  command: fuser --silent {{ mon_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mon_socket
-  when:
-    - inventory_hostname in groups.get(mon_group_name, [])
-    - mon_socket_stat.rc == 0
-
-- name: remove ceph mon socket if exists and not used by a process
-  file:
-    name: "{{ mon_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(mon_group_name, [])
-    - mon_socket_stat.rc == 0
-    - mon_socket.rc == 1
-
-- name: check for a ceph osd socket
-  shell: |
-    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: osd_socket_stat
-  when:
-    - inventory_hostname in groups.get(osd_group_name, [])
-
-- name: check if the ceph osd socket is in-use
-  command: fuser --silent {{ osd_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: osd_socket
-  when:
-    - inventory_hostname in groups.get(osd_group_name, [])
-    - osd_socket_stat.rc == 0
-
-- name: remove ceph osd socket if exists and not used by a process
-  file:
-    name: "{{ osd_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(osd_group_name, [])
-    - osd_socket_stat.rc == 0
-    - osd_socket.rc == 1
-
-- name: check for a ceph mds socket
-  shell: |
-    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mds_socket_stat
-  when:
-    - inventory_hostname in groups.get(mds_group_name, [])
-
-- name: check if the ceph mds socket is in-use
-  command: fuser --silent {{ mds_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mds_socket
-  when:
-    - inventory_hostname in groups.get(mds_group_name, [])
-    - mds_socket_stat.rc == 0
-
-- name: remove ceph mds socket if exists and not used by a process
-  file:
-    name: "{{ mds_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(mds_group_name, [])
-    - mds_socket_stat.rc == 0
-    - mds_socket.rc == 1
-
-- name: check for a ceph rgw socket
-  shell: |
-    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: rgw_socket_stat
-  when:
-    - inventory_hostname in groups.get(rgw_group_name, [])
-
-- name: check if the ceph rgw socket is in-use
-  command: fuser --silent {{ rgw_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: rgw_socket
-  when:
-    - inventory_hostname in groups.get(rgw_group_name, [])
-    - rgw_socket_stat.rc == 0
-
-- name: remove ceph rgw socket if exists and not used by a process
-  file:
-    name: "{{ rgw_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(rgw_group_name, [])
-    - rgw_socket_stat.rc == 0
-    - rgw_socket.rc == 1
-
-- name: check for a ceph mgr socket
-  shell: |
-    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mgr_socket_stat
-  when:
-    - inventory_hostname in groups.get(mgr_group_name, [])
-
-- name: check if the ceph mgr socket is in-use
-  command: fuser --silent {{ mgr_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: mgr_socket
-  when:
-    - inventory_hostname in groups.get(mgr_group_name, [])
-    - mgr_socket_stat.rc == 0
-
-- name: remove ceph mgr socket if exists and not used by a process
-  file:
-    name: "{{ mgr_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(mgr_group_name, [])
-    - mgr_socket_stat.rc == 0
-    - mgr_socket.rc == 1
-
-- name: check for a ceph rbd mirror socket
-  shell: |
-    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: rbd_mirror_socket_stat
-  when:
-    - inventory_hostname in groups.get(rbdmirror_group_name, [])
-
-- name: check if the ceph rbd mirror socket is in-use
-  command: fuser --silent {{ rbd_mirror_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: rbd_mirror_socket
-  when:
-    - inventory_hostname in groups.get(rbdmirror_group_name, [])
-    - rbd_mirror_socket_stat.rc == 0
-
-- name: remove ceph rbd mirror socket if exists and not used by a process
-  file:
-    name: "{{ rbd_mirror_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(rbdmirror_group_name, [])
-    - rbd_mirror_socket_stat.rc == 0
-    - rbd_mirror_socket.rc == 1
-
-- name: check for a ceph nfs ganesha socket
-  command: stat --printf=%n /var/run/ganesha.pid
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: nfs_socket_stat
-  when:
-    - inventory_hostname in groups.get(nfs_group_name, [])
-
-- name: check if the ceph nfs ganesha socket is in-use
-  command: fuser --silent {{ nfs_socket_stat.stdout }}
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  register: nfs_socket
-  when:
-    - inventory_hostname in groups.get(nfs_group_name, [])
-    - nfs_socket_stat.rc == 0
-
-- name: remove ceph nfs ganesha socket if exists and not used by a process
-  file:
-    name: "{{ nfs_socket_stat.stdout }}"
-    state: absent
-  when:
-    - inventory_hostname in groups.get(nfs_group_name, [])
-    - nfs_socket_stat.rc == 0
-    - nfs_socket.rc == 1
-
-- name: check for a tcmu-runner
-  command: "pgrep tcmu-runner"
-  register: ceph_tcmu_runner_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
-
-- name: check for a rbd-target-api
-  command: "pgrep rbd-target-api"
-  register: ceph_rbd_target_api_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
-
-- name: check for a rbd-target-gw
-  command: "pgrep name=rbd-target-gw"
-  register: ceph_rbd_target_gw_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
diff --git a/roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml b/roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml
deleted file mode 100644 (file)
index a20f9a4..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
----
-- name: set_fact ceph_directories
-  set_fact:
-    ceph_directories:
-      - /etc/ceph
-      - /var/lib/ceph/
-      - /var/lib/ceph/mon
-      - /var/lib/ceph/osd
-      - /var/lib/ceph/mds
-      - /var/lib/ceph/tmp
-      - /var/lib/ceph/radosgw
-      - /var/lib/ceph/bootstrap-rgw
-      - /var/lib/ceph/bootstrap-mds
-      - /var/lib/ceph/bootstrap-osd
-      - /var/lib/ceph/bootstrap-rbd
-      - /var/run/ceph
-
-- name: create ceph initial directories
-  file:
-    path: "{{ item }}"
-    state: directory
-    owner: "{{ ceph_uid }}"
-    group: "{{ ceph_uid }}"
-    mode: 0755
-  with_items: "{{ ceph_directories }}"
index 3559ee8bdb518c941b2ad1eba5ecaf2bcf78c3af..0d1f7c93c9fe0d8adff6e3301ea998fd2e850b02 100644 (file)
@@ -1,9 +1,3 @@
 ---
-- name: include check_running_cluster.yml
-  include_tasks: check_running_cluster.yml
-
 - name: include facts.yml
-  include_tasks: facts.yml
-
-- name: include create_ceph_initial_dirs.yml
-  include_tasks: create_ceph_initial_dirs.yml
+  include: facts.yml
diff --git a/roles/ceph-defaults/templates/restart_mds_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mds_daemon.sh.j2
deleted file mode 100644 (file)
index f265546..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mds_check_retries }}"
-DELAY="{{ handler_health_mds_check_delay }}"
-MDS_NAME="{{ mds_name }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mds-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok
-
-# First, restart the daemon
-systemctl restart ceph-mds@${MDS_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-  $DOCKER_EXEC test -S $SOCKET && exit 0
-  sleep $DELAY
-  let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2
deleted file mode 100644 (file)
index 2b06a04..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mgr_check_retries }}"
-DELAY="{{ handler_health_mgr_check_delay }}"
-MGR_NAME="{{ ansible_hostname }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mgr-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok
-
-systemctl reset-failed ceph-mgr@${MGR_NAME}
-# First, restart the daemon
-systemctl restart ceph-mgr@${MGR_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-  $DOCKER_EXEC test -S $SOCKET && exit 0
-  sleep $DELAY
-  let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2
deleted file mode 100644 (file)
index 748b073..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mon_check_retries }}"
-DELAY="{{ handler_health_mon_check_delay }}"
-MONITOR_NAME="{{ monitor_name }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mon-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok
-
-check_quorum() {
-while [ $RETRIES -ne 0 ]; do
-  $DOCKER_EXEC ceph --cluster {{ cluster }} -s --format json | python -c 'import sys, json; exit(0) if "{{ monitor_name }}" in json.load(sys.stdin)["quorum_names"] else exit(1)' && exit 0
-  sleep $DELAY
-  let RETRIES=RETRIES-1
-done
-# If we reach this point, it means there is a problem with the quorum
-echo "Error with quorum."
-echo "cluster status:"
-$DOCKER_EXEC ceph --cluster {{ cluster }} -s
-echo "quorum status:"
-$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} mon_status
-$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} quorum_status
-exit 1
-}
-
-# First, restart the daemon
-systemctl restart ceph-mon@{{ ansible_hostname }}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $COUNT -ne 0 ]; do
-  $DOCKER_EXEC test -S $SOCKET && check_quorum
-  sleep $DELAY
-  let COUNT=COUNT-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2
deleted file mode 100644 (file)
index 5828e1a..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_nfs_check_retries }}"
-DELAY="{{ handler_health_nfs_check_delay }}"
-NFS_NAME="ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }}"
-PID=/var/run/ganesha.pid
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-nfs-{{ ansible_hostname }}"
-{% endif %}
-
-# First, restart the daemon
-{% if containerized_deployment -%}
-systemctl restart $NFS_NAME
-COUNT=10
-# Wait and ensure the pid exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-  $DOCKER_EXEC test -f $PID && exit 0
-  sleep $DELAY
-  let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the pid is not present.
-echo "PID file ${PID} could not be found, which means Ganesha is not running."
-exit 1
-{% else %}
-systemctl restart nfs-ganesha
-{% endif %}
diff --git a/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2
deleted file mode 100644 (file)
index 15b2559..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-DELAY="{{ handler_health_osd_check_delay }}"
-CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
-
-check_pgs() {
-  num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
-  if [[ "$num_pgs" == "0" ]]; then
-    return 0
-  fi
-  while [ $RETRIES -ne 0 ]; do
-    test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print sum ( [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]])')"
-    RET=$?
-    test $RET -eq 0 && return 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-  done
-  # PGs not clean, exiting with return code 1
-  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
-  echo "It is possible that the cluster has less OSDs than the replica configuration"
-  echo "Will refuse to continue"
-  $docker_exec ceph $CEPH_CLI -s
-  $docker_exec ceph $CEPH_CLI osd dump
-  $docker_exec ceph $CEPH_CLI osd tree
-  $docker_exec ceph $CEPH_CLI osd crush rule dump
-  exit 1
-}
-
-wait_for_socket_in_docker() {
-  osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/')
-  whoami=$(docker exec "$1" cat $osd_mount_point/whoami)
-  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then
-    echo "Timed out while trying to look for a Ceph OSD socket."
-    echo "Abort mission!"
-    exit 1
-  fi
-}
-
-get_dev_name() {
-  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
-}
-
-get_docker_id_from_dev_name() {
-  local id
-  local count
-  count=10
-  while [ $count -ne 0 ]; do
-    id=$(docker ps -q -f "name=$1")
-    test "$id" != "" && break
-    sleep $DELAY
-    let count=count-1
-  done
-  echo "$id"
-}
-
-get_docker_osd_id() {
-  wait_for_socket_in_docker $1
-  docker exec "$1" ls /var/run/ceph | cut -d'.' -f2
-}
-
-# For containerized deployments, the unit file looks like: ceph-osd@sda.service
-# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
-for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
-  # First, restart daemon(s)
-  systemctl restart "${unit}"
-  # We need to wait because it may take some time for the socket to actually exist
-  COUNT=10
-  # Wait and ensure the socket exists after restarting the daemon
-  {% if containerized_deployment -%}
-  id=$(get_dev_name "$unit")
-  container_id=$(get_docker_id_from_dev_name "$id")
-  wait_for_socket_in_docker "$container_id"
-  osd_id=$whoami
-  docker_exec="docker exec $container_id"
-  {% else %}
-  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
-  {% endif %}
-  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
-  while [ $COUNT -ne 0 ]; do
-    RETRIES="{{ handler_health_osd_check_retries }}"
-    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
-    sleep $DELAY
-    let COUNT=COUNT-1
-  done
-  # If we reach this point, it means the socket is not present.
-  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
-  exit 1
-done
diff --git a/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2
deleted file mode 100644 (file)
index 73a8708..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_rbd_mirror_check_retries }}"
-DELAY="{{ handler_health_rbd_mirror_check_delay }}"
-RBD_MIRROR_NAME="{{ ansible_hostname }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-rbd-mirror-{{ ansible_hostname }}"
-{% endif %}
-{% if ceph_release_num[ceph_release] < ceph_release_num['luminous'] %}
-SOCKET=/var/run/ceph/{{ cluster }}-client.admin.asok
-{% else %}
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok
-{% endif %}
-
-# First, restart the daemon
-systemctl restart ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-  $DOCKER_EXEC test -S $SOCKET && exit 0
-  sleep $DELAY
-  let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2
deleted file mode 100644 (file)
index fd477c3..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-systemctl restart rbd-target-api
diff --git a/roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2
deleted file mode 100644 (file)
index 10c34bf..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-systemctl restart rbd-target-gw
diff --git a/roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2
deleted file mode 100644 (file)
index ce6efc0..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_rgw_check_retries }}"
-DELAY="{{ handler_health_rgw_check_delay }}"
-RGW_NAME="{{ ansible_hostname }}"
-RGW_PORT="{{ radosgw_frontend_port }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-rgw-{{ ansible_hostname }}"
-{% endif %}
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok
-{% if hostvars[inventory_hostname]['radosgw_address_block'] is defined and hostvars[inventory_hostname]['radosgw_address_block'] != 'subnet' %}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \
-    {% endif %}
-{% elif radosgw_address_block is defined and radosgw_address_block != 'subnet' -%}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \
-    {% endif %}
-{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != 'address' -%}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}] \
-    {% endif %}
-{% elif radosgw_address is defined and radosgw_address != 'address' -%}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ radosgw_address }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ radosgw_address }}] \
-    {% endif %}
-{% elif hostvars[inventory_hostname]['radosgw_interface'] is defined -%}
-    {% set interface = 'ansible_' + (hostvars[inventory_hostname]['radosgw_interface'] | replace('-', '_')) %}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \
-    {% endif %}
-{% else %}
-    {% set interface = 'ansible_' + (radosgw_interface | replace('-', '_')) %}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \
-    {% endif %}
-{% endif %}
-
-check_for_curl_or_wget() {
-  if $DOCKER_EXEC command -v wget &>/dev/null; then
-    rgw_test_command="wget --quiet"
-  elif $DOCKER_EXEC command -v curl &>/dev/null; then
-    rgw_test_command="curl --fail --silent --output /dev/null"
-  else
-    echo "It seems that neither curl or wget are available on your system."
-    echo "Cannot test rgw connection."
-    exit 0
-  fi
-}
-
-check_rest() {
-  check_for_curl_or_wget
-  while [ $RETRIES -ne 0 ]; do
-    test "$rgw_test_command http://$RGW_IP:$RGW_PORT" && exit 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-  done
-  # If we reach this point, it means there is a problem with the connection to rgw
-  echo "Error connecting locally to Rados Gateway service: http://$rgw_listen"
-  exit 1
-}
-
-# First, restart the daemon
-systemctl restart ceph-radosgw@rgw.${RGW_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $COUNT -ne 0 ]; do
-  $DOCKER_EXEC test -S $SOCKET && check_rest
-  sleep $DELAY
-  let COUNT=COUNT-1
-done
-echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2 b/roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2
deleted file mode 100644 (file)
index 5dd5ff8..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-systemctl restart tcmu-runner
diff --git a/roles/ceph-handler/LICENSE b/roles/ceph-handler/LICENSE
new file mode 100644 (file)
index 0000000..b0d1c9f
--- /dev/null
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [2014] [Guillaume Abrioux]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/roles/ceph-handler/README.md b/roles/ceph-handler/README.md
new file mode 100644 (file)
index 0000000..3145a7f
--- /dev/null
@@ -0,0 +1,2 @@
+# Ansible role: ceph-handler
+Documentation is available at http://docs.ceph.com/ceph-ansible/.
diff --git a/roles/ceph-handler/handlers/main.yml b/roles/ceph-handler/handlers/main.yml
new file mode 100644 (file)
index 0000000..bc6732e
--- /dev/null
@@ -0,0 +1,459 @@
+---
+- name: update apt cache
+  apt:
+    update_cache: yes
+  when:
+    - ansible_os_family == 'Debian'
+
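+# Every service below follows the same handler chain, wired together through a
+# shared "listen" topic (e.g. "restart ceph mons"): set a per-host flag, copy
+# the restart script, run it, then clear the flag.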
+# We only want to restart on hosts that have called the handler.
+# This var is set when the handler is called, and unset after the
+# restart to ensure only the correct hosts are restarted.
+- name: set _mon_handler_called before restart
+  set_fact:
+     _mon_handler_called: True
+  listen: "restart ceph mons"
+
+- name: copy mon restart script
+  template:
+    src: restart_mon_daemon.sh.j2
+    dest: /tmp/restart_mon_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph mons"
+  when:
+    - mon_group_name in group_names
+
+- name: restart ceph mon daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_mon_daemon.sh
+  listen: "restart ceph mons"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mon_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_mon_handler_called'] | default(False)
+    - mon_socket_stat.rc == 0
+  with_items: "{{ groups[mon_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph mon daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_mon_daemon.sh
+  listen: "restart ceph mons"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mon_group_name in group_names
+    - containerized_deployment
+    - ceph_mon_container_stat.get('rc') == 0
+    - hostvars[item]['_mon_handler_called'] | default(False)
+    - ceph_mon_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[mon_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _mon_handler_called after restart
+  set_fact:
+     _mon_handler_called: False
+  listen: "restart ceph mons"
+
+- name: set _osd_handler_called before restart
+  set_fact:
+     _osd_handler_called: True
+  listen: "restart ceph osds"
+
+# This restarts every OSD on the host, not only the ones that changed, because
+# at this time the ansible role does not have an OSD id list to use for
+# restarting them specifically.
+# This does not need to run during a rolling update as the playbook will
+# restart all OSDs using the tasks "start ceph osd" or
+# "restart containerized ceph osd".
+- name: copy osd restart script
+  template:
+    src: restart_osd_daemon.sh.j2
+    dest: /tmp/restart_osd_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph osds"
+  when:
+    - osd_group_name in group_names
+    - not rolling_update
+
+- name: restart ceph osd daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
+  listen: "restart ceph osds"
+  when:
+    - osd_group_name in group_names
+    - not containerized_deployment
+    - not rolling_update
+    # We do not want to run these checks on initial deployment (`osd_socket_stat.rc == 0`),
+    # except when a crush location is specified: ceph-disk will start the osds before the osd crush location is specified.
+    - osd_socket_stat.rc == 0
+    - ceph_current_status.fsid is defined
+    - handler_health_osd_check
+    - hostvars[item]['_osd_handler_called'] | default(False)
+  with_items: "{{ groups[osd_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph osd daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_osd_daemon.sh
+  listen: "restart ceph osds"
+  when:
+    # We do not want to run these checks on initial deployment (`ceph_osd_container_stat.get('rc') == 0`),
+    # except when a crush location is specified: ceph-disk will start the osds before the osd crush location is specified.
+    - osd_group_name in group_names
+    - containerized_deployment
+    - not rolling_update
+    - ceph_osd_container_stat.get('rc') == 0
+    - inventory_hostname == groups.get(osd_group_name) | last
+    - ceph_osd_container_stat.get('stdout_lines', [])|length != 0
+    - handler_health_osd_check
+    - hostvars[item]['_osd_handler_called'] | default(False)
+  with_items: "{{ groups[osd_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _osd_handler_called after restart
+  set_fact:
+     _osd_handler_called: False
+  listen: "restart ceph osds"
+
+- name: set _mds_handler_called before restart
+  set_fact:
+     _mds_handler_called: True
+  listen: "restart ceph mdss"
+
+- name: copy mds restart script
+  template:
+    src: restart_mds_daemon.sh.j2
+    dest: /tmp/restart_mds_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph mdss"
+  when:
+    - mds_group_name in group_names
+
+- name: restart ceph mds daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_mds_daemon.sh
+  listen: "restart ceph mdss"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mds_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_mds_handler_called'] | default(False)
+    - mds_socket_stat.rc == 0
+  with_items: "{{ groups[mds_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph mds daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_mds_daemon.sh
+  listen: "restart ceph mdss"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mds_group_name in group_names
+    - containerized_deployment
+    - ceph_mds_container_stat.get('rc') == 0
+    - hostvars[item]['_mds_handler_called'] | default(False)
+    - ceph_mds_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[mds_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _mds_handler_called after restart
+  set_fact:
+     _mds_handler_called: False
+  listen: "restart ceph mdss"
+
+- name: set _rgw_handler_called before restart
+  set_fact:
+     _rgw_handler_called: True
+  listen: "restart ceph rgws"
+
+- name: copy rgw restart script
+  template:
+    src: restart_rgw_daemon.sh.j2
+    dest: /tmp/restart_rgw_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph rgws"
+  when:
+    - rgw_group_name in group_names
+
+- name: restart ceph rgw daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh
+  listen: "restart ceph rgws"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - rgw_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_rgw_handler_called'] | default(False)
+    - rgw_socket_stat.rc == 0
+  with_items: "{{ groups[rgw_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph rgw daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh
+  listen: "restart ceph rgws"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - rgw_group_name in group_names
+    - containerized_deployment
+    - ceph_rgw_container_stat.get('rc') == 0
+    - hostvars[item]['_rgw_handler_called'] | default(False)
+    - ceph_rgw_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[rgw_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _rgw_handler_called after restart
+  set_fact:
+     _rgw_handler_called: False
+  listen: "restart ceph rgws"
+
+- name: set _nfs_handler_called before restart
+  set_fact:
+     _nfs_handler_called: True
+  listen: "restart ceph nfss"
+
+- name: copy nfs restart script
+  template:
+    src: restart_nfs_daemon.sh.j2
+    dest: /tmp/restart_nfs_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph nfss"
+  when:
+    - nfs_group_name in group_names
+
+- name: restart ceph nfs daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh
+  listen: "restart ceph nfss"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - nfs_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_nfs_handler_called'] | default(False)
+    - nfs_socket_stat.rc == 0
+  with_items: "{{ groups[nfs_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph nfs daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh
+  listen: "restart ceph nfss"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - nfs_group_name in group_names
+    - containerized_deployment
+    - ceph_nfs_container_stat.get('rc') == 0
+    - hostvars[item]['_nfs_handler_called'] | default(False)
+    - ceph_nfs_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[nfs_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _nfs_handler_called after restart
+  set_fact:
+     _nfs_handler_called: False
+  listen: "restart ceph nfss"
+
+- name: set _rbdmirror_handler_called before restart
+  set_fact:
+     _rbdmirror_handler_called: True
+  listen: "restart ceph rbdmirrors"
+
+- name: copy rbd mirror restart script
+  template:
+    src: restart_rbd_mirror_daemon.sh.j2
+    dest: /tmp/restart_rbd_mirror_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph rbdmirrors"
+  when:
+    - rbdmirror_group_name in group_names
+
+- name: restart ceph rbd mirror daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh
+  listen: "restart ceph rbdmirrors"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - rbdmirror_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_rbdmirror_handler_called'] | default(False)
+    - rbd_mirror_socket_stat.rc == 0
+  with_items: "{{ groups[rbdmirror_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph rbd mirror daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh
+  listen: "restart ceph rbdmirrors"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - rbdmirror_group_name in group_names
+    - containerized_deployment
+    - ceph_rbd_mirror_container_stat.get('rc') == 0
+    - hostvars[item]['_rbdmirror_handler_called'] | default(False)
+    - ceph_rbd_mirror_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[rbdmirror_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _rbdmirror_handler_called after restart
+  set_fact:
+     _rbdmirror_handler_called: False
+  listen: "restart ceph rbdmirrors"
+
+- name: set _mgr_handler_called before restart
+  set_fact:
+     _mgr_handler_called: True
+  listen: "restart ceph mgrs"
+
+- name: copy mgr restart script
+  template:
+    src: restart_mgr_daemon.sh.j2
+    dest: /tmp/restart_mgr_daemon.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph mgrs"
+  when:
+    - mgr_group_name in group_names
+
+- name: restart ceph mgr daemon(s) - non container
+  command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh
+  listen: "restart ceph mgrs"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mgr_group_name in group_names
+    - not containerized_deployment
+    - hostvars[item]['_mgr_handler_called'] | default(False)
+    - mgr_socket_stat.rc == 0
+  with_items: "{{ groups[mgr_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: restart ceph mgr daemon(s) - container
+  command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh
+  listen: "restart ceph mgrs"
+  when:
+    # We do not want to run these checks on initial deployment (`socket.rc == 0`)
+    - mgr_group_name in group_names
+    - containerized_deployment
+    - ceph_mgr_container_stat.get('rc') == 0
+    - hostvars[item]['_mgr_handler_called'] | default(False)
+    - ceph_mgr_container_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[mgr_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _mgr_handler_called after restart
+  set_fact:
+     _mgr_handler_called: False
+  listen: "restart ceph mgrs"
+
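+# The iSCSI gateway services (tcmu-runner, rbd-target-gw, rbd-target-api) have
+# no admin socket to health-check, so the handlers below simply restart the
+# unit on hosts where the process (or its container) was found running.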
+- name: set _tcmu_runner_handler_called before restart
+  set_fact:
+     _tcmu_runner_handler_called: True
+  listen: "restart ceph tcmu-runner"
+
+- name: copy tcmu-runner restart script
+  template:
+    src: restart_tcmu_runner.sh.j2
+    dest: /tmp/restart_tcmu_runner.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph tcmu-runner"
+  when:
+    - iscsi_gw_group_name in group_names
+
+- name: restart tcmu-runner
+  command: /usr/bin/env bash /tmp/restart_tcmu_runner.sh
+  listen: "restart ceph tcmu-runner"
+  when:
+    - iscsi_gw_group_name in group_names
+    - ceph_tcmu_runner_stat.get('rc') == 0
+    - hostvars[item]['_tcmu_runner_handler_called'] | default(False)
+    - ceph_tcmu_runner_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[iscsi_gw_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _tcmu_runner_handler_called after restart
+  set_fact:
+     _tcmu_runner_handler_called: False
+  listen: "restart ceph tcmu-runner"
+
+- name: set _rbd_target_gw_handler_called before restart
+  set_fact:
+     _rbd_target_gw_handler_called: True
+  listen: "restart ceph rbd-target-gw"
+
+- name: copy rbd-target-gw restart script
+  template:
+    src: restart_rbd_target_gw.sh.j2
+    dest: /tmp/restart_rbd_target_gw.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph rbd-target-gw"
+  when:
+    - iscsi_gw_group_name in group_names
+
+- name: restart rbd-target-gw
+  command: /usr/bin/env bash /tmp/restart_rbd_target_gw.sh
+  listen: "restart ceph rbd-target-gw"
+  when:
+    - iscsi_gw_group_name in group_names
+    - ceph_rbd_target_gw_stat.get('rc') == 0
+    - hostvars[item]['_rbd_target_gw_handler_called'] | default(False)
+    - ceph_rbd_target_gw_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[iscsi_gw_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _rbd_target_gw_handler_called after restart
+  set_fact:
+     _rbd_target_gw_handler_called: False
+  listen: "restart ceph rbd-target-gw"
+
+- name: set _rbd_target_api_handler_called before restart
+  set_fact:
+     _rbd_target_api_handler_called: True
+  listen: "restart ceph rbd-target-api"
+
+- name: copy rbd-target-api restart script
+  template:
+    src: restart_rbd_target_api.sh.j2
+    dest: /tmp/restart_rbd_target_api.sh
+    owner: root
+    group: root
+    mode: 0750
+  listen: "restart ceph rbd-target-api"
+  when:
+    - iscsi_gw_group_name in group_names
+
+- name: restart rbd-target-api
+  command: /usr/bin/env bash /tmp/restart_rbd_target_api.sh
+  listen: "restart ceph rbd-target-api"
+  when:
+    - iscsi_gw_group_name in group_names
+    - ceph_rbd_target_api_stat.get('rc') == 0
+    - hostvars[item]['_rbd_target_api_handler_called'] | default(False)
+    - ceph_rbd_target_api_stat.get('stdout_lines', [])|length != 0
+  with_items: "{{ groups[iscsi_gw_group_name] }}"
+  delegate_to: "{{ item }}"
+  run_once: True
+
+- name: set _rbd_target_api_handler_called after restart
+  set_fact:
+     _rbd_target_api_handler_called: False
+  listen: "restart ceph rbd-target-api"
diff --git a/roles/ceph-handler/meta/main.yml b/roles/ceph-handler/meta/main.yml
new file mode 100644 (file)
index 0000000..acb144c
--- /dev/null
@@ -0,0 +1,13 @@
+---
+galaxy_info:
+  author: Sébastien Han
+  description: Contains handlers for Ceph services
+  license: Apache
+  min_ansible_version: 2.3
+  platforms:
+    - name: EL
+      versions:
+        - 7
+  categories:
+    - system
+dependencies: []
diff --git a/roles/ceph-handler/tasks/check_running_cluster.yml b/roles/ceph-handler/tasks/check_running_cluster.yml
new file mode 100644 (file)
index 0000000..0418d2f
--- /dev/null
@@ -0,0 +1,10 @@
+---
+- name: include check_running_containers.yml
+  include_tasks: check_running_containers.yml
+  when:
+    - containerized_deployment
+
+- name: include check_socket_non_container.yml
+  include_tasks: check_socket_non_container.yml
+  when:
+    - not containerized_deployment
diff --git a/roles/ceph-handler/tasks/check_running_containers.yml b/roles/ceph-handler/tasks/check_running_containers.yml
new file mode 100644 (file)
index 0000000..111d112
--- /dev/null
@@ -0,0 +1,90 @@
+---
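+# Record whether a container is running for each Ceph daemon hosted here; the
+# registered *_container_stat results are what the handlers use to decide
+# which hosts actually need a restart.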
+- name: check for a mon container
+  command: "docker ps -q --filter='name=ceph-mon-{{ ansible_hostname }}'"
+  register: ceph_mon_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(mon_group_name, [])
+
+- name: check for an osd container
+  command: "docker ps -q --filter='name=ceph-osd-{{ ansible_hostname }}'"
+  register: ceph_osd_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(osd_group_name, [])
+
+- name: check for a mds container
+  command: "docker ps -q --filter='name=ceph-mds-{{ ansible_hostname }}'"
+  register: ceph_mds_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(mds_group_name, [])
+
+- name: check for a rgw container
+  command: "docker ps -q --filter='name=ceph-rgw-{{ ansible_hostname }}'"
+  register: ceph_rgw_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(rgw_group_name, [])
+
+- name: check for a mgr container
+  command: "docker ps -q --filter='name=ceph-mgr-{{ ansible_hostname }}'"
+  register: ceph_mgr_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(mgr_group_name, [])
+
+- name: check for a rbd mirror container
+  command: "docker ps -q --filter='name=ceph-rbd-mirror-{{ ansible_hostname }}'"
+  register: ceph_rbd_mirror_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(rbdmirror_group_name, [])
+
+- name: check for a nfs container
+  command: "docker ps -q --filter='name=ceph-nfs-{{ ansible_hostname }}'"
+  register: ceph_nfs_container_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(nfs_group_name, [])
+
+- name: check for a tcmu-runner container
+  command: "docker ps -q --filter='name=tcmu-runner'"
+  register: ceph_tcmu_runner_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a rbd-target-api container
+  command: "docker ps -q --filter='name=rbd-target-api'"
+  register: ceph_rbd_target_api_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a rbd-target-gw container
+  command: "docker ps -q --filter='name=rbd-target-gw'"
+  register: ceph_rbd_target_gw_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
diff --git a/roles/ceph-handler/tasks/check_socket_non_container.yml b/roles/ceph-handler/tasks/check_socket_non_container.yml
new file mode 100644 (file)
index 0000000..0afe3ea
--- /dev/null
@@ -0,0 +1,228 @@
+---
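+# For every daemon deployed on this host: locate its admin socket, use fuser to
+# see whether a process still holds it, and remove stale sockets left behind by
+# a dead daemon. The registered *_socket_stat results drive the restart
+# handlers on non-containerized deployments.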
+- name: check for a ceph mon socket
+  shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mon_socket_stat
+  when:
+    - inventory_hostname in groups.get(mon_group_name, [])
+
+- name: check if the ceph mon socket is in-use
+  command: fuser --silent {{ mon_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mon_socket
+  when:
+    - inventory_hostname in groups.get(mon_group_name, [])
+    - mon_socket_stat.rc == 0
+
+- name: remove ceph mon socket if exists and not used by a process
+  file:
+    name: "{{ mon_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(mon_group_name, [])
+    - mon_socket_stat.rc == 0
+    - mon_socket.rc == 1
+
+- name: check for a ceph osd socket
+  shell: |
+    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: osd_socket_stat
+  when:
+    - inventory_hostname in groups.get(osd_group_name, [])
+
+- name: check if the ceph osd socket is in-use
+  command: fuser --silent {{ osd_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: osd_socket
+  when:
+    - inventory_hostname in groups.get(osd_group_name, [])
+    - osd_socket_stat.rc == 0
+
+- name: remove ceph osd socket if exists and not used by a process
+  file:
+    name: "{{ osd_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(osd_group_name, [])
+    - osd_socket_stat.rc == 0
+    - osd_socket.rc == 1
+
+- name: check for a ceph mds socket
+  shell: |
+    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mds_socket_stat
+  when:
+    - inventory_hostname in groups.get(mds_group_name, [])
+
+- name: check if the ceph mds socket is in-use
+  command: fuser --silent {{ mds_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mds_socket
+  when:
+    - inventory_hostname in groups.get(mds_group_name, [])
+    - mds_socket_stat.rc == 0
+
+- name: remove ceph mds socket if exists and not used by a process
+  file:
+    name: "{{ mds_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(mds_group_name, [])
+    - mds_socket_stat.rc == 0
+    - mds_socket.rc == 1
+
+- name: check for a ceph rgw socket
+  shell: |
+    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: rgw_socket_stat
+  when:
+    - inventory_hostname in groups.get(rgw_group_name, [])
+
+- name: check if the ceph rgw socket is in-use
+  command: fuser --silent {{ rgw_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: rgw_socket
+  when:
+    - inventory_hostname in groups.get(rgw_group_name, [])
+    - rgw_socket_stat.rc == 0
+
+- name: remove ceph rgw socket if exists and not used by a process
+  file:
+    name: "{{ rgw_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(rgw_group_name, [])
+    - rgw_socket_stat.rc == 0
+    - rgw_socket.rc == 1
+
+- name: check for a ceph mgr socket
+  shell: |
+    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mgr_socket_stat
+  when:
+    - inventory_hostname in groups.get(mgr_group_name, [])
+
+- name: check if the ceph mgr socket is in-use
+  command: fuser --silent {{ mgr_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mgr_socket
+  when:
+    - inventory_hostname in groups.get(mgr_group_name, [])
+    - mgr_socket_stat.rc == 0
+
+- name: remove ceph mgr socket if exists and not used by a process
+  file:
+    name: "{{ mgr_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(mgr_group_name, [])
+    - mgr_socket_stat.rc == 0
+    - mgr_socket.rc == 1
+
+- name: check for a ceph rbd mirror socket
+  shell: |
+    stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: rbd_mirror_socket_stat
+  when:
+    - inventory_hostname in groups.get(rbdmirror_group_name, [])
+
+- name: check if the ceph rbd mirror socket is in-use
+  command: fuser --silent {{ rbd_mirror_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: rbd_mirror_socket
+  when:
+    - inventory_hostname in groups.get(rbdmirror_group_name, [])
+    - rbd_mirror_socket_stat.rc == 0
+
+- name: remove ceph rbd mirror socket if exists and not used by a process
+  file:
+    name: "{{ rbd_mirror_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(rbdmirror_group_name, [])
+    - rbd_mirror_socket_stat.rc == 0
+    - rbd_mirror_socket.rc == 1
+
+- name: check for a ceph nfs ganesha socket
+  command: stat --printf=%n /var/run/ganesha.pid
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: nfs_socket_stat
+  when:
+    - inventory_hostname in groups.get(nfs_group_name, [])
+
+- name: check if the ceph nfs ganesha socket is in-use
+  command: fuser --silent {{ nfs_socket_stat.stdout }}
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: nfs_socket
+  when:
+    - inventory_hostname in groups.get(nfs_group_name, [])
+    - nfs_socket_stat.rc == 0
+
+- name: remove ceph nfs ganesha socket if exists and not used by a process
+  file:
+    name: "{{ nfs_socket_stat.stdout }}"
+    state: absent
+  when:
+    - inventory_hostname in groups.get(nfs_group_name, [])
+    - nfs_socket_stat.rc == 0
+    - nfs_socket.rc == 1
+
+- name: check for a tcmu-runner
+  command: "pgrep tcmu-runner"
+  register: ceph_tcmu_runner_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a rbd-target-api
+  command: "pgrep rbd-target-api"
+  register: ceph_rbd_target_api_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a rbd-target-gw
+  command: "pgrep name=rbd-target-gw"
+  register: ceph_rbd_target_gw_stat
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  when:
+    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
diff --git a/roles/ceph-handler/tasks/main.yml b/roles/ceph-handler/tasks/main.yml
new file mode 100644 (file)
index 0000000..09280cd
--- /dev/null
@@ -0,0 +1,3 @@
+---
+- name: include check_running_cluster.yml
+  include_tasks: check_running_cluster.yml
\ No newline at end of file
diff --git a/roles/ceph-handler/templates/restart_mds_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mds_daemon.sh.j2
new file mode 100644 (file)
index 0000000..f265546
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_mds_check_retries }}"
+DELAY="{{ handler_health_mds_check_delay }}"
+MDS_NAME="{{ mds_name }}"
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-mds-{{ ansible_hostname }}"
+{% endif %}
+
+# Backward compatibility
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok
+
+# First, restart the daemon
+systemctl restart ceph-mds@${MDS_NAME}
+
+# Wait and ensure the socket exists after restarting the daemon
+while [ $RETRIES -ne 0 ]; do
+  $DOCKER_EXEC test -S $SOCKET && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
+exit 1
diff --git a/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2
new file mode 100644 (file)
index 0000000..2b06a04
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_mgr_check_retries }}"
+DELAY="{{ handler_health_mgr_check_delay }}"
+MGR_NAME="{{ ansible_hostname }}"
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-mgr-{{ ansible_hostname }}"
+{% endif %}
+
+# Backward compatibility
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok
+
+systemctl reset-failed ceph-mgr@${MGR_NAME}
+# First, restart the daemon
+systemctl restart ceph-mgr@${MGR_NAME}
+
+# Wait and ensure the socket exists after restarting the daemon
+while [ $RETRIES -ne 0 ]; do
+  $DOCKER_EXEC test -S $SOCKET && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
+exit 1
diff --git a/roles/ceph-handler/templates/restart_mon_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mon_daemon.sh.j2
new file mode 100644 (file)
index 0000000..748b073
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_mon_check_retries }}"
+DELAY="{{ handler_health_mon_check_delay }}"
+MONITOR_NAME="{{ monitor_name }}"
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-mon-{{ ansible_hostname }}"
+{% endif %}
+
+# Backward compatibility
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok
+
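+# Poll the cluster status until this monitor shows up in quorum_names again,
+# dumping the cluster and mon status for debugging if it never rejoins.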
+check_quorum() {
+while [ $RETRIES -ne 0 ]; do
+  $DOCKER_EXEC ceph --cluster {{ cluster }} -s --format json | python -c 'import sys, json; exit(0) if "{{ monitor_name }}" in json.load(sys.stdin)["quorum_names"] else exit(1)' && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means there is a problem with the quorum
+echo "Error with quorum."
+echo "cluster status:"
+$DOCKER_EXEC ceph --cluster {{ cluster }} -s
+echo "quorum status:"
+$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} mon_status
+$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} quorum_status
+exit 1
+}
+
+# First, restart the daemon
+systemctl restart ceph-mon@{{ ansible_hostname }}
+
+COUNT=10
+# Wait and ensure the socket exists after restarting the daemon
+while [ $COUNT -ne 0 ]; do
+  $DOCKER_EXEC test -S $SOCKET && check_quorum
+  sleep $DELAY
+  let COUNT=COUNT-1
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
+exit 1
diff --git a/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 b/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2
new file mode 100644 (file)
index 0000000..5828e1a
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_nfs_check_retries }}"
+DELAY="{{ handler_health_nfs_check_delay }}"
+NFS_NAME="ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }}"
+PID=/var/run/ganesha.pid
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-nfs-{{ ansible_hostname }}"
+{% endif %}
+
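+# Containerized deployments restart the templated ceph-nfs@ unit and then wait
+# for the Ganesha pid file to appear inside the container; bare metal simply
+# restarts nfs-ganesha.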
+# First, restart the daemon
+{% if containerized_deployment -%}
+systemctl restart $NFS_NAME
+# Wait and ensure the pid exists after restarting the daemon
+while [ $RETRIES -ne 0 ]; do
+  $DOCKER_EXEC test -f $PID && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means the pid is not present.
+echo "PID file ${PID} could not be found, which means Ganesha is not running."
+exit 1
+{% else %}
+systemctl restart nfs-ganesha
+{% endif %}
diff --git a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2
new file mode 100644 (file)
index 0000000..15b2559
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+DELAY="{{ handler_health_osd_check_delay }}"
+CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"
+
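+# Wait until all PGs are reported active+clean (or the cluster has no PGs at
+# all) before moving on, so that only one OSD unit is restarted at a time.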
+check_pgs() {
+  num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
+  if [[ "$num_pgs" == "0" ]]; then
+    return 0
+  fi
+  while [ $RETRIES -ne 0 ]; do
+    test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print sum ( [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]])')"
+    RET=$?
+    test $RET -eq 0 && return 0
+    sleep $DELAY
+    let RETRIES=RETRIES-1
+  done
+  # PGs not clean, exiting with return code 1
+  echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
+  echo "It is possible that the cluster has less OSDs than the replica configuration"
+  echo "Will refuse to continue"
+  $docker_exec ceph $CEPH_CLI -s
+  $docker_exec ceph $CEPH_CLI osd dump
+  $docker_exec ceph $CEPH_CLI osd tree
+  $docker_exec ceph $CEPH_CLI osd crush rule dump
+  exit 1
+}
+
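+# Read the OSD id from the data directory mounted in the container and wait up
+# to 10 seconds for an admin socket to show up inside it.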
+wait_for_socket_in_docker() {
+  osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/')
+  whoami=$(docker exec "$1" cat $osd_mount_point/whoami)
+  if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then
+    echo "Timed out while trying to look for a Ceph OSD socket."
+    echo "Abort mission!"
+    exit 1
+  fi
+}
+
+get_dev_name() {
+  echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/'
+}
+
+get_docker_id_from_dev_name() {
+  local id
+  local count
+  count=10
+  while [ $count -ne 0 ]; do
+    id=$(docker ps -q -f "name=$1")
+    test "$id" != "" && break
+    sleep $DELAY
+    let count=count-1
+  done
+  echo "$id"
+}
+
+get_docker_osd_id() {
+  wait_for_socket_in_docker $1
+  docker exec "$1" ls /var/run/ceph | cut -d'.' -f2
+}
+
+# For containerized deployments, the unit file looks like: ceph-osd@sda.service
+# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
+for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
+  # First, restart daemon(s)
+  systemctl restart "${unit}"
+  # We need to wait because it may take some time for the socket to actually exist
+  COUNT=10
+  # Wait and ensure the socket exists after restarting the daemon
+  {% if containerized_deployment -%}
+  id=$(get_dev_name "$unit")
+  container_id=$(get_docker_id_from_dev_name "$id")
+  wait_for_socket_in_docker "$container_id"
+  osd_id=$whoami
+  docker_exec="docker exec $container_id"
+  {% else %}
+  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
+  {% endif %}
+  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
+  while [ $COUNT -ne 0 ]; do
+    RETRIES="{{ handler_health_osd_check_retries }}"
+    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
+    sleep $DELAY
+    let COUNT=COUNT-1
+  done
+  # If we reach this point, it means the socket is not present.
+  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
+  exit 1
+done
diff --git a/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2
new file mode 100644 (file)
index 0000000..73a8708
--- /dev/null
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_rbd_mirror_check_retries }}"
+DELAY="{{ handler_health_rbd_mirror_check_delay }}"
+RBD_MIRROR_NAME="{{ ansible_hostname }}"
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-rbd-mirror-{{ ansible_hostname }}"
+{% endif %}
+{% if ceph_release_num[ceph_release] < ceph_release_num['luminous'] %}
+SOCKET=/var/run/ceph/{{ cluster }}-client.admin.asok
+{% else %}
+# Backward compatibility
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok
+{% endif %}
+
+# First, restart the daemon
+systemctl restart ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}
+
+COUNT=10
+# Wait and ensure the socket exists after restarting the daemon
+while [ $RETRIES -ne 0 ]; do
+  $DOCKER_EXEC test -S $SOCKET && exit 0
+  sleep $DELAY
+  let RETRIES=RETRIES-1
+done
+# If we reach this point, it means the socket is not present.
+echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
+exit 1
diff --git a/roles/ceph-handler/templates/restart_rbd_target_api.sh.j2 b/roles/ceph-handler/templates/restart_rbd_target_api.sh.j2
new file mode 100644 (file)
index 0000000..fd477c3
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+systemctl restart rbd-target-api
diff --git a/roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2 b/roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2
new file mode 100644 (file)
index 0000000..10c34bf
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+systemctl restart rbd-target-gw
diff --git a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2
new file mode 100644 (file)
index 0000000..ce6efc0
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+RETRIES="{{ handler_health_rgw_check_retries }}"
+DELAY="{{ handler_health_rgw_check_delay }}"
+RGW_NAME="{{ ansible_hostname }}"
+RGW_PORT="{{ radosgw_frontend_port }}"
+{% if containerized_deployment %}
+DOCKER_EXEC="docker exec ceph-rgw-{{ ansible_hostname }}"
+{% endif %}
+# Backward compatibility
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok
+$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok
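+# Work out the address the radosgw frontend listens on, preferring
+# radosgw_address_block, then radosgw_address, then radosgw_interface, so the
+# HTTP check below probes the right endpoint.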
+{% if hostvars[inventory_hostname]['radosgw_address_block'] is defined and hostvars[inventory_hostname]['radosgw_address_block'] != 'subnet' %}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \
+    {% endif %}
+{% elif radosgw_address_block is defined and radosgw_address_block != 'subnet' -%}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \
+    {% endif %}
+{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != 'address' -%}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}] \
+    {% endif %}
+{% elif radosgw_address is defined and radosgw_address != 'address' -%}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ radosgw_address }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ radosgw_address }}] \
+    {% endif %}
+{% elif hostvars[inventory_hostname]['radosgw_interface'] is defined -%}
+    {% set interface = 'ansible_' + (hostvars[inventory_hostname]['radosgw_interface'] | replace('-', '_')) %}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \
+    {% endif %}
+{% else %}
+    {% set interface = 'ansible_' + (radosgw_interface | replace('-', '_')) %}
+    {% if ip_version == 'ipv4' %}
+RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \
+    {% elif ip_version == 'ipv6' %}
+RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \
+    {% endif %}
+{% endif %}
+
+check_for_curl_or_wget() {
+  if $DOCKER_EXEC command -v wget &>/dev/null; then
+    rgw_test_command="wget --quiet"
+  elif $DOCKER_EXEC command -v curl &>/dev/null; then
+    rgw_test_command="curl --fail --silent --output /dev/null"
+  else
+    echo "It seems that neither curl or wget are available on your system."
+    echo "Cannot test rgw connection."
+    exit 0
+  fi
+}
+
+check_rest() {
+  check_for_curl_or_wget
+  while [ $RETRIES -ne 0 ]; do
+    test "$rgw_test_command http://$RGW_IP:$RGW_PORT" && exit 0
+    sleep $DELAY
+    let RETRIES=RETRIES-1
+  done
+  # If we reach this point, it means there is a problem with the connection to rgw
+  echo "Error connecting locally to Rados Gateway service: http://$rgw_listen"
+  exit 1
+}
+
+# First, restart the daemon
+systemctl restart ceph-radosgw@rgw.${RGW_NAME}
+
+COUNT=10
+# Wait and ensure the socket exists after restarting the daemon
+while [ $COUNT -ne 0 ]; do
+  $DOCKER_EXEC test -S $SOCKET && check_rest
+  sleep $DELAY
+  let COUNT=COUNT-1
+done
+echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
+exit 1
diff --git a/roles/ceph-handler/templates/restart_tcmu_runner.sh.j2 b/roles/ceph-handler/templates/restart_tcmu_runner.sh.j2
new file mode 100644 (file)
index 0000000..5dd5ff8
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+systemctl restart tcmu-runner
index bb5c145d684d0c5e2c8672f726439326c188d9a0..7d2e0fb7b582e9c0a304cdeb6fd80a5bd458009c 100644 (file)
@@ -54,6 +54,7 @@
     - role: ceph-defaults
       tags: [with_pkg, fetch_container_image]
     - role: ceph-validate
+    - role: ceph-handler
     - role: ceph-docker-common
       tags: [with_pkg, fetch_container_image]
       when:
@@ -85,6 +86,7 @@
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-docker-common
       when:
         - inventory_hostname == groups.get('clients', ['']) | first
             start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
   roles:
     - { role: ceph-defaults, tags: ['ceph_update_config'] }
-    - { role: ceph-docker-common }
+    - role: ceph-handler
+    - role: ceph-docker-common
     - { role: ceph-config, tags: ['ceph_update_config'], when: "ceph_release_num[ceph_release] >= ceph_release_num.luminous" }
     - { role: ceph-iscsi-gw, when: "ceph_release_num[ceph_release] >= ceph_release_num.luminous" }
   post_tasks:
         msg: "{{ ceph_status.stdout_lines }}"
       delegate_to: "{{ groups['mons'][0] }}"
       run_once: true
-      when: not ceph_status.failed
\ No newline at end of file
+      when: not ceph_status.failed
index 3ab96a49ce7da2042de23d8c1fa24b19ff5b1e79..a8a4342ffef3d4aa9d6805927029cc81109d44dd 100644 (file)
@@ -91,6 +91,7 @@
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']
   roles:
     - role: ceph-defaults
       tags: ['ceph_update_config']
+    - role: ceph-handler
     - role: ceph-common
     - role: ceph-config
       tags: ['ceph_update_config']