From: Sébastien Han
Date: Fri, 27 Jul 2018 14:56:09 +0000 (+0200)
Subject: add ceph-handler role
X-Git-Tag: v3.2.0beta3~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4db6a213f72fef5f3cdabdb8adeb9dd8b25df5c2;p=ceph-ansible.git

add ceph-handler role

The role contains all the handlers for the Ceph services. We decided to
leave the ceph-defaults role with variables and a few facts only. This
makes the site.yml files easier to organize and also makes the known
variables available to the infrastructure-playbooks.

Signed-off-by: Sébastien Han
---

diff --git a/roles/ceph-config/tasks/create_ceph_initial_dirs.yml b/roles/ceph-config/tasks/create_ceph_initial_dirs.yml
new file mode 100644
index 000000000..a20f9a453
--- /dev/null
+++ b/roles/ceph-config/tasks/create_ceph_initial_dirs.yml
@@ -0,0 +1,25 @@
+---
+- name: set_fact ceph_directories
+  set_fact:
+    ceph_directories:
+      - /etc/ceph
+      - /var/lib/ceph/
+      - /var/lib/ceph/mon
+      - /var/lib/ceph/osd
+      - /var/lib/ceph/mds
+      - /var/lib/ceph/tmp
+      - /var/lib/ceph/radosgw
+      - /var/lib/ceph/bootstrap-rgw
+      - /var/lib/ceph/bootstrap-mds
+      - /var/lib/ceph/bootstrap-osd
+      - /var/lib/ceph/bootstrap-rbd
+      - /var/run/ceph
+
+- name: create ceph initial directories
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ ceph_uid }}"
+    group: "{{ ceph_uid }}"
+    mode: 0755
+  with_items: "{{ ceph_directories }}"
diff --git a/roles/ceph-config/tasks/main.yml b/roles/ceph-config/tasks/main.yml
index 0e1e4389c..5e5b3526e 100644
--- a/roles/ceph-config/tasks/main.yml
+++ b/roles/ceph-config/tasks/main.yml
@@ -1,4 +1,7 @@
 ---
+- name: include create_ceph_initial_dirs.yml
+  include: create_ceph_initial_dirs.yml
+
 # ceph-common
 - block:
   - name: create ceph conf directory
diff --git a/roles/ceph-defaults/handlers/main.yml b/roles/ceph-defaults/handlers/main.yml
deleted file mode 100644
index bc6732eb0..000000000
--- a/roles/ceph-defaults/handlers/main.yml
+++ /dev/null
@@ -1,459 +0,0 @@
----
-- name: update apt cache
-  apt:
-    update-cache: yes
-  when:
-    - ansible_os_family == 'Debian'
-
-# We only want to restart on hosts that have called the handler.
-# This var is set when the handler is called, and unset after the
-# restart to ensure only the correct hosts are restarted.
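# Illustration (not part of this diff): any task elsewhere in the playbook can
# trigger the chain of handlers below by notifying one of the listen topics.
# A hedged sketch -- only the "restart ceph mons" topic comes from this file;
# the task name and file paths are hypothetical:
#
#   - name: template ceph configuration
#     template:
#       src: ceph.conf.j2
#       dest: /etc/ceph/{{ cluster }}.conf
#     notify: restart ceph mons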
-- name: set _mon_handler_called before restart - set_fact: - _mon_handler_called: True - listen: "restart ceph mons" - -- name: copy mon restart script - template: - src: restart_mon_daemon.sh.j2 - dest: /tmp/restart_mon_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph mons" - when: - - mon_group_name in group_names - -- name: restart ceph mon daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_mon_daemon.sh - listen: "restart ceph mons" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mon_group_name in group_names - - not containerized_deployment - - hostvars[item]['_mon_handler_called'] | default(False) - - mon_socket_stat.rc == 0 - with_items: "{{ groups[mon_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph mon daemon(s) - container - command: /usr/bin/env bash /tmp/restart_mon_daemon.sh - listen: "restart ceph mons" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mon_group_name in group_names - - containerized_deployment - - ceph_mon_container_stat.get('rc') == 0 - - hostvars[item]['_mon_handler_called'] | default(False) - - ceph_mon_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[mon_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _mon_handler_called after restart - set_fact: - _mon_handler_called: False - listen: "restart ceph mons" - -- name: set _osd_handler_called before restart - set_fact: - _osd_handler_called: True - listen: "restart ceph osds" - -# This does not just restart OSDs but everything else too. Unfortunately -# at this time the ansible role does not have an OSD id list to use -# for restarting them specifically. -# This does not need to run during a rolling update as the playbook will -# restart all OSDs using the tasks "start ceph osd" or -# "restart containerized ceph osd" -- name: copy osd restart script - template: - src: restart_osd_daemon.sh.j2 - dest: /tmp/restart_osd_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph osds" - when: - - osd_group_name in group_names - - not rolling_update - -- name: restart ceph osds daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_osd_daemon.sh - listen: "restart ceph osds" - when: - - osd_group_name in group_names - - not containerized_deployment - - not rolling_update - # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) - # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified - - osd_socket_stat.rc == 0 - - ceph_current_status.fsid is defined - - handler_health_osd_check - - hostvars[item]['_osd_handler_called'] | default(False) - with_items: "{{ groups[osd_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph osds daemon(s) - container - command: /usr/bin/env bash /tmp/restart_osd_daemon.sh - listen: "restart ceph osds" - when: - # We do not want to run these checks on initial deployment (`socket_osd_container_stat.results[n].rc == 0`) - # except when a crush location is specified. 
ceph-disk will start the osds before the osd crush location is specified - - osd_group_name in group_names - - containerized_deployment - - not rolling_update - - ceph_osd_container_stat.get('rc') == 0 - - inventory_hostname == groups.get(osd_group_name) | last - - ceph_osd_container_stat.get('stdout_lines', [])|length != 0 - - handler_health_osd_check - - hostvars[item]['_osd_handler_called'] | default(False) - with_items: "{{ groups[osd_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _osd_handler_called after restart - set_fact: - _osd_handler_called: False - listen: "restart ceph osds" - -- name: set _mds_handler_called before restart - set_fact: - _mds_handler_called: True - listen: "restart ceph mdss" - -- name: copy mds restart script - template: - src: restart_mds_daemon.sh.j2 - dest: /tmp/restart_mds_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph mdss" - when: - - mds_group_name in group_names - -- name: restart ceph mds daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_mds_daemon.sh - listen: "restart ceph mdss" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mds_group_name in group_names - - not containerized_deployment - - hostvars[item]['_mds_handler_called'] | default(False) - - mds_socket_stat.rc == 0 - with_items: "{{ groups[mds_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph mds daemon(s) - container - command: /usr/bin/env bash /tmp/restart_mds_daemon.sh - listen: "restart ceph mdss" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mds_group_name in group_names - - containerized_deployment - - ceph_mds_container_stat.get('rc') == 0 - - hostvars[item]['_mds_handler_called'] | default(False) - - ceph_mds_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[mds_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _mds_handler_called after restart - set_fact: - _mds_handler_called: False - listen: "restart ceph mdss" - -- name: set _rgw_handler_called before restart - set_fact: - _rgw_handler_called: True - listen: "restart ceph rgws" - -- name: copy rgw restart script - template: - src: restart_rgw_daemon.sh.j2 - dest: /tmp/restart_rgw_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph rgws" - when: - - rgw_group_name in group_names - -- name: restart ceph rgw daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh - listen: "restart ceph rgws" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - rgw_group_name in group_names - - not containerized_deployment - - hostvars[item]['_rgw_handler_called'] | default(False) - - rgw_socket_stat.rc == 0 - with_items: "{{ groups[rgw_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph rgw daemon(s) - container - command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh - listen: "restart ceph rgws" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - rgw_group_name in group_names - - containerized_deployment - - ceph_rgw_container_stat.get('rc') == 0 - - hostvars[item]['_rgw_handler_called'] | default(False) - - ceph_rgw_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[rgw_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _rgw_handler_called after restart - set_fact: - _rgw_handler_called: 
False - listen: "restart ceph rgws" - -- name: set _nfs_handler_called before restart - set_fact: - _nfs_handler_called: True - listen: "restart ceph nfss" - -- name: copy nfs restart script - template: - src: restart_nfs_daemon.sh.j2 - dest: /tmp/restart_nfs_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph nfss" - when: - - nfs_group_name in group_names - -- name: restart ceph nfs daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh - listen: "restart ceph nfss" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - nfs_group_name in group_names - - not containerized_deployment - - hostvars[item]['_nfs_handler_called'] | default(False) - - nfs_socket_stat.rc == 0 - with_items: "{{ groups[nfs_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph nfs daemon(s) - container - command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh - listen: "restart ceph nfss" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - nfs_group_name in group_names - - containerized_deployment - - ceph_nfs_container_stat.get('rc') == 0 - - hostvars[item]['_nfs_handler_called'] | default(False) - - ceph_nfs_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[nfs_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _nfs_handler_called after restart - set_fact: - _nfs_handler_called: False - listen: "restart ceph nfss" - -- name: set _rbdmirror_handler_called before restart - set_fact: - _rbdmirror_handler_called: True - listen: "restart ceph rbdmirrors" - -- name: copy rbd mirror restart script - template: - src: restart_rbd_mirror_daemon.sh.j2 - dest: /tmp/restart_rbd_mirror_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph rbdmirrors" - when: - - rbdmirror_group_name in group_names - -- name: restart ceph rbd mirror daemon(s) - non container - command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh - listen: "restart ceph rbdmirrors" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - rbdmirror_group_name in group_names - - not containerized_deployment - - hostvars[item]['_rbdmirror_handler_called'] | default(False) - - rbd_mirror_socket_stat.rc == 0 - with_items: "{{ groups[rbdmirror_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph rbd mirror daemon(s) - container - command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh - listen: "restart ceph rbdmirrors" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - rbdmirror_group_name in group_names - - containerized_deployment - - ceph_rbd_mirror_container_stat.get('rc') == 0 - - hostvars[item]['_rbdmirror_handler_called'] | default(False) - - ceph_rbd_mirror_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[rbdmirror_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _rbdmirror_handler_called after restart - set_fact: - _rbdmirror_handler_called: False - listen: "restart ceph rbdmirrors" - -- name: set _mgr_handler_called before restart - set_fact: - _mgr_handler_called: True - listen: "restart ceph mgrs" - -- name: copy mgr restart script - template: - src: restart_mgr_daemon.sh.j2 - dest: /tmp/restart_mgr_daemon.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph mgrs" - when: - - mgr_group_name in group_names - -- name: restart ceph mgr daemon(s) - 
non container - command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh - listen: "restart ceph mgrs" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mgr_group_name in group_names - - not containerized_deployment - - hostvars[item]['_mgr_handler_called'] | default(False) - - mgr_socket_stat.rc == 0 - with_items: "{{ groups[mgr_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: restart ceph mgr daemon(s) - container - command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh - listen: "restart ceph mgrs" - when: - # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mgr_group_name in group_names - - containerized_deployment - - ceph_mgr_container_stat.get('rc') == 0 - - hostvars[item]['_mgr_handler_called'] | default(False) - - ceph_mgr_container_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[mgr_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _mgr_handler_called after restart - set_fact: - _mgr_handler_called: False - listen: "restart ceph mgrs" - -- name: set _tcmu_runner_handler_called before restart - set_fact: - _tcmu_runner_handler_called: True - listen: "restart ceph tcmu-runner" - -- name: copy tcmu-runner restart script - template: - src: restart_tcmu_runner.sh.j2 - dest: /tmp/restart_tcmu_runner.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph tcmu-runner" - when: - - iscsi_gw_group_name in group_names - -- name: restart tcmu-runner - command: /usr/bin/env bash /tmp/restart_tcmu_runner.sh - listen: "restart ceph tcmu-runner" - when: - - iscsi_gw_group_name in group_names - - ceph_tcmu_runner_stat.get('rc') == 0 - - hostvars[item]['_tcmu_runner_handler_called'] | default(False) - - ceph_tcmu_runner_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[iscsi_gw_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _tcmu_runner_handler_called after restart - set_fact: - _tcmu_runner_handler_called: False - listen: "restart ceph tcmu-runner" - -- name: set _rbd_target_gw_handler_called before restart - set_fact: - _rbd_target_gw_handler_called: True - listen: "restart ceph rbd-target-gw" - -- name: copy rbd-target-gw restart script - template: - src: restart_rbd_target_gw.sh.j2 - dest: /tmp/restart_rbd_target_gw.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph rbd-target-gw" - when: - - iscsi_gw_group_name in group_names - -- name: restart rbd-target-gw - command: /usr/bin/env bash /tmp/restart_rbd_target_gw.sh - listen: "restart ceph rbd-target-gw" - when: - - iscsi_gw_group_name in group_names - - ceph_rbd_target_gw_stat.get('rc') == 0 - - hostvars[item]['_rbd_target_gw_handler_called'] | default(False) - - ceph_rbd_target_gw_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[iscsi_gw_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _rbd_target_gw_handler_called after restart - set_fact: - _rbd_target_gw_handler_called: False - listen: "restart ceph rbd-target-gw" - -- name: set _rbd_target_api_handler_called before restart - set_fact: - _rbd_target_api_handler_called: True - listen: "restart ceph rbd-target-api" - -- name: copy rbd-target-api restart script - template: - src: restart_rbd_target_api.sh.j2 - dest: /tmp/restart_rbd_target_api.sh - owner: root - group: root - mode: 0750 - listen: "restart ceph rbd-target-api" - when: - - iscsi_gw_group_name in group_names - -- name: restart rbd-target-api - command: /usr/bin/env bash 
/tmp/restart_rbd_target_api.sh - listen: "restart ceph rbd-target-api" - when: - - iscsi_gw_group_name in group_names - - ceph_rbd_target_api_stat.get('rc') == 0 - - hostvars[item]['_rbd_target_api_handler_called'] | default(False) - - ceph_rbd_target_api_stat.get('stdout_lines', [])|length != 0 - with_items: "{{ groups[iscsi_gw_group_name] }}" - delegate_to: "{{ item }}" - run_once: True - -- name: set _rbd_target_api_handler_called after restart - set_fact: - _rbd_target_api_handler_called: False - listen: "restart ceph rbd-target-api" diff --git a/roles/ceph-defaults/tasks/check_running_cluster.yml b/roles/ceph-defaults/tasks/check_running_cluster.yml deleted file mode 100644 index 0418d2ffe..000000000 --- a/roles/ceph-defaults/tasks/check_running_cluster.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- -- name: include check_running_containers.yml - include_tasks: check_running_containers.yml - when: - - containerized_deployment - -- name: include check_socket_non_container.yml - include_tasks: check_socket_non_container.yml - when: - - not containerized_deployment diff --git a/roles/ceph-defaults/tasks/check_running_containers.yml b/roles/ceph-defaults/tasks/check_running_containers.yml deleted file mode 100644 index 111d11274..000000000 --- a/roles/ceph-defaults/tasks/check_running_containers.yml +++ /dev/null @@ -1,90 +0,0 @@ ---- -- name: check for a mon container - command: "docker ps -q --filter='name=ceph-mon-{{ ansible_hostname }}'" - register: ceph_mon_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(mon_group_name, []) - -- name: check for an osd container - command: "docker ps -q --filter='name=ceph-osd-{{ ansible_hostname }}'" - register: ceph_osd_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(osd_group_name, []) - -- name: check for a mds container - command: "docker ps -q --filter='name=ceph-mds-{{ ansible_hostname }}'" - register: ceph_mds_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(mds_group_name, []) - -- name: check for a rgw container - command: "docker ps -q --filter='name=ceph-rgw-{{ ansible_hostname }}'" - register: ceph_rgw_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(rgw_group_name, []) - -- name: check for a mgr container - command: "docker ps -q --filter='name=ceph-mgr-{{ ansible_hostname }}'" - register: ceph_mgr_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(mgr_group_name, []) - -- name: check for a rbd mirror container - command: "docker ps -q --filter='name=ceph-rbd-mirror-{{ ansible_hostname }}'" - register: ceph_rbd_mirror_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(rbdmirror_group_name, []) - -- name: check for a nfs container - command: "docker ps -q --filter='name=ceph-nfs-{{ ansible_hostname }}'" - register: ceph_nfs_container_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(nfs_group_name, []) - -- name: check for a tcmu-runner container - command: "docker ps -q --filter='name=tcmu-runner'" - register: ceph_tcmu_runner_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(iscsi_gw_group_name, []) - -- name: 
check for a rbd-target-api container - command: "docker ps -q --filter='name=rbd-target-api'" - register: ceph_rbd_target_api_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(iscsi_gw_group_name, []) - -- name: check for a rbd-target-gw container - command: "docker ps -q --filter='name=rbd-target-gw'" - register: ceph_rbd_target_gw_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(iscsi_gw_group_name, []) diff --git a/roles/ceph-defaults/tasks/check_socket_non_container.yml b/roles/ceph-defaults/tasks/check_socket_non_container.yml deleted file mode 100644 index 0afe3eaa8..000000000 --- a/roles/ceph-defaults/tasks/check_socket_non_container.yml +++ /dev/null @@ -1,228 +0,0 @@ ---- -- name: check for a ceph mon socket - shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok - changed_when: false - failed_when: false - check_mode: no - register: mon_socket_stat - when: - - inventory_hostname in groups.get(mon_group_name, []) - -- name: check if the ceph mon socket is in-use - command: fuser --silent {{ mon_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: mon_socket - when: - - inventory_hostname in groups.get(mon_group_name, []) - - mon_socket_stat.rc == 0 - -- name: remove ceph mon socket if exists and not used by a process - file: - name: "{{ mon_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(mon_group_name, []) - - mon_socket_stat.rc == 0 - - mon_socket.rc == 1 - -- name: check for a ceph osd socket - shell: | - stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok - changed_when: false - failed_when: false - check_mode: no - register: osd_socket_stat - when: - - inventory_hostname in groups.get(osd_group_name, []) - -- name: check if the ceph osd socket is in-use - command: fuser --silent {{ osd_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: osd_socket - when: - - inventory_hostname in groups.get(osd_group_name, []) - - osd_socket_stat.rc == 0 - -- name: remove ceph osd socket if exists and not used by a process - file: - name: "{{ osd_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(osd_group_name, []) - - osd_socket_stat.rc == 0 - - osd_socket.rc == 1 - -- name: check for a ceph mds socket - shell: | - stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok - changed_when: false - failed_when: false - check_mode: no - register: mds_socket_stat - when: - - inventory_hostname in groups.get(mds_group_name, []) - -- name: check if the ceph mds socket is in-use - command: fuser --silent {{ mds_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: mds_socket - when: - - inventory_hostname in groups.get(mds_group_name, []) - - mds_socket_stat.rc == 0 - -- name: remove ceph mds socket if exists and not used by a process - file: - name: "{{ mds_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(mds_group_name, []) - - mds_socket_stat.rc == 0 - - mds_socket.rc == 1 - -- name: check for a ceph rgw socket - shell: | - stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok - changed_when: false - failed_when: false - check_mode: no - register: rgw_socket_stat - when: - - inventory_hostname in groups.get(rgw_group_name, []) - -- name: check if the ceph 
rgw socket is in-use - command: fuser --silent {{ rgw_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: rgw_socket - when: - - inventory_hostname in groups.get(rgw_group_name, []) - - rgw_socket_stat.rc == 0 - -- name: remove ceph rgw socket if exists and not used by a process - file: - name: "{{ rgw_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(rgw_group_name, []) - - rgw_socket_stat.rc == 0 - - rgw_socket.rc == 1 - -- name: check for a ceph mgr socket - shell: | - stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok - changed_when: false - failed_when: false - check_mode: no - register: mgr_socket_stat - when: - - inventory_hostname in groups.get(mgr_group_name, []) - -- name: check if the ceph mgr socket is in-use - command: fuser --silent {{ mgr_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: mgr_socket - when: - - inventory_hostname in groups.get(mgr_group_name, []) - - mgr_socket_stat.rc == 0 - -- name: remove ceph mgr socket if exists and not used by a process - file: - name: "{{ mgr_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(mgr_group_name, []) - - mgr_socket_stat.rc == 0 - - mgr_socket.rc == 1 - -- name: check for a ceph rbd mirror socket - shell: | - stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok - changed_when: false - failed_when: false - check_mode: no - register: rbd_mirror_socket_stat - when: - - inventory_hostname in groups.get(rbdmirror_group_name, []) - -- name: check if the ceph rbd mirror socket is in-use - command: fuser --silent {{ rbd_mirror_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: rbd_mirror_socket - when: - - inventory_hostname in groups.get(rbdmirror_group_name, []) - - rbd_mirror_socket_stat.rc == 0 - -- name: remove ceph rbd mirror socket if exists and not used by a process - file: - name: "{{ rbd_mirror_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(rbdmirror_group_name, []) - - rbd_mirror_socket_stat.rc == 0 - - rbd_mirror_socket.rc == 1 - -- name: check for a ceph nfs ganesha socket - command: stat --printf=%n /var/run/ganesha.pid - changed_when: false - failed_when: false - check_mode: no - register: nfs_socket_stat - when: - - inventory_hostname in groups.get(nfs_group_name, []) - -- name: check if the ceph nfs ganesha socket is in-use - command: fuser --silent {{ nfs_socket_stat.stdout }} - changed_when: false - failed_when: false - check_mode: no - register: nfs_socket - when: - - inventory_hostname in groups.get(nfs_group_name, []) - - nfs_socket_stat.rc == 0 - -- name: remove ceph nfs ganesha socket if exists and not used by a process - file: - name: "{{ nfs_socket_stat.stdout }}" - state: absent - when: - - inventory_hostname in groups.get(nfs_group_name, []) - - nfs_socket_stat.rc == 0 - - nfs_socket.rc == 1 - -- name: check for a tcmu-runner - command: "pgrep tcmu-runner" - register: ceph_tcmu_runner_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(iscsi_gw_group_name, []) - -- name: check for a rbd-target-api - command: "pgrep rbd-target-api" - register: ceph_rbd_target_api_stat - changed_when: false - failed_when: false - check_mode: no - when: - - inventory_hostname in groups.get(iscsi_gw_group_name, []) - -- name: check for a rbd-target-gw - command: "pgrep 
rbd-target-gw"
-  register: ceph_rbd_target_gw_stat
-  changed_when: false
-  failed_when: false
-  check_mode: no
-  when:
-    - inventory_hostname in groups.get(iscsi_gw_group_name, [])
diff --git a/roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml b/roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml
deleted file mode 100644
index a20f9a453..000000000
--- a/roles/ceph-defaults/tasks/create_ceph_initial_dirs.yml
+++ /dev/null
@@ -1,25 +0,0 @@
----
-- name: set_fact ceph_directories
-  set_fact:
-    ceph_directories:
-      - /etc/ceph
-      - /var/lib/ceph/
-      - /var/lib/ceph/mon
-      - /var/lib/ceph/osd
-      - /var/lib/ceph/mds
-      - /var/lib/ceph/tmp
-      - /var/lib/ceph/radosgw
-      - /var/lib/ceph/bootstrap-rgw
-      - /var/lib/ceph/bootstrap-mds
-      - /var/lib/ceph/bootstrap-osd
-      - /var/lib/ceph/bootstrap-rbd
-      - /var/run/ceph
-
-- name: create ceph initial directories
-  file:
-    path: "{{ item }}"
-    state: directory
-    owner: "{{ ceph_uid }}"
-    group: "{{ ceph_uid }}"
-    mode: 0755
-  with_items: "{{ ceph_directories }}"
diff --git a/roles/ceph-defaults/tasks/main.yml b/roles/ceph-defaults/tasks/main.yml
index 3559ee8bd..0d1f7c93c 100644
--- a/roles/ceph-defaults/tasks/main.yml
+++ b/roles/ceph-defaults/tasks/main.yml
@@ -1,9 +1,3 @@
 ---
-- name: include check_running_cluster.yml
-  include_tasks: check_running_cluster.yml
-
 - name: include facts.yml
-  include_tasks: facts.yml
-
-- name: include create_ceph_initial_dirs.yml
-  include_tasks: create_ceph_initial_dirs.yml
+  include: facts.yml
diff --git a/roles/ceph-defaults/templates/restart_mds_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mds_daemon.sh.j2
deleted file mode 100644
index f265546f9..000000000
--- a/roles/ceph-defaults/templates/restart_mds_daemon.sh.j2
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mds_check_retries }}"
-DELAY="{{ handler_health_mds_check_delay }}"
-MDS_NAME="{{ mds_name }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mds-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok
-
-# First, restart the daemon
-systemctl restart ceph-mds@${MDS_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-    $DOCKER_EXEC test -S $SOCKET && exit 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2
deleted file mode 100644
index 2b06a04af..000000000
--- a/roles/ceph-defaults/templates/restart_mgr_daemon.sh.j2
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mgr_check_retries }}"
-DELAY="{{ handler_health_mgr_check_delay }}"
-MGR_NAME="{{ ansible_hostname }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mgr-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok
-
-systemctl reset-failed ceph-mgr@${MGR_NAME}
-# First, restart the daemon
-systemctl restart ceph-mgr@${MGR_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-    $DOCKER_EXEC test -S $SOCKET && exit 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2
deleted file mode 100644
index 748b07374..000000000
--- a/roles/ceph-defaults/templates/restart_mon_daemon.sh.j2
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_mon_check_retries }}"
-DELAY="{{ handler_health_mon_check_delay }}"
-MONITOR_NAME="{{ monitor_name }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-mon-{{ ansible_hostname }}"
-{% endif %}
-
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok
-
-check_quorum() {
-while [ $RETRIES -ne 0 ]; do
-    $DOCKER_EXEC ceph --cluster {{ cluster }} -s --format json | python -c 'import sys, json; exit(0) if "{{ monitor_name }}" in json.load(sys.stdin)["quorum_names"] else exit(1)' && exit 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-done
-# If we reach this point, it means there is a problem with the quorum
-echo "Error with quorum."
-echo "cluster status:"
-$DOCKER_EXEC ceph --cluster {{ cluster }} -s
-echo "quorum status:"
-$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} mon_status
-$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} quorum_status
-exit 1
-}
-
-# First, restart the daemon
-systemctl restart ceph-mon@{{ ansible_hostname }}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $COUNT -ne 0 ]; do
-    $DOCKER_EXEC test -S $SOCKET && check_quorum
-    sleep $DELAY
-    let COUNT=COUNT-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means the monitor is not running."
-exit 1 diff --git a/roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2 deleted file mode 100644 index 5828e1ac6..000000000 --- a/roles/ceph-defaults/templates/restart_nfs_daemon.sh.j2 +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -RETRIES="{{ handler_health_nfs_check_retries }}" -DELAY="{{ handler_health_nfs_check_delay }}" -NFS_NAME="ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }}" -PID=/var/run/ganesha.pid -{% if containerized_deployment %} -DOCKER_EXEC="docker exec ceph-nfs-{{ ansible_hostname }}" -{% endif %} - -# First, restart the daemon -{% if containerized_deployment -%} -systemctl restart $NFS_NAME -COUNT=10 -# Wait and ensure the pid exists after restarting the daemon -while [ $RETRIES -ne 0 ]; do - $DOCKER_EXEC test -f $PID && exit 0 - sleep $DELAY - let RETRIES=RETRIES-1 -done -# If we reach this point, it means the pid is not present. -echo "PID file ${PID} could not be found, which means Ganesha is not running." -exit 1 -{% else %} -systemctl restart nfs-ganesha -{% endif %} diff --git a/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 deleted file mode 100644 index 15b255900..000000000 --- a/roles/ceph-defaults/templates/restart_osd_daemon.sh.j2 +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash - -DELAY="{{ handler_health_osd_check_delay }}" -CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}" - -check_pgs() { - num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])') - if [[ "$num_pgs" == "0" ]]; then - return 0 - fi - while [ $RETRIES -ne 0 ]; do - test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print sum ( [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]])')" - RET=$? - test $RET -eq 0 && return 0 - sleep $DELAY - let RETRIES=RETRIES-1 - done - # PGs not clean, exiting with return code 1 - echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean" - echo "It is possible that the cluster has less OSDs than the replica configuration" - echo "Will refuse to continue" - $docker_exec ceph $CEPH_CLI -s - $docker_exec ceph $CEPH_CLI osd dump - $docker_exec ceph $CEPH_CLI osd tree - $docker_exec ceph $CEPH_CLI osd crush rule dump - exit 1 -} - -wait_for_socket_in_docker() { - osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/') - whoami=$(docker exec "$1" cat $osd_mount_point/whoami) - if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then - echo "Timed out while trying to look for a Ceph OSD socket." - echo "Abort mission!" - exit 1 - fi -} - -get_dev_name() { - echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/' -} - -get_docker_id_from_dev_name() { - local id - local count - count=10 - while [ $count -ne 0 ]; do - id=$(docker ps -q -f "name=$1") - test "$id" != "" && break - sleep $DELAY - let count=count-1 - done - echo "$id" -} - -get_docker_osd_id() { - wait_for_socket_in_docker $1 - docker exec "$1" ls /var/run/ceph | cut -d'.' 
-f2
-}
-
-# For containerized deployments, the unit file looks like: ceph-osd@sda.service
-# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID
-for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do
-  # First, restart daemon(s)
-  systemctl restart "${unit}"
-  # We need to wait because it may take some time for the socket to actually exist
-  COUNT=10
-  # Wait and ensure the socket exists after restarting the daemon
-  {% if containerized_deployment -%}
-  id=$(get_dev_name "$unit")
-  container_id=$(get_docker_id_from_dev_name "$id")
-  wait_for_socket_in_docker "$container_id"
-  osd_id=$whoami
-  docker_exec="docker exec $container_id"
-  {% else %}
-  osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+')
-  {% endif %}
-  SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
-  while [ $COUNT -ne 0 ]; do
-    RETRIES="{{ handler_health_osd_check_retries }}"
-    $docker_exec test -S "$SOCKET" && check_pgs && continue 2
-    sleep $DELAY
-    let COUNT=COUNT-1
-  done
-  # If we reach this point, it means the socket is not present.
-  echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
-  exit 1
-done
diff --git a/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2
deleted file mode 100644
index 73a87086b..000000000
--- a/roles/ceph-defaults/templates/restart_rbd_mirror_daemon.sh.j2
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-RETRIES="{{ handler_health_rbd_mirror_check_retries }}"
-DELAY="{{ handler_health_rbd_mirror_check_delay }}"
-RBD_MIRROR_NAME="{{ ansible_hostname }}"
-{% if containerized_deployment %}
-DOCKER_EXEC="docker exec ceph-rbd-mirror-{{ ansible_hostname }}"
-{% endif %}
-{% if ceph_release_num[ceph_release] < ceph_release_num['luminous'] %}
-SOCKET=/var/run/ceph/{{ cluster }}-client.admin.asok
-{% else %}
-# Backward compatibility
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok
-$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok
-{% endif %}
-
-# First, restart the daemon
-systemctl restart ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $RETRIES -ne 0 ]; do
-    $DOCKER_EXEC test -S $SOCKET && exit 0
-    sleep $DELAY
-    let RETRIES=RETRIES-1
-done
-# If we reach this point, it means the socket is not present.
-echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running."
-exit 1 diff --git a/roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2 deleted file mode 100644 index fd477c37a..000000000 --- a/roles/ceph-defaults/templates/restart_rbd_target_api.sh.j2 +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -systemctl restart rbd-target-api diff --git a/roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2 b/roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2 deleted file mode 100644 index 10c34bfa2..000000000 --- a/roles/ceph-defaults/templates/restart_rbd_target_gw.sh.j2 +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -systemctl restart rbd-target-gw diff --git a/roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2 deleted file mode 100644 index ce6efc0ba..000000000 --- a/roles/ceph-defaults/templates/restart_rgw_daemon.sh.j2 +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash - -RETRIES="{{ handler_health_rgw_check_retries }}" -DELAY="{{ handler_health_rgw_check_delay }}" -RGW_NAME="{{ ansible_hostname }}" -RGW_PORT="{{ radosgw_frontend_port }}" -{% if containerized_deployment %} -DOCKER_EXEC="docker exec ceph-rgw-{{ ansible_hostname }}" -{% endif %} -# Backward compatibility -$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok -$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok -{% if hostvars[inventory_hostname]['radosgw_address_block'] is defined and hostvars[inventory_hostname]['radosgw_address_block'] != 'subnet' %} - {% if ip_version == 'ipv4' %} -RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \ - {% elif ip_version == 'ipv6' %} -RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \ - {% endif %} -{% elif radosgw_address_block is defined and radosgw_address_block != 'subnet' -%} - {% if ip_version == 'ipv4' %} -RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \ - {% elif ip_version == 'ipv6' %} -RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \ - {% endif %} -{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != 'address' -%} - {% if ip_version == 'ipv4' %} -RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }} \ - {% elif ip_version == 'ipv6' %} -RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}] \ - {% endif %} -{% elif radosgw_address is defined and radosgw_address != 'address' -%} - {% if ip_version == 'ipv4' %} -RGW_IP={{ radosgw_address }} \ - {% elif ip_version == 'ipv6' %} -RGW_IP=[{{ radosgw_address }}] \ - {% endif %} -{% elif hostvars[inventory_hostname]['radosgw_interface'] is defined -%} - {% set interface = 'ansible_' + (hostvars[inventory_hostname]['radosgw_interface'] | replace('-', '_')) %} - {% if ip_version == 'ipv4' %} -RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \ - {% elif ip_version == 'ipv6' %} -RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \ - {% endif %} -{% else %} - {% set interface = 'ansible_' + (radosgw_interface | replace('-', 
'_')) %}
-    {% if ip_version == 'ipv4' %}
-RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \
-    {% elif ip_version == 'ipv6' %}
-RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \
-    {% endif %}
-{% endif %}
-
-check_for_curl_or_wget() {
-    if $DOCKER_EXEC command -v wget &>/dev/null; then
-        rgw_test_command="wget --quiet"
-    elif $DOCKER_EXEC command -v curl &>/dev/null; then
-        rgw_test_command="curl --fail --silent --output /dev/null"
-    else
-        echo "It seems that neither curl nor wget is available on your system."
-        echo "Cannot test rgw connection."
-        exit 0
-    fi
-}
-
-check_rest() {
-    check_for_curl_or_wget
-    while [ $RETRIES -ne 0 ]; do
-        $rgw_test_command "http://$RGW_IP:$RGW_PORT" && exit 0
-        sleep $DELAY
-        let RETRIES=RETRIES-1
-    done
-    # If we reach this point, it means there is a problem with the connection to rgw
-    echo "Error connecting locally to Rados Gateway service: http://$RGW_IP:$RGW_PORT"
-    exit 1
-}
-
-# First, restart the daemon
-systemctl restart ceph-radosgw@rgw.${RGW_NAME}
-
-COUNT=10
-# Wait and ensure the socket exists after restarting the daemon
-while [ $COUNT -ne 0 ]; do
-    $DOCKER_EXEC test -S $SOCKET && check_rest
-    sleep $DELAY
-    let COUNT=COUNT-1
-done
-echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
-exit 1
diff --git a/roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2 b/roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2
deleted file mode 100644
index 5dd5ff842..000000000
--- a/roles/ceph-defaults/templates/restart_tcmu_runner.sh.j2
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-systemctl restart tcmu-runner
diff --git a/roles/ceph-handler/LICENSE b/roles/ceph-handler/LICENSE
new file mode 100644
index 000000000..b0d1c9fc8
--- /dev/null
+++ b/roles/ceph-handler/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [2014] [Guillaume Abrioux]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/roles/ceph-handler/README.md b/roles/ceph-handler/README.md
new file mode 100644
index 000000000..3145a7f94
--- /dev/null
+++ b/roles/ceph-handler/README.md
@@ -0,0 +1,2 @@
+# Ansible role: ceph-handler
+Documentation is available at http://docs.ceph.com/ceph-ansible/.
diff --git a/roles/ceph-handler/handlers/main.yml b/roles/ceph-handler/handlers/main.yml
new file mode 100644
index 000000000..bc6732eb0
--- /dev/null
+++ b/roles/ceph-handler/handlers/main.yml
@@ -0,0 +1,459 @@
+---
+- name: update apt cache
+  apt:
+    update-cache: yes
+  when:
+    - ansible_os_family == 'Debian'
+
+# We only want to restart on hosts that have called the handler.
+# This var is set when the handler is called, and unset after the
+# restart to ensure only the correct hosts are restarted.
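# Illustration (not part of this diff): the hostvars guard used throughout the
# restart tasks below is what keeps the delegated, run_once loop away from
# hosts that never notified the handler. With three hypothetical monitors
# where only mon1 changed its configuration, the condition evaluates as:
#
#   hostvars['mon0']['_mon_handler_called'] | default(False)  -> False, skipped
#   hostvars['mon1']['_mon_handler_called'] | default(False)  -> True,  restarted
#   hostvars['mon2']['_mon_handler_called'] | default(False)  -> False, skipped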
+- name: set _mon_handler_called before restart + set_fact: + _mon_handler_called: True + listen: "restart ceph mons" + +- name: copy mon restart script + template: + src: restart_mon_daemon.sh.j2 + dest: /tmp/restart_mon_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph mons" + when: + - mon_group_name in group_names + +- name: restart ceph mon daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_mon_daemon.sh + listen: "restart ceph mons" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mon_group_name in group_names + - not containerized_deployment + - hostvars[item]['_mon_handler_called'] | default(False) + - mon_socket_stat.rc == 0 + with_items: "{{ groups[mon_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph mon daemon(s) - container + command: /usr/bin/env bash /tmp/restart_mon_daemon.sh + listen: "restart ceph mons" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mon_group_name in group_names + - containerized_deployment + - ceph_mon_container_stat.get('rc') == 0 + - hostvars[item]['_mon_handler_called'] | default(False) + - ceph_mon_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[mon_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _mon_handler_called after restart + set_fact: + _mon_handler_called: False + listen: "restart ceph mons" + +- name: set _osd_handler_called before restart + set_fact: + _osd_handler_called: True + listen: "restart ceph osds" + +# This does not just restart OSDs but everything else too. Unfortunately +# at this time the ansible role does not have an OSD id list to use +# for restarting them specifically. +# This does not need to run during a rolling update as the playbook will +# restart all OSDs using the tasks "start ceph osd" or +# "restart containerized ceph osd" +- name: copy osd restart script + template: + src: restart_osd_daemon.sh.j2 + dest: /tmp/restart_osd_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph osds" + when: + - osd_group_name in group_names + - not rolling_update + +- name: restart ceph osds daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_osd_daemon.sh + listen: "restart ceph osds" + when: + - osd_group_name in group_names + - not containerized_deployment + - not rolling_update + # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) + # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified + - osd_socket_stat.rc == 0 + - ceph_current_status.fsid is defined + - handler_health_osd_check + - hostvars[item]['_osd_handler_called'] | default(False) + with_items: "{{ groups[osd_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph osds daemon(s) - container + command: /usr/bin/env bash /tmp/restart_osd_daemon.sh + listen: "restart ceph osds" + when: + # We do not want to run these checks on initial deployment (`socket_osd_container_stat.results[n].rc == 0`) + # except when a crush location is specified. 
ceph-disk will start the osds before the osd crush location is specified + - osd_group_name in group_names + - containerized_deployment + - not rolling_update + - ceph_osd_container_stat.get('rc') == 0 + - inventory_hostname == groups.get(osd_group_name) | last + - ceph_osd_container_stat.get('stdout_lines', [])|length != 0 + - handler_health_osd_check + - hostvars[item]['_osd_handler_called'] | default(False) + with_items: "{{ groups[osd_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _osd_handler_called after restart + set_fact: + _osd_handler_called: False + listen: "restart ceph osds" + +- name: set _mds_handler_called before restart + set_fact: + _mds_handler_called: True + listen: "restart ceph mdss" + +- name: copy mds restart script + template: + src: restart_mds_daemon.sh.j2 + dest: /tmp/restart_mds_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph mdss" + when: + - mds_group_name in group_names + +- name: restart ceph mds daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_mds_daemon.sh + listen: "restart ceph mdss" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mds_group_name in group_names + - not containerized_deployment + - hostvars[item]['_mds_handler_called'] | default(False) + - mds_socket_stat.rc == 0 + with_items: "{{ groups[mds_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph mds daemon(s) - container + command: /usr/bin/env bash /tmp/restart_mds_daemon.sh + listen: "restart ceph mdss" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mds_group_name in group_names + - containerized_deployment + - ceph_mds_container_stat.get('rc') == 0 + - hostvars[item]['_mds_handler_called'] | default(False) + - ceph_mds_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[mds_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _mds_handler_called after restart + set_fact: + _mds_handler_called: False + listen: "restart ceph mdss" + +- name: set _rgw_handler_called before restart + set_fact: + _rgw_handler_called: True + listen: "restart ceph rgws" + +- name: copy rgw restart script + template: + src: restart_rgw_daemon.sh.j2 + dest: /tmp/restart_rgw_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph rgws" + when: + - rgw_group_name in group_names + +- name: restart ceph rgw daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh + listen: "restart ceph rgws" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - rgw_group_name in group_names + - not containerized_deployment + - hostvars[item]['_rgw_handler_called'] | default(False) + - rgw_socket_stat.rc == 0 + with_items: "{{ groups[rgw_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph rgw daemon(s) - container + command: /usr/bin/env bash /tmp/restart_rgw_daemon.sh + listen: "restart ceph rgws" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - rgw_group_name in group_names + - containerized_deployment + - ceph_rgw_container_stat.get('rc') == 0 + - hostvars[item]['_rgw_handler_called'] | default(False) + - ceph_rgw_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[rgw_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _rgw_handler_called after restart + set_fact: + _rgw_handler_called: 
False + listen: "restart ceph rgws" + +- name: set _nfs_handler_called before restart + set_fact: + _nfs_handler_called: True + listen: "restart ceph nfss" + +- name: copy nfs restart script + template: + src: restart_nfs_daemon.sh.j2 + dest: /tmp/restart_nfs_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph nfss" + when: + - nfs_group_name in group_names + +- name: restart ceph nfs daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh + listen: "restart ceph nfss" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - nfs_group_name in group_names + - not containerized_deployment + - hostvars[item]['_nfs_handler_called'] | default(False) + - nfs_socket_stat.rc == 0 + with_items: "{{ groups[nfs_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph nfs daemon(s) - container + command: /usr/bin/env bash /tmp/restart_nfs_daemon.sh + listen: "restart ceph nfss" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - nfs_group_name in group_names + - containerized_deployment + - ceph_nfs_container_stat.get('rc') == 0 + - hostvars[item]['_nfs_handler_called'] | default(False) + - ceph_nfs_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[nfs_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _nfs_handler_called after restart + set_fact: + _nfs_handler_called: False + listen: "restart ceph nfss" + +- name: set _rbdmirror_handler_called before restart + set_fact: + _rbdmirror_handler_called: True + listen: "restart ceph rbdmirrors" + +- name: copy rbd mirror restart script + template: + src: restart_rbd_mirror_daemon.sh.j2 + dest: /tmp/restart_rbd_mirror_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph rbdmirrors" + when: + - rbdmirror_group_name in group_names + +- name: restart ceph rbd mirror daemon(s) - non container + command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh + listen: "restart ceph rbdmirrors" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - rbdmirror_group_name in group_names + - not containerized_deployment + - hostvars[item]['_rbdmirror_handler_called'] | default(False) + - rbd_mirror_socket_stat.rc == 0 + with_items: "{{ groups[rbdmirror_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph rbd mirror daemon(s) - container + command: /usr/bin/env bash /tmp/restart_rbd_mirror_daemon.sh + listen: "restart ceph rbdmirrors" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - rbdmirror_group_name in group_names + - containerized_deployment + - ceph_rbd_mirror_container_stat.get('rc') == 0 + - hostvars[item]['_rbdmirror_handler_called'] | default(False) + - ceph_rbd_mirror_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[rbdmirror_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _rbdmirror_handler_called after restart + set_fact: + _rbdmirror_handler_called: False + listen: "restart ceph rbdmirrors" + +- name: set _mgr_handler_called before restart + set_fact: + _mgr_handler_called: True + listen: "restart ceph mgrs" + +- name: copy mgr restart script + template: + src: restart_mgr_daemon.sh.j2 + dest: /tmp/restart_mgr_daemon.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph mgrs" + when: + - mgr_group_name in group_names + +- name: restart ceph mgr daemon(s) - 
non container + command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh + listen: "restart ceph mgrs" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mgr_group_name in group_names + - not containerized_deployment + - hostvars[item]['_mgr_handler_called'] | default(False) + - mgr_socket_stat.rc == 0 + with_items: "{{ groups[mgr_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: restart ceph mgr daemon(s) - container + command: /usr/bin/env bash /tmp/restart_mgr_daemon.sh + listen: "restart ceph mgrs" + when: + # We do not want to run these checks on initial deployment (`socket.rc == 0`) + - mgr_group_name in group_names + - containerized_deployment + - ceph_mgr_container_stat.get('rc') == 0 + - hostvars[item]['_mgr_handler_called'] | default(False) + - ceph_mgr_container_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[mgr_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _mgr_handler_called after restart + set_fact: + _mgr_handler_called: False + listen: "restart ceph mgrs" + +- name: set _tcmu_runner_handler_called before restart + set_fact: + _tcmu_runner_handler_called: True + listen: "restart ceph tcmu-runner" + +- name: copy tcmu-runner restart script + template: + src: restart_tcmu_runner.sh.j2 + dest: /tmp/restart_tcmu_runner.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph tcmu-runner" + when: + - iscsi_gw_group_name in group_names + +- name: restart tcmu-runner + command: /usr/bin/env bash /tmp/restart_tcmu_runner.sh + listen: "restart ceph tcmu-runner" + when: + - iscsi_gw_group_name in group_names + - ceph_tcmu_runner_stat.get('rc') == 0 + - hostvars[item]['_tcmu_runner_handler_called'] | default(False) + - ceph_tcmu_runner_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[iscsi_gw_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _tcmu_runner_handler_called after restart + set_fact: + _tcmu_runner_handler_called: False + listen: "restart ceph tcmu-runner" + +- name: set _rbd_target_gw_handler_called before restart + set_fact: + _rbd_target_gw_handler_called: True + listen: "restart ceph rbd-target-gw" + +- name: copy rbd-target-gw restart script + template: + src: restart_rbd_target_gw.sh.j2 + dest: /tmp/restart_rbd_target_gw.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph rbd-target-gw" + when: + - iscsi_gw_group_name in group_names + +- name: restart rbd-target-gw + command: /usr/bin/env bash /tmp/restart_rbd_target_gw.sh + listen: "restart ceph rbd-target-gw" + when: + - iscsi_gw_group_name in group_names + - ceph_rbd_target_gw_stat.get('rc') == 0 + - hostvars[item]['_rbd_target_gw_handler_called'] | default(False) + - ceph_rbd_target_gw_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[iscsi_gw_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _rbd_target_gw_handler_called after restart + set_fact: + _rbd_target_gw_handler_called: False + listen: "restart ceph rbd-target-gw" + +- name: set _rbd_target_api_handler_called before restart + set_fact: + _rbd_target_api_handler_called: True + listen: "restart ceph rbd-target-api" + +- name: copy rbd-target-api restart script + template: + src: restart_rbd_target_api.sh.j2 + dest: /tmp/restart_rbd_target_api.sh + owner: root + group: root + mode: 0750 + listen: "restart ceph rbd-target-api" + when: + - iscsi_gw_group_name in group_names + +- name: restart rbd-target-api + command: /usr/bin/env bash 
/tmp/restart_rbd_target_api.sh + listen: "restart ceph rbd-target-api" + when: + - iscsi_gw_group_name in group_names + - ceph_rbd_target_api_stat.get('rc') == 0 + - hostvars[item]['_rbd_target_api_handler_called'] | default(False) + - ceph_rbd_target_api_stat.get('stdout_lines', [])|length != 0 + with_items: "{{ groups[iscsi_gw_group_name] }}" + delegate_to: "{{ item }}" + run_once: True + +- name: set _rbd_target_api_handler_called after restart + set_fact: + _rbd_target_api_handler_called: False + listen: "restart ceph rbd-target-api" diff --git a/roles/ceph-handler/meta/main.yml b/roles/ceph-handler/meta/main.yml new file mode 100644 index 000000000..acb144c8c --- /dev/null +++ b/roles/ceph-handler/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Sébastien Han + description: Contains handlers for Ceph services + license: Apache + min_ansible_version: 2.3 + platforms: + - name: EL + versions: + - 7 + categories: + - system +dependencies: [] diff --git a/roles/ceph-handler/tasks/check_running_cluster.yml b/roles/ceph-handler/tasks/check_running_cluster.yml new file mode 100644 index 000000000..0418d2ffe --- /dev/null +++ b/roles/ceph-handler/tasks/check_running_cluster.yml @@ -0,0 +1,10 @@ +--- +- name: include check_running_containers.yml + include_tasks: check_running_containers.yml + when: + - containerized_deployment + +- name: include check_socket_non_container.yml + include_tasks: check_socket_non_container.yml + when: + - not containerized_deployment diff --git a/roles/ceph-handler/tasks/check_running_containers.yml b/roles/ceph-handler/tasks/check_running_containers.yml new file mode 100644 index 000000000..111d11274 --- /dev/null +++ b/roles/ceph-handler/tasks/check_running_containers.yml @@ -0,0 +1,90 @@ +--- +- name: check for a mon container + command: "docker ps -q --filter='name=ceph-mon-{{ ansible_hostname }}'" + register: ceph_mon_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(mon_group_name, []) + +- name: check for an osd container + command: "docker ps -q --filter='name=ceph-osd-{{ ansible_hostname }}'" + register: ceph_osd_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(osd_group_name, []) + +- name: check for a mds container + command: "docker ps -q --filter='name=ceph-mds-{{ ansible_hostname }}'" + register: ceph_mds_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(mds_group_name, []) + +- name: check for a rgw container + command: "docker ps -q --filter='name=ceph-rgw-{{ ansible_hostname }}'" + register: ceph_rgw_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(rgw_group_name, []) + +- name: check for a mgr container + command: "docker ps -q --filter='name=ceph-mgr-{{ ansible_hostname }}'" + register: ceph_mgr_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(mgr_group_name, []) + +- name: check for a rbd mirror container + command: "docker ps -q --filter='name=ceph-rbd-mirror-{{ ansible_hostname }}'" + register: ceph_rbd_mirror_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: check for a nfs container + command: "docker ps -q --filter='name=ceph-nfs-{{ ansible_hostname }}'" + register: 
ceph_nfs_container_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(nfs_group_name, []) + +- name: check for a tcmu-runner container + command: "docker ps -q --filter='name=tcmu-runner'" + register: ceph_tcmu_runner_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) + +- name: check for a rbd-target-api container + command: "docker ps -q --filter='name=rbd-target-api'" + register: ceph_rbd_target_api_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) + +- name: check for a rbd-target-gw container + command: "docker ps -q --filter='name=rbd-target-gw'" + register: ceph_rbd_target_gw_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) diff --git a/roles/ceph-handler/tasks/check_socket_non_container.yml b/roles/ceph-handler/tasks/check_socket_non_container.yml new file mode 100644 index 000000000..0afe3eaa8 --- /dev/null +++ b/roles/ceph-handler/tasks/check_socket_non_container.yml @@ -0,0 +1,228 @@ +--- +- name: check for a ceph mon socket + shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok + changed_when: false + failed_when: false + check_mode: no + register: mon_socket_stat + when: + - inventory_hostname in groups.get(mon_group_name, []) + +- name: check if the ceph mon socket is in-use + command: fuser --silent {{ mon_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: mon_socket + when: + - inventory_hostname in groups.get(mon_group_name, []) + - mon_socket_stat.rc == 0 + +- name: remove ceph mon socket if exists and not used by a process + file: + name: "{{ mon_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(mon_group_name, []) + - mon_socket_stat.rc == 0 + - mon_socket.rc == 1 + +- name: check for a ceph osd socket + shell: | + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok + changed_when: false + failed_when: false + check_mode: no + register: osd_socket_stat + when: + - inventory_hostname in groups.get(osd_group_name, []) + +- name: check if the ceph osd socket is in-use + command: fuser --silent {{ osd_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: osd_socket + when: + - inventory_hostname in groups.get(osd_group_name, []) + - osd_socket_stat.rc == 0 + +- name: remove ceph osd socket if exists and not used by a process + file: + name: "{{ osd_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(osd_group_name, []) + - osd_socket_stat.rc == 0 + - osd_socket.rc == 1 + +- name: check for a ceph mds socket + shell: | + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok + changed_when: false + failed_when: false + check_mode: no + register: mds_socket_stat + when: + - inventory_hostname in groups.get(mds_group_name, []) + +- name: check if the ceph mds socket is in-use + command: fuser --silent {{ mds_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: mds_socket + when: + - inventory_hostname in groups.get(mds_group_name, []) + - mds_socket_stat.rc == 0 + +- name: remove ceph mds socket if exists and not used by a process + file: + name: "{{ mds_socket_stat.stdout }}" + state: absent + when: + - 
inventory_hostname in groups.get(mds_group_name, []) + - mds_socket_stat.rc == 0 + - mds_socket.rc == 1 + +- name: check for a ceph rgw socket + shell: | + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok + changed_when: false + failed_when: false + check_mode: no + register: rgw_socket_stat + when: + - inventory_hostname in groups.get(rgw_group_name, []) + +- name: check if the ceph rgw socket is in-use + command: fuser --silent {{ rgw_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: rgw_socket + when: + - inventory_hostname in groups.get(rgw_group_name, []) + - rgw_socket_stat.rc == 0 + +- name: remove ceph rgw socket if exists and not used by a process + file: + name: "{{ rgw_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(rgw_group_name, []) + - rgw_socket_stat.rc == 0 + - rgw_socket.rc == 1 + +- name: check for a ceph mgr socket + shell: | + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok + changed_when: false + failed_when: false + check_mode: no + register: mgr_socket_stat + when: + - inventory_hostname in groups.get(mgr_group_name, []) + +- name: check if the ceph mgr socket is in-use + command: fuser --silent {{ mgr_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: mgr_socket + when: + - inventory_hostname in groups.get(mgr_group_name, []) + - mgr_socket_stat.rc == 0 + +- name: remove ceph mgr socket if exists and not used by a process + file: + name: "{{ mgr_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(mgr_group_name, []) + - mgr_socket_stat.rc == 0 + - mgr_socket.rc == 1 + +- name: check for a ceph rbd mirror socket + shell: | + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok + changed_when: false + failed_when: false + check_mode: no + register: rbd_mirror_socket_stat + when: + - inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: check if the ceph rbd mirror socket is in-use + command: fuser --silent {{ rbd_mirror_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: rbd_mirror_socket + when: + - inventory_hostname in groups.get(rbdmirror_group_name, []) + - rbd_mirror_socket_stat.rc == 0 + +- name: remove ceph rbd mirror socket if exists and not used by a process + file: + name: "{{ rbd_mirror_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(rbdmirror_group_name, []) + - rbd_mirror_socket_stat.rc == 0 + - rbd_mirror_socket.rc == 1 + +- name: check for a ceph nfs ganesha socket + command: stat --printf=%n /var/run/ganesha.pid + changed_when: false + failed_when: false + check_mode: no + register: nfs_socket_stat + when: + - inventory_hostname in groups.get(nfs_group_name, []) + +- name: check if the ceph nfs ganesha socket is in-use + command: fuser --silent {{ nfs_socket_stat.stdout }} + changed_when: false + failed_when: false + check_mode: no + register: nfs_socket + when: + - inventory_hostname in groups.get(nfs_group_name, []) + - nfs_socket_stat.rc == 0 + +- name: remove ceph nfs ganesha socket if exists and not used by a process + file: + name: "{{ nfs_socket_stat.stdout }}" + state: absent + when: + - inventory_hostname in groups.get(nfs_group_name, []) + - nfs_socket_stat.rc == 0 + - nfs_socket.rc == 1 + +- name: check for a tcmu-runner + command: "pgrep tcmu-runner" + register: ceph_tcmu_runner_stat + 
changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) + +- name: check for a rbd-target-api + command: "pgrep rbd-target-api" + register: ceph_rbd_target_api_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) + +- name: check for a rbd-target-gw + command: "pgrep rbd-target-gw" + register: ceph_rbd_target_gw_stat + changed_when: false + failed_when: false + check_mode: no + when: + - inventory_hostname in groups.get(iscsi_gw_group_name, []) diff --git a/roles/ceph-handler/tasks/main.yml b/roles/ceph-handler/tasks/main.yml new file mode 100644 index 000000000..09280cdee --- /dev/null +++ b/roles/ceph-handler/tasks/main.yml @@ -0,0 +1,3 @@ +--- +- name: include check_running_cluster.yml + include: check_running_cluster.yml \ No newline at end of file diff --git a/roles/ceph-handler/templates/restart_mds_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mds_daemon.sh.j2 new file mode 100644 index 000000000..f265546f9 --- /dev/null +++ b/roles/ceph-handler/templates/restart_mds_daemon.sh.j2 @@ -0,0 +1,26 @@ +#!/bin/bash + +RETRIES="{{ handler_health_mds_check_retries }}" +DELAY="{{ handler_health_mds_check_delay }}" +MDS_NAME="{{ mds_name }}" +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-mds-{{ ansible_hostname }}" +{% endif %} + +# Backward compatibility +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_fqdn }}.asok +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mds.{{ ansible_hostname }}.asok + +# First, restart the daemon +systemctl restart ceph-mds@${MDS_NAME} + +COUNT=10 +# Wait and ensure the socket exists after restarting the daemon +while [ $RETRIES -ne 0 ]; do + $DOCKER_EXEC test -S $SOCKET && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 +done +# If we reach this point, it means the socket is not present. +echo "Socket file ${SOCKET} could not be found, which means the Metadata Server is not running." +exit 1 diff --git a/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 new file mode 100644 index 000000000..2b06a04af --- /dev/null +++ b/roles/ceph-handler/templates/restart_mgr_daemon.sh.j2 @@ -0,0 +1,27 @@ +#!/bin/bash + +RETRIES="{{ handler_health_mgr_check_retries }}" +DELAY="{{ handler_health_mgr_check_delay }}" +MGR_NAME="{{ ansible_hostname }}" +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-mgr-{{ ansible_hostname }}" +{% endif %} + +# Backward compatibility
$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_fqdn }}.asok +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mgr.{{ ansible_hostname }}.asok + +systemctl reset-failed ceph-mgr@${MGR_NAME} +# First, restart the daemon +systemctl restart ceph-mgr@${MGR_NAME} + +COUNT=10 +# Wait and ensure the socket exists after restarting the daemon +while [ $RETRIES -ne 0 ]; do + $DOCKER_EXEC test -S $SOCKET && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 +done +# If we reach this point, it means the socket is not present. +echo "Socket file ${SOCKET} could not be found, which means ceph manager is not running."
+exit 1 diff --git a/roles/ceph-handler/templates/restart_mon_daemon.sh.j2 b/roles/ceph-handler/templates/restart_mon_daemon.sh.j2 new file mode 100644 index 000000000..748b07374 --- /dev/null +++ b/roles/ceph-handler/templates/restart_mon_daemon.sh.j2 @@ -0,0 +1,42 @@ +#!/bin/bash + +RETRIES="{{ handler_health_mon_check_retries }}" +DELAY="{{ handler_health_mon_check_delay }}" +MONITOR_NAME="{{ monitor_name }}" +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-mon-{{ ansible_hostname }}" +{% endif %} + +# Backward compatibility +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_fqdn }}.asok +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-mon.{{ ansible_hostname }}.asok + +check_quorum() { +while [ $RETRIES -ne 0 ]; do + $DOCKER_EXEC ceph --cluster {{ cluster }} -s --format json | python -c 'import sys, json; exit(0) if "{{ monitor_name }}" in json.load(sys.stdin)["quorum_names"] else exit(1)' && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 +done +# If we reach this point, it means there is a problem with the quorum +echo "Error with quorum." +echo "cluster status:" +$DOCKER_EXEC ceph --cluster {{ cluster }} -s +echo "quorum status:" +$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} mon_status +$DOCKER_EXEC ceph --cluster {{ cluster }} daemon mon.${MONITOR_NAME} quorum_status +exit 1 +} + +# First, restart the daemon +systemctl restart ceph-mon@{{ ansible_hostname }} + +COUNT=10 +# Wait and ensure the socket exists after restarting the daemon +while [ $COUNT -ne 0 ]; do + $DOCKER_EXEC test -S $SOCKET && check_quorum + sleep $DELAY + let COUNT=COUNT-1 +done +# If we reach this point, it means the socket is not present. +echo "Socket file ${SOCKET} could not be found, which means the monitor is not running." +exit 1 diff --git a/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 b/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 new file mode 100644 index 000000000..5828e1ac6 --- /dev/null +++ b/roles/ceph-handler/templates/restart_nfs_daemon.sh.j2 @@ -0,0 +1,26 @@ +#!/bin/bash + +RETRIES="{{ handler_health_nfs_check_retries }}" +DELAY="{{ handler_health_nfs_check_delay }}" +NFS_NAME="ceph-nfs@{{ ceph_nfs_service_suffix | default(ansible_hostname) }}" +PID=/var/run/ganesha.pid +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-nfs-{{ ansible_hostname }}" +{% endif %} + +# First, restart the daemon +{% if containerized_deployment -%} +systemctl restart $NFS_NAME +COUNT=10 +# Wait and ensure the pid exists after restarting the daemon +while [ $RETRIES -ne 0 ]; do + $DOCKER_EXEC test -f $PID && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 +done +# If we reach this point, it means the pid is not present. +echo "PID file ${PID} could not be found, which means Ganesha is not running." 
+exit 1 +{% else %} +systemctl restart nfs-ganesha +{% endif %} diff --git a/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 new file mode 100644 index 000000000..15b255900 --- /dev/null +++ b/roles/ceph-handler/templates/restart_osd_daemon.sh.j2 @@ -0,0 +1,88 @@ +#!/bin/bash + +DELAY="{{ handler_health_osd_check_delay }}" +CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}" + +check_pgs() { + num_pgs=$($docker_exec ceph $CEPH_CLI -s -f json|python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])') + if [[ "$num_pgs" == "0" ]]; then + return 0 + fi + while [ $RETRIES -ne 0 ]; do + test "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')" -eq "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(sum([ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if "active+clean" in i["state_name"]]))')" + RET=$? + test $RET -eq 0 && return 0 + sleep $DELAY + let RETRIES=RETRIES-1 + done + # PGs not clean, exiting with return code 1 + echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean" + echo "It is possible that the cluster has fewer OSDs than the replica configuration" + echo "Will refuse to continue" + $docker_exec ceph $CEPH_CLI -s + $docker_exec ceph $CEPH_CLI osd dump + $docker_exec ceph $CEPH_CLI osd tree + $docker_exec ceph $CEPH_CLI osd crush rule dump + exit 1 +} + +wait_for_socket_in_docker() { + osd_mount_point=$(docker exec "$1" df --output=target | grep '/var/lib/ceph/osd/') + whoami=$(docker exec "$1" cat $osd_mount_point/whoami) + if ! docker exec "$1" timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"; then + echo "Timed out while trying to look for a Ceph OSD socket." + echo "Abort mission!" + exit 1 + fi +} + +get_dev_name() { + echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/' +} + +get_docker_id_from_dev_name() { + local id + local count + count=10 + while [ $count -ne 0 ]; do + id=$(docker ps -q -f "name=$1") + test "$id" != "" && break + sleep $DELAY + let count=count-1 + done + echo "$id" +} + +get_docker_osd_id() { + wait_for_socket_in_docker $1 + docker exec "$1" ls /var/run/ceph | cut -d'.' -f2 +} + +# For containerized deployments, the unit file looks like: ceph-osd@sda.service +# For non-containerized deployments, the unit file looks like: ceph-osd@NNN.service where NNN is OSD ID +for unit in $(systemctl list-units | grep -E "loaded * active" | grep -oE "ceph-osd@([0-9]+|[a-z]+).service"); do + # First, restart daemon(s) + systemctl restart "${unit}" + # We need to wait because it may take some time for the socket to actually exist + COUNT=10 + # Wait and ensure the socket exists after restarting the daemon + {% if containerized_deployment -%} + id=$(get_dev_name "$unit") + container_id=$(get_docker_id_from_dev_name "$id") + wait_for_socket_in_docker "$container_id" + osd_id=$whoami + docker_exec="docker exec $container_id" + {% else %} + osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]+') + {% endif %} + SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok + while [ $COUNT -ne 0 ]; do + RETRIES="{{ handler_health_osd_check_retries }}" + $docker_exec test -S "$SOCKET" && check_pgs && continue 2 + sleep $DELAY + let COUNT=COUNT-1 + done + # If we reach this point, it means the socket is not present.
+ echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running." + exit 1 +done diff --git a/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 new file mode 100644 index 000000000..73a87086b --- /dev/null +++ b/roles/ceph-handler/templates/restart_rbd_mirror_daemon.sh.j2 @@ -0,0 +1,29 @@ +#!/bin/bash + +RETRIES="{{ handler_health_rbd_mirror_check_retries }}" +DELAY="{{ handler_health_rbd_mirror_check_delay }}" +RBD_MIRROR_NAME="{{ ansible_hostname }}" +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-rbd-mirror-{{ ansible_hostname }}" +{% endif %} +{% if ceph_release_num[ceph_release] < ceph_release_num['luminous'] %} +SOCKET=/var/run/ceph/{{ cluster }}-client.admin.asok +{% else %} +# Backward compatibility +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_fqdn }}.asok +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rbd-mirror.{{ ansible_hostname }}.asok +{% endif %} + +# First, restart the daemon +systemctl restart ceph-rbd-mirror@rbd-mirror.${RBD_MIRROR_NAME} + +COUNT=10 +# Wait and ensure the socket exists after restarting the daemon +while [ $RETRIES -ne 0 ]; do + $DOCKER_EXEC test -S $SOCKET && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 +done +# If we reach this point, it means the socket is not present. +echo "Socket file ${SOCKET} could not be found, which means rbd mirror is not running." +exit 1 diff --git a/roles/ceph-handler/templates/restart_rbd_target_api.sh.j2 b/roles/ceph-handler/templates/restart_rbd_target_api.sh.j2 new file mode 100644 index 000000000..fd477c37a --- /dev/null +++ b/roles/ceph-handler/templates/restart_rbd_target_api.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +systemctl restart rbd-target-api diff --git a/roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2 b/roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2 new file mode 100644 index 000000000..10c34bfa2 --- /dev/null +++ b/roles/ceph-handler/templates/restart_rbd_target_gw.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +systemctl restart rbd-target-gw diff --git a/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 new file mode 100644 index 000000000..ce6efc0ba --- /dev/null +++ b/roles/ceph-handler/templates/restart_rgw_daemon.sh.j2 @@ -0,0 +1,88 @@ +#!/bin/bash + +RETRIES="{{ handler_health_rgw_check_retries }}" +DELAY="{{ handler_health_rgw_check_delay }}" +RGW_NAME="{{ ansible_hostname }}" +RGW_PORT="{{ radosgw_frontend_port }}" +{% if containerized_deployment %} +DOCKER_EXEC="docker exec ceph-rgw-{{ ansible_hostname }}" +{% endif %} +# Backward compatibility +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_fqdn }}.asok +$DOCKER_EXEC test -S /var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok && SOCKET=/var/run/ceph/{{ cluster }}-client.rgw.{{ ansible_hostname }}.asok +{% if hostvars[inventory_hostname]['radosgw_address_block'] is defined and hostvars[inventory_hostname]['radosgw_address_block'] != 'subnet' %} + {% if ip_version == 'ipv4' %} +RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ 
hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \ + {% endif %} +{% elif radosgw_address_block is defined and radosgw_address_block != 'subnet' -%} + {% if ip_version == 'ipv4' %} +RGW_IP={{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ hostvars[inventory_hostname]['ansible_all_' + ip_version + '_addresses'] | ipaddr(radosgw_address_block) | first }}] \ + {% endif %} +{% elif hostvars[inventory_hostname]['radosgw_address'] is defined and hostvars[inventory_hostname]['radosgw_address'] != 'address' -%} + {% if ip_version == 'ipv4' %} +RGW_IP={{ hostvars[inventory_hostname]['radosgw_address'] }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ hostvars[inventory_hostname]['radosgw_address'] }}] \ + {% endif %} +{% elif radosgw_address is defined and radosgw_address != 'address' -%} + {% if ip_version == 'ipv4' %} +RGW_IP={{ radosgw_address }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ radosgw_address }}] \ + {% endif %} +{% elif hostvars[inventory_hostname]['radosgw_interface'] is defined -%} + {% set interface = 'ansible_' + (hostvars[inventory_hostname]['radosgw_interface'] | replace('-', '_')) %} + {% if ip_version == 'ipv4' %} +RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \ + {% endif %} +{% else %} + {% set interface = 'ansible_' + (radosgw_interface | replace('-', '_')) %} + {% if ip_version == 'ipv4' %} +RGW_IP={{ hostvars[inventory_hostname][interface][ip_version]['address'] }} \ + {% elif ip_version == 'ipv6' %} +RGW_IP=[{{ hostvars[inventory_hostname][interface][ip_version][0]['address'] }}] \ + {% endif %} +{% endif %} + +check_for_curl_or_wget() { + if $DOCKER_EXEC command -v wget &>/dev/null; then + rgw_test_command="wget --quiet" + elif $DOCKER_EXEC command -v curl &>/dev/null; then + rgw_test_command="curl --fail --silent --output /dev/null" + else + echo "It seems that neither curl nor wget is available on your system." + echo "Cannot test rgw connection." + exit 0 + fi +} + +check_rest() { + check_for_curl_or_wget + while [ $RETRIES -ne 0 ]; do + $rgw_test_command http://$RGW_IP:$RGW_PORT && exit 0 + sleep $DELAY + let RETRIES=RETRIES-1 + done + # If we reach this point, it means there is a problem with the connection to rgw + echo "Error connecting locally to Rados Gateway service: http://$RGW_IP:$RGW_PORT" + exit 1 +} + +# First, restart the daemon +systemctl restart ceph-radosgw@rgw.${RGW_NAME} + +COUNT=10 +# Wait and ensure the socket exists after restarting the daemon +while [ $COUNT -ne 0 ]; do + $DOCKER_EXEC test -S $SOCKET && check_rest + sleep $DELAY + let COUNT=COUNT-1 +done +echo "Socket file ${SOCKET} could not be found, which means Rados Gateway is not running."
+exit 1 diff --git a/roles/ceph-handler/templates/restart_tcmu_runner.sh.j2 b/roles/ceph-handler/templates/restart_tcmu_runner.sh.j2 new file mode 100644 index 000000000..5dd5ff842 --- /dev/null +++ b/roles/ceph-handler/templates/restart_tcmu_runner.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +systemctl restart tcmu-runner diff --git a/site-docker.yml.sample b/site-docker.yml.sample index bb5c145d6..7d2e0fb7b 100644 --- a/site-docker.yml.sample +++ b/site-docker.yml.sample @@ -54,6 +54,7 @@ - role: ceph-defaults tags: [with_pkg, fetch_container_image] - role: ceph-validate + - role: ceph-handler - role: ceph-docker-common tags: [with_pkg, fetch_container_image] when: @@ -85,6 +86,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -115,6 +117,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -146,6 +149,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -173,6 +177,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -200,6 +205,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -227,6 +233,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -258,6 +265,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -285,6 +293,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common - role: ceph-config tags: ['ceph_update_config'] @@ -316,6 +325,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-docker-common when: - inventory_hostname == groups.get('clients', ['']) | first @@ -346,7 +356,8 @@ start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" roles: - { role: ceph-defaults, tags: ['ceph_update_config'] } - - { role: ceph-docker-common } + - role: ceph-handler + - role: ceph-docker-common - { role: ceph-config, tags: ['ceph_update_config'], when: "ceph_release_num[ceph_release] >= ceph_release_num.luminous" } - { role: ceph-iscsi-gw, when: "ceph_release_num[ceph_release] >= ceph_release_num.luminous" } post_tasks: @@ -375,4 +386,4 @@ msg: "{{ ceph_status.stdout_lines }}" delegate_to: "{{ groups['mons'][0] }}" run_once: true - when: not ceph_status.failed \ No newline at end of file + when: not ceph_status.failed diff --git a/site.yml.sample b/site.yml.sample index 3ab96a49c..a8a4342ff 100644 --- a/site.yml.sample +++ b/site.yml.sample @@ -91,6 +91,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -118,6 +119,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -149,6 +151,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags:
['ceph_update_config'] @@ -176,6 +179,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -203,6 +207,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -230,6 +235,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -257,6 +263,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -288,6 +295,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -319,6 +327,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -346,6 +355,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config'] @@ -375,6 +385,7 @@ roles: - role: ceph-defaults tags: ['ceph_update_config'] + - role: ceph-handler - role: ceph-common - role: ceph-config tags: ['ceph_update_config']
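Across both sample playbooks, every hunk above makes the same move: ceph-handler is slotted in immediately after ceph-defaults and before the common role (ceph-common or ceph-docker-common), so the handlers and their restart scripts are loaded before any role that can notify them. A minimal sketch of the resulting roles block for one play; the trailing daemon role (here ceph-mon) is a hypothetical stand-in, since the hunks only show the shared prefix:

    roles:
      - role: ceph-defaults
        tags: ['ceph_update_config']
      - role: ceph-handler
      - role: ceph-common
      - role: ceph-config
        tags: ['ceph_update_config']
      # hypothetical: the daemon role for this play, elided from the hunks above
      - role: ceph-mon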