From: Sébastien Han Date: Fri, 22 Sep 2017 16:34:16 +0000 (+0200) Subject: handler: enhance socket detection X-Git-Tag: v3.0.0rc11^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F1943%2Fhead;p=ceph-ansible.git handler: enhance socket detection We have seen issues with leftover sockets. So now, if a socket is found we also check if it's accessed by a process. If so, we can run the handler; if not, we remove it and continue the playbook. Signed-off-by: Sébastien Han Co-Authored-by: Guillaume Abrioux --- diff --git a/roles/ceph-defaults/handlers/main.yml b/roles/ceph-defaults/handlers/main.yml index 4389952d2..f911344a3 100644 --- a/roles/ceph-defaults/handlers/main.yml +++ b/roles/ceph-defaults/handlers/main.yml @@ -20,7 +20,7 @@ listen: "restart ceph mons" when: # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mon_socket.rc == 0 + - mon_socket_stat.rc == 0 - mon_group_name in group_names # This does not just restart OSDs but everything else too. Unfortunately @@ -41,9 +41,9 @@ - name: restart containerized ceph osds daemon(s) command: /tmp/restart_osd_daemon.sh listen: "restart ceph osds" - with_items: "{{ socket_osd_container.results | default([]) }}" + with_items: "{{ socket_osd_container_stat.results | default([]) }}" when: - # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) + # We do not want to run these checks on initial deployment (`socket_osd_container_stat.results[n].rc == 0`) # except when a crush location is specified. 
ceph-disk will start the osds before the osd crush location is specified - containerized_deployment - ((crush_location is defined and crush_location) or item.get('rc') == 0) @@ -56,9 +56,10 @@ command: /tmp/restart_osd_daemon.sh listen: "restart ceph osds" when: + - not containerized_deployment # We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`) # except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified - - ((crush_location is defined and crush_location) or osd_socket.rc == 0) + - ((crush_location is defined and crush_location) or osd_socket_stat.rc == 0) - ceph_current_fsid.rc == 0 - handler_health_osd_check # See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below @@ -77,18 +78,12 @@ - inventory_hostname in play_hosts - mds_group_name in group_names -- name: debug socket mds - debug: msg="{{mds_socket}}" - listen: "restart ceph mdss" - when: - - mds_group_name in group_names - - name: restart ceph mds daemon(s) command: /tmp/restart_mds_daemon.sh listen: "restart ceph mdss" when: # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - mds_socket.rc == 0 + - mds_socket_stat.rc == 0 - mds_group_name in group_names - name: copy rgw restart script @@ -108,7 +103,7 @@ listen: "restart ceph rgws" when: # We do not want to run these checks on initial deployment (`socket.rc == 0`) - - rgw_socket.rc == 0 + - rgw_socket_stat.rc == 0 - rgw_group_name in group_names - name: restart ceph nfss diff --git a/roles/ceph-defaults/tasks/check_socket.yml b/roles/ceph-defaults/tasks/check_socket.yml index ddfffeca8..21b5de054 100644 --- a/roles/ceph-defaults/tasks/check_socket.yml +++ b/roles/ceph-defaults/tasks/check_socket.yml @@ -1,60 +1,207 @@ --- # These checks are used to avoid running handlers at initial deployment. 
+- name: set_fact docker_exec_cmd mon + set_fact: + docker_exec_cmd: "docker exec ceph-mon-{{ ansible_hostname }}" + when: + - containerized_deployment + - name: check for a ceph mon socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok > /dev/null 2>&1' + {{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok' + changed_when: false + failed_when: false + always_run: true + register: mon_socket_stat + +- name: check if the ceph mon socket is in-use + shell: | + {{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ mon_socket_stat.stdout }}' changed_when: false failed_when: false always_run: true register: mon_socket + when: + - mon_socket_stat.rc == 0 + +- name: remove ceph mon socket if exists and not used by a process + file: + name: "{{ mon_socket_stat.stdout }}" + state: absent + when: + - not containerized_deployment + - mon_socket_stat.rc == 0 + - mon_socket.rc != 0 - name: check for a ceph osd socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok > /dev/null 2>&1' + stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-osd*.asok + changed_when: false + failed_when: false + always_run: true + register: osd_socket_stat + when: + - not containerized_deployment + +- name: check if the ceph osd socket is in-use + shell: | + fuser --silent {{ osd_socket_stat.stdout }} changed_when: false failed_when: false always_run: true register: osd_socket + when: + - not containerized_deployment + - osd_socket_stat.rc == 0 + +- name: remove ceph osd socket if exists and not used by a process + file: + name: "{{ osd_socket_stat.stdout }}" + state: absent + when: + - not containerized_deployment + - osd_socket_stat.rc == 0 + - osd_socket.rc != 0 + +- name: set_fact docker_exec_cmd mds + set_fact: + docker_exec_cmd: "docker exec ceph-mds-{{ ansible_hostname }}" + when: + - 
containerized_deployment - name: check for a ceph mds socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok > /dev/null 2>&1' + {{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mds*.asok' + changed_when: false + failed_when: false + always_run: true + register: mds_socket_stat + +- name: check if the ceph mds socket is in-use + shell: | + {{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ mds_socket_stat.stdout }}' changed_when: false failed_when: false always_run: true register: mds_socket + when: + - mds_socket_stat.rc == 0 + +- name: remove ceph mds socket if exists and not used by a process + file: + name: "{{ mds_socket_stat.stdout }}" + state: absent + when: + - not containerized_deployment + - mds_socket_stat.rc == 0 + - mds_socket.rc != 0 + +- name: set_fact docker_exec_cmd rgw + set_fact: + docker_exec_cmd: "docker exec ceph-rgw-{{ ansible_hostname }}" + when: + - containerized_deployment - name: check for a ceph rgw socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok > /dev/null 2>&1' + {{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rgw*.asok' + changed_when: false + failed_when: false + always_run: true + register: rgw_socket_stat + +- name: check if the ceph rgw socket is in-use + shell: | + {{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ rgw_socket_stat.stdout }}' changed_when: false failed_when: false always_run: true register: rgw_socket + when: + - rgw_socket_stat.rc == 0 + +- name: remove ceph rgw socket if exists and not used by a process + file: + name: "{{ rgw_socket_stat.stdout }}" + state: absent + when: + - not containerized_deployment + - rgw_socket_stat.rc == 0 + - rgw_socket.rc != 0 + +- name: set_fact docker_exec_cmd mgr + set_fact: + docker_exec_cmd: 
"docker exec ceph-mgr-{{ ansible_hostname }}" + when: + - containerized_deployment - name: check for a ceph mgr socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok > /dev/null 2>&1' + {{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mgr*.asok' + changed_when: false + failed_when: false + always_run: true + register: mgr_socket_stat + +- name: check if the ceph mgr socket is in-use + shell: | + {{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ mgr_socket_stat.stdout }}' changed_when: false failed_when: false always_run: true register: mgr_socket + when: + - mgr_socket_stat.rc == 0 + +- name: remove ceph mgr socket if exists and not used by a process + file: + name: "{{ mgr_socket_stat.stdout }}" + state: absent + when: + - not containerized_deployment + - mgr_socket_stat.rc == 0 + - mgr_socket.rc != 0 + +- name: set_fact docker_exec_cmd rbd mirror + set_fact: + docker_exec_cmd: "docker exec ceph-rbd-mirror-{{ ansible_hostname }}" + when: + - containerized_deployment - name: check for a ceph rbd mirror socket shell: | - {{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok > /dev/null 2>&1' + {{ docker_exec_cmd | default('') }} bash -c 'stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-client.rbd-mirror*.asok' + changed_when: false + failed_when: false + always_run: true + register: rbd_mirror_socket_stat + +- name: check if the ceph rbd mirror socket is in-use + shell: | + {{ docker_exec_cmd | default('') }} bash -c 'fuser --silent {{ rbd_mirror_socket_stat.stdout }}' changed_when: false failed_when: false always_run: true register: rbd_mirror_socket + when: + - rbd_mirror_socket_stat.rc == 0 + +- name: remove ceph rbd mirror socket if exists and not used by a process + file: + name: "{{ rbd_mirror_socket_stat.stdout }}" + state: absent + when: + - not 
containerized_deployment + - rbd_mirror_socket_stat.rc == 0 + - rbd_mirror_socket.rc != 0 - name: check for a ceph socket in containerized deployment (osds) shell: | - docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1' + docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat --printf=%n /var/run/ceph/*.asok' changed_when: false failed_when: false always_run: true - register: socket_osd_container + register: socket_osd_container_stat with_items: "{{ devices }}" when: - containerized_deployment