From 86f3eeb717c7daac8c6330fdaa7f8a3c83f94b0d Mon Sep 17 00:00:00 2001
From: Guillaume Abrioux
Date: Thu, 9 Jan 2020 16:46:34 +0100
Subject: [PATCH] mon: support replacing a mon

We must pick up a mon which actually exists in ceph-facts in order to
detect if a cluster is running. Otherwise, it will state no cluster is
already running which will end up deploying a new monitor isolated in a
new quorum.

Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1622688

Signed-off-by: Guillaume Abrioux
---
 roles/ceph-facts/tasks/facts.yml         | 68 +++++++++++++++++++++++-
 roles/ceph-mon/tasks/deploy_monitors.yml | 10 ++--
 2 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/roles/ceph-facts/tasks/facts.yml b/roles/ceph-facts/tasks/facts.yml
index ff5375458..6a9418578 100644
--- a/roles/ceph-facts/tasks/facts.yml
+++ b/roles/ceph-facts/tasks/facts.yml
@@ -45,17 +45,81 @@
     - containerized_deployment | bool
     - groups.get(mon_group_name, []) | length > 0
 
+- name: find a running mon container
+  command: "{{ container_binary }} ps -q --filter name=ceph-mon-{{ hostvars[item]['ansible_hostname'] }}"
+  register: find_running_mon_container
+  changed_when: false
+  failed_when: false
+  run_once: true
+  delegate_to: "{{ item }}"
+  with_items: "{{ groups.get(mon_group_name, []) }}"
+  when: containerized_deployment | bool
+
+- name: check for a ceph mon socket
+  shell: stat --printf=%n {{ rbd_client_admin_socket_path }}/{{ cluster }}-mon*.asok
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mon_socket_stat
+  run_once: true
+  delegate_to: "{{ item }}"
+  with_items: "{{ groups.get(mon_group_name, []) }}"
+  when: not containerized_deployment | bool
+
+# the socket path was collected on each mon, so grep must run on that same
+# mon; grep -q exits 0 when the socket is listed in /proc/net/unix (in use)
+- name: check if the ceph mon socket is in-use
+  command: grep -q {{ item.stdout }} /proc/net/unix
+  changed_when: false
+  failed_when: false
+  check_mode: no
+  register: mon_socket
+  run_once: true
+  delegate_to: "{{ item.item }}"
+  with_items: "{{ mon_socket_stat.results }}"
+  when:
+    - not containerized_deployment | bool
+    - item.rc == 0
+
+# a mon only counts as running when its socket is in use (grep rc == 0)
+- name: set_fact running_mon - non_container
+  set_fact:
+    running_mon: "{{ hostvars[item.item.item]['inventory_hostname'] }}"
+  with_items: "{{ mon_socket.results }}"
+  run_once: true
+  when:
+    - not containerized_deployment | bool
+    - item.rc is defined
+    - item.rc == 0
+
+- name: set_fact running_mon - container
+  set_fact:
+    running_mon: "{{ item.item }}"
+  run_once: true
+  with_items: "{{ find_running_mon_container.results }}"
+  when:
+    - containerized_deployment | bool
+    - item.stdout_lines | default([]) | length > 0
+
+# running_mon holds an inventory hostname while mon containers are named
+# after ansible_hostname, so resolve the container name through hostvars
+- name: set_fact _container_exec_cmd
+  set_fact:
+    _container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[groups[mon_group_name][0] if running_mon is undefined else running_mon]['ansible_hostname'] }}"
+  when:
+    - containerized_deployment | bool
+
 # this task shouldn't run in a rolling_update situation
 # because it blindly picks a mon, which may be down because
 # of the rolling update
 - name: is ceph running already?
-  command: "{{ timeout_command }} {{ container_exec_cmd }} ceph --cluster {{ cluster }} -s -f json"
+  command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
   changed_when: false
   failed_when: false
   check_mode: no
   register: ceph_current_status
   run_once: true
-  delegate_to: "{{ groups[mon_group_name][0] }}"
+  delegate_to: "{{ groups[mon_group_name][0] if running_mon is undefined else running_mon }}"
   when:
     - not rolling_update | bool
     - groups.get(mon_group_name, []) | length > 0
diff --git a/roles/ceph-mon/tasks/deploy_monitors.yml b/roles/ceph-mon/tasks/deploy_monitors.yml
index 0dfeacec4..e9cce2c50 100644
--- a/roles/ceph-mon/tasks/deploy_monitors.yml
+++ b/roles/ceph-mon/tasks/deploy_monitors.yml
@@ -1,23 +1,23 @@
 ---
 - name: check if monitor initial keyring already exists
   command: >
-    {{ hostvars[groups[mon_group_name][0]]['container_exec_cmd'] | default('') }} ceph --cluster {{ cluster }} --name mon. -k
-    /var/lib/ceph/mon/{{ cluster }}-{{ hostvars[groups[mon_group_name][0]]['ansible_hostname'] }}/keyring
+    {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} --name mon. -k
+    /var/lib/ceph/mon/{{ cluster }}-{{ hostvars[groups[mon_group_name][0] if running_mon is undefined else running_mon]['ansible_hostname'] }}/keyring
     auth get-key mon.
   register: initial_mon_key
   run_once: True
-  delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
+  delegate_to: "{{ groups[mon_group_name][0] if running_mon is undefined else running_mon }}"
   when: ceph_current_status.fsid is defined
 
 - name: generate monitor initial keyring
   command: >
-    {{ discovered_interpreter_python }} -c "import os ; import struct ;
+    {{ hostvars[groups[mon_group_name][0] if running_mon is undefined else running_mon]['discovered_interpreter_python'] }} -c "import os ; import struct ;
     import time; import base64 ; key = os.urandom(16) ;
     header = struct.pack('