tasks_from: systemd.yml
when: inventory_hostname in groups.get(rgw_group_name, [])
+ - import_role:
+ name: ceph-crash
+ tasks_from: systemd.yml
+ when: inventory_hostname in groups.get(mon_group_name, []) or
+ inventory_hostname in groups.get(osd_group_name, []) or
+ inventory_hostname in groups.get(mds_group_name, []) or
+ inventory_hostname in groups.get(rgw_group_name, []) or
+ inventory_hostname in groups.get(mgr_group_name, []) or
+ inventory_hostname in groups.get(rbdmirror_group_name, [])
+
- name: dashboard configuration
when: dashboard_enabled | bool
block:
- /var/lib/ceph/bootstrap-mgr
- /var/lib/ceph/tmp
+- name: purge ceph-crash daemons
+ hosts:
+ - "{{ mon_group_name | default('mons') }}"
+ - "{{ osd_group_name | default('osds') }}"
+ - "{{ mds_group_name | default('mdss') }}"
+ - "{{ rgw_group_name | default('rgws') }}"
+ - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
+ - "{{ mgr_group_name | default('mgrs') }}"
+ gather_facts: false
+ become: true
+ tasks:
+ - name: stop ceph-crash service
+ service:
+ name: ceph-crash.service
+ state: stopped
+ enabled: no
+ failed_when: false
+
+ - name: remove /var/lib/ceph/crash
+ file:
+ path: /var/lib/ceph/crash
+ state: absent
+
- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data
failed_when: false
when: dashboard_enabled | bool
+- name: purge ceph-crash containers
+ hosts:
+ - "{{ mon_group_name | default('mons') }}"
+ - "{{ osd_group_name | default('osds') }}"
+ - "{{ mds_group_name | default('mdss') }}"
+ - "{{ rgw_group_name | default('rgws') }}"
+ - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
+ - "{{ mgr_group_name | default('mgrs') }}"
+ gather_facts: false
+ become: true
+ tasks:
+ # NOTE(review): ansible_hostname requires gathered facts, but this play sets
+ # gather_facts: false — presumably facts are cached by an earlier play; confirm.
+ - name: stop ceph-crash container
+ service:
+ name: "ceph-crash@{{ ansible_hostname }}"
+ state: stopped
+ enabled: no
+ failed_when: false
+
+ - name: remove service file
+ file:
+ path: "/etc/systemd/system/ceph-crash.service"
+ state: absent
+ failed_when: false
+
+ - name: remove /var/lib/ceph/crash
+ file:
+ path: /var/lib/ceph/crash
+ state: absent
+
- name: check container hosts
hosts:
- import_role:
name: ceph-client
+- name: upgrade ceph-crash daemons
+ hosts:
+ - "{{ mon_group_name | default('mons') }}"
+ - "{{ osd_group_name | default('osds') }}"
+ - "{{ mds_group_name | default('mdss') }}"
+ - "{{ rgw_group_name | default('rgws') }}"
+ - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
+ - "{{ mgr_group_name | default('mgrs') }}"
+ gather_facts: false
+ become: true
+ tasks:
+ - import_role:
+ name: ceph-defaults
+ - import_role:
+ name: ceph-facts
+ tasks_from: container_binary.yml
+ - import_role:
+ name: ceph-handler
+ - import_role:
+ name: ceph-crash
+
- name: complete upgrade
hosts:
- "{{ mon_group_name | default('mons') }}"
- import_role:
name: ceph-nfs
+
+- name: switching from non-containerized to containerized ceph-crash
+
+ hosts:
+ - "{{ mon_group_name | default('mons') }}"
+ - "{{ osd_group_name | default('osds') }}"
+ - "{{ mds_group_name | default('mdss') }}"
+ - "{{ rgw_group_name | default('rgws') }}"
+ - "{{ rbdmirror_group_name | default('rbdmirrors') }}"
+ - "{{ mgr_group_name | default('mgrs') }}"
+
+ vars:
+ containerized_deployment: true
+ serial: 1
+ become: true
+ tasks:
+ - name: stop non-containerized ceph-crash
+ service:
+ name: ceph-crash
+ state: stopped
+ enabled: no
+
+ - import_role:
+ name: ceph-defaults
+
+ - import_role:
+ name: ceph-facts
+ tasks_from: container_binary.yml
+
+ - import_role:
+ name: ceph-handler
+
+ - import_role:
+ name: ceph-crash
\ No newline at end of file
- ceph_nfs_container_stat.get('rc') == 0
- ceph_nfs_container_stat.get('stdout_lines', [])|length != 0
+- name: inspect ceph crash container
+ command: "{{ container_binary }} inspect {{ ceph_crash_container_stat.stdout }}"
+ changed_when: false
+ register: ceph_crash_inspect
+ when:
+ - ceph_crash_container_stat.get('rc') == 0
+ - ceph_crash_container_stat.get('stdout_lines', [])|length != 0
+
# NOTE(leseb): using failed_when to handle the case when the image is not present yet
- name: "inspecting ceph mon container image before pulling"
command: "{{ container_binary }} inspect {{ (ceph_mon_inspect.stdout | from_json)[0].Image }}"
- nfs_group_name in group_names
- ceph_nfs_inspect.get('rc') == 0
+- name: "inspecting ceph crash container image before pulling"
+ command: "{{ container_binary }} inspect {{ (ceph_crash_inspect.stdout | from_json)[0].Image }}"
+ changed_when: false
+ failed_when: false
+ register: ceph_crash_container_inspect_before_pull
+ when: ceph_crash_inspect.get('rc') == 0
+
- name: set_fact ceph_mon_image_repodigest_before_pulling
set_fact:
ceph_mon_image_repodigest_before_pulling: "{{ (ceph_mon_container_inspect_before_pull.stdout | from_json)[0].Id }}"
- mgr_group_name in group_names
- ceph_mgr_container_inspect_before_pull.get('rc') == 0
+- name: set_fact ceph_crash_image_repodigest_before_pulling
+ set_fact:
+ ceph_crash_image_repodigest_before_pulling: "{{ (ceph_crash_container_inspect_before_pull.stdout | from_json)[0].Id }}"
+ when: ceph_crash_container_inspect_before_pull.get('rc') == 0
+
- name: set_fact ceph_rbd_mirror_image_repodigest_before_pulling
set_fact:
ceph_rbd_mirror_image_repodigest_before_pulling: "{{ (ceph_rbd_mirror_container_inspect_before_pull.stdout | from_json)[0].Id }}"
- ceph_nfs_container_inspect_before_pull.get('rc') == 0
- ceph_nfs_image_repodigest_before_pulling != image_repodigest_after_pulling
+- name: set_fact ceph_crash_image_updated
+ set_fact:
+ ceph_crash_image_updated: "{{ ceph_crash_image_repodigest_before_pulling != image_repodigest_after_pulling }}"
+ changed_when: true
+ notify: restart ceph crash
+ when:
+ - ceph_crash_container_inspect_before_pull.get('rc') == 0
+ - ceph_crash_image_repodigest_before_pulling != image_repodigest_after_pulling
+
- name: export local ceph dev image
command: >
{{ container_binary }} save -o "/tmp/{{ ceph_docker_username }}-{{ ceph_docker_imagename }}-{{ ceph_docker_image_tag }}.tar"
--- /dev/null
+---
+galaxy_info:
+ company: Red Hat
+ author: Guillaume Abrioux
+ description: Deploy ceph-crash
+ license: Apache
+ min_ansible_version: '2.7'
+ platforms:
+ - name: EL
+ versions:
+ - 7
+ - 8
+ galaxy_tags:
+ - system
+dependencies: []
--- /dev/null
+---
+- name: create and copy client.crash keyring
+ when: cephx | bool
+ block:
+ - name: create client.crash keyring
+ ceph_key:
+ state: present
+ name: "client.crash"
+ caps: "{{ {'mon': 'allow profile crash', 'mgr': 'allow profile crash'} }}"
+ cluster: "{{ cluster }}"
+ dest: "{{ ceph_conf_key_directory }}"
+ import_key: True
+ mode: "{{ ceph_keyring_permissions }}"
+ owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
+ group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
+ environment:
+ CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
+ CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+ delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
+ run_once: True
+
+ - name: get keys from monitors
+ command: "{{ hostvars[groups[mon_group_name][0]]['container_exec_cmd'] | default('') }} ceph --cluster {{ cluster }} auth get client.crash"
+ register: _crash_keys
+ delegate_to: "{{ groups.get(mon_group_name)[0] }}"
+ run_once: true
+
+ - name: get a list of node where the keyring should be copied
+ set_fact:
+ list_target_node: "{{ list_target_node | default([]) | union(((groups.get('all') | difference(groups.get(grafana_server_group_name, []) + groups.get(client_group_name, []) + groups.get(nfs_group_name, []) + groups.get(iscsi_gw_group_name, []))) + groups.get(item, [])) | unique) }}"
+ run_once: True
+ with_items:
+ - "{{ mon_group_name if groups.get(mon_group_name, []) | length > 0 else [] }}"
+ - "{{ osd_group_name if groups.get(osd_group_name, []) | length > 0 else [] }}"
+ - "{{ mds_group_name if groups.get(mds_group_name, []) | length > 0 else [] }}"
+ - "{{ rgw_group_name if groups.get(rgw_group_name, []) | length > 0 else [] }}"
+ - "{{ rbdmirror_group_name if groups.get(rbdmirror_group_name, []) | length > 0 else [] }}"
+ - "{{ mgr_group_name if groups.get(mgr_group_name, []) | length > 0 else [] }}"
+
+ - name: copy ceph key(s) if needed
+ copy:
+ dest: "{{ ceph_conf_key_directory }}/{{ cluster }}.client.crash.keyring"
+ content: "{{ _crash_keys.stdout + '\n' }}"
+ owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
+ group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
+ mode: "{{ ceph_keyring_permissions }}"
+ with_items: "{{ list_target_node }}"
+ delegate_to: "{{ item }}"
+ run_once: True
+
+- name: start ceph-crash daemon
+ when: containerized_deployment | bool
+ block:
+ - name: create /var/lib/ceph/crash/posted
+ file:
+ path: /var/lib/ceph/crash/posted
+ state: directory
+ mode: '0755'
+ owner: "{{ ceph_uid }}"
+ group: "{{ ceph_uid }}"
+
+ - name: include_tasks systemd.yml
+ include_tasks: systemd.yml
+
+- name: start the ceph-crash service
+ systemd:
+ name: "{{ 'ceph-crash@' + ansible_hostname if containerized_deployment | bool else 'ceph-crash.service' }}"
+ state: started
+ enabled: yes
+ masked: no
+ daemon_reload: yes
\ No newline at end of file
--- /dev/null
+---
+- name: generate systemd unit file for ceph-crash container
+ template:
+ src: "{{ role_path }}/templates/ceph-crash.service.j2"
+ dest: /etc/systemd/system/ceph-crash@.service
+ owner: "root"
+ group: "root"
+ mode: "0644"
+ notify: restart ceph crash
\ No newline at end of file
--- /dev/null
+[Unit]
+Description=Ceph crash dump collector
+{% if container_binary == 'docker' %}
+After=docker.service
+Requires=docker.service
+{% else %}
+After=network.target
+{% endif %}
+
+[Service]
+{% if container_binary == 'podman' %}
+ExecStartPre=-/usr/bin/rm -f /%t/%n-pid /%t/%n-cid
+ExecStartPre=-/usr/bin/{{ container_binary }} rm -f ceph-crash-%i
+{% endif %}
+ExecStart=/usr/bin/{{ container_binary }} run --rm --name ceph-crash-%i \
+{% if container_binary == 'podman' %}
+-d --conmon-pidfile /%t/%n-pid --cidfile /%t/%n-cid \
+{% endif %}
+--net=host \
+-v /var/lib/ceph:/var/lib/ceph:z \
+-v /etc/localtime:/etc/localtime:ro \
+--entrypoint=/usr/bin/ceph-crash {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}
+{% if container_binary == 'podman' %}
+ExecStop=-/usr/bin/sh -c "/usr/bin/{{ container_binary }} rm -f `cat /%t/%n-cid`"
+{% else %}
+ExecStop=-/usr/bin/{{ container_binary }} stop ceph-crash-%i
+{% endif %}
+StartLimitInterval=10min
+StartLimitBurst=30
+{% if container_binary == 'podman' %}
+Type=forking
+PIDFile=/%t/%n-pid
+{% endif %}
+KillMode=none
+Restart=always
+RestartSec=10s
+TimeoutStartSec=120
+TimeoutStopSec=10
+
+[Install]
+WantedBy=multi-user.target
include_tasks: handler_rbd_target_api_gw.yml
when: iscsi_gw_group_name in group_names
listen: "restart ceph rbd-target-api-gw"
+
+ - name: ceph crash handler
+ include_tasks: handler_crash.yml
+ listen: "restart ceph crash"
failed_when: false
check_mode: no
when: inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a ceph-crash container
+ command: "{{ container_binary }} ps -q --filter='name=ceph-crash-{{ ansible_hostname }}'"
+ register: ceph_crash_container_stat
+ changed_when: false
+ failed_when: false
+ check_mode: no
\ No newline at end of file
failed_when: false
check_mode: no
when: inventory_hostname in groups.get(iscsi_gw_group_name, [])
+
+- name: check for a ceph-crash process
+ command: pgrep ceph-crash
+ changed_when: false
+ failed_when: false
+ check_mode: no
+ register: crash_process
\ No newline at end of file
--- /dev/null
+---
+- name: set _crash_handler_called before restart
+ set_fact:
+ _crash_handler_called: True
+
+- name: restart the ceph-crash service
+ systemd:
+ name: ceph-crash@{{ ansible_hostname }}
+ state: restarted
+ enabled: yes
+ masked: no
+ daemon_reload: yes
+ ignore_errors: true
+ when: hostvars[inventory_hostname]['_crash_handler_called'] | default(False) | bool
+
+- name: set _crash_handler_called after restart
+ set_fact:
+ _crash_handler_called: False
- name: set_fact handler_mgr_status
set_fact:
handler_mgr_status: "{{ (mgr_socket_stat.get('rc') == 0) if not containerized_deployment | bool else (ceph_mgr_container_stat.get('rc') == 0 and ceph_mgr_container_stat.get('stdout_lines', []) | length != 0) }}"
- when: inventory_hostname in groups.get(mgr_group_name, [])
\ No newline at end of file
+ when: inventory_hostname in groups.get(mgr_group_name, [])
+
+- name: set_fact handler_crash_status
+ set_fact:
+ handler_crash_status: "{{ crash_process.get('rc') == 0 if not containerized_deployment | bool else (ceph_crash_container_stat.get('rc') == 0 and ceph_crash_container_stat.get('stdout_lines', []) | length != 0) }}"
+ when:
+ - inventory_hostname in groups.get(mon_group_name, [])
+ or inventory_hostname in groups.get(mgr_group_name, [])
+ or inventory_hostname in groups.get(osd_group_name, [])
+ or inventory_hostname in groups.get(mds_group_name, [])
+ or inventory_hostname in groups.get(rgw_group_name, [])
+ or inventory_hostname in groups.get(rbdmirror_group_name, [])
\ No newline at end of file
- dashboard_enabled | bool
- groups.get(grafana_server_group_name, []) | length > 0
+- hosts:
+ - mons
+ - osds
+ - mdss
+ - rgws
+ - rbdmirrors
+ - mgrs
+
+ gather_facts: false
+ become: True
+ any_errors_fatal: true
+
+ tasks:
+ - import_role:
+ name: ceph-defaults
+ - import_role:
+ name: ceph-facts
+ tasks_from: container_binary.yml
+ - import_role:
+ name: ceph-handler
+ - import_role:
+ name: ceph-crash
+
+
- hosts: mons
gather_facts: false
become: True
- dashboard_enabled | bool
- groups.get(grafana_server_group_name, []) | length > 0
+- hosts:
+ - mons
+ - osds
+ - mdss
+ - rgws
+ - rbdmirrors
+ - mgrs
+
+ gather_facts: false
+ become: True
+ any_errors_fatal: true
+
+ tasks:
+ - import_role:
+ name: ceph-defaults
+ - import_role:
+ name: ceph-facts
+ tasks_from: container_binary.yml
+ - import_role:
+ name: ceph-handler
+ - import_role:
+ name: ceph-crash
+
- hosts: mons
gather_facts: false
become: True
request.function, group_names)
pytest.skip(reason)
+ if request.node.get_closest_marker('ceph_crash') and group_names in [['nfss'], ['iscsigws'], ['clients'], ['grafana-server']]:
+ pytest.skip('Not a valid test for nfs, client, iscsigw or grafana-server nodes')
+
if request.node.get_closest_marker("no_docker") and docker:
pytest.skip(
"Not a valid test for containerized deployments or atomic hosts")
if pattern.search(mon_host_line) is None:
result = False
assert result
+
+class TestCephCrash(object):
+ @pytest.mark.no_docker
+ @pytest.mark.ceph_crash
+ def test_ceph_crash_service_enabled_and_running(self, node, host):
+ s = host.service("ceph-crash")
+ assert s.is_enabled
+ assert s.is_running
+
+ @pytest.mark.docker
+ @pytest.mark.ceph_crash
+ def test_ceph_crash_service_enabled_and_running_container(self, node, host):
+ s = host.service("ceph-crash@{hostname}".format(hostname=node["vars"]["inventory_hostname"]))
+ assert s.is_enabled
+ assert s.is_running
\ No newline at end of file