From 3f9081931f8a369b075060083cdb225e3477f99a Mon Sep 17 00:00:00 2001
From: Dimitri Savineau
Date: Mon, 26 Oct 2020 17:49:47 -0400
Subject: [PATCH] rgw/rbdmirror: use service dump instead of ceph -s

The ceph status command returns a lot of information that ends up stored
in variables and/or facts, which consumes resources for nothing.
When checking the rgw/rbdmirror services status, we only use the
servicemap structure from the ceph status output.
To optimize this, we can use the ceph service dump command, which
contains the same needed information.
This command returns less information and is slightly faster than the
ceph status command.

$ ceph status -f json | wc -c
2001
$ ceph service dump -f json | wc -c
1105
$ time ceph status -f json > /dev/null

real    0m0.557s
user    0m0.516s
sys     0m0.040s

$ time ceph service dump -f json > /dev/null

real    0m0.454s
user    0m0.434s
sys     0m0.020s

Signed-off-by: Dimitri Savineau
---
 infrastructure-playbooks/shrink-rbdmirror.yml | 18 +++++++++---------
 infrastructure-playbooks/shrink-rgw.yml       | 14 +++++++-------
 roles/ceph-facts/tasks/facts.yml              |  9 ++++-----
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/infrastructure-playbooks/shrink-rbdmirror.yml b/infrastructure-playbooks/shrink-rbdmirror.yml
index 9b01a1e00..d87130044 100644
--- a/infrastructure-playbooks/shrink-rbdmirror.yml
+++ b/infrastructure-playbooks/shrink-rbdmirror.yml
@@ -67,7 +67,7 @@
         container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ ansible_hostname }}"
 
     - name: exit playbook, if can not connect to the cluster
-      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_health
       until: ceph_health is succeeded
       retries: 5
@@ -80,14 +80,14 @@
     - name: set_fact rbdmirror_gids
       set_fact:
         rbdmirror_gids: "{{ rbdmirror_gids | default([]) + [ item ] }}"
-      with_items: "{{ (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list }}"
+      with_items: "{{ (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list }}"
       when: item != 'summary'
 
     - name: set_fact rbdmirror_to_kill_gid
       set_fact:
-        rbdmirror_to_kill_gid: "{{ (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'][item]['gid'] }}"
+        rbdmirror_to_kill_gid: "{{ (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'][item]['gid'] }}"
       with_items: "{{ rbdmirror_gids }}"
-      when: (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'][item]['metadata']['id'] == rbdmirror_to_kill_hostname
+      when: (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'][item]['metadata']['id'] == rbdmirror_to_kill_hostname
 
   tasks:
     - name: stop rbdmirror service
@@ -106,14 +106,14 @@
 
   post_tasks:
     - name: get servicemap details
-      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_health
       failed_when:
-        - "'rbd-mirror' in (ceph_health.stdout | from_json)['servicemap']['services'].keys() | list"
-        - rbdmirror_to_kill_gid in (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list
+        - "'rbd-mirror' in (ceph_health.stdout | from_json)['services'].keys() | list"
+        - rbdmirror_to_kill_gid in (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list
       until:
-        - "'rbd-mirror' in (ceph_health.stdout | from_json)['servicemap']['services'].keys() | list"
-        - rbdmirror_to_kill_gid not in (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list
+        - "'rbd-mirror' in (ceph_health.stdout | from_json)['services'].keys() | list"
+        - rbdmirror_to_kill_gid not in (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list
       when: rbdmirror_to_kill_gid is defined
       retries: 12
       delay: 10
diff --git a/infrastructure-playbooks/shrink-rgw.yml b/infrastructure-playbooks/shrink-rgw.yml
index e8e9a6cd8..88231b2c2 100644
--- a/infrastructure-playbooks/shrink-rgw.yml
+++ b/infrastructure-playbooks/shrink-rgw.yml
@@ -76,12 +76,12 @@
       delay: 2
 
     - name: get rgw instances
-      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       register: rgw_instances
 
     - name: exit playbook, if the rgw_to_kill doesn't exist
-      when: rgw_to_kill not in (rgw_instances.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+      when: rgw_to_kill not in (rgw_instances.stdout | from_json).services.rgw.daemons.keys() | list
       fail:
         msg: >
           It seems that the rgw instance given is not part of the ceph cluster. Please
@@ -111,14 +111,14 @@
       delay: 2
 
    - name: exit if rgw_to_kill is reported in ceph status
-      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_status
       failed_when:
-        - (ceph_status.stdout | from_json).servicemap.services.rgw is defined
-        - rgw_to_kill in (ceph_status.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+        - (ceph_status.stdout | from_json).services.rgw is defined
+        - rgw_to_kill in (ceph_status.stdout | from_json).services.rgw.daemons.keys() | list
       until:
-        - (ceph_status.stdout | from_json).servicemap.services.rgw is defined
-        - rgw_to_kill not in (ceph_status.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+        - (ceph_status.stdout | from_json).services.rgw is defined
+        - rgw_to_kill not in (ceph_status.stdout | from_json).services.rgw.daemons.keys() | list
       retries: 3
       delay: 3
diff --git a/roles/ceph-facts/tasks/facts.yml b/roles/ceph-facts/tasks/facts.yml
index 6987244e7..c7bd414de 100644
--- a/roles/ceph-facts/tasks/facts.yml
+++ b/roles/ceph-facts/tasks/facts.yml
@@ -266,7 +266,7 @@
         or inventory_hostname in groups.get(nfs_group_name, [])
   block:
     - name: get ceph current status
-      command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       changed_when: false
       failed_when: false
       check_mode: no
@@ -283,16 +283,15 @@
     - name: set_fact rgw_hostname
      set_fact:
        rgw_hostname: "{% set _value = ansible_hostname -%}
-                      {% for key in (ceph_current_status['servicemap']['services']['rgw']['daemons'] | list) -%}
+                      {% for key in (ceph_current_status['services']['rgw']['daemons'] | list) -%}
                       {% if key == ansible_fqdn -%}
                       {% set _value = key -%}
                       {% endif -%}
                       {% endfor -%}
                       {{ _value }}"
       when:
-        - ceph_current_status['servicemap'] is defined
-        - ceph_current_status['servicemap']['services'] is defined
-        - ceph_current_status['servicemap']['services']['rgw'] is defined
+        - ceph_current_status['services'] is defined
+        - ceph_current_status['services']['rgw'] is defined
 
 - name: check if the ceph conf exists
   stat:
-- 
2.39.5
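
To double-check the structural difference the lookups rely on, one can compare
the two outputs on a monitor node. This is an illustrative sketch, not part of
the patch, and assumes jq is installed:

$ ceph status -f json | jq '.servicemap.services | keys'
$ ceph service dump -f json | jq '.services | keys'

Both commands should list the same service names (e.g. "rbd-mirror", "rgw"),
which is why dropping the top-level ['servicemap'] key is the only change
needed in the playbook and Jinja lookups.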