From 3f9081931f8a369b075060083cdb225e3477f99a Mon Sep 17 00:00:00 2001
From: Dimitri Savineau
Date: Mon, 26 Oct 2020 17:49:47 -0400
Subject: [PATCH] rgw/rbdmirror: use service dump instead of ceph -s

The ceph status command returns a lot of information that ends up stored
in variables and/or facts, which consumes resources for nothing.
When checking the rgw/rbdmirror services status, we only use the
servicemap structure from the ceph status output.
To optimize this, we can use the ceph service dump command, which
contains the same needed information.
This command returns less information and is slightly faster than the
ceph status command.

$ ceph status -f json | wc -c
2001
$ ceph service dump -f json | wc -c
1105
$ time ceph status -f json > /dev/null

real    0m0.557s
user    0m0.516s
sys     0m0.040s

$ time ceph service dump -f json > /dev/null

real    0m0.454s
user    0m0.434s
sys     0m0.020s

Signed-off-by: Dimitri Savineau
---
 infrastructure-playbooks/shrink-rbdmirror.yml | 18 +++++++++---------
 infrastructure-playbooks/shrink-rgw.yml       | 14 +++++++-------
 roles/ceph-facts/tasks/facts.yml              |  9 ++++-----
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/infrastructure-playbooks/shrink-rbdmirror.yml b/infrastructure-playbooks/shrink-rbdmirror.yml
index 9b01a1e00..d87130044 100644
--- a/infrastructure-playbooks/shrink-rbdmirror.yml
+++ b/infrastructure-playbooks/shrink-rbdmirror.yml
@@ -67,7 +67,7 @@
         container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ ansible_hostname }}"
 
     - name: exit playbook, if can not connect to the cluster
-      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_health
       until: ceph_health is succeeded
       retries: 5
@@ -80,14 +80,14 @@
     - name: set_fact rbdmirror_gids
       set_fact:
         rbdmirror_gids: "{{ rbdmirror_gids | default([]) + [ item ] }}"
-      with_items: "{{ (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list }}"
+      with_items: "{{ (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list }}"
       when: item != 'summary'
 
     - name: set_fact rbdmirror_to_kill_gid
       set_fact:
-        rbdmirror_to_kill_gid: "{{ (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'][item]['gid'] }}"
+        rbdmirror_to_kill_gid: "{{ (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'][item]['gid'] }}"
       with_items: "{{ rbdmirror_gids }}"
-      when: (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'][item]['metadata']['id'] == rbdmirror_to_kill_hostname
+      when: (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'][item]['metadata']['id'] == rbdmirror_to_kill_hostname
 
   tasks:
     - name: stop rbdmirror service
@@ -106,14 +106,14 @@
 
   post_tasks:
     - name: get servicemap details
-      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} timeout 5 ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_health
       failed_when:
-        - "'rbd-mirror' in (ceph_health.stdout | from_json)['servicemap']['services'].keys() | list"
-        - rbdmirror_to_kill_gid in (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list
+        - "'rbd-mirror' in (ceph_health.stdout | from_json)['services'].keys() | list"
+        - rbdmirror_to_kill_gid in (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list
       until:
-        - "'rbd-mirror' in (ceph_health.stdout | from_json)['servicemap']['services'].keys() | list"
-        - rbdmirror_to_kill_gid not in (ceph_health.stdout | from_json)['servicemap']['services']['rbd-mirror']['daemons'].keys() | list
+        - "'rbd-mirror' in (ceph_health.stdout | from_json)['services'].keys() | list"
+        - rbdmirror_to_kill_gid not in (ceph_health.stdout | from_json)['services']['rbd-mirror']['daemons'].keys() | list
       when: rbdmirror_to_kill_gid is defined
       retries: 12
       delay: 10
diff --git a/infrastructure-playbooks/shrink-rgw.yml b/infrastructure-playbooks/shrink-rgw.yml
index e8e9a6cd8..88231b2c2 100644
--- a/infrastructure-playbooks/shrink-rgw.yml
+++ b/infrastructure-playbooks/shrink-rgw.yml
@@ -76,12 +76,12 @@
       delay: 2
 
     - name: get rgw instances
-      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       register: rgw_instances
 
     - name: exit playbook, if the rgw_to_kill doesn't exist
-      when: rgw_to_kill not in (rgw_instances.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+      when: rgw_to_kill not in (rgw_instances.stdout | from_json).services.rgw.daemons.keys() | list
       fail:
         msg: >
           It seems that the rgw instance given is not part of the ceph cluster. Please
@@ -111,14 +111,14 @@
       delay: 2
 
    - name: exit if rgw_to_kill is reported in ceph status
-      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       register: ceph_status
       failed_when:
-        - (ceph_status.stdout | from_json).servicemap.services.rgw is defined
-        - rgw_to_kill in (ceph_status.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+        - (ceph_status.stdout | from_json).services.rgw is defined
+        - rgw_to_kill in (ceph_status.stdout | from_json).services.rgw.daemons.keys() | list
       until:
-        - (ceph_status.stdout | from_json).servicemap.services.rgw is defined
-        - rgw_to_kill not in (ceph_status.stdout | from_json).servicemap.services.rgw.daemons.keys() | list
+        - (ceph_status.stdout | from_json).services.rgw is defined
+        - rgw_to_kill not in (ceph_status.stdout | from_json).services.rgw.daemons.keys() | list
       retries: 3
       delay: 3
diff --git a/roles/ceph-facts/tasks/facts.yml b/roles/ceph-facts/tasks/facts.yml
index 6987244e7..c7bd414de 100644
--- a/roles/ceph-facts/tasks/facts.yml
+++ b/roles/ceph-facts/tasks/facts.yml
@@ -266,7 +266,7 @@
         or inventory_hostname in groups.get(nfs_group_name, [])
   block:
     - name: get ceph current status
-      command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} -s -f json"
+      command: "{{ timeout_command }} {{ _container_exec_cmd | default('') }} ceph --cluster {{ cluster }} service dump -f json"
       changed_when: false
       failed_when: false
       check_mode: no
@@ -283,16 +283,15 @@
     - name: set_fact rgw_hostname
      set_fact:
        rgw_hostname: "{% set _value = ansible_hostname -%}
-                      {% for key in (ceph_current_status['servicemap']['services']['rgw']['daemons'] | list) -%}
+                      {% for key in (ceph_current_status['services']['rgw']['daemons'] | list) -%}
                       {% if key == ansible_fqdn -%}
                       {% set _value = key -%}
                       {% endif -%}
                       {% endfor -%}
                       {{ _value }}"
       when:
-        - ceph_current_status['servicemap'] is defined
-        - ceph_current_status['servicemap']['services'] is defined
-        - ceph_current_status['servicemap']['services']['rgw'] is defined
+        - ceph_current_status['services'] is defined
+        - ceph_current_status['services']['rgw'] is defined
 
 - name: check if the ceph conf exists
   stat:
-- 
2.39.5
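
To double-check the structural difference the lookups rely on, one can compare
the two outputs on a monitor node. This is an illustrative sketch, not part of
the patch, and assumes jq is installed:

$ ceph status -f json | jq '.servicemap.services | keys'
$ ceph service dump -f json | jq '.services | keys'

Both commands should list the same service names (e.g. "rbd-mirror", "rgw"),
which is why dropping the top-level ['servicemap'] key is the only change
needed in the playbook and Jinja lookups.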