git.apps.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
common: disable/enable pg_autoscaler
author: Guillaume Abrioux <gabrioux@redhat.com>
Mon, 14 Jun 2021 16:01:41 +0000 (18:01 +0200)
committer: Dimitri Savineau <savineau.dimitri@gmail.com>
Tue, 20 Jul 2021 15:04:25 +0000 (11:04 -0400)
The PG autoscaler can disrupt the PG checks, so the idea here is to
disable it and re-enable it after the restart is done.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
(cherry picked from commit 13036115e2862fc8ca9c04e1379fd793e0e7036a)

infrastructure-playbooks/cephadm-adopt.yml
infrastructure-playbooks/rolling_update.yml
infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml
roles/ceph-facts/tasks/facts.yml
roles/ceph-handler/tasks/handler_osds.yml

index 47ba64718a6edee87d7cf0b37647d93895a551d0..c9cf6e822b6dd13096a2edca9379419764ac5979 100644 (file)
     - import_role:
         name: ceph-defaults
 
+    - name: get pool list
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
+      register: pool_list
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+
+    - name: get balancer module status
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
+      register: balancer_status
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+
+    - name: set_fact pools_pgautoscaler_mode
+      set_fact:
+        pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
+      run_once: true
+      with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
+
+    - name: disable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
+    - name: disable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: false
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
     - name: set osd flags
       ceph_osd_flag:
         cluster: "{{ cluster }}"
     - import_role:
         name: ceph-defaults
 
+    - name: re-enable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: true
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
     - name: unset osd flags
       ceph_osd_flag:
         cluster: "{{ cluster }}"
         CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
         CEPH_CONTAINER_BINARY: "{{ container_binary }}"
 
+    - name: re-enable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
 - name: redeploy mds daemons
   hosts: "{{ mds_group_name|default('mdss') }}"
   become: true
index 48969460b496bcb3a98ded123afbec19cde8f259..630e0d2d277923bb948013a8d65a436a6fb23913 100644 (file)
         name: ceph-facts
         tasks_from: container_binary.yml
 
+    - name: get pool list
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
+      register: pool_list
+      run_once: true
+      changed_when: false
+
+    - name: get balancer module status
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
+      register: balancer_status
+      run_once: true
+      changed_when: false
+
+    - name: set_fact pools_pgautoscaler_mode
+      set_fact:
+        pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
+      run_once: true
+      with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
+
+    - name: disable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
+      run_once: true
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
+    - name: disable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: false
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
     - name: set osd flags
       ceph_osd_flag:
         name: "{{ item }}"
         name: ceph-facts
         tasks_from: container_binary.yml
 
+    - name: re-enable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: true
+      run_once: true
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
     - name: unset osd flags
       ceph_osd_flag:
         name: "{{ item }}"
         - noout
         - nodeep-scrub
 
+    - name: re-enable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
+      run_once: true
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
 - name: upgrade ceph mdss cluster, deactivate all rank > 0
   hosts: "{{ mon_group_name | default('mons') }}[0]"
   become: true
index bbd7fe80858e6ffc02062862c474399ace6d5452..d592147367c6db4612d53e41078bdec83532877f 100644 (file)
         name: ceph-facts
         tasks_from: container_binary.yml
 
+    - name: get pool list
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
+      register: pool_list
+      run_once: true
+      changed_when: false
+
+    - name: get balancer module status
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
+      register: balancer_status
+      run_once: true
+      changed_when: false
+
+    - name: set_fact pools_pgautoscaler_mode
+      set_fact:
+        pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
+      run_once: true
+      with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
+
+    - name: disable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
+      run_once: true
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
+    - name: disable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: false
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
     - name: set osd flags
       ceph_osd_flag:
         name: "{{ item }}"
         name: ceph-facts
         tasks_from: container_binary.yml
 
-    - name: set osd flags
+    - name: re-enable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: true
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
+    - name: unset osd flags
       ceph_osd_flag:
         name: "{{ item }}"
         cluster: "{{ cluster }}"
         - noout
         - nodeep-scrub
 
+    - name: re-enable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
+      run_once: true
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
+
 - name: switching from non-containerized to containerized ceph mds
 
   hosts: "{{ mds_group_name|default('mdss') }}"
index 103762ffaa84c6486d1f75e7ee6388124c36caed..be318de3c1beb8064491c6b02357d4d087e9fbf3 100644 (file)
 - name: import_tasks container_binary.yml
   import_tasks: container_binary.yml
 
+- name: set_fact ceph_cmd
+  set_fact:
+    ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}"
+
 # In case ansible_python_interpreter is set by the user,
 # ansible will not discover python and discovered_interpreter_python
 # will not be set
index 4828409c73e0ec31be38dd64ea4effb222c30be0..e1fb0ac92f8a0ae27245136777704c43115ca3cb 100644 (file)
 ---
-- name: set _osd_handler_called before restart
+- name: set_fact trigger_restart
   set_fact:
-    _osd_handler_called: True
-
-- name: unset noup flag
-  ceph_osd_flag:
-    name: noup
-    cluster: "{{ cluster }}"
-    state: absent
-  environment:
-    CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
-    CEPH_CONTAINER_BINARY: "{{ container_binary }}"
-  delegate_to: "{{ groups[mon_group_name][0] }}"
+    trigger_restart: true
+  loop: "{{ groups[osd_group_name] }}"
+  when: hostvars[item]['handler_osd_status'] | default(False) | bool
   run_once: true
 
-# This does not just restart OSDs but everything else too. Unfortunately
-# at this time the ansible role does not have an OSD id list to use
-# for restarting them specifically.
-# This does not need to run during a rolling update as the playbook will
-# restart all OSDs using the tasks "start ceph osd" or
-# "restart containerized ceph osd"
-- name: copy osd restart script
-  template:
-    src: restart_osd_daemon.sh.j2
-    dest: "{{ tmpdirpath.path }}/restart_osd_daemon.sh"
-    owner: root
-    group: root
-    mode: 0750
-
-- name: restart ceph osds daemon(s)
-  command: /usr/bin/env bash {{ hostvars[item]['tmpdirpath']['path'] }}/restart_osd_daemon.sh
-  when:
-    - hostvars[item]['handler_osd_status'] | default(False) | bool
-    - handler_health_osd_check | bool
-    - hostvars[item]['_osd_handler_called'] | default(False) | bool
-  with_items: "{{ groups[osd_group_name] | intersect(ansible_play_batch) }}"
-  delegate_to: "{{ item }}"
-  run_once: True
-
-- name: set _osd_handler_called after restart
-  set_fact:
-    _osd_handler_called: False
+- name: osd handler
+  when: trigger_restart | default(False) | bool
+  block:
+    - name: set _osd_handler_called before restart
+      set_fact:
+        _osd_handler_called: True
+
+    - name: unset noup flag
+      ceph_osd_flag:
+        name: noup
+        cluster: "{{ cluster }}"
+        state: absent
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      run_once: true
+
+    # This does not just restart OSDs but everything else too. Unfortunately
+    # at this time the ansible role does not have an OSD id list to use
+    # for restarting them specifically.
+    # This does not need to run during a rolling update as the playbook will
+    # restart all OSDs using the tasks "start ceph osd" or
+    # "restart containerized ceph osd"
+    - name: copy osd restart script
+      template:
+        src: restart_osd_daemon.sh.j2
+        dest: "{{ tmpdirpath.path }}/restart_osd_daemon.sh"
+        owner: root
+        group: root
+        mode: 0750
+
+    - name: get pool list
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} osd dump -f json"
+      register: pool_list
+      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
+      run_once: true
+      changed_when: false
+
+    - name: get balancer module status
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer status -f json"
+      register: balancer_status
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+
+    - name: set_fact pools_pgautoscaler_mode
+      set_fact:
+        pools_pgautoscaler_mode: "{{ pools_pgautoscaler_mode | default([]) | union([{'name': item.pool_name, 'mode': item.pg_autoscale_mode}]) }}"
+      run_once: true
+      with_items: "{{ (pool_list.stdout | from_json)['pools'] }}"
+
+    - name: disable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer off"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
+
+    - name: disable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: false
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
+      run_once: true
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
+    - name: restart ceph osds daemon(s)
+      command: /usr/bin/env bash {{ hostvars[item]['tmpdirpath']['path'] }}/restart_osd_daemon.sh
+      when:
+        - hostvars[item]['handler_osd_status'] | default(False) | bool
+        - handler_health_osd_check | bool
+        - hostvars[item]['_osd_handler_called'] | default(False) | bool
+      with_items: "{{ groups[osd_group_name] | intersect(ansible_play_batch) }}"
+      delegate_to: "{{ item }}"
+      run_once: True
+
+    - name: set _osd_handler_called after restart
+      set_fact:
+        _osd_handler_called: False
+
+    - name: re-enable pg autoscale on pools
+      ceph_pool:
+        name: "{{ item.name }}"
+        cluster: "{{ cluster }}"
+        pg_autoscale_mode: true
+      with_items: "{{ pools_pgautoscaler_mode }}"
+      run_once: true
+      delegate_to: "{{ groups.get(mon_group_name, [])[0] }}"
+      when:
+        - pools_pgautoscaler_mode is defined
+        - item.mode == 'on'
+      environment:
+        CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}"
+        CEPH_CONTAINER_BINARY: "{{ container_binary }}"
+
+    - name: re-enable balancer
+      command: "{{ ceph_cmd }} --cluster {{ cluster }} balancer on"
+      run_once: true
+      delegate_to: "{{ groups[mon_group_name][0] }}"
+      changed_when: false
+      when: (balancer_status.stdout | from_json)['active'] | bool
\ No newline at end of file