git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
ceph-handler: use haproxy maintenance for rgw restarts
author: Seena Fallah <seenafallah@gmail.com>
Mon, 10 Jun 2024 10:11:55 +0000 (12:11 +0200)
committer: Guillaume Abrioux <gabrioux@ibm.com>
Thu, 27 Jun 2024 07:42:39 +0000 (09:42 +0200)
RGW currently restarts without waiting for existing connections to
close. By adjusting the HAProxy weight before the restart, we can
ensure that no active connections are disrupted during the restart
process.

Signed-off-by: Seena Fallah <seenafallah@gmail.com>
group_vars/all.yml.sample
roles/ceph-defaults/defaults/main.yml
roles/ceph-handler/templates/restart_rgw_daemon.sh.j2

index 205397288f8752785a5c994540597bc14bdff3ae..ca6e48004219c7e6205f482944c7d30d82a7441b 100644 (file)
@@ -420,6 +420,7 @@ dummy:
 # RGW handler checks
 #handler_health_rgw_check_retries: 5
 #handler_health_rgw_check_delay: 10
+#handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 #handler_health_nfs_check_retries: 5
index d8b06cdd1904c6db363db0570e10b2cbea1b3d05..a61280100c3c3e8a88fcb4d82a0c723948326c87 100644 (file)
@@ -412,6 +412,7 @@ handler_health_mds_check_delay: 10
 # RGW handler checks
 handler_health_rgw_check_retries: 5
 handler_health_rgw_check_delay: 10
+handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 handler_health_nfs_check_retries: 5
index 5ea0f3c7db2cff3edbe7b0859911cc7e3f3d78f4..d7eb36a7235f847b664d8c7b0aea5eaf6c320e2b 100644 (file)
@@ -11,6 +11,7 @@ else
     RGW_PROTOCOL=http
 fi
 INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %})
+HAPROXY_BACKEND=({% for i in rgw_instances %}{{ i.haproxy_backend | default('rgw-backend') }} {% endfor %})
 RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %})
 RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %})
 RGW_ZONE="{{ rgw_zone }}"
@@ -78,19 +79,38 @@ check_rest() {
 }
 
 for ((i=0; i<${RGW_NUMS}; i++)); do
-  # First, restart the daemon
-
   # Check if systemd unit exists
   # This is needed for new instances as the restart might trigger before the deployment
-  if systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
-    systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
-  else
+  if ! systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
     echo "Systemd unit ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]} does not exist."
     continue
   fi
 
+{% if handler_rgw_use_haproxy_maintenance %}
+  # set server weight to 0 on haproxy
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 0" | socat stdio {{ haproxy_socket_path }}
+
+  # wait for the connections to drop
+  retries={{ handler_rgw_haproxy_maintenance_retries | default(60) }}
+  while [ $retries -gt 0 ]; do
+    if [ "$(echo "show servers conn ${HAPROXY_BACKEND[i]}" | socat stdio {{ haproxy_socket_path }} | grep "${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} " | awk '{ print $7 }')" -eq 0 ]; then
+      break
+    fi
+    sleep 1
+    let retries=retries-1
+  done
+{% endif %}
+
+  # Restart the daemon
+  systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
+
   # Check socket files
   check_socket ${i}
   # Check rest
   check_rest ${i}
+
+{% if handler_rgw_use_haproxy_maintenance %}
+  # set server weight to 100 on haproxy
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 100" | socat stdio {{ haproxy_socket_path }}
+{% endif %}
 done