git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ansible.git/commitdiff
ceph-handler: use haproxy maintenance for rgw restarts
author: Seena Fallah <seenafallah@gmail.com>
Mon, 10 Jun 2024 10:11:55 +0000 (12:11 +0200)
committer: Guillaume Abrioux <gabrioux@ibm.com>
Thu, 27 Jun 2024 07:42:39 +0000 (09:42 +0200)
RGW currently restarts without waiting for existing connections to
close. By adjusting the HAProxy weight before the restart, we can
ensure that no active connections are disrupted during the restart
process.

Signed-off-by: Seena Fallah <seenafallah@gmail.com>
group_vars/all.yml.sample
roles/ceph-defaults/defaults/main.yml
roles/ceph-handler/templates/restart_rgw_daemon.sh.j2

index 205397288f8752785a5c994540597bc14bdff3ae..ca6e48004219c7e6205f482944c7d30d82a7441b 100644 (file)
@@ -420,6 +420,7 @@ dummy:
 # RGW handler checks
 #handler_health_rgw_check_retries: 5
 #handler_health_rgw_check_delay: 10
+#handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 #handler_health_nfs_check_retries: 5
index d8b06cdd1904c6db363db0570e10b2cbea1b3d05..a61280100c3c3e8a88fcb4d82a0c723948326c87 100644 (file)
@@ -412,6 +412,7 @@ handler_health_mds_check_delay: 10
 # RGW handler checks
 handler_health_rgw_check_retries: 5
 handler_health_rgw_check_delay: 10
+handler_rgw_use_haproxy_maintenance: false
 
 # NFS handler checks
 handler_health_nfs_check_retries: 5
index 5ea0f3c7db2cff3edbe7b0859911cc7e3f3d78f4..d7eb36a7235f847b664d8c7b0aea5eaf6c320e2b 100644 (file)
@@ -11,6 +11,7 @@ else
     RGW_PROTOCOL=http
 fi
 INSTANCES_NAME=({% for i in rgw_instances %}{{ i.instance_name }} {% endfor %})
+HAPROXY_BACKEND=({% for i in rgw_instances %}{{ i.haproxy_backend | default('rgw-backend') }} {% endfor %})
 RGW_IPS=({% for i in rgw_instances %}{{ i.radosgw_address }} {% endfor %})
 RGW_PORTS=({% for i in rgw_instances %}{{ i.radosgw_frontend_port }} {% endfor %})
 RGW_ZONE="{{ rgw_zone }}"
@@ -78,19 +79,38 @@ check_rest() {
 }
 
 for ((i=0; i<${RGW_NUMS}; i++)); do
-  # First, restart the daemon
-
   # Check if systemd unit exists
   # This is needed for new instances as the restart might trigger before the deployment
-  if systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
-    systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
-  else
+  if ! systemctl list-units --full --all | grep -q "ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}"; then
     echo "Systemd unit ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]} does not exist."
     continue
   fi
 
+{% if handler_rgw_use_haproxy_maintenance %}
+  # set server weight to 0 on haproxy
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 0" | socat stdio {{ haproxy_socket_path }}
+
+  # wait for the connections to drop
+  retries={{ handler_rgw_haproxy_maintenance_retries | default(60) }}
+  while [ $retries -gt 0 ]; do
+    if [ "$(echo "show servers conn ${HAPROXY_BACKEND[i]}" | socat stdio {{ haproxy_socket_path }} | grep "${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} " | awk '{ print $7 }')" -eq 0 ]; then
+      break
+    fi
+    sleep 1
+    let retries=retries-1
+  done
+{% endif %}
+
+  # Restart the daemon
+  systemctl restart ceph-radosgw@rgw.${RGW_ZONE}.${HOST_NAME}.${INSTANCES_NAME[i]}
+
   # Check socket files
   check_socket ${i}
   # Check rest
   check_rest ${i}
+
+{% if handler_rgw_use_haproxy_maintenance %}
+  # set server weight to 100 on haproxy
+  echo "set weight ${HAPROXY_BACKEND[i]}/${INSTANCES_NAME[i]} 100" | socat stdio {{ haproxy_socket_path }}
+{% endif %}
 done