git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: make CONN_STATUS_EXPIRE_SECS a cfg option
author Oguzhan Ozmen <oozmen@bloomberg.net>
Tue, 3 Mar 2026 01:39:19 +0000 (01:39 +0000)
committer Oguzhan Ozmen <oozmen@bloomberg.net>
Tue, 2 Jun 2026 22:16:20 +0000 (22:16 +0000)
Introduce a new radosgw option 'rgw_rest_conn_ip_fail_timeout_secs' that
replaces the hard-coded constant CONN_STATUS_EXPIRE_SECS, so the per-IP
failure timeout can be set through configuration.

Signed-off-by: Oguzhan Ozmen <oozmen@bloomberg.net>
src/common/options/rgw.yaml.in
src/rgw/rgw_rest_conn.cc
src/rgw/rgw_rest_conn.h

index 2d1c4cb777f9fe76250b76cb0fc75bb0a460b1f3..701f45759f7981b2fee84ba0d90d9a34a8c02175 100644 (file)
@@ -2080,6 +2080,24 @@ options:
   default: false
   services:
   - rgw
+  see_also:
+  - rgw_rest_conn_ip_fail_timeout_secs
+  with_legacy: true
+- name: rgw_rest_conn_ip_fail_timeout_secs
+  desc: IP failure tracking timeout (requires rgw_rest_conn_connect_to_resolved_ips=true)
+  type: uint
+  level: advanced
+  long_desc: When rgw_rest_conn_connect_to_resolved_ips is enabled, RGW tracks
+    per-IP connection failures by remembering the timestamp of the most recent
+    failure. This option controls how long (in seconds) an IP address remains
+    marked as "failed" before RGW considers it eligible for retry.
+    After this timeout expires, the IP will be tried again in the normal
+    round-robin rotation.
+  default: 2
+  services:
+  - rgw
+  see_also:
+  - rgw_rest_conn_connect_to_resolved_ips
   with_legacy: true
 - name: rgw_obj_stripe_size
   type: size
index 412a33d95a113024cacc32c75a65003918d21f62..6d27faa27c5dfd650e7ca89f068d6a7468746c55 100644 (file)
@@ -167,7 +167,7 @@ void RGWRESTConn::populate_connect_to(RGWEndpoint& endpoint, ResolvedEndpoint& r
     return;
   }
 
-  static constexpr uint32_t CONN_STATUS_EXPIRE_SECS = 2;
+  const auto ip_fail_timeout = cct->_conf->rgw_rest_conn_ip_fail_timeout_secs;
   const size_t num_ips = resolved_endpoint.resolved_ips.size();
 
   // Round-robin through IPs, skipping any that are marked down
@@ -182,7 +182,7 @@ void RGWRESTConn::populate_connect_to(RGWEndpoint& endpoint, ResolvedEndpoint& r
     }
 
     auto diff = ceph::to_seconds<double>(ceph::real_clock::now() - last_fail);
-    if (diff >= CONN_STATUS_EXPIRE_SECS) {
+    if (diff >= ip_fail_timeout) {
       // Failure expired, mark IP as up and use it
       ip_status.mark_up();
       ldout(cct, 5) << "IP " << ip_status.connect_to << " failure expired, marking up" << dendl;
@@ -204,7 +204,7 @@ int RGWRESTConn::get_endpoint(RGWEndpoint& endpoint)
     return -EINVAL;
   }
 
-  static constexpr uint32_t CONN_STATUS_EXPIRE_SECS = 2;
+  const auto ip_fail_timeout = cct->_conf->rgw_rest_conn_ip_fail_timeout_secs;
   auto now = ceph::real_clock::now();
 
   // Helper to check if an endpoint has at least one available IP
@@ -217,7 +217,7 @@ int RGWRESTConn::get_endpoint(RGWEndpoint& endpoint)
     // Fast path: if no recent failures at endpoint level, all IPs are available
     const auto& ep_last_fail = res_ep.last_failure_time.load();
     if (ceph::real_clock::is_zero(ep_last_fail) ||
-        ceph::to_seconds<double>(now - ep_last_fail) >= CONN_STATUS_EXPIRE_SECS) {
+        ceph::to_seconds<double>(now - ep_last_fail) >= ip_fail_timeout) {
       return true;
     }
 
@@ -228,7 +228,7 @@ int RGWRESTConn::get_endpoint(RGWEndpoint& endpoint)
         return true;  // This IP is up
       }
       auto diff = ceph::to_seconds<double>(now - last_fail);
-      if (diff >= CONN_STATUS_EXPIRE_SECS) {
+      if (diff >= ip_fail_timeout) {
         return true;  // This IP's failure has expired
       }
     }
index 66c849f1a1e97d13b6b00e8a6c7c5367c2664755..325a9ab7a1d9f084f27f6765c9d40b191b269da0 100644 (file)
@@ -69,7 +69,7 @@ inline param_vec_t make_param_list(const std::map<std::string, std::string> *pp)
  * ResolvedIP - Per-IP connection status tracking.
  *
  * Each resolved IP address has its own failure status. An IP is considered
- * "down" if last_failure is non-zero and less than CONN_STATUS_EXPIRE_SECS old.
+ * "down" if last_failure is non-zero and less than rgw_rest_conn_ip_fail_timeout_secs old.
  * After the timeout, the IP becomes eligible for retry.
  */
 struct ResolvedIP {