From: Yuval Lifshitz Date: Mon, 23 Jun 2025 10:10:37 +0000 (+0000) Subject: rgw/notifications: add http request timeout and max inflight X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8a2e7bfedf21180bf363a6b22fb22b0827fffc45;p=ceph.git rgw/notifications: add http request timeout and max inflight also make connection timeout configurable Fixes: https://tracker.ceph.com/issues/71402 Signed-off-by: Yuval Lifshitz --- diff --git a/doc/radosgw/notifications.rst b/doc/radosgw/notifications.rst index c780409f078..7cdf76e97a6 100644 --- a/doc/radosgw/notifications.rst +++ b/doc/radosgw/notifications.rst @@ -137,6 +137,17 @@ Notification Performance Statistics event on each notification, but ``pubsub_push_ok`` and ``pubsub_push_fail`` are incremented per push action on each notification. +Configuration Options +------------------------------ +The following are global configuration options for the different endpoints: + +HTTP +~~~~ +.. confval:: rgw_http_notif_message_timeout +.. confval:: rgw_http_notif_connection_timeout +.. confval:: rgw_http_notif_max_inflight + + Bucket Notification REST API ---------------------------- diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 3303ce4a6c5..607640d7276 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -4307,6 +4307,42 @@ options: services: - rgw with_legacy: true +- name: rgw_http_notif_message_timeout + type: uint + level: advanced + desc: This is the maximum time in seconds to deliver a notification + long_desc: This is the maximum time in seconds to deliver a notification. + Delivery error occurs when the message timeout is exceeded. + This value includes the connection time, and hence must be larger than rgw_http_notif_connection_timeout. + If set to zero the http client will wait indefinitely. 
+ see https://curl.se/libcurl/c/CURLOPT_TIMEOUT.html + default: 10 + services: + - rgw + with_legacy: true +- name: rgw_http_notif_connection_timeout + type: uint + level: advanced + desc: This is the maximum time in seconds to connect to an endpoint + long_desc: This is the maximum time in seconds to connect to an endpoint. + Delivery error occurs when the connection timeout is exceeded. + If set to zero the default value of 300 seconds will be used. + see https://curl.se/libcurl/c/CURLOPT_CONNECTTIMEOUT.html + default: 5 + services: + - rgw + with_legacy: true +- name: rgw_http_notif_max_inflight + type: uint + level: advanced + desc: This is the maximum number of messages in-flight (across all http endpoints) + long_desc: This is the maximum number of messages in-flight (across all http endpoints). + Delivery error (BUSY) occurs when the number of messages is exceeded. + If set to zero there is no limit on the number of messages in-flight. + default: 8192 + services: + - rgw + with_legacy: true - name: rgw_d4n_l1_datacache_address type: str level: advanced diff --git a/src/rgw/driver/rados/rgw_pubsub_push.cc b/src/rgw/driver/rados/rgw_pubsub_push.cc index f3baeeb0aa8..159e2b876df 100644 --- a/src/rgw/driver/rados/rgw_pubsub_push.cc +++ b/src/rgw/driver/rados/rgw_pubsub_push.cc @@ -61,6 +61,7 @@ bool get_bool(const RGWHTTPArgs& args, const std::string& name, bool default_val static std::unique_ptr s_http_manager; static std::shared_mutex s_http_manager_mutex; +static std::atomic s_http_manager_inflight(0); class RGWPubSubHTTPEndpoint : public RGWPubSubEndpoint { private: @@ -99,10 +100,17 @@ public: ldout(cct, 1) << "ERROR: send failed. http endpoint manager not running" << dendl; return -ESRCH; } + const auto max_inflight = cct->_conf->rgw_http_notif_max_inflight; + if (max_inflight != 0 && + s_http_manager_inflight >= max_inflight) { + ldout(cct, 1) << "ERROR: send failed. http endpoint manager busy. 
in-flight requests: " << + s_http_manager_inflight << " >= " << max_inflight << dendl; + return -EBUSY; + } bufferlist read_bl; RGWPostHTTPData request(cct, "POST", endpoint, &read_bl, verify_ssl); - //default to 3 seconds for wrong url hits - if wrong endpoint configured - request.set_req_connect_timeout(3); + request.set_req_connect_timeout(cct->_conf->rgw_http_notif_connection_timeout); + request.set_req_timeout(cct->_conf->rgw_http_notif_message_timeout); const auto post_data = json_format_pubsub_event(event); if (cloudevents) { // following: https://github.com/cloudevents/spec/blob/v1.0.1/http-protocol-binding.md @@ -118,11 +126,13 @@ public: request.set_post_data(post_data); request.set_send_length(post_data.length()); request.append_header("Content-Type", "application/json"); + ++s_http_manager_inflight; if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending); auto rc = s_http_manager->add_request(&request); if (rc == 0) { rc = request.wait(dpp, y); } + --s_http_manager_inflight; if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending); // TODO: use read_bl to process return code and handle according to ack level return rc;