From: Oguzhan Ozmen Date: Tue, 14 Apr 2026 16:46:05 +0000 (+0000) Subject: rgw: store RGWEndpoint URL as boost::urls::url X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3513cd888a0e6c6c879ef42823255c2e620a8fca;p=ceph.git rgw: store RGWEndpoint URL as boost::urls::url Replace raw string manipulation in RGWEndpoint with boost::urls::url. URL path/query/host changes now use set_path(), set_query(), set_host() instead of string concatenation. Rename original_url to endpoint_url_lookup_id to clarify its role as a health-tracking key for ResolvedEndpoint lookup in set_endpoint_unconnectable(). Signed-off-by: Oguzhan Ozmen --- diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc index 1b1d5bae8b7..e2d9d87180c 100644 --- a/src/rgw/rgw_http_client.cc +++ b/src/rgw/rgw_http_client.cc @@ -333,7 +333,7 @@ void RGWHTTPClient::init() } } - const string& url = endpoint.get_url(); + const string url = endpoint.get_url(); auto pos = url.find("://"); if (pos == string::npos) { diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h index 5aec7c40684..16f73ad23d6 100644 --- a/src/rgw/rgw_http_client.h +++ b/src/rgw/rgw_http_client.h @@ -10,6 +10,7 @@ #include "rgw_http_client_types.h" #include +#include using param_pair_t = std::pair; using param_vec_t = std::vector; @@ -23,21 +24,22 @@ class RGWHTTPManager; /** * RGWEndpoint - Represents an HTTP endpoint with additional metdata such as connection routing. * - * Unlike a plain URL string, RGWEndpoint carries additional information needed - * to leverage libcurl's CURLOPT_CONNECT_TO option. This enables RGW to route - * requests to specific IP addresses while preserving the original hostname for - * TLS/SNI and Host headers. + * Wraps boost::urls::url for proper URL manipulation (set_path, set_query, + * set_host) instead of raw string concatenation. + * + * Carries a CURLOPT_CONNECT_TO override for IP-level routing. * * Fields: - * - url: The effective URL for the request (may be modified with paths). - * - original_url: The initially configured endpoint URL (preserved for reference). - * - connect_to: libcurl CONNECT_TO string (format: "host:port:addr:port") to - * override connection routing without changing the request URL. + * - endpoint_url_lookup_id: The initially configured endpoint URL (immutable). + * Used as a lookup key in RGWRESTConn to find the ResolvedEndpoint + * for health tracking (marking IPs as up/down). + * - url: boost::urls::url for the request URL (mutable via set_path, etc.). + * - connect_to: libcurl CONNECT_TO string (format: "host:port:addr:port"). */ struct RGWEndpoint { private: - std::string url; - std::string original_url; + std::string endpoint_url_lookup_id; + boost::urls::url url; std::string connect_to; public: @@ -45,45 +47,57 @@ public: RGWEndpoint(const char* u) : RGWEndpoint(std::string(u)) {} - RGWEndpoint(std::string u, std::string c = {}) - : url(std::move(u)), original_url(url), connect_to(std::move(c)) {} + RGWEndpoint(const std::string& u) : endpoint_url_lookup_id(u) { + auto r = boost::urls::parse_uri(u); + if (r.has_value()) { + url = r.value(); + } + } - RGWEndpoint with_url(std::string new_url) const { - RGWEndpoint e = *this; - e.set_url(std::move(new_url)); - return e; + RGWEndpoint(const std::string& u, const std::string& c) + : endpoint_url_lookup_id(u), connect_to(c) { + auto r = boost::urls::parse_uri(u); + if (r.has_value()) { + url = r.value(); + } } - void set_url(const std::string& _url) { - url = _url; - // Capture the first URL assignment as the original - if (original_url.empty()) { - original_url = _url; + void set_url(const std::string& u) { + auto r = boost::urls::parse_uri(u); + if (r.has_value()) { + url = r.value(); + } + if (endpoint_url_lookup_id.empty()) { + endpoint_url_lookup_id = u; } } - const std::string& get_url() const { return url; } - const std::string& get_original_url() const { return original_url; } - void set_connect_to(const std::string& _connect_to) { connect_to = _connect_to; } - const std::string& get_connect_to() const { return connect_to; } + std::string get_url() const { return std::string(url.buffer()); } + const std::string& get_endpoint_url_lookup_id() const { return endpoint_url_lookup_id; } - void add_trailing_slash() { - if (! url.empty() && url.back() != '/') - append_to_url("/"); + void set_path(const std::string& path) { url.set_encoded_path(path); } + void set_query(const std::string& q) { + if (q.empty()) return; + // strip leading '?' - if present, boost::urls adds it automatically + url.set_encoded_query(q[0] == '?' ? q.substr(1) : q); } + void set_host(const std::string& h) { url.set_host(h); } - void append_to_url(const std::string& suffix) { - url.append(suffix); + void add_trailing_slash() { + auto path = std::string(url.encoded_path()); + if (path.empty() || path.back() != '/') { + path += '/'; + url.set_encoded_path(path); + } } + void set_connect_to(const std::string& c) { connect_to = c; } + const std::string& get_connect_to() const { return connect_to; } + friend std::ostream& operator<<(std::ostream& os, const RGWEndpoint& ep) { - os << "RGWEndpoint: url=" << ep.url; - if (!ep.original_url.empty() && ep.original_url != ep.url) { - os << " original_url=" << ep.original_url; - } - if (!ep.connect_to.empty()) { - os << " connect_to=" << ep.connect_to; - } + os << "RGWEndpoint: url=" << ep.url.buffer() + << " endpoint_url_lookup_id=" << (ep.endpoint_url_lookup_id.empty() ? "" : ep.endpoint_url_lookup_id) + << " connect_to=" << (ep.connect_to.empty() ? "" : ep.connect_to); return os; } }; diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc index e82680e7b0e..02b80c2f1ed 100644 --- a/src/rgw/rgw_rest_client.cc +++ b/src/rgw/rgw_rest_client.cc @@ -425,16 +425,8 @@ auto RGWRESTSimpleRequest::forward_request(const DoutPrefixProvider *dpp, const string params_str; get_params_str(new_info.args.get_params(), params_str); - string new_url = endpoint.get_url(); - string& resource = new_info.request_uri; - string new_resource = resource; - if (new_url[new_url.size() - 1] == '/' && resource[0] == '/') { - new_url = new_url.substr(0, new_url.size() - 1); - } else if (resource[0] != '/') { - new_resource = "/"; - new_resource.append(resource); - } - new_url.append(new_resource + params_str); + endpoint.set_path(new_info.request_uri); + endpoint.set_query(params_str); bufferlist::iterator bliter; @@ -446,7 +438,6 @@ auto RGWRESTSimpleRequest::forward_request(const DoutPrefixProvider *dpp, const } method = new_info.method; - endpoint.set_url(new_url); std::ignore = process(dpp, y); @@ -581,7 +572,9 @@ void RGWRESTGenerateHTTPHeaders::init(const string& _method, const string& host, url_encode(iter->second, encode_slash)); } - endpoint = _endpoint.with_url(_endpoint.get_url() + resource + params_str); + endpoint = _endpoint; + endpoint.set_path(resource); + endpoint.set_query(params_str); const std::string date_str = get_gmt_date_str(); new_env->set("HTTP_DATE", date_str.c_str()); @@ -692,8 +685,8 @@ void RGWRESTStreamS3PutObj::send_init(const rgw_obj& obj) if (host_style == VirtualStyle) { resource_str = obj.get_oid(); - new_endpoint.set_url(protocol + "://" + bucket_name + "." + host); new_host = bucket_name + "." + new_host; + new_endpoint.set_host(new_host); } else { resource_str = bucket_name + "/" + obj.get_oid(); } @@ -847,13 +840,13 @@ int RGWRESTStreamRWRequest::do_send_prepare(const DoutPrefixProvider *dpp, RGWAc } if (host_style == VirtualStyle) { - new_endpoint.set_url(protocol + "://" + bucket_name + "." + host); + new_host = bucket_name + "." + host; + new_endpoint.set_host(new_host); if(pos == string::npos) { new_resource = ""; } else { new_resource = new_resource.substr(pos+1); } - new_host = bucket_name + "." + host; } new_endpoint.add_trailing_slash(); diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc index c0060c8fbdb..46f6685c6fd 100644 --- a/src/rgw/rgw_rest_conn.cc +++ b/src/rgw/rgw_rest_conn.cc @@ -273,11 +273,11 @@ RGWEndpoint RGWRESTConn::get_endpoint() void RGWRESTConn::set_endpoint_unconnectable(const RGWEndpoint& endpoint) { - const string& orig_url = endpoint.get_original_url(); + const string& endpoint_id = endpoint.get_endpoint_url_lookup_id(); const string& connect_to = endpoint.get_connect_to(); - ResolvedEndpoint* res_ep = find_resolved_endpoint(orig_url); - if (orig_url.empty() || !res_ep) { + ResolvedEndpoint* res_ep = find_resolved_endpoint(endpoint_id); + if (endpoint_id.empty() || !res_ep) { ldout(cct, 0) << "ERROR: endpoint is not valid or not found: " << endpoint << dendl; return; @@ -301,7 +301,7 @@ void RGWRESTConn::set_endpoint_unconnectable(const RGWEndpoint& endpoint) for (auto& res_ip : res_ep->resolved_ips) { res_ip.mark_down(); } - ldout(cct, 10) << "set all IPs unconnectable for endpoint url=" << orig_url << dendl; + ldout(cct, 10) << "set all IPs unconnectable for endpoint=" << endpoint << dendl; } void RGWRESTConn::populate_params(param_vec_t& params, const rgw_owner* uid, const string& zonegroup) diff --git a/src/test/rgw/test_rgw_http_client.cc b/src/test/rgw/test_rgw_http_client.cc index 79f80c67edb..33cb2d1f85f 100644 --- a/src/test/rgw/test_rgw_http_client.cc +++ b/src/test/rgw/test_rgw_http_client.cc @@ -13,83 +13,106 @@ using namespace std; TEST(RGWEndpointTest, default_constructor) { RGWEndpoint ep; EXPECT_TRUE(ep.get_url().empty()); - EXPECT_TRUE(ep.get_original_url().empty()); + EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty()); EXPECT_TRUE(ep.get_connect_to().empty()); } -TEST(RGWEndpointTest, constructor_sets_url_and_original_url) { +TEST(RGWEndpointTest, constructor_sets_url_and_lookup_id) { RGWEndpoint ep("http://example.com:8080"); EXPECT_EQ(ep.get_url(), "http://example.com:8080"); - EXPECT_EQ(ep.get_original_url(), "http://example.com:8080"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); } TEST(RGWEndpointTest, constructor_with_connect_to) { RGWEndpoint ep("http://example.com:8080", "example.com:8080:192.168.1.1:8080"); EXPECT_EQ(ep.get_url(), "http://example.com:8080"); - EXPECT_EQ(ep.get_original_url(), "http://example.com:8080"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080"); } -TEST(RGWEndpointTest, set_url_on_default_constructed_sets_original) { +TEST(RGWEndpointTest, set_url_on_default_constructed_sets_base) { RGWEndpoint ep; - EXPECT_TRUE(ep.get_original_url().empty()); + EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty()); ep.set_url("http://first.example.com"); EXPECT_EQ(ep.get_url(), "http://first.example.com"); - EXPECT_EQ(ep.get_original_url(), "http://first.example.com"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com"); - // Second set_url should NOT change original_url + // Second set_url should NOT change endpoint_url_lookup_id ep.set_url("http://second.example.com"); EXPECT_EQ(ep.get_url(), "http://second.example.com"); - EXPECT_EQ(ep.get_original_url(), "http://first.example.com"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com"); } -TEST(RGWEndpointTest, set_url_does_not_change_original_after_constructor) { - RGWEndpoint ep("http://original.example.com"); - - ep.set_url("http://modified.example.com"); - EXPECT_EQ(ep.get_url(), "http://modified.example.com"); - EXPECT_EQ(ep.get_original_url(), "http://original.example.com"); +TEST(RGWEndpointTest, set_path) { + RGWEndpoint ep("http://example.com:8080"); + ep.set_path("/path/to/resource"); + EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); } -TEST(RGWEndpointTest, with_url_returns_copy_with_new_url) { - RGWEndpoint ep("http://original.example.com"); - ep.set_connect_to("original.example.com:80:192.168.1.1:80"); - - RGWEndpoint ep2 = ep.with_url("http://modified.example.com"); - - // Original unchanged - EXPECT_EQ(ep.get_url(), "http://original.example.com"); - EXPECT_EQ(ep.get_original_url(), "http://original.example.com"); +TEST(RGWEndpointTest, set_query) { + RGWEndpoint ep("http://example.com:8080/path"); + ep.set_query("key=val&k2=v2"); + EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val&k2=v2"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path"); +} - // Copy has new url but preserves original_url and connect_to - EXPECT_EQ(ep2.get_url(), "http://modified.example.com"); - EXPECT_EQ(ep2.get_original_url(), "http://original.example.com"); - EXPECT_EQ(ep2.get_connect_to(), "original.example.com:80:192.168.1.1:80"); +TEST(RGWEndpointTest, set_query_with_leading_question_mark) { + RGWEndpoint ep("http://example.com:8080/path"); + ep.set_query("?key=val"); + EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path"); } -TEST(RGWEndpointTest, set_connect_to) { +TEST(RGWEndpointTest, set_host) { RGWEndpoint ep("http://example.com:8080"); - EXPECT_TRUE(ep.get_connect_to().empty()); - - ep.set_connect_to("example.com:8080:192.168.1.1:8080"); - EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080"); + ep.set_host("bucket.example.com"); + EXPECT_EQ(ep.get_url(), "http://bucket.example.com:8080"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); } TEST(RGWEndpointTest, add_trailing_slash) { RGWEndpoint ep("http://example.com:8080"); ep.add_trailing_slash(); EXPECT_EQ(ep.get_url(), "http://example.com:8080/"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); // Should not add another slash ep.add_trailing_slash(); EXPECT_EQ(ep.get_url(), "http://example.com:8080/"); } -TEST(RGWEndpointTest, append_to_url) { +TEST(RGWEndpointTest, set_query_empty_is_noop) { + RGWEndpoint ep("http://example.com:8080/path"); + ep.set_query(""); + // Empty query should not add a '?' to the URL + EXPECT_EQ(ep.get_url(), "http://example.com:8080/path"); +} + +TEST(RGWEndpointTest, set_path_without_leading_slash) { RGWEndpoint ep("http://example.com:8080"); - ep.append_to_url("/path/to/resource"); - EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource"); + ep.set_path("admin/bucket"); + // boost::urls should auto-add leading '/' for URLs with authority + EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); +} + +TEST(RGWEndpointTest, set_path_then_set_query) { + // This mirrors the forward_request() usage pattern + RGWEndpoint ep("http://example.com:8080"); + ep.set_path("/admin/bucket"); + ep.set_query("?max-entries=100&stats=true"); + EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket?max-entries=100&stats=true"); + EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080"); +} + +TEST(RGWEndpointTest, set_connect_to) { + RGWEndpoint ep("http://example.com:8080"); + EXPECT_TRUE(ep.get_connect_to().empty()); + + ep.set_connect_to("example.com:8080:192.168.1.1:8080"); + EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080"); } // Tests for operator<< @@ -98,17 +121,16 @@ TEST(RGWEndpointTest, ostream_operator_url_only) { RGWEndpoint ep("http://example.com:8080"); std::ostringstream oss; oss << ep; - EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080"); + EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to="); } -TEST(RGWEndpointTest, ostream_operator_with_different_original_url) { - RGWEndpoint ep; - ep.set_url("http://original.example.com:8080"); - ep.set_url("http://modified.example.com:8080"); // original_url stays the same +TEST(RGWEndpointTest, ostream_operator_with_modified_url) { + RGWEndpoint ep("http://example.com:8080"); + ep.set_path("/modified"); std::ostringstream oss; oss << ep; - EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://modified.example.com:8080 original_url=http://original.example.com:8080"); + EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080/modified endpoint_url_lookup_id=http://example.com:8080 connect_to="); } TEST(RGWEndpointTest, ostream_operator_with_connect_to) { @@ -117,7 +139,7 @@ TEST(RGWEndpointTest, ostream_operator_with_connect_to) { std::ostringstream oss; oss << ep; - EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080"); + EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080"); } TEST(RGWEndpointTest, ostream_operator_full) { @@ -128,5 +150,5 @@ TEST(RGWEndpointTest, ostream_operator_full) { std::ostringstream oss; oss << ep; - EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 original_url=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080"); + EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 endpoint_url_lookup_id=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080"); }