]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: store RGWEndpoint URL as boost::urls::url
authorOguzhan Ozmen <oozmen@bloomberg.net>
Tue, 14 Apr 2026 16:46:05 +0000 (16:46 +0000)
committerOguzhan Ozmen <oozmen@bloomberg.net>
Thu, 4 Jun 2026 19:32:19 +0000 (19:32 +0000)
Replace raw string manipulation in RGWEndpoint with boost::urls::url.
URL path/query/host changes now use set_path(), set_query(), set_host()
instead of string concatenation.

Rename original_url to endpoint_url_lookup_id to clarify its role as a
health-tracking key for ResolvedEndpoint lookup in
set_endpoint_unconnectable().

Signed-off-by: Oguzhan Ozmen <oozmen@bloomberg.net>
src/rgw/rgw_http_client.cc
src/rgw/rgw_http_client.h
src/rgw/rgw_rest_client.cc
src/rgw/rgw_rest_conn.cc
src/test/rgw/test_rgw_http_client.cc

index 1b1d5bae8b796ce194658c72a0dc5a742daca10e..e2d9d87180c6cf363925cca8e1feef0e055c17c6 100644 (file)
@@ -333,7 +333,7 @@ void RGWHTTPClient::init()
     }
   }
 
-  const string& url = endpoint.get_url();
+  const string url = endpoint.get_url();
 
   auto pos = url.find("://");
   if (pos == string::npos) {
index 5aec7c406847b6509a95121820ce4640e6dcae7a..16f73ad23d6390548a62a8e381321236b71fec7b 100644 (file)
@@ -10,6 +10,7 @@
 #include "rgw_http_client_types.h"
 
 #include <atomic>
+#include <boost/url.hpp>
 
 using param_pair_t = std::pair<std::string, std::string>;
 using param_vec_t = std::vector<param_pair_t>;
@@ -23,21 +24,22 @@ class RGWHTTPManager;
 /**
  * RGWEndpoint - Represents an HTTP endpoint with additional metdata such as connection routing.
  *
- * Unlike a plain URL string, RGWEndpoint carries additional information needed
- * to leverage libcurl's CURLOPT_CONNECT_TO option. This enables RGW to route
- * requests to specific IP addresses while preserving the original hostname for
- * TLS/SNI and Host headers.
+ * Wraps boost::urls::url for proper URL manipulation (set_path, set_query,
+ * set_host) instead of raw string concatenation.
+ *
+ * Carries a CURLOPT_CONNECT_TO override for IP-level routing.
  *
  * Fields:
- *  - url:          The effective URL for the request (may be modified with paths).
- *  - original_url: The initially configured endpoint URL (preserved for reference).
- *  - connect_to:   libcurl CONNECT_TO string (format: "host:port:addr:port") to
- *                  override connection routing without changing the request URL.
+ *  - endpoint_url_lookup_id: The initially configured endpoint URL (immutable).
+ *                 Used as a lookup key in RGWRESTConn to find the ResolvedEndpoint
+ *                 for health tracking (marking IPs as up/down).
+ *  - url:         boost::urls::url for the request URL (mutable via set_path, etc.).
+ *  - connect_to:  libcurl CONNECT_TO string (format: "host:port:addr:port").
  */
 struct RGWEndpoint {
 private:
-  std::string url;
-  std::string original_url;
+  std::string endpoint_url_lookup_id;
+  boost::urls::url url;
   std::string connect_to;
 
 public:
@@ -45,45 +47,57 @@ public:
 
   RGWEndpoint(const char* u) : RGWEndpoint(std::string(u)) {}
 
-  RGWEndpoint(std::string u, std::string c = {})
-    : url(std::move(u)), original_url(url), connect_to(std::move(c)) {}
+  RGWEndpoint(const std::string& u) : endpoint_url_lookup_id(u) {
+    auto r = boost::urls::parse_uri(u);
+    if (r.has_value()) {
+      url = r.value();
+    }
+  }
 
-  RGWEndpoint with_url(std::string new_url) const {
-    RGWEndpoint e = *this;
-    e.set_url(std::move(new_url));
-    return e;
+  RGWEndpoint(const std::string& u, const std::string& c)
+    : endpoint_url_lookup_id(u), connect_to(c) {
+    auto r = boost::urls::parse_uri(u);
+    if (r.has_value()) {
+      url = r.value();
+    }
   }
 
-  void set_url(const std::string& _url) {
-    url = _url;
-    // Capture the first URL assignment as the original
-    if (original_url.empty()) {
-      original_url = _url;
+  void set_url(const std::string& u) {
+    auto r = boost::urls::parse_uri(u);
+    if (r.has_value()) {
+      url = r.value();
+    }
+    if (endpoint_url_lookup_id.empty()) {
+      endpoint_url_lookup_id = u;
     }
   }
-  const std::string& get_url() const { return url; }
-  const std::string& get_original_url() const { return original_url; }
 
-  void set_connect_to(const std::string& _connect_to) { connect_to = _connect_to; }
-  const std::string& get_connect_to() const { return connect_to; }
+  std::string get_url() const { return std::string(url.buffer()); }
+  const std::string& get_endpoint_url_lookup_id() const { return endpoint_url_lookup_id; }
 
-  void add_trailing_slash() {
-    if (! url.empty() && url.back() != '/')
-      append_to_url("/");
+  void set_path(const std::string& path) { url.set_encoded_path(path); }
+  void set_query(const std::string& q) {
+    if (q.empty()) return;
+    // strip leading '?' - if present, boost::urls adds it automatically
+    url.set_encoded_query(q[0] == '?' ? q.substr(1) : q);
   }
+  void set_host(const std::string& h) { url.set_host(h); }
 
-  void append_to_url(const std::string& suffix) {
-    url.append(suffix);
+  void add_trailing_slash() {
+    auto path = std::string(url.encoded_path());
+    if (path.empty() || path.back() != '/') {
+      path += '/';
+      url.set_encoded_path(path);
+    }
   }
 
+  void set_connect_to(const std::string& c) { connect_to = c; }
+  const std::string& get_connect_to() const { return connect_to; }
+
   friend std::ostream& operator<<(std::ostream& os, const RGWEndpoint& ep) {
-    os << "RGWEndpoint: url=" << ep.url;
-    if (!ep.original_url.empty() && ep.original_url != ep.url) {
-      os << " original_url=" << ep.original_url;
-    }
-    if (!ep.connect_to.empty()) {
-      os << " connect_to=" << ep.connect_to;
-    }
+    os << "RGWEndpoint: url=" << ep.url.buffer()
+       << " endpoint_url_lookup_id=" << (ep.endpoint_url_lookup_id.empty() ? "<empty>" : ep.endpoint_url_lookup_id)
+       << " connect_to=" << (ep.connect_to.empty() ? "<empty>" : ep.connect_to);
     return os;
   }
 };
index e82680e7b0ef91eac05c2b2634161f34105488b7..02b80c2f1edfed60181a37d6b83218020340e938 100644 (file)
@@ -425,16 +425,8 @@ auto RGWRESTSimpleRequest::forward_request(const DoutPrefixProvider *dpp, const
   string params_str;
   get_params_str(new_info.args.get_params(), params_str);
 
-  string new_url = endpoint.get_url();
-  string& resource = new_info.request_uri;
-  string new_resource = resource;
-  if (new_url[new_url.size() - 1] == '/' && resource[0] == '/') {
-    new_url = new_url.substr(0, new_url.size() - 1);
-  } else if (resource[0] != '/') {
-    new_resource = "/";
-    new_resource.append(resource);
-  }
-  new_url.append(new_resource + params_str);
+  endpoint.set_path(new_info.request_uri);
+  endpoint.set_query(params_str);
 
   bufferlist::iterator bliter;
 
@@ -446,7 +438,6 @@ auto RGWRESTSimpleRequest::forward_request(const DoutPrefixProvider *dpp, const
   }
 
   method = new_info.method;
-  endpoint.set_url(new_url);
 
   std::ignore = process(dpp, y);
 
@@ -581,7 +572,9 @@ void RGWRESTGenerateHTTPHeaders::init(const string& _method, const string& host,
                           url_encode(iter->second, encode_slash));
   }
 
-  endpoint = _endpoint.with_url(_endpoint.get_url() + resource + params_str);
+  endpoint = _endpoint;
+  endpoint.set_path(resource);
+  endpoint.set_query(params_str);
 
   const std::string date_str = get_gmt_date_str();
   new_env->set("HTTP_DATE", date_str.c_str());
@@ -692,8 +685,8 @@ void RGWRESTStreamS3PutObj::send_init(const rgw_obj& obj)
 
   if (host_style == VirtualStyle) {
     resource_str = obj.get_oid();
-    new_endpoint.set_url(protocol + "://" + bucket_name + "." + host);
     new_host = bucket_name + "." + new_host;
+    new_endpoint.set_host(new_host);
   } else {
     resource_str = bucket_name + "/" + obj.get_oid();
   }
@@ -847,13 +840,13 @@ int RGWRESTStreamRWRequest::do_send_prepare(const DoutPrefixProvider *dpp, RGWAc
   }
 
   if (host_style == VirtualStyle) {
-    new_endpoint.set_url(protocol + "://" + bucket_name + "." + host);
+    new_host = bucket_name + "." + host;
+    new_endpoint.set_host(new_host);
     if(pos == string::npos) {
       new_resource = "";
     } else {
       new_resource = new_resource.substr(pos+1);
     }
-    new_host = bucket_name + "." + host;
   }
   new_endpoint.add_trailing_slash();
 
index c0060c8fbdb89f32ef0a36ef57e4b1ff8da60de8..46f6685c6fd9f07069cacbd9d64de6dbb8ef043a 100644 (file)
@@ -273,11 +273,11 @@ RGWEndpoint RGWRESTConn::get_endpoint()
 
 void RGWRESTConn::set_endpoint_unconnectable(const RGWEndpoint& endpoint)
 {
-  const string& orig_url = endpoint.get_original_url();
+  const string& endpoint_id = endpoint.get_endpoint_url_lookup_id();
   const string& connect_to = endpoint.get_connect_to();
 
-  ResolvedEndpoint* res_ep = find_resolved_endpoint(orig_url);
-  if (orig_url.empty() || !res_ep) {
+  ResolvedEndpoint* res_ep = find_resolved_endpoint(endpoint_id);
+  if (endpoint_id.empty() || !res_ep) {
     ldout(cct, 0) << "ERROR: endpoint is not valid or not found: "
       << endpoint << dendl;
     return;
@@ -301,7 +301,7 @@ void RGWRESTConn::set_endpoint_unconnectable(const RGWEndpoint& endpoint)
   for (auto& res_ip : res_ep->resolved_ips) {
     res_ip.mark_down();
   }
-  ldout(cct, 10) << "set all IPs unconnectable for endpoint url=" << orig_url << dendl;
+  ldout(cct, 10) << "set all IPs unconnectable for endpoint=" << endpoint << dendl;
 }
 
 void RGWRESTConn::populate_params(param_vec_t& params, const rgw_owner* uid, const string& zonegroup)
index 79f80c67edb8303c2c5e1afc1ea7751d552740ae..33cb2d1f85fe876855309bdca68555f675e39933 100644 (file)
@@ -13,83 +13,106 @@ using namespace std;
 TEST(RGWEndpointTest, default_constructor) {
   RGWEndpoint ep;
   EXPECT_TRUE(ep.get_url().empty());
-  EXPECT_TRUE(ep.get_original_url().empty());
+  EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty());
   EXPECT_TRUE(ep.get_connect_to().empty());
 }
 
-TEST(RGWEndpointTest, constructor_sets_url_and_original_url) {
+TEST(RGWEndpointTest, constructor_sets_url_and_lookup_id) {
   RGWEndpoint ep("http://example.com:8080");
   EXPECT_EQ(ep.get_url(), "http://example.com:8080");
-  EXPECT_EQ(ep.get_original_url(), "http://example.com:8080");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
 }
 
 TEST(RGWEndpointTest, constructor_with_connect_to) {
   RGWEndpoint ep("http://example.com:8080", "example.com:8080:192.168.1.1:8080");
   EXPECT_EQ(ep.get_url(), "http://example.com:8080");
-  EXPECT_EQ(ep.get_original_url(), "http://example.com:8080");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
   EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
 }
 
-TEST(RGWEndpointTest, set_url_on_default_constructed_sets_original) {
+TEST(RGWEndpointTest, set_url_on_default_constructed_sets_base) {
   RGWEndpoint ep;
-  EXPECT_TRUE(ep.get_original_url().empty());
+  EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty());
 
   ep.set_url("http://first.example.com");
   EXPECT_EQ(ep.get_url(), "http://first.example.com");
-  EXPECT_EQ(ep.get_original_url(), "http://first.example.com");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com");
 
-  // Second set_url should NOT change original_url
+  // Second set_url should NOT change endpoint_url_lookup_id
   ep.set_url("http://second.example.com");
   EXPECT_EQ(ep.get_url(), "http://second.example.com");
-  EXPECT_EQ(ep.get_original_url(), "http://first.example.com");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com");
 }
 
-TEST(RGWEndpointTest, set_url_does_not_change_original_after_constructor) {
-  RGWEndpoint ep("http://original.example.com");
-
-  ep.set_url("http://modified.example.com");
-  EXPECT_EQ(ep.get_url(), "http://modified.example.com");
-  EXPECT_EQ(ep.get_original_url(), "http://original.example.com");
+TEST(RGWEndpointTest, set_path) {
+  RGWEndpoint ep("http://example.com:8080");
+  ep.set_path("/path/to/resource");
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
 }
 
-TEST(RGWEndpointTest, with_url_returns_copy_with_new_url) {
-  RGWEndpoint ep("http://original.example.com");
-  ep.set_connect_to("original.example.com:80:192.168.1.1:80");
-
-  RGWEndpoint ep2 = ep.with_url("http://modified.example.com");
-
-  // Original unchanged
-  EXPECT_EQ(ep.get_url(), "http://original.example.com");
-  EXPECT_EQ(ep.get_original_url(), "http://original.example.com");
+TEST(RGWEndpointTest, set_query) {
+  RGWEndpoint ep("http://example.com:8080/path");
+  ep.set_query("key=val&k2=v2");
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val&k2=v2");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path");
+}
 
-  // Copy has new url but preserves original_url and connect_to
-  EXPECT_EQ(ep2.get_url(), "http://modified.example.com");
-  EXPECT_EQ(ep2.get_original_url(), "http://original.example.com");
-  EXPECT_EQ(ep2.get_connect_to(), "original.example.com:80:192.168.1.1:80");
+TEST(RGWEndpointTest, set_query_with_leading_question_mark) {
+  RGWEndpoint ep("http://example.com:8080/path");
+  ep.set_query("?key=val");
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path");
 }
 
-TEST(RGWEndpointTest, set_connect_to) {
+TEST(RGWEndpointTest, set_host) {
   RGWEndpoint ep("http://example.com:8080");
-  EXPECT_TRUE(ep.get_connect_to().empty());
-
-  ep.set_connect_to("example.com:8080:192.168.1.1:8080");
-  EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
+  ep.set_host("bucket.example.com");
+  EXPECT_EQ(ep.get_url(), "http://bucket.example.com:8080");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
 }
 
 TEST(RGWEndpointTest, add_trailing_slash) {
   RGWEndpoint ep("http://example.com:8080");
   ep.add_trailing_slash();
   EXPECT_EQ(ep.get_url(), "http://example.com:8080/");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
 
   // Should not add another slash
   ep.add_trailing_slash();
   EXPECT_EQ(ep.get_url(), "http://example.com:8080/");
 }
 
-TEST(RGWEndpointTest, append_to_url) {
+TEST(RGWEndpointTest, set_query_empty_is_noop) {
+  RGWEndpoint ep("http://example.com:8080/path");
+  ep.set_query("");
+  // Empty query should not add a '?' to the URL
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/path");
+}
+
+TEST(RGWEndpointTest, set_path_without_leading_slash) {
   RGWEndpoint ep("http://example.com:8080");
-  ep.append_to_url("/path/to/resource");
-  EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource");
+  ep.set_path("admin/bucket");
+  // boost::urls should auto-add leading '/' for URLs with authority
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
+}
+
+TEST(RGWEndpointTest, set_path_then_set_query) {
+  // This mirrors the forward_request() usage pattern
+  RGWEndpoint ep("http://example.com:8080");
+  ep.set_path("/admin/bucket");
+  ep.set_query("?max-entries=100&stats=true");
+  EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket?max-entries=100&stats=true");
+  EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
+}
+
+TEST(RGWEndpointTest, set_connect_to) {
+  RGWEndpoint ep("http://example.com:8080");
+  EXPECT_TRUE(ep.get_connect_to().empty());
+
+  ep.set_connect_to("example.com:8080:192.168.1.1:8080");
+  EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
 }
 
 // Tests for operator<<
@@ -98,17 +121,16 @@ TEST(RGWEndpointTest, ostream_operator_url_only) {
   RGWEndpoint ep("http://example.com:8080");
   std::ostringstream oss;
   oss << ep;
-  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080");
+  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to=<empty>");
 }
 
-TEST(RGWEndpointTest, ostream_operator_with_different_original_url) {
-  RGWEndpoint ep;
-  ep.set_url("http://original.example.com:8080");
-  ep.set_url("http://modified.example.com:8080");  // original_url stays the same
+TEST(RGWEndpointTest, ostream_operator_with_modified_url) {
+  RGWEndpoint ep("http://example.com:8080");
+  ep.set_path("/modified");
 
   std::ostringstream oss;
   oss << ep;
-  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://modified.example.com:8080 original_url=http://original.example.com:8080");
+  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080/modified endpoint_url_lookup_id=http://example.com:8080 connect_to=<empty>");
 }
 
 TEST(RGWEndpointTest, ostream_operator_with_connect_to) {
@@ -117,7 +139,7 @@ TEST(RGWEndpointTest, ostream_operator_with_connect_to) {
 
   std::ostringstream oss;
   oss << ep;
-  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080");
+  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080");
 }
 
 TEST(RGWEndpointTest, ostream_operator_full) {
@@ -128,5 +150,5 @@ TEST(RGWEndpointTest, ostream_operator_full) {
 
   std::ostringstream oss;
   oss << ep;
-  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 original_url=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080");
+  EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 endpoint_url_lookup_id=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080");
 }