#include "rgw_http_client_types.h"
#include <atomic>
+#include <boost/url.hpp>
using param_pair_t = std::pair<std::string, std::string>;
using param_vec_t = std::vector<param_pair_t>;
/**
* RGWEndpoint - Represents an HTTP endpoint with additional metdata such as connection routing.
*
- * Unlike a plain URL string, RGWEndpoint carries additional information needed
- * to leverage libcurl's CURLOPT_CONNECT_TO option. This enables RGW to route
- * requests to specific IP addresses while preserving the original hostname for
- * TLS/SNI and Host headers.
+ * Wraps boost::urls::url for proper URL manipulation (set_path, set_query,
+ * set_host) instead of raw string concatenation.
+ *
+ * Carries a CURLOPT_CONNECT_TO override for IP-level routing.
*
* Fields:
- * - url: The effective URL for the request (may be modified with paths).
- * - original_url: The initially configured endpoint URL (preserved for reference).
- * - connect_to: libcurl CONNECT_TO string (format: "host:port:addr:port") to
- * override connection routing without changing the request URL.
+ * - endpoint_url_lookup_id: The initially configured endpoint URL (immutable).
+ * Used as a lookup key in RGWRESTConn to find the ResolvedEndpoint
+ * for health tracking (marking IPs as up/down).
+ * - url: boost::urls::url for the request URL (mutable via set_path, etc.).
+ * - connect_to: libcurl CONNECT_TO string (format: "host:port:addr:port").
*/
struct RGWEndpoint {
private:
- std::string url;
- std::string original_url;
+ std::string endpoint_url_lookup_id;
+ boost::urls::url url;
std::string connect_to;
public:
RGWEndpoint(const char* u) : RGWEndpoint(std::string(u)) {}
- RGWEndpoint(std::string u, std::string c = {})
- : url(std::move(u)), original_url(url), connect_to(std::move(c)) {}
+ RGWEndpoint(const std::string& u) : endpoint_url_lookup_id(u) {
+ auto r = boost::urls::parse_uri(u);
+ if (r.has_value()) {
+ url = r.value();
+ }
+ }
- RGWEndpoint with_url(std::string new_url) const {
- RGWEndpoint e = *this;
- e.set_url(std::move(new_url));
- return e;
+ RGWEndpoint(const std::string& u, const std::string& c)
+ : endpoint_url_lookup_id(u), connect_to(c) {
+ auto r = boost::urls::parse_uri(u);
+ if (r.has_value()) {
+ url = r.value();
+ }
}
- void set_url(const std::string& _url) {
- url = _url;
- // Capture the first URL assignment as the original
- if (original_url.empty()) {
- original_url = _url;
+ void set_url(const std::string& u) {
+ auto r = boost::urls::parse_uri(u);
+ if (r.has_value()) {
+ url = r.value();
+ }
+ if (endpoint_url_lookup_id.empty()) {
+ endpoint_url_lookup_id = u;
}
}
- const std::string& get_url() const { return url; }
- const std::string& get_original_url() const { return original_url; }
- void set_connect_to(const std::string& _connect_to) { connect_to = _connect_to; }
- const std::string& get_connect_to() const { return connect_to; }
+ std::string get_url() const { return std::string(url.buffer()); }
+ const std::string& get_endpoint_url_lookup_id() const { return endpoint_url_lookup_id; }
- void add_trailing_slash() {
- if (! url.empty() && url.back() != '/')
- append_to_url("/");
+ void set_path(const std::string& path) { url.set_encoded_path(path); }
+ void set_query(const std::string& q) {
+ if (q.empty()) return;
+ // strip leading '?' - if present, boost::urls adds it automatically
+ url.set_encoded_query(q[0] == '?' ? q.substr(1) : q);
}
+ void set_host(const std::string& h) { url.set_host(h); }
- void append_to_url(const std::string& suffix) {
- url.append(suffix);
+ void add_trailing_slash() {
+ auto path = std::string(url.encoded_path());
+ if (path.empty() || path.back() != '/') {
+ path += '/';
+ url.set_encoded_path(path);
+ }
}
+ void set_connect_to(const std::string& c) { connect_to = c; }
+ const std::string& get_connect_to() const { return connect_to; }
+
friend std::ostream& operator<<(std::ostream& os, const RGWEndpoint& ep) {
- os << "RGWEndpoint: url=" << ep.url;
- if (!ep.original_url.empty() && ep.original_url != ep.url) {
- os << " original_url=" << ep.original_url;
- }
- if (!ep.connect_to.empty()) {
- os << " connect_to=" << ep.connect_to;
- }
+ os << "RGWEndpoint: url=" << ep.url.buffer()
+ << " endpoint_url_lookup_id=" << (ep.endpoint_url_lookup_id.empty() ? "<empty>" : ep.endpoint_url_lookup_id)
+ << " connect_to=" << (ep.connect_to.empty() ? "<empty>" : ep.connect_to);
return os;
}
};
string params_str;
get_params_str(new_info.args.get_params(), params_str);
- string new_url = endpoint.get_url();
- string& resource = new_info.request_uri;
- string new_resource = resource;
- if (new_url[new_url.size() - 1] == '/' && resource[0] == '/') {
- new_url = new_url.substr(0, new_url.size() - 1);
- } else if (resource[0] != '/') {
- new_resource = "/";
- new_resource.append(resource);
- }
- new_url.append(new_resource + params_str);
+ endpoint.set_path(new_info.request_uri);
+ endpoint.set_query(params_str);
bufferlist::iterator bliter;
}
method = new_info.method;
- endpoint.set_url(new_url);
std::ignore = process(dpp, y);
url_encode(iter->second, encode_slash));
}
- endpoint = _endpoint.with_url(_endpoint.get_url() + resource + params_str);
+ endpoint = _endpoint;
+ endpoint.set_path(resource);
+ endpoint.set_query(params_str);
const std::string date_str = get_gmt_date_str();
new_env->set("HTTP_DATE", date_str.c_str());
if (host_style == VirtualStyle) {
resource_str = obj.get_oid();
- new_endpoint.set_url(protocol + "://" + bucket_name + "." + host);
new_host = bucket_name + "." + new_host;
+ new_endpoint.set_host(new_host);
} else {
resource_str = bucket_name + "/" + obj.get_oid();
}
}
if (host_style == VirtualStyle) {
- new_endpoint.set_url(protocol + "://" + bucket_name + "." + host);
+ new_host = bucket_name + "." + host;
+ new_endpoint.set_host(new_host);
if(pos == string::npos) {
new_resource = "";
} else {
new_resource = new_resource.substr(pos+1);
}
- new_host = bucket_name + "." + host;
}
new_endpoint.add_trailing_slash();
TEST(RGWEndpointTest, default_constructor) {
RGWEndpoint ep;
EXPECT_TRUE(ep.get_url().empty());
- EXPECT_TRUE(ep.get_original_url().empty());
+ EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty());
EXPECT_TRUE(ep.get_connect_to().empty());
}
-TEST(RGWEndpointTest, constructor_sets_url_and_original_url) {
+TEST(RGWEndpointTest, constructor_sets_url_and_lookup_id) {
RGWEndpoint ep("http://example.com:8080");
EXPECT_EQ(ep.get_url(), "http://example.com:8080");
- EXPECT_EQ(ep.get_original_url(), "http://example.com:8080");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
}
TEST(RGWEndpointTest, constructor_with_connect_to) {
RGWEndpoint ep("http://example.com:8080", "example.com:8080:192.168.1.1:8080");
EXPECT_EQ(ep.get_url(), "http://example.com:8080");
- EXPECT_EQ(ep.get_original_url(), "http://example.com:8080");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
}
-TEST(RGWEndpointTest, set_url_on_default_constructed_sets_original) {
+TEST(RGWEndpointTest, set_url_on_default_constructed_sets_base) {
RGWEndpoint ep;
- EXPECT_TRUE(ep.get_original_url().empty());
+ EXPECT_TRUE(ep.get_endpoint_url_lookup_id().empty());
ep.set_url("http://first.example.com");
EXPECT_EQ(ep.get_url(), "http://first.example.com");
- EXPECT_EQ(ep.get_original_url(), "http://first.example.com");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com");
- // Second set_url should NOT change original_url
+ // Second set_url should NOT change endpoint_url_lookup_id
ep.set_url("http://second.example.com");
EXPECT_EQ(ep.get_url(), "http://second.example.com");
- EXPECT_EQ(ep.get_original_url(), "http://first.example.com");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://first.example.com");
}
-TEST(RGWEndpointTest, set_url_does_not_change_original_after_constructor) {
- RGWEndpoint ep("http://original.example.com");
-
- ep.set_url("http://modified.example.com");
- EXPECT_EQ(ep.get_url(), "http://modified.example.com");
- EXPECT_EQ(ep.get_original_url(), "http://original.example.com");
+TEST(RGWEndpointTest, set_path) {
+ RGWEndpoint ep("http://example.com:8080");
+ ep.set_path("/path/to/resource");
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
}
-TEST(RGWEndpointTest, with_url_returns_copy_with_new_url) {
- RGWEndpoint ep("http://original.example.com");
- ep.set_connect_to("original.example.com:80:192.168.1.1:80");
-
- RGWEndpoint ep2 = ep.with_url("http://modified.example.com");
-
- // Original unchanged
- EXPECT_EQ(ep.get_url(), "http://original.example.com");
- EXPECT_EQ(ep.get_original_url(), "http://original.example.com");
+TEST(RGWEndpointTest, set_query) {
+ RGWEndpoint ep("http://example.com:8080/path");
+ ep.set_query("key=val&k2=v2");
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val&k2=v2");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path");
+}
- // Copy has new url but preserves original_url and connect_to
- EXPECT_EQ(ep2.get_url(), "http://modified.example.com");
- EXPECT_EQ(ep2.get_original_url(), "http://original.example.com");
- EXPECT_EQ(ep2.get_connect_to(), "original.example.com:80:192.168.1.1:80");
+TEST(RGWEndpointTest, set_query_with_leading_question_mark) {
+ RGWEndpoint ep("http://example.com:8080/path");
+ ep.set_query("?key=val");
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/path?key=val");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080/path");
}
-TEST(RGWEndpointTest, set_connect_to) {
+TEST(RGWEndpointTest, set_host) {
RGWEndpoint ep("http://example.com:8080");
- EXPECT_TRUE(ep.get_connect_to().empty());
-
- ep.set_connect_to("example.com:8080:192.168.1.1:8080");
- EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
+ ep.set_host("bucket.example.com");
+ EXPECT_EQ(ep.get_url(), "http://bucket.example.com:8080");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
}
TEST(RGWEndpointTest, add_trailing_slash) {
RGWEndpoint ep("http://example.com:8080");
ep.add_trailing_slash();
EXPECT_EQ(ep.get_url(), "http://example.com:8080/");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
// Should not add another slash
ep.add_trailing_slash();
EXPECT_EQ(ep.get_url(), "http://example.com:8080/");
}
-TEST(RGWEndpointTest, append_to_url) {
+TEST(RGWEndpointTest, set_query_empty_is_noop) {
+ RGWEndpoint ep("http://example.com:8080/path");
+ ep.set_query("");
+ // Empty query should not add a '?' to the URL
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/path");
+}
+
+TEST(RGWEndpointTest, set_path_without_leading_slash) {
RGWEndpoint ep("http://example.com:8080");
- ep.append_to_url("/path/to/resource");
- EXPECT_EQ(ep.get_url(), "http://example.com:8080/path/to/resource");
+ ep.set_path("admin/bucket");
+ // boost::urls should auto-add leading '/' for URLs with authority
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
+}
+
+TEST(RGWEndpointTest, set_path_then_set_query) {
+ // This mirrors the forward_request() usage pattern
+ RGWEndpoint ep("http://example.com:8080");
+ ep.set_path("/admin/bucket");
+ ep.set_query("?max-entries=100&stats=true");
+ EXPECT_EQ(ep.get_url(), "http://example.com:8080/admin/bucket?max-entries=100&stats=true");
+ EXPECT_EQ(ep.get_endpoint_url_lookup_id(), "http://example.com:8080");
+}
+
+TEST(RGWEndpointTest, set_connect_to) {
+ RGWEndpoint ep("http://example.com:8080");
+ EXPECT_TRUE(ep.get_connect_to().empty());
+
+ ep.set_connect_to("example.com:8080:192.168.1.1:8080");
+ EXPECT_EQ(ep.get_connect_to(), "example.com:8080:192.168.1.1:8080");
}
// Tests for operator<<
RGWEndpoint ep("http://example.com:8080");
std::ostringstream oss;
oss << ep;
- EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080");
+ EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to=<empty>");
}
-TEST(RGWEndpointTest, ostream_operator_with_different_original_url) {
- RGWEndpoint ep;
- ep.set_url("http://original.example.com:8080");
- ep.set_url("http://modified.example.com:8080"); // original_url stays the same
+TEST(RGWEndpointTest, ostream_operator_with_modified_url) {
+ RGWEndpoint ep("http://example.com:8080");
+ ep.set_path("/modified");
std::ostringstream oss;
oss << ep;
- EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://modified.example.com:8080 original_url=http://original.example.com:8080");
+ EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080/modified endpoint_url_lookup_id=http://example.com:8080 connect_to=<empty>");
}
TEST(RGWEndpointTest, ostream_operator_with_connect_to) {
std::ostringstream oss;
oss << ep;
- EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080");
+ EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://example.com:8080 endpoint_url_lookup_id=http://example.com:8080 connect_to=example.com:8080:192.168.1.1:8080");
}
TEST(RGWEndpointTest, ostream_operator_full) {
std::ostringstream oss;
oss << ep;
- EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 original_url=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080");
+ EXPECT_EQ(oss.str(), "RGWEndpoint: url=http://192.168.1.1:8080 endpoint_url_lookup_id=http://original.example.com:8080 connect_to=original.example.com:8080:192.168.1.1:8080");
}