From: Radoslaw Zarzynski Date: Wed, 17 May 2017 19:29:18 +0000 (+0200) Subject: rgw; rework interface and implementation of url_decode. X-Git-Tag: v12.1.0~155^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=64cfc43f91f921ebe85875556d95639c3574b979;p=ceph.git rgw; rework interface and implementation of url_decode. This commit alters url_decode() to remove its dependency on a Variable Length Array and unnecessary memory allocations. It also adjusts its signature to match the sole usage pattern that is used across the code. Signed-off-by: Radoslaw Zarzynski --- diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc index bddffc238d56..6ff0eb1aa2bc 100644 --- a/src/rgw/rgw_auth_s3.cc +++ b/src/rgw/rgw_auth_s3.cc @@ -515,14 +515,9 @@ static inline std::string aws4_uri_encode(const std::string& src) static inline std::string aws4_uri_recode(const boost::string_view& src) { - /* TODO(rzarzynski): we might want to have a string_view-aware variant of - * url_decode. 
*/ - const auto src_str = src.to_string(); - - std::string decoded; - url_decode(src_str, decoded); + std::string decoded = url_decode(src); if (decoded.length() != src.length()) { - return src_str; + return src.to_string(); } else { return aws4_uri_encode(decoded); } diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index f8eb2b16ec43..4529a801c651 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -907,10 +907,8 @@ int RGWHTTPArgs::parse() end = true; fpos = str.size(); } - string substr, nameval; - substr = str.substr(pos, fpos - pos); - url_decode(substr, nameval, true); - NameVal nv(nameval); + std::string nameval = url_decode(str.substr(pos, fpos - pos), true); + NameVal nv(std::move(nameval)); int ret = nv.parse(); if (ret >= 0) { string& name = nv.get_name(); @@ -1336,43 +1334,39 @@ static char hex_to_num(char c) return hex_table.to_num(c); } -bool url_decode(const string& src_str, string& dest_str, bool in_query) +std::string url_decode(const boost::string_view& src_str, bool in_query) { - const char *src = src_str.c_str(); - char dest[src_str.size() + 1]; - int pos = 0; - char c; + std::string dest_str; + dest_str.reserve(src_str.length() + 1); - while (*src) { + for (auto src = std::begin(src_str); src != std::end(src_str); ++src) { if (*src != '%') { if (!in_query || *src != '+') { - if (*src == '?') in_query = true; - dest[pos++] = *src++; + if (*src == '?') { + in_query = true; + } + dest_str.push_back(*src); } else { - dest[pos++] = ' '; - ++src; + dest_str.push_back(' '); } } else { - src++; - if (!*src) + /* 3 == strlen("%%XX") */ + if (std::distance(src, std::end(src_str)) < 3) { break; - char c1 = hex_to_num(*src++); - if (!*src) - break; - c = c1 << 4; - if (c1 < 0) - return false; - c1 = hex_to_num(*src++); - if (c1 < 0) - return false; - c |= c1; - dest[pos++] = c; + } + + src++; + const char c1 = hex_to_num(*src++); + const char c2 = hex_to_num(*src); + if (c1 < 0 || c2 < 0) { + return std::string(); + } else { + 
dest_str.push_back(c1 << 4 | c2); + } } } - dest[pos] = 0; - dest_str = dest; - return true; + return dest_str; } void rgw_uri_escape_char(char c, string& dst) diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 6d314df56aa4..88d49765dc3c 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -2214,9 +2214,8 @@ extern bool verify_object_permission_no_policy(struct req_state *s, /** Convert an input URL into a sane object name * by converting %-escaped strings into characters, etc*/ extern void rgw_uri_escape_char(char c, string& dst); -extern bool url_decode(const std::string& src_str, - std::string& dest_str, - bool in_query = false); +extern std::string url_decode(const boost::string_view& src_str, + bool in_query = false); extern void url_encode(const std::string& src, string& dst); extern std::string url_encode(const std::string& src); diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 506c1cf5f018..d93103e0a7d9 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -9,9 +9,10 @@ #include #include +#include #include #include -#include +#include #include "common/Clock.h" #include "common/armor.h" @@ -1180,20 +1181,17 @@ static int get_obj_user_manifest_iterate_cb(rgw_bucket& bucket, int RGWGetObj::handle_user_manifest(const char *prefix) { - ldout(s->cct, 2) << "RGWGetObj::handle_user_manifest() prefix=" << prefix << dendl; + const boost::string_view prefix_view(prefix); + ldout(s->cct, 2) << "RGWGetObj::handle_user_manifest() prefix=" + << prefix_view << dendl; - string prefix_str = prefix; - size_t pos = prefix_str.find('/'); - if (pos == string::npos) + const size_t pos = prefix_view.find('/'); + if (pos == string::npos) { return -EINVAL; + } - string bucket_name_raw, bucket_name; - bucket_name_raw = prefix_str.substr(0, pos); - url_decode(bucket_name_raw, bucket_name); - - string obj_prefix_raw, obj_prefix; - obj_prefix_raw = prefix_str.substr(pos + 1); - url_decode(obj_prefix_raw, obj_prefix); + const std::string bucket_name = 
url_decode(prefix_view.substr(0, pos)); + const std::string obj_prefix = url_decode(prefix_view.substr(pos + 1)); rgw_bucket bucket; @@ -3876,20 +3874,14 @@ int RGWDeleteObj::handle_slo_manifest(bufferlist& bl) const string& path_str = iter.path; const size_t sep_pos = path_str.find('/', 1 /* skip first slash */); - if (string::npos == sep_pos) { + if (boost::string_view::npos == sep_pos) { return -EINVAL; } RGWBulkDelete::acct_path_t path; - string bucket_name; - url_decode(path_str.substr(1, sep_pos - 1), bucket_name); - - string obj_name; - url_decode(path_str.substr(sep_pos + 1), obj_name); - - path.bucket_name = bucket_name; - path.obj_key = obj_name; + path.bucket_name = url_decode(path_str.substr(1, sep_pos - 1)); + path.obj_key = url_decode(path_str.substr(sep_pos + 1)); items.push_back(path); } @@ -4039,9 +4031,7 @@ bool RGWCopyObj::parse_copy_location(const string& url_src, string& bucket_name, params_str = url_src.substr(pos + 1); } - string dec_src; - - url_decode(name_str, dec_src); + std::string dec_src = url_decode(name_str); const char *src = dec_src.c_str(); if (*src == '/') ++src; diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 8a4fa90a1973..591c0a52e729 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -2209,7 +2209,7 @@ int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio) s->info.domain = s->cct->_conf->rgw_dns_name; } - url_decode(s->info.request_uri, s->decoded_uri); + s->decoded_uri = url_decode(s->info.request_uri); /* FastCGI specification, section 6.3 * http://www.fastcgi.com/devkit/doc/fcgi-spec.html#S6.3 diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 056956099cf2..f8a76071847f 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -1134,10 +1134,10 @@ int RGWPutObj_ObjStore_S3::get_params() #define VERSION_ID_STR "?versionId=" pos = copy_source_object_name.find(VERSION_ID_STR); if (pos == std::string::npos) { - url_decode(copy_source_object_name, 
copy_source_object_name); + copy_source_object_name = url_decode(copy_source_object_name); } else { copy_source_version_id = copy_source_object_name.substr(pos + sizeof(VERSION_ID_STR) - 1); - url_decode(copy_source_object_name.substr(0, pos), copy_source_object_name); + copy_source_object_name = url_decode(copy_source_object_name.substr(0, pos)); } pos = copy_source_bucket_name.find(":"); if (pos == std::string::npos) { @@ -1838,9 +1838,7 @@ int RGWDeleteObj_ObjStore_S3::get_params() } if (if_unmod) { - string if_unmod_str(if_unmod); - string if_unmod_decoded; - url_decode(if_unmod_str, if_unmod_decoded); + std::string if_unmod_decoded = url_decode(if_unmod); uint64_t epoch; uint64_t nsec; if (utime_t::parse_date(if_unmod_decoded, &epoch, &nsec) < 0) { diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 9c7720be8bc7..8061364e14fb 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -1399,18 +1399,13 @@ int RGWBulkDelete_ObjStore_SWIFT::get_data( const size_t sep_pos = path_str.find('/', start_pos); if (string::npos != sep_pos) { - string bucket_name; - url_decode(path_str.substr(start_pos, sep_pos - start_pos), bucket_name); - - string obj_name; - url_decode(path_str.substr(sep_pos + 1), obj_name); - - path.bucket_name = bucket_name; - path.obj_key = obj_name; + path.bucket_name = url_decode(path_str.substr(start_pos, + sep_pos - start_pos)); + path.obj_key = url_decode(path_str.substr(sep_pos + 1)); } else { /* It's guaranteed here that bucket name is at least one character * long and is different than slash. 
*/ - url_decode(path_str.substr(start_pos), path.bucket_name); + path.bucket_name = url_decode(path_str.substr(start_pos)); } items.push_back(path); @@ -2232,8 +2227,7 @@ RGWOp* RGWSwiftWebsiteHandler::get_ws_listing_op() bool RGWSwiftWebsiteHandler::is_web_dir() const { - std::string subdir_name; - url_decode(s->object.name, subdir_name); + std::string subdir_name = url_decode(s->object.name); /* Remove character from the subdir name if it is "/". */ if (subdir_name.empty()) { @@ -2242,7 +2236,7 @@ bool RGWSwiftWebsiteHandler::is_web_dir() const subdir_name.pop_back(); } - rgw_obj obj(s->bucket, subdir_name); + rgw_obj obj(s->bucket, std::move(subdir_name)); /* First, get attrset of the object we'll try to retrieve. */ RGWObjectCtx& obj_ctx = *static_cast(s->obj_ctx);