From 94f46f6db0b4d888dd1804328314e811efc1736c Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 4 Dec 2020 18:16:08 -0500 Subject: [PATCH] rgw: in ordered bucket listing skip namespaced entries when possible When listing non-namespaced entries in the bucket index, the code would march through the namespaced entries in blocks, requesting all of them from the CLS layer. When there were many namespaced entries, it would significantly affect the performance of ordered listing. This commit adds code to advance the marker passed to lower layers to skip past namespaced entries. This is challenging in that non-namespaced entries can appear in the middle of the namespaced entries. We'll ignore the issue instance tags in names to simplify the following discussion. Non-namespaced entries are indexed by "name". Namespaced entries are indexed by _namespace_name, using underscores to surround the namespace. The challenge comes with entries such as "_name", where the name begins with an underscore. In that case we index them by "__name", quoting the underscore with another. Now the extra challenge comes due to the lexic ordering of the following: ASP _BAT_cat __DOG _eel_FOX goat Note that the namespaced entries are in positions 2 and 4, and the non-namespaced entries are in positions 1, 3, and 5. So when skipping past the namespaced entries, we have to be careful not to skip past the non-namespaced entries that begin with underscore. Additional code clean-ups done as well. Signed-off-by: J. Eric Ivancich Resolves: rhbz#1883283 (cherry picked from commit acfa6409597768b3c1dc8dcab3668d23761a4cbc) --- src/cls/rgw/cls_rgw_types.h | 16 ++++++--- src/rgw/rgw_common.h | 27 ++++++++++++++ src/rgw/rgw_rados.cc | 70 ++++++++++++++++++++++++------------- 3 files changed, 84 insertions(+), 29 deletions(-) diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index 6098f1be65938..01dbe192e6df4 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -292,9 +292,15 @@ struct cls_rgw_obj_key { } return (r < 0); } + bool operator>(const cls_rgw_obj_key& k) const { + return k < *this; + } bool operator<=(const cls_rgw_obj_key& k) const { return !(k < *this); } + bool operator>=(const cls_rgw_obj_key& k) const { + return !(k > *this); + } bool empty() const { return name.empty(); } @@ -392,16 +398,18 @@ struct rgw_bucket_dir_entry { DECODE_FINISH(bl); } - bool is_current() { + bool is_current() const { int test_flags = RGW_BUCKET_DIRENT_FLAG_VER | RGW_BUCKET_DIRENT_FLAG_CURRENT; return (flags & RGW_BUCKET_DIRENT_FLAG_VER) == 0 || (flags & test_flags) == test_flags; } - bool is_delete_marker() { return (flags & RGW_BUCKET_DIRENT_FLAG_DELETE_MARKER) != 0; } - bool is_visible() { + bool is_delete_marker() const { + return (flags & RGW_BUCKET_DIRENT_FLAG_DELETE_MARKER) != 0; + } + bool is_visible() const { return is_current() && !is_delete_marker(); } - bool is_valid() { return (flags & RGW_BUCKET_DIRENT_FLAG_VER_MARKER) == 0; } + bool is_valid() const { return (flags & RGW_BUCKET_DIRENT_FLAG_VER_MARKER) == 0; } void dump(Formatter *f) const; void decode_json(JSONObj *obj); diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index c8491b03d2e65..99fba924164e0 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -1669,6 +1669,33 @@ struct rgw_obj_key { instance = k.instance; } +// Since bucket index entries are stored in sequence, and the elements +// with namespaces can be between those without, we need a way to skip +// past namespaced elements; this returns a marker that will do so. +// +// Consider the following sequence: ASP, _BAT_cat, __DOG, _eel_FOX, +// goat; the 2nd and 4th entries are namespaced, but the 3rd is not, +// it's just an entry that begins with an underscore, which will be +// quoted with another underscore putting it between two potential +// namespaced blocks + static const rgw_obj_index_key& after_namespace_marker(const std::string& after) { + // this is just before "__", so will allow finding non-namespaced + // entries that begin with an underscore (and therefore are entered + // as starting with "__". + static const rgw_obj_index_key result1(std::string("_^") + char(255)); + + // this is just before entries that do not begin with an + // underscore and will allow skipping past the second namespace + // block + static const rgw_obj_index_key result2(std::string("_") + char(255)); + + if (after < result1.name) { + return result1; + } else { + return result2; + } + } + static void parse_index_key(const string& key, string *name, string *ns) { if (key[0] != '_') { *name = key; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 64697e111c29e..cda2b1ed07228 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -2468,8 +2468,13 @@ int RGWRados::Bucket::List::list_objects_ordered( } } + // allows us to skip over entries in two conditions: 1) when using a + // delimiter and we can skip over "subdirectories" and 2) when + // searching for elements in the empty namespace we can skip over + // namespaced elements + rgw_obj_index_key marker_skip_ahead; + rgw_obj_index_key prev_marker; - string skip_after_delim; for (uint16_t attempt = 1; /* empty */; ++attempt) { ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << " starting attempt " << attempt << dendl; @@ -2484,33 +2489,34 @@ int RGWRados::Bucket::List::list_objects_ordered( } prev_marker = cur_marker; - if (skip_after_delim > cur_marker.name) { - cur_marker = skip_after_delim; - - ldout(cct, 20) << "setting cur_marker=" - << cur_marker.name - << "[" << cur_marker.instance << "]" - << dendl; + // see whether we found a way to skip ahead in the previous + // iteration + if (marker_skip_ahead > cur_marker) { + cur_marker = marker_skip_ahead; + ldout(cct, 20) << "advancing cur_marker=" << cur_marker << dendl; } std::map ent_map; + const size_t num_requested = read_ahead + 1 - count; int r = store->cls_bucket_list_ordered(target->get_bucket_info(), shard_id, cur_marker, cur_prefix, - read_ahead + 1 - count, + num_requested, params.list_versions, attempt, ent_map, &truncated, &cur_marker); - if (r < 0) + if (r < 0) { return r; + } for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) { + const std::string& key = eiter->first; rgw_bucket_dir_entry& entry = eiter->second; rgw_obj_index_key index_key = entry.key; - rgw_obj_key obj(index_key); + rgw_obj_key obj(index_key); // NB: why is this re-set below? can't be const ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ << " considering entry " << entry.key << dendl; @@ -2527,20 +2533,22 @@ int RGWRados::Bucket::List::list_objects_ordered( continue; } - bool matched_ns = (obj.ns == params.ns); if (!params.list_versions && !entry.is_visible()) { continue; } + const bool matched_ns = (obj.ns == params.ns); if (params.enforce_ns && !matched_ns) { if (!params.ns.empty()) { /* we've iterated past the namespace we're searching -- done now */ truncated = false; goto done; - } - - /* we're not looking at the namespace this object is in, next! */ - continue; + } else { + // we're enforcing an empty namespace, so we need to skip + // past the namespace block + marker_skip_ahead = rgw_obj_key::after_namespace_marker(key); + continue; + } } if (cur_end_marker_valid && cur_end_marker <= index_key) { @@ -2553,19 +2561,21 @@ int RGWRados::Bucket::List::list_objects_ordered( next_marker = index_key; } - if (params.filter && !params.filter->filter(obj.name, index_key.name)) + if (params.filter && !params.filter->filter(obj.name, index_key.name)) { continue; + } if (params.prefix.size() && - (obj.name.compare(0, params.prefix.size(), params.prefix) != 0)) + (obj.name.compare(0, params.prefix.size(), params.prefix) != 0)) { continue; + } if (!params.delim.empty()) { int delim_pos = obj.name.find(params.delim, params.prefix.size()); if (delim_pos >= 0) { - /* extract key -with trailing delimiter- for CommonPrefix */ - string prefix_key = + /* extract key *with* trailing delimiter for CommonPrefix */ + const std::string prefix_key = obj.name.substr(0, delim_pos + params.delim.length()); if (common_prefixes && @@ -2577,10 +2587,13 @@ int RGWRados::Bucket::List::list_objects_ordered( next_marker = prefix_key; (*common_prefixes)[prefix_key] = true; - skip_after_delim = obj.name.substr(0, delim_pos); - skip_after_delim.append(after_delim_s); - - ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl; + // setting marker_skip_ahead allows the next call to + // cls_bucket_list_ordered to skip over unlisted entries; + // NOTE: after_delim_s + const std::string skip_name = obj.name.substr(0, delim_pos) + after_delim_s; + const rgw_obj_key skip_key(skip_name, "" /* empty instance*/ , obj.ns); + skip_key.get_index_key(&marker_skip_ahead); + ldout(cct, 20) << "marker_skip_ahead=" << marker_skip_ahead << dendl; count++; } @@ -2618,8 +2631,15 @@ int RGWRados::Bucket::List::list_objects_ordered( } // for (uint16_t attempt... done: - if (is_truncated) + + auto csz = (common_prefixes) ? common_prefixes->size() : 0; + ldout(cct, 10) << "RGWRados::Bucket::List::" << __func__ << + " INFO returning " << result->size() << " entries and " + << csz << " common prefixes" << dendl; + + if (is_truncated) { *is_truncated = truncated; + } return 0; } // list_objects_ordered -- 2.39.5