From 8f8cc9a57b17621cdaec2c8a995f5850b362dc52 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 22 Nov 2024 12:40:24 -0500 Subject: [PATCH] rgw: optimize bucket listing to skip past regions of namespaced entries When listing a bucket and the parameters are such that we're not listing namespaced entries, this commit adds an optimization to advance the marker such that we skip past a whole region of namespaced entries rather than evaluating each entry one-by-one. Signed-off-by: J. Eric Ivancich (cherry picked from commit 91488321277d15b877d8b5c5a5204e18d7ab7fad) --- src/rgw/driver/rados/rgw_rados.cc | 55 ++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index ab6e87cfdfb40..c06edafc03344 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -1927,11 +1927,58 @@ int RGWRados::Bucket::List::list_objects_ordered( ": finished due to getting past requested namespace \"" << params.ns << "\"" << dendl; goto done; - } + } else if (!obj.ns.empty()) { + // We're in the namespace range and we're enforcing an empty + // namespace, therefore we can skip past a contiguous chunk + // of namespaced entries. Namespaces are demarcated in the + // index key by underscores before and after the namespace + // name (e.g., "_somenamespace_somekey"). Also, regular + // entries might begin with an underscore, in which case + // they're escaped with another underscore (e.g., "_foobar" + // is encoded as "__foobar"). We also have to account for + // the fact that in lexical ordering there are characters + // both before underscore (e.g., uppercase letters) and + // after (e.g., lowercase letters). So that means there can + // be five distinct and meaningful regions in the lexical + // ordering of entries, which we'll use examples to help + // illustrate: + + // 1. FOOBAR (regular pre-underscore) + // 2. 
_BAZ_foobar (namespaced, with namespace pre-underscore) + // 3. __foobar (regular with escaped underscore) + // 4. _baz_foobar (namespaced, with namespace post-underscore) + // 5. foobar (regular, post-underscore) + + // So if we're skipping namespaces and recognize we're in + // region 2, we must skip to region 3. And if we recognize + // we're in region 4, we skip to region 5. + rgw_obj_index_key potential_marker; + if (obj.ns[0] < '_') { + // We're in region 2, so need to skip to region 3. The + // caret (^) is the ASCII character that precedes + // underscore, so we'll set the marker to the + // caret/circumflex followed by 0xFF, so the key after can + // be in the double underscore range. + potential_marker = rgw_obj_index_key("_^\xFF"); + } else { + // we're past the escaped underscore region (i.e., + // starting with two underscores), so we can skip past the + // underscore region + potential_marker = rgw_obj_index_key("_\xFF"); + } + + if (cur_marker < potential_marker) { + ldpp_dout(dpp, 20) << __func__ << + ": skipping past region of namespaced entries, starting with \"" << + entry.key << "\"" << dendl; + cur_marker = potential_marker; + break; // leave inner loop (for) and allow another cls call + } + } - /* we're skipping past namespaced objects */ + // we're skipping past namespaced objects ldpp_dout(dpp, 20) << __func__ << - ": skipping past namespaced objects, including \"" << entry.key << + ": skipping past individual namespaced entry \"" << entry.key << "\"" << dendl; continue; } @@ -1952,7 +1999,7 @@ int RGWRados::Bucket::List::list_objects_ordered( if (params.access_list_filter && !params.access_list_filter(obj.name, index_key.name)) { ldpp_dout(dpp, 20) << __func__ << - ": skipping past namespaced objects, including \"" << entry.key << + ": skipping past filtered out entry \"" << entry.key << "\"" << dendl; continue; } -- 2.39.5