From ce1768202b64dd73f549a3660968f10ebe184a7b Mon Sep 17 00:00:00 2001
From: "J. Eric Ivancich" <ivancich@redhat.com>
Date: Tue, 6 Aug 2019 17:44:29 -0400
Subject: [PATCH] rgw: move some bucket index delimiter filtering logic to cls

Currently the cls code that returns bucket index entries does no
filtering when a delimiter is specified. This means that the cls code
will return many entries that the rgw code does not need. This means
unnecessary bucket index entries will be encoded, decoded, and consume
network bandwidth.

A change is made to send a delimiter, if it's specified, to the cls
code, so the filtering can be done there.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
---
 src/cls/rgw/cls_rgw.cc           | 158 +++++++++++++++++++++++--------
 src/cls/rgw/cls_rgw_client.cc    |  31 ++++--
 src/cls/rgw/cls_rgw_client.h     |  14 ++-
 src/cls/rgw/cls_rgw_ops.h        |  18 +++-
 src/cls/rgw/cls_rgw_types.h      |   9 ++
 src/rgw/rgw_admin.cc             |  16 ++--
 src/rgw/rgw_bucket.cc            |   6 +-
 src/rgw/rgw_rados.cc             |  79 ++++++++--------
 src/rgw/rgw_rados.h              |   7 +-
 src/test/cls_rgw/test_cls_rgw.cc |   7 +-
 10 files changed, 237 insertions(+), 108 deletions(-)

diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc
index 7d56e0fea3f9f..bdc3f78433246 100644
--- a/src/cls/rgw/cls_rgw.cc
+++ b/src/cls/rgw/cls_rgw.cc
@@ -5,6 +5,8 @@
 
 #include <errno.h>
 
+#include <boost/algorithm/string.hpp>
+
 #include "objclass/objclass.h"
 #include "cls/rgw/cls_rgw_ops.h"
 #include "cls/rgw/cls_rgw_const.h"
@@ -439,94 +441,175 @@ static int read_bucket_header(cls_method_context_t hctx,
 
 int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
 {
+  // maximum number of calls to get_obj_vals we'll try; compromise
+  // between wanting to return the requested # of entries, but not
+  // wanting to slow down this op with too many omap reads
+  constexpr int max_attempts = 8;
+
   auto iter = in->cbegin();
 
   rgw_cls_list_op op;
   try {
     decode(op, iter);
   } catch (buffer::error& err) {
-    CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode request\n");
+    CLS_LOG(1, "ERROR: %s: failed to decode request\n", __func__);
     return -EINVAL;
   }
 
   rgw_cls_list_ret ret;
   rgw_bucket_dir& new_dir = ret.dir;
+  auto& name_entry_map = new_dir.m; // map of keys to entries
+
   int rc = read_bucket_header(hctx, &new_dir.header);
   if (rc < 0) {
-    CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to read header\n");
+    CLS_LOG(1, "ERROR: %s: failed to read header\n", __func__);
     return rc;
   }
 
-  map<string, bufferlist> keys;
-  std::map<string, bufferlist>::iterator kiter;
-  string start_after_key;
-  encode_list_index_key(hctx, op.start_obj, &start_key);
-  bool done = false;
-  uint32_t left_to_read = op.num_entries;
-  bool more;
-
-  do {
-    rc = get_obj_vals(hctx, start_after_key, op.filter_prefix, left_to_read, &keys, &more);
-    if (rc < 0)
+  string start_after_key;   // key that we can start listing at, one of a)
+                            // sent in by caller, b) last item visited, or
+                            // c) when delimiter present, a key that will
+                            // move past the subdirectory
+  encode_list_index_key(hctx, op.start_obj, &start_after_key);
+
+  string previous_key; // last key stored in result, so if we have to
+		       // call get_obj_vals multiple times, we do not
+		       // add the overlap to result
+  string previous_prefix_key; // last prefix_key stored in result, so
+			      // we can skip over entries with the
+			      // same prefix_key
+
+  bool done = false;   // whether we need to keep calling get_obj_vals
+  bool more = true;    // output parameter of get_obj_vals
+  bool has_delimiter = !op.delimiter.empty();
+
+  if (has_delimiter &&
+      boost::algorithm::ends_with(start_after_key, op.delimiter)) {
+    // advance past all subdirectory entries if we start after a
+    // subdirectory
+    start_after_key = cls_rgw_after_delim(start_after_key);
+  }
+
+  for (int attempt = 0;
+       attempt < max_attempts &&
+	 more &&
+	 !done &&
+	 name_entry_map.size() < op.num_entries;
+       ++attempt) {
+    map<string, bufferlist> keys;
+    rc = get_obj_vals(hctx, start_after_key, op.filter_prefix,
+		      op.num_entries - name_entry_map.size(),
+		      &keys, &more);
+    if (rc < 0) {
       return rc;
-
-    auto& m = new_dir.m;
+    }
 
     done = keys.empty();
 
-    for (kiter = keys.begin(); kiter != keys.end(); ++kiter) {
-      rgw_bucket_dir_entry entry;
-
+    for (auto kiter = keys.cbegin(); kiter != keys.cend(); ++kiter) {
       if (!bi_is_objs_index(kiter->first)) {
+	// we're done if we walked off the end of the objects area of
+	// the bucket index
         done = true;
         break;
       }
 
-      bufferlist& entrybl = kiter->second;
-      auto eiter = entrybl.cbegin();
+      rgw_bucket_dir_entry entry;
       try {
+	const bufferlist& entrybl = kiter->second;
+	auto eiter = entrybl.cbegin();
         decode(entry, eiter);
       } catch (buffer::error& err) {
-        CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode entry, key=%s\n", kiter->first.c_str());
+        CLS_LOG(1, "ERROR: %s: failed to decode entry, key=%s\n",
+		__func__, kiter->first.c_str());
         return -EINVAL;
       }
 
+      start_after_key = kiter->first;
+      CLS_LOG(20, "%s: working on key=%s len=%zu",
+	      __func__, kiter->first.c_str(), kiter->first.size());
+
       cls_rgw_obj_key key;
       uint64_t ver;
-
-      start_key = kiter->first;
-      CLS_LOG(20, "start_key=%s len=%zu", start_key.c_str(), start_key.size());
-
       int ret = decode_list_index_key(kiter->first, &key, &ver);
       if (ret < 0) {
-        CLS_LOG(0, "ERROR: failed to decode list index key (%s)\n", escape_str(kiter->first).c_str());
+        CLS_LOG(0, "ERROR: %s: failed to decode list index key (%s)\n",
+		__func__, escape_str(kiter->first).c_str());
         continue;
       }
 
       if (!entry.is_valid()) {
-        CLS_LOG(20, "entry %s[%s] is not valid\n", key.name.c_str(), key.instance.c_str());
+        CLS_LOG(20, "%s: entry %s[%s] is not valid\n",
+		__func__, key.name.c_str(), key.instance.c_str());
         continue;
       }
       
       // filter out noncurrent versions, delete markers, and initial marker
-      if (!op.list_versions && (!entry.is_visible() || op.start_obj.name == key.name)) {
-        CLS_LOG(20, "entry %s[%s] is not visible\n", key.name.c_str(), key.instance.c_str());
+      if (!op.list_versions &&
+	  (!entry.is_visible() || op.start_obj.name == key.name)) {
+        CLS_LOG(20, "%s: entry %s[%s] is not visible\n",
+		__func__, key.name.c_str(), key.instance.c_str());
         continue;
       }
-      if (m.size() < op.num_entries) {
-        m[kiter->first] = entry;
+
+      if (has_delimiter) {
+        int delim_pos = key.name.find(op.delimiter, op.filter_prefix.size());
+
+        if (delim_pos >= 0) {
+	  /* extract key with trailing delimiter */
+          string prefix_key =
+	    key.name.substr(0, delim_pos + op.delimiter.length());
+
+	  if (prefix_key == previous_prefix_key) {
+	    continue; // we've already added this;
+	  } else {
+	    previous_prefix_key = prefix_key;
+	  }
+
+	  if (name_entry_map.size() < op.num_entries) {
+	    rgw_bucket_dir_entry proxy_entry;
+	    cls_rgw_obj_key proxy_key(prefix_key);
+	    proxy_entry.key = cls_rgw_obj_key(proxy_key);
+	    proxy_entry.flags = rgw_bucket_dir_entry::FLAG_COMMON_PREFIX;
+	    name_entry_map[prefix_key] = proxy_entry;
+
+	    CLS_LOG(20, "%s: got common prefix entry %s[%s] num entries=%lu\n",
+		    __func__, proxy_key.name.c_str(), proxy_key.instance.c_str(),
+		    name_entry_map.size());
+	  }
+
+	  // make sure that if this is the last item added to the
+	  // result from this call to get_obj_vals, the next call will
+	  // skip past rest of "subdirectory"
+	  start_after_key = cls_rgw_after_delim(prefix_key);
+
+	  // advance to past this subdirectory, but then back up one,
+	  // so the loop increment will put us in the right place
+	  kiter = keys.lower_bound(start_after_key);
+	  --kiter;
+
+          continue;
+        }
+
+	// no delimiter after prefix found, so this is a "top-level"
+	// item and we can just fall through
       }
-      left_to_read--;
 
-      CLS_LOG(20, "got entry %s[%s] m.size()=%d\n", key.name.c_str(), key.instance.c_str(), (int)m.size());
-    }
-  } while (left_to_read > 0 && !done);
+      if (name_entry_map.size() < op.num_entries &&
+	  kiter->first != previous_key) {
+        name_entry_map[kiter->first] = entry;
+	previous_key = kiter->first;
+	CLS_LOG(20, "%s: got object entry %s[%s] num entries=%d\n",
+		__func__, key.name.c_str(), key.instance.c_str(),
+		int(name_entry_map.size()));
+      }
+    } // for (auto kiter...
+  } // for (int attempt...
 
   ret.is_truncated = more && !done;
-
   encode(ret, *out);
   return 0;
-}
+} // rgw_bucket_list
 
 static int check_index(cls_method_context_t hctx,
 		       rgw_bucket_dir_header *existing_header,
@@ -4083,4 +4166,3 @@ CLS_INIT(rgw)
 
   return;
 }
-
diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc
index 4422c82174e71..65e59047ebada 100644
--- a/src/cls/rgw/cls_rgw_client.cc
+++ b/src/cls/rgw/cls_rgw_client.cc
@@ -210,6 +210,7 @@ void cls_rgw_bucket_complete_op(ObjectWriteOperation& o, RGWModifyOp op, string&
 void cls_rgw_bucket_list_op(librados::ObjectReadOperation& op,
                             const cls_rgw_obj_key& start_obj,
                             const std::string& filter_prefix,
+                            const std::string& delimiter,
                             uint32_t num_entries,
                             bool list_versions,
                             rgw_cls_list_ret* result)
@@ -218,28 +219,38 @@ void cls_rgw_bucket_list_op(librados::ObjectReadOperation& op,
   rgw_cls_list_op call;
   call.start_obj = start_obj;
   call.filter_prefix = filter_prefix;
+  call.delimiter = delimiter;
   call.num_entries = num_entries;
   call.list_versions = list_versions;
   encode(call, in);
 
-  op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, new ClsBucketIndexOpCtx<rgw_cls_list_ret>(result, NULL));
+  op.exec(RGW_CLASS, RGW_BUCKET_LIST, in,
+	  new ClsBucketIndexOpCtx<rgw_cls_list_ret>(result, NULL));
 }
 
-static bool issue_bucket_list_op(librados::IoCtx& io_ctx, const string& oid,
+static bool issue_bucket_list_op(librados::IoCtx& io_ctx,
+				 const string& oid,
 				 const cls_rgw_obj_key& start_obj,
 				 const string& filter_prefix,
-				 uint32_t num_entries, bool list_versions,
+				 const string& delimiter,
+				 uint32_t num_entries,
+				 bool list_versions,
 				 BucketIndexAioManager *manager,
-				 rgw_cls_list_ret *pdata) {
+				 rgw_cls_list_ret *pdata)
+{
   librados::ObjectReadOperation op;
-  cls_rgw_bucket_list_op(op, start_obj, filter_prefix,
+  cls_rgw_bucket_list_op(op,
+			 start_obj, filter_prefix, delimiter,
                          num_entries, list_versions, pdata);
   return manager->aio_operate(io_ctx, oid, &op);
 }
 
 int CLSRGWIssueBucketList::issue_op(int shard_id, const string& oid)
 {
-  return issue_bucket_list_op(io_ctx, oid, start_obj, filter_prefix, num_entries, list_versions, &manager, &result[shard_id]);
+  return issue_bucket_list_op(io_ctx, oid,
+			      start_obj, filter_prefix, delimiter,
+			      num_entries, list_versions, &manager,
+			      &result[shard_id]);
 }
 
 void cls_rgw_remove_obj(librados::ObjectWriteOperation& o, list<string>& keep_attr_prefixes)
@@ -577,8 +588,12 @@ void cls_rgw_suggest_changes(ObjectWriteOperation& o, bufferlist& updates)
 
 int CLSRGWIssueGetDirHeader::issue_op(int shard_id, const string& oid)
 {
-  cls_rgw_obj_key nokey;
-  return issue_bucket_list_op(io_ctx, oid, nokey, "", 0, false, &manager, &result[shard_id]);
+  cls_rgw_obj_key empty_key;
+  string empty_prefix;
+  string empty_delimiter;
+  return issue_bucket_list_op(io_ctx, oid,
+			      empty_key, empty_prefix, empty_delimiter,
+			      0, false, &manager, &result[shard_id]);
 }
 
 static bool issue_resync_bi_log(librados::IoCtx& io_ctx, const string& oid, BucketIndexAioManager *manager)
diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h
index 2d1566b7ad357..4a24eddeba5f3 100644
--- a/src/cls/rgw/cls_rgw_client.h
+++ b/src/cls/rgw/cls_rgw_client.h
@@ -429,25 +429,33 @@ int cls_rgw_usage_log_trim(librados::IoCtx& io_ctx, const string& oid, const str
 class CLSRGWIssueBucketList : public CLSRGWConcurrentIO {
   cls_rgw_obj_key start_obj;
   string filter_prefix;
+  string delimiter;
   uint32_t num_entries;
   bool list_versions;
   map<int, rgw_cls_list_ret>& result;
 protected:
   int issue_op(int shard_id, const string& oid) override;
 public:
-  CLSRGWIssueBucketList(librados::IoCtx& io_ctx, const cls_rgw_obj_key& _start_obj,
-                        const string& _filter_prefix, uint32_t _num_entries,
+  CLSRGWIssueBucketList(librados::IoCtx& io_ctx,
+			const cls_rgw_obj_key& _start_obj,
+                        const string& _filter_prefix,
+			const string& _delimiter,
+			uint32_t _num_entries,
                         bool _list_versions,
                         map<int, string>& oids,
                         map<int, rgw_cls_list_ret>& list_results,
                         uint32_t max_aio) :
   CLSRGWConcurrentIO(io_ctx, oids, max_aio),
-  start_obj(_start_obj), filter_prefix(_filter_prefix), num_entries(_num_entries), list_versions(_list_versions), result(list_results) {}
+    start_obj(_start_obj), filter_prefix(_filter_prefix), delimiter(_delimiter),
+    num_entries(_num_entries), list_versions(_list_versions),
+    result(list_results)
+  {}
 };
 
 void cls_rgw_bucket_list_op(librados::ObjectReadOperation& op,
                             const cls_rgw_obj_key& start_obj,
                             const std::string& filter_prefix,
+			    const std::string& delimiter,
                             uint32_t num_entries,
                             bool list_versions,
                             rgw_cls_list_ret* result);
diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h
index 0d3be7cd18226..47388f4a97b9c 100644
--- a/src/cls/rgw/cls_rgw_ops.h
+++ b/src/cls/rgw/cls_rgw_ops.h
@@ -385,30 +385,38 @@ struct rgw_cls_list_op
   cls_rgw_obj_key start_obj;
   uint32_t num_entries;
   string filter_prefix;
+  string delimiter;
   bool list_versions;
 
   rgw_cls_list_op() : num_entries(0), list_versions(false) {}
 
   void encode(bufferlist &bl) const {
-    ENCODE_START(5, 4, bl);
+    ENCODE_START(6, 4, bl);
     encode(num_entries, bl);
     encode(filter_prefix, bl);
     encode(start_obj, bl);
     encode(list_versions, bl);
+    encode(delimiter, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::const_iterator &bl) {
-    DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl);
+    DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
     if (struct_v < 4) {
       decode(start_obj.name, bl);
     }
     decode(num_entries, bl);
-    if (struct_v >= 3)
+    if (struct_v >= 3) {
       decode(filter_prefix, bl);
-    if (struct_v >= 4)
+    }
+    if (struct_v >= 4) {
       decode(start_obj, bl);
-    if (struct_v >= 5)
+    }
+    if (struct_v >= 5) {
       decode(list_versions, bl);
+    }
+    if (struct_v >= 6) {
+      decode(delimiter, bl);
+    }
     DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h
index 49add410fe00b..5d1dc864fd596 100644
--- a/src/cls/rgw/cls_rgw_types.h
+++ b/src/cls/rgw/cls_rgw_types.h
@@ -347,6 +347,12 @@ struct rgw_bucket_dir_entry {
   static constexpr uint16_t FLAG_DELETE_MARKER =      0x4;
   /* object is versioned, a placeholder for the plain entry */
   static constexpr uint16_t FLAG_VER_MARKER =         0x8;
+  /* object is a proxy; it is not listed in the bucket index but is a
+   * prefix ending with a delimiter, perhaps common to multiple
+   * entries; it is only useful when a delimiter is used and
+   * represents a "subdirectory" (again, ending in a delimiter) that
+   * may contain one or more actual entries/objects */
+  static constexpr uint16_t FLAG_COMMON_PREFIX =   0x8000;
 
   cls_rgw_obj_key key;
   rgw_bucket_entry_ver ver;
@@ -424,6 +430,9 @@ struct rgw_bucket_dir_entry {
   bool is_valid() const {
     return (flags & rgw_bucket_dir_entry::FLAG_VER_MARKER) == 0;
   }
+  bool is_common_prefix() const {
+    return flags & rgw_bucket_dir_entry::FLAG_COMMON_PREFIX;
+  }
 
   void dump(Formatter *f) const;
   void decode_json(JSONObj *obj);
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index e59ca611ba96b..7035326c64e59 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -6247,19 +6247,21 @@ next:
     bool is_truncated = true;
 
     rgw_obj_index_key marker;
-    string prefix;
+    string empty_prefix;
+    string empty_delimiter;
 
     formatter->open_object_section("result");
     formatter->dump_string("bucket", bucket_name);
     formatter->open_array_section("objects");
     while (is_truncated) {
       RGWRados::ent_map_t result;
-      int r =
-	store->getRados()->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker,
-				       prefix, 1000, true,
-				       result, &is_truncated, &marker,
-                                       null_yield,
-				       rgw_bucket_object_check_filter);
+      int r = store->getRados()->cls_bucket_list_ordered(
+	bucket_info, RGW_NO_SHARD,
+	marker, empty_prefix, empty_delimiter,
+	1000, true,
+	result, &is_truncated, &marker,
+	null_yield,
+	rgw_bucket_object_check_filter);
       if (r < 0 && r != -ENOENT) {
         cerr << "ERROR: failed operation r=" << r << std::endl;
       }
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 6d228bd3e64e7..800efadcd4c96 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -1004,6 +1004,7 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
   store->getRados()->cls_obj_set_bucket_tag_timeout(bucket_info, BUCKET_TAG_TIMEOUT);
 
   string prefix;
+  string empty_delimiter;
   rgw_obj_index_key marker;
   bool is_truncated = true;
 
@@ -1014,8 +1015,9 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
     result.reserve(listing_max_entries);
 
     int r = store->getRados()->cls_bucket_list_ordered(
-      bucket_info, RGW_NO_SHARD, marker, prefix, listing_max_entries, true,
-      result, &is_truncated, &marker, y, bucket_object_check_filter);
+      bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
+      listing_max_entries, true, result, &is_truncated, &marker, y,
+      rgw_bucket_object_check_filter);
     if (r == -ENOENT) {
       break;
     } else if (r < 0 && r != -ENOENT) {
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index f6c65ea95131c..5c09d3bdbaa0a 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -1743,11 +1743,14 @@ int RGWRados::Bucket::List::list_objects_ordered(
 
   result->clear();
 
-  rgw_obj_key marker_obj(params.marker.name, params.marker.instance, params.ns);
+  rgw_obj_key marker_obj(params.marker.name,
+			 params.marker.instance,
+			 params.ns);
   rgw_obj_index_key cur_marker;
   marker_obj.get_index_key(&cur_marker);
 
-  rgw_obj_key end_marker_obj(params.end_marker.name, params.end_marker.instance,
+  rgw_obj_key end_marker_obj(params.end_marker.name,
+			     params.end_marker.instance,
                              params.ns);
   rgw_obj_index_key cur_end_marker;
   end_marker_obj.get_index_key(&cur_end_marker);
@@ -1771,7 +1774,6 @@ int RGWRados::Bucket::List::list_objects_ordered(
   }
 
   constexpr int allowed_read_attempts = 2;
-  string skip_after_delim;
   for (int attempt = 0; attempt < allowed_read_attempts; ++attempt) {
     // this loop is generally expected only to have a single
     // iteration; see bottom of loop for early exit
@@ -1782,14 +1784,16 @@ int RGWRados::Bucket::List::list_objects_ordered(
 					   shard_id,
 					   cur_marker,
 					   cur_prefix,
+					   params.delim,
 					   read_ahead + 1 - count,
 					   params.list_versions,
 					   ent_map,
 					   &truncated,
 					   &cur_marker,
                                            y);
-    if (r < 0)
+    if (r < 0) {
       return r;
+    }
 
     for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
       rgw_bucket_dir_entry& entry = eiter->second;
@@ -1805,7 +1809,8 @@ int RGWRados::Bucket::List::list_objects_ordered(
        */
       bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
       if (!valid) {
-        ldout(cct, 0) << "ERROR: could not parse object name: " << obj.name << dendl;
+        ldout(cct, 0) << "ERROR: could not parse object name: " <<
+	  obj.name << dendl;
         continue;
       }
 
@@ -1835,36 +1840,37 @@ int RGWRados::Bucket::List::list_objects_ordered(
         next_marker = index_key;
       }
 
-      if (params.filter && !params.filter->filter(obj.name, index_key.name))
+      if (params.filter &&
+	  ! params.filter->filter(obj.name, index_key.name)) {
         continue;
+      }
 
       if (params.prefix.size() &&
-	  (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
+	  0 != obj.name.compare(0, params.prefix.size(), params.prefix)) {
         continue;
+      }
 
       if (!params.delim.empty()) {
         int delim_pos = obj.name.find(params.delim, params.prefix.size());
 
         if (delim_pos >= 0) {
-	  /* extract key -with trailing delimiter- for CommonPrefix */
-          string prefix_key =
-	    obj.name.substr(0, delim_pos + params.delim.length());
-
-          if (common_prefixes &&
-              common_prefixes->find(prefix_key) == common_prefixes->end()) {
+	  // should only find one delimiter at the end if it finds any
+	  // after the prefix
+	  ceph_assert(delim_pos ==
+		      int(obj.name.length() - params.delim.length()));
+          if (common_prefixes) {
             if (count >= max) {
               truncated = true;
               goto done;
             }
-            next_marker = prefix_key;
-            (*common_prefixes)[prefix_key] = true;
 
+            (*common_prefixes)[obj.name] = true;
             count++;
           }
 
           continue;
-        }
-      }
+        } // if found delimiter after prefix
+      } // if there is a delimiter
 
       if (count >= max) {
         truncated = true;
@@ -1875,24 +1881,6 @@ int RGWRados::Bucket::List::list_objects_ordered(
       count++;
     } // eiter for loop
 
-    if (!params.delim.empty()) {
-      int marker_delim_pos = cur_marker.name.find(params.delim, cur_prefix.size());
-      if (marker_delim_pos >= 0) {
-        skip_after_delim = cur_marker.name.substr(0, marker_delim_pos);
-        skip_after_delim.append(after_delim_s);
-
-        ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl;
-
-        if (skip_after_delim > cur_marker.name) {
-          cur_marker = skip_after_delim;
-          ldout(cct, 20) << "setting cur_marker="
-                         << cur_marker.name
-                         << "[" << cur_marker.instance << "]"
-                         << dendl;
-        }
-      }
-    }
-
     // if we finished listing, or if we're returning at least half the
     // requested entries, that's enough; S3 and swift protocols allow
     // returning fewer than max entries
@@ -1906,8 +1894,10 @@ int RGWRados::Bucket::List::list_objects_ordered(
   } // read attempt loop
 
 done:
-  if (is_truncated)
+
+  if (is_truncated) {
     *is_truncated = truncated;
+  }
 
   return 0;
 } // list_objects_ordered
@@ -8019,6 +8009,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
 				      int shard_id,
 				      const rgw_obj_index_key& start_after,
 				      const string& prefix,
+				      const string& delimiter,
 				      uint32_t num_entries,
 				      bool list_versions,
 				      ent_map_t& m,
@@ -8053,7 +8044,8 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
   auto& ioctx = index_pool.ioctx();
   map<int, struct rgw_cls_list_ret> list_results;
   cls_rgw_obj_key start_after_key(start_after.name, start_after.instance);
-  r = CLSRGWIssueBucketList(ioctx, start_key_after, prefix, num_entries_per_shard,
+  r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter,
+			    num_entries_per_shard,
 			    list_versions, oids, list_results,
 			    cct->_conf->rgw_bucket_index_max_aio)();
   if (r < 0) {
@@ -8093,9 +8085,12 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
     const string& name = vcurrents[pos]->first;
     struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
 
-    bool force_check = force_check_filter &&
-        force_check_filter(dirent.key.name);
-    if ((!dirent.exists && !dirent.is_delete_marker()) ||
+    bool force_check =
+      force_check_filter && force_check_filter(dirent.key.name);
+
+    if ((!dirent.exists &&
+	 !dirent.is_delete_marker() &&
+	 !dirent.is_common_prefix()) ||
         !dirent.pending_map.empty() ||
         force_check) {
       /* there are uncommitted ops. We need to check the current
@@ -8245,7 +8240,9 @@ int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
     rgw_cls_list_ret result;
 
     librados::ObjectReadOperation op;
-    cls_rgw_bucket_list_op(op, marker, prefix, num_entries,
+    string empty_delimiter;
+    cls_rgw_bucket_list_op(op, marker, prefix, empty_delimiter,
+			   num_entries,
                            list_versions, &result);
     r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
     if (r < 0)
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 855597705cfd5..2508e46becec7 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1348,10 +1348,13 @@ public:
 
   using check_filter_t = bool (*)(const std::string&);
 
-  int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
+  int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+			      int shard_id,
 			      const rgw_obj_index_key& start_after,
 			      const string& prefix,
-			      uint32_t num_entries, bool list_versions,
+			      const string& delimiter,
+			      uint32_t num_entries,
+			      bool list_versions,
 			      ent_map_t& m,
 			      bool *is_truncated,
 			      rgw_obj_index_key *last_entry,
diff --git a/src/test/cls_rgw/test_cls_rgw.cc b/src/test/cls_rgw/test_cls_rgw.cc
index f0bd4711d2bcc..635c9f23c3b34 100644
--- a/src/test/cls_rgw/test_cls_rgw.cc
+++ b/src/test/cls_rgw/test_cls_rgw.cc
@@ -421,8 +421,11 @@ TEST_F(cls_rgw, index_list)
   map<int, string> oids = { {0, bucket_oid} };
   map<int, struct rgw_cls_list_ret> list_results;
   cls_rgw_obj_key start_key("", "");
-  int r = CLSRGWIssueBucketList(ioctx, start_key, "", 1000, true, oids, list_results, 1)();
-
+  string empty_prefix;
+  string empty_delimiter;
+  int r = CLSRGWIssueBucketList(ioctx, start_key,
+				empty_prefix, empty_delimiter,
+				1000, true, oids, list_results, 1)();
   ASSERT_EQ(r, 0);
   ASSERT_EQ(1u, list_results.size());
 
-- 
2.39.5