rgw: consider this scenario, many sibling directories, and many objects under each...

author zealot <xzd16@sohu.com>

Sun, 26 Jun 2022 16:22:19 +0000 (00:22 +0800)

committer zealot <xzd16@sohu.com>

Sat, 20 Aug 2022 02:38:37 +0000 (10:38 +0800)
author zealot <xzd16@sohu.com>
Sun, 26 Jun 2022 16:22:19 +0000 (00:22 +0800)
committer zealot <xzd16@sohu.com>
Sat, 20 Aug 2022 02:38:37 +0000 (10:38 +0800)
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc

index 38ea670f34ddee146d7b368b5296ac9114caaa47..de272c94769e74adbb94c43c76c306f778958c37 100644 (file)
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -8657,14 +8657,12 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
  
    // add the next unique candidate, or return false if we reach the end
    auto next_candidate = [] (CephContext *cct, ShardTracker& t,
-                            std::map<std::string, size_t>& candidates,
+                            std::multimap<std::string, size_t>& candidates,
                              size_t tracker_idx) {
-    while (!t.at_end()) {
-      if (candidates.emplace(t.entry_name(), tracker_idx).second) {
-        return;
-      }
-      t.advance(); // skip duplicate common prefixes
+    if (!t.at_end()) {
+      candidates.emplace(t.entry_name(), tracker_idx);
      }
+    return;
    };
  
    // one tracker per shard requested (may not be all shards)
@@ -8686,8 +8684,10 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
    // (key=candidate, value=index into results_trackers); as we consume
    // entries from shards, we replace them with the next entries in the
    // shards until we run out
-  std::map<std::string, size_t> candidates;
+  std::multimap<std::string, size_t> candidates;
    size_t tracker_idx = 0;
+  std::vector<size_t> vidx;
+  vidx.reserve(shard_list_results.size());
    for (auto& t : results_trackers) {
      // it's important that the values in the map refer to the index
      // into the results_trackers vector, which may not be the same
@@ -8763,12 +8763,23 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
      }
  
      // refresh the candidates map
-    candidates.erase(candidates.begin());
-    tracker.advance();
-
-    next_candidate(cct, tracker, candidates, tracker_idx);
-
-    if (tracker.at_end() && tracker.is_truncated()) {
+    vidx.clear();
+    bool need_to_stop = false;
+    auto range = candidates.equal_range(name);
+    for (auto i = range.first; i != range.second; ++i) {
+      vidx.push_back(i->second);
+    } 
+    candidates.erase(range.first, range.second);
+    for (auto idx : vidx) {
+      auto& tracker_match = results_trackers.at(idx);
+      tracker_match.advance();
+      next_candidate(cct, tracker_match, candidates, idx);
+      if (tracker_match.at_end() && tracker_match.is_truncated()) {
+        need_to_stop = true;
+        break;
+      }
+    }
+    if (need_to_stop) {
        // once we exhaust one shard that is truncated, we need to stop,
        // as we cannot be certain that one of the next entries needs to
        // come from that shard; S3 and swift protocols allow returning
@@ -8776,7 +8787,7 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
        ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
         ": stopped accumulating results at count=" << count <<
         ", dirent=\"" << dirent.key <<
-       "\", because its shard is untruncated and exhaused" << dendl;
+       "\", because its shard is truncated and exhausted" << dendl;
        break;
      }
    } // while we haven't provided requested # of result entries
author	zealot <xzd16@sohu.com>
	Sun, 26 Jun 2022 16:22:19 +0000 (00:22 +0800)
committer	zealot <xzd16@sohu.com>
	Sat, 20 Aug 2022 02:38:37 +0000 (10:38 +0800)