rgw: clean up "address 0-length listing results..."

author J. Eric Ivancich <ivancich@redhat.com>

Fri, 10 Jan 2020 19:12:35 +0000 (14:12 -0500)

committer J. Eric Ivancich <ivancich@redhat.com>

Thu, 13 Feb 2020 03:34:59 +0000 (22:34 -0500)
author J. Eric Ivancich <ivancich@redhat.com>
Fri, 10 Jan 2020 19:12:35 +0000 (14:12 -0500)
committer J. Eric Ivancich <ivancich@redhat.com>
Thu, 13 Feb 2020 03:34:59 +0000 (22:34 -0500)
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc

index 477f05f3d78859ed9678d2f74becd6d5ff70bd5b..4f4dc5d3f5b044b45ebc3fce3b7fc047dd4ed12e 100644 (file)
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -6751,18 +6751,16 @@ next:
      formatter->open_array_section("objects");
  
      constexpr uint32_t NUM_ENTRIES = 1000;
-    uint16_t attempt = 1;
+    uint16_t expansion_factor = 1;
      while (is_truncated) {
        RGWRados::ent_map_t result;
-      int r =
-       store->getRados()->cls_bucket_list_ordered(
-         bucket_info, RGW_NO_SHARD,
-         marker, empty_prefix, empty_delimiter,
-         NUM_ENTRIES, true, attempt,
-         result, &is_truncated, &cls_filtered, &marker,
-         null_yield,
-         rgw_bucket_object_check_filter);
-
+      int r = store->getRados()->cls_bucket_list_ordered(
+       bucket_info, RGW_NO_SHARD,
+       marker, empty_prefix, empty_delimiter,
+       NUM_ENTRIES, true, expansion_factor,
+       result, &is_truncated, &cls_filtered, &marker,
+       null_yield,
+       rgw_bucket_object_check_filter);
        if (r < 0 && r != -ENOENT) {
          cerr << "ERROR: failed operation r=" << r << std::endl;
        } else if (r == -ENOENT) {
@@ -6770,9 +6768,10 @@ next:
        }
  
        if (result.size() < NUM_ENTRIES / 8) {
-       ++attempt;
-      } else if (result.size() > NUM_ENTRIES * 7 / 8 && attempt > 1) {
-       --attempt;
+       ++expansion_factor;
+      } else if (result.size() > NUM_ENTRIES * 7 / 8 &&
+                expansion_factor > 1) {
+       --expansion_factor;
        }
  
        for (auto iter = result.begin(); iter != result.end(); ++iter) {
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc

index c17936a825d94693a774eeaf2a874b88f659ed86..108b1c13e2d1c5278f7ae8fb5f4310e393a52aa7 100644 (file)
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -57,6 +57,10 @@
  
  #define BUCKET_TAG_TIMEOUT 30
  
+// default number of entries to list with each bucket listing call
+// (use marker to bridge between calls)
+static constexpr size_t listing_max_entries = 1000;
+
  
  /*
   * The tenant_name is always returned on purpose. May be empty, of course.
@@ -1015,15 +1019,14 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
  
    Formatter *formatter = flusher.get_formatter();
    formatter->open_object_section("objects");
-  constexpr uint32_t NUM_ENTRIES = 1000;
-  uint16_t attempt = 1;
+  uint16_t expansion_factor = 1;
    while (is_truncated) {
      RGWRados::ent_map_t result;
      result.reserve(listing_max_entries);
  
      int r = store->getRados()->cls_bucket_list_ordered(
        bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
-      listing_max_entries, true, attempt,
+      listing_max_entries, true, expansion_factor,
        result, &is_truncated, &cls_filtered, &marker,
        y, rgw_bucket_object_check_filter);
      if (r == -ENOENT) {
@@ -1032,10 +1035,11 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
        set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
      }
  
-    if (result.size() < NUM_ENTRIES / 8) {
-      ++attempt;
-    } else if (result.size() > NUM_ENTRIES * 7 / 8 && attempt > 1) {
-      --attempt;
+    if (result.size() < listing_max_entries / 8) {
+      ++expansion_factor;
+    } else if (result.size() > listing_max_entries * 7 / 8 &&
+              expansion_factor > 1) {
+      --expansion_factor;
      }
  
      dump_bucket_index(result, formatter);
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h

index d0f38869c540775eddcd690c430a17df0999f8d5..d30f410fddcf56d2c1648e261443f075dbcd4489 100644 (file)
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -24,9 +24,6 @@
  #include "services/svc_bucket_sync.h"
  
  
-static constexpr size_t listing_max_entries = 1000;
-
-
  // define as static when RGWBucket implementation completes
  extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id);
  
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc

index 8b4bef1b9579f563c34bb2af67997e421a757024..85f20574257f93b52c122766ef58240565b4570d 100644 (file)
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -1818,9 +1818,11 @@ int RGWRados::Bucket::List::list_objects_ordered(
      for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
        rgw_bucket_dir_entry& entry = eiter->second;
        rgw_obj_index_key index_key = entry.key;
-
        rgw_obj_key obj(index_key);
  
+      ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
+       " considering entry " << entry.key << dendl;
+
        /* note that parse_raw_oid() here will not set the correct
         * object's instance, as rgw_obj_index_key encodes that
         * separately. We don't need to set the instance because it's
@@ -1834,12 +1836,12 @@ int RGWRados::Bucket::List::list_objects_ordered(
          continue;
        }
  
-      bool check_ns = (obj.ns == params.ns);
+      bool matched_ns = (obj.ns == params.ns);
        if (!params.list_versions && !entry.is_visible()) {
          continue;
        }
  
-      if (params.enforce_ns && !check_ns) {
+      if (params.enforce_ns && !matched_ns) {
          if (!params.ns.empty()) {
            /* we've iterated past the namespace we're searching -- done now */
            truncated = false;
@@ -8171,7 +8173,7 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
                                       const string& delimiter,
                                       const uint32_t num_entries,
                                       const bool list_versions,
-                                     const uint16_t attempt,
+                                     const uint16_t expansion_factor,
                                       ent_map_t& m,
                                       bool* is_truncated,
                                       bool* cls_filtered,
@@ -8179,13 +8181,18 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
                                        optional_yield y,
                                       check_filter_t force_check_filter)
  {
+  /* expansion_factor allows the number of entries to read to grow
+   * exponentially; this is used when earlier reads are producing too
+   * few results, perhaps due to filtering or to a series of
+   * namespaced entries */
+
    ldout(cct, 10) << "RGWRados::" << __func__ << ": " << bucket_info.bucket <<
      " start_after=\"" << start_after.name <<
      "[" << start_after.instance <<
      "]\", prefix=\"" << prefix <<
      "\" num_entries=" << num_entries <<
      ", list_versions=" << list_versions <<
-    ", attempt=" << attempt << dendl;
+    ", expansion_factor=" << expansion_factor << dendl;
  
    RGWSI_RADOS::Pool index_pool;
    // key   - oid (for different shards if there is any)
@@ -8200,14 +8207,14 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
  
    const uint32_t shard_count = oids.size();
    uint32_t num_entries_per_shard;
-  if (attempt == 0) {
+  if (expansion_factor == 0) {
      num_entries_per_shard =
        calc_ordered_bucket_list_per_shard(num_entries, shard_count);
-  } else if (attempt <= 11) {
+  } else if (expansion_factor <= 11) {
     // we'll max out the exponential multiplication factor at 1024 (1<<10)
      num_entries_per_shard =
        std::min(num_entries,
-              (uint32_t(1 << (attempt - 1)) *
+              (uint32_t(1 << (expansion_factor - 1)) *
                 calc_ordered_bucket_list_per_shard(num_entries, shard_count)));
    } else {
      num_entries_per_shard = num_entries;
@@ -8229,21 +8236,6 @@ int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
      return r;
    }
  
-  auto result_info =
-    [](const map<int, struct rgw_cls_list_ret>& m) -> std::string {
-      std::stringstream out;
-      out << "{ size:" << m.size() << ", entries:[";
-      for (const auto& i : m) {
-       out << " { " << i.first << ", " << i.second.dir.m.size() << " },";
-      }
-      out << "] }";
-      return out.str();
-    };
-
-  ldout(cct, 20) << "RGWRados::" << __func__ <<
-    " CLSRGWIssueBucketList() result=" <<
-    result_info(list_results) << dendl;
-
    // create a list of iterators that are used to iterate each shard
    vector<RGWRados::ent_map_t::iterator> vcurrents;
    vector<RGWRados::ent_map_t::iterator> vends;
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h

index 6ad236a70672c2dcec5519f2de1ed4900da5e9fb..040f88d01be8c8cda54e2de9414dab3e14410319 100644 (file)
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1384,7 +1384,7 @@ public:
                               const string& delimiter,
                               const uint32_t num_entries,
                               const bool list_versions,
-                             const uint16_t attempt, // 0 means ignore
+                             const uint16_t exp_factor, // 0 means ignore
                               ent_map_t& m,
                               bool* is_truncated,
                               bool* cls_filtered,
author	J. Eric Ivancich <ivancich@redhat.com>
	Fri, 10 Jan 2020 19:12:35 +0000 (14:12 -0500)
committer	J. Eric Ivancich <ivancich@redhat.com>
	Thu, 13 Feb 2020 03:34:59 +0000 (22:34 -0500)
src/rgw/rgw_admin.cc		patch \| blob \| history
src/rgw/rgw_bucket.cc		patch \| blob \| history
src/rgw/rgw_bucket.h		patch \| blob \| history
src/rgw/rgw_rados.cc		patch \| blob \| history
src/rgw/rgw_rados.h		patch \| blob \| history