]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: ability to list bucket contents in unsorted order for efficiency 21026/head
author J. Eric Ivancich <ivancich@redhat.com>
Mon, 12 Feb 2018 20:03:14 +0000 (15:03 -0500)
committer J. Eric Ivancich <ivancich@redhat.com>
Wed, 11 Apr 2018 17:01:03 +0000 (13:01 -0400)
Add the ability to list the contents of a bucket without the results
being sorted. This can have performance benefits since all bucket
shards do not need to be queried. This is done by setting a parameter
in the REST query string (i.e., "allow_unordered" [note underscore]
for swift or "allow-unordered" [note hyphen] for s3) to true.

This works with other request parameters such as marker, end_marker,
and prefix. However, it does not work when a delimiter is used. Also,
because the results are not in order, a subsequent marker may precede
an earlier marker.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
13 files changed:
doc/radosgw/s3/bucketops.rst
doc/radosgw/swift/containerops.rst
src/cls/rgw/cls_rgw_ops.h
src/rgw/rgw_admin.cc
src/rgw/rgw_bucket.cc
src/rgw/rgw_common.h
src/rgw/rgw_lc.cc
src/rgw/rgw_op.cc
src/rgw/rgw_op.h
src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h
src/rgw/rgw_rest_s3.cc
src/rgw/rgw_rest_swift.cc

index c7cd5b4fd608203af4070b03f15004367c750591..ed1f2a4f6c7d9ff4aaab360445098cc28d9ff209 100644 (file)
@@ -91,18 +91,19 @@ Syntax
 Parameters
 ~~~~~~~~~~
 
-+-----------------+-----------+-----------------------------------------------------------------------+
-| Name            | Type      | Description                                                           |
-+=================+===========+=======================================================================+
-| ``prefix``      | String    | Only returns objects that contain the specified prefix.               |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``delimiter``   | String    | The delimiter between the prefix and the rest of the object name.     |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``marker``      | String    | A beginning index for the list of objects returned.                   |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``max-keys``    | Integer   | The maximum number of keys to return. Default is 1000.                |
-+-----------------+-----------+-----------------------------------------------------------------------+
-
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| Name                | Type      | Description                                                                                     |
++=====================+===========+=================================================================================================+
+| ``prefix``          | String    | Only returns objects that contain the specified prefix.                                         |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``delimiter``       | String    | The delimiter between the prefix and the rest of the object name.                               |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``marker``          | String    | A beginning index for the list of objects returned.                                             |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``max-keys``        | Integer   | The maximum number of keys to return. Default is 1000.                                          |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``allow-unordered`` | Boolean   | Non-standard extension. Allows results to be returned unordered. Cannot be used with delimiter. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
 
 HTTP Response
 ~~~~~~~~~~~~~
index 463d91c6ce1924d15d42d0c2902595cb61e18eea..f97429579d76708ca0b5981d3fc7f231838070f7 100644 (file)
@@ -147,6 +147,13 @@ Parameters
 :Type: String
 :Required: No
 
+``allow_unordered``
+
+:Description: Allows the results to be returned unordered to reduce computation overhead. Cannot be used with ``delimiter``.
+:Type: Boolean
+:Required: No
+:Non-Standard Extension: Yes
+
 
 Response Entities
 ~~~~~~~~~~~~~~~~~
index 48ef7ddce710a3fd0a5fc6f060d482961472531e..37db81e53639488c9f5cdb6dd816d967dbaf0583 100644 (file)
@@ -413,8 +413,7 @@ struct rgw_cls_list_op
 };
 WRITE_CLASS_ENCODER(rgw_cls_list_op)
 
-struct rgw_cls_list_ret
-{
+struct rgw_cls_list_ret {
   rgw_bucket_dir dir;
   bool is_truncated;
 
index 30fd7ecb4ee36be2d9bef3d7eac92ce5f2401215..94a1ba37a1f2188f7512871aba21a44000eba173 100644 (file)
@@ -5748,9 +5748,11 @@ next:
     formatter->open_array_section("objects");
     while (is_truncated) {
       map<string, rgw_bucket_dir_entry> result;
-      int r = store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true,
-                                     result, &is_truncated, &marker,
-                                     bucket_object_check_filter);
+      int r =
+       store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker,
+                                      prefix, 1000, true,
+                                      result, &is_truncated, &marker,
+                                      bucket_object_check_filter);
 
       if (r < 0 && r != -ENOENT) {
         cerr << "ERROR: failed operation r=" << r << std::endl;
index 2398bd3c7bfbd58ce6dd3ecfd92f2cc7aecf246e..925dceed3ed5e7f2812601b5982dc8e13b7b13cd 100644 (file)
@@ -1146,19 +1146,18 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
   while (is_truncated) {
     map<string, rgw_bucket_dir_entry> result;
 
-    int r = store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true,
-                                   result, &is_truncated, &marker,
-                                   bucket_object_check_filter);
+    int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD,
+                                          marker, prefix, 1000, true,
+                                          result, &is_truncated, &marker,
+                                          bucket_object_check_filter);
     if (r == -ENOENT) {
       break;
     } else if (r < 0 && r != -ENOENT) {
       set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
     }
 
-
     dump_bucket_index(result, formatter);
     flusher.flush();
-
   }
 
   formatter->close_section();
index 8cd71a276a51b284e3ea1cafc1789057ff65ce5a..7207fdafa28e1ea5919b331cebbdcfa66d2692b1 100644 (file)
@@ -315,8 +315,7 @@ class NameVal
 };
 
 /** Stores the XML arguments associated with the HTTP request in req_state*/
-class RGWHTTPArgs
-{
+class RGWHTTPArgs {
   string str, empty_str;
   map<string, string> val_map;
   map<string, string> sys_val_map;
@@ -375,7 +374,7 @@ class RGWHTTPArgs
   const string& get_str() {
     return str;
   }
-};
+}; // RGWHTTPArgs
 
 const char *rgw_conf_get(const map<string, string, ltstr_nocase>& conf_map, const char *name, const char *def_val);
 int rgw_conf_get_int(const map<string, string, ltstr_nocase>& conf_map, const char *name, int def_val);
@@ -1204,8 +1203,7 @@ inline ostream& operator<<(ostream& out, const RGWBucketIndexType &index_type)
   }
 }
 
-struct RGWBucketInfo
-{
+struct RGWBucketInfo {
   enum BIShardsHashType {
     MOD = 0
   };
index 62c941618fffdd7555a6263a0eb7d6c66cc86a0f..5aa57d384dc7f5c6d37e9c0e1dc47c307a061960 100644 (file)
@@ -1,3 +1,6 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
 #include <string.h>
 #include <iostream>
 #include <map>
@@ -480,10 +483,10 @@ int RGWLC::bucket_lc_process(string& shard_id)
         continue;
       }
       if (prefix_iter != prefix_map.begin() && 
-        (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) {
-        list_op.next_marker = pre_marker;
+          (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) {
+       list_op.get_next_marker() = pre_marker;
       } else {
-        pre_marker = list_op.get_next_marker();
+       pre_marker = list_op.get_next_marker();
       }
       list_op.params.prefix = prefix_iter->first;
       rgw_bucket_dir_entry pre_obj;
@@ -519,7 +522,7 @@ int RGWLC::bucket_lc_process(string& shard_id)
               if ((obj_iter + 1)==objs.end()) {
                 if (is_truncated) {
                   //deal with it in next round because we can't judge whether this marker is the only version
-                  list_op.next_marker = obj_iter->key;
+                  list_op.get_next_marker() = obj_iter->key;
                   break;
                 }
               } else if (obj_iter->key.name.compare((obj_iter + 1)->key.name) == 0) {   //*obj_iter is delete marker and isn't the only version, do nothing.
index 4028079fa0779b3d42c28f24247036b2611aceb0..0963cc33d75a2d073acdb04cf68cb376172714c5 100644 (file)
@@ -2401,6 +2401,13 @@ void RGWListBucket::execute()
     return;
   }
 
+  if (allow_unordered && !delimiter.empty()) {
+    ldout(s->cct, 0) <<
+      "ERROR: unordered bucket listing requested with a delimiter" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
   if (need_container_stats()) {
     map<string, RGWBucketEnt> m;
     m[s->bucket.name] = RGWBucketEnt();
@@ -2422,6 +2429,7 @@ void RGWListBucket::execute()
   list_op.params.marker = marker;
   list_op.params.end_marker = end_marker;
   list_op.params.list_versions = list_versions;
+  list_op.params.allow_unordered = allow_unordered;
 
   op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
   if (op_ret >= 0) {
index 952fe36d1643b4db49a05c3d9f046b9873710731..92a0a0fac345bb340611459a5db5bf4e7761a4ff 100644 (file)
@@ -639,7 +639,7 @@ public:
   const string name() override { return "list_buckets"; }
   RGWOpType get_type() override { return RGW_OP_LIST_BUCKETS; }
   uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
-};
+}; // class RGWListBuckets
 
 class RGWGetUsage : public RGWOp {
 protected:
@@ -703,6 +703,7 @@ protected:
 
   int default_max;
   bool is_truncated;
+  bool allow_unordered;
 
   int shard_id;
 
@@ -710,7 +711,8 @@ protected:
 
 public:
   RGWListBucket() : list_versions(false), max(0),
-                    default_max(0), is_truncated(false), shard_id(-1) {}
+                    default_max(0), is_truncated(false),
+                   allow_unordered(false), shard_id(-1) {}
   int verify_permission() override;
   void pre_exec() override;
   void execute() override;
index 77697cf58d3e8c855631c23240ce246b5ff72799..3bfd5ceeeda20554e91677f5a5f94938eced65f3 100644 (file)
@@ -5613,8 +5613,9 @@ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id)
   return 0;
 }
 
-/** 
- * get listing of the objects in a bucket.
+
+/**
+ * Get ordered listing of the objects in a bucket.
  *
  * max: maximum number of results to return
  * bucket: bucket to list contents of
@@ -5628,10 +5629,10 @@ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id)
  * common_prefixes: if delim is filled in, any matching prefixes are placed here.
  * is_truncated: if number of objects in the bucket is bigger than max, then truncated.
  */
-int RGWRados::Bucket::List::list_objects(int64_t max,
-                                         vector<rgw_bucket_dir_entry> *result,
-                                         map<string, bool> *common_prefixes,
-                                         bool *is_truncated)
+int RGWRados::Bucket::List::list_objects_ordered(int64_t max,
+                                                vector<rgw_bucket_dir_entry> *result,
+                                                map<string, bool> *common_prefixes,
+                                                bool *is_truncated)
 {
   RGWRados *store = target->get_store();
   CephContext *cct = store->ctx();
@@ -5660,7 +5661,8 @@ int RGWRados::Bucket::List::list_objects(int64_t max,
   string bigger_than_delim;
 
   if (!params.delim.empty()) {
-    unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(), params.delim.size());
+    unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(),
+                                   params.delim.size());
     char buf[params.delim.size() + 16];
     int r = encode_utf8(val + 1, (unsigned char *)buf);
     if (r < 0) {
@@ -5679,7 +5681,7 @@ int RGWRados::Bucket::List::list_objects(int64_t max,
       cur_marker = s;
     }
   }
-  
+
   string skip_after_delim;
   while (truncated && count <= max) {
     if (skip_after_delim > cur_marker.name) {
@@ -5687,22 +5689,29 @@ int RGWRados::Bucket::List::list_objects(int64_t max,
       ldout(cct, 20) << "setting cur_marker=" << cur_marker.name << "[" << cur_marker.instance << "]" << dendl;
     }
     std::map<string, rgw_bucket_dir_entry> ent_map;
-    int r = store->cls_bucket_list(target->get_bucket_info(), shard_id, cur_marker, cur_prefix,
-                                   read_ahead + 1 - count, params.list_versions, ent_map,
-                                   &truncated, &cur_marker);
+    int r = store->cls_bucket_list_ordered(target->get_bucket_info(),
+                                          shard_id,
+                                          cur_marker,
+                                          cur_prefix,
+                                          read_ahead + 1 - count,
+                                          params.list_versions,
+                                          ent_map,
+                                          &truncated,
+                                          &cur_marker);
     if (r < 0)
       return r;
 
-    std::map<string, rgw_bucket_dir_entry>::iterator eiter;
-    for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
+    for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
       rgw_bucket_dir_entry& entry = eiter->second;
       rgw_obj_index_key index_key = entry.key;
 
       rgw_obj_key obj(index_key);
 
-      /* note that parse_raw_oid() here will not set the correct object's instance, as
-       * rgw_obj_index_key encodes that separately. We don't need to set the instance because it's
-       * not needed for the checks here and we end up using the raw entry for the return vector
+      /* note that parse_raw_oid() here will not set the correct
+       * object's instance, as rgw_obj_index_key encodes that
+       * separately. We don't need to set the instance because it's
+       * not needed for the checks here and we end up using the raw
+       * entry for the return vector
        */
       bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
       if (!valid) {
@@ -5738,7 +5747,8 @@ int RGWRados::Bucket::List::list_objects(int64_t max,
       if (params.filter && !params.filter->filter(obj.name, index_key.name))
         continue;
 
-      if (params.prefix.size() &&  (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
+      if (params.prefix.size() &&
+         (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
         continue;
 
       if (!params.delim.empty()) {
@@ -5785,7 +5795,140 @@ done:
     *is_truncated = truncated;
 
   return 0;
-}
+} // list_objects_ordered
+
+
+/**
+ * Get listing of the objects in a bucket and allow the results to be out
+ * of order.
+ *
+ * Even though there are key differences with the ordered counterpart,
+ * the parameters are the same to maintain some compatibility.
+ *
+ * max: maximum number of results to return
+ * bucket: bucket to list contents of
+ * prefix: only return results that match this prefix
+ * delim: should not be set; if it is we should have indicated an error
+ * marker: if filled in, begin the listing with this object.
+ * end_marker: if filled in, end the listing with this object.
+ * result: the objects are put in here.
+ * common_prefixes: this is never filled with an unordered list; the param
+ *                  is maintained for compatibility
+ * is_truncated: if number of objects in the bucket is bigger than max, then
+ *               truncated.
+ */
+int RGWRados::Bucket::List::list_objects_unordered(int64_t max,
+                                                  vector<rgw_bucket_dir_entry> *result,
+                                                  map<string, bool> *common_prefixes, // not referenced anywhere in this function
+                                                  bool *is_truncated)
+{
+  RGWRados *store = target->get_store();
+  CephContext *cct = store->ctx();
+  int shard_id = target->get_shard_id();
+
+  int count = 0; // number of entries appended to *result so far
+  bool truncated = true; // primes the loop; afterwards written by cls_bucket_list_unordered via &truncated
+
+  // read a few extra in each call to cls_bucket_list_unordered in
+  // case some are filtered out due to namespace matching, versioning,
+  // filtering, etc.
+  const int64_t max_read_ahead = 100;
+  const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead)); // max plus at most 100 extra entries per call
+
+  result->clear();
+
+  rgw_obj_key marker_obj(params.marker.name, params.marker.instance, params.ns);
+  rgw_obj_index_key cur_marker;
+  marker_obj.get_index_key(&cur_marker);
+
+  rgw_obj_key end_marker_obj(params.end_marker.name, params.end_marker.instance,
+                             params.ns);
+  rgw_obj_index_key cur_end_marker;
+  end_marker_obj.get_index_key(&cur_end_marker);
+  const bool cur_end_marker_valid = !params.end_marker.empty(); // end-marker filtering is applied only when one was supplied
+
+  rgw_obj_key prefix_obj(params.prefix);
+  prefix_obj.ns = params.ns;
+  string cur_prefix = prefix_obj.get_index_key_name();
+
+  while (truncated && count <= max) {
+    std::vector<rgw_bucket_dir_entry> ent_list; // fresh batch on every iteration
+    int r = store->cls_bucket_list_unordered(target->get_bucket_info(),
+                                            shard_id,
+                                            cur_marker,
+                                            cur_prefix,
+                                            read_ahead,
+                                            params.list_versions,
+                                            ent_list,
+                                            &truncated,
+                                            &cur_marker); // cur_marker is both the start position and the last-entry out-param
+    if (r < 0)
+      return r;
+
+    // NB: while regions of ent_list will be sorted, we have no
+    // guarantee that all items will be sorted since they can cross
+    // shard boundaries
+
+    for (auto& entry : ent_list) {
+      rgw_obj_index_key index_key = entry.key;
+      rgw_obj_key obj(index_key);
+
+      /* note that parse_raw_oid() here will not set the correct
+       * object's instance, as rgw_obj_index_key encodes that
+       * separately. We don't need to set the instance because it's
+       * not needed for the checks here and we end up using the raw
+       * entry for the return vector
+       */
+      bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
+      if (!valid) {
+        ldout(cct, 0) << "ERROR: could not parse object name: " <<
+         obj.name << dendl;
+        continue;
+      }
+
+      if (!params.list_versions && !entry.is_visible()) {
+        continue;
+      }
+
+      if (params.enforce_ns && obj.ns != params.ns) {
+        continue;
+      }
+
+      if (cur_end_marker_valid && cur_end_marker <= index_key) {
+       // we're not guaranteed items will come in order, so we have
+       // to loop through all
+       continue;
+      }
+
+      if (count < max) { // record the resume position before the filter/prefix checks below, so filtered entries still advance it
+        params.marker = index_key;
+        next_marker = index_key;
+      }
+
+      if (params.filter && !params.filter->filter(obj.name, index_key.name))
+        continue;
+
+      if (params.prefix.size() &&
+         (0 != obj.name.compare(0, params.prefix.size(), params.prefix)))
+        continue;
+
+      if (count >= max) { // one more eligible entry exists beyond max: report truncation and stop
+        truncated = true;
+        goto done;
+      }
+
+      result->emplace_back(std::move(entry)); // entry is moved out of ent_list; it is not read again afterwards
+      count++;
+    } // for (auto& entry : ent_list)
+  } // while (truncated && count <= max)
+
+done:
+  if (is_truncated) // the out-param is optional
+    *is_truncated = truncated;
+
+  return 0;
+} // list_objects_unordered
+
 
 /**
  * create a rados pool, associated meta info
@@ -8452,27 +8595,34 @@ bool RGWRados::is_syncing_bucket_meta(const rgw_bucket& bucket)
 
 int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info)
 {
-  std::map<string, rgw_bucket_dir_entry> ent_map;
+  std::vector<rgw_bucket_dir_entry> ent_list; // NOTE(review): never cleared inside the loop while cls_bucket_list_unordered appends to it, so earlier batches are re-scanned on every pass
   rgw_obj_index_key marker;
   string prefix;
   bool is_truncated;
 
   do {
-#define NUM_ENTRIES 1000
-    int r = cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, NUM_ENTRIES, true, ent_map,
-                        &is_truncated, &marker);
+    constexpr uint NUM_ENTRIES = 1000u; // entries requested per listing call; NOTE(review): `uint` is not a standard C++ type name
+    int r = cls_bucket_list_unordered(bucket_info,
+                                     RGW_NO_SHARD,
+                                     marker,
+                                     prefix,
+                                     NUM_ENTRIES,
+                                     true, // list_versions
+                                     ent_list,
+                                     &is_truncated,
+                                     &marker); // marker is both the start position and the last-entry out-param
     if (r < 0)
       return r;
 
     string ns;
-    std::map<string, rgw_bucket_dir_entry>::iterator eiter;
-    for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
+    for (auto const& dirent : ent_list) { // order is irrelevant here: one qualifying entry is enough
       rgw_obj_key obj;
 
-      if (rgw_obj_key::oid_to_key_in_ns(eiter->second.key.name, &obj, ns))
+      if (rgw_obj_key::oid_to_key_in_ns(dirent.key.name, &obj, ns)) // ns is the empty string; a true result means the bucket is reported non-empty
         return -ENOTEMPTY;
     }
   } while (is_truncated);
+
   return 0;
 }
   
@@ -8632,7 +8782,9 @@ int RGWRados::send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool
   return gc->send_chain(chain, tag, sync);
 }
 
-int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid)
+int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+                               librados::IoCtx& index_ctx,
+                               string& bucket_oid)
 {
   const rgw_bucket& bucket = bucket_info.bucket;
   int r = open_bucket_index_ctx(bucket_info, index_ctx);
@@ -8650,8 +8802,9 @@ int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCt
   return 0;
 }
 
-int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
-    string& bucket_oid_base) {
+int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info,
+                                    librados::IoCtx& index_ctx,
+                                    string& bucket_oid_base) {
   const rgw_bucket& bucket = bucket_info.bucket;
   int r = open_bucket_index_ctx(bucket_info, index_ctx);
   if (r < 0)
@@ -8669,8 +8822,11 @@ int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, librados:
 
 }
 
-int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
-    map<int, string>& bucket_objs, int shard_id, map<int, string> *bucket_instance_ids) {
+int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+                               librados::IoCtx& index_ctx,
+                               map<int, string>& bucket_objs,
+                               int shard_id,
+                               map<int, string> *bucket_instance_ids) {
   string bucket_oid_base;
   int ret = open_bucket_index_base(bucket_info, index_ctx, bucket_oid_base);
   if (ret < 0) {
@@ -12930,16 +13086,26 @@ int RGWRados::cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_
   return CLSRGWIssueSetTagTimeout(index_ctx, bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)();
 }
 
-int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
-                             uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
-                             bool *is_truncated, rgw_obj_index_key *last_entry,
-                             bool (*force_check_filter)(const string&  name))
+
+int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+                                     int shard_id,
+                                     rgw_obj_index_key& start,
+                                     const string& prefix,
+                                     uint32_t num_entries,
+                                     bool list_versions,
+                                     map<string, rgw_bucket_dir_entry>& m,
+                                     bool *is_truncated,
+                                     rgw_obj_index_key *last_entry,
+                                     bool (*force_check_filter)(const string& name))
 {
-  ldout(cct, 10) << "cls_bucket_list " << bucket_info.bucket << " start " << start.name << "[" << start.instance << "] num_entries " << num_entries << dendl;
+  ldout(cct, 10) << "cls_bucket_list_ordered " << bucket_info.bucket <<
+    " start " << start.name << "[" << start.instance << "] num_entries " <<
+    num_entries << dendl;
 
   librados::IoCtx index_ctx;
   // key   - oid (for different shards if there is any)
-  // value - list result for the corresponding oid (shard), it is filled by the AIO callback
+  // value - list result for the corresponding oid (shard), it is filled by
+  //         the AIO callback
   map<int, string> oids;
   map<int, struct rgw_cls_list_ret> list_results;
   int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
@@ -12947,8 +13113,9 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_
     return r;
 
   cls_rgw_obj_key start_key(start.name, start.instance);
-  r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions,
-                            oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
+  r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries,
+                           list_versions, oids, list_results,
+                           cct->_conf->rgw_bucket_index_max_aio)();
   if (r < 0)
     return r;
 
@@ -12996,13 +13163,15 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_
        * and if the tags are old we need to do cleanup as well. */
       librados::IoCtx sub_ctx;
       sub_ctx.dup(index_ctx);
-      r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[vnames[pos]]);
+      r = check_disk_state(sub_ctx, bucket_info, dirent, dirent,
+                          updates[vnames[pos]]);
       if (r < 0 && r != -ENOENT) {
           return r;
       }
     }
     if (r >= 0) {
-      ldout(cct, 10) << "RGWRados::cls_bucket_list: got " << dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+      ldout(cct, 10) << "RGWRados::cls_bucket_list_ordered: got " <<
+       dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
       m[name] = std::move(dirent);
       ++count;
     }
@@ -13024,14 +13193,16 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_
       // we don't care if we lose suggested updates, send them off blindly
       AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
       index_ctx.aio_operate(miter->first, c, &o);
-        c->release();
+      c->release();
     }
   }
 
   // Check if all the returned entries are consumed or not
   for (size_t i = 0; i < vcurrents.size(); ++i) {
-    if (vcurrents[i] != vends[i])
+    if (vcurrents[i] != vends[i]) {
       *is_truncated = true;
+      break;
+    }
   }
   if (!m.empty())
     *last_entry = m.rbegin()->first;
@@ -13039,7 +13210,131 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_
   return 0;
 }
 
-int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info)
+
+int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
+                                       int shard_id,
+                                       rgw_obj_index_key& start,
+                                       const string& prefix,
+                                       uint32_t num_entries,
+                                       bool list_versions,
+                                       std::vector<rgw_bucket_dir_entry>& ent_list,
+                                       bool *is_truncated,
+                                       rgw_obj_index_key *last_entry,
+                                       bool (*force_check_filter)(const string& name)) {
+  ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket <<
+    " start " << start.name << "[" << start.instance <<
+    "] num_entries " << num_entries << dendl;
+
+  *is_truncated = false;
+  librados::IoCtx index_ctx;
+
+  rgw_obj_index_key my_start = start;
+
+  map<int, string> oids;
+  int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
+  if (r < 0)
+    return r;
+  const uint32_t num_shards = oids.size();
+
+  uint32_t current_shard;
+  if (shard_id >= 0) {
+    current_shard = shard_id;
+  } else if (my_start.empty()) {
+    current_shard = 0u;
+  } else {
+    current_shard =
+      rgw_bucket_shard_index(my_start.name, num_shards);
+  }
+
+  uint32_t count = 0u;
+  map<string, bufferlist> updates;
+  std::string last_added_entry;
+  while (count <= num_entries &&
+        ((shard_id >= 0 && current_shard == uint32_t(shard_id)) ||
+         current_shard < num_shards)) {
+    // key   - oid (for different shards if there is any)
+    // value - list result for the corresponding oid (shard), it is filled by
+    //         the AIO callback
+    map<int, struct rgw_cls_list_ret> list_results;
+    r = CLSRGWIssueBucketList(index_ctx, my_start, prefix, num_entries,
+                             list_versions, oids, list_results,
+                             cct->_conf->rgw_bucket_index_max_aio)();
+    if (r < 0)
+      return r;
+
+    const std::string& oid = oids[current_shard];
+    assert(list_results.find(current_shard) != list_results.end());
+    auto& result = list_results[current_shard];
+    for (auto& entry : result.dir.m) {
+      rgw_bucket_dir_entry& dirent = entry.second;
+
+      bool force_check = force_check_filter &&
+       force_check_filter(dirent.key.name);
+      if ((!dirent.exists && !dirent.is_delete_marker()) ||
+         !dirent.pending_map.empty() ||
+         force_check) {
+       /* there are uncommitted ops. We need to check the current state,
+        * and if the tags are old we need to do cleanup as well. */
+       librados::IoCtx sub_ctx;
+       sub_ctx.dup(index_ctx);
+       r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[oid]);
+       if (r < 0 && r != -ENOENT) {
+         return r;
+       }
+      }
+
+      // at this point either r >=0 or r == -ENOENT
+      if (r >= 0) { // i.e., if r != -ENOENT
+       ldout(cct, 10) << "RGWRados::cls_bucket_list_unordered: got " <<
+         dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+
+       if (count < num_entries) {
+         last_added_entry = entry.first;
+         my_start = dirent.key;
+         ent_list.emplace_back(std::move(dirent));
+         ++count;
+       } else {
+         *is_truncated = true;
+         goto check_updates;
+       }
+      } else { // r == -ENOENT
+       // in the case of -ENOENT, make sure we're advancing marker
+       // for possible next call to CLSRGWIssueBucketList
+       my_start = dirent.key;
+      }
+    } // entry for loop
+
+    if (!result.is_truncated) {
+      // if we reached the end of the shard read next shard
+      ++current_shard;
+      my_start = rgw_obj_index_key();
+    }
+  } // shard loop
+
+check_updates:
+  // suggest updates if there is any
+  map<string, bufferlist>::iterator miter = updates.begin();
+  for (; miter != updates.end(); ++miter) {
+    if (miter->second.length()) {
+      ObjectWriteOperation o;
+      cls_rgw_suggest_changes(o, miter->second);
+      // we don't care if we lose suggested updates, send them off blindly
+      AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+      index_ctx.aio_operate(miter->first, c, &o);
+      c->release();
+    }
+  }
+
+  if (last_entry && !ent_list.empty()) {
+    *last_entry = last_added_entry;
+  }
+
+  return 0;
+}
+
+
+int RGWRados::cls_obj_usage_log_add(const string& oid,
+                                   rgw_usage_log_info& info)
 {
   rgw_raw_obj obj(get_zone_params().usage_log_pool, oid);
 
@@ -13546,8 +13841,9 @@ int RGWRados::check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
 }
 
 void RGWRados::get_bucket_index_objects(const string& bucket_oid_base,
-    uint32_t num_shards, map<int, string>& bucket_objects, int shard_id)
-{
+                                       uint32_t num_shards,
+                                       map<int, string>& bucket_objects,
+                                       int shard_id) {
   if (!num_shards) {
     bucket_objects[0] = bucket_oid_base;
   } else {
index 8601600e39469b173c3941347b4021ba77810f0a..e9b0fae56548505e211d3df45a0a767561e3a899 100644 (file)
@@ -3036,12 +3036,25 @@ public:
       const string *get_optag() { return &optag; }
 
       bool is_prepared() { return prepared; }
-    };
+    }; // class UpdateIndex
+
+    class List {
+    protected:
 
-    struct List {
       RGWRados::Bucket *target;
       rgw_obj_key next_marker;
 
+      int list_objects_ordered(int64_t max,
+                              vector<rgw_bucket_dir_entry> *result,
+                              map<string, bool> *common_prefixes,
+                              bool *is_truncated);
+      int list_objects_unordered(int64_t max,
+                                vector<rgw_bucket_dir_entry> *result,
+                                map<string, bool> *common_prefixes,
+                                bool *is_truncated);
+
+    public:
+
       struct Params {
         string prefix;
         string delim;
@@ -3051,19 +3064,35 @@ public:
         bool enforce_ns;
         RGWAccessListFilter *filter;
         bool list_versions;
-
-        Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
+       bool allow_unordered;
+
+        Params() :
+         enforce_ns(true),
+         filter(NULL),
+         list_versions(false),
+         allow_unordered(false)
+       {}
       } params;
 
-    public:
       explicit List(RGWRados::Bucket *_target) : target(_target) {}
 
-      int list_objects(int64_t max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
+      int list_objects(int64_t max,
+                      vector<rgw_bucket_dir_entry> *result,
+                      map<string, bool> *common_prefixes,
+                      bool *is_truncated) {
+       if (params.allow_unordered) {
+         return list_objects_unordered(max, result, common_prefixes,
+                                       is_truncated);
+       } else {
+         return list_objects_ordered(max, result, common_prefixes,
+                                     is_truncated);
+       }
+      }
       rgw_obj_key& get_next_marker() {
         return next_marker;
       }
-    };
-  };
+    }; // class List
+  }; // class Bucket
 
   /** Write/overwrite an object to the bucket storage. */
   virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
@@ -3520,10 +3549,19 @@ public:
                            ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
   int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
   int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
-  int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
-                      uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
-                      bool *is_truncated, rgw_obj_index_key *last_entry,
-                      bool (*force_check_filter)(const string&  name) = NULL);
+  int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
+                             rgw_obj_index_key& start, const string& prefix,
+                             uint32_t num_entries, bool list_versions,
+                             map<string, rgw_bucket_dir_entry>& m,
+                             bool *is_truncated,
+                             rgw_obj_index_key *last_entry,
+                             bool (*force_check_filter)(const string& name) = nullptr);
+  int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
+                               rgw_obj_index_key& start, const string& prefix,
+                               uint32_t num_entries, bool list_versions,
+                               vector<rgw_bucket_dir_entry>& ent_list,
+                               bool *is_truncated, rgw_obj_index_key *last_entry,
+                               bool (*force_check_filter)(const string& name) = nullptr);
   int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
   int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
   int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
index 7d327b19a05834970aab02ffec1fffac30774d7d..974002ff58e3ff87aca0e8d7ddc19a54503cb791 100644 (file)
@@ -647,12 +647,18 @@ int RGWListBucket_ObjStore_S3::get_params()
     marker.name = s->info.args.get("key-marker");
     marker.instance = s->info.args.get("version-id-marker");
   }
+
+  // non-standard extension: lets the listing be returned unsorted
+  s->info.args.get_bool("allow-unordered", &allow_unordered, false);
+
+  delimiter = s->info.args.get("delimiter");
+
   max_keys = s->info.args.get("max-keys");
   op_ret = parse_max_keys();
   if (op_ret < 0) {
     return op_ret;
   }
-  delimiter = s->info.args.get("delimiter");
+
   encoding_type = s->info.args.get("encoding-type");
   if (s->system_request) {
     s->info.args.get_bool("objs-container", &objs_container, false);
@@ -668,6 +674,7 @@ int RGWListBucket_ObjStore_S3::get_params()
       shard_id = s->bucket_instance_shard_id;
     }
   }
+
   return 0;
 }
 
index 260ee389d709858a1abe52e21c19b634736983e9..43e5df7f29e475e81986fb6a5718aaa14351e4a3 100644 (file)
@@ -294,6 +294,12 @@ int RGWListBucket_ObjStore_SWIFT::get_params()
   marker = s->info.args.get("marker");
   end_marker = s->info.args.get("end_marker");
   max_keys = s->info.args.get("limit");
+
+  // non-standard extension: lets the listing be returned unsorted
+  s->info.args.get_bool("allow_unordered", &allow_unordered, false);
+
+  delimiter = s->info.args.get("delimiter");
+
   op_ret = parse_max_keys();
   if (op_ret < 0) {
     return op_ret;
@@ -301,8 +307,6 @@ int RGWListBucket_ObjStore_SWIFT::get_params()
   if (max > default_max)
     return -ERR_PRECONDITION_FAILED;
 
-  delimiter = s->info.args.get("delimiter");
-
   string path_args;
   if (s->info.args.exists("path")) { // should handle empty path
     path_args = s->info.args.get("path");
@@ -342,7 +346,10 @@ void RGWListBucket_ObjStore_SWIFT::send_response()
   dump_container_metadata(s, bucket, bucket_quota,
                           s->bucket_info.website_conf);
 
-  s->formatter->open_array_section_with_attrs("container", FormatterAttrs("name", s->bucket.name.c_str(), NULL));
+  s->formatter->open_array_section_with_attrs("container",
+                                             FormatterAttrs("name",
+                                                            s->bucket.name.c_str(),
+                                                            NULL));
 
   while (iter != objs.end() || pref_iter != common_prefixes.end()) {
     bool do_pref = false;
@@ -363,7 +370,7 @@ void RGWListBucket_ObjStore_SWIFT::send_response()
     else
       do_pref = true;
 
-    if (do_objs && (marker.empty() || marker < key)) {
+    if (do_objs && (allow_unordered || marker.empty() || marker < key)) {
       if (key.name.compare(path) == 0)
         goto next;
 
@@ -433,7 +440,7 @@ next:
   }
 
   rgw_flush_formatter_and_reset(s, s->formatter);
-}
+} // RGWListBucket_ObjStore_SWIFT::send_response
 
 static void dump_container_metadata(struct req_state *s,
                                     const RGWBucketEnt& bucket,