From 6da5a5888c8605497cddc83b73cee1528d1b4b44 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Mon, 12 Feb 2018 15:03:14 -0500 Subject: [PATCH] rgw: ability to list bucket contents in unsorted order for efficiency Add the ability to list the contents of a bucket without the results being sorted. This can have performance benefits since not all bucket shards need to be queried. This is done by setting a parameter in the REST query string (i.e., "allow_unordered" [note underscore] for swift or "allow-unordered" [note hyphen] for s3) to true. This works with other request parameters such as marker, end_marker, and prefix. But it does not work when a delimiter is used. Also, because the results are not in order, a subsequent marker may precede an earlier marker. Signed-off-by: J. Eric Ivancich --- doc/radosgw/s3/bucketops.rst | 25 +- doc/radosgw/swift/containerops.rst | 7 + src/cls/rgw/cls_rgw_ops.h | 3 +- src/rgw/rgw_admin.cc | 8 +- src/rgw/rgw_bucket.cc | 9 +- src/rgw/rgw_common.h | 8 +- src/rgw/rgw_lc.cc | 11 +- src/rgw/rgw_op.cc | 8 + src/rgw/rgw_op.h | 6 +- src/rgw/rgw_rados.cc | 386 +++++++++++++++++++++++++---- src/rgw/rgw_rados.h | 62 ++++- src/rgw/rgw_rest_s3.cc | 9 +- src/rgw/rgw_rest_swift.cc | 17 +- 13 files changed, 463 insertions(+), 96 deletions(-) diff --git a/doc/radosgw/s3/bucketops.rst b/doc/radosgw/s3/bucketops.rst index c7cd5b4fd6082..ed1f2a4f6c7d9 100644 --- a/doc/radosgw/s3/bucketops.rst +++ b/doc/radosgw/s3/bucketops.rst @@ -91,18 +91,19 @@ Syntax Parameters ~~~~~~~~~~ -+-----------------+-----------+-----------------------------------------------------------------------+ -| Name | Type | Description | -+=================+===========+=======================================================================+ -| ``prefix`` | String | Only returns objects that contain the specified prefix. 
| -+-----------------+-----------+-----------------------------------------------------------------------+ -| ``delimiter`` | String | The delimiter between the prefix and the rest of the object name. | -+-----------------+-----------+-----------------------------------------------------------------------+ -| ``marker`` | String | A beginning index for the list of objects returned. | -+-----------------+-----------+-----------------------------------------------------------------------+ -| ``max-keys`` | Integer | The maximum number of keys to return. Default is 1000. | -+-----------------+-----------+-----------------------------------------------------------------------+ - ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ +| Name | Type | Description | ++=====================+===========+=================================================================================================+ +| ``prefix`` | String | Only returns objects that contain the specified prefix. | ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ +| ``delimiter`` | String | The delimiter between the prefix and the rest of the object name. | ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ +| ``marker`` | String | A beginning index for the list of objects returned. | ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ +| ``max-keys`` | Integer | The maximum number of keys to return. Default is 1000. | ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ +| ``allow-unordered`` | Boolean | Non-standard extension. Allows results to be returned unordered. Cannot be used with delimiter. 
| ++---------------------+-----------+-------------------------------------------------------------------------------------------------+ HTTP Response ~~~~~~~~~~~~~ diff --git a/doc/radosgw/swift/containerops.rst b/doc/radosgw/swift/containerops.rst index 463d91c6ce192..f97429579d767 100644 --- a/doc/radosgw/swift/containerops.rst +++ b/doc/radosgw/swift/containerops.rst @@ -147,6 +147,13 @@ Parameters :Type: String :Required: No +``allow_unordered`` + +:Description: Allows the results to be returned unordered to reduce computation overhead. Cannot be used with ``delimiter``. +:Type: Boolean +:Required: No +:Non-Standard Extension: Yes + Response Entities ~~~~~~~~~~~~~~~~~ diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h index 48ef7ddce710a..37db81e536394 100644 --- a/src/cls/rgw/cls_rgw_ops.h +++ b/src/cls/rgw/cls_rgw_ops.h @@ -413,8 +413,7 @@ struct rgw_cls_list_op }; WRITE_CLASS_ENCODER(rgw_cls_list_op) -struct rgw_cls_list_ret -{ +struct rgw_cls_list_ret { rgw_bucket_dir dir; bool is_truncated; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 30fd7ecb4ee36..94a1ba37a1f21 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -5748,9 +5748,11 @@ next: formatter->open_array_section("objects"); while (is_truncated) { map result; - int r = store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true, - result, &is_truncated, &marker, - bucket_object_check_filter); + int r = + store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker, + prefix, 1000, true, + result, &is_truncated, &marker, + bucket_object_check_filter); if (r < 0 && r != -ENOENT) { cerr << "ERROR: failed operation r=" << r << std::endl; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 2398bd3c7bfbd..925dceed3ed5e 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -1146,19 +1146,18 @@ int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state, while (is_truncated) { map result; - int r = 
store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true, - result, &is_truncated, &marker, - bucket_object_check_filter); + int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, + marker, prefix, 1000, true, + result, &is_truncated, &marker, + bucket_object_check_filter); if (r == -ENOENT) { break; } else if (r < 0 && r != -ENOENT) { set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r)); } - dump_bucket_index(result, formatter); flusher.flush(); - } formatter->close_section(); diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 8cd71a276a51b..7207fdafa28e1 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -315,8 +315,7 @@ class NameVal }; /** Stores the XML arguments associated with the HTTP request in req_state*/ -class RGWHTTPArgs -{ +class RGWHTTPArgs { string str, empty_str; map val_map; map sys_val_map; @@ -375,7 +374,7 @@ class RGWHTTPArgs const string& get_str() { return str; } -}; +}; // RGWHTTPArgs const char *rgw_conf_get(const map& conf_map, const char *name, const char *def_val); int rgw_conf_get_int(const map& conf_map, const char *name, int def_val); @@ -1204,8 +1203,7 @@ inline ostream& operator<<(ostream& out, const RGWBucketIndexType &index_type) } } -struct RGWBucketInfo -{ +struct RGWBucketInfo { enum BIShardsHashType { MOD = 0 }; diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index 62c941618fffd..5aa57d384dc7f 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #include #include #include @@ -480,10 +483,10 @@ int RGWLC::bucket_lc_process(string& shard_id) continue; } if (prefix_iter != prefix_map.begin() && - (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) { - list_op.next_marker = pre_marker; + (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) { + 
list_op.get_next_marker() = pre_marker; } else { - pre_marker = list_op.get_next_marker(); + pre_marker = list_op.get_next_marker(); } list_op.params.prefix = prefix_iter->first; rgw_bucket_dir_entry pre_obj; @@ -519,7 +522,7 @@ int RGWLC::bucket_lc_process(string& shard_id) if ((obj_iter + 1)==objs.end()) { if (is_truncated) { //deal with it in next round because we can't judge whether this marker is the only version - list_op.next_marker = obj_iter->key; + list_op.get_next_marker() = obj_iter->key; break; } } else if (obj_iter->key.name.compare((obj_iter + 1)->key.name) == 0) { //*obj_iter is delete marker and isn't the only version, do nothing. diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 4028079fa0779..0963cc33d75a2 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -2401,6 +2401,13 @@ void RGWListBucket::execute() return; } + if (allow_unordered && !delimiter.empty()) { + ldout(s->cct, 0) << + "ERROR: unordered bucket listing requested with a delimiter" << dendl; + op_ret = -EINVAL; + return; + } + if (need_container_stats()) { map m; m[s->bucket.name] = RGWBucketEnt(); @@ -2422,6 +2429,7 @@ void RGWListBucket::execute() list_op.params.marker = marker; list_op.params.end_marker = end_marker; list_op.params.list_versions = list_versions; + list_op.params.allow_unordered = allow_unordered; op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated); if (op_ret >= 0) { diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 952fe36d1643b..92a0a0fac345b 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -639,7 +639,7 @@ public: const string name() override { return "list_buckets"; } RGWOpType get_type() override { return RGW_OP_LIST_BUCKETS; } uint32_t op_mask() override { return RGW_OP_TYPE_READ; } -}; +}; // class RGWListBuckets class RGWGetUsage : public RGWOp { protected: @@ -703,6 +703,7 @@ protected: int default_max; bool is_truncated; + bool allow_unordered; int shard_id; @@ -710,7 +711,8 @@ protected: public: 
RGWListBucket() : list_versions(false), max(0), - default_max(0), is_truncated(false), shard_id(-1) {} + default_max(0), is_truncated(false), + allow_unordered(false), shard_id(-1) {} int verify_permission() override; void pre_exec() override; void execute() override; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 77697cf58d3e8..3bfd5ceeeda20 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -5613,8 +5613,9 @@ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id) return 0; } -/** - * get listing of the objects in a bucket. + +/** + * Get ordered listing of the objects in a bucket. * * max: maximum number of results to return * bucket: bucket to list contents of @@ -5628,10 +5629,10 @@ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id) * common_prefixes: if delim is filled in, any matching prefixes are placed here. * is_truncated: if number of objects in the bucket is bigger than max, then truncated. */ -int RGWRados::Bucket::List::list_objects(int64_t max, - vector *result, - map *common_prefixes, - bool *is_truncated) +int RGWRados::Bucket::List::list_objects_ordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated) { RGWRados *store = target->get_store(); CephContext *cct = store->ctx(); @@ -5660,7 +5661,8 @@ int RGWRados::Bucket::List::list_objects(int64_t max, string bigger_than_delim; if (!params.delim.empty()) { - unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(), params.delim.size()); + unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(), + params.delim.size()); char buf[params.delim.size() + 16]; int r = encode_utf8(val + 1, (unsigned char *)buf); if (r < 0) { @@ -5679,7 +5681,7 @@ int RGWRados::Bucket::List::list_objects(int64_t max, cur_marker = s; } } - + string skip_after_delim; while (truncated && count <= max) { if (skip_after_delim > cur_marker.name) { @@ -5687,22 +5689,29 @@ int RGWRados::Bucket::List::list_objects(int64_t 
max, ldout(cct, 20) << "setting cur_marker=" << cur_marker.name << "[" << cur_marker.instance << "]" << dendl; } std::map ent_map; - int r = store->cls_bucket_list(target->get_bucket_info(), shard_id, cur_marker, cur_prefix, - read_ahead + 1 - count, params.list_versions, ent_map, - &truncated, &cur_marker); + int r = store->cls_bucket_list_ordered(target->get_bucket_info(), + shard_id, + cur_marker, + cur_prefix, + read_ahead + 1 - count, + params.list_versions, + ent_map, + &truncated, + &cur_marker); if (r < 0) return r; - std::map::iterator eiter; - for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) { + for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) { rgw_bucket_dir_entry& entry = eiter->second; rgw_obj_index_key index_key = entry.key; rgw_obj_key obj(index_key); - /* note that parse_raw_oid() here will not set the correct object's instance, as - * rgw_obj_index_key encodes that separately. We don't need to set the instance because it's - * not needed for the checks here and we end up using the raw entry for the return vector + /* note that parse_raw_oid() here will not set the correct + * object's instance, as rgw_obj_index_key encodes that + * separately. 
We don't need to set the instance because it's + * not needed for the checks here and we end up using the raw + * entry for the return vector */ bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj); if (!valid) { @@ -5738,7 +5747,8 @@ int RGWRados::Bucket::List::list_objects(int64_t max, if (params.filter && !params.filter->filter(obj.name, index_key.name)) continue; - if (params.prefix.size() && (obj.name.compare(0, params.prefix.size(), params.prefix) != 0)) + if (params.prefix.size() && + (obj.name.compare(0, params.prefix.size(), params.prefix) != 0)) continue; if (!params.delim.empty()) { @@ -5785,7 +5795,140 @@ done: *is_truncated = truncated; return 0; -} +} // list_objects_ordered + + +/** + * Get listing of the objects in a bucket and allow the results to be out + * of order. + * + * Even though there are key differences with the ordered counterpart, + * the parameters are the same to maintain some compatibility. + * + * max: maximum number of results to return + * bucket: bucket to list contents of + * prefix: only return results that match this prefix + * delim: should not be set; if it is we should have indicated an error + * marker: if filled in, begin the listing with this object. + * end_marker: if filled in, end the listing with this object. + * result: the objects are put in here. + * common_prefixes: this is never filled with an unordered list; the param + * is maintained for compatibility + * is_truncated: if number of objects in the bucket is bigger than max, then + * truncated. + */ +int RGWRados::Bucket::List::list_objects_unordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated) +{ + RGWRados *store = target->get_store(); + CephContext *cct = store->ctx(); + int shard_id = target->get_shard_id(); + + int count = 0; + bool truncated = true; + + // read a few extra in each call to cls_bucket_list_unordered in + // case some are filtered out due to namespace matching, versioning, + // filtering, etc. 
+ const int64_t max_read_ahead = 100; + const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead)); + + result->clear(); + + rgw_obj_key marker_obj(params.marker.name, params.marker.instance, params.ns); + rgw_obj_index_key cur_marker; + marker_obj.get_index_key(&cur_marker); + + rgw_obj_key end_marker_obj(params.end_marker.name, params.end_marker.instance, + params.ns); + rgw_obj_index_key cur_end_marker; + end_marker_obj.get_index_key(&cur_end_marker); + const bool cur_end_marker_valid = !params.end_marker.empty(); + + rgw_obj_key prefix_obj(params.prefix); + prefix_obj.ns = params.ns; + string cur_prefix = prefix_obj.get_index_key_name(); + + while (truncated && count <= max) { + std::vector ent_list; + int r = store->cls_bucket_list_unordered(target->get_bucket_info(), + shard_id, + cur_marker, + cur_prefix, + read_ahead, + params.list_versions, + ent_list, + &truncated, + &cur_marker); + if (r < 0) + return r; + + // NB: while regions of ent_list will be sorted, we have no + // guarantee that all items will be sorted since they can cross + // shard boundaries + + for (auto& entry : ent_list) { + rgw_obj_index_key index_key = entry.key; + rgw_obj_key obj(index_key); + + /* note that parse_raw_oid() here will not set the correct + * object's instance, as rgw_obj_index_key encodes that + * separately. 
We don't need to set the instance because it's + * not needed for the checks here and we end up using the raw + * entry for the return vector + */ + bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj); + if (!valid) { + ldout(cct, 0) << "ERROR: could not parse object name: " << + obj.name << dendl; + continue; + } + + if (!params.list_versions && !entry.is_visible()) { + continue; + } + + if (params.enforce_ns && obj.ns != params.ns) { + continue; + } + + if (cur_end_marker_valid && cur_end_marker <= index_key) { + // we're not guaranteed items will come in order, so we have + // to loop through all + continue; + } + + if (count < max) { + params.marker = index_key; + next_marker = index_key; + } + + if (params.filter && !params.filter->filter(obj.name, index_key.name)) + continue; + + if (params.prefix.size() && + (0 != obj.name.compare(0, params.prefix.size(), params.prefix))) + continue; + + if (count >= max) { + truncated = true; + goto done; + } + + result->emplace_back(std::move(entry)); + count++; + } // for (auto& entry : ent_list) + } // while (truncated && count <= max) + +done: + if (is_truncated) + *is_truncated = truncated; + + return 0; +} // list_objects_unordered + /** * create a rados pool, associated meta info @@ -8452,27 +8595,34 @@ bool RGWRados::is_syncing_bucket_meta(const rgw_bucket& bucket) int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info) { - std::map ent_map; + std::vector ent_list; rgw_obj_index_key marker; string prefix; bool is_truncated; do { -#define NUM_ENTRIES 1000 - int r = cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, NUM_ENTRIES, true, ent_map, - &is_truncated, &marker); + constexpr uint NUM_ENTRIES = 1000u; + int r = cls_bucket_list_unordered(bucket_info, + RGW_NO_SHARD, + marker, + prefix, + NUM_ENTRIES, + true, + ent_list, + &is_truncated, + &marker); if (r < 0) return r; string ns; - std::map::iterator eiter; - for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) { + for (auto 
const& dirent : ent_list) { rgw_obj_key obj; - if (rgw_obj_key::oid_to_key_in_ns(eiter->second.key.name, &obj, ns)) + if (rgw_obj_key::oid_to_key_in_ns(dirent.key.name, &obj, ns)) return -ENOTEMPTY; } } while (is_truncated); + return 0; } @@ -8632,7 +8782,9 @@ int RGWRados::send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool return gc->send_chain(chain, tag, sync); } -int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid) +int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, + librados::IoCtx& index_ctx, + string& bucket_oid) { const rgw_bucket& bucket = bucket_info.bucket; int r = open_bucket_index_ctx(bucket_info, index_ctx); @@ -8650,8 +8802,9 @@ int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCt return 0; } -int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, - string& bucket_oid_base) { +int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, + librados::IoCtx& index_ctx, + string& bucket_oid_base) { const rgw_bucket& bucket = bucket_info.bucket; int r = open_bucket_index_ctx(bucket_info, index_ctx); if (r < 0) @@ -8669,8 +8822,11 @@ int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, librados: } -int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, - map& bucket_objs, int shard_id, map *bucket_instance_ids) { +int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, + librados::IoCtx& index_ctx, + map& bucket_objs, + int shard_id, + map *bucket_instance_ids) { string bucket_oid_base; int ret = open_bucket_index_base(bucket_info, index_ctx, bucket_oid_base); if (ret < 0) { @@ -12930,16 +13086,26 @@ int RGWRados::cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_ return CLSRGWIssueSetTagTimeout(index_ctx, bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)(); } -int 
RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix, - uint32_t num_entries, bool list_versions, map& m, - bool *is_truncated, rgw_obj_index_key *last_entry, - bool (*force_check_filter)(const string& name)) + +int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info, + int shard_id, + rgw_obj_index_key& start, + const string& prefix, + uint32_t num_entries, + bool list_versions, + map& m, + bool *is_truncated, + rgw_obj_index_key *last_entry, + bool (*force_check_filter)(const string& name)) { - ldout(cct, 10) << "cls_bucket_list " << bucket_info.bucket << " start " << start.name << "[" << start.instance << "] num_entries " << num_entries << dendl; + ldout(cct, 10) << "cls_bucket_list_ordered " << bucket_info.bucket << + " start " << start.name << "[" << start.instance << "] num_entries " << + num_entries << dendl; librados::IoCtx index_ctx; // key - oid (for different shards if there is any) - // value - list result for the corresponding oid (shard), it is filled by the AIO callback + // value - list result for the corresponding oid (shard), it is filled by + // the AIO callback map oids; map list_results; int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id); @@ -12947,8 +13113,9 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_ return r; cls_rgw_obj_key start_key(start.name, start.instance); - r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions, - oids, list_results, cct->_conf->rgw_bucket_index_max_aio)(); + r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, + list_versions, oids, list_results, + cct->_conf->rgw_bucket_index_max_aio)(); if (r < 0) return r; @@ -12996,13 +13163,15 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_ * and if the tags are old we need to do cleanup as well. 
*/ librados::IoCtx sub_ctx; sub_ctx.dup(index_ctx); - r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[vnames[pos]]); + r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, + updates[vnames[pos]]); if (r < 0 && r != -ENOENT) { return r; } } if (r >= 0) { - ldout(cct, 10) << "RGWRados::cls_bucket_list: got " << dirent.key.name << "[" << dirent.key.instance << "]" << dendl; + ldout(cct, 10) << "RGWRados::cls_bucket_list_ordered: got " << + dirent.key.name << "[" << dirent.key.instance << "]" << dendl; m[name] = std::move(dirent); ++count; } @@ -13024,14 +13193,16 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_ // we don't care if we lose suggested updates, send them off blindly AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL); index_ctx.aio_operate(miter->first, c, &o); - c->release(); + c->release(); } } // Check if all the returned entries are consumed or not for (size_t i = 0; i < vcurrents.size(); ++i) { - if (vcurrents[i] != vends[i]) + if (vcurrents[i] != vends[i]) { *is_truncated = true; + break; + } } if (!m.empty()) *last_entry = m.rbegin()->first; @@ -13039,7 +13210,131 @@ int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_ return 0; } -int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info) + +int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info, + int shard_id, + rgw_obj_index_key& start, + const string& prefix, + uint32_t num_entries, + bool list_versions, + std::vector& ent_list, + bool *is_truncated, + rgw_obj_index_key *last_entry, + bool (*force_check_filter)(const string& name)) { + ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket << + " start " << start.name << "[" << start.instance << + "] num_entries " << num_entries << dendl; + + *is_truncated = false; + librados::IoCtx index_ctx; + + rgw_obj_index_key my_start = start; + + map oids; + int r = open_bucket_index(bucket_info, 
index_ctx, oids, shard_id); + if (r < 0) + return r; + const uint32_t num_shards = oids.size(); + + uint32_t current_shard; + if (shard_id >= 0) { + current_shard = shard_id; + } else if (my_start.empty()) { + current_shard = 0u; + } else { + current_shard = + rgw_bucket_shard_index(my_start.name, num_shards); + } + + uint32_t count = 0u; + map updates; + std::string last_added_entry; + while (count <= num_entries && + ((shard_id >= 0 && current_shard == uint32_t(shard_id)) || + current_shard < num_shards)) { + // key - oid (for different shards if there is any) + // value - list result for the corresponding oid (shard), it is filled by + // the AIO callback + map list_results; + r = CLSRGWIssueBucketList(index_ctx, my_start, prefix, num_entries, + list_versions, oids, list_results, + cct->_conf->rgw_bucket_index_max_aio)(); + if (r < 0) + return r; + + const std::string& oid = oids[current_shard]; + assert(list_results.find(current_shard) != list_results.end()); + auto& result = list_results[current_shard]; + for (auto& entry : result.dir.m) { + rgw_bucket_dir_entry& dirent = entry.second; + + bool force_check = force_check_filter && + force_check_filter(dirent.key.name); + if ((!dirent.exists && !dirent.is_delete_marker()) || + !dirent.pending_map.empty() || + force_check) { + /* there are uncommitted ops. We need to check the current state, + * and if the tags are old we need to do cleanup as well. 
*/ + librados::IoCtx sub_ctx; + sub_ctx.dup(index_ctx); + r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[oid]); + if (r < 0 && r != -ENOENT) { + return r; + } + } + + // at this point either r >=0 or r == -ENOENT + if (r >= 0) { // i.e., if r != -ENOENT + ldout(cct, 10) << "RGWRados::cls_bucket_list_unordered: got " << + dirent.key.name << "[" << dirent.key.instance << "]" << dendl; + + if (count < num_entries) { + last_added_entry = entry.first; + my_start = dirent.key; + ent_list.emplace_back(std::move(dirent)); + ++count; + } else { + *is_truncated = true; + goto check_updates; + } + } else { // r == -ENOENT + // in the case of -ENOENT, make sure we're advancing marker + // for possible next call to CLSRGWIssueBucketList + my_start = dirent.key; + } + } // entry for loop + + if (!result.is_truncated) { + // if we reached the end of the shard read next shard + ++current_shard; + my_start = rgw_obj_index_key(); + } + } // shard loop + +check_updates: + // suggest updates if there is any + map::iterator miter = updates.begin(); + for (; miter != updates.end(); ++miter) { + if (miter->second.length()) { + ObjectWriteOperation o; + cls_rgw_suggest_changes(o, miter->second); + // we don't care if we lose suggested updates, send them off blindly + AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL); + index_ctx.aio_operate(miter->first, c, &o); + c->release(); + } + } + + if (last_entry && !ent_list.empty()) { + *last_entry = last_added_entry; + } + + return 0; +} + + +int RGWRados::cls_obj_usage_log_add(const string& oid, + rgw_usage_log_info& info) { rgw_raw_obj obj(get_zone_params().usage_log_pool, oid); @@ -13546,8 +13841,9 @@ int RGWRados::check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket, } void RGWRados::get_bucket_index_objects(const string& bucket_oid_base, - uint32_t num_shards, map& bucket_objects, int shard_id) -{ + uint32_t num_shards, + map& bucket_objects, + int shard_id) { if (!num_shards) { 
bucket_objects[0] = bucket_oid_base; } else { diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 8601600e39469..e9b0fae565485 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -3036,12 +3036,25 @@ public: const string *get_optag() { return &optag; } bool is_prepared() { return prepared; } - }; + }; // class UpdateIndex + + class List { + protected: - struct List { RGWRados::Bucket *target; rgw_obj_key next_marker; + int list_objects_ordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated); + int list_objects_unordered(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated); + + public: + struct Params { string prefix; string delim; @@ -3051,19 +3064,35 @@ public: bool enforce_ns; RGWAccessListFilter *filter; bool list_versions; - - Params() : enforce_ns(true), filter(NULL), list_versions(false) {} + bool allow_unordered; + + Params() : + enforce_ns(true), + filter(NULL), + list_versions(false), + allow_unordered(false) + {} } params; - public: explicit List(RGWRados::Bucket *_target) : target(_target) {} - int list_objects(int64_t max, vector *result, map *common_prefixes, bool *is_truncated); + int list_objects(int64_t max, + vector *result, + map *common_prefixes, + bool *is_truncated) { + if (params.allow_unordered) { + return list_objects_unordered(max, result, common_prefixes, + is_truncated); + } else { + return list_objects_ordered(max, result, common_prefixes, + is_truncated); + } + } rgw_obj_key& get_next_marker() { return next_marker; } - }; - }; + }; // class List + }; // class Bucket /** Write/overwrite an object to the bucket storage. 
*/ virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime, @@ -3520,10 +3549,19 @@ public: ceph::real_time& removed_mtime, list *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout); - int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix, - uint32_t num_entries, bool list_versions, map& m, - bool *is_truncated, rgw_obj_index_key *last_entry, - bool (*force_check_filter)(const string& name) = NULL); + int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id, + rgw_obj_index_key& start, const string& prefix, + uint32_t num_entries, bool list_versions, + map& m, + bool *is_truncated, + rgw_obj_index_key *last_entry, + bool (*force_check_filter)(const string& name) = nullptr); + int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id, + rgw_obj_index_key& start, const string& prefix, + uint32_t num_entries, bool list_versions, + vector& ent_list, + bool *is_truncated, rgw_obj_index_key *last_entry, + bool (*force_check_filter)(const string& name) = nullptr); int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector& headers, map *bucket_instance_ids = NULL); int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio); int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list& result, bool *truncated); diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 7d327b19a0583..974002ff58e3f 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -647,12 +647,18 @@ int RGWListBucket_ObjStore_S3::get_params() marker.name = s->info.args.get("key-marker"); marker.instance = 
s->info.args.get("version-id-marker"); } + + // non-standard + s->info.args.get_bool("allow-unordered", &allow_unordered, false); + + delimiter = s->info.args.get("delimiter"); + max_keys = s->info.args.get("max-keys"); op_ret = parse_max_keys(); if (op_ret < 0) { return op_ret; } - delimiter = s->info.args.get("delimiter"); + encoding_type = s->info.args.get("encoding-type"); if (s->system_request) { s->info.args.get_bool("objs-container", &objs_container, false); @@ -668,6 +674,7 @@ int RGWListBucket_ObjStore_S3::get_params() shard_id = s->bucket_instance_shard_id; } } + return 0; } diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 260ee389d7098..43e5df7f29e47 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -294,6 +294,12 @@ int RGWListBucket_ObjStore_SWIFT::get_params() marker = s->info.args.get("marker"); end_marker = s->info.args.get("end_marker"); max_keys = s->info.args.get("limit"); + + // non-standard + s->info.args.get_bool("allow_unordered", &allow_unordered, false); + + delimiter = s->info.args.get("delimiter"); + op_ret = parse_max_keys(); if (op_ret < 0) { return op_ret; @@ -301,8 +307,6 @@ int RGWListBucket_ObjStore_SWIFT::get_params() if (max > default_max) return -ERR_PRECONDITION_FAILED; - delimiter = s->info.args.get("delimiter"); - string path_args; if (s->info.args.exists("path")) { // should handle empty path path_args = s->info.args.get("path"); @@ -342,7 +346,10 @@ void RGWListBucket_ObjStore_SWIFT::send_response() dump_container_metadata(s, bucket, bucket_quota, s->bucket_info.website_conf); - s->formatter->open_array_section_with_attrs("container", FormatterAttrs("name", s->bucket.name.c_str(), NULL)); + s->formatter->open_array_section_with_attrs("container", + FormatterAttrs("name", + s->bucket.name.c_str(), + NULL)); while (iter != objs.end() || pref_iter != common_prefixes.end()) { bool do_pref = false; @@ -363,7 +370,7 @@ void RGWListBucket_ObjStore_SWIFT::send_response() else do_pref = 
true; - if (do_objs && (marker.empty() || marker < key)) { + if (do_objs && (allow_unordered || marker.empty() || marker < key)) { if (key.name.compare(path) == 0) goto next; @@ -433,7 +440,7 @@ next: } rgw_flush_formatter_and_reset(s, s->formatter); -} +} // RGWListBucket_ObjStore_SWIFT::send_response static void dump_container_metadata(struct req_state *s, const RGWBucketEnt& bucket, -- 2.39.5