#include <errno.h>
+#include <boost/algorithm/string.hpp>
+
#include "objclass/objclass.h"
#include "cls/rgw/cls_rgw_ops.h"
#include "cls/rgw/cls_rgw_const.h"
int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
+ // maximum number of calls to get_obj_vals we'll try; compromise
+ // between wanting to return the requested # of entries, but not
+ // wanting to slow down this op with too many omap reads
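+ // (if we give up because of this cap, is_truncated below is set from
+ // more && !done, so the caller can resume from the last key visited)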
+ constexpr int max_attempts = 8;
+
auto iter = in->cbegin();
rgw_cls_list_op op;
try {
decode(op, iter);
} catch (buffer::error& err) {
- CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode request\n");
+ CLS_LOG(1, "ERROR: %s: failed to decode request\n", __func__);
return -EINVAL;
}
rgw_cls_list_ret ret;
rgw_bucket_dir& new_dir = ret.dir;
+ auto& name_entry_map = new_dir.m; // map of keys to entries
+
int rc = read_bucket_header(hctx, &new_dir.header);
if (rc < 0) {
- CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to read header\n");
+ CLS_LOG(1, "ERROR: %s: failed to read header\n", __func__);
return rc;
}
- map<string, bufferlist> keys;
- std::map<string, bufferlist>::iterator kiter;
- string start_key;
- encode_list_index_key(hctx, op.start_obj, &start_key);
- bool done = false;
- uint32_t left_to_read = op.num_entries;
- bool more;
-
- do {
- rc = get_obj_vals(hctx, start_key, op.filter_prefix, left_to_read, &keys, &more);
- if (rc < 0)
+ string start_after_key; // key that we can start listing at, one of a)
+ // sent in by caller, b) last item visited, or
+ // c) when delimiter present, a key that will
+ // move past the subdirectory
+ encode_list_index_key(hctx, op.start_obj, &start_after_key);
+
+ string previous_key; // last key stored in result, so if we have to
+ // call get_obj_vals multiple times, we do not
+ // add the overlap to result
+ string previous_prefix_key; // last prefix_key stored in result, so
+ // we can skip over entries with the
+ // same prefix_key
+
+ bool done = false; // whether we need to keep calling get_obj_vals
+ bool more = true; // output parameter of get_obj_vals
+ bool has_delimiter = !op.delimiter.empty();
+
+ if (has_delimiter &&
+ boost::algorithm::ends_with(start_after_key, op.delimiter)) {
+ // advance past all subdirectory entries if we start after a
+ // subdirectory
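+ // (e.g. with delimiter "/" a start key of "photos/" is advanced past
+ // every key beginning with "photos/", assuming cls_rgw_after_delim
+ // produces a key that sorts after all of them)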
+ start_after_key = cls_rgw_after_delim(start_after_key);
+ }
+
+ for (int attempt = 0;
+ attempt < max_attempts &&
+ more &&
+ !done &&
+ name_entry_map.size() < op.num_entries;
+ ++attempt) {
+ map<string, bufferlist> keys;
+ rc = get_obj_vals(hctx, start_after_key, op.filter_prefix,
+ op.num_entries - name_entry_map.size(),
+ &keys, &more);
+ if (rc < 0) {
return rc;
-
- auto& m = new_dir.m;
+ }
done = keys.empty();
- for (kiter = keys.begin(); kiter != keys.end(); ++kiter) {
- rgw_bucket_dir_entry entry;
-
+ for (auto kiter = keys.cbegin(); kiter != keys.cend(); ++kiter) {
if (!bi_is_objs_index(kiter->first)) {
+ // we're done if we walked off the end of the objects area of
+ // the bucket index
done = true;
break;
}
- bufferlist& entrybl = kiter->second;
- auto eiter = entrybl.cbegin();
+ rgw_bucket_dir_entry entry;
try {
+ const bufferlist& entrybl = kiter->second;
+ auto eiter = entrybl.cbegin();
decode(entry, eiter);
} catch (buffer::error& err) {
- CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode entry, key=%s\n", kiter->first.c_str());
+ CLS_LOG(1, "ERROR: %s: failed to decode entry, key=%s\n",
+ __func__, kiter->first.c_str());
return -EINVAL;
}
+ start_after_key = kiter->first;
+ CLS_LOG(20, "%s: working on key=%s len=%zu",
+ __func__, kiter->first.c_str(), kiter->first.size());
+
cls_rgw_obj_key key;
uint64_t ver;
-
- start_key = kiter->first;
- CLS_LOG(20, "start_key=%s len=%zu", start_key.c_str(), start_key.size());
-
int ret = decode_list_index_key(kiter->first, &key, &ver);
if (ret < 0) {
- CLS_LOG(0, "ERROR: failed to decode list index key (%s)\n", escape_str(kiter->first).c_str());
+ CLS_LOG(0, "ERROR: %s: failed to decode list index key (%s)\n",
+ __func__, escape_str(kiter->first).c_str());
continue;
}
if (!entry.is_valid()) {
- CLS_LOG(20, "entry %s[%s] is not valid\n", key.name.c_str(), key.instance.c_str());
+ CLS_LOG(20, "%s: entry %s[%s] is not valid\n",
+ __func__, key.name.c_str(), key.instance.c_str());
continue;
}
// filter out noncurrent versions, delete markers, and initial marker
- if (!op.list_versions && (!entry.is_visible() || op.start_obj.name == key.name)) {
- CLS_LOG(20, "entry %s[%s] is not visible\n", key.name.c_str(), key.instance.c_str());
+ if (!op.list_versions &&
+ (!entry.is_visible() || op.start_obj.name == key.name)) {
+ CLS_LOG(20, "%s: entry %s[%s] is not visible\n",
+ __func__, key.name.c_str(), key.instance.c_str());
continue;
}
- if (m.size() < op.num_entries) {
- m[kiter->first] = entry;
+
+ if (has_delimiter) {
+ int delim_pos = key.name.find(op.delimiter, op.filter_prefix.size());
+
+ if (delim_pos >= 0) {
+ /* extract key with trailing delimiter */
+ string prefix_key =
+ key.name.substr(0, delim_pos + op.delimiter.length());
+
+ if (prefix_key == previous_prefix_key) {
+ continue; // we've already added this;
+ } else {
+ previous_prefix_key = prefix_key;
+ }
+
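+ // add a single proxy entry to stand in for every key sharing this
+ // prefix; e.g. with delimiter "/" and no prefix filter, "photos/a.jpg"
+ // and "photos/b.jpg" are both represented by one entry keyed "photos/"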
+ if (name_entry_map.size() < op.num_entries) {
+ rgw_bucket_dir_entry proxy_entry;
+ cls_rgw_obj_key proxy_key(prefix_key);
+ proxy_entry.key = cls_rgw_obj_key(proxy_key);
+ proxy_entry.flags = rgw_bucket_dir_entry::FLAG_COMMON_PREFIX;
+ name_entry_map[prefix_key] = proxy_entry;
+
+ CLS_LOG(20, "%s: got common prefix entry %s[%s] num entries=%lu\n",
+ __func__, proxy_key.name.c_str(), proxy_key.instance.c_str(),
+ name_entry_map.size());
+ }
+
+ // make sure that if this is the last item added to the
+ // result from this call to get_obj_vals, the next call will
+ // skip past the rest of the "subdirectory"
+ start_after_key = cls_rgw_after_delim(prefix_key);
+
+ // advance to past this subdirectory, but then back up one,
+ // so the loop increment will put us in the right place
+ kiter = keys.lower_bound(start_after_key);
+ --kiter;
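+ // (kiter->first begins with prefix_key, so assuming cls_rgw_after_delim
+ // returns a key sorting after everything with that prefix, lower_bound
+ // cannot return keys.begin() and the decrement stays in range)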
+
+ continue;
+ }
+
+ // no delimiter after prefix found, so this is a "top-level"
+ // item and we can just fall through
}
- left_to_read--;
- CLS_LOG(20, "got entry %s[%s] m.size()=%d\n", key.name.c_str(), key.instance.c_str(), (int)m.size());
- }
- } while (left_to_read > 0 && !done);
+ if (name_entry_map.size() < op.num_entries &&
+ kiter->first != previous_key) {
+ name_entry_map[kiter->first] = entry;
+ previous_key = kiter->first;
+ CLS_LOG(20, "%s: got object entry %s[%s] num entries=%d\n",
+ __func__, key.name.c_str(), key.instance.c_str(),
+ int(name_entry_map.size()));
+ }
+ } // for (auto kiter...
+ } // for (int attempt...
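+ // truncated if the omap has more entries and we did not walk off the
+ // end of the objects index; the caller can resume by passing the last
+ // key returned as the next start_obj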
ret.is_truncated = more && !done;
-
encode(ret, *out);
return 0;
-}
+} // rgw_bucket_list
static int check_index(cls_method_context_t hctx,
rgw_bucket_dir_header *existing_header,
return;
}
-
void cls_rgw_bucket_list_op(librados::ObjectReadOperation& op,
const cls_rgw_obj_key& start_obj,
const std::string& filter_prefix,
+ const std::string& delimiter,
uint32_t num_entries,
bool list_versions,
rgw_cls_list_ret* result)
rgw_cls_list_op call;
call.start_obj = start_obj;
call.filter_prefix = filter_prefix;
+ call.delimiter = delimiter;
call.num_entries = num_entries;
call.list_versions = list_versions;
encode(call, in);
- op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, new ClsBucketIndexOpCtx<rgw_cls_list_ret>(result, NULL));
+ op.exec(RGW_CLASS, RGW_BUCKET_LIST, in,
+ new ClsBucketIndexOpCtx<rgw_cls_list_ret>(result, NULL));
}
-static bool issue_bucket_list_op(librados::IoCtx& io_ctx, const string& oid,
+static bool issue_bucket_list_op(librados::IoCtx& io_ctx,
+ const string& oid,
const cls_rgw_obj_key& start_obj,
const string& filter_prefix,
- uint32_t num_entries, bool list_versions,
+ const string& delimiter,
+ uint32_t num_entries,
+ bool list_versions,
BucketIndexAioManager *manager,
- rgw_cls_list_ret *pdata) {
+ rgw_cls_list_ret *pdata)
+{
librados::ObjectReadOperation op;
- cls_rgw_bucket_list_op(op, start_obj, filter_prefix,
+ cls_rgw_bucket_list_op(op,
+ start_obj, filter_prefix, delimiter,
num_entries, list_versions, pdata);
return manager->aio_operate(io_ctx, oid, &op);
}
int CLSRGWIssueBucketList::issue_op(int shard_id, const string& oid)
{
- return issue_bucket_list_op(io_ctx, oid, start_obj, filter_prefix, num_entries, list_versions, &manager, &result[shard_id]);
+ return issue_bucket_list_op(io_ctx, oid,
+ start_obj, filter_prefix, delimiter,
+ num_entries, list_versions, &manager,
+ &result[shard_id]);
}
void cls_rgw_remove_obj(librados::ObjectWriteOperation& o, list<string>& keep_attr_prefixes)
int CLSRGWIssueGetDirHeader::issue_op(int shard_id, const string& oid)
{
- cls_rgw_obj_key nokey;
- return issue_bucket_list_op(io_ctx, oid, nokey, "", 0, false, &manager, &result[shard_id]);
+ cls_rgw_obj_key empty_key;
+ string empty_prefix;
+ string empty_delimiter;
+ return issue_bucket_list_op(io_ctx, oid,
+ empty_key, empty_prefix, empty_delimiter,
+ 0, false, &manager, &result[shard_id]);
}
static bool issue_resync_bi_log(librados::IoCtx& io_ctx, const string& oid, BucketIndexAioManager *manager)
class CLSRGWIssueBucketList : public CLSRGWConcurrentIO {
cls_rgw_obj_key start_obj;
string filter_prefix;
+ string delimiter;
uint32_t num_entries;
bool list_versions;
map<int, rgw_cls_list_ret>& result;
protected:
int issue_op(int shard_id, const string& oid) override;
public:
- CLSRGWIssueBucketList(librados::IoCtx& io_ctx, const cls_rgw_obj_key& _start_obj,
- const string& _filter_prefix, uint32_t _num_entries,
+ CLSRGWIssueBucketList(librados::IoCtx& io_ctx,
+ const cls_rgw_obj_key& _start_obj,
+ const string& _filter_prefix,
+ const string& _delimiter,
+ uint32_t _num_entries,
bool _list_versions,
map<int, string>& oids,
map<int, rgw_cls_list_ret>& list_results,
uint32_t max_aio) :
CLSRGWConcurrentIO(io_ctx, oids, max_aio),
- start_obj(_start_obj), filter_prefix(_filter_prefix), num_entries(_num_entries), list_versions(_list_versions), result(list_results) {}
+ start_obj(_start_obj), filter_prefix(_filter_prefix), delimiter(_delimiter),
+ num_entries(_num_entries), list_versions(_list_versions),
+ result(list_results)
+ {}
};
void cls_rgw_bucket_list_op(librados::ObjectReadOperation& op,
const cls_rgw_obj_key& start_obj,
const std::string& filter_prefix,
+ const std::string& delimiter,
uint32_t num_entries,
bool list_versions,
rgw_cls_list_ret* result);
cls_rgw_obj_key start_obj;
uint32_t num_entries;
string filter_prefix;
+ string delimiter;
bool list_versions;
rgw_cls_list_op() : num_entries(0), list_versions(false) {}
void encode(bufferlist &bl) const {
- ENCODE_START(5, 4, bl);
+ ENCODE_START(6, 4, bl);
encode(num_entries, bl);
encode(filter_prefix, bl);
encode(start_obj, bl);
encode(list_versions, bl);
+ encode(delimiter, bl);
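+ // the new field is encoded last so that struct_v 5 decoders simply
+ // skip it via the encoded struct length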
ENCODE_FINISH(bl);
}
void decode(bufferlist::const_iterator &bl) {
- DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
if (struct_v < 4) {
decode(start_obj.name, bl);
}
decode(num_entries, bl);
- if (struct_v >= 3)
+ if (struct_v >= 3) {
decode(filter_prefix, bl);
- if (struct_v >= 4)
+ }
+ if (struct_v >= 4) {
decode(start_obj, bl);
- if (struct_v >= 5)
+ }
+ if (struct_v >= 5) {
decode(list_versions, bl);
+ }
+ if (struct_v >= 6) {
+ decode(delimiter, bl);
+ }
DECODE_FINISH(bl);
}
void dump(Formatter *f) const;
static constexpr uint16_t FLAG_DELETE_MARKER = 0x4;
/* object is versioned, a placeholder for the plain entry */
static constexpr uint16_t FLAG_VER_MARKER = 0x8;
+ /* object is a proxy; it is not listed in the bucket index but is a
+ * prefix ending with a delimiter, perhaps common to multiple
+ * entries; it is only useful when a delimiter is used and
+ * represents a "subdirectory" (again, ending in a delimiter) that
+ * may contain one or more actual entries/objects */
+ static constexpr uint16_t FLAG_COMMON_PREFIX = 0x8000;
cls_rgw_obj_key key;
rgw_bucket_entry_ver ver;
bool is_valid() const {
return (flags & rgw_bucket_dir_entry::FLAG_VER_MARKER) == 0;
}
+ bool is_common_prefix() const {
+ return flags & rgw_bucket_dir_entry::FLAG_COMMON_PREFIX;
+ }
void dump(Formatter *f) const;
void decode_json(JSONObj *obj);
bool is_truncated = true;
rgw_obj_index_key marker;
- string prefix;
+ string empty_prefix;
+ string empty_delimiter;
formatter->open_object_section("result");
formatter->dump_string("bucket", bucket_name);
formatter->open_array_section("objects");
while (is_truncated) {
RGWRados::ent_map_t result;
- int r =
- store->getRados()->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker,
- prefix, 1000, true,
- result, &is_truncated, &marker,
- null_yield,
- rgw_bucket_object_check_filter);
+ int r = store->getRados()->cls_bucket_list_ordered(
+ bucket_info, RGW_NO_SHARD,
+ marker, empty_prefix, empty_delimiter,
+ 1000, true,
+ result, &is_truncated, &marker,
+ null_yield,
+ rgw_bucket_object_check_filter);
if (r < 0 && r != -ENOENT) {
cerr << "ERROR: failed operation r=" << r << std::endl;
}
store->getRados()->cls_obj_set_bucket_tag_timeout(bucket_info, BUCKET_TAG_TIMEOUT);
string prefix;
+ string empty_delimiter;
rgw_obj_index_key marker;
bool is_truncated = true;
result.reserve(listing_max_entries);
int r = store->getRados()->cls_bucket_list_ordered(
- bucket_info, RGW_NO_SHARD, marker, prefix, listing_max_entries, true,
- result, &is_truncated, &marker, y, bucket_object_check_filter);
+ bucket_info, RGW_NO_SHARD, marker, prefix, empty_delimiter,
+ listing_max_entries, true, result, &is_truncated, &marker, y,
+ rgw_bucket_object_check_filter);
if (r == -ENOENT) {
break;
} else if (r < 0 && r != -ENOENT) {
result->clear();
- rgw_obj_key marker_obj(params.marker.name, params.marker.instance, params.ns);
+ rgw_obj_key marker_obj(params.marker.name,
+ params.marker.instance,
+ params.ns);
rgw_obj_index_key cur_marker;
marker_obj.get_index_key(&cur_marker);
- rgw_obj_key end_marker_obj(params.end_marker.name, params.end_marker.instance,
+ rgw_obj_key end_marker_obj(params.end_marker.name,
+ params.end_marker.instance,
params.ns);
rgw_obj_index_key cur_end_marker;
end_marker_obj.get_index_key(&cur_end_marker);
}
constexpr int allowed_read_attempts = 2;
- string skip_after_delim;
for (int attempt = 0; attempt < allowed_read_attempts; ++attempt) {
// this loop is generally expected only to have a single
// iteration; see bottom of loop for early exit
shard_id,
cur_marker,
cur_prefix,
+ params.delim,
read_ahead + 1 - count,
params.list_versions,
ent_map,
&truncated,
&cur_marker,
y);
- if (r < 0)
+ if (r < 0) {
return r;
+ }
for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
rgw_bucket_dir_entry& entry = eiter->second;
*/
bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
if (!valid) {
- ldout(cct, 0) << "ERROR: could not parse object name: " << obj.name << dendl;
+ ldout(cct, 0) << "ERROR: could not parse object name: " <<
+ obj.name << dendl;
continue;
}
next_marker = index_key;
}
- if (params.filter && !params.filter->filter(obj.name, index_key.name))
+ if (params.filter &&
+ ! params.filter->filter(obj.name, index_key.name)) {
continue;
+ }
if (params.prefix.size() &&
- (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
+ 0 != obj.name.compare(0, params.prefix.size(), params.prefix)) {
continue;
+ }
if (!params.delim.empty()) {
int delim_pos = obj.name.find(params.delim, params.prefix.size());
if (delim_pos >= 0) {
- /* extract key -with trailing delimiter- for CommonPrefix */
- string prefix_key =
- obj.name.substr(0, delim_pos + params.delim.length());
-
- if (common_prefixes &&
- common_prefixes->find(prefix_key) == common_prefixes->end()) {
+ // should only find one delimiter at the end if it finds any
+ // after the prefix
+ ceph_assert(delim_pos ==
+ int(obj.name.length() - params.delim.length()));
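+ // (the CLS layer now returns one FLAG_COMMON_PREFIX proxy entry,
+ // named with a trailing delimiter, in place of the individual
+ // "subdirectory" objects, so a delimiter after the prefix can only
+ // appear at the very end)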
+ if (common_prefixes) {
if (count >= max) {
truncated = true;
goto done;
}
- next_marker = prefix_key;
- (*common_prefixes)[prefix_key] = true;
+ (*common_prefixes)[obj.name] = true;
count++;
}
continue;
- }
- }
+ } // if found delimiter after prefix
+ } // if there is a delimiter
if (count >= max) {
truncated = true;
count++;
} // eiter for loop
- if (!params.delim.empty()) {
- int marker_delim_pos = cur_marker.name.find(params.delim, cur_prefix.size());
- if (marker_delim_pos >= 0) {
- skip_after_delim = cur_marker.name.substr(0, marker_delim_pos);
- skip_after_delim.append(after_delim_s);
-
- ldout(cct, 20) << "skip_after_delim=" << skip_after_delim << dendl;
-
- if (skip_after_delim > cur_marker.name) {
- cur_marker = skip_after_delim;
- ldout(cct, 20) << "setting cur_marker="
- << cur_marker.name
- << "[" << cur_marker.instance << "]"
- << dendl;
- }
- }
- }
-
// if we finished listing, or if we're returning at least half the
// requested entries, that's enough; S3 and swift protocols allow
// returning fewer than max entries
} // read attempt loop
done:
- if (is_truncated)
+
+ if (is_truncated) {
*is_truncated = truncated;
+ }
return 0;
} // list_objects_ordered
int shard_id,
const rgw_obj_index_key& start_after,
const string& prefix,
+ const string& delimiter,
uint32_t num_entries,
bool list_versions,
ent_map_t& m,
auto& ioctx = index_pool.ioctx();
map<int, struct rgw_cls_list_ret> list_results;
cls_rgw_obj_key start_after_key(start_after.name, start_after.instance);
- r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, num_entries_per_shard,
+ r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter,
+ num_entries_per_shard,
list_versions, oids, list_results,
cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0) {
const string& name = vcurrents[pos]->first;
struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
- bool force_check = force_check_filter &&
- force_check_filter(dirent.key.name);
- if ((!dirent.exists && !dirent.is_delete_marker()) ||
+ bool force_check =
+ force_check_filter && force_check_filter(dirent.key.name);
+
+ if ((!dirent.exists &&
+ !dirent.is_delete_marker() &&
+ !dirent.is_common_prefix()) ||
!dirent.pending_map.empty() ||
force_check) {
/* there are uncommitted ops. We need to check the current
rgw_cls_list_ret result;
librados::ObjectReadOperation op;
- cls_rgw_bucket_list_op(op, marker, prefix, num_entries,
+ string empty_delimiter;
+ cls_rgw_bucket_list_op(op, marker, prefix, empty_delimiter,
+ num_entries,
list_versions, &result);
r = rgw_rados_operate(ioctx, oid, &op, nullptr, null_yield);
if (r < 0)
using check_filter_t = bool (*)(const std::string&);
- int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
+ int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+ int shard_id,
const rgw_obj_index_key& start_after,
const string& prefix,
- uint32_t num_entries, bool list_versions,
+ const string& delimiter,
+ uint32_t num_entries,
+ bool list_versions,
ent_map_t& m,
bool *is_truncated,
rgw_obj_index_key *last_entry,
map<int, string> oids = { {0, bucket_oid} };
map<int, struct rgw_cls_list_ret> list_results;
cls_rgw_obj_key start_key("", "");
- int r = CLSRGWIssueBucketList(ioctx, start_key, "", 1000, true, oids, list_results, 1)();
-
+ string empty_prefix;
+ string empty_delimiter;
+ int r = CLSRGWIssueBucketList(ioctx, start_key,
+ empty_prefix, empty_delimiter,
+ 1000, true, oids, list_results, 1)();
ASSERT_EQ(r, 0);
ASSERT_EQ(1u, list_results.size());