From: Yehuda Sadeh Date: Sat, 25 Mar 2017 00:05:47 +0000 (-0700) Subject: rgw: metadata search pagination X-Git-Tag: ses5-milestone6~9^2~3^2~52 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1e886475bb0f21a43bc24b4b2d1d7cd273f0c46d;p=ceph.git rgw: metadata search pagination Leveraging elasticsearch "size" and "from" for doing pagination. This is not optimal, but sadly it's currently the way to go as elasticsearch does not have an efficient way to retrieve ordered entries (other than using stateful api). This should be revisited in the future. A preferable way would be to hold a unique identifier for each object doc, and have the query sort by that unique identifier (that could be used as marker). At the moment sorting by anything would require elasticsearch to load all data into memory. Signed-off-by: Yehuda Sadeh --- diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h index 26c84d121a2..0225797e5b7 100644 --- a/src/rgw/rgw_acl.h +++ b/src/rgw/rgw_acl.h @@ -377,6 +377,7 @@ public: DECODE_FINISH(bl); } void dump(Formatter *f) const; + void decode_json(JSONObj *obj); static void generate_test_instances(list& o); void set_id(const rgw_user& _id) { id = _id; } void set_name(const string& name) { display_name = name; } diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc index 2a183b59195..aff97d3c3ed 100644 --- a/src/rgw/rgw_json_enc.cc +++ b/src/rgw/rgw_json_enc.cc @@ -201,6 +201,13 @@ void ACLOwner::dump(Formatter *f) const encode_json("display_name", display_name, f); } +void ACLOwner::decode_json(JSONObj *obj) { + string id_str; + JSONDecoder::decode_json("id", id_str, obj); + id.from_str(id_str); + JSONDecoder::decode_json("display_name", display_name, obj); +} + void RGWAccessControlPolicy::dump(Formatter *f) const { encode_json("acl", acl, f); diff --git a/src/rgw/rgw_sync_module_es_rest.cc b/src/rgw/rgw_sync_module_es_rest.cc index 5b43977437f..d5c7ab50c7a 100644 --- a/src/rgw/rgw_sync_module_es_rest.cc +++ 
b/src/rgw/rgw_sync_module_es_rest.cc @@ -49,6 +49,7 @@ struct es_index_obj_response { JSONDecoder::decode_json("name", key.name, obj); JSONDecoder::decode_json("instance", key.instance, obj); JSONDecoder::decode_json("permissions", read_permissions, obj); + JSONDecoder::decode_json("owner", owner, obj); JSONDecoder::decode_json("meta", meta, obj); } }; @@ -101,6 +102,12 @@ class RGWMetadataSearchOp : public RGWOp { RGWElasticSyncModuleInstance *es_module; protected: string expression; +#define MAX_KEYS_DEFAULT 100 + uint64_t max_keys{MAX_KEYS_DEFAULT}; + string marker_str; + uint64_t marker{0}; + string next_marker; + bool is_truncated{false}; es_search_response response; @@ -163,7 +170,13 @@ void RGWMetadataSearchOp::execute() string resource = es_module->get_index_path(store->get_realm()) + "/_search"; param_vec_t params; -// params.push_back(param_pair_t("size", size)); +#define BUFSIZE 32 + char buf[BUFSIZE]; + snprintf(buf, sizeof(buf), "%lld", (long long)max_keys); + params.push_back(param_pair_t("size", buf)); + if (marker > 0) { + params.push_back(param_pair_t("from", marker_str.c_str())); + } ldout(s->cct, 20) << "sending request to elasticsearch, payload=" << string(in.c_str(), in.length()) << dendl; op_ret = conn->get_resource(resource, &params, nullptr, out, &in); if (op_ret < 0) { @@ -196,6 +209,31 @@ public: int get_params() override { expression = s->info.args.get("query"); + bool exists; + string max_keys_str = s->info.args.get("max-keys", &exists); +#define MAX_KEYS_MAX 10000 + if (exists) { + string err; + max_keys = strict_strtoll(max_keys_str.c_str(), 10, &err); + if (!err.empty()) { + return -EINVAL; + } + if (max_keys > MAX_KEYS_MAX) { + max_keys = MAX_KEYS_MAX; + } + } + marker_str = s->info.args.get("marker", &exists); + if (exists) { + string err; + marker = strict_strtoll(marker_str.c_str(), 10, &err); + if (!err.empty()) { + return -EINVAL; + } + } + uint64_t nm = marker + max_keys; + char buf[BUFSIZE]; + snprintf(buf, sizeof(buf), "%lld", 
(long long)nm); + next_marker = buf; return 0; } void send_response() override { @@ -208,7 +246,14 @@ public: return; } + is_truncated = (response.hits.hits.size() >= max_keys); + s->formatter->open_object_section("SearchMetadataResponse"); + s->formatter->dump_string("Marker", marker_str); + s->formatter->dump_string("IsTruncated", (is_truncated ? "true" : "false")); + if (is_truncated) { + s->formatter->dump_string("NextMarker", next_marker); + } for (auto& i : response.hits.hits) { es_index_obj_response& e = i.source; s->formatter->open_object_section("Contents");