From: Yuval Lifshitz Date: Wed, 22 Oct 2025 09:15:47 +0000 (+0000) Subject: rgw/s3vector: add stubs for API X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=58cce946732218dbbaf599f19dd13b9293def613;p=ceph-ci.git rgw/s3vector: add stubs for API according to: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Operations_Amazon_S3_Vectors.html Signed-off-by: Yuval Lifshitz --- diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 309e7639297..5d355b07dc2 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -307,7 +307,7 @@ options: any ``radosgw`` instance that is meant to participate in a `multi-site <../multisite>`_ configuration. - default: s3, s3website, swift, swift_auth, admin, sts, iam, notifications + default: s3, s3website, swift, swift_auth, admin, sts, iam, notifications, s3vectors services: - rgw with_legacy: true diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 0fd7a8bca47..40146e95e70 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -144,7 +144,9 @@ set(librgw_common_srcs rgw_realm_watcher.cc rgw_bucket_logging.cc rgw_rest_bucket_logging.cc - rgw_bucket_sync.cc) + rgw_bucket_sync.cc + rgw_rest_s3vector.cc + rgw_s3vector.cc) list(APPEND librgw_common_srcs driver/immutable_config/store.cc diff --git a/src/rgw/rgw_appmain.cc b/src/rgw/rgw_appmain.cc index df8c0c22cc3..e74977f18fd 100644 --- a/src/rgw/rgw_appmain.cc +++ b/src/rgw/rgw_appmain.cc @@ -310,6 +310,7 @@ void rgw::AppMain::cond_init_apis() const bool iam_enabled = apis_map.count("iam") > 0; const bool pubsub_enabled = apis_map.count("pubsub") > 0 || apis_map.count("notifications") > 0; + const bool s3vectors_enabled = apis_map.count("s3vectors") > 0; // Swift API entrypoint could placed in the root instead of S3 const bool swift_at_root = g_conf()->rgw_swift_url_prefix == "/"; if (apis_map.count("s3") > 0 || s3website_enabled) { @@ -317,7 +318,7 @@ void 
rgw::AppMain::cond_init_apis() rest.register_default_mgr(set_logging( rest_filter(env.driver, RGW_REST_S3, new RGWRESTMgr_S3(s3website_enabled, sts_enabled, - iam_enabled, pubsub_enabled)))); + iam_enabled, pubsub_enabled, s3vectors_enabled)))); } else { derr << "Cannot have the S3 or S3 Website enabled together with " << "Swift API placed in the root of hierarchy" << dendl; diff --git a/src/rgw/rgw_iam_policy.cc b/src/rgw/rgw_iam_policy.cc index ecd704da78e..4e550e30d0b 100644 --- a/src/rgw/rgw_iam_policy.cc +++ b/src/rgw/rgw_iam_policy.cc @@ -223,6 +223,22 @@ static const actpair actpairs[] = { "organizations:ListRoots", organizationsListRoots}, { "organizations:ListPolicies", organizationsListPolicies}, { "organizations:ListTargetsForPolicy", organizationsListTargetsForPolicy}, + { "s3vectors:CreateIndex", s3vectorsCreateIndex}, + { "s3vectors:CreateVectorBucket", s3vectorsCreateVectorBucket}, + { "s3vectors:DeleteIndex", s3vectorsDeleteIndex}, + { "s3vectors:DeleteVectorBucket", s3vectorsDeleteVectorBucket}, + { "s3vectors:DeleteVectorBucketPolicy", s3vectorsDeleteVectorBucketPolicy}, + { "s3vectors:DeleteVectors", s3vectorsDeleteVectors}, + { "s3vectors:GetIndex", s3vectorsGetIndex}, + { "s3vectors:GetVectorBucket", s3vectorsGetVectorBucket}, + { "s3vectors:GetVectorBucketPolicy", s3vectorsGetVectorBucketPolicy}, + { "s3vectors:GetVectors", s3vectorsGetVectors}, + { "s3vectors:ListIndexes", s3vectorsListIndexes}, + { "s3vectors:ListVectorBuckets", s3vectorsListVectorBuckets}, + { "s3vectors:ListVectors", s3vectorsListVectors}, + { "s3vectors:PutVectorBucketPolicy", s3vectorsPutVectorBucketPolicy}, + { "s3vectors:PutVectors", s3vectorsPutVectors}, + { "s3vectors:QueryVectors", s3vectorsQueryVectors}, }; struct PolicyParser; @@ -689,6 +705,12 @@ bool ParseState::do_string(CephContext* cct, const char* s, size_t l) { if ((t->notaction & organizationsAllValue) == organizationsAllValue) { t->notaction[organizationsAll] = 1; } + if ((t->action & s3vectorsAllValue) 
== s3vectorsAllValue) { + t->action[s3vectorsAll] = 1; + } + if ((t->notaction & s3vectorsAllValue) == s3vectorsAllValue) { + t->notaction[s3vectorsAll] = 1; + } } } } else if (w->id == TokenID::Resource || w->id == TokenID::NotResource) { @@ -1751,6 +1773,39 @@ const char* action_bit_string(uint64_t action) { case organizationsListTargetsForPolicy: return "organizations:ListTargetsForPolicy"; + + case s3vectorsCreateIndex: + return "s3vectors:CreateIndex"; + case s3vectorsCreateVectorBucket: + return "s3vectors:CreateVectorBucket"; + case s3vectorsDeleteIndex: + return "s3vectors:DeleteIndex"; + case s3vectorsDeleteVectorBucket: + return "s3vectors:DeleteVectorBucket"; + case s3vectorsDeleteVectorBucketPolicy: + return "s3vectors:DeleteVectorBucketPolicy"; + case s3vectorsDeleteVectors: + return "s3vectors:DeleteVectors"; + case s3vectorsGetIndex: + return "s3vectors:GetIndex"; + case s3vectorsGetVectorBucket: + return "s3vectors:GetVectorBucket"; + case s3vectorsGetVectorBucketPolicy: + return "s3vectors:GetVectorBucketPolicy"; + case s3vectorsGetVectors: + return "s3vectors:GetVectors"; + case s3vectorsListIndexes: + return "s3vectors:ListIndexes"; + case s3vectorsListVectorBuckets: + return "s3vectors:ListVectorBuckets"; + case s3vectorsListVectors: + return "s3vectors:ListVectors"; + case s3vectorsPutVectorBucketPolicy: + return "s3vectors:PutVectorBucketPolicy"; + case s3vectorsPutVectors: + return "s3vectors:PutVectors"; + case s3vectorsQueryVectors: + return "s3vectors:QueryVectors"; } return "s3Invalid"; } diff --git a/src/rgw/rgw_iam_policy.h b/src/rgw/rgw_iam_policy.h index b202052ce92..2a74f05c7fd 100644 --- a/src/rgw/rgw_iam_policy.h +++ b/src/rgw/rgw_iam_policy.h @@ -214,6 +214,24 @@ enum { organizationsListTargetsForPolicy, organizationsAll, + s3vectorsCreateIndex, + s3vectorsCreateVectorBucket, + s3vectorsDeleteIndex, + s3vectorsDeleteVectorBucket, + s3vectorsDeleteVectorBucketPolicy, + s3vectorsDeleteVectors, + s3vectorsGetIndex, + 
s3vectorsGetVectorBucket, + s3vectorsGetVectorBucketPolicy, + s3vectorsGetVectors, + s3vectorsListIndexes, + s3vectorsListVectorBuckets, + s3vectorsListVectors, + s3vectorsPutVectorBucketPolicy, + s3vectorsPutVectors, + s3vectorsQueryVectors, + s3vectorsAll, + allCount }; @@ -240,6 +258,7 @@ static const Action_t iamAllValue = set_cont_bits(s3objectlambdaAll+1, static const Action_t stsAllValue = set_cont_bits(iamAll+1,stsAll); static const Action_t snsAllValue = set_cont_bits(stsAll+1, snsAll); static const Action_t organizationsAllValue = set_cont_bits(snsAll+1,organizationsAll); +static const Action_t s3vectorsAllValue = set_cont_bits(organizationsAll+1, s3vectorsAll); static const Action_t allValue = set_cont_bits(0,allCount); namespace { diff --git a/src/rgw/rgw_op_type.h b/src/rgw/rgw_op_type.h index 0631bf2b393..5b681844eb0 100644 --- a/src/rgw/rgw_op_type.h +++ b/src/rgw/rgw_op_type.h @@ -173,5 +173,22 @@ enum RGWOpType { RGW_OP_ADD_CLIENTID_TO_OIDC_PROVIDER, RGW_OP_REMOVE_CLIENTID_FROM_OIDC_PROVIDER, RGW_OP_UPDATE_OIDC_PROVIDER_THUMBPRINT, + /* s3Vector */ + RGW_OP_S3VECTOR_CREATE_INDEX, + RGW_OP_S3VECTOR_CREATE_VECTOR_BUCKET, + RGW_OP_S3VECTOR_DELETE_INDEX, + RGW_OP_S3VECTOR_DELETE_VECTOR_BUCKET, + RGW_OP_S3VECTOR_DELETE_VECTOR_BUCKET_POLICY, + RGW_OP_S3VECTOR_DELETE_VECTORS, + RGW_OP_S3VECTOR_GET_INDEX, + RGW_OP_S3VECTOR_GET_VECTOR_BUCKET, + RGW_OP_S3VECTOR_GET_VECTOR_BUCKET_POLICY, + RGW_OP_S3VECTOR_GET_VECTORS, + RGW_OP_S3VECTOR_LIST_INDEXES, + RGW_OP_S3VECTOR_LIST_VECTOR_BUCKETS, + RGW_OP_S3VECTOR_LIST_VECTORS, + RGW_OP_S3VECTOR_PUT_VECTOR_BUCKET_POLICY, + RGW_OP_S3VECTOR_PUT_VECTORS, + RGW_OP_S3VECTOR_QUERY_VECTORS, }; diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 5e7fe2f6674..88325023c6a 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -39,6 +39,7 @@ #include "rgw_rest_s3.h" #include "rgw_rest_s3website.h" #include "rgw_rest_pubsub.h" +#include "rgw_rest_s3vector.h" #include "rgw_auth_s3.h" #include 
"rgw_acl.h" #include "rgw_policy_s3.h" @@ -5968,6 +5969,19 @@ void parse_post_action(const std::string& post_body, req_state* s) } } +// s3vector requests looks like bucket POST operations +// where the "bucket name" is the operation name. +// with JSON payload +// POST / HTTP/1.1 +// Content-type: application/json +bool is_s3vector_op(const req_state* s) { + const auto content_type = s->info.env->get_optional("CONTENT_TYPE"); + return std::string_view(s->info.method) == "POST" && + s->info.args.get_num_params() == 0 && + content_type && + *content_type == "application/json"; +} + RGWHandler_REST* RGWRESTMgr_S3::get_handler(rgw::sal::Driver* driver, req_state* const s, const rgw::auth::StrategyRegistry& auth_registry, @@ -6026,6 +6040,10 @@ RGWHandler_REST* RGWRESTMgr_S3::get_handler(rgw::sal::Driver* driver, return nullptr; } // has bucket + if (enable_s3vector && is_s3vector_op(s)) { + ldpp_dout(s, 20) << "INFO: s3vector op: " << s->init_state.url_bucket << dendl; + return new RGWHandler_REST_s3Vector(auth_registry); + } return new RGWHandler_REST_Bucket_S3(auth_registry, enable_pubsub); } diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index 90fec7376b5..c4e827b110b 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -824,12 +824,14 @@ private: const bool enable_sts; const bool enable_iam; const bool enable_pubsub; + const bool enable_s3vector; public: - explicit RGWRESTMgr_S3(bool _enable_s3website=false, bool _enable_sts=false, bool _enable_iam=false, bool _enable_pubsub=false) + explicit RGWRESTMgr_S3(bool _enable_s3website=false, bool _enable_sts=false, bool _enable_iam=false, bool _enable_pubsub=false, bool _enable_s3vector=false) : enable_s3website(_enable_s3website), enable_sts(_enable_sts), enable_iam(_enable_iam), - enable_pubsub(_enable_pubsub) { + enable_pubsub(_enable_pubsub), + enable_s3vector(_enable_s3vector) { } ~RGWRESTMgr_S3() override = default; diff --git a/src/rgw/rgw_rest_s3vector.cc b/src/rgw/rgw_rest_s3vector.cc 
new file mode 100644 index 00000000000..059e171993a --- /dev/null +++ b/src/rgw/rgw_rest_s3vector.cc @@ -0,0 +1,500 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab + +#include "rgw_op.h" +#include "rgw_rest_s3vector.h" +#include "rgw_s3vector.h" +#include "common/async/yield_context.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +namespace { + +class RGWS3VectorBase : public RGWDefaultResponseOp { +protected: + template + int do_init_processing(T& configuration, optional_yield y) { + const auto max_size = s->cct->_conf->rgw_max_put_param_size; + bufferlist data; + int ret = 0; + if (std::tie(ret, data) = read_all_input(s, max_size, false); ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to read JSON s3vector payload, ret = " << ret << dendl; + return ret; + } + if (data.length() == 0) { + ldpp_dout(this, 1) << "ERROR: JSON s3vector payload missing" << dendl; + return -EINVAL; + } + + JSONParser parser; + if (!parser.parse(data.c_str(), data.length())) { + ldpp_dout(this, 1) << "ERROR: failed to parse JSON s3vector payload" << dendl; + return -EINVAL; + } + try { + decode_json_obj(configuration, &parser); + } catch (const JSONDecoder::err& e) { + ldpp_dout(this, 1) << "ERROR: failed to decode JSON s3vector payload: " << e.what() << dendl; + return -EINVAL; + } + + return 0; + } +}; + +class RGWS3VectorCreateIndex : public RGWS3VectorBase { + rgw::s3vector::create_index_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector CreateIndex" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsCreateIndex)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_create_index"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.CreateIndex", s->info.method); } 
+ RGWOpType get_type() override { return RGW_OP_S3VECTOR_CREATE_INDEX; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::create_index(configuration, this, y); + } +}; + +class RGWS3VectorCreateVectorBucket : public RGWS3VectorBase { + rgw::s3vector::create_vector_bucket_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector CreateVectorBucket" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsCreateVectorBucket)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_create_vector_bucket"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.CreateVectorBucket", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_CREATE_VECTOR_BUCKET; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::create_vector_bucket(configuration, this, y); + } +}; + +class RGWS3VectorDeleteIndex : public RGWS3VectorBase { + rgw::s3vector::delete_index_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector DeleteIndex" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsDeleteIndex)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_delete_index"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.DeleteIndex", s->info.method); } + RGWOpType get_type() override { 
return RGW_OP_S3VECTOR_DELETE_INDEX; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::delete_index(configuration, this, y); + } +}; + +class RGWS3VectorDeleteVectorBucket : public RGWS3VectorBase { + rgw::s3vector::delete_vector_bucket_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector DeleteVectorBucket" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsDeleteVectorBucket)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_delete_vector_bucket"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.DeleteVectorBucket", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_DELETE_VECTOR_BUCKET; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::delete_vector_bucket(configuration, this, y); + } +}; + +class RGWS3VectorDeleteVectorBucketPolicy : public RGWS3VectorBase { + rgw::s3vector::delete_vector_bucket_policy_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector DeleteVectorBucketPolicy" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsDeleteVectorBucketPolicy)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_delete_vector_bucket_policy"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.DeleteVectorBucketPolicy", 
s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_DELETE_VECTOR_BUCKET_POLICY; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::delete_vector_bucket_policy(configuration, this, y); + } +}; + +class RGWS3VectorPutVectors : public RGWS3VectorBase { + rgw::s3vector::put_vectors_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector PutVectors" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsPutVectors)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_put_vectors"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.PutVectors", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_PUT_VECTORS; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::put_vectors(configuration, this, y); + } +}; + +class RGWS3VectorGetVectors : public RGWS3VectorBase { + rgw::s3vector::get_vectors_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector GetVectors" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsGetVectors)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_get_vectors"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.GetVectors", s->info.method); } + RGWOpType get_type() override { return 
RGW_OP_S3VECTOR_GET_VECTORS; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::get_vectors(configuration, this, y); + } +}; + +class RGWS3VectorListVectors : public RGWS3VectorBase { + rgw::s3vector::list_vectors_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector ListVectors" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsListVectors)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_list_vectors"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.ListVectors", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_LIST_VECTORS; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::list_vectors(configuration, this, y); + } +}; + +class RGWS3VectorListVectorBuckets : public RGWS3VectorBase { + rgw::s3vector::list_vector_buckets_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector ListVectorBuckets" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsListVectorBuckets)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_list_vector_buckets"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.ListVectorBuckets", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_LIST_VECTOR_BUCKETS; } + uint32_t op_mask() 
override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::list_vector_buckets(configuration, this, y); + } +}; + +class RGWS3VectorGetVectorBucket : public RGWS3VectorBase { + rgw::s3vector::get_vector_bucket_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector GetVectorBucket" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsGetVectorBucket)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_get_vector_bucket"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.GetVectorBucket", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_GET_VECTOR_BUCKET; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::get_vector_bucket(configuration, this, y); + } +}; + +class RGWS3VectorGetIndex : public RGWS3VectorBase { + rgw::s3vector::get_index_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector GetIndex" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsGetIndex)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_get_index"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.GetIndex", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_GET_INDEX; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int 
init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::get_index(configuration, this, y); + } +}; + +class RGWS3VectorListIndexes : public RGWS3VectorBase { + rgw::s3vector::list_indexes_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector ListIndexes" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsListIndexes)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_list_indexes"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.ListIndexes", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_LIST_INDEXES; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::list_indexes(configuration, this, y); + } +}; + +class RGWS3VectorPutVectorBucketPolicy : public RGWS3VectorBase { + rgw::s3vector::put_vector_bucket_policy_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector PutVectorBucketPolicy" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsPutVectorBucketPolicy)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_put_vector_bucket_policy"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.PutVectorBucketPolicy", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_PUT_VECTOR_BUCKET_POLICY; } + uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } + + int 
init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::put_vector_bucket_policy(configuration, this, y); + } +}; + +class RGWS3VectorGetVectorBucketPolicy : public RGWS3VectorBase { + rgw::s3vector::get_vector_bucket_policy_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector GetVectorBucketPolicy" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsGetVectorBucketPolicy)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_get_vector_bucket_policy"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.GetVectorBucketPolicy", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_GET_VECTOR_BUCKET_POLICY; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::get_vector_bucket_policy(configuration, this, y); + } +}; + +class RGWS3VectorDeleteVectors : public RGWS3VectorBase { + rgw::s3vector::delete_vectors_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector DeleteVectors" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsDeleteVectors)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_delete_vectors"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.DeleteVectors", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_DELETE_VECTORS; } + uint32_t op_mask() override { return 
RGW_OP_TYPE_WRITE; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::delete_vectors(configuration, this, y); + } +}; + +class RGWS3VectorQueryVectors : public RGWS3VectorBase { + rgw::s3vector::query_vectors_t configuration; + + int verify_permission(optional_yield y) override { + ldpp_dout(this, 10) << "INFO: verifying permission for s3vector QueryVectors" << dendl; + // TODO: implement permission check + /*if (!verify_bucket_permission(this, s, rgw::IAM::s3vectorsQueryVectors)) { + return -EACCES; + }*/ + return 0; + } + + const char* name() const override { return "s3vector_query_vectors"; } + std::string canonical_name() const override { return fmt::format("REST.{}.S3VECTOR.QueryVectors", s->info.method); } + RGWOpType get_type() override { return RGW_OP_S3VECTOR_QUERY_VECTORS; } + uint32_t op_mask() override { return RGW_OP_TYPE_READ; } + + int init_processing(optional_yield y) override { + return do_init_processing(configuration, y); + } + + void execute(optional_yield y) override { + op_ret = rgw::s3vector::query_vectors(configuration, this, y); + } +}; +} + +RGWOp* RGWHandler_REST_s3Vector::op_post() { + const auto& op_name = s->init_state.url_bucket; + if (op_name == "CreateIndex") + return new RGWS3VectorCreateIndex(); + if (op_name == "CreateVectorBucket") + return new RGWS3VectorCreateVectorBucket(); + if (op_name == "PutVectors") + return new RGWS3VectorPutVectors(); + if (op_name == "PutVectorBucketPolicy") + return new RGWS3VectorPutVectorBucketPolicy(); + if (op_name == "DeleteVectors") + return new RGWS3VectorDeleteVectors(); + if (op_name == "DeleteIndex") + return new RGWS3VectorDeleteIndex(); + if (op_name == "DeleteVectorBucket") + return new RGWS3VectorDeleteVectorBucket(); + if (op_name == "DeleteVectorBucketPolicy") + return new RGWS3VectorDeleteVectorBucketPolicy(); + if (op_name == "GetIndex") + return new 
RGWS3VectorGetIndex(); + if (op_name == "GetVectors") + return new RGWS3VectorGetVectors(); + if (op_name == "GetVectorBucket") + return new RGWS3VectorGetVectorBucket(); + if (op_name == "GetVectorBucketPolicy") + return new RGWS3VectorGetVectorBucketPolicy(); + if (op_name == "ListIndexes") + return new RGWS3VectorListIndexes(); + if (op_name == "ListVectors") + return new RGWS3VectorListVectors(); + if (op_name == "ListVectorBuckets") + return new RGWS3VectorListVectorBuckets(); + if (op_name == "QueryVectors") + return new RGWS3VectorQueryVectors(); + return nullptr; +} + + diff --git a/src/rgw/rgw_rest_s3vector.h b/src/rgw/rgw_rest_s3vector.h new file mode 100644 index 00000000000..4b1b3c92c6b --- /dev/null +++ b/src/rgw/rgw_rest_s3vector.h @@ -0,0 +1,20 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab + +#pragma once + +#include "rgw_rest_s3.h" + +class RGWHandler_REST_s3Vector : public RGWHandler_REST_S3 { +protected: + int init_permissions(RGWOp* op, optional_yield y) override {return 0;} + int read_permissions(RGWOp* op, optional_yield y) override {return 0;} + bool supports_quota() override {return false;} +public: + explicit RGWHandler_REST_s3Vector(const rgw::auth::StrategyRegistry& auth_registry) + : RGWHandler_REST_S3(auth_registry) {} + virtual ~RGWHandler_REST_s3Vector() = default; + RGWOp *op_post() override; + static RGWOp* create_post_op(const std::string& op_name); +}; + diff --git a/src/rgw/rgw_s3vector.cc b/src/rgw/rgw_s3vector.cc new file mode 100644 index 00000000000..2e6b579c235 --- /dev/null +++ b/src/rgw/rgw_s3vector.cc @@ -0,0 +1,621 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab ft=cpp + +#include "rgw_s3vector.h" +#include "common/ceph_json.h" +#include "common/Formatter.h" +#include "common/dout.h" +#include + +#define dout_subsys ceph_subsys_rgw + +namespace rgw::s3vector { + + +// utility functions for JSON 
encoding/decoding
+
+// Parse the textual payload of a JSON node into a float.
+// Throws JSONDecoder::err when the value overflows or underflows the float
+// range, when no characters could be converted, or when anything other than
+// whitespace follows the number.
+void decode_json_obj(float& val, JSONObj *obj) {
+  std::string s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtof(start, &p);
+
+  /* Check for various possible errors */
+
+  // strtof() reports overflow as +/-HUGE_VALF with errno == ERANGE; check both
+  // signs so that a large negative value is rejected as well
+  if ((errno == ERANGE && (val == HUGE_VALF || val == -HUGE_VALF)) ||
+      (errno != 0 && val == 0)) {
+    throw JSONDecoder::err("failed to parse number: value out of range");
+  }
+
+  if (p == start) {
+    throw JSONDecoder::err("failed to parse number");
+  }
+
+  while (*p != '\0') {
+    // isspace() has undefined behavior for negative char values, and the
+    // payload comes from the request body, so convert to unsigned char first
+    if (!isspace(static_cast<unsigned char>(*p))) {
+      throw JSONDecoder::err("failed to parse number");
+    }
+    p++;
+  }
+}
+
+// Decode the distance metric stored under field_name.
+// Accepts "cosine" and "euclidean"; any other string (including the empty
+// string left behind when the field was not decoded) throws JSONDecoder::err.
+void decode_json(const char* field_name, DistanceMetric& metric, JSONObj* obj, bool mandatory) {
+  std::string metric_str;
+  JSONDecoder::decode_json(field_name, metric_str, obj, mandatory);
+  if (metric_str == "cosine") {
+    metric = DistanceMetric::COSINE;
+  } else if (metric_str == "euclidean") {
+    metric = DistanceMetric::EUCLIDEAN;
+  } else {
+    throw JSONDecoder::err("invalid distanceMetric: " + metric_str);
+  }
+}
+
+// Encode the distance metric under field_name using the same strings that
+// decode_json() accepts; a value outside the enum is written as "unknown".
+void encode_json(const char* field_name, const DistanceMetric& metric, ceph::Formatter* f) {
+  switch (metric) {
+    case DistanceMetric::COSINE:
+      ::encode_json(field_name, "cosine", f);
+      return;
+    case DistanceMetric::EUCLIDEAN:
+      ::encode_json(field_name, "euclidean", f);
+      return;
+  }
+  ::encode_json(field_name, "unknown", f);
+}
+
+// Decode vector data of the form: "<field_name>": {"float32": [v0, v1, ...]}
+// "data" is cleared first and then filled with the decoded values.
+// Throws JSONDecoder::err when field_name or its "float32" member is missing,
+// or when one of the values cannot be parsed as a float.
+void decode_json(const char* field_name, VectorData& data, JSONObj* obj) {
+  data.clear();
+  auto it = obj->find(field_name);
+  if (it.end()) {
+    throw JSONDecoder::err(std::string("missing field: ") + field_name);
+  }
+  auto arr_it = (*it)->find("float32");
+  // the payload is client supplied: without this check a request that omits
+  // "float32" would dereference an end iterator below
+  if (arr_it.end()) {
+    throw JSONDecoder::err(std::string("missing float32 array in field: ") + field_name);
+  }
+  for (auto value_it = (*arr_it)->find_first(); !value_it.end(); ++value_it) {
+    float value;
+    decode_json_obj(value, *value_it);
+    data.push_back(value);
+  }
+}
+
+// Encode vector data as an object named field_name that holds a single
+// "float32" array, mirroring the layout read by decode_json() above.
+void encode_json(const char* field_name, const VectorData& data, ceph::Formatter* f) {
+  f->open_object_section(field_name);
+  f->open_array_section("float32");
+  for (auto value : data) {
+    f->dump_float("", value);
+  }
+  f->close_section();
+
f->close_section(); +} + +void create_index_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("dataType", data_type, f); + ::encode_json("dimension", dimension, f); + rgw::s3vector::encode_json("distanceMetric", distance_metric, f); + ::encode_json("indexName", index_name, f); + f->open_object_section("metadataConfiguration"); + ::encode_json("nonFilterableMetadataKeys", non_filterable_metadata_keys, f); + f->close_section(); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void decode_name(const char* name_field, std::string& name, JSONObj* obj) { + JSONDecoder::decode_json(name_field, name, obj, true); + if (name.length() < 3 || name.length() > 63) { + throw JSONDecoder::err(fmt::format("{} length must be between 3 and 63 characters, got {}", name_field, name.length())); + } +} + +void decode_name_or_arn(const char* name_field, const char* arn_field, std::string& name, std::string& arn, JSONObj* obj) { + JSONDecoder::decode_json(arn_field, arn, obj); + JSONDecoder::decode_json(name_field, name, obj); + if (arn.empty() && name.empty()) { + throw JSONDecoder::err(fmt::format("either {} or {} must be specified", name_field, arn_field)); + } + if (!name.empty() && (name.length() < 3 || name.length() > 63)) { + throw JSONDecoder::err(fmt::format("{} length must be between 3 and 63 characters, got {}", name_field, name.length())); + } + //TODO: validate ARN +} + +void create_index_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("dataType", data_type, obj, true); + if (data_type != "float32") { + throw JSONDecoder::err(fmt::format("invalid dataType: {}. 
Only 'float32' is supported.", data_type)); + } + JSONDecoder::decode_json("dimension", dimension, obj, true); + if (dimension < 1 || dimension > 4096) { + throw JSONDecoder::err(fmt::format("dimension must be between 1 and 4096, got {}", dimension)); + } + rgw::s3vector::decode_json("distanceMetric", distance_metric, obj, true); + JSONDecoder::decode_json("indexName", index_name, obj, true); + auto md_it = obj->find("metadataConfiguration"); + if (!md_it.end()) { + JSONDecoder::decode_json("nonFilterableMetadataKeys", non_filterable_metadata_keys, *md_it); + } + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); +} + +void create_vector_bucket_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void create_vector_bucket_t::decode_json(JSONObj* obj) { + decode_name("vectorBucketName", vector_bucket_name, obj); +} + +int create_index(const create_index_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector CreateIndex with: " << ss.str() << dendl; + return 0; +} + +void delete_index_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void delete_index_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + decode_name("vectorBucketName", vector_bucket_name, obj); +} + +void delete_vector_bucket_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void 
delete_vector_bucket_t::decode_json(JSONObj* obj) { + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); +} + +void delete_vector_bucket_policy_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void delete_vector_bucket_policy_t::decode_json(JSONObj* obj) { + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); +} + +int create_vector_bucket(const create_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector CreateVectorBucket with: " << ss.str() << dendl; + return 0; +} + +int delete_index(const delete_index_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector DeleteIndex with: " << ss.str() << dendl; + return 0; +} + +int delete_vector_bucket(const delete_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector DeleteVectorBucket with: " << ss.str() << dendl; + return 0; +} + +void vector_item_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("key", key, f); + rgw::s3vector::encode_json("data", data, f); + ::encode_json("metadata", metadata, f); + f->close_section(); +} + +void vector_item_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("key", key, obj, true); + if (key.empty()) { + throw JSONDecoder::err("vector key must be specified"); + } + + rgw::s3vector::decode_json("data", data, obj); + if (data.empty()) { + throw JSONDecoder::err("vector data cannot be empty"); + } + 
+ JSONDecoder::decode_json("metadata", metadata, obj); + +} + +void put_vectors_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->open_array_section("vectors"); + for (const auto& vector : vectors) { + vector.dump(f); + } + f->close_section(); + f->close_section(); +} + +void put_vectors_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + decode_name("vectorBucketName", vector_bucket_name, obj); + JSONDecoder::decode_json("vectors", vectors, obj, true); + + if (vectors.empty() or vectors.size() > 500) { + throw JSONDecoder::err(fmt::format("vectors array must contain 1-500 items, got {}", vectors.size())); + } +} + +int delete_vector_bucket_policy(const delete_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector DeleteVectorBucketPolicy with: " << ss.str() << dendl; + return 0; +} + +void get_vectors_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("keys", keys, f); + ::encode_json("returnData", return_data, f); + ::encode_json("returnMetadata", return_metadata, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void get_vectors_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + JSONDecoder::decode_json("keys", keys, obj, true); + JSONDecoder::decode_json("returnData", return_data, obj); + JSONDecoder::decode_json("returnMetadata", return_metadata, obj); + decode_name("vectorBucketName", vector_bucket_name, obj); + + if (keys.empty() || keys.size() > 100) { + throw 
JSONDecoder::err(fmt::format("keys array must contain 1-100 items, got {}", keys.size())); + } + + for (const auto& key : keys) { + if (key.empty() || key.length() > 1024) { + throw JSONDecoder::err(fmt::format("each key must be 1-1024 characters long, got key of length {}", key.length())); + } + } +} + +int put_vectors(const put_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector PutVectors with: " << ss.str() << dendl; + return 0; +} + +void list_vectors_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + ::encode_json("maxResults", max_results, f); + ::encode_json("nextToken", next_token, f); + ::encode_json("returnData", return_data, f); + ::encode_json("returnMetadata", return_metadata, f); + if (segment_count > 0) { + ::encode_json("segmentCount", segment_count, f); + ::encode_json("segmentIndex", segment_index, f); + } + f->close_section(); +} + +void list_vectors_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + decode_name("vectorBucketName", vector_bucket_name, obj); + JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("nextToken", next_token, obj); + JSONDecoder::decode_json("returnData", return_data, obj); + JSONDecoder::decode_json("returnMetadata", return_metadata, obj); + JSONDecoder::decode_json("segmentCount", segment_count, obj); + JSONDecoder::decode_json("segmentIndex", segment_index, obj); + + if (max_results < 1 || max_results > 1000) { + throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 1000, got {}", max_results)); + } + + if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 2048)) { + throw 
JSONDecoder::err(fmt::format("nextToken length must be between 1 and 2048, got {}", next_token.length())); + } + + if (segment_count > 0) { + if (segment_count < 1 || segment_count > 16) { + throw JSONDecoder::err(fmt::format("segmentCount must be between 1 and 16, got {}", segment_count)); + } + if (segment_index >= segment_count) { + throw JSONDecoder::err(fmt::format("segmentIndex must be between 0 and segmentCount-1 ({}), got {}", segment_count - 1, segment_index)); + } + } else if (segment_index > 0) { + throw JSONDecoder::err("segmentIndex requires segmentCount to be specified"); + } +} + +int get_vectors(const get_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector GetVectors with: " << ss.str() << dendl; + return 0; +} + +void list_vector_buckets_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("maxResults", max_results, f); + ::encode_json("nextToken", next_token, f); + ::encode_json("prefix", prefix, f); + f->close_section(); +} + +void list_vector_buckets_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("nextToken", next_token, obj); + JSONDecoder::decode_json("prefix", prefix, obj); + + if (max_results < 1 || max_results > 1000) { + throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 1000, got {}", max_results)); + } + + if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 2048)) { + throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 2048, got {}", next_token.length())); + } + + if (!prefix.empty() && (prefix.length() < 1 || prefix.length() > 63)) { + throw JSONDecoder::err(fmt::format("prefix length must be between 1 and 63, got {}", prefix.length())); + } +} + +int list_vectors(const list_vectors_t& configuration, DoutPrefixProvider* 
dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector ListVectors with: " << ss.str() << dendl; + return 0; +} + +void get_vector_bucket_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void get_vector_bucket_t::decode_json(JSONObj* obj) { + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); +} + +int list_vector_buckets(const list_vector_buckets_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector ListVectorBuckets with: " << ss.str() << dendl; + return 0; +} + +void get_index_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void get_index_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + decode_name("vectorBucketName", vector_bucket_name, obj); +} + +int get_vector_bucket(const get_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector GetVectorBucket with: " << ss.str() << dendl; + return 0; +} + +void list_indexes_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("maxResults", max_results, f); + ::encode_json("nextToken", next_token, f); + ::encode_json("prefix", prefix, f); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", 
vector_bucket_name, f); + f->close_section(); +} + +void list_indexes_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("nextToken", next_token, obj); + JSONDecoder::decode_json("prefix", prefix, obj); + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); + + if (max_results < 1 || max_results > 500) { + throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 500, got {}", max_results)); + } + + if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 512)) { + throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 512, got {}", next_token.length())); + } + + if (!prefix.empty() && (prefix.length() < 1 || prefix.length() > 63)) { + throw JSONDecoder::err(fmt::format("prefix length must be between 1 and 63, got {}", prefix.length())); + } +} + +int get_index(const get_index_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector GetIndex with: " << ss.str() << dendl; + return 0; +} + +void put_vector_bucket_policy_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("policy", policy, f); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void put_vector_bucket_policy_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("policy", policy, obj, true); + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); + + if (policy.empty()) { + throw JSONDecoder::err("policy must be specified and cannot be empty"); + } + // TODO: validate JSON policy +} + +int list_indexes(const list_indexes_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); 
+ std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector ListIndexes with: " << ss.str() << dendl; + return 0; +} + +void get_vector_bucket_policy_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("vectorBucketArn", vector_bucket_arn, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void get_vector_bucket_policy_t::decode_json(JSONObj* obj) { + decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); +} + +int put_vector_bucket_policy(const put_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector PutVectorBucketPolicy with: " << ss.str() << dendl; + return 0; +} + +int get_vector_bucket_policy(const get_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector GetVectorBucketPolicy with: " << ss.str() << dendl; + return 0; +} + +void delete_vectors_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + ::encode_json("keys", keys, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void delete_vectors_t::decode_json(JSONObj* obj) { + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + JSONDecoder::decode_json("keys", keys, obj, true); + decode_name("vectorBucketName", vector_bucket_name, obj); + + if (keys.empty() || keys.size() > 500) { + throw JSONDecoder::err(fmt::format("keys array must contain 1-500 items, got {}", keys.size())); + } + + for (const auto& key : keys) { + if (key.empty() || key.length() > 1024) { + throw 
JSONDecoder::err(fmt::format("each key must be 1-1024 characters long, got key of length {}", key.length())); + } + } +} + +int delete_vectors(const delete_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector DeleteVectors with: " << ss.str() << dendl; + return 0; +} + +void query_vectors_t::dump(ceph::Formatter* f) const { + f->open_object_section(""); + if (!filter.empty()) { + ::encode_json("filter", filter, f); + } + ::encode_json("indexArn", index_arn, f); + ::encode_json("indexName", index_name, f); + rgw::s3vector::encode_json("queryVector", query_vector, f); + ::encode_json("returnDistance", return_distance, f); + ::encode_json("returnMetadata", return_metadata, f); + ::encode_json("topK", top_k, f); + ::encode_json("vectorBucketName", vector_bucket_name, f); + f->close_section(); +} + +void query_vectors_t::decode_json(JSONObj* obj) { + JSONDecoder::decode_json("filter", filter, obj); + decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); + rgw::s3vector::decode_json("queryVector", query_vector, obj); + JSONDecoder::decode_json("returnDistance", return_distance, obj); + JSONDecoder::decode_json("returnMetadata", return_metadata, obj); + JSONDecoder::decode_json("topK", top_k, obj, true); + decode_name("vectorBucketName", vector_bucket_name, obj); + + if (top_k < 1) { + throw JSONDecoder::err(fmt::format("topK must be at least 1, got {}", top_k)); + } + + if (query_vector.empty()) { + throw JSONDecoder::err("queryVector cannot be empty"); + } + + // TODO: validate filter +} + +int query_vectors(const query_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y) { + JSONFormatter f; + configuration.dump(&f); + std::stringstream ss; + f.flush(ss); + ldpp_dout(dpp, 20) << "INFO: executing s3vector QueryVectors with: " << ss.str() << dendl; + return 0; +} + +} + diff --git 
a/src/rgw/rgw_s3vector.h b/src/rgw/rgw_s3vector.h new file mode 100644 index 00000000000..f8542492048 --- /dev/null +++ b/src/rgw/rgw_s3vector.h @@ -0,0 +1,570 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 sts=2 expandtab ft=cpp + +#pragma once + +#include +#include +#include "include/encoding.h" +#include "common/async/yield_context.h" + +namespace ceph { +class Formatter; +} +class JSONObj; +class DoutPrefixProvider; + +namespace rgw::s3vector { +enum class DistanceMetric { + COSINE, + EUCLIDEAN, +}; + +/* + { + "dataType": "string", + "dimension": number, + "distanceMetric": "string", + "indexName": "string", + "metadataConfiguration": { + "nonFilterableMetadataKeys": [ "string" ] + }, + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct create_index_t { + std::string data_type; + unsigned int dimension; /* 1 - 4096 */ + DistanceMetric distance_metric; + std::string index_name; + std::vector non_filterable_metadata_keys; + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(create_index_t) + +/* + { + "vectorBucketName": "string" + } +*/ +struct create_vector_bucket_t { + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(create_vector_bucket_t) + +/* + { + "indexArn": "string", + "indexName": "string", + "vectorBucketName": "string" + } +*/ +struct delete_index_t { + std::string 
index_arn; + std::string index_name; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(delete_index_t) + +/* + { + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct delete_vector_bucket_t { + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(delete_vector_bucket_t) + +/* + { + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct delete_vector_bucket_policy_t { + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(delete_vector_bucket_policy_t) + + +using VectorData = std::vector; +/* + { + "key": "string", + "data": {"float32": [float]}, + "metadata": {} + } +*/ +struct vector_item_t { + std::string key; + VectorData data; + std::string metadata; // JSON string + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(vector_item_t) + +/* + { + 
"indexArn": "string", + "indexName": "string", + "vectorBucketName": "string", + "vectors": [vector_item_t] + } +*/ +struct put_vectors_t { + std::string index_arn; + std::string index_name; + std::string vector_bucket_name; + std::vector vectors; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(put_vectors_t) + +/* + { + "indexArn": "string", + "indexName": "string", + "keys": ["string"], + "returnData": boolean, + "returnMetadata": boolean, + "vectorBucketName": "string" + } +*/ +struct get_vectors_t { + std::string index_arn; + std::string index_name; + std::vector keys; + bool return_data = false; + bool return_metadata = false; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(get_vectors_t) + +/* + { + "indexArn": "string", + "indexName": "string", + "vectorBucketName": "string", + "maxResults": number, + "nextToken": "string", + "returnData": boolean, + "returnMetadata": boolean, + "segmentCount": number, + "segmentIndex": number + } +*/ +struct list_vectors_t { + std::string index_arn; + std::string index_name; + std::string vector_bucket_name; + unsigned int max_results = 500; + std::string next_token; + bool return_data = false; + bool return_metadata = false; + unsigned int segment_count = 0; + unsigned int segment_index = 0; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + 
DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(list_vectors_t) + +/* + { + "maxResults": number, + "nextToken": "string", + "prefix": "string" + } +*/ +struct list_vector_buckets_t { + unsigned int max_results = 500; + std::string next_token; + std::string prefix; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(list_vector_buckets_t) + +/* + { + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct get_vector_bucket_t { + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(get_vector_bucket_t) + +/* + { + "indexArn": "string", + "indexName": "string", + "vectorBucketName": "string" + } +*/ +struct get_index_t { + std::string index_arn; + std::string index_name; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(get_index_t) + +/* + { + "maxResults": number, + "nextToken": "string", + "prefix": "string", + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct list_indexes_t { + unsigned int max_results = 500; + std::string next_token; + std::string prefix; + std::string vector_bucket_arn; + 
std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(list_indexes_t) + +/* + { + "policy": "string", + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct put_vector_bucket_policy_t { + std::string policy; + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(put_vector_bucket_policy_t) + +/* + { + "vectorBucketArn": "string", + "vectorBucketName": "string" + } +*/ +struct get_vector_bucket_policy_t { + std::string vector_bucket_arn; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(get_vector_bucket_policy_t) + +/* + { + "indexArn": "string", + "indexName": "string", + "keys": ["string"], + "vectorBucketName": "string" + } +*/ +struct delete_vectors_t { + std::string index_arn; + std::string index_name; + std::vector keys; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; 
+WRITE_CLASS_ENCODER(delete_vectors_t) + +/* + { + "filter": {}, + "indexArn": "string", + "indexName": "string", + "queryVector": {"float32": [float]}, + "returnDistance": boolean, + "returnMetadata": boolean, + "topK": number, + "vectorBucketName": "string" + } +*/ +struct query_vectors_t { + std::string filter; // JSON string + std::string index_arn; + std::string index_name; + VectorData query_vector; + bool return_distance = false; + bool return_metadata = false; + unsigned int top_k; + std::string vector_bucket_name; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + // TODO + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + // TODO + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(query_vectors_t) + +int create_index(const create_index_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int create_vector_bucket(const create_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int delete_index(const delete_index_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int delete_vector_bucket(const delete_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int delete_vector_bucket_policy(const delete_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int put_vectors(const put_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int get_vectors(const get_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int list_vectors(const list_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int list_vector_buckets(const list_vector_buckets_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int get_vector_bucket(const get_vector_bucket_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int get_index(const get_index_t& configuration, DoutPrefixProvider* 
dpp, optional_yield y); +int list_indexes(const list_indexes_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int put_vector_bucket_policy(const put_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int get_vector_bucket_policy(const get_vector_bucket_policy_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int delete_vectors(const delete_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y); +int query_vectors(const query_vectors_t& configuration, DoutPrefixProvider* dpp, optional_yield y); + +} +