From: Daniel Gryniewicz
Date: Fri, 5 May 2023 17:09:07 +0000 (-0400)
Subject: RGW - Move DAOS and MOTR to proper locations
X-Git-Tag: v19.0.0~488^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6565e58aacb5358f3c2c4ef37bf80003e1f45d0f;p=ceph.git

RGW - Move DAOS and MOTR to proper locations

Signed-off-by: Daniel Gryniewicz
---

diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index dc824d35504f..5e46c81902e0 100644
--- a/src/rgw/CMakeLists.txt
+++ b/src/rgw/CMakeLists.txt
@@ -216,10 +216,10 @@ if(WITH_RADOSGW_DBSTORE)
   list(APPEND librgw_common_srcs rgw_sal_dbstore.cc)
 endif()
 if(WITH_RADOSGW_MOTR)
-  list(APPEND librgw_common_srcs rgw_sal_motr.cc)
+  list(APPEND librgw_common_srcs driver/motr/rgw_sal_motr.cc)
 endif()
 if(WITH_RADOSGW_DAOS)
-  list(APPEND librgw_common_srcs rgw_sal_daos.cc)
+  list(APPEND librgw_common_srcs driver/daos/rgw_sal_daos.cc)
 endif()
 if(WITH_JAEGER)
   list(APPEND librgw_common_srcs rgw_tracer.cc)
diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc
new file mode 100644
index 000000000000..a8663805d828
--- /dev/null
+++ b/src/rgw/driver/daos/rgw_sal_daos.cc
@@ -0,0 +1,2450 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=2 sw=2 expandtab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * SAL implementation for the CORTX DAOS backend
+ *
+ * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ * + */ + +#include "rgw_sal_daos.h" + +#include +#include +#include + +#include +#include + +#include "common/Clock.h" +#include "common/errno.h" +#include "rgw_bucket.h" +#include "rgw_compression.h" +#include "rgw_sal.h" + +#define dout_subsys ceph_subsys_rgw + +using std::list; +using std::map; +using std::set; +using std::string; +using std::vector; + +namespace fs = std::filesystem; + +namespace rgw::sal { + +using ::ceph::decode; +using ::ceph::encode; + +int DaosUser::list_buckets(const DoutPrefixProvider* dpp, const string& marker, + const string& end_marker, uint64_t max, + bool need_stats, BucketList& buckets, + optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: list_user_buckets: marker=" << marker + << " end_marker=" << end_marker << " max=" << max << dendl; + int ret = 0; + bool is_truncated = false; + buckets.clear(); + vector bucket_infos(max); + daos_size_t bcount = bucket_infos.size(); + vector> values(bcount, vector(DS3_MAX_ENCODED_LEN)); + for (daos_size_t i = 0; i < bcount; i++) { + bucket_infos[i].encoded = values[i].data(); + bucket_infos[i].encoded_length = values[i].size(); + } + + char daos_marker[DS3_MAX_BUCKET_NAME]; + std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker)); + ret = ds3_bucket_list(&bcount, bucket_infos.data(), daos_marker, + &is_truncated, store->ds3, nullptr); + ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_list: bcount=" << bcount + << " ret=" << ret << dendl; + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list failed!" 
<< ret << dendl; + return ret; + } + + bucket_infos.resize(bcount); + values.resize(bcount); + + for (const auto& bi : bucket_infos) { + DaosBucketInfo dbinfo; + bufferlist bl; + bl.append(reinterpret_cast(bi.encoded), bi.encoded_length); + auto iter = bl.cbegin(); + dbinfo.decode(iter); + buckets.add(std::make_unique(this->store, dbinfo.info, this)); + } + + buckets.set_truncated(is_truncated); + return 0; +} + +int DaosUser::create_bucket( + const DoutPrefixProvider* dpp, const rgw_bucket& b, + const std::string& zonegroup_id, rgw_placement_rule& placement_rule, + std::string& swift_ver_location, const RGWQuotaInfo* pquota_info, + const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info, + obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, bool* existed, + req_info& req_info, std::unique_ptr* bucket_out, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: create_bucket:" << b.name << dendl; + int ret; + std::unique_ptr bucket; + + // Look up the bucket. Create it if it doesn't exist. + ret = this->store->get_bucket(dpp, this, b, &bucket, y); + if (ret != 0 && ret != -ENOENT) { + return ret; + } + + if (ret != -ENOENT) { + *existed = true; + if (swift_ver_location.empty()) { + swift_ver_location = bucket->get_info().swift_ver_location; + } + placement_rule.inherit_from(bucket->get_info().placement_rule); + + // TODO: ACL policy + // // don't allow changes to the acl policy + // RGWAccessControlPolicy old_policy(ctx()); + // int rc = rgw_op_get_bucket_policy_from_attr( + // dpp, this, u, bucket->get_attrs(), &old_policy, y); + // if (rc >= 0 && old_policy != policy) { + // bucket_out->swap(bucket); + // return -EEXIST; + //} + } else { + placement_rule.name = "default"; + placement_rule.storage_class = "STANDARD"; + bucket = std::make_unique(store, b, this); + bucket->set_attrs(attrs); + + *existed = false; + } + + // TODO: how to handle zone and multi-site. 
+ + if (!*existed) { + info.placement_rule = placement_rule; + info.bucket = b; + info.owner = this->get_info().user_id; + info.zonegroup = zonegroup_id; + info.creation_time = ceph::real_clock::now(); + if (obj_lock_enabled) + info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; + bucket->set_version(ep_objv); + bucket->get_info() = info; + + // Create a new bucket: + DaosBucket* daos_bucket = static_cast(bucket.get()); + bufferlist bl; + std::unique_ptr bucket_info = + daos_bucket->get_encoded_info(bl, ceph::real_time()); + ret = ds3_bucket_create(bucket->get_name().c_str(), bucket_info.get(), + nullptr, store->ds3, nullptr); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_create failed! ret=" << ret + << dendl; + return ret; + } + } else { + bucket->set_version(ep_objv); + bucket->get_info() = info; + } + + bucket_out->swap(bucket); + + return ret; +} + +int DaosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y, + RGWStorageStats* stats, + ceph::real_time* last_stats_sync, + ceph::real_time* last_stats_update) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +/* stats - Not for first pass */ +int DaosUser::read_stats_async(const DoutPrefixProvider* dpp, + RGWGetUserStats_CB* cb) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosUser::complete_flush_stats(const DoutPrefixProvider* dpp, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosUser::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch, uint32_t max_entries, + bool* is_truncated, RGWUsageIter& usage_iter, + map& usage) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosUser::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) { + const 
string name = info.user_id.to_str(); + ldpp_dout(dpp, 20) << "DEBUG: load_user, name=" << name << dendl; + + DaosUserInfo duinfo; + int ret = read_user(dpp, name, &duinfo); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: load_user failed, name=" << name << dendl; + return ret; + } + + info = duinfo.info; + attrs = duinfo.attrs; + objv_tracker.read_version = duinfo.user_version; + return 0; +} + +int DaosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, + Attrs& new_attrs, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs + << dendl; + for (auto& it : new_attrs) { + attrs[it.first] = it.second; + } + return store_user(dpp, y, false); +} + +int DaosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y, + bool exclusive, RGWUserInfo* old_info) { + const string name = info.user_id.to_str(); + ldpp_dout(dpp, 10) << "DEBUG: Store_user(): User name=" << name << dendl; + + // Read user + int ret = 0; + struct DaosUserInfo duinfo; + ret = read_user(dpp, name, &duinfo); + obj_version obj_ver = duinfo.user_version; + std::unique_ptr old_user_info; + std::vector old_access_ids; + + // Check if the user already exists + if (ret == 0 && obj_ver.ver) { + // already exists. + + if (old_info) { + *old_info = duinfo.info; + } + + if (objv_tracker.read_version.ver != obj_ver.ver) { + // Object version mismatch.. 
return ECANCELED + ret = -ECANCELED; + ldpp_dout(dpp, 0) << "User Read version mismatch read_version=" + << objv_tracker.read_version.ver + << " obj_ver=" << obj_ver.ver << dendl; + return ret; + } + + if (exclusive) { + // return + return ret; + } + obj_ver.ver++; + + for (auto const& [id, key] : duinfo.info.access_keys) { + old_access_ids.push_back(id.c_str()); + } + old_user_info.reset( + new ds3_user_info{.name = duinfo.info.user_id.to_str().c_str(), + .email = duinfo.info.user_email.c_str(), + .access_ids = old_access_ids.data(), + .access_ids_nr = old_access_ids.size()}); + } else { + obj_ver.ver = 1; + obj_ver.tag = "UserTAG"; + } + + bufferlist bl; + std::unique_ptr user_info = + get_encoded_info(bl, obj_ver); + + ret = ds3_user_set(name.c_str(), user_info.get(), old_user_info.get(), + store->ds3, nullptr); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name + << " ret=" << ret << dendl; + } + + return ret; +} + +int DaosUser::read_user(const DoutPrefixProvider* dpp, std::string name, + DaosUserInfo* duinfo) { + // Initialize ds3_user_info + bufferlist bl; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), + .encoded_length = size}; + + int ret = ds3_user_get(name.c_str(), &user_info, store->ds3, nullptr); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "Error: ds3_user_get failed, name=" << name + << " ret=" << ret << dendl; + return ret; + } + + // Decode + bufferlist& blr = bl; + auto iter = blr.cbegin(); + duinfo->decode(iter); + return ret; +} + +std::unique_ptr DaosUser::get_encoded_info( + bufferlist& bl, obj_version& obj_ver) { + // Encode user data + struct DaosUserInfo duinfo; + duinfo.info = info; + duinfo.attrs = attrs; + duinfo.user_version = obj_ver; + duinfo.encode(bl); + + // Initialize ds3_user_info + access_ids.clear(); + for (auto const& [id, key] : info.access_keys) { + access_ids.push_back(id.c_str()); + } + return std::unique_ptr( + new 
ds3_user_info{.name = info.user_id.to_str().c_str(),
+                        // NOTE(review): if to_str() returns by value, the
+                        // .name pointer above refers to a temporary that is
+                        // gone once this statement ends, so callers receive
+                        // a dangling name. Fixing it needs storage that
+                        // outlives the call (e.g. a member) - confirm
+                        // against the class declaration.
+                        .email = info.user_email.c_str(),
+                        .access_ids = access_ids.data(),
+                        .access_ids_nr = access_ids.size(),
+                        .encoded = bl.c_str(),
+                        .encoded_length = bl.length()});
+}
+
+// Removes this user with ds3_user_remove(). The record handed to the backend
+// is encoded with a default-constructed obj_version (see the TODO below).
+// Returns the ds3_user_remove() result.
+int DaosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) {
+  const string name = info.user_id.to_str();
+
+  // TODO: the expectation is that the object version needs to be passed in as a
+  // method arg see int DB::remove_user(const DoutPrefixProvider *dpp,
+  // RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv)
+  obj_version obj_ver;
+  bufferlist bl;
+  std::unique_ptr user_info =
+      get_encoded_info(bl, obj_ver);
+
+  // Remove user
+  int ret = ds3_user_remove(name.c_str(), user_info.get(), store->ds3, nullptr);
+  if (ret != 0) {
+    // Name the call that actually failed; this message used to say
+    // ds3_user_set.
+    ldpp_dout(dpp, 0) << "Error: ds3_user_remove failed, name=" << name
+                      << " ret=" << ret << dendl;
+  }
+  return ret;
+}
+
+// Closes the bucket handle, if one is open, when the object goes away.
+DaosBucket::~DaosBucket() { close(nullptr); }
+
+// Opens the DS3 handle (ds3b) for this bucket. Returns 0 without calling the
+// backend when is_open() already holds, otherwise the ds3_bucket_open()
+// result.
+int DaosBucket::open(const DoutPrefixProvider* dpp) {
+  ldpp_dout(dpp, 20) << "DEBUG: open, name=" << info.bucket.name.c_str()
+                     << dendl;
+  // Idempotent
+  if (is_open()) {
+    return 0;
+  }
+
+  int ret = ds3_bucket_open(get_name().c_str(), &ds3b, store->ds3, nullptr);
+  ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_open, name=" << get_name()
+                     << ", ret=" << ret << dendl;
+
+  return ret;
+}
+
+// Closes the DS3 handle with ds3_bucket_close() and resets ds3b to nullptr
+// whatever the result. Returns 0 when no handle is open.
+int DaosBucket::close(const DoutPrefixProvider* dpp) {
+  ldpp_dout(dpp, 20) << "DEBUG: close" << dendl;
+  // Idempotent
+  if (!is_open()) {
+    return 0;
+  }
+
+  int ret = ds3_bucket_close(ds3b, nullptr);
+  ds3b = nullptr;
+  ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_close ret=" << ret << dendl;
+
+  return ret;
+}
+
+std::unique_ptr DaosBucket::get_encoded_info(
+    bufferlist& bl, ceph::real_time _mtime) {
+  DaosBucketInfo dbinfo;
+  dbinfo.info = info;
+  dbinfo.bucket_attrs = attrs;
+  dbinfo.mtime = _mtime;
+  dbinfo.bucket_version = bucket_version;
+  dbinfo.encode(bl);
+
+  auto bucket_info = std::make_unique();
+  bucket_info->encoded = bl.c_str();
+  
bucket_info->encoded_length = bl.length(); + std::strncpy(bucket_info->name, get_name().c_str(), sizeof(bucket_info->name)); + return bucket_info; +} + +int DaosBucket::remove_bucket(const DoutPrefixProvider* dpp, + bool delete_children, bool forward_to_master, + req_info* req_info, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: remove_bucket, delete_children=" + + << delete_children + + << " forward_to_master=" << forward_to_master << dendl; + + return ds3_bucket_destroy(get_name().c_str(), delete_children, store->ds3, + nullptr); +} + +int DaosBucket::remove_bucket_bypass_gc(int concurrent_max, + bool keep_index_consistent, + optional_yield y, + const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << "DEBUG: remove_bucket_bypass_gc, concurrent_max=" + + << concurrent_max + + << " keep_index_consistent=" << keep_index_consistent + + << dendl; + return ds3_bucket_destroy(get_name().c_str(), true, store->ds3, nullptr); +} + +int DaosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, + ceph::real_time _mtime) { + ldpp_dout(dpp, 20) << "DEBUG: put_info(): bucket name=" << get_name() + << dendl; + + int ret = open(dpp); + if (ret != 0) { + return ret; + } + + bufferlist bl; + std::unique_ptr bucket_info = + get_encoded_info(bl, ceph::real_time()); + + ret = ds3_bucket_set_info(bucket_info.get(), ds3b, nullptr); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_set_info failed: " << ret << dendl; + } + return ret; +} + +int DaosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y, + bool get_stats) { + ldpp_dout(dpp, 20) << "DEBUG: load_bucket(): bucket name=" << get_name() + << dendl; + int ret = open(dpp); + if (ret != 0) { + return ret; + } + + bufferlist bl; + DaosBucketInfo dbinfo; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_bucket_info bucket_info = {.encoded = bl.append_hole(size).c_str(), + .encoded_length = size}; + + ret = ds3_bucket_get_info(&bucket_info, ds3b, nullptr); + if (ret != 0) { + ldpp_dout(dpp, 0) 
<< "ERROR: ds3_bucket_get_info failed: " << ret << dendl; + return ret; + } + + auto iter = bl.cbegin(); + dbinfo.decode(iter); + info = dbinfo.info; + rgw_placement_rule placement_rule; + placement_rule.name = "default"; + placement_rule.storage_class = "STANDARD"; + info.placement_rule = placement_rule; + + attrs = dbinfo.bucket_attrs; + mtime = dbinfo.mtime; + bucket_version = dbinfo.bucket_version; + return ret; +} + +/* stats - Not for first pass */ +int DaosBucket::read_stats(const DoutPrefixProvider* dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, std::string* bucket_ver, + std::string* master_ver, + std::map& stats, + std::string* max_marker, bool* syncstopped) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::read_stats_async( + const DoutPrefixProvider* dpp, + const bucket_index_layout_generation& idx_layout, int shard_id, + RGWGetBucketStats_CB* ctx) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::sync_user_stats(const DoutPrefixProvider* dpp, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::update_container_stats(const DoutPrefixProvider* dpp) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::check_bucket_shards(const DoutPrefixProvider* dpp) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::chown(const DoutPrefixProvider* dpp, User& new_user, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +/* Make sure to call load_bucket() if you need it first */ +bool DaosBucket::is_owner(User* user) { + return (info.owner.compare(user->get_id()) == 0); +} + +int DaosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) { + /* XXX: Check if bucket contains any objects */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota, + uint64_t obj_size, optional_yield y, + bool check_size_only) { + /* Not Handled in the first pass as stats are also needed */ 
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp, + Attrs& new_attrs, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs + << dendl; + for (auto& it : new_attrs) { + attrs[it.first] = it.second; + } + + return put_info(dpp, y, ceph::real_time()); +} + +int DaosBucket::try_refresh_info(const DoutPrefixProvider* dpp, + ceph::real_time* pmtime) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +/* XXX: usage and stats not supported in the first pass */ +int DaosBucket::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch, uint32_t max_entries, + bool* is_truncated, RGWUsageIter& usage_iter, + map& usage) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::remove_objs_from_index( + const DoutPrefixProvider* dpp, + std::list& objs_to_unlink) { + /* XXX: CHECK: Unlike RadosStore, there is no seperate bucket index table. + * Delete all the object in the list from the object table of this + * bucket + */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::check_index( + const DoutPrefixProvider* dpp, + std::map& existing_stats, + std::map& calculated_stats) { + /* XXX: stats not supported yet */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::rebuild_index(const DoutPrefixProvider* dpp) { + /* there is no index table in DAOS. Not applicable */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::set_tag_timeout(const DoutPrefixProvider* dpp, + uint64_t timeout) { + /* XXX: CHECK: set tag timeout for all the bucket objects? */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosBucket::purge_instance(const DoutPrefixProvider* dpp) { + /* XXX: CHECK: for DAOS only single instance supported. + * Remove all the objects for that instance? 
Anything extra needed?
+   */
+  return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+// Records `acl` as this bucket's ACL: keeps a copy in `acls` and stores its
+// encoded form under RGW_ATTR_ACL in the bucket's attrs. Always returns 0.
+//
+// NOTE(review): nothing here writes the change to the backend, whereas
+// merge_and_store_attrs() goes through put_info(). Confirm whether set_acl
+// is expected to persist as well.
+int DaosBucket::set_acl(const DoutPrefixProvider* dpp,
+                        RGWAccessControlPolicy& acl, optional_yield y) {
+  ldpp_dout(dpp, 20) << "DEBUG: set_acl" << dendl;
+  bufferlist aclbl;
+
+  acls = acl;
+  acl.encode(aclbl);
+
+  // Write into the bucket's own attrs, the same member that
+  // merge_and_store_attrs() and get_encoded_info() use. The previous code
+  // updated a local copy (`Attrs attrs = get_attrs();`) that was dropped on
+  // return, so the encoded ACL never reached the bucket.
+  attrs[RGW_ATTR_ACL] = aclbl;
+
+  return 0;
+}
+
+// Creates the DAOS object wrapper for key `k` inside this bucket.
+std::unique_ptr DaosBucket::get_object(const rgw_obj_key& k) {
+  return std::make_unique(this->store, k, this);
+}
+
+// Ordering predicate for directory entries: compares by key.
+bool compare_rgw_bucket_dir_entry(rgw_bucket_dir_entry& entry1,
+                                  rgw_bucket_dir_entry& entry2) {
+  return (entry1.key < entry2.key);
+}
+
+// Ordering predicate for multipart uploads: compares by get_key().
+bool compare_multipart_upload(std::unique_ptr& upload1,
+                              std::unique_ptr& upload2) {
+  return (upload1->get_key() < upload2->get_key());
+}
+
+int DaosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max,
+                     ListResults& results, optional_yield y) {
+  ldpp_dout(dpp, 20) << "DEBUG: list bucket=" << get_name() << " max=" << max
+                     << " params=" << params << dendl;
+  // End
+  if (max == 0) {
+    return 0;
+  }
+
+  int ret = open(dpp);
+  if (ret != 0) {
+    return ret;
+  }
+
+  // Init needed structures
+  vector object_infos(max);
+  uint32_t nobj = object_infos.size();
+  vector> values(nobj, vector(DS3_MAX_ENCODED_LEN));
+  for (uint32_t i = 0; i < nobj; i++) {
+    object_infos[i].encoded = values[i].data();
+    object_infos[i].encoded_length = values[i].size();
+  }
+
+  vector common_prefixes(max);
+  uint32_t ncp = common_prefixes.size();
+
+  char daos_marker[DS3_MAX_KEY_BUFF];
+  std::strncpy(daos_marker, params.marker.get_oid().c_str(), sizeof(daos_marker));
+  // strncpy() writes no terminator when the source fills the whole buffer.
+  daos_marker[sizeof(daos_marker) - 1] = '\0';
+
+  ret = ds3_bucket_list_obj(&nobj, object_infos.data(), &ncp,
+                            common_prefixes.data(), params.prefix.c_str(),
+                            params.delim.c_str(), daos_marker,
+                            params.list_versions, &results.is_truncated, ds3b);
+
+  if (ret != 0) {
+    ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list_obj failed, name="
+                      << get_name() << ", ret=" << ret << dendl;
+    return ret;
+  }
+
+  
object_infos.resize(nobj); + values.resize(nobj); + common_prefixes.resize(ncp); + + // Fill common prefixes + for (auto const& cp : common_prefixes) { + results.common_prefixes[cp.prefix] = true; + } + + // Decode objs + for (auto const& obj : object_infos) { + bufferlist bl; + rgw_bucket_dir_entry ent; + bl.append(reinterpret_cast(obj.encoded), obj.encoded_length); + auto iter = bl.cbegin(); + ent.decode(iter); + if (params.list_versions || ent.is_visible()) { + results.objs.emplace_back(std::move(ent)); + } + } + + if (!params.allow_unordered) { + std::sort(results.objs.begin(), results.objs.end(), + compare_rgw_bucket_dir_entry); + } + + return ret; +} + +int DaosBucket::list_multiparts( + const DoutPrefixProvider* dpp, const string& prefix, string& marker, + const string& delim, const int& max_uploads, + vector>& uploads, + map* common_prefixes, bool* is_truncated) { + ldpp_dout(dpp, 20) << "DEBUG: list_multiparts" << dendl; + // End of uploading + if (max_uploads == 0) { + *is_truncated = false; + return 0; + } + + // Init needed structures + vector multipart_upload_infos(max_uploads); + uint32_t nmp = multipart_upload_infos.size(); + vector> values(nmp, vector(DS3_MAX_ENCODED_LEN)); + for (uint32_t i = 0; i < nmp; i++) { + multipart_upload_infos[i].encoded = values[i].data(); + multipart_upload_infos[i].encoded_length = values[i].size(); + } + + vector cps(max_uploads); + uint32_t ncp = cps.size(); + + char daos_marker[DS3_MAX_KEY_BUFF]; + std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker)); + + int ret = ds3_bucket_list_multipart( + get_name().c_str(), &nmp, multipart_upload_infos.data(), &ncp, cps.data(), + prefix.c_str(), delim.c_str(), daos_marker, is_truncated, store->ds3); + + multipart_upload_infos.resize(nmp); + values.resize(nmp); + cps.resize(ncp); + + // Fill common prefixes + for (auto const& cp : cps) { + (*common_prefixes)[cp.prefix] = true; + } + + for (auto const& mp : multipart_upload_infos) { + // Decode the xattr + bufferlist 
bl; + rgw_bucket_dir_entry ent; + bl.append(reinterpret_cast(mp.encoded), mp.encoded_length); + auto iter = bl.cbegin(); + ent.decode(iter); + string name = ent.key.name; + + ACLOwner owner(rgw_user(ent.meta.owner)); + owner.set_name(ent.meta.owner_display_name); + uploads.push_back(this->get_multipart_upload( + name, mp.upload_id, std::move(owner), ent.meta.mtime)); + } + + // Sort uploads + std::sort(uploads.begin(), uploads.end(), compare_multipart_upload); + + return ret; +} + +int DaosBucket::abort_multiparts(const DoutPrefixProvider* dpp, + CephContext* cct) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +void DaosStore::finalize(void) { + ldout(cctx, 20) << "DEBUG: finalize" << dendl; + int ret; + + ret = ds3_disconnect(ds3, nullptr); + if (ret != 0) { + ldout(cctx, 0) << "ERROR: ds3_disconnect() failed: " << ret << dendl; + } + ds3 = nullptr; + + ret = ds3_fini(); + if (ret != 0) { + ldout(cctx, 0) << "ERROR: daos_fini() failed: " << ret << dendl; + } +} + +int DaosStore::initialize(CephContext* cct, const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << "DEBUG: initialize" << dendl; + int ret = ds3_init(); + + // DS3 init failed, allow the case where init is already done + if (ret != 0 && ret != DER_ALREADY) { + ldout(cct, 0) << "ERROR: ds3_init() failed: " << ret << dendl; + return ret; + } + + // XXX: these params should be taken from config settings and + // cct somehow? 
+ const auto& daos_pool = cct->_conf.get_val("daos_pool"); + ldout(cct, 20) << "INFO: daos pool: " << daos_pool << dendl; + + ret = ds3_connect(daos_pool.c_str(), nullptr, &ds3, nullptr); + + if (ret != 0) { + ldout(cct, 0) << "ERROR: ds3_connect() failed: " << ret << dendl; + ds3_fini(); + } + + return ret; +} + +const std::string& DaosZoneGroup::get_endpoint() const { + if (!group.endpoints.empty()) { + return group.endpoints.front(); + } else { + // use zonegroup's master zone endpoints + auto z = group.zones.find(group.master_zone); + if (z != group.zones.end() && !z->second.endpoints.empty()) { + return z->second.endpoints.front(); + } + } + return empty; +} + +bool DaosZoneGroup::placement_target_exists(std::string& target) const { + return !!group.placement_targets.count(target); +} + +int DaosZoneGroup::get_placement_target_names( + std::set& names) const { + for (const auto& target : group.placement_targets) { + names.emplace(target.second.name); + } + + return 0; +} + +int DaosZoneGroup::get_placement_tier(const rgw_placement_rule& rule, + std::unique_ptr* tier) { + std::map::const_iterator titer; + titer = group.placement_targets.find(rule.name); + if (titer == group.placement_targets.end()) { + return -ENOENT; + } + + const auto& target_rule = titer->second; + std::map::const_iterator ttier; + ttier = target_rule.tier_targets.find(rule.storage_class); + if (ttier == target_rule.tier_targets.end()) { + // not found + return -ENOENT; + } + + PlacementTier* t; + t = new DaosPlacementTier(store, ttier->second); + if (!t) return -ENOMEM; + + tier->reset(t); + return 0; +} + +ZoneGroup& DaosZone::get_zonegroup() { return zonegroup; } + +int DaosZone::get_zonegroup(const std::string& id, + std::unique_ptr* group) { + /* XXX: for now only one zonegroup supported */ + ZoneGroup* zg; + zg = new DaosZoneGroup(store, zonegroup.get_group()); + + group->reset(zg); + return 0; +} + +const rgw_zone_id& DaosZone::get_id() { return cur_zone_id; } + +const std::string& 
DaosZone::get_name() const { + return zone_params->get_name(); +} + +bool DaosZone::is_writeable() { return true; } + +bool DaosZone::get_redirect_endpoint(std::string* endpoint) { return false; } + +bool DaosZone::has_zonegroup_api(const std::string& api) const { return false; } + +const std::string& DaosZone::get_current_period_id() { + return current_period->get_id(); +} + +std::unique_ptr DaosStore::get_lua_manager() { + return std::make_unique(this); +} + +int DaosObject::get_obj_state(const DoutPrefixProvider* dpp, + RGWObjState** _state, optional_yield y, + bool follow_olh) { + // Get object's metadata (those stored in rgw_bucket_dir_entry) + ldpp_dout(dpp, 20) << "DEBUG: get_obj_state" << dendl; + rgw_bucket_dir_entry ent; + *_state = &state; // state is required even if a failure occurs + + int ret = get_dir_entry_attrs(dpp, &ent); + if (ret != 0) { + return ret; + } + + // Set object state. + state.exists = true; + state.size = ent.meta.size; + state.accounted_size = ent.meta.size; + state.mtime = ent.meta.mtime; + + state.has_attrs = true; + bufferlist etag_bl; + string& etag = ent.meta.etag; + ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag + << dendl; + etag_bl.append(etag); + state.attrset[RGW_ATTR_ETAG] = etag_bl; + return 0; +} + +DaosObject::~DaosObject() { close(nullptr); } + +int DaosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, + Attrs* delattrs, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: DaosObject::set_obj_attrs()" << dendl; + // TODO handle target_obj + // Get object's metadata (those stored in rgw_bucket_dir_entry) + rgw_bucket_dir_entry ent; + int ret = get_dir_entry_attrs(dpp, &ent); + if (ret != 0) { + return ret; + } + + // Update object metadata + Attrs updateattrs = setattrs == nullptr ? 
attrs : *setattrs; + if (delattrs) { + for (auto const& [attr, attrval] : *delattrs) { + updateattrs.erase(attr); + } + } + + ret = set_dir_entry_attrs(dpp, &ent, &updateattrs); + return ret; +} + +int DaosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, + rgw_obj* target_obj) { + ldpp_dout(dpp, 20) << "DEBUG: DaosObject::get_obj_attrs()" << dendl; + // TODO handle target_obj + // Get object's metadata (those stored in rgw_bucket_dir_entry) + rgw_bucket_dir_entry ent; + int ret = get_dir_entry_attrs(dpp, &ent, &attrs); + return ret; +} + +int DaosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, + optional_yield y, + const DoutPrefixProvider* dpp) { + // Get object's metadata (those stored in rgw_bucket_dir_entry) + ldpp_dout(dpp, 20) << "DEBUG: modify_obj_attrs" << dendl; + rgw_bucket_dir_entry ent; + int ret = get_dir_entry_attrs(dpp, &ent, &attrs); + if (ret != 0) { + return ret; + } + + // Update object attrs + set_atomic(); + attrs[attr_name] = attr_val; + + ret = set_dir_entry_attrs(dpp, &ent, &attrs); + return ret; +} + +int DaosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, + const char* attr_name, optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: delete_obj_attrs" << dendl; + rgw_obj target = get_obj(); + Attrs rmattr; + bufferlist bl; + + rmattr[attr_name] = bl; + return set_obj_attrs(dpp, nullptr, &rmattr, y); +} + +bool DaosObject::is_expired() { + auto iter = attrs.find(RGW_ATTR_DELETE_AT); + if (iter != attrs.end()) { + utime_t delete_at; + try { + auto bufit = iter->second.cbegin(); + decode(delete_at, bufit); + } catch (buffer::error& err) { + ldout(store->ctx(), 0) + << "ERROR: " << __func__ + << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl; + return false; + } + + if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { + return true; + } + } + + return false; +} + +// Taken from rgw_rados.cc +void DaosObject::gen_rand_obj_instance_name() { + enum { OBJ_INSTANCE_LEN = 32 }; + 
char buf[OBJ_INSTANCE_LEN + 1]; + + gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN); + state.obj.key.set_instance(buf); +} + +int DaosObject::omap_get_vals_by_keys(const DoutPrefixProvider* dpp, + const std::string& oid, + const std::set& keys, + Attrs* vals) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::omap_set_val_by_key(const DoutPrefixProvider* dpp, + const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) { + return 0; +} + +std::unique_ptr DaosObject::get_serializer( + const DoutPrefixProvider* dpp, const std::string& lock_name) { + return std::make_unique(dpp, store, this, lock_name); +} + +int DaosObject::transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, uint64_t olh_epoch, + const DoutPrefixProvider* dpp, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::transition_to_cloud( + Bucket* bucket, rgw::sal::PlacementTier* tier, rgw_bucket_dir_entry& o, + std::set& cloud_targets, CephContext* cct, bool update_object, + const DoutPrefixProvider* dpp, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +bool DaosObject::placement_rules_match(rgw_placement_rule& r1, + rgw_placement_rule& r2) { + /* XXX: support single default zone and zonegroup for now */ + return true; +} + +int DaosObject::dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y, + Formatter* f) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::unique_ptr DaosObject::get_read_op() { + return std::make_unique(this); +} + +DaosObject::DaosReadOp::DaosReadOp(DaosObject* _source) : source(_source) {} + +int DaosObject::DaosReadOp::prepare(optional_yield y, + const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << __func__ + << ": bucket=" << source->get_bucket()->get_name() + << dendl; + + if 
(source->get_bucket()->versioned() && !source->have_instance()) { + // If the bucket is versioned and no version is specified, get the latest + // version + source->set_instance(DS3_LATEST_INSTANCE); + } + + rgw_bucket_dir_entry ent; + int ret = source->get_dir_entry_attrs(dpp, &ent); + + // Set source object's attrs. The attrs is key/value map and is used + // in send_response_data() to set attributes, including etag. + bufferlist etag_bl; + string& etag = ent.meta.etag; + ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag + << dendl; + etag_bl.append(etag.c_str(), etag.size()); + source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); + + source->set_key(ent.key); + source->set_obj_size(ent.meta.size); + ldpp_dout(dpp, 20) << __func__ << ": object's size: " << ent.meta.size + << dendl; + + return ret; +} + +int DaosObject::DaosReadOp::read(int64_t off, int64_t end, bufferlist& bl, + optional_yield y, + const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl; + int ret = source->lookup(dpp); + if (ret != 0) { + return ret; + } + + // Calculate size, end is inclusive + uint64_t size = end - off + 1; + + // Read + ret = source->read(dpp, bl, off, size); + if (ret != 0) { + return ret; + } + + return ret; +} + +// RGWGetObj::execute() calls ReadOp::iterate() to read object from 'off' to +// 'end'. The returned data is processed in 'cb' which is a chain of +// post-processing filters such as decompression, de-encryption and sending back +// data to client (RGWGetObj_CB::handle_dta which in turn calls +// RGWGetObj::get_data_cb() to send data back.). +// +// POC implements a simple sync version of iterate() function in which it reads +// a block of data each time and call 'cb' for post-processing. 
+int DaosObject::DaosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, + int64_t end, RGWGetDataCB* cb, + optional_yield y) { + ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl; + int ret = source->lookup(dpp); + if (ret != 0) { + return ret; + } + + // Calculate size, end is inclusive + uint64_t size = end - off + 1; + + // Reserve buffers and read + bufferlist bl; + ret = source->read(dpp, bl, off, size); + if (ret != 0) { + return ret; + } + + // Call cb to process returned data. + ldpp_dout(dpp, 20) << __func__ << ": call cb to process data, actual=" << size + << dendl; + cb->handle_data(bl, off, size); + return ret; +} + +int DaosObject::DaosReadOp::get_attr(const DoutPrefixProvider* dpp, + const char* name, bufferlist& dest, + optional_yield y) { + Attrs attrs; + int ret = source->get_dir_entry_attrs(dpp, nullptr, &attrs); + if (!ret) { + return -ENODATA; + } + + auto search = attrs.find(name); + if (search == attrs.end()) { + return -ENODATA; + } + + dest = search->second; + return 0; +} + +std::unique_ptr DaosObject::get_delete_op() { + return std::make_unique(this); +} + +DaosObject::DaosDeleteOp::DaosDeleteOp(DaosObject* _source) : source(_source) {} + +// Implementation of DELETE OBJ also requires DaosObject::get_obj_state() +// to retrieve and set object's state from object's metadata. +// +// TODO: +// 1. The POC only deletes the Daos objects. It doesn't handle the +// DeleteOp::params. Delete::delete_obj() in rgw_rados.cc shows how rados +// backend process the params. +// 2. Delete an object when its versioning is turned on. +// 3. Handle empty directories +// 4. 
Fail when file doesn't exist +int DaosObject::DaosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, + optional_yield y) { + ldpp_dout(dpp, 20) << "DaosDeleteOp::delete_obj " + << source->get_key().get_oid() << " from " + << source->get_bucket()->get_name() << dendl; + if (source->get_instance() == "null") { + source->clear_instance(); + } + + // Open bucket + int ret = 0; + std::string key = source->get_key().get_oid(); + DaosBucket* daos_bucket = source->get_daos_bucket(); + ret = daos_bucket->open(dpp); + if (ret != 0) { + return ret; + } + + // Remove the daos object + ret = ds3_obj_destroy(key.c_str(), daos_bucket->ds3b); + ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_destroy key=" << key << " ret=" << ret + << dendl; + + // result.delete_marker = parent_op.result.delete_marker; + // result.version_id = parent_op.result.version_id; + + return ret; +} + +int DaosObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, + bool prevent_versioning) { + ldpp_dout(dpp, 20) << "DEBUG: delete_object" << dendl; + DaosObject::DaosDeleteOp del_op(this); + del_op.params.bucket_owner = bucket->get_info().owner; + del_op.params.versioning_status = bucket->get_info().versioning_status(); + + return del_op.delete_obj(dpp, y); +} + +int DaosObject::copy_object( + User* user, req_info* info, const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, ceph::real_time* mtime, + const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, + bool high_precision_time, const char* if_match, const char* if_nomatch, + AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, + RGWObjCategory category, uint64_t olh_epoch, + boost::optional delete_at, std::string* version_id, + std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*), + void* progress_data, const DoutPrefixProvider* dpp, optional_yield y) { + return 
DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::swift_versioning_copy(const DoutPrefixProvider* dpp, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosObject::lookup(const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << "DEBUG: lookup" << dendl; + if (is_open()) { + return 0; + } + + if (get_instance() == "null") { + clear_instance(); + } + + int ret = 0; + DaosBucket* daos_bucket = get_daos_bucket(); + ret = daos_bucket->open(dpp); + if (ret != 0) { + return ret; + } + + ret = ds3_obj_open(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b); + + if (ret == -ENOENT) { + ldpp_dout(dpp, 20) << "DEBUG: daos object (" << get_bucket()->get_name() + << ", " << get_key().get_oid() + << ") does not exist: ret=" << ret << dendl; + } else if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to open daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosObject::create(const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << "DEBUG: create" << dendl; + if (is_open()) { + return 0; + } + + if (get_instance() == "null") { + clear_instance(); + } + + int ret = 0; + DaosBucket* daos_bucket = get_daos_bucket(); + ret = daos_bucket->open(dpp); + if (ret != 0) { + return ret; + } + + ret = ds3_obj_create(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to create daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosObject::close(const DoutPrefixProvider* dpp) { + ldpp_dout(dpp, 20) << "DEBUG: close" << dendl; + if (!is_open()) { + return 0; + } + + int ret = ds3_obj_close(ds3o); + ds3o = nullptr; + ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_close ret=" << ret << dendl; + return ret; 
+} + +int DaosObject::write(const DoutPrefixProvider* dpp, bufferlist&& data, + uint64_t offset) { + ldpp_dout(dpp, 20) << "DEBUG: write" << dendl; + uint64_t size = data.length(); + int ret = ds3_obj_write(data.c_str(), offset, &size, get_daos_bucket()->ds3b, + ds3o, nullptr); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to write into daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosObject::read(const DoutPrefixProvider* dpp, bufferlist& data, + uint64_t offset, uint64_t& size) { + ldpp_dout(dpp, 20) << "DEBUG: read" << dendl; + int ret = ds3_obj_read(data.append_hole(size).c_str(), offset, &size, + get_daos_bucket()->ds3b, ds3o, nullptr); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read from daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + } + return ret; +} + +// Get the object's dirent and attrs +int DaosObject::get_dir_entry_attrs(const DoutPrefixProvider* dpp, + rgw_bucket_dir_entry* ent, + Attrs* getattrs) { + ldpp_dout(dpp, 20) << "DEBUG: get_dir_entry_attrs" << dendl; + int ret = 0; + vector value(DS3_MAX_ENCODED_LEN); + uint32_t size = value.size(); + + if (get_key().ns == RGW_OBJ_NS_MULTIPART) { + struct ds3_multipart_upload_info ui = {.encoded = value.data(), + .encoded_length = size}; + ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), + get_key().get_oid().c_str(), store->ds3); + } else { + ret = lookup(dpp); + if (ret != 0) { + return ret; + } + + auto object_info = std::make_unique(); + object_info->encoded = value.data(); + object_info->encoded_length = size; + ret = ds3_obj_get_info(object_info.get(), get_daos_bucket()->ds3b, ds3o); + size = object_info->encoded_length; + } + + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to get info of daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + return 
ret; + } + + rgw_bucket_dir_entry dummy_ent; + if (!ent) { + // if ent is not passed, use a dummy ent + ent = &dummy_ent; + } + + bufferlist bl; + bl.append(reinterpret_cast(value.data()), size); + auto iter = bl.cbegin(); + ent->decode(iter); + if (getattrs) { + decode(*getattrs, iter); + } + + return ret; +} +// Set the object's dirent and attrs +int DaosObject::set_dir_entry_attrs(const DoutPrefixProvider* dpp, + rgw_bucket_dir_entry* ent, + Attrs* setattrs) { + ldpp_dout(dpp, 20) << "DEBUG: set_dir_entry_attrs" << dendl; + int ret = lookup(dpp); + if (ret != 0) { + return ret; + } + + // Set defaults + if (!ent) { + // if ent is not passed, return an error + return -EINVAL; + } + + if (!setattrs) { + // if setattrs is not passed, use object attrs + setattrs = &attrs; + } + + bufferlist wbl; + ent->encode(wbl); + encode(*setattrs, wbl); + + // Write rgw_bucket_dir_entry into object xattr + auto object_info = std::make_unique(); + object_info->encoded = wbl.c_str(); + object_info->encoded_length = wbl.length(); + ret = ds3_obj_set_info(object_info.get(), get_daos_bucket()->ds3b, ds3o); + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to set info of daos object (" + << get_bucket()->get_name() << ", " << get_key().get_oid() + << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosObject::mark_as_latest(const DoutPrefixProvider* dpp, + ceph::real_time set_mtime) { + // TODO handle deletion + // TODO understand race conditions + ldpp_dout(dpp, 20) << "DEBUG: mark_as_latest" << dendl; + + // Get latest version so far + std::unique_ptr latest_object = std::make_unique( + store, rgw_obj_key(get_name(), DS3_LATEST_INSTANCE), get_bucket()); + + ldpp_dout(dpp, 20) << __func__ << ": key=" << get_key().get_oid() + << " latest_object_key= " + << latest_object->get_key().get_oid() << dendl; + + int ret = latest_object->lookup(dpp); + if (ret == 0) { + // Get metadata only if file exists + rgw_bucket_dir_entry latest_ent; + Attrs latest_attrs; + ret = 
latest_object->get_dir_entry_attrs(dpp, &latest_ent, &latest_attrs); + if (ret != 0) { + return ret; + } + + // Update flags + latest_ent.flags = rgw_bucket_dir_entry::FLAG_VER; + latest_ent.meta.mtime = set_mtime; + ret = latest_object->set_dir_entry_attrs(dpp, &latest_ent, &latest_attrs); + if (ret != 0) { + return ret; + } + } + + // Get or create the link [latest], make it link to the current latest + // version. + ret = + ds3_obj_mark_latest(get_key().get_oid().c_str(), get_daos_bucket()->ds3b); + ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_mark_latest ret=" << ret << dendl; + return ret; +} + +DaosAtomicWriter::DaosAtomicWriter( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, DaosStore* _store, + const rgw_user& _owner, const rgw_placement_rule* _ptail_placement_rule, + uint64_t _olh_epoch, const std::string& _unique_tag) + : StoreWriter(dpp, y), + store(_store), + owner(_owner), + ptail_placement_rule(_ptail_placement_rule), + olh_epoch(_olh_epoch), + unique_tag(_unique_tag), + obj(_store, obj->get_key(), obj->get_bucket()) {} + +int DaosAtomicWriter::prepare(optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: prepare" << dendl; + int ret = obj.create(dpp); + return ret; +} + +// TODO: Handle concurrent writes, a unique object id is a possible solution, or +// use DAOS transactions +// XXX: Do we need to accumulate writes as motr does? 
+int DaosAtomicWriter::process(bufferlist&& data, uint64_t offset) { + ldpp_dout(dpp, 20) << "DEBUG: process" << dendl; + if (data.length() == 0) { + return 0; + } + + int ret = 0; + if (!obj.is_open()) { + ret = obj.lookup(dpp); + if (ret != 0) { + return ret; + } + } + + // XXX: Combine multiple streams into one as motr does + uint64_t data_size = data.length(); + ret = obj.write(dpp, std::move(data), offset); + if (ret == 0) { + total_data_size += data_size; + } + return ret; +} + +int DaosAtomicWriter::complete( + size_t accounted_size, const std::string& etag, ceph::real_time* mtime, + ceph::real_time set_mtime, std::map& attrs, + ceph::real_time delete_at, const char* if_match, const char* if_nomatch, + const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, + optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl; + bufferlist bl; + rgw_bucket_dir_entry ent; + int ret; + + // Set rgw_bucet_dir_entry. Some of the members of this structure may not + // apply to daos. + // + // Checkout AtomicObjectProcessor::complete() in rgw_putobj_processor.cc + // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what and + // how to set the dir entry. Only set the basic ones for POC, no ACLs and + // other attrs. + obj.get_key().get_index_key(&ent.key); + ent.meta.size = total_data_size; + ent.meta.accounted_size = accounted_size; + ent.meta.mtime = + real_clock::is_zero(set_mtime) ? 
ceph::real_clock::now() : set_mtime; + ent.meta.etag = etag; + ent.meta.owner = owner.to_str(); + ent.meta.owner_display_name = + obj.get_bucket()->get_owner()->get_display_name(); + bool is_versioned = obj.get_bucket()->versioned(); + if (is_versioned) + ent.flags = + rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; + ldpp_dout(dpp, 20) << __func__ << ": key=" << obj.get_key().get_oid() + << " etag: " << etag << dendl; + if (user_data) ent.meta.user_data = *user_data; + + RGWBucketInfo& info = obj.get_bucket()->get_info(); + if (info.obj_lock_enabled() && info.obj_lock.has_rule()) { + auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); + if (iter == attrs.end()) { + real_time lock_until_date = + info.obj_lock.get_lock_until_date(ent.meta.mtime); + string mode = info.obj_lock.get_mode(); + RGWObjectRetention obj_retention(mode, lock_until_date); + bufferlist retention_bl; + obj_retention.encode(retention_bl); + attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl; + } + } + + ret = obj.set_dir_entry_attrs(dpp, &ent, &attrs); + + if (is_versioned) { + ret = obj.mark_as_latest(dpp, set_mtime); + if (ret != 0) { + return ret; + } + } + + return ret; +} + +int DaosMultipartUpload::abort(const DoutPrefixProvider* dpp, + CephContext* cct) { + // Remove upload from bucket multipart index + ldpp_dout(dpp, 20) << "DEBUG: abort" << dendl; + return ds3_upload_remove(bucket->get_name().c_str(), get_upload_id().c_str(), + store->ds3); +} + +std::unique_ptr DaosMultipartUpload::get_meta_obj() { + return bucket->get_object( + rgw_obj_key(get_upload_id(), string(), RGW_OBJ_NS_MULTIPART)); +} + +int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y, + ACLOwner& _owner, + rgw_placement_rule& dest_placement, + rgw::sal::Attrs& attrs) { + ldpp_dout(dpp, 20) << "DEBUG: init" << dendl; + int ret; + std::string oid = mp_obj.get_key(); + + // Create an initial entry in the bucket. 
The entry will be + // updated when multipart upload is completed, for example, + // size, etag etc. + bufferlist bl; + rgw_bucket_dir_entry ent; + ent.key.name = oid; + ent.meta.owner = owner.get_id().to_str(); + ent.meta.category = RGWObjCategory::MultiMeta; + ent.meta.mtime = ceph::real_clock::now(); + + multipart_upload_info upload_info; + upload_info.dest_placement = dest_placement; + + ent.encode(bl); + encode(attrs, bl); + encode(upload_info, bl); + + struct ds3_multipart_upload_info ui; + std::strcpy(ui.upload_id, MULTIPART_UPLOAD_ID_PREFIX); + std::strncpy(ui.key, oid.c_str(), sizeof(ui.key)); + ui.encoded = bl.c_str(); + ui.encoded_length = bl.length(); + int prefix_length = strlen(ui.upload_id); + + do { + gen_rand_alphanumeric(store->ctx(), ui.upload_id + prefix_length, + sizeof(ui.upload_id) - 1 - prefix_length); + mp_obj.init(oid, ui.upload_id); + ret = ds3_upload_init(&ui, bucket->get_name().c_str(), store->ds3); + } while (ret == -EEXIST); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to create multipart upload dir (" + << bucket->get_name() << "/" << get_upload_id() + << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosMultipartUpload::list_parts(const DoutPrefixProvider* dpp, + CephContext* cct, int num_parts, int marker, + int* next_marker, bool* truncated, + bool assume_unsorted) { + ldpp_dout(dpp, 20) << "DEBUG: list_parts" << dendl; + // Init needed structures + vector multipart_part_infos(num_parts); + uint32_t npart = multipart_part_infos.size(); + vector> values(npart, vector(DS3_MAX_ENCODED_LEN)); + for (uint32_t i = 0; i < npart; i++) { + multipart_part_infos[i].encoded = values[i].data(); + multipart_part_infos[i].encoded_length = values[i].size(); + } + + uint32_t daos_marker = marker; + int ret = ds3_upload_list_parts( + bucket->get_name().c_str(), get_upload_id().c_str(), &npart, + multipart_part_infos.data(), &daos_marker, truncated, store->ds3); + + if (ret != 0) { + if (ret == -ENOENT) { + ret = 
-ERR_NO_SUCH_UPLOAD; + } + return ret; + } + + multipart_part_infos.resize(npart); + values.resize(npart); + parts.clear(); + + for (auto const& pi : multipart_part_infos) { + bufferlist bl; + bl.append(reinterpret_cast(pi.encoded), pi.encoded_length); + + std::unique_ptr part = + std::make_unique(); + auto iter = bl.cbegin(); + decode(part->info, iter); + parts[pi.part_num] = std::move(part); + } + + if (next_marker) { + *next_marker = daos_marker; + } + return ret; +} + +// Heavily copied from rgw_sal_rados.cc +int DaosMultipartUpload::complete( + const DoutPrefixProvider* dpp, optional_yield y, CephContext* cct, + map& part_etags, list& remove_objs, + uint64_t& accounted_size, bool& compressed, RGWCompressionInfo& cs_info, + off_t& off, std::string& tag, ACLOwner& owner, uint64_t olh_epoch, + rgw::sal::Object* target_obj) { + ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl; + char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; + std::string etag; + bufferlist etag_bl; + MD5 hash; + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + bool truncated; + int ret; + + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): enter" << dendl; + int total_parts = 0; + int handled_parts = 0; + int max_parts = 1000; + int marker = 0; + uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; + auto etags_iter = part_etags.begin(); + rgw::sal::Attrs attrs = target_obj->get_attrs(); + + do { + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): list_parts()" + << dendl; + ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + if (ret != 0) return ret; + + total_parts += parts.size(); + if (!truncated && total_parts != (int)part_etags.size()) { + ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts + << " expected: " << part_etags.size() << dendl; + 
ret = -ERR_INVALID_PART; + return ret; + } + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): parts.size()=" + << parts.size() << dendl; + + for (auto obj_iter = parts.begin(); + etags_iter != part_etags.end() && obj_iter != parts.end(); + ++etags_iter, ++obj_iter, ++handled_parts) { + DaosMultipartPart* part = + dynamic_cast(obj_iter->second.get()); + uint64_t part_size = part->get_size(); + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part_size=" + << part_size << dendl; + if (handled_parts < (int)part_etags.size() - 1 && + part_size < min_part_size) { + ret = -ERR_TOO_SMALL; + return ret; + } + + char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + if (etags_iter->first != (int)obj_iter->first) { + ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: " + << etags_iter->first + << " next uploaded: " << obj_iter->first << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + string part_etag = rgw_string_unquote(etags_iter->second); + if (part_etag.compare(part->get_etag()) != 0) { + ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " + << etags_iter->first + << " etag: " << etags_iter->second << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + + hex_to_buf(part->get_etag().c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); + hash.Update((const unsigned char*)petag, sizeof(petag)); + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): calc etag " + << dendl; + + RGWUploadPartInfo& obj_part = part->info; + string oid = mp_obj.get_part(obj_part.num); + rgw_obj src_obj; + src_obj.init_ns(bucket->get_key(), oid, RGW_OBJ_NS_MULTIPART); + + bool part_compressed = (obj_part.cs_info.compression_type != "none"); + if ((handled_parts > 0) && + ((part_compressed != compressed) || + (cs_info.compression_type != obj_part.cs_info.compression_type))) { + ldpp_dout(dpp, 0) + << "ERROR: compression type was changed during multipart upload (" + << cs_info.compression_type << ">>" + << obj_part.cs_info.compression_type << ")" << dendl; + ret = 
-ERR_INVALID_PART; + return ret; + } + + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part compression" + << dendl; + if (part_compressed) { + int64_t new_ofs; // offset in compression data for new part + if (cs_info.blocks.size() > 0) + new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len; + else + new_ofs = 0; + for (const auto& block : obj_part.cs_info.blocks) { + compression_block cb; + cb.old_ofs = block.old_ofs + cs_info.orig_size; + cb.new_ofs = new_ofs; + cb.len = block.len; + cs_info.blocks.push_back(cb); + new_ofs = cb.new_ofs + cb.len; + } + if (!compressed) + cs_info.compression_type = obj_part.cs_info.compression_type; + cs_info.orig_size += obj_part.cs_info.orig_size; + compressed = true; + } + + // We may not need to do the following as remove_objs are those + // don't show when listing a bucket. As we store in-progress uploaded + // object's metadata in a separate index, they are not shown when + // listing a bucket. + rgw_obj_index_key remove_key; + src_obj.key.get_index_key(&remove_key); + + remove_objs.push_back(remove_key); + + off += obj_part.size; + accounted_size += obj_part.accounted_size; + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): off=" << off + << ", accounted_size = " << accounted_size << dendl; + } + } while (truncated); + hash.Final((unsigned char*)final_etag); + + buf_to_hex((unsigned char*)final_etag, sizeof(final_etag), final_etag_str); + snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], + sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, "-%lld", + (long long)part_etags.size()); + etag = final_etag_str; + ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; + + etag_bl.append(etag); + + attrs[RGW_ATTR_ETAG] = etag_bl; + + if (compressed) { + // write compression attribute to full object + bufferlist tmp; + encode(cs_info, tmp); + attrs[RGW_ATTR_COMPRESSION] = tmp; + } + + // Different from rgw_sal_rados.cc starts here + // Read the object's multipart info + bufferlist 
bl; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_multipart_upload_info ui = { + .encoded = bl.append_hole(size).c_str(), .encoded_length = size}; + ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), + get_upload_id().c_str(), store->ds3); + ldpp_dout(dpp, 20) << "DEBUG: ds3_upload_get_info entry=" + << bucket->get_name() << "/" << get_upload_id() << dendl; + if (ret != 0) { + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + return ret; + } + + rgw_bucket_dir_entry ent; + auto iter = bl.cbegin(); + ent.decode(iter); + + // Update entry data and name + target_obj->get_key().get_index_key(&ent.key); + ent.meta.size = off; + ent.meta.accounted_size = accounted_size; + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): obj size=" + << ent.meta.size + << " obj accounted size=" << ent.meta.accounted_size + << dendl; + ent.meta.category = RGWObjCategory::Main; + ent.meta.mtime = ceph::real_clock::now(); + bool is_versioned = target_obj->get_bucket()->versioned(); + if (is_versioned) + ent.flags = + rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; + ent.meta.etag = etag; + + // Open object + DaosObject* obj = static_cast(target_obj); + ret = obj->create(dpp); + if (ret != 0) { + return ret; + } + + // Copy data from parts to object + uint64_t write_off = 0; + for (auto const& [part_num, part] : get_parts()) { + ds3_part_t* ds3p; + ret = ds3_part_open(get_bucket_name().c_str(), get_upload_id().c_str(), + part_num, false, &ds3p, store->ds3); + if (ret != 0) { + return ret; + } + + // Reserve buffers and read + uint64_t size = part->get_size(); + bufferlist bl; + ret = ds3_part_read(bl.append_hole(size).c_str(), 0, &size, ds3p, + store->ds3, nullptr); + if (ret != 0) { + ds3_part_close(ds3p); + return ret; + } + + ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part " << part_num + << " size is " << size << dendl; + + // write to obj + obj->write(dpp, std::move(bl), write_off); + ds3_part_close(ds3p); + write_off += 
part->get_size(); + } + + // Set attributes + ret = obj->set_dir_entry_attrs(dpp, &ent, &attrs); + + if (is_versioned) { + ret = obj->mark_as_latest(dpp, ent.meta.mtime); + if (ret != 0) { + return ret; + } + } + + // Remove upload from bucket multipart index + ret = ds3_upload_remove(get_bucket_name().c_str(), get_upload_id().c_str(), + store->ds3); + return ret; +} + +int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp, + optional_yield y, rgw_placement_rule** rule, + rgw::sal::Attrs* attrs) { + ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_info(): enter" << dendl; + if (!rule && !attrs) { + return 0; + } + + if (rule) { + if (!placement.empty()) { + *rule = &placement; + if (!attrs) { + // Don't need attrs, done + return 0; + } + } else { + *rule = nullptr; + } + } + + // Read the multipart upload dirent from index + bufferlist bl; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_multipart_upload_info ui = { + .encoded = bl.append_hole(size).c_str(), .encoded_length = size}; + int ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), + get_upload_id().c_str(), store->ds3); + + if (ret != 0) { + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + return ret; + } + + multipart_upload_info upload_info; + rgw_bucket_dir_entry ent; + Attrs decoded_attrs; + auto iter = bl.cbegin(); + ent.decode(iter); + decode(decoded_attrs, iter); + ldpp_dout(dpp, 20) << "DEBUG: decoded_attrs=" << attrs << dendl; + + if (attrs) { + *attrs = decoded_attrs; + if (!rule || *rule != nullptr) { + // placement was cached; don't actually read + return 0; + } + } + + // Now decode the placement rule + decode(upload_info, iter); + placement = upload_info.dest_placement; + *rule = &placement; + + return 0; +} + +std::unique_ptr DaosMultipartUpload::get_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, uint64_t part_num, + const std::string& part_num_str) { + 
ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_writer(): enter part=" + << part_num << " head_obj=" << _head_obj << dendl; + return std::make_unique( + dpp, y, this, obj, store, owner, ptail_placement_rule, + part_num, part_num_str); +} + +DaosMultipartWriter::~DaosMultipartWriter() { + if (is_open()) ds3_part_close(ds3p); +} + +int DaosMultipartWriter::prepare(optional_yield y) { + ldpp_dout(dpp, 20) << "DaosMultipartWriter::prepare(): enter part=" + << part_num_str << dendl; + int ret = ds3_part_open(get_bucket_name().c_str(), upload_id.c_str(), + part_num, true, &ds3p, store->ds3); + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + return ret; +} + +const std::string& DaosMultipartWriter::get_bucket_name() { + return static_cast(upload)->get_bucket_name(); +} + +int DaosMultipartWriter::process(bufferlist&& data, uint64_t offset) { + ldpp_dout(dpp, 20) << "DaosMultipartWriter::process(): enter part=" + << part_num_str << " offset=" << offset << dendl; + if (data.length() == 0) { + return 0; + } + + uint64_t size = data.length(); + int ret = + ds3_part_write(data.c_str(), offset, &size, ds3p, store->ds3, nullptr); + if (ret == 0) { + // XXX: Combine multiple streams into one as motr does + actual_part_size += size; + } else { + ldpp_dout(dpp, 0) << "ERROR: failed to write into part (" + << get_bucket_name() << ", " << upload_id << ", " + << part_num << "): ret=" << ret << dendl; + } + return ret; +} + +int DaosMultipartWriter::complete( + size_t accounted_size, const std::string& etag, ceph::real_time* mtime, + ceph::real_time set_mtime, std::map& attrs, + ceph::real_time delete_at, const char* if_match, const char* if_nomatch, + const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, + optional_yield y) { + ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): enter part=" + << part_num_str << dendl; + + // Add an entry into part index + bufferlist bl; + RGWUploadPartInfo info; + info.num = part_num; + info.etag = etag; + info.size = 
actual_part_size; + info.accounted_size = accounted_size; + info.modified = real_clock::now(); + + bool compressed; + int ret = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info); + ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): compression ret=" + << ret << dendl; + if (ret != 0) { + ldpp_dout(dpp, 1) << "cannot get compression info" << dendl; + return ret; + } + encode(info, bl); + encode(attrs, bl); + ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): entry size" + << bl.length() << dendl; + + struct ds3_multipart_part_info part_info = {.part_num = part_num, + .encoded = bl.c_str(), + .encoded_length = bl.length()}; + + ret = ds3_part_set_info(&part_info, ds3p, store->ds3, nullptr); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to set part info (" << get_bucket_name() + << ", " << upload_id << ", " << part_num + << "): ret=" << ret << dendl; + if (ret == ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + } + + return ret; +} + +std::unique_ptr DaosStore::get_role( + std::string name, std::string tenant, std::string path, + std::string trust_policy, std::string max_session_duration_str, + std::multimap tags) { + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +std::unique_ptr DaosStore::get_role(const RGWRoleInfo& info) { + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +std::unique_ptr DaosStore::get_role(std::string id) { + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +int DaosStore::get_roles(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + vector>& roles) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::unique_ptr DaosStore::get_oidc_provider() { + RGWOIDCProvider* p = nullptr; + return std::unique_ptr(p); +} + +int DaosStore::get_oidc_providers( + const DoutPrefixProvider* dpp, const std::string& tenant, + vector>& providers) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::unique_ptr DaosBucket::get_multipart_upload( + 
const std::string& oid, std::optional upload_id, + ACLOwner owner, ceph::real_time mtime) { + return std::make_unique(store, this, oid, upload_id, + owner, mtime); +} + +std::unique_ptr DaosStore::get_append_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, + const std::string& unique_tag, uint64_t position, + uint64_t* cur_accounted_size) { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return nullptr; +} + +std::unique_ptr DaosStore::get_atomic_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch, + const std::string& unique_tag) { + ldpp_dout(dpp, 20) << "get_atomic_writer" << dendl; + return std::make_unique(dpp, y, obj, this, + owner, ptail_placement_rule, + olh_epoch, unique_tag); +} + +const std::string& DaosStore::get_compression_type( + const rgw_placement_rule& rule) { + return zone.zone_params->get_compression_type(rule); +} + +bool DaosStore::valid_placement(const rgw_placement_rule& rule) { + return zone.zone_params->valid_placement(rule); +} + +std::unique_ptr DaosStore::get_user(const rgw_user& u) { + ldout(cctx, 20) << "DEBUG: bucket's user: " << u.to_str() << dendl; + return std::make_unique(this, u); +} + +int DaosStore::get_user_by_access_key(const DoutPrefixProvider* dpp, + const std::string& key, optional_yield y, + std::unique_ptr* user) { + // Initialize ds3_user_info + bufferlist bl; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), + .encoded_length = size}; + + int ret = ds3_user_get_by_key(key.c_str(), &user_info, ds3, nullptr); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_key failed, key=" << key + << " ret=" << ret << dendl; + return ret; + } + + // Decode + DaosUserInfo duinfo; + bufferlist& blr = bl; + auto iter = blr.cbegin(); + 
duinfo.decode(iter); + + User* u = new DaosUser(this, duinfo.info); + if (!u) { + return -ENOMEM; + } + + user->reset(u); + return 0; +} + +int DaosStore::get_user_by_email(const DoutPrefixProvider* dpp, + const std::string& email, optional_yield y, + std::unique_ptr* user) { + // Initialize ds3_user_info + bufferlist bl; + uint64_t size = DS3_MAX_ENCODED_LEN; + struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), + .encoded_length = size}; + + int ret = ds3_user_get_by_email(email.c_str(), &user_info, ds3, nullptr); + + if (ret != 0) { + ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_email failed, email=" << email + << " ret=" << ret << dendl; + return ret; + } + + // Decode + DaosUserInfo duinfo; + bufferlist& blr = bl; + auto iter = blr.cbegin(); + duinfo.decode(iter); + + User* u = new DaosUser(this, duinfo.info); + if (!u) { + return -ENOMEM; + } + + user->reset(u); + return 0; +} + +int DaosStore::get_user_by_swift(const DoutPrefixProvider* dpp, + const std::string& user_str, optional_yield y, + std::unique_ptr* user) { + /* Swift keys and subusers are not supported for now */ + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::unique_ptr DaosStore::get_object(const rgw_obj_key& k) { + return std::make_unique(this, k); +} + +inline std::ostream& operator<<(std::ostream& out, const rgw_user* u) { + std::string s; + if (u != nullptr) + u->to_str(s); + else + s = "(nullptr)"; + return out << s; +} + +int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, + const rgw_bucket& b, std::unique_ptr* bucket, + optional_yield y) { + ldpp_dout(dpp, 20) << "DEBUG: get_bucket1: User: " << u << dendl; + int ret; + Bucket* bp; + + bp = new DaosBucket(this, b, u); + ret = bp->load_bucket(dpp, y); + if (ret != 0) { + delete bp; + return ret; + } + + bucket->reset(bp); + return 0; +} + +int DaosStore::get_bucket(User* u, const RGWBucketInfo& i, + std::unique_ptr* bucket) { + DaosBucket* bp; + + bp = new DaosBucket(this, i, u); + /* Don't need to 
fetch the bucket info, use the provided one */ + + bucket->reset(bp); + return 0; +} + +int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, + const std::string& tenant, const std::string& name, + std::unique_ptr* bucket, optional_yield y) { + ldpp_dout(dpp, 20) << "get_bucket" << dendl; + rgw_bucket b; + + b.tenant = tenant; + b.name = name; + + return get_bucket(dpp, u, b, bucket, y); +} + +bool DaosStore::is_meta_master() { return true; } + +int DaosStore::forward_request_to_master(const DoutPrefixProvider* dpp, + User* user, obj_version* objv, + bufferlist& in_data, JSONParser* jp, + req_info& info, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::forward_iam_request_to_master(const DoutPrefixProvider* dpp, + const RGWAccessKey& key, + obj_version* objv, + bufferlist& in_data, + RGWXMLDecoder::XMLParser* parser, + req_info& info, optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::string DaosStore::zone_unique_id(uint64_t unique_num) { return ""; } + +std::string DaosStore::zone_unique_trans_id(const uint64_t unique_num) { + return ""; +} + +int DaosStore::cluster_stat(RGWClusterStat& stats) { + return DAOS_NOT_IMPLEMENTED_LOG(nullptr); +} + +std::unique_ptr DaosStore::get_lifecycle(void) { + DAOS_NOT_IMPLEMENTED_LOG(nullptr); + return 0; +} + +std::unique_ptr DaosStore::get_notification( + rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s, + rgw::notify::EventType event_type, const std::string* object_name) { + return std::make_unique(obj, src_obj, event_type); +} + +std::unique_ptr DaosStore::get_notification( + const DoutPrefixProvider* dpp, Object* obj, Object* src_obj, + rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, + std::string& _user_id, std::string& _user_tenant, std::string& _req_id, + optional_yield y) { + ldpp_dout(dpp, 20) << "get_notification" << dendl; + return std::make_unique(obj, src_obj, event_type); +} + +int DaosStore::log_usage(const 
DoutPrefixProvider* dpp, + map& usage_info) { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return 0; +} + +int DaosStore::log_op(const DoutPrefixProvider* dpp, string& oid, + bufferlist& bl) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::register_to_service_map(const DoutPrefixProvider* dpp, + const string& daemon_type, + const map& meta) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +void DaosStore::get_quota(RGWQuota& quota) { + // XXX: Not handled for the first pass + return; +} + +void DaosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, + RGWRateLimitInfo& user_ratelimit, + RGWRateLimitInfo& anon_ratelimit) { + return; +} + +int DaosStore::set_buckets_enabled(const DoutPrefixProvider* dpp, + std::vector& buckets, + bool enabled) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef* phandler, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +RGWDataSyncStatusManager* DaosStore::get_data_sync_manager( + const rgw_zone_id& source_zone) { + DAOS_NOT_IMPLEMENTED_LOG(nullptr); + return 0; +} + +int DaosStore::read_all_usage( + const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, + map& usage) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::trim_all_usage(const DoutPrefixProvider* dpp, + uint64_t start_epoch, uint64_t end_epoch) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::get_config_key_val(string name, bufferlist* bl) { + return DAOS_NOT_IMPLEMENTED_LOG(nullptr); +} + +int DaosStore::meta_list_keys_init(const DoutPrefixProvider* dpp, + const string& section, const string& marker, + void** phandle) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +int DaosStore::meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle, + int max, list& keys, + bool* truncated) { + 
return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +void DaosStore::meta_list_keys_complete(void* handle) { return; } + +std::string DaosStore::meta_get_marker(void* handle) { return ""; } + +int DaosStore::meta_remove(const DoutPrefixProvider* dpp, string& metadata_key, + optional_yield y) { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); +} + +std::string DaosStore::get_cluster_id(const DoutPrefixProvider* dpp, + optional_yield y) { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return ""; +} + +} // namespace rgw::sal + +extern "C" { + +void* newDaosStore(CephContext* cct) { + return new rgw::sal::DaosStore(cct); +} +} diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h new file mode 100644 index 000000000000..ac7352191f23 --- /dev/null +++ b/src/rgw/driver/daos/rgw_sal_daos.h @@ -0,0 +1,1040 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=2 sw=2 expandtab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * SAL implementation for the CORTX Daos backend + * + * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include "rgw_multi.h" +#include "rgw_notify.h" +#include "rgw_oidc_provider.h" +#include "rgw_putobj_processor.h" +#include "rgw_rados.h" +#include "rgw_role.h" +#include "rgw_sal_store.h" + +inline bool IsDebuggerAttached() { +#ifdef DEBUG + char buf[4096]; + + const int status_fd = ::open("/proc/self/status", O_RDONLY); + if (status_fd == -1) return false; + + const ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1); + ::close(status_fd); + + if (num_read <= 0) return false; + + buf[num_read] = '\0'; + constexpr char tracerPidString[] = "TracerPid:"; + const auto tracer_pid_ptr = ::strstr(buf, tracerPidString); + if (!tracer_pid_ptr) return false; + + for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1; + characterPtr <= buf + num_read; ++characterPtr) { + if (::isspace(*characterPtr)) + continue; + else + return ::isdigit(*characterPtr) != 0 && *characterPtr != '0'; + } +#endif // DEBUG + return false; +} + +inline void DebugBreak() { +#ifdef DEBUG + // only break into the debugger if the debugger is attached + if (IsDebuggerAttached()) + raise(SIGINT); // breaks into GDB and stops, can be continued +#endif // DEBUG +} + +inline int NotImplementedLog(const DoutPrefixProvider* ldpp, + const char* filename, int linenumber, + const char* functionname) { + if (ldpp) + ldpp_dout(ldpp, 20) << filename << "(" << linenumber << ") " << functionname + << ": Not implemented" << dendl; + return 0; +} + +inline int NotImplementedGdbBreak(const DoutPrefixProvider* ldpp, + const char* filename, int linenumber, + const char* functionname) { + NotImplementedLog(ldpp, filename, linenumber, functionname); + DebugBreak(); + return 0; +} + +#define DAOS_NOT_IMPLEMENTED_GDB_BREAK(ldpp) \ + NotImplementedGdbBreak(ldpp, __FILE__, __LINE__, __FUNCTION__) +#define DAOS_NOT_IMPLEMENTED_LOG(ldpp) \ + NotImplementedLog(ldpp, __FILE__, __LINE__, 
__FUNCTION__) + +namespace rgw::sal { + +class DaosStore; +class DaosObject; + +#ifdef DEBUG +// Prepends each log entry with the "filename(source_line) function_name". Makes +// it simple to +// associate log entries with the source that generated the log entry +#undef ldpp_dout +#define ldpp_dout(dpp, v) \ + if (decltype(auto) pdpp = (dpp); \ + pdpp) /* workaround -Wnonnull-compare for 'this' */ \ + dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \ + pdpp->gen_prefix(*_dout) \ + << __FILE__ << "(" << __LINE__ << ") " << __FUNCTION__ << " - " +#endif // DEBUG + +struct DaosUserInfo { + RGWUserInfo info; + obj_version user_version; + rgw::sal::Attrs attrs; + + void encode(bufferlist& bl) const { + ENCODE_START(3, 3, bl); + encode(info, bl); + encode(user_version, bl); + encode(attrs, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(3, bl); + decode(info, bl); + decode(user_version, bl); + decode(attrs, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(DaosUserInfo); + +class DaosNotification : public StoreNotification { + public: + DaosNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) + : StoreNotification(_obj, _src_obj, _type) {} + ~DaosNotification() = default; + + virtual int publish_reserve(const DoutPrefixProvider* dpp, + RGWObjTags* obj_tags = nullptr) override { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); + } + virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, + const ceph::real_time& mtime, + const std::string& etag, + const std::string& version) override { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); + } +}; + +class DaosUser : public StoreUser { + private: + DaosStore* store; + std::vector access_ids; + + public: + DaosUser(DaosStore* _st, const rgw_user& _u) : StoreUser(_u), store(_st) {} + DaosUser(DaosStore* _st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) {} + DaosUser(DaosStore* _st) : store(_st) {} + DaosUser(DaosUser& _o) = 
default; + DaosUser() {} + + virtual std::unique_ptr clone() override { + return std::make_unique(*this); + } + int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, + const std::string& end_marker, uint64_t max, bool need_stats, + BucketList& buckets, optional_yield y) override; + virtual int create_bucket( + const DoutPrefixProvider* dpp, const rgw_bucket& b, + const std::string& zonegroup_id, rgw_placement_rule& placement_rule, + std::string& swift_ver_location, const RGWQuotaInfo* pquota_info, + const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info, + obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, + bool* existed, req_info& req_info, std::unique_ptr* bucket, + optional_yield y) override; + virtual int read_attrs(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, + Attrs& new_attrs, + optional_yield y) override; + virtual int read_stats(const DoutPrefixProvider* dpp, optional_yield y, + RGWStorageStats* stats, + ceph::real_time* last_stats_sync = nullptr, + ceph::real_time* last_stats_update = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider* dpp, + RGWGetUserStats_CB* cb) override; + virtual int complete_flush_stats(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int read_usage( + const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch) override; + + virtual int load_user(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, + bool exclusive, + RGWUserInfo* old_info = nullptr) override; + virtual int remove_user(const DoutPrefixProvider* dpp, + optional_yield y) override; + + /** Read user info 
without loading it */ + int read_user(const DoutPrefixProvider* dpp, std::string name, + DaosUserInfo* duinfo); + + std::unique_ptr get_encoded_info(bufferlist& bl, + obj_version& obj_ver); + + friend class DaosBucket; +}; + +// RGWBucketInfo and other information that are shown when listing a bucket is +// represented in struct DaosBucketInfo. The structure is encoded and stored +// as the value of the global bucket instance index. +// TODO: compare pros and cons of separating the bucket_attrs (ACLs, tag etc.) +// into a different index. +struct DaosBucketInfo { + RGWBucketInfo info; + + obj_version bucket_version; + ceph::real_time mtime; + + rgw::sal::Attrs bucket_attrs; + + void encode(bufferlist& bl) const { + ENCODE_START(4, 4, bl); + encode(info, bl); + encode(bucket_version, bl); + encode(mtime, bl); + encode(bucket_attrs, bl); // rgw_cache.h example for a map + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(4, bl); + decode(info, bl); + decode(bucket_version, bl); + decode(mtime, bl); + decode(bucket_attrs, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(DaosBucketInfo); + +class DaosBucket : public StoreBucket { + private: + DaosStore* store; + RGWAccessControlPolicy acls; + + public: + /** Container ds3b handle */ + ds3_bucket_t* ds3b = nullptr; + + DaosBucket(DaosStore* _st) : store(_st), acls() {} + + DaosBucket(const DaosBucket& _daos_bucket) + : store(_daos_bucket.store), acls(), ds3b(nullptr) { + // TODO: deep copy all objects + } + + DaosBucket(DaosStore* _st, User* _u) : StoreBucket(_u), store(_st), acls() {} + + DaosBucket(DaosStore* _st, const rgw_bucket& _b) + : StoreBucket(_b), store(_st), acls() {} + + DaosBucket(DaosStore* _st, const RGWBucketEnt& _e) + : StoreBucket(_e), store(_st), acls() {} + + DaosBucket(DaosStore* _st, const RGWBucketInfo& _i) + : StoreBucket(_i), store(_st), acls() {} + + DaosBucket(DaosStore* _st, const rgw_bucket& _b, User* _u) + : StoreBucket(_b, _u), store(_st), 
acls() {} + + DaosBucket(DaosStore* _st, const RGWBucketEnt& _e, User* _u) + : StoreBucket(_e, _u), store(_st), acls() {} + + DaosBucket(DaosStore* _st, const RGWBucketInfo& _i, User* _u) + : StoreBucket(_i, _u), store(_st), acls() {} + + ~DaosBucket(); + + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int list(const DoutPrefixProvider* dpp, ListParams&, int, + ListResults&, optional_yield y) override; + virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children, + bool forward_to_master, req_info* req_info, + optional_yield y) override; + virtual int remove_bucket_bypass_gc(int concurrent_max, + bool keep_index_consistent, + optional_yield y, + const DoutPrefixProvider* dpp) override; + virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } + virtual int set_acl(const DoutPrefixProvider* dpp, + RGWAccessControlPolicy& acl, optional_yield y) override; + virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y, + bool get_stats = false) override; + virtual int read_stats(const DoutPrefixProvider* dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, std::string* bucket_ver, + std::string* master_ver, + std::map& stats, + std::string* max_marker = nullptr, + bool* syncstopped = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider* dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, + RGWGetBucketStats_CB* ctx) override; + virtual int sync_user_stats(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int update_container_stats(const DoutPrefixProvider* dpp) override; + virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override; + virtual int chown(const DoutPrefixProvider* dpp, User& new_user, + optional_yield y) override; + virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive, + ceph::real_time mtime) override; + virtual bool is_owner(User* user) override; + virtual int 
check_empty(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota, + uint64_t obj_size, optional_yield y, + bool check_size_only = false) override; + virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& attrs, + optional_yield y) override; + virtual int try_refresh_info(const DoutPrefixProvider* dpp, + ceph::real_time* pmtime) override; + virtual int read_usage( + const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, + uint64_t end_epoch) override; + virtual int remove_objs_from_index( + const DoutPrefixProvider* dpp, + std::list& objs_to_unlink) override; + virtual int check_index( + const DoutPrefixProvider* dpp, + std::map& existing_stats, + std::map& calculated_stats) override; + virtual int rebuild_index(const DoutPrefixProvider* dpp) override; + virtual int set_tag_timeout(const DoutPrefixProvider* dpp, + uint64_t timeout) override; + virtual int purge_instance(const DoutPrefixProvider* dpp) override; + virtual std::unique_ptr clone() override { + return std::make_unique(*this); + } + virtual std::unique_ptr get_multipart_upload( + const std::string& oid, + std::optional upload_id = std::nullopt, ACLOwner owner = {}, + ceph::real_time mtime = real_clock::now()) override; + virtual int list_multiparts( + const DoutPrefixProvider* dpp, const std::string& prefix, + std::string& marker, const std::string& delim, const int& max_uploads, + std::vector>& uploads, + std::map* common_prefixes, + bool* is_truncated) override; + virtual int abort_multiparts(const DoutPrefixProvider* dpp, + CephContext* cct) override; + + int open(const DoutPrefixProvider* dpp); + int close(const DoutPrefixProvider* dpp); + bool is_open() { return ds3b != nullptr; } + std::unique_ptr 
get_encoded_info( + bufferlist& bl, ceph::real_time mtime); + + friend class DaosStore; +}; + +class DaosPlacementTier : public StorePlacementTier { + DaosStore* store; + RGWZoneGroupPlacementTier tier; + + public: + DaosPlacementTier(DaosStore* _store, const RGWZoneGroupPlacementTier& _tier) + : store(_store), tier(_tier) {} + virtual ~DaosPlacementTier() = default; + + virtual const std::string& get_tier_type() { return tier.tier_type; } + virtual const std::string& get_storage_class() { return tier.storage_class; } + virtual bool retain_head_object() { return tier.retain_head_object; } + RGWZoneGroupPlacementTier& get_rt() { return tier; } +}; + +class DaosZoneGroup : public StoreZoneGroup { + DaosStore* store; + const RGWZoneGroup group; + std::string empty; + + public: + DaosZoneGroup(DaosStore* _store) : store(_store), group() {} + DaosZoneGroup(DaosStore* _store, const RGWZoneGroup& _group) + : store(_store), group(_group) {} + virtual ~DaosZoneGroup() = default; + + virtual const std::string& get_id() const override { return group.get_id(); }; + virtual const std::string& get_name() const override { + return group.get_name(); + }; + virtual int equals(const std::string& other_zonegroup) const override { + return group.equals(other_zonegroup); + }; + /** Get the endpoint from zonegroup, or from master zone if not set */ + virtual const std::string& get_endpoint() const override; + virtual bool placement_target_exists(std::string& target) const override; + virtual bool is_master_zonegroup() const override { + return group.is_master_zonegroup(); + }; + virtual const std::string& get_api_name() const override { + return group.api_name; + }; + virtual int get_placement_target_names( + std::set& names) const override; + virtual const std::string& get_default_placement_name() const override { + return group.default_placement.name; + }; + virtual int get_hostnames(std::list& names) const override { + names = group.hostnames; + return 0; + }; + virtual int 
get_s3website_hostnames( + std::list& names) const override { + names = group.hostnames_s3website; + return 0; + }; + virtual int get_zone_count() const override { return group.zones.size(); } + virtual int get_placement_tier(const rgw_placement_rule& rule, + std::unique_ptr* tier); + virtual std::unique_ptr clone() override { + return std::make_unique(store, group); + } + const RGWZoneGroup& get_group() { return group; } +}; + +class DaosZone : public StoreZone { + protected: + DaosStore* store; + RGWRealm* realm{nullptr}; + DaosZoneGroup zonegroup; + RGWZone* zone_public_config{ + nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */ + RGWZoneParams* zone_params{ + nullptr}; /* internal zone params, e.g., rados pools */ + RGWPeriod* current_period{nullptr}; + rgw_zone_id cur_zone_id; + + public: + DaosZone(DaosStore* _store) : store(_store), zonegroup(_store) { + realm = new RGWRealm(); + zone_public_config = new RGWZone(); + zone_params = new RGWZoneParams(); + current_period = new RGWPeriod(); + cur_zone_id = rgw_zone_id(zone_params->get_id()); + + // XXX: only default and STANDARD supported for now + RGWZonePlacementInfo info; + RGWZoneStorageClasses sc; + sc.set_storage_class("STANDARD", nullptr, nullptr); + info.storage_classes = sc; + zone_params->placement_pools["default"] = info; + } + DaosZone(DaosStore* _store, DaosZoneGroup _zg) + : store(_store), zonegroup(_zg) { + realm = new RGWRealm(); + zone_public_config = new RGWZone(); + zone_params = new RGWZoneParams(); + current_period = new RGWPeriod(); + cur_zone_id = rgw_zone_id(zone_params->get_id()); + + // XXX: only default and STANDARD supported for now + RGWZonePlacementInfo info; + RGWZoneStorageClasses sc; + sc.set_storage_class("STANDARD", nullptr, nullptr); + info.storage_classes = sc; + zone_params->placement_pools["default"] = info; + } + ~DaosZone() = default; + + virtual std::unique_ptr clone() override { + return std::make_unique(store); + } + virtual ZoneGroup& 
get_zonegroup() override; + virtual int get_zonegroup(const std::string& id, + std::unique_ptr* zonegroup) override; + virtual const rgw_zone_id& get_id() override; + virtual const std::string& get_name() const override; + virtual bool is_writeable() override; + virtual bool get_redirect_endpoint(std::string* endpoint) override; + virtual bool has_zonegroup_api(const std::string& api) const override; + virtual const std::string& get_current_period_id() override; + virtual const RGWAccessKey& get_system_key() { + return zone_params->system_key; + } + virtual const std::string& get_realm_name() { return realm->get_name(); } + virtual const std::string& get_realm_id() { return realm->get_id(); } + virtual const std::string_view get_tier_type() { return "rgw"; } + + friend class DaosStore; +}; + +class DaosLuaManager : public StoreLuaManager { + DaosStore* store; + + public: + DaosLuaManager(DaosStore* _s) : store(_s) {} + virtual ~DaosLuaManager() = default; + + virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& key, std::string& script) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; + + virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& key, + const std::string& script) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; + + virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& key) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; + + virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& package_name) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; + + virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& package_name) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; + + virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, + rgw::lua::packages_t& 
packages) override { + DAOS_NOT_IMPLEMENTED_LOG(dpp); + return -ENOENT; + }; +}; + +class DaosObject : public StoreObject { + private: + DaosStore* store; + RGWAccessControlPolicy acls; + + public: + struct DaosReadOp : public StoreReadOp { + private: + DaosObject* source; + + public: + DaosReadOp(DaosObject* _source); + + virtual int prepare(optional_yield y, + const DoutPrefixProvider* dpp) override; + + /* + * Both `read` and `iterate` read up through index `end` + * *inclusive*. The number of bytes that could be returned is + * `end - ofs + 1`. + */ + virtual int read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, + const DoutPrefixProvider* dpp) override; + virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, + RGWGetDataCB* cb, optional_yield y) override; + + virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, + bufferlist& dest, optional_yield y) override; + }; + + struct DaosDeleteOp : public StoreDeleteOp { + private: + DaosObject* source; + + public: + DaosDeleteOp(DaosObject* _source); + + virtual int delete_obj(const DoutPrefixProvider* dpp, + optional_yield y) override; + }; + + ds3_obj_t* ds3o = nullptr; + + DaosObject() = default; + + DaosObject(DaosStore* _st, const rgw_obj_key& _k) + : StoreObject(_k), store(_st), acls() {} + DaosObject(DaosStore* _st, const rgw_obj_key& _k, Bucket* _b) + : StoreObject(_k, _b), store(_st), acls() {} + + DaosObject(DaosObject& _o) = default; + + virtual ~DaosObject(); + + virtual int delete_object(const DoutPrefixProvider* dpp, optional_yield y, + bool prevent_versioning = false) override; + virtual int copy_object( + User* user, req_info* info, const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, ceph::real_time* mtime, + const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, + bool high_precision_time, 
const char* if_match, const char* if_nomatch, + AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, + RGWObjCategory category, uint64_t olh_epoch, + boost::optional delete_at, std::string* version_id, + std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*), + void* progress_data, const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } + virtual int set_acl(const RGWAccessControlPolicy& acl) override { + acls = acl; + return 0; + } + + virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState** state, + optional_yield y, bool follow_olh = true) override; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, + Attrs* delattrs, optional_yield y) override; + virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, + rgw_obj* target_obj = NULL) override; + virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, + optional_yield y, + const DoutPrefixProvider* dpp) override; + virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, + const char* attr_name, + optional_yield y) override; + virtual bool is_expired() override; + virtual void gen_rand_obj_instance_name() override; + virtual std::unique_ptr clone() override { + return std::make_unique(*this); + } + virtual std::unique_ptr get_serializer( + const DoutPrefixProvider* dpp, const std::string& lock_name) override; + virtual int transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, uint64_t olh_epoch, + const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int transition_to_cloud(Bucket* bucket, rgw::sal::PlacementTier* tier, + rgw_bucket_dir_entry& o, + std::set& cloud_targets, + CephContext* cct, bool update_object, + const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual bool placement_rules_match(rgw_placement_rule& r1, + rgw_placement_rule& r2) override; + virtual int 
dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y, + Formatter* f) override; + + /* Swift versioning */ + virtual int swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) override; + virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, + optional_yield y) override; + + /* OPs */ + virtual std::unique_ptr get_read_op() override; + virtual std::unique_ptr get_delete_op() override; + + /* OMAP */ + virtual int omap_get_vals_by_keys(const DoutPrefixProvider* dpp, + const std::string& oid, + const std::set& keys, + Attrs* vals) override; + virtual int omap_set_val_by_key(const DoutPrefixProvider* dpp, + const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) override; + virtual int chown(User& new_user, const DoutPrefixProvider* dpp, + optional_yield y) override; + + bool is_open() { return ds3o != nullptr; }; + // Only lookup the object, do not create + int lookup(const DoutPrefixProvider* dpp); + // Create the object, truncate if exists + int create(const DoutPrefixProvider* dpp); + // Release the daos resources + int close(const DoutPrefixProvider* dpp); + // Write to object starting from offset + int write(const DoutPrefixProvider* dpp, bufferlist&& data, uint64_t offset); + // Read size bytes from object starting from offset + int read(const DoutPrefixProvider* dpp, bufferlist& data, uint64_t offset, + uint64_t& size); + // Get the object's dirent and attrs + int get_dir_entry_attrs(const DoutPrefixProvider* dpp, + rgw_bucket_dir_entry* ent, Attrs* getattrs = nullptr); + // Set the object's dirent and attrs + int set_dir_entry_attrs(const DoutPrefixProvider* dpp, + rgw_bucket_dir_entry* ent, Attrs* setattrs = nullptr); + // Marks this DAOS object as being the latest version and unmarks all other + // versions as latest + int mark_as_latest(const DoutPrefixProvider* dpp, ceph::real_time set_mtime); + // get_bucket casted as DaosBucket* + DaosBucket* get_daos_bucket() { + return 
static_cast(get_bucket()); + } +}; + +// A placeholder locking class for multipart upload. +class MPDaosSerializer : public StoreMPSerializer { + public: + MPDaosSerializer(const DoutPrefixProvider* dpp, DaosStore* store, + DaosObject* obj, const std::string& lock_name) {} + + virtual int try_lock(const DoutPrefixProvider* dpp, utime_t dur, + optional_yield y) override { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); + } + virtual int unlock() override { return DAOS_NOT_IMPLEMENTED_LOG(nullptr); } +}; + +class DaosAtomicWriter : public StoreWriter { + protected: + rgw::sal::DaosStore* store; + const rgw_user& owner; + const rgw_placement_rule* ptail_placement_rule; + uint64_t olh_epoch; + const std::string& unique_tag; + DaosObject obj; + uint64_t total_data_size = 0; // for total data being uploaded + + public: + DaosAtomicWriter(const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, + DaosStore* _store, const rgw_user& _owner, + const rgw_placement_rule* _ptail_placement_rule, + uint64_t _olh_epoch, const std::string& _unique_tag); + ~DaosAtomicWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time* mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, const char* if_match, + const char* if_nomatch, const std::string* user_data, + rgw_zone_set* zones_trace, bool* canceled, + optional_yield y) override; +}; + +class DaosMultipartWriter : public StoreWriter { + protected: + rgw::sal::DaosStore* store; + MultipartUpload* upload; + std::string upload_id; + + // Part parameters. 
+ const uint64_t part_num; + const std::string part_num_str; + uint64_t actual_part_size = 0; + + ds3_part_t* ds3p = nullptr; + bool is_open() { return ds3p != nullptr; }; + + public: + DaosMultipartWriter(const DoutPrefixProvider* dpp, optional_yield y, + MultipartUpload* _upload, + rgw::sal::Object* obj, + DaosStore* _store, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, + uint64_t _part_num, const std::string& part_num_str) + : StoreWriter(dpp, y), + store(_store), + upload(_upload), + upload_id(_upload->get_upload_id()), + part_num(_part_num), + part_num_str(part_num_str) {} + virtual ~DaosMultipartWriter(); + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time* mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, const char* if_match, + const char* if_nomatch, const std::string* user_data, + rgw_zone_set* zones_trace, bool* canceled, + optional_yield y) override; + + const std::string& get_bucket_name(); +}; + +class DaosMultipartPart : public StoreMultipartPart { + protected: + RGWUploadPartInfo info; + + public: + DaosMultipartPart() = default; + virtual ~DaosMultipartPart() = default; + + virtual uint32_t get_num() { return info.num; } + virtual uint64_t get_size() { return info.accounted_size; } + virtual const std::string& get_etag() { return info.etag; } + virtual ceph::real_time& get_mtime() { return info.modified; } + + friend class DaosMultipartUpload; +}; + +class DaosMultipartUpload : public StoreMultipartUpload { + DaosStore* store; + RGWMPObj mp_obj; + ACLOwner owner; + ceph::real_time mtime; + rgw_placement_rule placement; + RGWObjManifest manifest; + + public: + DaosMultipartUpload(DaosStore* _store, 
Bucket* _bucket, + const std::string& oid, + std::optional upload_id, ACLOwner _owner, + ceph::real_time _mtime) + : StoreMultipartUpload(_bucket), + store(_store), + mp_obj(oid, upload_id), + owner(_owner), + mtime(_mtime) {} + virtual ~DaosMultipartUpload() = default; + + virtual const std::string& get_meta() const { return mp_obj.get_meta(); } + virtual const std::string& get_key() const { return mp_obj.get_key(); } + virtual const std::string& get_upload_id() const { + return mp_obj.get_upload_id(); + } + virtual const ACLOwner& get_owner() const override { return owner; } + virtual ceph::real_time& get_mtime() { return mtime; } + virtual std::unique_ptr get_meta_obj() override; + virtual int init(const DoutPrefixProvider* dpp, optional_yield y, + ACLOwner& owner, rgw_placement_rule& dest_placement, + rgw::sal::Attrs& attrs) override; + virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct, + int num_parts, int marker, int* next_marker, + bool* truncated, + bool assume_unsorted = false) override; + virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; + virtual int complete(const DoutPrefixProvider* dpp, optional_yield y, + CephContext* cct, std::map& part_etags, + std::list& remove_objs, + uint64_t& accounted_size, bool& compressed, + RGWCompressionInfo& cs_info, off_t& off, + std::string& tag, ACLOwner& owner, uint64_t olh_epoch, + rgw::sal::Object* target_obj) override; + virtual int get_info(const DoutPrefixProvider* dpp, optional_yield y, + rgw_placement_rule** rule, + rgw::sal::Attrs* attrs = nullptr) override; + virtual std::unique_ptr get_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, uint64_t part_num, + const std::string& part_num_str) override; + const std::string& get_bucket_name() { return bucket->get_name(); } +}; + +class DaosStore : public StoreDriver { + private: + DaosZone zone; + 
RGWSyncModuleInstanceRef sync_module; + + public: + ds3_t* ds3 = nullptr; + + CephContext* cctx; + + DaosStore(CephContext* c) : zone(this), cctx(c) {} + ~DaosStore() = default; + + virtual const std::string get_name() const override { return "daos"; } + + virtual std::unique_ptr get_user(const rgw_user& u) override; + virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int get_user_by_access_key(const DoutPrefixProvider* dpp, + const std::string& key, optional_yield y, + std::unique_ptr* user) override; + virtual int get_user_by_email(const DoutPrefixProvider* dpp, + const std::string& email, optional_yield y, + std::unique_ptr* user) override; + virtual int get_user_by_swift(const DoutPrefixProvider* dpp, + const std::string& user_str, optional_yield y, + std::unique_ptr* user) override; + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, + const rgw_bucket& b, std::unique_ptr* bucket, + optional_yield y) override; + virtual int get_bucket(User* u, const RGWBucketInfo& i, + std::unique_ptr* bucket) override; + virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, + const std::string& tenant, const std::string& name, + std::unique_ptr* bucket, + optional_yield y) override; + virtual bool is_meta_master() override; + virtual int forward_request_to_master(const DoutPrefixProvider* dpp, + User* user, obj_version* objv, + bufferlist& in_data, JSONParser* jp, + req_info& info, + optional_yield y) override; + virtual int forward_iam_request_to_master( + const DoutPrefixProvider* dpp, const RGWAccessKey& key, obj_version* objv, + bufferlist& in_data, RGWXMLDecoder::XMLParser* parser, req_info& info, + optional_yield y) override; + virtual Zone* get_zone() { return &zone; } + virtual std::string zone_unique_id(uint64_t unique_num) override; + virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; + virtual 
int cluster_stat(RGWClusterStat& stats) override; + virtual std::unique_ptr get_lifecycle(void) override; + virtual std::unique_ptr get_notification( + rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s, + rgw::notify::EventType event_type, optional_yield y, + const std::string* object_name = nullptr) override; + virtual std::unique_ptr get_notification( + const DoutPrefixProvider* dpp, rgw::sal::Object* obj, + rgw::sal::Object* src_obj, rgw::notify::EventType event_type, + rgw::sal::Bucket* _bucket, std::string& _user_id, + std::string& _user_tenant, std::string& _req_id, + optional_yield y) override; + virtual RGWLC* get_rgwlc(void) override { return NULL; } + virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { + return NULL; + } + + virtual int log_usage( + const DoutPrefixProvider* dpp, + std::map& usage_info) override; + virtual int log_op(const DoutPrefixProvider* dpp, std::string& oid, + bufferlist& bl) override; + virtual int register_to_service_map( + const DoutPrefixProvider* dpp, const std::string& daemon_type, + const std::map& meta) override; + virtual void get_quota(RGWQuota& quota) override; + virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, + RGWRateLimitInfo& user_ratelimit, + RGWRateLimitInfo& anon_ratelimit) override; + virtual int set_buckets_enabled(const DoutPrefixProvider* dpp, + std::vector& buckets, + bool enabled) override; + virtual uint64_t get_new_req_id() override { + return DAOS_NOT_IMPLEMENTED_LOG(nullptr); + } + virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef* phandler, + optional_yield y) override; + virtual RGWDataSyncStatusManager* get_data_sync_manager( + const rgw_zone_id& source_zone) override; + virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { + return; + } + virtual void wakeup_data_sync_shards( + const DoutPrefixProvider* dpp, const rgw_zone_id& source_zone, + 
boost::container::flat_map< + int, boost::container::flat_set>& shard_ids) + override { + return; + } + virtual int clear_usage(const DoutPrefixProvider* dpp) override { + return DAOS_NOT_IMPLEMENTED_LOG(dpp); + } + virtual int read_all_usage( + const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_all_usage(const DoutPrefixProvider* dpp, + uint64_t start_epoch, uint64_t end_epoch) override; + virtual int get_config_key_val(std::string name, bufferlist* bl) override; + virtual int meta_list_keys_init(const DoutPrefixProvider* dpp, + const std::string& section, + const std::string& marker, + void** phandle) override; + virtual int meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle, + int max, std::list& keys, + bool* truncated) override; + virtual void meta_list_keys_complete(void* handle) override; + virtual std::string meta_get_marker(void* handle) override; + virtual int meta_remove(const DoutPrefixProvider* dpp, + std::string& metadata_key, optional_yield y) override; + + virtual const RGWSyncModuleInstanceRef& get_sync_module() { + return sync_module; + } + virtual std::string get_host_id() { return ""; } + + virtual std::unique_ptr get_lua_manager() override; + virtual std::unique_ptr get_role( + std::string name, std::string tenant, std::string path = "", + std::string trust_policy = "", std::string max_session_duration_str = "", + std::multimap tags = {}) override; + virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; + virtual std::unique_ptr get_role(std::string id) override; + virtual int get_roles(const DoutPrefixProvider* dpp, optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + std::vector>& roles) override; + virtual std::unique_ptr get_oidc_provider() override; + virtual int get_oidc_providers( + const DoutPrefixProvider* dpp, const std::string& tenant, + 
std::vector>& providers) override; + virtual std::unique_ptr get_append_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, + const std::string& unique_tag, uint64_t position, + uint64_t* cur_accounted_size) override; + virtual std::unique_ptr get_atomic_writer( + const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Object* obj, const rgw_user& owner, + const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch, + const std::string& unique_tag) override; + virtual const std::string& get_compression_type( + const rgw_placement_rule& rule) override; + virtual bool valid_placement(const rgw_placement_rule& rule) override; + + virtual void finalize(void) override; + + virtual CephContext* ctx(void) override { return cctx; } + + virtual int initialize(CephContext* cct, + const DoutPrefixProvider* dpp) override; +}; + +} // namespace rgw::sal diff --git a/src/rgw/driver/motr/rgw_sal_motr.cc b/src/rgw/driver/motr/rgw_sal_motr.cc new file mode 100644 index 000000000000..a1bca8b5696d --- /dev/null +++ b/src/rgw/driver/motr/rgw_sal_motr.cc @@ -0,0 +1,4005 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=2 sw=2 expandtab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * SAL implementation for the CORTX Motr backend + * + * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ *
+ */
+
+// NOTE(review): the header names of the next three #include lines are missing
+// from this copy of the patch — confirm against the original commit.
+#include
+#include
+#include
+
+extern "C" {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wextern-c-compat"
+#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
+#include "motr/config.h"
+#include "lib/types.h"
+#include "lib/trace.h" // m0_trace_set_mmapped_buffer
+#include "motr/layout.h" // M0_OBJ_LAYOUT_ID
+#include "helpers/helpers.h" // m0_ufid_next
+#pragma clang diagnostic pop
+}
+
+#include "common/Clock.h"
+#include "common/errno.h"
+
+#include "rgw_compression.h"
+#include "rgw_sal.h"
+#include "rgw_sal_motr.h"
+#include "rgw_bucket.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using std::string;
+using std::map;
+using std::vector;
+using std::set;
+using std::list;
+
+static string mp_ns = RGW_OBJ_NS_MULTIPART;
+static struct m0_ufid_generator ufid_gr;
+
+namespace rgw::sal {
+
+using ::ceph::encode;
+using ::ceph::decode;
+
+// Names of the global Motr indices referenced by this driver.
+static std::string motr_global_indices[] = {
+  RGW_MOTR_USERS_IDX_NAME,
+  RGW_MOTR_BUCKET_INST_IDX_NAME,
+  RGW_MOTR_BUCKET_HD_IDX_NAME,
+  RGW_IAM_MOTR_ACCESS_KEY,
+  RGW_IAM_MOTR_EMAIL_KEY
+};
+
+// Drop the local cache entry for `name`. Unlike remove(), this does not call
+// distribute_cache().
+void MotrMetaCache::invalid(const DoutPrefixProvider *dpp,
+                            const string& name)
+{
+  cache.invalidate_remove(dpp, name);
+}
+
+// Store `data` in the local cache under `name`, stamped with the current time
+// and the data length, then call distribute_cache() with UPDATE_OBJ.
+// Always returns 0: a distribution failure is logged but not propagated.
+int MotrMetaCache::put(const DoutPrefixProvider *dpp,
+                       const string& name,
+                       const bufferlist& data)
+{
+  ldpp_dout(dpp, 0) << "Put into cache: name = " << name << dendl;
+
+  ObjectCacheInfo info;
+  info.status = 0;
+  info.data = data;
+  info.flags = CACHE_FLAG_DATA;
+  info.meta.mtime = ceph::real_clock::now();
+  info.meta.size = data.length();
+  cache.put(dpp, name, info, NULL);
+
+  // Inform other rgw instances. Do nothing if it gets some error?
+  int rc = distribute_cache(dpp, name, info, UPDATE_OBJ);
+  if (rc < 0)
+    ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << name << dendl;
+
+  return 0;
+}
+
+// Look up `name` in the local cache.
+// Returns 0 on a hit, after replacing the contents of `data` with the cached
+// bytes; returns the cached status if that status is negative; maps a
+// -ENODATA lookup result to -ENOENT; otherwise returns the lookup error as is.
+int MotrMetaCache::get(const DoutPrefixProvider *dpp,
+                       const string& name,
+                       bufferlist& data)
+{
+  ObjectCacheInfo info;
+  uint32_t flags = CACHE_FLAG_DATA;
+  int rc = cache.get(dpp, name, info, flags, NULL);
+  if (rc == 0) {
+    if (info.status < 0)
+      return info.status;
+
+    bufferlist& bl = info.data;
+    bufferlist::iterator it = bl.begin();
+    data.clear();
+
+    it.copy_all(data);
+    ldpp_dout(dpp, 0) << "Cache hit: name = " << name << dendl;
+    return 0;
+  }
+  ldpp_dout(dpp, 0) << "Cache miss: name = " << name << ", rc = "<< rc << dendl;
+  if(rc == -ENODATA)
+    return -ENOENT;
+
+  return rc;
+}
+
+// Drop the local cache entry for `name` and call distribute_cache() with
+// INVALIDATE_OBJ. Always returns 0: a distribution failure is only logged.
+int MotrMetaCache::remove(const DoutPrefixProvider *dpp,
+                          const string& name)
+
+{
+  cache.invalidate_remove(dpp, name);
+
+  ObjectCacheInfo info;
+  int rc = distribute_cache(dpp, name, info, INVALIDATE_OBJ);
+  if (rc < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " <<__func__<< "(): failed to distribute cache: rc =" << rc << dendl;
+  }
+
+  ldpp_dout(dpp, 0) << "Remove from cache: name = " << name << dendl;
+  return 0;
+}
+
+// Placeholder: performs no distribution and always returns 0.
+int MotrMetaCache::distribute_cache(const DoutPrefixProvider *dpp,
+                                    const string& normal_name,
+                                    ObjectCacheInfo& obj_info, int op)
+{
+  return 0;
+}
+
+// Placeholder: ignores the notification and always returns 0.
+int MotrMetaCache::watch_cb(const DoutPrefixProvider *dpp,
+                            uint64_t notify_id,
+                            uint64_t cookie,
+                            uint64_t notifier_id,
+                            bufferlist& bl)
+{
+  return 0;
+}
+
+// Forward the enabled/disabled state to the underlying cache object.
+void MotrMetaCache::set_enabled(bool status)
+{
+  cache.set_enabled(status);
+}
+
+// TODO: properly handle the number of key/value pairs to get in
+// one query. Now the POC simply tries to retrieve all `max` number of pairs
+// with starting key `marker`.
+// List up to `max` buckets owned by this user, starting at key `marker`, by
+// querying the per-user index "motr.rgw.user.info.<user id>".
+//
+// Listing stops at the first empty value returned by the index, or at the
+// first entry whose bucket marker is >= `end_marker` (when `end_marker` is
+// non-empty). `buckets` is cleared first and is flagged truncated when exactly
+// `max` entries were added. `need_stats` and `y` are not used.
+//
+// Returns 0 on success or the negative error from the index query.
+int MotrUser::list_buckets(const DoutPrefixProvider *dpp, const string& marker,
+                           const string& end_marker, uint64_t max, bool need_stats,
+                           BucketList &buckets, optional_yield y)
+{
+  ldpp_dout(dpp, 20) <<__func__<< ": list_user_buckets: marker=" << marker
+                     << " end_marker=" << end_marker
+                     << " max=" << max << dendl;
+
+  buckets.clear();
+
+  // Nothing was requested. Return before touching keys[0], which would be an
+  // out-of-bounds write on a zero-length vector.
+  if (max == 0) {
+    buckets.set_truncated(false);
+    return 0;
+  }
+
+  // Retrieve all `max` number of pairs.
+  vector<string> keys(max);
+  vector<bufferlist> vals(max);
+  string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
+  keys[0] = marker;
+  int rc = store->next_query_by_name(user_info_iname, keys, vals);
+  if (rc < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+    return rc;
+  }
+
+  // Process the returned pairs to add into BucketList.
+  uint64_t bcount = 0;
+  for (const auto& bl: vals) {
+    if (bl.length() == 0)
+      break;
+
+    RGWBucketEnt ent;
+    auto iter = bl.cbegin();
+    ent.decode(iter);
+
+    std::time_t ctime = ceph::real_clock::to_time_t(ent.creation_time);
+    ldpp_dout(dpp, 20) << "got creation time: " << std::put_time(std::localtime(&ctime), "%F %T") << dendl;
+
+    if (!end_marker.empty() &&
+         end_marker.compare(ent.bucket.marker) <= 0)
+      break;
+
+    buckets.add(std::make_unique<MotrBucket>(this->store, ent, this));
+    bcount++;
+  }
+  buckets.set_truncated(bcount == max);
+
+  return 0;
+}
+
+// Create bucket `b` for this user.
+//
+// If the bucket already exists, `*existed` is set to true and -EEXIST is
+// returned; `bucket_out` is left untouched. Otherwise `*existed` is set to
+// false, the placement rule is forced to "default"/"STANDARD", `info` is
+// filled in, the bucket instance record plus the bucket and multipart indices
+// are created, and the bucket is linked into the user's index.
+//
+// `existed` is dereferenced unconditionally. `swift_ver_location`,
+// `pquota_info`, `policy`, `exclusive` and `req_info` are not used here.
+//
+// Returns 0 on success or a negative error code.
+int MotrUser::create_bucket(const DoutPrefixProvider* dpp,
+                            const rgw_bucket& b,
+                            const std::string& zonegroup_id,
+                            rgw_placement_rule& placement_rule,
+                            std::string& swift_ver_location,
+                            const RGWQuotaInfo* pquota_info,
+                            const RGWAccessControlPolicy& policy,
+                            Attrs& attrs,
+                            RGWBucketInfo& info,
+                            obj_version& ep_objv,
+                            bool exclusive,
+                            bool obj_lock_enabled,
+                            bool* existed,
+                            req_info& req_info,
+                            std::unique_ptr<Bucket>* bucket_out,
+                            optional_yield y)
+{
+  int ret;
+  std::unique_ptr<Bucket> bucket;
+
+  // Look up the bucket. Create it if it doesn't exist.
+  ret = this->store->get_bucket(dpp, this, b, &bucket, y);
+  if (ret < 0 && ret != -ENOENT)
+    return ret;
+
+  if (ret != -ENOENT) {
+    // TODO: ACL policy. Other drivers compare the existing bucket policy with
+    // `policy` here; this driver reports every existing bucket as a conflict.
+    *existed = true;
+    return -EEXIST;
+  }
+
+  placement_rule.name = "default";
+  placement_rule.storage_class = "STANDARD";
+  bucket = std::make_unique<MotrBucket>(store, b, this);
+  bucket->set_attrs(attrs);
+  *existed = false;
+
+  // TODO: how to handle zone and multi-site.
+  info.placement_rule = placement_rule;
+  info.bucket = b;
+  info.owner = this->get_info().user_id;
+  info.zonegroup = zonegroup_id;
+  if (obj_lock_enabled)
+    info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
+  bucket->set_version(ep_objv);
+  bucket->get_info() = info;
+
+  // Create a new bucket: (1) Add a key/value pair in the
+  // bucket instance index. (2) Create a new bucket index.
+  MotrBucket* mbucket = static_cast<MotrBucket*>(bucket.get());
+  ret = mbucket->put_info(dpp, y, ceph::real_time())? :
+        mbucket->create_bucket_index() ? :
+        mbucket->create_multipart_indices();
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to create bucket indices! " << ret << dendl;
+    // Stop here: linking a bucket whose indices were not created would
+    // overwrite `ret` and report a half-created bucket as success.
+    return ret;
+  }
+
+  // Insert the bucket entry into the user info index.
+  ret = mbucket->link_user(dpp, this, y);
+  if (ret < 0)
+    ldpp_dout(dpp, 0) << "ERROR: failed to add bucket entry! " << ret << dendl;
+
+  bucket_out->swap(bucket);
+
+  return ret;
+}
+
+// Not implemented for this driver: always returns 0.
+int MotrUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y)
+{
+  return 0;
+}
+
+// Not implemented for this driver: leaves the outputs untouched, returns 0.
+int MotrUser::read_stats(const DoutPrefixProvider *dpp,
+    optional_yield y, RGWStorageStats* stats,
+    ceph::real_time *last_stats_sync,
+    ceph::real_time *last_stats_update)
+{
+  return 0;
+}
+
+/* stats - Not for first pass */
+int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb)
+{
+  return 0;
+}
+
+// Not implemented for this driver: always returns 0.
+int MotrUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  return 0;
+}
+
+// Not implemented for this driver: leaves the outputs untouched, returns 0.
+int MotrUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
+    bool *is_truncated, RGWUsageIter& usage_iter,
+    map<rgw_user_bucket, rgw_usage_log_entry>& usage)
+{
+  return 0;
+}
+
+// Not implemented for this driver: always returns 0.
+int MotrUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
+{
+  return 0;
+}
+
+int MotrUser::load_user_from_idx(const DoutPrefixProvider *dpp,
+                              MotrStore *store,
+                              RGWUserInfo& info, map<string, bufferlist> *attrs,
+                              RGWObjVersionTracker *objv_tr)
+{
+  struct MotrUserInfo muinfo;
+  bufferlist bl;
+  ldpp_dout(dpp, 20) << "info.user_id.id = " << info.user_id.id << dendl;
+  if (store->get_user_cache()->get(dpp, info.user_id.id, bl)) {
+    // Cache misses
+    int rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+                                      M0_IC_GET, info.user_id.to_str(), bl);
+    ldpp_dout(dpp, 20) << "do_idx_op_by_name() = " << rc << dendl;
+    if (rc < 0)
+        return rc;
+
+    // Put into cache.
+    store->get_user_cache()->put(dpp, info.user_id.id, bl);
+  }
+
+  bufferlist& blr = bl;
+  auto iter = blr.cbegin();
+  muinfo.decode(iter);
+  info = muinfo.info;
+  if (attrs)
+    *attrs = muinfo.attrs;
+  if (objv_tr)
+  {
+    objv_tr->read_version = muinfo.user_version;
+    objv_tracker.read_version = objv_tr->read_version;
+  }
+
+  // Remember every access key of the loaded user; store_user() compares this
+  // tracker with the current key set to find keys that were removed.
+  for (const auto& key : info.access_keys) {
+    access_key_tracker.insert(key.first);
+  }
+
+  return 0;
+}
+
+// Load this user's info, attrs and version from the user index (or the cache).
+int MotrUser::load_user(const DoutPrefixProvider *dpp,
+                        optional_yield y)
+{
+  ldpp_dout(dpp, 20) << "load user: user id = " << info.user_id.to_str() << dendl;
+  return load_user_from_idx(dpp, store, info, &attrs, &objv_tracker);
+}
+
+// Create the per-user index "motr.rgw.user.info.<user id>".
+int MotrUser::create_user_info_idx()
+{
+  string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
+  return store->create_motr_idx_by_name(user_info_iname);
+}
+
+// Overlay `new_attrs` onto the in-memory attrs (existing keys are overwritten)
+// and persist the user non-exclusively.
+int MotrUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y)
+{
+  for (auto& it : new_attrs)
+    attrs[it.first] = it.second;
+
+  return store_user(dpp, y, false);
+}
+
+// Persist this user (info, attrs, version) into the user index, then bring the
+// access-key index, email index, per-user bucket index and user cache up to
+// date.
+//
+// If the user already exists with a version > 0:
+//   - `*old_info` (when non-null) receives the stored info;
+//   - -ECANCELED is returned when the stored version differs from the version
+//     this object last read;
+//   - when `exclusive` is set the function returns without writing.
+//
+// Returns 0 on success or a negative error code.
+int MotrUser::store_user(const DoutPrefixProvider* dpp,
+                         optional_yield y, bool exclusive, RGWUserInfo* old_info)
+{
+  bufferlist bl;
+  struct MotrUserInfo muinfo;
+  RGWUserInfo orig_info;
+  RGWObjVersionTracker objv_tr = {};
+  obj_version& obj_ver = objv_tr.read_version;
+
+  ldpp_dout(dpp, 20) << "Store_user(): User = " << info.user_id.id << dendl;
+  orig_info.user_id = info.user_id;
+  // XXX: we open and close motr idx 2 times in this method:
+  // 1) on load_user_from_idx() here and 2) on do_idx_op_by_name(PUT) below.
+  // Maybe this can be optimised later somehow.
+  int rc = load_user_from_idx(dpp, store, orig_info, nullptr, &objv_tr);
+  ldpp_dout(dpp, 10) << "Get user: rc = " << rc << dendl;
+
+  // Check if the user already exists
+  if (rc == 0 && obj_ver.ver > 0) {
+    if (old_info)
+      *old_info = orig_info;
+
+    if (obj_ver.ver != objv_tracker.read_version.ver) {
+      ldpp_dout(dpp, 0) << "ERROR: User Read version mismatch" << dendl;
+      return -ECANCELED;
+    }
+
+    // NOTE(review): rc is 0 here, so an exclusive store of an existing user
+    // reports success without writing — confirm callers do not expect -EEXIST.
+    if (exclusive)
+      return rc;
+
+    obj_ver.ver++;
+  } else {
+    obj_ver.ver = 1;
+    obj_ver.tag = "UserTAG";
+  }
+
+  // Insert the user to user info index.
+  muinfo.info = info;
+  muinfo.attrs = attrs;
+  muinfo.user_version = obj_ver;
+  muinfo.encode(bl);
+  rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+                                M0_IC_PUT, info.user_id.to_str(), bl);
+  ldpp_dout(dpp, 10) << "Store user to motr index: rc = " << rc << dendl;
+  if (rc < 0) {
+    // The user record was not written. Stop here: continuing would overwrite
+    // `rc` below and report a failed store as success.
+    return rc;
+  }
+  objv_tracker.read_version = obj_ver;
+  objv_tracker.write_version = obj_ver;
+
+  // Store access key in access key index
+  // NOTE(review): only the first access key is stored and the result of
+  // store_access_key() is ignored — confirm this is intended.
+  if (!info.access_keys.empty()) {
+    std::map<std::string, RGWAccessKey>::const_iterator iter = info.access_keys.begin();
+    const RGWAccessKey& k = iter->second;
+    const std::string access_key = k.id;
+    const std::string secret_key = k.key;
+    MotrAccessKey MGWUserKeys(access_key, secret_key, info.user_id.to_str());
+    store->store_access_key(dpp, y, MGWUserKeys);
+    access_key_tracker.insert(access_key);
+  }
+
+  // Check if any key need to be deleted
+  if (access_key_tracker.size() != info.access_keys.size()) {
+    // Collect the stale keys first: erasing from the tracker while iterating
+    // over it would invalidate the loop iterator.
+    vector<string> stale_keys;
+    for (const auto& key : access_key_tracker) {
+      if (!info.get_key(key))
+        stale_keys.push_back(key);
+    }
+
+    for (const auto& key : stale_keys) {
+      ldpp_dout(dpp, 0) << "Deleting access key: " << key << dendl;
+      // Use a separate result variable so a failed key deletion is detected
+      // (the result used to be discarded) without clobbering `rc`.
+      int del_rc = store->delete_access_key(dpp, y, key);
+      if (del_rc < 0) {
+        ldpp_dout(dpp, 0) << "Unable to delete access key" << del_rc << dendl;
+        continue; // keep it tracked so a later store can retry
+      }
+      access_key_tracker.erase(key);
+    }
+  }
+
+  if (!info.user_email.empty()) {
+    MotrEmailInfo MGWEmailInfo(info.user_id.to_str(), info.user_email);
+    store->store_email_info(dpp, y, MGWEmailInfo);
+  }
+
+  // Create user info index to store all buckets that belong
+  // to this user.
+  rc = create_user_info_idx();
+  if (rc < 0 && rc != -EEXIST) {
+    ldpp_dout(dpp, 0) << "Failed to create user info index: rc = " << rc << dendl;
+    return rc;
+  }
+
+  // Put the user info into cache.
+  return store->get_user_cache()->put(dpp, info.user_id.id, bl);
+}
+
+int MotrUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y)
+{
+  // Remove user info from cache
+  // Delete access keys for user
+  // Delete user info
+  // Delete user from user index
+  // Delete email for user - TODO
+  bufferlist bl;
+  int rc;
+  // Remove the user info from cache.
+  store->get_user_cache()->remove(dpp, info.user_id.id);
+
+  // Delete all access key of user
+  if (!info.access_keys.empty()) {
+    for(auto acc_key = info.access_keys.begin(); acc_key != info.access_keys.end(); acc_key++) {
+      auto access_key = acc_key->first;
+      rc = store->delete_access_key(dpp, y, access_key);
+      // TODO
+      // Check error code for access_key does not exist
+      // Continue to next step only if delete failed because key doesn't exists
+      if (rc < 0){
+        ldpp_dout(dpp, 0) << "Unable to delete access key" << rc << dendl;
+      }
+    }
+  }
+
+  //Delete email id
+  if (!info.user_email.empty()) {
+    rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+                                  M0_IC_DEL, info.user_email, bl);
+    if (rc < 0 && rc != -ENOENT) {
+      ldpp_dout(dpp, 0) << "Unable to delete email id " << rc << dendl;
+    }
+  }
+
+  // Delete user info index
+  string user_info_iname = "motr.rgw.user.info."
+ info.user_id.to_str();
+  store->delete_motr_idx_by_name(user_info_iname);
+  ldpp_dout(dpp, 10) << "Deleted user info index - " << user_info_iname << dendl;
+
+  // Delete user from user index
+  rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+                                M0_IC_DEL, info.user_id.to_str(), bl);
+  if (rc < 0){
+    ldpp_dout(dpp, 0) << "Unable to delete user from user index " << rc << dendl;
+    return rc;
+  }
+
+  // TODO
+  // Delete email for user
+  // rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+  //                               M0_IC_DEL, info.user_email, bl);
+  // if (rc < 0){
+  //   ldpp_dout(dpp, 0) << "Unable to delete email for user" << rc << dendl;
+  //   return rc;
+  // }
+  return 0;
+}
+
+// MFA is not supported by this driver: always reports "not verified" and
+// returns 0. `verified` is dereferenced unconditionally.
+int MotrUser::verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider *dpp, optional_yield y)
+{
+  *verified = false;
+  return 0;
+}
+
+// Remove this bucket and everything recorded for it.
+//
+// Steps, in order: reload the bucket info; list the bucket's objects and
+// either delete them (`delete_children`) or fail with -ENOTEMPTY; abort
+// multipart uploads; delete the multipart index; sync user stats (a failure
+// is only logged); unlink the bucket from its owner; delete the bucket index;
+// drop the bucket instance from the cache and from the instance index;
+// optionally forward the request to the master zone.
+//
+// `req_info` is dereferenced only when `forward_to_master` is true.
+//
+// Returns the result of the last step executed: a negative error code from
+// the first failing step (other than the stats sync), otherwise the result
+// of the final step.
+int MotrBucket::remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y)
+{
+  int ret;
+
+  ldpp_dout(dpp, 20) << "remove_bucket Entry=" << info.bucket.name << dendl;
+
+  // Refresh info
+  ret = load_bucket(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket load_bucket failed rc=" << ret << dendl;
+    return ret;
+  }
+
+  ListParams params;
+  params.list_versions = true;
+  params.allow_unordered = true;
+
+  ListResults results;
+
+  // 1. Check if Bucket has objects.
+  // If bucket contains objects and delete_children is true, delete all objects.
+  // Else throw error that bucket is not empty.
+  do {
+    results.objs.clear();
+
+    // Check if bucket has objects.
+    ret = list(dpp, params, 1000, results, y);
+    if (ret < 0) {
+      return ret;
+    }
+
+    // If result contains entries, bucket is not empty.
+    if (!results.objs.empty() && !delete_children) {
+      ldpp_dout(dpp, 0) << "ERROR: could not remove non-empty bucket " << info.bucket.name << dendl;
+      return -ENOTEMPTY;
+    }
+
+    for (const auto& obj : results.objs) {
+      rgw_obj_key key(obj.key);
+      if (key.instance.empty()) {
+        key.instance = "null";
+      }
+
+      std::unique_ptr<rgw::sal::Object> object = get_object(key);
+
+      // An object that is already gone (-ENOENT) is not an error here.
+      ret = object->delete_object(dpp, null_yield);
+      if (ret < 0 && ret != -ENOENT) {
+        ldpp_dout(dpp, 0) << "ERROR: remove_bucket rgw_remove_object failed rc=" << ret << dendl;
+        return ret;
+      }
+    }
+  } while(results.is_truncated);
+
+  // 2. Abort Mp uploads on the bucket.
+  ret = abort_multiparts(dpp, store->ctx());
+  if (ret < 0) {
+    return ret;
+  }
+
+  // 3. Remove mp index??
+  string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts";
+  ret = store->delete_motr_idx_by_name(bucket_multipart_iname);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove multipart index rc=" << ret << dendl;
+    return ret;
+  }
+
+  // 4. Sync user stats. Best effort: a failure is logged and ignored.
+  ret = this->sync_user_stats(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
+  }
+
+  // 5. Remove the bucket from user info index. (unlink user)
+  ret = this->unlink_user(dpp, owner, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl;
+    return ret;
+  }
+
+  // 6. Remove bucket index.
+  string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
+  ret = store->delete_motr_idx_by_name(bucket_index_iname);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket index rc=" << ret << dendl;
+    return ret;
+  }
+
+  // 7. Remove bucket instance info.
+  bufferlist bl;
+  ret = store->get_bucket_inst_cache()->remove(dpp, info.bucket.name);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance from cache rc="
+                      << ret << dendl;
+    return ret;
+  }
+
+  ret = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
+                                 M0_IC_DEL, info.bucket.name, bl);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance rc="
+                      << ret << dendl;
+    return ret;
+  }
+
+  // TODO :
+  // 8. Remove Notifications
+  // if bucket has notification definitions associated with it
+  // they should be removed (note that any pending notifications on the bucket are still going to be sent)
+
+  // 9. Forward request to master.
+  if (forward_to_master) {
+    bufferlist in_data;
+    ret = store->forward_request_to_master(dpp, owner, &bucket_version, in_data, nullptr, *req_info, y);
+    if (ret < 0) {
+      if (ret == -ENOENT) {
+        /* adjust error, we want to return with NoSuchBucket and not
+         * NoSuchKey */
+        ret = -ERR_NO_SUCH_BUCKET;
+      }
+      ldpp_dout(dpp, 0) << "ERROR: Forward to master failed. ret=" << ret << dendl;
+      return ret;
+    }
+  }
+
+  ldpp_dout(dpp, 20) << "remove_bucket Exit=" << info.bucket.name << dendl;
+
+  return ret;
+}
+
+// Not implemented for this driver: performs no removal and returns 0.
+int MotrBucket::remove_bucket_bypass_gc(int concurrent_max, bool
+                                        keep_index_consistent,
+                                        optional_yield y, const
+                                        DoutPrefixProvider *dpp) {
+  return 0;
+}
+
+int MotrBucket::put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time _mtime)
+{
+  bufferlist bl;
+  struct MotrBucketInfo mbinfo;
+
+  ldpp_dout(dpp, 20) << "put_info(): bucket_id=" << info.bucket.bucket_id << dendl;
+  mbinfo.info = info;
+  mbinfo.bucket_attrs = attrs;
+  mbinfo.mtime = _mtime;
+  mbinfo.bucket_version = bucket_version;
+  mbinfo.encode(bl);
+
+  // Insert bucket instance using bucket's marker (string).
+ int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME, + M0_IC_PUT, info.bucket.name, bl, !exclusive); + if (rc == 0) + store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl); + + return rc; +} + +int MotrBucket::load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats) +{ + // Get bucket instance using bucket's name (string). or bucket id? + bufferlist bl; + if (store->get_bucket_inst_cache()->get(dpp, info.bucket.name, bl)) { + // Cache misses. + ldpp_dout(dpp, 20) << "load_bucket(): name=" << info.bucket.name << dendl; + int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME, + M0_IC_GET, info.bucket.name, bl); + ldpp_dout(dpp, 20) << "load_bucket(): rc=" << rc << dendl; + if (rc < 0) + return rc; + store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl); + } + + struct MotrBucketInfo mbinfo; + bufferlist& blr = bl; + auto iter =blr.cbegin(); + mbinfo.decode(iter); //Decode into MotrBucketInfo. + + info = mbinfo.info; + ldpp_dout(dpp, 20) << "load_bucket(): bucket_id=" << info.bucket.bucket_id << dendl; + rgw_placement_rule placement_rule; + placement_rule.name = "default"; + placement_rule.storage_class = "STANDARD"; + info.placement_rule = placement_rule; + + attrs = mbinfo.bucket_attrs; + mtime = mbinfo.mtime; + bucket_version = mbinfo.bucket_version; + + return 0; +} + +int MotrBucket::link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y) +{ + bufferlist bl; + RGWBucketEnt new_bucket; + ceph::real_time creation_time = get_creation_time(); + + // RGWBucketEnt or cls_user_bucket_entry is the structure that is stored. 
+ new_bucket.bucket = info.bucket; + new_bucket.size = 0; + if (real_clock::is_zero(creation_time)) + creation_time = ceph::real_clock::now(); + new_bucket.creation_time = creation_time; + new_bucket.encode(bl); + std::time_t ctime = ceph::real_clock::to_time_t(new_bucket.creation_time); + ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl; + + // Insert the user into the user info index. + string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str(); + return store->do_idx_op_by_name(user_info_idx_name, + M0_IC_PUT, info.bucket.name, bl); + +} + +int MotrBucket::unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y) +{ + // Remove the user into the user info index. + bufferlist bl; + string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str(); + return store->do_idx_op_by_name(user_info_idx_name, + M0_IC_DEL, info.bucket.name, bl); +} + +/* stats - Not for first pass */ +int MotrBucket::read_stats(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, int shard_id, + std::string *bucket_ver, std::string *master_ver, + std::map& stats, + std::string *max_marker, bool *syncstopped) +{ + return 0; +} + +int MotrBucket::create_bucket_index() +{ + string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name; + return store->create_motr_idx_by_name(bucket_index_iname); +} + +int MotrBucket::create_multipart_indices() +{ + int rc; + + // Bucket multipart index stores in-progress multipart uploads. + // Key is the object name + upload_id, value is a rgw_bucket_dir_entry. + // An entry is inserted when a multipart upload is initialised ( + // MotrMultipartUpload::init()) and will be removed when the upload + // is completed (MotrMultipartUpload::complete()). + // MotrBucket::list_multiparts() will scan this index to return all + // in-progress multipart uploads in the bucket. 
+ string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts"; + rc = store->create_motr_idx_by_name(bucket_multipart_iname); + if (rc < 0) { + ldout(store->cctx, 0) << "Failed to create bucket multipart index " << bucket_multipart_iname << dendl; + return rc; + } + + return 0; +} + + +int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, RGWGetBucketStats_CB *ctx) +{ + return 0; +} + +int MotrBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) +{ + return 0; +} + +int MotrBucket::update_container_stats(const DoutPrefixProvider *dpp) +{ + return 0; +} + +int MotrBucket::check_bucket_shards(const DoutPrefixProvider *dpp) +{ + return 0; +} + +int MotrBucket::chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) +{ + // TODO: update bucket with new owner + return 0; +} + +/* Make sure to call load_bucket() if you need it first */ +bool MotrBucket::is_owner(User* user) +{ + return (info.owner.compare(user->get_id()) == 0); +} + +int MotrBucket::check_empty(const DoutPrefixProvider *dpp, optional_yield y) +{ + /* XXX: Check if bucket contains any objects */ + return 0; +} + +int MotrBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, + optional_yield y, bool check_size_only) +{ + /* Not Handled in the first pass as stats are also needed */ + return 0; +} + +int MotrBucket::merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& new_attrs, optional_yield y) +{ + for (auto& it : new_attrs) + attrs[it.first] = it.second; + + return put_info(dpp, y, ceph::real_time()); +} + +int MotrBucket::try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) +{ + return 0; +} + +/* XXX: usage and stats not supported in the first pass */ +int MotrBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool *is_truncated, + 
RGWUsageIter& usage_iter, + map& usage) +{ + return 0; +} + +int MotrBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) +{ + return 0; +} + +int MotrBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) +{ + /* XXX: CHECK: Unlike RadosStore, there is no seperate bucket index table. + * Delete all the object in the list from the object table of this + * bucket + */ + return 0; +} + +int MotrBucket::check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) +{ + /* XXX: stats not supported yet */ + return 0; +} + +int MotrBucket::rebuild_index(const DoutPrefixProvider *dpp) +{ + /* there is no index table in dbstore. Not applicable */ + return 0; +} + +int MotrBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) +{ + /* XXX: CHECK: set tag timeout for all the bucket objects? */ + return 0; +} + +int MotrBucket::purge_instance(const DoutPrefixProvider *dpp) +{ + /* XXX: CHECK: for dbstore only single instance supported. + * Remove all the objects for that instance? Anything extra needed? + */ + return 0; +} + +int MotrBucket::set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy &acl, optional_yield y) +{ + int ret = 0; + bufferlist aclbl; + + acls = acl; + acl.encode(aclbl); + + Attrs attrs = get_attrs(); + attrs[RGW_ATTR_ACL] = aclbl; + + // TODO: update bucket entry with the new attrs + + return ret; +} + +std::unique_ptr MotrBucket::get_object(const rgw_obj_key& k) +{ + return std::make_unique(this->store, k, this); +} + +int MotrBucket::list(const DoutPrefixProvider *dpp, ListParams& params, int max, ListResults& results, optional_yield y) +{ + int rc; + vector keys(max); + vector vals(max); + + ldpp_dout(dpp, 20) << "bucket=" << info.bucket.name + << " prefix=" << params.prefix + << " marker=" << params.marker + << " max=" << max << dendl; + + // Retrieve all `max` number of pairs. 
+ string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name; + keys[0] = params.marker.empty() ? params.prefix : + params.marker.get_oid(); + rc = store->next_query_by_name(bucket_index_iname, keys, vals, params.prefix, + params.delim); + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; + return rc; + } + + // Process the returned pairs to add into ListResults. + int i = 0; + for (; i < rc; ++i) { + if (vals[i].length() == 0) { + results.common_prefixes[keys[i]] = true; + } else { + rgw_bucket_dir_entry ent; + auto iter = vals[i].cbegin(); + ent.decode(iter); + if (params.list_versions || ent.is_visible()) + results.objs.emplace_back(std::move(ent)); + } + } + + if (i == max) { + results.is_truncated = true; + results.next_marker = keys[max - 1] + " "; + } else { + results.is_truncated = false; + } + + return 0; +} + +int MotrBucket::list_multiparts(const DoutPrefixProvider *dpp, + const string& prefix, + string& marker, + const string& delim, + const int& max_uploads, + vector>& uploads, + map *common_prefixes, + bool *is_truncated) +{ + int rc; + vector key_vec(max_uploads); + vector val_vec(max_uploads); + + string bucket_multipart_iname = + "motr.rgw.bucket." + this->get_name() + ".multiparts"; + key_vec[0].clear(); + key_vec[0].assign(marker.begin(), marker.end()); + rc = store->next_query_by_name(bucket_multipart_iname, key_vec, val_vec); + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; + return rc; + } + + // Process the returned pairs to add into ListResults. + // The POC can only support listing all objects or selecting + // with prefix. 
+ int ocount = 0; + rgw_obj_key last_obj_key; + *is_truncated = false; + for (const auto& bl: val_vec) { + if (bl.length() == 0) + break; + + rgw_bucket_dir_entry ent; + auto iter = bl.cbegin(); + ent.decode(iter); + + if (prefix.size() && + (0 != ent.key.name.compare(0, prefix.size(), prefix))) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << + ": skippping \"" << ent.key << + "\" because doesn't match prefix" << dendl; + continue; + } + + rgw_obj_key key(ent.key); + uploads.push_back(this->get_multipart_upload(key.name)); + last_obj_key = key; + ocount++; + if (ocount == max_uploads) { + *is_truncated = true; + break; + } + } + marker = last_obj_key.name; + + // What is common prefix? We don't handle it for now. + + return 0; + +} + +int MotrBucket::abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) +{ + return 0; +} + +void MotrStore::finalize(void) +{ + // close connection with motr + m0_client_fini(this->instance, true); +} + +const std::string& MotrZoneGroup::get_endpoint() const +{ + if (!group.endpoints.empty()) { + return group.endpoints.front(); + } else { + // use zonegroup's master zone endpoints + auto z = group.zones.find(group.master_zone); + if (z != group.zones.end() && !z->second.endpoints.empty()) { + return z->second.endpoints.front(); + } + } + return empty; +} + +bool MotrZoneGroup::placement_target_exists(std::string& target) const +{ + return !!group.placement_targets.count(target); +} + +int MotrZoneGroup::get_placement_target_names(std::set& names) const +{ + for (const auto& target : group.placement_targets) { + names.emplace(target.second.name); + } + + return 0; +} + +int MotrZoneGroup::get_placement_tier(const rgw_placement_rule& rule, + std::unique_ptr* tier) +{ + std::map::const_iterator titer; + titer = group.placement_targets.find(rule.name); + if (titer == group.placement_targets.end()) { + return -ENOENT; + } + + const auto& target_rule = titer->second; + std::map::const_iterator ttier; + ttier = 
target_rule.tier_targets.find(rule.storage_class); + if (ttier == target_rule.tier_targets.end()) { + // not found + return -ENOENT; + } + + PlacementTier* t; + t = new MotrPlacementTier(store, ttier->second); + if (!t) + return -ENOMEM; + + tier->reset(t); + return 0; +} + +ZoneGroup& MotrZone::get_zonegroup() +{ + return zonegroup; +} + +const std::string& MotrZone::get_id() +{ + return zone_params->get_id(); +} + +const std::string& MotrZone::get_name() const +{ + return zone_params->get_name(); +} + +bool MotrZone::is_writeable() +{ + return true; +} + +bool MotrZone::get_redirect_endpoint(std::string* endpoint) +{ + return false; +} + +bool MotrZone::has_zonegroup_api(const std::string& api) const +{ + return (zonegroup.group.api_name == api); +} + +const std::string& MotrZone::get_current_period_id() +{ + return current_period->get_id(); +} + +std::unique_ptr MotrStore::get_lua_manager() +{ + return std::make_unique(this); +} + +int MotrObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **_state, optional_yield y, bool follow_olh) +{ + // Get object's metadata (those stored in rgw_bucket_dir_entry). + bufferlist bl; + if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) { + // Cache misses. + string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name(); + int rc = this->store->do_idx_op_by_name(bucket_index_iname, + M0_IC_GET, this->get_key().get_oid(), bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl; + return rc; + } + + // Put into cache. + this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl); + } + + rgw_bucket_dir_entry ent; + bufferlist& blr = bl; + auto iter = blr.cbegin(); + ent.decode(iter); + + // Set object's type. + this->category = ent.meta.category; + + // Set object state. 
+ state.exists = true; + state.size = ent.meta.size; + state.accounted_size = ent.meta.size; + state.mtime = ent.meta.mtime; + + state.has_attrs = true; + bufferlist etag_bl; + string& etag = ent.meta.etag; + ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl; + etag_bl.append(etag); + state.attrset[RGW_ATTR_ETAG] = etag_bl; + + return 0; +} + +MotrObject::~MotrObject() { + this->close_mobj(); +} + +// int MotrObject::read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj) +// { +// read_op.params.attrs = &attrs; +// read_op.params.target_obj = target_obj; +// read_op.params.obj_size = &obj_size; +// read_op.params.lastmod = &mtime; +// +// return read_op.prepare(dpp); +// } + +int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) +{ + // TODO: implement + ldpp_dout(dpp, 20) <<__func__<< ": MotrObject::set_obj_attrs()" << dendl; + return 0; +} + +int MotrObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj) +{ + if (this->category == RGWObjCategory::MultiMeta) + return 0; + + string bname, key; + if (target_obj) { + bname = target_obj->bucket.name; + key = target_obj->key.get_oid(); + } else { + bname = this->get_bucket()->get_name(); + key = this->get_key().get_oid(); + } + ldpp_dout(dpp, 20) << "MotrObject::get_obj_attrs(): " + << bname << "/" << key << dendl; + + // Get object's metadata (those stored in rgw_bucket_dir_entry). + bufferlist bl; + if (this->store->get_obj_meta_cache()->get(dpp, key, bl)) { + // Cache misses. + string bucket_index_iname = "motr.rgw.bucket.index." + bname; + int rc = this->store->do_idx_op_by_name(bucket_index_iname, M0_IC_GET, key, bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl; + return rc; + } + + // Put into cache. 
+ this->store->get_obj_meta_cache()->put(dpp, key, bl); + } + + rgw_bucket_dir_entry ent; + bufferlist& blr = bl; + auto iter = blr.cbegin(); + ent.decode(iter); + decode(state.attrset, iter); + + return 0; +} + +int MotrObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) +{ + rgw_obj target = get_obj(); + int r = get_obj_attrs(y, dpp, &target); + if (r < 0) { + return r; + } + set_atomic(); + state.attrset[attr_name] = attr_val; + return set_obj_attrs(dpp, &state.attrset, nullptr, y); +} + +int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) +{ + rgw_obj target = get_obj(); + Attrs rmattr; + bufferlist bl; + + set_atomic(); + rmattr[attr_name] = bl; + return set_obj_attrs(dpp, nullptr, &rmattr, y); +} + +bool MotrObject::is_expired() { + return false; +} + +// Taken from rgw_rados.cc +void MotrObject::gen_rand_obj_instance_name() +{ + enum {OBJ_INSTANCE_LEN = 32}; + char buf[OBJ_INSTANCE_LEN + 1]; + + gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN); + state.obj.key.set_instance(buf); +} + +int MotrObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, + const std::set& keys, + Attrs* vals) +{ + return 0; +} + +int MotrObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) +{ + return 0; +} + +int MotrObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) +{ + return 0; +} + +std::unique_ptr MotrObject::get_serializer(const DoutPrefixProvider *dpp, + const std::string& lock_name) +{ + return std::make_unique(dpp, store, this, lock_name); +} + +int MotrObject::transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider* dpp, + optional_yield y) +{ + return 0; +} + +bool 
MotrObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) +{ + /* XXX: support single default zone and zonegroup for now */ + return true; +} + +int MotrObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) +{ + return 0; +} + +std::unique_ptr MotrObject::get_read_op() +{ + return std::make_unique(this); +} + +MotrObject::MotrReadOp::MotrReadOp(MotrObject *_source) : + source(_source) +{ } + +int MotrObject::MotrReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp) +{ + int rc; + ldpp_dout(dpp, 20) <<__func__<< ": bucket=" << source->get_bucket()->get_name() << dendl; + + rgw_bucket_dir_entry ent; + rc = source->get_bucket_dir_ent(dpp, ent); + if (rc < 0) + return rc; + + // Set source object's attrs. The attrs is key/value map and is used + // in send_response_data() to set attributes, including etag. + bufferlist etag_bl; + string& etag = ent.meta.etag; + ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl; + etag_bl.append(etag.c_str(), etag.size()); + source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); + + source->set_key(ent.key); + source->set_obj_size(ent.meta.size); + source->category = ent.meta.category; + *params.lastmod = ent.meta.mtime; + + if (params.mod_ptr || params.unmod_ptr) { + // Convert all times go GMT to make them compatible + obj_time_weight src_weight; + src_weight.init(*params.lastmod, params.mod_zone_id, params.mod_pg_ver); + src_weight.high_precision = params.high_precision_time; + + obj_time_weight dest_weight; + dest_weight.high_precision = params.high_precision_time; + + // Check if-modified-since condition + if (params.mod_ptr && !params.if_nomatch) { + dest_weight.init(*params.mod_ptr, params.mod_zone_id, params.mod_pg_ver); + ldpp_dout(dpp, 10) << "If-Modified-Since: " << dest_weight << " & " + << "Last-Modified: " << src_weight << dendl; + if (!(dest_weight < src_weight)) { + return -ERR_NOT_MODIFIED; + } + } + + // 
Check if-unmodified-since condition + if (params.unmod_ptr && !params.if_match) { + dest_weight.init(*params.unmod_ptr, params.mod_zone_id, params.mod_pg_ver); + ldpp_dout(dpp, 10) << "If-UnModified-Since: " << dest_weight << " & " + << "Last-Modified: " << src_weight << dendl; + if (dest_weight < src_weight) { + return -ERR_PRECONDITION_FAILED; + } + } + } + // Check if-match condition + if (params.if_match) { + string if_match_str = rgw_string_unquote(params.if_match); + ldpp_dout(dpp, 10) << "ETag: " << etag << " & " + << "If-Match: " << if_match_str << dendl; + if (if_match_str.compare(etag) != 0) { + return -ERR_PRECONDITION_FAILED; + } + } + // Check if-none-match condition + if (params.if_nomatch) { + string if_nomatch_str = rgw_string_unquote(params.if_nomatch); + ldpp_dout(dpp, 10) << "ETag: " << etag << " & " + << "If-NoMatch: " << if_nomatch_str << dendl; + if (if_nomatch_str.compare(etag) == 0) { + return -ERR_NOT_MODIFIED; + } + } + + // Skip opening an empty object. + if(source->get_obj_size() == 0) + return 0; + + // Open the object here. + if (source->category == RGWObjCategory::MultiMeta) { + ldpp_dout(dpp, 20) <<__func__<< ": open obj parts..." << dendl; + rc = source->get_part_objs(dpp, this->part_objs)? : + source->open_part_objs(dpp, this->part_objs); + return rc; + } else { + ldpp_dout(dpp, 20) <<__func__<< ": open object..." << dendl; + return source->open_mobj(dpp); + } +} + +int MotrObject::MotrReadOp::read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp) +{ + ldpp_dout(dpp, 20) << "MotrReadOp::read(): sync read." << dendl; + return 0; +} + +// RGWGetObj::execute() calls ReadOp::iterate() to read object from 'off' to 'end'. +// The returned data is processed in 'cb' which is a chain of post-processing +// filters such as decompression, de-encryption and sending back data to client +// (RGWGetObj_CB::handle_dta which in turn calls RGWGetObj::get_data_cb() to +// send data back.). 
+// +// POC implements a simple sync version of iterate() function in which it reads +// a block of data each time and call 'cb' for post-processing. +int MotrObject::MotrReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb, optional_yield y) +{ + int rc; + + if (source->category == RGWObjCategory::MultiMeta) + rc = source->read_multipart_obj(dpp, off, end, cb, part_objs); + else + rc = source->read_mobj(dpp, off, end, cb); + + return rc; +} + +int MotrObject::MotrReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) +{ + //return 0; + return -ENODATA; +} + +std::unique_ptr MotrObject::get_delete_op() +{ + return std::make_unique(this); +} + +MotrObject::MotrDeleteOp::MotrDeleteOp(MotrObject *_source) : + source(_source) +{ } + +// Implementation of DELETE OBJ also requires MotrObject::get_obj_state() +// to retrieve and set object's state from object's metadata. +// +// TODO: +// 1. The POC only remove the object's entry from bucket index and delete +// corresponding Motr objects. It doesn't handle the DeleteOp::params. +// Delete::delete_obj() in rgw_rados.cc shows how rados backend process the +// params. +// 2. Delete an object when its versioning is turned on. +int MotrObject::MotrDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) +{ + ldpp_dout(dpp, 20) << "delete " << source->get_key().get_oid() << " from " << source->get_bucket()->get_name() << dendl; + + rgw_bucket_dir_entry ent; + int rc = source->get_bucket_dir_ent(dpp, ent); + if (rc < 0) { + return rc; + } + + //TODO: When integrating with background GC for object deletion, + // we should consider adding object entry to GC before deleting the metadata. + // Delete from the cache first. + source->store->get_obj_meta_cache()->remove(dpp, source->get_key().get_oid()); + + // Delete the object's entry from the bucket index. + bufferlist bl; + string bucket_index_iname = "motr.rgw.bucket.index." 
+ source->get_bucket()->get_name(); + rc = source->store->do_idx_op_by_name(bucket_index_iname, + M0_IC_DEL, source->get_key().get_oid(), bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << "Failed to del object's entry from bucket index. " << dendl; + return rc; + } + + if (ent.meta.size == 0) { + ldpp_dout(dpp, 0) << __func__ << ": Object size is 0, not deleting motr object." << dendl; + return 0; + } + // Remove the motr objects. + if (source->category == RGWObjCategory::MultiMeta) + rc = source->delete_part_objs(dpp); + else + rc = source->delete_mobj(dpp); + if (rc < 0) { + ldpp_dout(dpp, 0) << "Failed to delete the object from Motr. " << dendl; + return rc; + } + + //result.delete_marker = parent_op.result.delete_marker; + //result.version_id = parent_op.result.version_id; + return 0; +} + +int MotrObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, bool prevent_versioning) +{ + MotrObject::MotrDeleteOp del_op(this); + del_op.params.bucket_owner = bucket->get_info().owner; + del_op.params.versioning_status = bucket->get_info().versioning_status(); + + return del_op.delete_obj(dpp, y); +} + +int MotrObject::copy_object(User* user, + req_info* info, + const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, + rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, + const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, + ceph::real_time* mtime, + const ceph::real_time* mod_ptr, + const ceph::real_time* unmod_ptr, + bool high_precision_time, + const char* if_match, + const char* if_nomatch, + AttrsMod attrs_mod, + bool copy_if_newer, + Attrs& attrs, + RGWObjCategory category, + uint64_t olh_epoch, + boost::optional delete_at, + std::string* version_id, + std::string* tag, + std::string* etag, + void (*progress_cb)(off_t, void *), + void* progress_data, + const DoutPrefixProvider* dpp, + optional_yield y) +{ + return 0; +} + +int MotrObject::swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) +{ + 
return 0; +} + +int MotrObject::swift_versioning_copy(const DoutPrefixProvider* dpp, + optional_yield y) +{ + return 0; +} + +MotrAtomicWriter::MotrAtomicWriter(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + MotrStore* _store, + const rgw_user& _owner, + const rgw_placement_rule *_ptail_placement_rule, + uint64_t _olh_epoch, + const std::string& _unique_tag) : + StoreWriter(dpp, y), + store(_store), + owner(_owner), + ptail_placement_rule(_ptail_placement_rule), + olh_epoch(_olh_epoch), + unique_tag(_unique_tag), + obj(_store, obj->get_key(), obj->get_bucket()), + old_obj(_store, obj->get_key(), obj->get_bucket()) {} + +static const unsigned MAX_BUFVEC_NR = 256; + +int MotrAtomicWriter::prepare(optional_yield y) +{ + total_data_size = 0; + + if (obj.is_opened()) + return 0; + + rgw_bucket_dir_entry ent; + int rc = old_obj.get_bucket_dir_ent(dpp, ent); + if (rc == 0) { + ldpp_dout(dpp, 20) << __func__ << ": object exists." << dendl; + } + + rc = m0_bufvec_empty_alloc(&buf, MAX_BUFVEC_NR) ?: + m0_bufvec_alloc(&attr, MAX_BUFVEC_NR, 1) ?: + m0_indexvec_alloc(&ext, MAX_BUFVEC_NR); + if (rc != 0) + this->cleanup(); + + return rc; +} + +int MotrObject::create_mobj(const DoutPrefixProvider *dpp, uint64_t sz) +{ + if (mobj != nullptr) { + ldpp_dout(dpp, 0) <<__func__<< "ERROR: object is already opened" << dendl; + return -EINVAL; + } + + int rc = m0_ufid_next(&ufid_gr, 1, &meta.oid); + if (rc != 0) { + ldpp_dout(dpp, 0) <<__func__<< "ERROR: m0_ufid_next() failed: " << rc << dendl; + return rc; + } + + char fid_str[M0_FID_STR_LEN]; + snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); + ldpp_dout(dpp, 20) <<__func__<< ": sz=" << sz << " oid=" << fid_str << dendl; + + int64_t lid = m0_layout_find_by_objsz(store->instance, nullptr, sz); + M0_ASSERT(lid > 0); + + M0_ASSERT(mobj == nullptr); + mobj = new m0_obj(); + m0_obj_init(mobj, &store->container.co_realm, &meta.oid, lid); + + struct m0_op *op = nullptr; + 
mobj->ob_entity.en_flags |= M0_ENF_META; + rc = m0_entity_create(nullptr, &mobj->ob_entity, &op); + if (rc != 0) { + this->close_mobj(); + ldpp_dout(dpp, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; + return rc; + } + ldpp_dout(dpp, 20) <<__func__<< ": call m0_op_launch()..." << dendl; + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc != 0) { + this->close_mobj(); + ldpp_dout(dpp, 0) << "ERROR: failed to create motr object: " << rc << dendl; + return rc; + } + + meta.layout_id = mobj->ob_attr.oa_layout_id; + meta.pver = mobj->ob_attr.oa_pver; + ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id + << std::dec << " rc=" << rc << dendl; + + // TODO: add key:user+bucket+key+obj.meta.oid value:timestamp to + // gc.queue.index. See more at github.com/Seagate/cortx-rgw/issues/7. + + return rc; +} + +int MotrObject::open_mobj(const DoutPrefixProvider *dpp) +{ + char fid_str[M0_FID_STR_LEN]; + snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); + ldpp_dout(dpp, 20) <<__func__<< ": oid=" << fid_str << dendl; + + int rc; + if (meta.layout_id == 0) { + rgw_bucket_dir_entry ent; + rc = this->get_bucket_dir_ent(dpp, ent); + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: open_mobj() failed: rc=" << rc << dendl; + return rc; + } + } + + if (meta.layout_id == 0) + return -ENOENT; + + M0_ASSERT(mobj == nullptr); + mobj = new m0_obj(); + memset(mobj, 0, sizeof *mobj); + m0_obj_init(mobj, &store->container.co_realm, &meta.oid, store->conf.mc_layout_id); + + struct m0_op *op = nullptr; + mobj->ob_attr.oa_layout_id = meta.layout_id; + mobj->ob_attr.oa_pver = meta.pver; + mobj->ob_entity.en_flags |= M0_ENF_META; + rc = m0_entity_open(&mobj->ob_entity, &op); + if (rc != 0) { + ldpp_dout(dpp, 0) << "ERROR: m0_entity_open() failed: rc=" << rc << dendl; + this->close_mobj(); + return rc; + } + m0_op_launch(&op, 1); + rc = 
m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc < 0) { + ldpp_dout(dpp, 10) << "ERROR: failed to open motr object: rc=" << rc << dendl; + this->close_mobj(); + return rc; + } + + ldpp_dout(dpp, 20) <<__func__<< ": rc=" << rc << dendl; + + return 0; +} + +int MotrObject::delete_mobj(const DoutPrefixProvider *dpp) +{ + int rc; + char fid_str[M0_FID_STR_LEN]; + snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); + if (!meta.oid.u_hi || !meta.oid.u_lo) { + ldpp_dout(dpp, 20) << __func__ << ": invalid motr object oid=" << fid_str << dendl; + return -EINVAL; + } + ldpp_dout(dpp, 20) << __func__ << ": deleting motr object oid=" << fid_str << dendl; + + // Open the object. + if (mobj == nullptr) { + rc = this->open_mobj(dpp); + if (rc < 0) + return rc; + } + + // Create an DELETE op and execute it (sync version). + struct m0_op *op = nullptr; + mobj->ob_entity.en_flags |= M0_ENF_META; + rc = m0_entity_delete(&mobj->ob_entity, &op); + if (rc != 0) { + ldpp_dout(dpp, 0) << "ERROR: m0_entity_delete() failed: " << rc << dendl; + return rc; + } + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to open motr object: " << rc << dendl; + return rc; + } + + this->close_mobj(); + + return 0; +} + +void MotrObject::close_mobj() +{ + if (mobj == nullptr) + return; + m0_obj_fini(mobj); + delete mobj; mobj = nullptr; +} + +int MotrObject::write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset) +{ + int rc; + unsigned bs, left; + struct m0_op *op; + char *start, *p; + struct m0_bufvec buf; + struct m0_bufvec attr; + struct m0_indexvec ext; + + left = data.length(); + if (left == 0) + return 0; + + rc = m0_bufvec_empty_alloc(&buf, 1) ?: + m0_bufvec_alloc(&attr, 1, 1) ?: + m0_indexvec_alloc(&ext, 1); + if (rc != 0) + 
goto out; + + bs = this->get_optimal_bs(left); + ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl; + + start = data.c_str(); + + for (p = start; left > 0; left -= bs, p += bs, offset += bs) { + if (left < bs) + bs = this->get_optimal_bs(left); + if (left < bs) { + data.append_zero(bs - left); + left = bs; + p = data.c_str(); + } + buf.ov_buf[0] = p; + buf.ov_vec.v_count[0] = bs; + ext.iv_index[0] = offset; + ext.iv_vec.v_count[0] = bs; + attr.ov_vec.v_count[0] = 0; + + op = nullptr; + rc = m0_obj_op(this->mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op); + if (rc != 0) + goto out; + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + if (rc != 0) + goto out; + } + +out: + m0_indexvec_free(&ext); + m0_bufvec_free(&attr); + m0_bufvec_free2(&buf); + return rc; +} + +int MotrObject::read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb) +{ + int rc; + unsigned bs, actual, left; + struct m0_op *op; + struct m0_bufvec buf; + struct m0_bufvec attr; + struct m0_indexvec ext; + + // make end pointer exclusive: + // it's easier to work with it this way + end++; + ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off << + " end=" << end << dendl; + // As `off` may not be parity group size aligned, even using optimal + // buffer block size, simply reading data from offset `off` could come + // across parity group boundary. And Motr only allows page-size aligned + // offset. + // + // The optimal size of each IO should also take into account the data + // transfer size to s3 client. For example, 16MB may be nice to read + // data from motr, but it could be too big for network transfer. + // + // TODO: We leave proper handling of offset in the future. + bs = this->get_optimal_bs(end - off); + ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): bs=" << bs << dendl; + + rc = m0_bufvec_empty_alloc(&buf, 1) ? 
: + m0_bufvec_alloc(&attr, 1, 1) ? : + m0_indexvec_alloc(&ext, 1); + if (rc < 0) + goto out; + + left = end - off; + for (; left > 0; off += actual) { + if (left < bs) + bs = this->get_optimal_bs(left); + actual = bs; + if (left < bs) + actual = left; + ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off << + " actual=" << actual << dendl; + bufferlist bl; + buf.ov_buf[0] = bl.append_hole(bs).c_str(); + buf.ov_vec.v_count[0] = bs; + ext.iv_index[0] = off; + ext.iv_vec.v_count[0] = bs; + attr.ov_vec.v_count[0] = 0; + + left -= actual; + // Read from Motr. + op = nullptr; + rc = m0_obj_op(this->mobj, M0_OC_READ, &ext, &buf, &attr, 0, 0, &op); + ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): init read op rc=" << rc << dendl; + if (rc != 0) { + ldpp_dout(dpp, 0) << __func__ << ": read failed during m0_obj_op, rc=" << rc << dendl; + goto out; + } + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + if (rc != 0) { + ldpp_dout(dpp, 0) << __func__ << ": read failed, m0_op_wait rc=" << rc << dendl; + goto out; + } + // Call `cb` to process returned data. + ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): call cb to process data" << dendl; + cb->handle_data(bl, 0, actual); + } + +out: + m0_indexvec_free(&ext); + m0_bufvec_free(&attr); + m0_bufvec_free2(&buf); + this->close_mobj(); + + return rc; +} + +int MotrObject::get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent) +{ + int rc = 0; + string bucket_index_iname = "motr.rgw.bucket.index." 
+ this->get_bucket()->get_name(); + int max = 1000; + vector keys(max); + vector vals(max); + bufferlist bl; + bufferlist::const_iterator iter; + + if (this->get_bucket()->get_info().versioning_status() == BUCKET_VERSIONED || + this->get_bucket()->get_info().versioning_status() == BUCKET_SUSPENDED) { + + rgw_bucket_dir_entry ent_to_check; + + if (this->store->get_obj_meta_cache()->get(dpp, this->get_name(), bl) == 0) { + iter = bl.cbegin(); + ent_to_check.decode(iter); + if (ent_to_check.is_current()) { + ent = ent_to_check; + rc = 0; + goto out; + } + } + + ldpp_dout(dpp, 20) <<__func__<< ": versioned bucket!" << dendl; + keys[0] = this->get_name(); + rc = store->next_query_by_name(bucket_index_iname, keys, vals); + if (rc < 0) { + ldpp_dout(dpp, 0) << __func__ << "ERROR: NEXT query failed. " << rc << dendl; + return rc; + } + + rc = -ENOENT; + for (const auto& bl: vals) { + if (bl.length() == 0) + break; + + iter = bl.cbegin(); + ent_to_check.decode(iter); + if (ent_to_check.is_current()) { + ldpp_dout(dpp, 20) <<__func__<< ": found current version!" << dendl; + ent = ent_to_check; + rc = 0; + + this->store->get_obj_meta_cache()->put(dpp, this->get_name(), bl); + + break; + } + } + } else { + if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) { + ldpp_dout(dpp, 20) <<__func__<< ": non-versioned bucket!" 
<< dendl; + rc = this->store->do_idx_op_by_name(bucket_index_iname, + M0_IC_GET, this->get_key().get_oid(), bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << __func__ << "ERROR: failed to get object's entry from bucket index: rc=" + << rc << dendl; + return rc; + } + this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl); + } + + bufferlist& blr = bl; + iter = blr.cbegin(); + ent.decode(iter); + } + +out: + if (rc == 0) { + sal::Attrs dummy; + decode(dummy, iter); + meta.decode(iter); + ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id << dendl; + char fid_str[M0_FID_STR_LEN]; + snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); + ldpp_dout(dpp, 70) << __func__ << ": oid=" << fid_str << dendl; + } else + ldpp_dout(dpp, 0) <<__func__<< ": rc=" << rc << dendl; + + return rc; +} + +int MotrObject::update_version_entries(const DoutPrefixProvider *dpp) +{ + int rc; + int max = 10; + vector keys(max); + vector vals(max); + + string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name(); + keys[0] = this->get_name(); + rc = store->next_query_by_name(bucket_index_iname, keys, vals); + ldpp_dout(dpp, 20) << "get all versions, name = " << this->get_name() << "rc = " << rc << dendl; + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; + return rc; + } + + // no entries returned. + if (rc == 0) + return 0; + + for (const auto& bl: vals) { + if (bl.length() == 0) + break; + + rgw_bucket_dir_entry ent; + auto iter = bl.cbegin(); + ent.decode(iter); + + if (0 != ent.key.name.compare(0, this->get_name().size(), this->get_name())) + continue; + + if (!ent.is_current()) + continue; + + // Remove from the cache. 
+ store->get_obj_meta_cache()->remove(dpp, this->get_name()); + + rgw::sal::Attrs attrs; + decode(attrs, iter); + MotrObject::Meta meta; + meta.decode(iter); + + ent.flags = rgw_bucket_dir_entry::FLAG_VER; + string key; + if (ent.key.instance.empty()) + key = ent.key.name; + else { + char buf[ent.key.name.size() + ent.key.instance.size() + 16]; + snprintf(buf, sizeof(buf), "%s[%s]", ent.key.name.c_str(), ent.key.instance.c_str()); + key = buf; + } + ldpp_dout(dpp, 20) << "update one version, key = " << key << dendl; + bufferlist ent_bl; + ent.encode(ent_bl); + encode(attrs, ent_bl); + meta.encode(ent_bl); + + rc = store->do_idx_op_by_name(bucket_index_iname, + M0_IC_PUT, key, ent_bl); + if (rc < 0) + break; + } + return rc; +} + +// Scan object_nnn_part_index to get all parts then open their motr objects. +// TODO: all parts are opened in the POC. But for a large object, for example +// a 5GB object will have about 300 parts (for default 15MB part). A better +// way of managing opened object may be needed. +int MotrObject::get_part_objs(const DoutPrefixProvider* dpp, + std::map>& part_objs) +{ + int rc; + int max_parts = 1000; + int marker = 0; + uint64_t off = 0; + bool truncated = false; + std::unique_ptr upload; + + upload = this->get_bucket()->get_multipart_upload(this->get_name(), string()); + + do { + rc = upload->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated); + if (rc == -ENOENT) { + rc = -ERR_NO_SUCH_UPLOAD; + } + if (rc < 0) + return rc; + + std::map>& parts = upload->get_parts(); + for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) { + + MultipartPart *mpart = part_iter->second.get(); + MotrMultipartPart *mmpart = static_cast(mpart); + uint32_t part_num = mmpart->get_num(); + uint64_t part_size = mmpart->get_size(); + + string part_obj_name = this->get_bucket()->get_name() + "." + + this->get_key().get_oid() + + ".part." 
+ std::to_string(part_num); + std::unique_ptr obj; + obj = this->bucket->get_object(rgw_obj_key(part_obj_name)); + std::unique_ptr mobj(static_cast(obj.release())); + + ldpp_dout(dpp, 20) << "get_part_objs: off = " << off << ", size = " << part_size << dendl; + mobj->part_off = off; + mobj->part_size = part_size; + mobj->part_num = part_num; + mobj->meta = mmpart->meta; + + part_objs.emplace(part_num, std::move(mobj)); + + off += part_size; + } + } while (truncated); + + return 0; +} + +int MotrObject::open_part_objs(const DoutPrefixProvider* dpp, + std::map>& part_objs) +{ + //for (auto& iter: part_objs) { + for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) { + MotrObject* obj = static_cast(iter->second.get()); + ldpp_dout(dpp, 20) << "open_part_objs: name = " << obj->get_name() << dendl; + int rc = obj->open_mobj(dpp); + if (rc < 0) + return rc; + } + + return 0; +} + +int MotrObject::delete_part_objs(const DoutPrefixProvider* dpp) +{ + std::unique_ptr upload; + upload = this->get_bucket()->get_multipart_upload(this->get_name(), string()); + std::unique_ptr mupload(static_cast(upload.release())); + return mupload->delete_parts(dpp); +} + +int MotrObject::read_multipart_obj(const DoutPrefixProvider* dpp, + int64_t off, int64_t end, RGWGetDataCB* cb, + std::map>& part_objs) +{ + int64_t cursor = off; + + ldpp_dout(dpp, 20) << "read_multipart_obj: off=" << off << " end=" << end << dendl; + + // Find the parts which are in the (off, end) range and + // read data from it. Note: `end` argument is inclusive. 
/**
 * Round `x` up to the nearest multiple of `by`.
 *
 * @param x   value to round.
 * @param by  granularity; 0 means "no rounding".
 * @return the smallest multiple of `by` that is >= `x`; `x` itself when
 *         `x` is 0 or `by` is 0.
 */
static unsigned roundup(unsigned x, unsigned by)
{
  // x == 0 would underflow in (x - 1); by == 0 would divide by zero.
  if (x == 0 || by == 0)
    return x;
  return ((x - 1) / by + 1) * by;
}
+ unsigned depth = 128 / ((unit_sz + 0x7fff) / 0x8000); + if (depth == 0) + depth = 1; + // P * N / (N + K + S) - number of data units to span the pool-width + unsigned max_bs = depth * unit_sz * pa->pa_P * pa->pa_N / + (pa->pa_N + pa->pa_K + pa->pa_S); + max_bs = roundup(max_bs, grp_sz); // multiple of group size + if (len >= max_bs) + return max_bs; + else if (len <= grp_sz) + return grp_sz; + else + return roundup(len, grp_sz); +} + +void MotrAtomicWriter::cleanup() +{ + m0_indexvec_free(&ext); + m0_bufvec_free(&attr); + m0_bufvec_free2(&buf); + acc_data.clear(); + obj.close_mobj(); + old_obj.close_mobj(); +} + +unsigned MotrAtomicWriter::populate_bvec(unsigned len, bufferlist::iterator &bi) +{ + unsigned i, l, done = 0; + const char *data; + + for (i = 0; i < MAX_BUFVEC_NR && len > 0; ++i) { + l = bi.get_ptr_and_advance(len, &data); + buf.ov_buf[i] = (char*)data; + buf.ov_vec.v_count[i] = l; + ext.iv_index[i] = acc_off; + ext.iv_vec.v_count[i] = l; + attr.ov_vec.v_count[i] = 0; + acc_off += l; + len -= l; + done += l; + } + buf.ov_vec.v_nr = i; + ext.iv_vec.v_nr = i; + + return done; +} + +int MotrAtomicWriter::write() +{ + int rc; + unsigned bs, left; + struct m0_op *op; + bufferlist::iterator bi; + + left = acc_data.length(); + + if (!obj.is_opened()) { + rc = obj.create_mobj(dpp, left); + if (rc == -EEXIST) + rc = obj.open_mobj(dpp); + if (rc != 0) { + char fid_str[M0_FID_STR_LEN]; + snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&obj.meta.oid)); + ldpp_dout(dpp, 0) << "ERROR: failed to create/open motr object " + << fid_str << " (" << obj.get_bucket()->get_name() + << "/" << obj.get_key().get_oid() << "): rc=" << rc + << dendl; + goto err; + } + } + + total_data_size += left; + + bs = obj.get_optimal_bs(left); + ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl; + + bi = acc_data.begin(); + while (left > 0) { + if (left < bs) + bs = obj.get_optimal_bs(left); + if (left < bs) { + acc_data.append_zero(bs - left); + auto off = 
bi.get_off(); + bufferlist tmp; + acc_data.splice(off, bs, &tmp); + acc_data.clear(); + acc_data.append(tmp.c_str(), bs); // make it a single buf + bi = acc_data.begin(); + left = bs; + } + + left -= this->populate_bvec(bs, bi); + + op = nullptr; + rc = m0_obj_op(obj.mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op); + if (rc != 0) + goto err; + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + if (rc != 0) + goto err; + } + acc_data.clear(); + + return 0; + +err: + this->cleanup(); + return rc; +} + +static const unsigned MAX_ACC_SIZE = 32 * 1024 * 1024; + +// Accumulate enough data first to make a reasonable decision about the +// optimal unit size for a new object, or bs for existing object (32M seems +// enough for 4M units in 8+2 parity groups, a common config on wide pools), +// and then launch the write operations. +int MotrAtomicWriter::process(bufferlist&& data, uint64_t offset) +{ + if (data.length() == 0) { // last call, flush data + int rc = 0; + if (acc_data.length() != 0) + rc = this->write(); + this->cleanup(); + return rc; + } + + if (acc_data.length() == 0) + acc_off = offset; + + acc_data.append(std::move(data)); + if (acc_data.length() < MAX_ACC_SIZE) + return 0; + + return this->write(); +} + +int MotrAtomicWriter::complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) +{ + int rc = 0; + + if (acc_data.length() != 0) { // check again, just in case + rc = this->write(); + this->cleanup(); + if (rc != 0) + return rc; + } + + bufferlist bl; + rgw_bucket_dir_entry ent; + + // Set rgw_bucet_dir_entry. Some of the member of this structure may not + // apply to motr. For example the storage_class. 
+ // + // Checkout AtomicObjectProcessor::complete() in rgw_putobj_processor.cc + // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what and + // how to set the dir entry. Only set the basic ones for POC, no ACLs and + // other attrs. + obj.get_key().get_index_key(&ent.key); + ent.meta.size = total_data_size; + ent.meta.accounted_size = total_data_size; + ent.meta.mtime = real_clock::is_zero(set_mtime)? ceph::real_clock::now() : set_mtime; + ent.meta.etag = etag; + ent.meta.owner = owner.to_str(); + ent.meta.owner_display_name = obj.get_bucket()->get_owner()->get_display_name(); + bool is_versioned = obj.get_key().have_instance(); + if (is_versioned) + ent.flags = rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; + ldpp_dout(dpp, 20) <<__func__<< ": key=" << obj.get_key().get_oid() + << " etag: " << etag << " user_data=" << user_data << dendl; + if (user_data) + ent.meta.user_data = *user_data; + ent.encode(bl); + + RGWBucketInfo &info = obj.get_bucket()->get_info(); + if (info.obj_lock_enabled() && info.obj_lock.has_rule()) { + auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); + if (iter == attrs.end()) { + real_time lock_until_date = info.obj_lock.get_lock_until_date(ent.meta.mtime); + string mode = info.obj_lock.get_mode(); + RGWObjectRetention obj_retention(mode, lock_until_date); + bufferlist retention_bl; + obj_retention.encode(retention_bl); + attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl; + } + } + encode(attrs, bl); + obj.meta.encode(bl); + ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << obj.meta.layout_id + << dendl; + if (is_versioned) { + // get the list of all versioned objects with the same key and + // unset their FLAG_CURRENT later, if do_idx_op_by_name() is successful. + // Note: without distributed lock on the index - it is possible that 2 + // CURRENT entries would appear in the bucket. 
For example, consider the + // following scenario when two clients are trying to add the new object + // version concurrently: + // client 1: reads all the CURRENT entries + // client 2: updates the index and sets the new CURRENT + // client 1: updates the index and sets the new CURRENT + // At the step (1) client 1 would not see the new current record from step (2), + // so it won't update it. As a result, two CURRENT version entries will appear + // in the bucket. + // TODO: update the current version (unset the flag) and insert the new current + // version can be launched in one motr op. This requires change at do_idx_op() + // and do_idx_op_by_name(). + rc = obj.update_version_entries(dpp); + if (rc < 0) + return rc; + } + // Insert an entry into bucket index. + string bucket_index_iname = "motr.rgw.bucket.index." + obj.get_bucket()->get_name(); + rc = store->do_idx_op_by_name(bucket_index_iname, + M0_IC_PUT, obj.get_key().get_oid(), bl); + if (rc == 0) + store->get_obj_meta_cache()->put(dpp, obj.get_key().get_oid(), bl); + + if (old_obj.get_bucket()->get_info().versioning_status() != BUCKET_VERSIONED) { + // Delete old object data if exists. + old_obj.delete_mobj(dpp); + } + + // TODO: We need to handle the object leak caused by parallel object upload by + // making use of background gc, which is currently not enabled for motr. + return rc; +} + +int MotrMultipartUpload::delete_parts(const DoutPrefixProvider *dpp) +{ + int rc; + int max_parts = 1000; + int marker = 0; + bool truncated = false; + + // Scan all parts and delete the corresponding motr objects. 
+ do { + rc = this->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated); + if (rc == -ENOENT) { + truncated = false; + rc = 0; + } + if (rc < 0) + return rc; + + std::map>& parts = this->get_parts(); + for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) { + + MultipartPart *mpart = part_iter->second.get(); + MotrMultipartPart *mmpart = static_cast(mpart); + uint32_t part_num = mmpart->get_num(); + + // Delete the part object. Note that the part object is not + // inserted into bucket index, only the corresponding motr object + // needs to be delete. That is why we don't call + // MotrObject::delete_object(). + string part_obj_name = bucket->get_name() + "." + + mp_obj.get_key() + + ".part." + std::to_string(part_num); + std::unique_ptr obj; + obj = this->bucket->get_object(rgw_obj_key(part_obj_name)); + std::unique_ptr mobj(static_cast(obj.release())); + mobj->meta = mmpart->meta; + rc = mobj->delete_mobj(dpp); + if (rc < 0) { + ldpp_dout(dpp, 0) << __func__ << ": Failed to delete object from Motr. rc=" << rc << dendl; + return rc; + } + } + } while (truncated); + + // Delete object part index. + std::string oid = mp_obj.get_key(); + string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; + return store->delete_motr_idx_by_name(obj_part_iname); +} + +int MotrMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct) +{ + int rc; + // Check if multipart upload exists + bufferlist bl; + std::unique_ptr meta_obj; + meta_obj = get_meta_obj(); + string bucket_multipart_iname = + "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts"; + rc = store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_GET, meta_obj->get_oid(), bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart upload. rc=" << rc << dendl; + return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc; + } + + // Scan all parts and delete the corresponding motr objects. 
+ rc = this->delete_parts(dpp); + if (rc < 0) + return rc; + + bl.clear(); + // Remove the upload from bucket multipart index. + rc = store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_DEL, meta_obj->get_key().get_oid(), bl); + return rc; +} + +std::unique_ptr MotrMultipartUpload::get_meta_obj() +{ + std::unique_ptr obj = bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns)); + std::unique_ptr mobj(static_cast(obj.release())); + mobj->set_category(RGWObjCategory::MultiMeta); + return mobj; +} + +struct motr_multipart_upload_info +{ + rgw_placement_rule dest_placement; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(dest_placement, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(dest_placement, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(motr_multipart_upload_info) + +int MotrMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, + ACLOwner& _owner, + rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) +{ + int rc; + std::string oid = mp_obj.get_key(); + + owner = _owner; + + do { + char buf[33]; + string tmp_obj_name; + gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); + std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */ + upload_id.append(buf); + + mp_obj.init(oid, upload_id); + tmp_obj_name = mp_obj.get_meta(); + + std::unique_ptr obj; + obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns)); + // the meta object will be indexed with 0 size, we c + obj->set_in_extra_data(true); + obj->set_hash_source(oid); + + motr_multipart_upload_info upload_info; + upload_info.dest_placement = dest_placement; + bufferlist mpbl; + encode(upload_info, mpbl); + + // Create an initial entry in the bucket. The entry will be + // updated when multipart upload is completed, for example, + // size, etag etc. 
+ bufferlist bl; + rgw_bucket_dir_entry ent; + obj->get_key().get_index_key(&ent.key); + ent.meta.owner = owner.get_id().to_str(); + ent.meta.category = RGWObjCategory::MultiMeta; + ent.meta.mtime = ceph::real_clock::now(); + ent.meta.user_data.assign(mpbl.c_str(), mpbl.c_str() + mpbl.length()); + ent.encode(bl); + + // Insert an entry into bucket multipart index so it is not shown + // when listing a bucket. + string bucket_multipart_iname = + "motr.rgw.bucket." + obj->get_bucket()->get_name() + ".multiparts"; + rc = store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_PUT, obj->get_key().get_oid(), bl); + + } while (rc == -EEXIST); + + if (rc < 0) + return rc; + + // Create object part index. + // TODO: add bucket as part of the name. + string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; + ldpp_dout(dpp, 20) << "MotrMultipartUpload::init(): object part index=" << obj_part_iname << dendl; + rc = store->create_motr_idx_by_name(obj_part_iname); + if (rc == -EEXIST) + rc = 0; + if (rc < 0) + // TODO: clean the bucket index entry + ldpp_dout(dpp, 0) << "Failed to create object multipart index " << obj_part_iname << dendl; + + return rc; +} + +int MotrMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct, + int num_parts, int marker, + int *next_marker, bool *truncated, + bool assume_unsorted) +{ + int rc; + vector key_vec(num_parts); + vector val_vec(num_parts); + + std::string oid = mp_obj.get_key(); + string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; + ldpp_dout(dpp, 20) << __func__ << ": object part index = " << obj_part_iname << dendl; + key_vec[0].clear(); + key_vec[0] = "part."; + char buf[32]; + snprintf(buf, sizeof(buf), "%08d", marker + 1); + key_vec[0].append(buf); + rc = store->next_query_by_name(obj_part_iname, key_vec, val_vec); + if (rc < 0) { + ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. 
" << rc << dendl; + return rc; + } + + int last_num = 0; + int part_cnt = 0; + uint32_t expected_next = 0; + ldpp_dout(dpp, 20) << __func__ << ": marker = " << marker << dendl; + for (const auto& bl: val_vec) { + if (bl.length() == 0) + break; + + RGWUploadPartInfo info; + auto iter = bl.cbegin(); + info.decode(iter); + rgw::sal::Attrs attrs_dummy; + decode(attrs_dummy, iter); + MotrObject::Meta meta; + meta.decode(iter); + + ldpp_dout(dpp, 20) << __func__ << ": part_num=" << info.num + << " part_size=" << info.size << dendl; + ldpp_dout(dpp, 20) << __func__ << ": meta:oid=[" << meta.oid.u_hi << "," << meta.oid.u_lo + << "], meta:pvid=[" << meta.pver.f_container << "," << meta.pver.f_key + << "], meta:layout id=" << meta.layout_id << dendl; + + if (!expected_next) + expected_next = info.num + 1; + else if (expected_next && info.num != expected_next) + return -EINVAL; + else expected_next = info.num + 1; + + if ((int)info.num > marker) { + last_num = info.num; + parts.emplace(info.num, std::make_unique(info, meta)); + } + + part_cnt++; + } + + // Does it have more parts? + if (truncated) + *truncated = part_cnt < num_parts? 
false : true; + ldpp_dout(dpp, 20) << __func__ << ": truncated=" << *truncated << dendl; + + if (next_marker) + *next_marker = last_num; + + return 0; +} + +// Heavily copy from rgw_sal_rados.cc +int MotrMultipartUpload::complete(const DoutPrefixProvider *dpp, + optional_yield y, CephContext* cct, + map& part_etags, + list& remove_objs, + uint64_t& accounted_size, bool& compressed, + RGWCompressionInfo& cs_info, off_t& off, + std::string& tag, ACLOwner& owner, + uint64_t olh_epoch, + rgw::sal::Object* target_obj) +{ + char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; + std::string etag; + bufferlist etag_bl; + MD5 hash; + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + bool truncated; + int rc; + + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): enter" << dendl; + int total_parts = 0; + int handled_parts = 0; + int max_parts = 1000; + int marker = 0; + uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; + auto etags_iter = part_etags.begin(); + rgw::sal::Attrs attrs = target_obj->get_attrs(); + + do { + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): list_parts()" << dendl; + rc = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); + if (rc == -ENOENT) { + rc = -ERR_NO_SUCH_UPLOAD; + } + if (rc < 0) + return rc; + + total_parts += parts.size(); + if (!truncated && total_parts != (int)part_etags.size()) { + ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts + << " expected: " << part_etags.size() << dendl; + rc = -ERR_INVALID_PART; + return rc; + } + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): parts.size()=" << parts.size() << dendl; + + for (auto obj_iter = parts.begin(); + etags_iter != part_etags.end() && obj_iter != parts.end(); + ++etags_iter, ++obj_iter, ++handled_parts) { + MultipartPart *mpart = obj_iter->second.get(); + MotrMultipartPart *mmpart = 
static_cast(mpart); + RGWUploadPartInfo *part = &mmpart->info; + + uint64_t part_size = part->accounted_size; + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part_size=" << part_size << dendl; + if (handled_parts < (int)part_etags.size() - 1 && + part_size < min_part_size) { + rc = -ERR_TOO_SMALL; + return rc; + } + + char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + if (etags_iter->first != (int)obj_iter->first) { + ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: " + << etags_iter->first << " next uploaded: " + << obj_iter->first << dendl; + rc = -ERR_INVALID_PART; + return rc; + } + string part_etag = rgw_string_unquote(etags_iter->second); + if (part_etag.compare(part->etag) != 0) { + ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first + << " etag: " << etags_iter->second << dendl; + rc = -ERR_INVALID_PART; + return rc; + } + + hex_to_buf(part->etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); + hash.Update((const unsigned char *)petag, sizeof(petag)); + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): calc etag " << dendl; + + string oid = mp_obj.get_part(part->num); + rgw_obj src_obj; + src_obj.init_ns(bucket->get_key(), oid, mp_ns); + +#if 0 // does Motr backend need it? 
+ /* update manifest for part */ + if (part->manifest.empty()) { + ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj=" + << src_obj << dendl; + rc = -ERR_INVALID_PART; + return rc; + } else { + manifest.append(dpp, part->manifest, store->get_zone()); + } + ldpp_dout(dpp, 0) << "MotrMultipartUpload::complete(): manifest " << dendl; +#endif + + bool part_compressed = (part->cs_info.compression_type != "none"); + if ((handled_parts > 0) && + ((part_compressed != compressed) || + (cs_info.compression_type != part->cs_info.compression_type))) { + ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload (" + << cs_info.compression_type << ">>" << part->cs_info.compression_type << ")" << dendl; + rc = -ERR_INVALID_PART; + return rc; + } + + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part compression" << dendl; + if (part_compressed) { + int64_t new_ofs; // offset in compression data for new part + if (cs_info.blocks.size() > 0) + new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len; + else + new_ofs = 0; + for (const auto& block : part->cs_info.blocks) { + compression_block cb; + cb.old_ofs = block.old_ofs + cs_info.orig_size; + cb.new_ofs = new_ofs; + cb.len = block.len; + cs_info.blocks.push_back(cb); + new_ofs = cb.new_ofs + cb.len; + } + if (!compressed) + cs_info.compression_type = part->cs_info.compression_type; + cs_info.orig_size += part->cs_info.orig_size; + compressed = true; + } + + // We may not need to do the following as remove_objs are those + // don't show when listing a bucket. As we store in-progress uploaded + // object's metadata in a separate index, they are not shown when + // listing a bucket. 
+ rgw_obj_index_key remove_key; + src_obj.key.get_index_key(&remove_key); + remove_objs.push_back(remove_key); + + off += part_size; + accounted_size += part->accounted_size; + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): off=" << off << ", accounted_size = " << accounted_size << dendl; + } + } while (truncated); + hash.Final((unsigned char *)final_etag); + + buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str); + snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], + sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, + "-%lld", (long long)part_etags.size()); + etag = final_etag_str; + ldpp_dout(dpp, 20) << "calculated etag: " << etag << dendl; + etag_bl.append(etag); + attrs[RGW_ATTR_ETAG] = etag_bl; + + if (compressed) { + // write compression attribute to full object + bufferlist tmp; + encode(cs_info, tmp); + attrs[RGW_ATTR_COMPRESSION] = tmp; + } + + // Read the object's the multipart_upload_info. + // TODO: all those index name and key constructions should be implemented as + // member functions. + bufferlist bl; + std::unique_ptr meta_obj; + meta_obj = get_meta_obj(); + string bucket_multipart_iname = + "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts"; + rc = this->store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_GET, meta_obj->get_key().get_oid(), bl); + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): read entry from bucket multipart index rc=" << rc << dendl; + if (rc < 0) + return rc; + rgw_bucket_dir_entry ent; + bufferlist& blr = bl; + auto ent_iter = blr.cbegin(); + ent.decode(ent_iter); + + // Update the dir entry and insert it to the bucket index so + // the object will be seen when listing the bucket. 
+ bufferlist update_bl; + target_obj->get_key().get_index_key(&ent.key); // Change to offical name :) + ent.meta.size = off; + ent.meta.accounted_size = accounted_size; + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): obj size=" << ent.meta.size + << " obj accounted size=" << ent.meta.accounted_size << dendl; + ent.meta.mtime = ceph::real_clock::now(); + ent.meta.etag = etag; + ent.encode(update_bl); + encode(attrs, update_bl); + MotrObject::Meta meta_dummy; + meta_dummy.encode(update_bl); + + string bucket_index_iname = "motr.rgw.bucket.index." + meta_obj->get_bucket()->get_name(); + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): target_obj name=" << target_obj->get_name() + << " target_obj oid=" << target_obj->get_oid() << dendl; + rc = store->do_idx_op_by_name(bucket_index_iname, M0_IC_PUT, + target_obj->get_name(), update_bl); + if (rc < 0) + return rc; + + // Put into metadata cache. + store->get_obj_meta_cache()->put(dpp, target_obj->get_name(), update_bl); + + // Now we can remove it from bucket multipart index. + ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): remove from bucket multipartindex " << dendl; + return store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_DEL, meta_obj->get_key().get_oid(), bl); +} + +int MotrMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs) +{ + if (!rule && !attrs) { + return 0; + } + + if (rule) { + if (!placement.empty()) { + *rule = &placement; + if (!attrs) { + /* Don't need attrs, done */ + return 0; + } + } else { + *rule = nullptr; + } + } + + std::unique_ptr meta_obj; + meta_obj = get_meta_obj(); + meta_obj->set_in_extra_data(true); + + // Read the object's the multipart_upload_info. + bufferlist bl; + string bucket_multipart_iname = + "motr.rgw.bucket." 
+ meta_obj->get_bucket()->get_name() + ".multiparts"; + int rc = this->store->do_idx_op_by_name(bucket_multipart_iname, + M0_IC_GET, meta_obj->get_key().get_oid(), bl); + if (rc < 0) { + ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart info. rc=" << rc << dendl; + return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc; + } + + rgw_bucket_dir_entry ent; + bufferlist& blr = bl; + auto ent_iter = blr.cbegin(); + ent.decode(ent_iter); + + if (attrs) { + bufferlist etag_bl; + string& etag = ent.meta.etag; + ldpp_dout(dpp, 20) << "object's etag: " << ent.meta.etag << dendl; + etag_bl.append(etag.c_str(), etag.size()); + attrs->emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); + if (!rule || *rule != nullptr) { + /* placement was cached; don't actually read */ + return 0; + } + } + + /* Decode multipart_upload_info */ + motr_multipart_upload_info upload_info; + bufferlist mpbl; + mpbl.append(ent.meta.user_data.c_str(), ent.meta.user_data.size()); + auto mpbl_iter = mpbl.cbegin(); + upload_info.decode(mpbl_iter); + placement = upload_info.dest_placement; + *rule = &placement; + + return 0; +} + +std::unique_ptr MotrMultipartUpload::get_writer( + const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t part_num, + const std::string& part_num_str) +{ + return std::make_unique(dpp, y, this, + obj, store, owner, + ptail_placement_rule, part_num, part_num_str); +} + +int MotrMultipartWriter::prepare(optional_yield y) +{ + string part_obj_name = head_obj->get_bucket()->get_name() + "." + + head_obj->get_key().get_oid() + + ".part." 
+ std::to_string(part_num); + ldpp_dout(dpp, 20) << "bucket=" << head_obj->get_bucket()->get_name() << "part_obj_name=" << part_obj_name << dendl; + part_obj = std::make_unique(this->store, rgw_obj_key(part_obj_name), head_obj->get_bucket()); + if (part_obj == nullptr) + return -ENOMEM; + + // s3 client may retry uploading part, so the part may have already + // been created. + int rc = part_obj->create_mobj(dpp, store->cctx->_conf->rgw_max_chunk_size); + if (rc == -EEXIST) { + rc = part_obj->open_mobj(dpp); + if (rc < 0) + return rc; + } + return rc; +} + +int MotrMultipartWriter::process(bufferlist&& data, uint64_t offset) +{ + int rc = part_obj->write_mobj(dpp, std::move(data), offset); + if (rc == 0) { + actual_part_size += data.length(); + ldpp_dout(dpp, 20) << " write_mobj(): actual_part_size=" << actual_part_size << dendl; + } + return rc; +} + +int MotrMultipartWriter::complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) +{ + // Should the dir entry(object metadata) be updated? For example + // mtime. + + ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): enter" << dendl; + // Add an entry into object_nnn_part_index. 
+ bufferlist bl; + RGWUploadPartInfo info; + info.num = part_num; + info.etag = etag; + info.size = actual_part_size; + info.accounted_size = accounted_size; + info.modified = real_clock::now(); + + bool compressed; + int rc = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info); + ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): compression rc=" << rc << dendl; + if (rc < 0) { + ldpp_dout(dpp, 1) << "cannot get compression info" << dendl; + return rc; + } + encode(info, bl); + encode(attrs, bl); + part_obj->meta.encode(bl); + + string p = "part."; + char buf[32]; + snprintf(buf, sizeof(buf), "%08d", (int)part_num); + p.append(buf); + string obj_part_iname = "motr.rgw.object." + head_obj->get_bucket()->get_name() + "." + + head_obj->get_key().get_oid() + ".parts"; + ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): object part index = " << obj_part_iname << dendl; + rc = store->do_idx_op_by_name(obj_part_iname, M0_IC_PUT, p, bl); + if (rc < 0) { + return rc == -ENOENT ? 
-ERR_NO_SUCH_UPLOAD : rc; + } + + return 0; +} + +std::unique_ptr MotrStore::get_role(std::string name, + std::string tenant, + std::string path, + std::string trust_policy, + std::string max_session_duration_str, + std::multimap tags) +{ + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +std::unique_ptr MotrStore::get_role(const RGWRoleInfo& info) +{ + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +std::unique_ptr MotrStore::get_role(std::string id) +{ + RGWRole* p = nullptr; + return std::unique_ptr(p); +} + +int MotrStore::get_roles(const DoutPrefixProvider *dpp, + optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + vector>& roles) +{ + return 0; +} + +std::unique_ptr MotrStore::get_oidc_provider() +{ + RGWOIDCProvider* p = nullptr; + return std::unique_ptr(p); +} + +int MotrStore::get_oidc_providers(const DoutPrefixProvider *dpp, + const std::string& tenant, + vector>& providers) +{ + return 0; +} + +std::unique_ptr MotrBucket::get_multipart_upload(const std::string& oid, + std::optional upload_id, + ACLOwner owner, ceph::real_time mtime) +{ + return std::make_unique(store, this, oid, upload_id, owner, mtime); +} + +std::unique_ptr MotrStore::get_append_writer(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + const std::string& unique_tag, + uint64_t position, + uint64_t *cur_accounted_size) { + return nullptr; +} + +std::unique_ptr MotrStore::get_atomic_writer(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t olh_epoch, + const std::string& unique_tag) { + return std::make_unique(dpp, y, + obj, this, owner, + ptail_placement_rule, olh_epoch, unique_tag); +} + +const std::string& MotrStore::get_compression_type(const rgw_placement_rule& rule) +{ + return zone.zone_params->get_compression_type(rule); +} 
+ +bool MotrStore::valid_placement(const rgw_placement_rule& rule) +{ + return zone.zone_params->valid_placement(rule); +} + +std::unique_ptr MotrStore::get_user(const rgw_user &u) +{ + ldout(cctx, 20) << "bucket's user: " << u.to_str() << dendl; + return std::make_unique(this, u); +} + +int MotrStore::get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string &key, optional_yield y, std::unique_ptr *user) +{ + int rc; + User *u; + bufferlist bl; + RGWUserInfo uinfo; + MotrAccessKey access_key; + + rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, + M0_IC_GET, key, bl); + if (rc < 0){ + ldout(cctx, 0) << "Access key not found: rc = " << rc << dendl; + return rc; + } + + bufferlist& blr = bl; + auto iter = blr.cbegin(); + access_key.decode(iter); + + uinfo.user_id.from_str(access_key.user_id); + ldout(cctx, 0) << "Loading user: " << uinfo.user_id.id << dendl; + rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr); + if (rc < 0){ + ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl; + return rc; + } + u = new MotrUser(this, uinfo); + if (!u) + return -ENOMEM; + + user->reset(u); + return 0; +} + +int MotrStore::get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr* user) +{ + int rc; + User *u; + bufferlist bl; + RGWUserInfo uinfo; + MotrEmailInfo email_info; + rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, + M0_IC_GET, email, bl); + if (rc < 0){ + ldout(cctx, 0) << "Email Id not found: rc = " << rc << dendl; + return rc; + } + auto iter = bl.cbegin(); + email_info.decode(iter); + ldout(cctx, 0) << "Loading user: " << email_info.user_id << dendl; + uinfo.user_id.from_str(email_info.user_id); + rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr); + if (rc < 0){ + ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl; + return rc; + } + u = new MotrUser(this, uinfo); + if (!u) + return -ENOMEM; + + user->reset(u); + return 0; +} + +int 
MotrStore::get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) +{ + /* Swift keys and subusers are not supported for now */ + return 0; +} + +int MotrStore::store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key) +{ + int rc; + bufferlist bl; + access_key.encode(bl); + rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, + M0_IC_PUT, access_key.id, bl); + if (rc < 0){ + ldout(cctx, 0) << "Failed to store key: rc = " << rc << dendl; + return rc; + } + return rc; +} + +int MotrStore::delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key) +{ + int rc; + bufferlist bl; + rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, + M0_IC_DEL, access_key, bl); + if (rc < 0){ + ldout(cctx, 0) << "Failed to delete key: rc = " << rc << dendl; + } + return rc; +} + +int MotrStore::store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info ) +{ + int rc; + bufferlist bl; + email_info.encode(bl); + rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, + M0_IC_PUT, email_info.email_id, bl); + if (rc < 0) { + ldout(cctx, 0) << "Failed to store the user by email as key: rc = " << rc << dendl; + } + return rc; +} + +std::unique_ptr MotrStore::get_object(const rgw_obj_key& k) +{ + return std::make_unique(this, k); +} + + +int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) +{ + int ret; + Bucket* bp; + + bp = new MotrBucket(this, b, u); + ret = bp->load_bucket(dpp, y); + if (ret < 0) { + delete bp; + return ret; + } + + bucket->reset(bp); + return 0; +} + +int MotrStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) +{ + Bucket* bp; + + bp = new MotrBucket(this, i, u); + /* Don't need to fetch the bucket info, use the provided one */ + + bucket->reset(bp); + return 0; +} + +int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* 
u, const std::string& tenant, const std::string& name, std::unique_ptr* bucket, optional_yield y) +{ + rgw_bucket b; + + b.tenant = tenant; + b.name = name; + + return get_bucket(dpp, u, b, bucket, y); +} + +bool MotrStore::is_meta_master() +{ + return true; +} + +int MotrStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version *objv, + bufferlist& in_data, + JSONParser *jp, req_info& info, + optional_yield y) +{ + return 0; +} + +int MotrStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, + bufferlist& in_data, + RGWXMLDecoder::XMLParser* parser, req_info& info, + optional_yield y) +{ + return 0; +} + +std::string MotrStore::zone_unique_id(uint64_t unique_num) +{ + return ""; +} + +std::string MotrStore::zone_unique_trans_id(const uint64_t unique_num) +{ + return ""; +} + +int MotrStore::get_zonegroup(const std::string& id, std::unique_ptr* group) +{ + /* XXX: for now only one zonegroup supported */ + ZoneGroup* zg; + zg = new MotrZoneGroup(this, zone.zonegroup.get_group()); + + group->reset(zg); + return 0; +} + +int MotrStore::list_all_zones(const DoutPrefixProvider* dpp, + std::list& zone_ids) +{ + zone_ids.push_back(zone.get_id()); + return 0; +} + +int MotrStore::cluster_stat(RGWClusterStat& stats) +{ + return 0; +} + +std::unique_ptr MotrStore::get_lifecycle(void) +{ + return 0; +} + +std::unique_ptr MotrStore::get_notification(Object* obj, Object* src_obj, req_state* s, + rgw::notify::EventType event_type, optional_yield y, const string* object_name) +{ + return std::make_unique(obj, src_obj, event_type); +} + +std::unique_ptr MotrStore::get_notification(const DoutPrefixProvider* dpp, Object* obj, + Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, + std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) +{ + return std::make_unique(obj, src_obj, event_type); +} + +int MotrStore::log_usage(const 
DoutPrefixProvider *dpp, map& usage_info) +{ + return 0; +} + +int MotrStore::log_op(const DoutPrefixProvider *dpp, string& oid, bufferlist& bl) +{ + return 0; +} + +int MotrStore::register_to_service_map(const DoutPrefixProvider *dpp, const string& daemon_type, + const map& meta) +{ + return 0; +} + +void MotrStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, + RGWRateLimitInfo& user_ratelimit, + RGWRateLimitInfo& anon_ratelimit) +{ + return; +} + +void MotrStore::get_quota(RGWQuota& quota) +{ + // XXX: Not handled for the first pass + return; +} + +int MotrStore::set_buckets_enabled(const DoutPrefixProvider *dpp, vector& buckets, bool enabled) +{ + return 0; +} + +int MotrStore::get_sync_policy_handler(const DoutPrefixProvider *dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef *phandler, + optional_yield y) +{ + return 0; +} + +RGWDataSyncStatusManager* MotrStore::get_data_sync_manager(const rgw_zone_id& source_zone) +{ + return 0; +} + +int MotrStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool *is_truncated, + RGWUsageIter& usage_iter, + map& usage) +{ + return 0; +} + +int MotrStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) +{ + return 0; +} + +int MotrStore::get_config_key_val(string name, bufferlist *bl) +{ + return 0; +} + +int MotrStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const string& section, const string& marker, void** phandle) +{ + return 0; +} + +int MotrStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list& keys, bool* truncated) +{ + return 0; +} + +void MotrStore::meta_list_keys_complete(void* handle) +{ + return; +} + +std::string MotrStore::meta_get_marker(void* handle) +{ + return ""; +} + +int MotrStore::meta_remove(const DoutPrefixProvider *dpp, string& metadata_key, optional_yield y) +{ + return 0; +} + +int MotrStore::open_idx(struct 
m0_uint128 *id, bool create, struct m0_idx *idx) +{ + m0_idx_init(idx, &container.co_realm, id); + + if (!create) + return 0; // nothing to do more + + // create index or make sure it's created + struct m0_op *op = nullptr; + int rc = m0_entity_create(nullptr, &idx->in_entity, &op); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; + goto out; + } + + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc != 0 && rc != -EEXIST) + ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; +out: + return rc; +} + +static void set_m0bufvec(struct m0_bufvec *bv, vector& vec) +{ + *bv->ov_buf = reinterpret_cast(vec.data()); + *bv->ov_vec.v_count = vec.size(); +} + +// idx must be opened with open_idx() beforehand +int MotrStore::do_idx_op(struct m0_idx *idx, enum m0_idx_opcode opcode, + vector& key, vector& val, bool update) +{ + int rc, rc_i; + struct m0_bufvec k, v, *vp = &v; + uint32_t flags = 0; + struct m0_op *op = nullptr; + + if (m0_bufvec_empty_alloc(&k, 1) != 0) { + ldout(cctx, 0) << "ERROR: failed to allocate key bufvec" << dendl; + return -ENOMEM; + } + + if (opcode == M0_IC_PUT || opcode == M0_IC_GET) { + rc = -ENOMEM; + if (m0_bufvec_empty_alloc(&v, 1) != 0) { + ldout(cctx, 0) << "ERROR: failed to allocate value bufvec" << dendl; + goto out; + } + } + + set_m0bufvec(&k, key); + if (opcode == M0_IC_PUT) + set_m0bufvec(&v, val); + + if (opcode == M0_IC_DEL) + vp = nullptr; + + if (opcode == M0_IC_PUT && update) + flags |= M0_OIF_OVERWRITE; + + rc = m0_idx_op(idx, opcode, &k, vp, &rc_i, flags, &op); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl; + goto out; + } + + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc != 0) { + ldout(cctx, 0) << "ERROR: op failed: " << 
rc << dendl; + goto out; + } + + if (rc_i != 0) { + ldout(cctx, 0) << "ERROR: idx op failed: " << rc_i << dendl; + rc = rc_i; + goto out; + } + + if (opcode == M0_IC_GET) { + val.resize(*v.ov_vec.v_count); + memcpy(reinterpret_cast(val.data()), *v.ov_buf, *v.ov_vec.v_count); + } + +out: + m0_bufvec_free2(&k); + if (opcode == M0_IC_GET) + m0_bufvec_free(&v); // cleanup buffer after GET + else if (opcode == M0_IC_PUT) + m0_bufvec_free2(&v); + + return rc; +} + +// Retrieve a range of key/value pairs starting from keys[0]. +int MotrStore::do_idx_next_op(struct m0_idx *idx, + vector>& keys, + vector>& vals) +{ + int rc; + uint32_t i = 0; + int nr_kvp = vals.size(); + int *rcs = new int[nr_kvp]; + struct m0_bufvec k, v; + struct m0_op *op = nullptr; + + rc = m0_bufvec_empty_alloc(&k, nr_kvp)?: + m0_bufvec_empty_alloc(&v, nr_kvp); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: failed to allocate kv bufvecs" << dendl; + return rc; + } + + set_m0bufvec(&k, keys[0]); + + rc = m0_idx_op(idx, M0_IC_NEXT, &k, &v, rcs, 0, &op); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl; + goto out; + } + + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc != 0) { + ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl; + goto out; + } + + for (i = 0; i < v.ov_vec.v_nr; ++i) { + if (rcs[i] < 0) + break; + + vector& key = keys[i]; + vector& val = vals[i]; + key.resize(k.ov_vec.v_count[i]); + val.resize(v.ov_vec.v_count[i]); + memcpy(reinterpret_cast(key.data()), k.ov_buf[i], k.ov_vec.v_count[i]); + memcpy(reinterpret_cast(val.data()), v.ov_buf[i], v.ov_vec.v_count[i]); + } + +out: + k.ov_vec.v_nr = i; + v.ov_vec.v_nr = i; + m0_bufvec_free(&k); + m0_bufvec_free(&v); // cleanup buffer after GET + + delete []rcs; + return rc ?: i; +} + +// Retrieve a number of key/value pairs under the prefix starting +// from the marker at key_out[0]. 
+int MotrStore::next_query_by_name(string idx_name, + vector& key_out, + vector& val_out, + string prefix, string delim) +{ + unsigned nr_kvp = std::min(val_out.size(), 100UL); + struct m0_idx idx = {}; + vector> keys(nr_kvp); + vector> vals(nr_kvp); + struct m0_uint128 idx_id; + int i = 0, j, k = 0; + + index_name_to_motr_fid(idx_name, &idx_id); + int rc = open_motr_idx(&idx_id, &idx); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: next_query_by_name(): failed to open index: rc=" + << rc << dendl; + goto out; + } + + // Only the first element for keys needs to be set for NEXT query. + // The keys will be set will the returned keys from motr index. + ldout(cctx, 20) <<__func__<< ": next_query_by_name(): index=" << idx_name + << " prefix=" << prefix << " delim=" << delim << dendl; + keys[0].assign(key_out[0].begin(), key_out[0].end()); + for (i = 0; i < (int)val_out.size(); i += k, k = 0) { + rc = do_idx_next_op(&idx, keys, vals); + ldout(cctx, 20) << "do_idx_next_op() = " << rc << dendl; + if (rc < 0) { + ldout(cctx, 0) << "ERROR: NEXT query failed. 
" << rc << dendl; + goto out; + } + + string dir; + for (j = 0, k = 0; j < rc; ++j) { + string key(keys[j].begin(), keys[j].end()); + size_t pos = std::string::npos; + if (!delim.empty()) + pos = key.find(delim, prefix.length()); + if (pos != std::string::npos) { // DIR entry + dir.assign(key, 0, pos + 1); + if (dir.compare(0, prefix.length(), prefix) != 0) + goto out; + if (i + k == 0 || dir != key_out[i + k - 1]) // a new one + key_out[i + k++] = dir; + continue; + } + dir = ""; + if (key.compare(0, prefix.length(), prefix) != 0) + goto out; + key_out[i + k] = key; + bufferlist& vbl = val_out[i + k]; + vbl.append(reinterpret_cast(vals[j].data()), vals[j].size()); + ++k; + } + + if (rc < (int)nr_kvp) // there are no more keys to fetch + break; + + string next_key; + if (dir != "") + next_key = dir + "\xff"; // skip all dir content in 1 step + else + next_key = key_out[i + k - 1] + " "; + ldout(cctx, 0) << "do_idx_next_op(): next_key=" << next_key << dendl; + keys[0].assign(next_key.begin(), next_key.end()); + } + +out: + m0_idx_fini(&idx); + return rc < 0 ? rc : i + k; +} + +int MotrStore::delete_motr_idx_by_name(string iname) +{ + struct m0_idx idx; + struct m0_uint128 idx_id; + struct m0_op *op = nullptr; + + ldout(cctx, 20) << "delete_motr_idx_by_name=" << iname << dendl; + + index_name_to_motr_fid(iname, &idx_id); + m0_idx_init(&idx, &container.co_realm, &idx_id); + m0_entity_open(&idx.in_entity, &op); + int rc = m0_entity_delete(&idx.in_entity, &op); + if (rc < 0) + goto out; + + m0_op_launch(&op, 1); + + ldout(cctx, 70) << "waiting for op completion" << dendl; + + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc == -ENOENT) // race deletion?? 
+ rc = 0; + else if (rc < 0) + ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; + + ldout(cctx, 20) << "delete_motr_idx_by_name rc=" << rc << dendl; + +out: + m0_idx_fini(&idx); + return rc; +} + +int MotrStore::open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx) +{ + m0_idx_init(idx, &container.co_realm, id); + return 0; +} + +// The following marcos are from dix/fid_convert.h which are not exposed. +enum { + M0_DIX_FID_DEVICE_ID_OFFSET = 32, + M0_DIX_FID_DIX_CONTAINER_MASK = (1ULL << M0_DIX_FID_DEVICE_ID_OFFSET) + - 1, +}; + +// md5 is used here, a more robust way to convert index name to fid is +// needed to avoid collision. +void MotrStore::index_name_to_motr_fid(string iname, struct m0_uint128 *id) +{ + unsigned char md5[16]; // 128/8 = 16 + MD5 hash; + + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + hash.Update((const unsigned char *)iname.c_str(), iname.length()); + hash.Final(md5); + + memcpy(&id->u_hi, md5, 8); + memcpy(&id->u_lo, md5 + 8, 8); + ldout(cctx, 20) << "id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl; + + struct m0_fid *fid = (struct m0_fid*)id; + m0_fid_tset(fid, m0_dix_fid_type.ft_id, + fid->f_container & M0_DIX_FID_DIX_CONTAINER_MASK, fid->f_key); + ldout(cctx, 20) << "converted id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl; +} + +int MotrStore::do_idx_op_by_name(string idx_name, enum m0_idx_opcode opcode, + string key_str, bufferlist &bl, bool update) +{ + struct m0_idx idx; + vector key(key_str.begin(), key_str.end()); + vector val; + struct m0_uint128 idx_id; + + index_name_to_motr_fid(idx_name, &idx_id); + int rc = open_motr_idx(&idx_id, &idx); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: failed to open index: " << rc << dendl; + goto out; + } + + if (opcode == M0_IC_PUT) + val.assign(bl.c_str(), bl.c_str() + bl.length()); + + ldout(cctx, 20) <<__func__<< ": do_idx_op_by_name(): op=" + 
<< (opcode == M0_IC_PUT ? "PUT" : "GET") + << " idx=" << idx_name << " key=" << key_str << dendl; + rc = do_idx_op(&idx, opcode, key, val, update); + if (rc == 0 && opcode == M0_IC_GET) + // Append the returned value (blob) to the bufferlist. + bl.append(reinterpret_cast(val.data()), val.size()); + +out: + m0_idx_fini(&idx); + return rc; +} + +int MotrStore::create_motr_idx_by_name(string iname) +{ + struct m0_idx idx = {}; + struct m0_uint128 id; + + index_name_to_motr_fid(iname, &id); + m0_idx_init(&idx, &container.co_realm, &id); + + // create index or make sure it's created + struct m0_op *op = nullptr; + int rc = m0_entity_create(nullptr, &idx.in_entity, &op); + if (rc != 0) { + ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; + goto out; + } + + m0_op_launch(&op, 1); + rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: + m0_rc(op); + m0_op_fini(op); + m0_op_free(op); + + if (rc != 0 && rc != -EEXIST) + ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; +out: + m0_idx_fini(&idx); + return rc; +} + +// If a global index is checked (if it has been create) every time +// before they're queried (put/get), which takes 2 Motr operations to +// complete the query. As the global indices' name and FID are known +// already when MotrStore is created, we move the check and creation +// in newMotrStore(). +// Similar method is used for per bucket/user index. For example, +// bucket instance index is created when creating the bucket. 
+int MotrStore::check_n_create_global_indices() +{ + int rc = 0; + + for (const auto& iname : motr_global_indices) { + rc = create_motr_idx_by_name(iname); + if (rc < 0 && rc != -EEXIST) + break; + rc = 0; + } + + return rc; +} + +std::string MotrStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) +{ + char id[M0_FID_STR_LEN]; + struct m0_confc *confc = m0_reqh2confc(&instance->m0c_reqh); + + m0_fid_print(id, ARRAY_SIZE(id), &confc->cc_root->co_id); + return std::string(id); +} + +int MotrStore::init_metadata_cache(const DoutPrefixProvider *dpp, + CephContext *cct) +{ + this->obj_meta_cache = new MotrMetaCache(dpp, cct); + this->get_obj_meta_cache()->set_enabled(true); + + this->user_cache = new MotrMetaCache(dpp, cct); + this->get_user_cache()->set_enabled(true); + + this->bucket_inst_cache = new MotrMetaCache(dpp, cct); + this->get_bucket_inst_cache()->set_enabled(true); + + return 0; +} + + int MotrLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) + { + return -ENOENT; + } + + int MotrLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) + { + return -ENOENT; + } + + int MotrLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) + { + return -ENOENT; + } + + int MotrLuaManager::add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) + { + return -ENOENT; + } + + int MotrLuaManager::remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) + { + return -ENOENT; + } + + int MotrLuaManager::list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) + { + return -ENOENT; + } +} // namespace rgw::sal + +extern "C" { + +void *newMotrStore(CephContext *cct) +{ + int rc = -1; + rgw::sal::MotrStore *store = new rgw::sal::MotrStore(cct); + + if (store) { + 
store->conf.mc_is_oostore = true; + // XXX: these params should be taken from config settings and + // cct somehow? + store->instance = nullptr; + const auto& proc_ep = g_conf().get_val("motr_my_endpoint"); + const auto& ha_ep = g_conf().get_val("motr_ha_endpoint"); + const auto& proc_fid = g_conf().get_val("motr_my_fid"); + const auto& profile = g_conf().get_val("motr_profile_fid"); + const auto& admin_proc_ep = g_conf().get_val("motr_admin_endpoint"); + const auto& admin_proc_fid = g_conf().get_val("motr_admin_fid"); + const int init_flags = cct->get_init_flags(); + ldout(cct, 0) << "INFO: motr my endpoint: " << proc_ep << dendl; + ldout(cct, 0) << "INFO: motr ha endpoint: " << ha_ep << dendl; + ldout(cct, 0) << "INFO: motr my fid: " << proc_fid << dendl; + ldout(cct, 0) << "INFO: motr profile fid: " << profile << dendl; + store->conf.mc_local_addr = proc_ep.c_str(); + store->conf.mc_process_fid = proc_fid.c_str(); + + ldout(cct, 0) << "INFO: init flags: " << init_flags << dendl; + ldout(cct, 0) << "INFO: motr admin endpoint: " << admin_proc_ep << dendl; + ldout(cct, 0) << "INFO: motr admin fid: " << admin_proc_fid << dendl; + + // HACK this is so that radosge-admin uses a different client + if (init_flags == 0) { + store->conf.mc_process_fid = admin_proc_fid.c_str(); + store->conf.mc_local_addr = admin_proc_ep.c_str(); + } else { + store->conf.mc_process_fid = proc_fid.c_str(); + store->conf.mc_local_addr = proc_ep.c_str(); + } + store->conf.mc_ha_addr = ha_ep.c_str(); + store->conf.mc_profile = profile.c_str(); + + ldout(cct, 50) << "INFO: motr profile fid: " << store->conf.mc_profile << dendl; + ldout(cct, 50) << "INFO: ha addr: " << store->conf.mc_ha_addr << dendl; + ldout(cct, 50) << "INFO: process fid: " << store->conf.mc_process_fid << dendl; + ldout(cct, 50) << "INFO: motr endpoint: " << store->conf.mc_local_addr << dendl; + + store->conf.mc_tm_recv_queue_min_len = 64; + store->conf.mc_max_rpc_msg_size = 524288; + store->conf.mc_idx_service_id = 
M0_IDX_DIX; + store->dix_conf.kc_create_meta = false; + store->conf.mc_idx_service_conf = &store->dix_conf; + + if (!g_conf().get_val("motr_tracing_enabled")) { + m0_trace_level_allow(M0_WARN); // allow errors and warnings in syslog anyway + m0_trace_set_mmapped_buffer(false); + } + + store->instance = nullptr; + rc = m0_client_init(&store->instance, &store->conf, true); + if (rc != 0) { + ldout(cct, 0) << "ERROR: m0_client_init() failed: " << rc << dendl; + goto out; + } + + m0_container_init(&store->container, nullptr, &M0_UBER_REALM, store->instance); + rc = store->container.co_realm.re_entity.en_sm.sm_rc; + if (rc != 0) { + ldout(cct, 0) << "ERROR: m0_container_init() failed: " << rc << dendl; + goto out; + } + + rc = m0_ufid_init(store->instance, &ufid_gr); + if (rc != 0) { + ldout(cct, 0) << "ERROR: m0_ufid_init() failed: " << rc << dendl; + goto out; + } + + // Create global indices if not yet. + rc = store->check_n_create_global_indices(); + if (rc != 0) { + ldout(cct, 0) << "ERROR: check_n_create_global_indices() failed: " << rc << dendl; + goto out; + } + + } + +out: + if (rc != 0) { + delete store; + return nullptr; + } + return store; +} + +} diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h new file mode 100644 index 000000000000..153ac8abd005 --- /dev/null +++ b/src/rgw/driver/motr/rgw_sal_motr.h @@ -0,0 +1,1195 @@ + +// vim: ts=2 sw=2 expandtab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * SAL implementation for the CORTX Motr backend + * + * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +extern "C" { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wextern-c-compat" +#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" +#include "motr/config.h" +#include "motr/client.h" +#pragma clang diagnostic pop +} + +#include "rgw_sal_store.h" +#include "rgw_rados.h" +#include "rgw_notify.h" +#include "rgw_oidc_provider.h" +#include "rgw_role.h" +#include "rgw_multi.h" +#include "rgw_putobj_processor.h" + +namespace rgw::sal { + +class MotrStore; + +// Global Motr indices +#define RGW_MOTR_USERS_IDX_NAME "motr.rgw.users" +#define RGW_MOTR_BUCKET_INST_IDX_NAME "motr.rgw.bucket.instances" +#define RGW_MOTR_BUCKET_HD_IDX_NAME "motr.rgw.bucket.headers" +#define RGW_IAM_MOTR_ACCESS_KEY "motr.rgw.accesskeys" +#define RGW_IAM_MOTR_EMAIL_KEY "motr.rgw.emails" + +//#define RGW_MOTR_BUCKET_ACL_IDX_NAME "motr.rgw.bucket.acls" + +// A simplified metadata cache implementation. +// Note: MotrObjMetaCache doesn't handle the IO operations to Motr. A proxy +// class can be added to handle cache and 'real' ops. +class MotrMetaCache +{ +protected: + // MGW re-uses ObjectCache to cache object's metadata as it has already + // implemented a lru cache: (1) ObjectCache internally uses a map and lru + // list to manage cache entry. POC uses object name, user name or bucket + // name as the key to lookup and insert an entry. (2) ObjectCache::data is + // a bufferlist and can be used to store any metadata structure, such as + // object's bucket dir entry, user info or bucket instance. + // + // Note from RGW: + // The Rados Gateway stores metadata and objects in an internal cache. This + // should be kept consistent by the OSD's relaying notify events between + // multiple watching RGW processes. In the event that this notification + // protocol fails, bounding the length of time that any data in the cache will + // be assumed valid will ensure that any RGW instance that falls out of sync + // will eventually recover. 
This seems to be an issue mostly for large numbers + // of RGW instances under heavy use. If you would like to turn off cache expiry, + // set this value to zero. + // + // Currently the POC hasn't implemented the watch-notify mechanism yet. So the + // current implementation is similar to cortx-s3server which is based on expiry + // time (TODO: see comments on distribute_cache). + // + // Be aware: unlike in RGW, Motr object data is not cached in the current POC! + // RGW caches the first chunk (4MB by default). + ObjectCache cache; + +public: + // Lookup a cache entry. + int get(const DoutPrefixProvider *dpp, const std::string& name, bufferlist& data); + + // Insert a cache entry. + int put(const DoutPrefixProvider *dpp, const std::string& name, const bufferlist& data); + + // Called when an object is deleted. Notification should be sent to other + // RGW instances. + int remove(const DoutPrefixProvider *dpp, const std::string& name); + + // Make the local cache entry invalid. + void invalid(const DoutPrefixProvider *dpp, const std::string& name); + + // TODO: distribute_cache() and watch_cb() are currently only placeholder functions. + // Check out services/svc_sys_obj_cache.h/cc for reference. + // These 2 functions are designed to notify or to act on cache notification. + // It is feasible to implement the functionality using Motr's FDMI after discussing + // with Hua. 
+ int distribute_cache(const DoutPrefixProvider *dpp, + const std::string& normal_name, + ObjectCacheInfo& obj_info, int op); + int watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl); + + void set_enabled(bool status); + + MotrMetaCache(const DoutPrefixProvider *dpp, CephContext *cct) { + cache.set_ctx(cct); + } +}; + +struct MotrUserInfo { + RGWUserInfo info; + obj_version user_version; + rgw::sal::Attrs attrs; + + void encode(bufferlist& bl) const + { + ENCODE_START(3, 3, bl); + encode(info, bl); + encode(user_version, bl); + encode(attrs, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) + { + DECODE_START(3, bl); + decode(info, bl); + decode(user_version, bl); + decode(attrs, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(MotrUserInfo); + +struct MotrEmailInfo { + std::string user_id; + std::string email_id; + + MotrEmailInfo() {} + MotrEmailInfo(std::string _user_id, std::string _email_id ) + : user_id(std::move(_user_id)), email_id(std::move(_email_id)) {} + + void encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + encode(user_id, bl); + encode(email_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); + decode(user_id, bl); + decode(email_id, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(MotrEmailInfo); + +struct MotrAccessKey { + std::string id; // AccessKey + std::string key; // SecretKey + std::string user_id; // UserID + + MotrAccessKey() {} + MotrAccessKey(std::string _id, std::string _key, std::string _user_id) + : id(std::move(_id)), key(std::move(_key)), user_id(std::move(_user_id)) {} + + void encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + encode(id, bl); + encode(key, bl); + encode(user_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); + decode(id, 
bl); + decode(key, bl); + decode(user_id, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(MotrAccessKey); + +class MotrNotification : public StoreNotification { + public: + MotrNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) : + StoreNotification(_obj, _src_obj, _type) {} + ~MotrNotification() = default; + + virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override { return 0;} + virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, + const ceph::real_time& mtime, const std::string& etag, const std::string& version) override { return 0; } +}; + +class MotrUser : public StoreUser { + private: + MotrStore *store; + struct m0_uint128 idxID = {0xe5ecb53640d4ecce, 0x6a156cd5a74aa3b8}; // MD5 of “motr.rgw.users“ + struct m0_idx idx; + + public: + std::set access_key_tracker; + MotrUser(MotrStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { } + MotrUser(MotrStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { } + MotrUser(MotrStore *_st) : store(_st) { } + MotrUser(MotrUser& _o) = default; + MotrUser() {} + + virtual std::unique_ptr clone() override { + return std::unique_ptr(new MotrUser(*this)); + } + int list_buckets(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& end_marker, + uint64_t max, bool need_stats, BucketList& buckets, optional_yield y) override; + virtual int create_bucket(const DoutPrefixProvider* dpp, + const rgw_bucket& b, + const std::string& zonegroup_id, + rgw_placement_rule& placement_rule, + std::string& swift_ver_location, + const RGWQuotaInfo* pquota_info, + const RGWAccessControlPolicy& policy, + Attrs& attrs, + RGWBucketInfo& info, + obj_version& ep_objv, + bool exclusive, + bool obj_lock_enabled, + bool* existed, + req_info& req_info, + std::unique_ptr* bucket, + optional_yield y) override; + virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int 
merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override; + virtual int read_stats(const DoutPrefixProvider *dpp, + optional_yield y, RGWStorageStats* stats, + ceph::real_time *last_stats_sync = nullptr, + ceph::real_time *last_stats_update = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; + virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, + bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + + virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override; + virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override; + + int create_user_info_idx(); + int load_user_from_idx(const DoutPrefixProvider *dpp, MotrStore *store, RGWUserInfo& info, std::map *attrs, RGWObjVersionTracker *objv_tr); + + friend class MotrBucket; +}; + +class MotrBucket : public StoreBucket { + private: + MotrStore *store; + RGWAccessControlPolicy acls; + + // RGWBucketInfo and other information that are shown when listing a bucket is + // represented in struct MotrBucketInfo. The structure is encoded and stored + // as the value of the global bucket instance index. + // TODO: compare pros and cons of separating the bucket_attrs (ACLs, tag etc.) + // into a different index. 
+ struct MotrBucketInfo { + RGWBucketInfo info; + + obj_version bucket_version; + ceph::real_time mtime; + + rgw::sal::Attrs bucket_attrs; + + void encode(bufferlist& bl) const + { + ENCODE_START(4, 4, bl); + encode(info, bl); + encode(bucket_version, bl); + encode(mtime, bl); + encode(bucket_attrs, bl); //rgw_cache.h example for a map + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) + { + DECODE_START(4, bl); + decode(info, bl); + decode(bucket_version, bl); + decode(mtime, bl); + decode(bucket_attrs, bl); + DECODE_FINISH(bl); + } + }; + WRITE_CLASS_ENCODER(MotrBucketInfo); + + public: + MotrBucket(MotrStore *_st) + : store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, User* _u) + : StoreBucket(_u), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const rgw_bucket& _b) + : StoreBucket(_b), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const RGWBucketEnt& _e) + : StoreBucket(_e), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const RGWBucketInfo& _i) + : StoreBucket(_i), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const rgw_bucket& _b, User* _u) + : StoreBucket(_b, _u), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const RGWBucketEnt& _e, User* _u) + : StoreBucket(_e, _u), + store(_st), + acls() { + } + + MotrBucket(MotrStore *_st, const RGWBucketInfo& _i, User* _u) + : StoreBucket(_i, _u), + store(_st), + acls() { + } + + ~MotrBucket() { } + + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int list(const DoutPrefixProvider *dpp, ListParams&, int, ListResults&, optional_yield y) override; + virtual int remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override; + virtual int remove_bucket_bypass_gc(int concurrent_max, bool + keep_index_consistent, + optional_yield y, const + DoutPrefixProvider *dpp) override; + virtual RGWAccessControlPolicy& get_acl(void) 
override { return acls; } + virtual int set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy& acl, optional_yield y) override; + virtual int load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats = false) override; + int link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y); + int unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y); + int create_bucket_index(); + int create_multipart_indices(); + virtual int read_stats(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, int shard_id, + std::string *bucket_ver, std::string *master_ver, + std::map& stats, + std::string *max_marker = nullptr, + bool *syncstopped = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, RGWGetBucketStats_CB* ctx) override; + virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int update_container_stats(const DoutPrefixProvider *dpp) override; + virtual int check_bucket_shards(const DoutPrefixProvider *dpp) override; + virtual int chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) override; + virtual int put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time mtime) override; + virtual bool is_owner(User* user) override; + virtual int check_empty(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override; + virtual int merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& attrs, optional_yield y) override; + virtual int try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) override; + virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, + bool *is_truncated, RGWUsageIter& 
usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) override; + virtual int check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) override; + virtual int rebuild_index(const DoutPrefixProvider *dpp) override; + virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override; + virtual int purge_instance(const DoutPrefixProvider *dpp) override; + virtual std::unique_ptr clone() override { + return std::make_unique(*this); + } + virtual std::unique_ptr get_multipart_upload(const std::string& oid, + std::optional upload_id=std::nullopt, + ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override; + virtual int list_multiparts(const DoutPrefixProvider *dpp, + const std::string& prefix, + std::string& marker, + const std::string& delim, + const int& max_uploads, + std::vector>& uploads, + std::map *common_prefixes, + bool *is_truncated) override; + virtual int abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) override; + + friend class MotrStore; +}; + +class MotrPlacementTier: public StorePlacementTier { + MotrStore* store; + RGWZoneGroupPlacementTier tier; +public: + MotrPlacementTier(MotrStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {} + virtual ~MotrPlacementTier() = default; + + virtual const std::string& get_tier_type() { return tier.tier_type; } + virtual const std::string& get_storage_class() { return tier.storage_class; } + virtual bool retain_head_object() { return tier.retain_head_object; } + RGWZoneGroupPlacementTier& get_rt() { return tier; } +}; + +class MotrZoneGroup : public StoreZoneGroup { +protected: + MotrStore* store; + const RGWZoneGroup group; + std::string empty; +public: + MotrZoneGroup(MotrStore* _store) : store(_store), group() 
{} + MotrZoneGroup(MotrStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {} + virtual ~MotrZoneGroup() = default; + + virtual const std::string& get_id() const override { return group.get_id(); }; + virtual const std::string& get_name() const override { return group.get_name(); }; + virtual int equals(const std::string& other_zonegroup) const override { + return group.equals(other_zonegroup); + }; + /** Get the endpoint from zonegroup, or from master zone if not set */ + virtual const std::string& get_endpoint() const override; + virtual bool placement_target_exists(std::string& target) const override; + virtual bool is_master_zonegroup() const override { + return group.is_master_zonegroup(); + }; + virtual const std::string& get_api_name() const override { return group.api_name; }; + virtual int get_placement_target_names(std::set& names) const override; + virtual const std::string& get_default_placement_name() const override { + return group.default_placement.name; }; + virtual int get_hostnames(std::list& names) const override { + names = group.hostnames; + return 0; + }; + virtual int get_s3website_hostnames(std::list& names) const override { + names = group.hostnames_s3website; + return 0; + }; + virtual int get_zone_count() const override { + return group.zones.size(); + } + virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr* tier); + virtual int get_zone_by_id(const std::string& id, std::unique_ptr* zone) override { + return -1; + } + virtual int get_zone_by_name(const std::string& name, std::unique_ptr* zone) override { + return -1; + } + virtual int list_zones(std::list& zone_ids) override { + zone_ids.clear(); + return 0; + } + const RGWZoneGroup& get_group() { return group; } + virtual std::unique_ptr clone() override { + return std::make_unique(store, group); + } + friend class MotrZone; +}; + +class MotrZone : public StoreZone { + protected: + MotrStore* store; + RGWRealm *realm{nullptr}; + 
MotrZoneGroup zonegroup; + RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */ + RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */ + RGWPeriod *current_period{nullptr}; + + public: + MotrZone(MotrStore* _store) : store(_store), zonegroup(_store) { + realm = new RGWRealm(); + zone_public_config = new RGWZone(); + zone_params = new RGWZoneParams(); + current_period = new RGWPeriod(); + + // XXX: only default and STANDARD supported for now + RGWZonePlacementInfo info; + RGWZoneStorageClasses sc; + sc.set_storage_class("STANDARD", nullptr, nullptr); + info.storage_classes = sc; + zone_params->placement_pools["default"] = info; + } + MotrZone(MotrStore* _store, MotrZoneGroup _zg) : store(_store), zonegroup(_zg) { + realm = new RGWRealm(); + // TODO: fetch zonegroup params (eg. id) from provisioner config. + //zonegroup.group.set_id("0956b174-fe14-4f97-8b50-bb7ec5e1cf62"); + //zonegroup.group.api_name = "default"; + zone_public_config = new RGWZone(); + zone_params = new RGWZoneParams(); + current_period = new RGWPeriod(); + + // XXX: only default and STANDARD supported for now + RGWZonePlacementInfo info; + RGWZoneStorageClasses sc; + sc.set_storage_class("STANDARD", nullptr, nullptr); + info.storage_classes = sc; + zone_params->placement_pools["default"] = info; + } + ~MotrZone() = default; + + virtual std::unique_ptr clone() override { + return std::make_unique(store); + } + virtual ZoneGroup& get_zonegroup() override; + virtual const std::string& get_id() override; + virtual const std::string& get_name() const override; + virtual bool is_writeable() override; + virtual bool get_redirect_endpoint(std::string* endpoint) override; + virtual bool has_zonegroup_api(const std::string& api) const override; + virtual const std::string& get_current_period_id() override; + virtual const RGWAccessKey& get_system_key() { return zone_params->system_key; } + virtual const std::string& 
get_realm_name() { return realm->get_name(); } + virtual const std::string& get_realm_id() { return realm->get_id(); } + virtual const std::string_view get_tier_type() { return "rgw"; } + virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() { return nullptr; } + friend class MotrStore; +}; + +class MotrLuaManager : public StoreLuaManager { + MotrStore* store; + + public: + MotrLuaManager(MotrStore* _s) : store(_s) + { + } + virtual ~MotrLuaManager() = default; + + /** Get a script named with the given key from the backing store */ + virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) override; + /** Put a script named with the given key to the backing store */ + virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) override; + /** Delete a script named with the given key from the backing store */ + virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) override; + /** Add a lua package */ + virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override; + /** Remove a lua package */ + virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override; + /** List lua packages */ + virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) override; +}; + +class MotrOIDCProvider : public RGWOIDCProvider { + MotrStore* store; + public: + MotrOIDCProvider(MotrStore* _store) : store(_store) {} + ~MotrOIDCProvider() = default; + + virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override { return 0; } + virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override { return 0; } + virtual int delete_obj(const DoutPrefixProvider 
*dpp, optional_yield y) override { return 0;} + + void encode(bufferlist& bl) const { + RGWOIDCProvider::encode(bl); + } + void decode(bufferlist::const_iterator& bl) { + RGWOIDCProvider::decode(bl); + } +}; + +class MotrObject : public StoreObject { + private: + MotrStore *store; + RGWAccessControlPolicy acls; + RGWObjCategory category; + + // If this object is pat of a multipart uploaded one. + // TODO: do it in another class? MotrPartObject : public MotrObject + uint64_t part_off; + uint64_t part_size; + uint64_t part_num; + + public: + + // motr object metadata stored in index + struct Meta { + struct m0_uint128 oid = {}; + struct m0_fid pver = {}; + uint64_t layout_id = 0; + + void encode(bufferlist& bl) const + { + ENCODE_START(5, 5, bl); + encode(oid.u_hi, bl); + encode(oid.u_lo, bl); + encode(pver.f_container, bl); + encode(pver.f_key, bl); + encode(layout_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) + { + DECODE_START(5, bl); + decode(oid.u_hi, bl); + decode(oid.u_lo, bl); + decode(pver.f_container, bl); + decode(pver.f_key, bl); + decode(layout_id, bl); + DECODE_FINISH(bl); + } + }; + + struct m0_obj *mobj = NULL; + Meta meta; + + struct MotrReadOp : public ReadOp { + private: + MotrObject* source; + + // The set of part objects if the source is + // a multipart uploaded object. + std::map> part_objs; + + public: + MotrReadOp(MotrObject *_source); + + virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override; + + /* + * Both `read` and `iterate` read up through index `end` + * *inclusive*. The number of bytes that could be returned is + * `end - ofs + 1`. 
+ */ + virtual int read(int64_t off, int64_t end, bufferlist& bl, + optional_yield y, + const DoutPrefixProvider* dpp) override; + virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, + int64_t end, RGWGetDataCB* cb, + optional_yield y) override; + + virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override; + }; + + struct MotrDeleteOp : public DeleteOp { + private: + MotrObject* source; + + public: + MotrDeleteOp(MotrObject* _source); + + virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override; + }; + + MotrObject() = default; + + MotrObject(MotrStore *_st, const rgw_obj_key& _k) + : StoreObject(_k), store(_st), acls() {} + MotrObject(MotrStore *_st, const rgw_obj_key& _k, Bucket* _b) + : StoreObject(_k, _b), store(_st), acls() {} + + MotrObject(MotrObject& _o) = default; + + virtual ~MotrObject(); + + virtual int delete_object(const DoutPrefixProvider* dpp, + optional_yield y, + bool prevent_versioning = false) override; + virtual int copy_object(User* user, + req_info* info, const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, + const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, ceph::real_time* mtime, + const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, + bool high_precision_time, + const char* if_match, const char* if_nomatch, + AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, + RGWObjCategory category, uint64_t olh_epoch, + boost::optional delete_at, + std::string* version_id, std::string* tag, std::string* etag, + void (*progress_cb)(off_t, void *), void* progress_data, + const DoutPrefixProvider* dpp, optional_yield y) override; + virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } + virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } + virtual int get_obj_state(const DoutPrefixProvider* dpp, 
RGWObjState **state, optional_yield y, bool follow_olh = true) override; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; + virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; + virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; + virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; + virtual bool is_expired() override; + virtual void gen_rand_obj_instance_name() override; + virtual std::unique_ptr clone() override { + return std::unique_ptr(new MotrObject(*this)); + } + virtual std::unique_ptr get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name) override; + virtual int transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override; + virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override; + + /* Swift versioning */ + virtual int swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) override; + virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, + optional_yield y) override; + + /* OPs */ + virtual std::unique_ptr get_read_op() override; + virtual std::unique_ptr get_delete_op() override; + + /* OMAP */ + virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, + const std::set& keys, + Attrs* vals) override; + virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) override; + virtual int chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) override; + 
private: + //int read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr); + + public: + bool is_opened() { return mobj != NULL; } + int create_mobj(const DoutPrefixProvider *dpp, uint64_t sz); + int open_mobj(const DoutPrefixProvider *dpp); + int delete_mobj(const DoutPrefixProvider *dpp); + void close_mobj(); + int write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset); + int read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb); + unsigned get_optimal_bs(unsigned len); + + int get_part_objs(const DoutPrefixProvider *dpp, + std::map>& part_objs); + int open_part_objs(const DoutPrefixProvider* dpp, + std::map>& part_objs); + int read_multipart_obj(const DoutPrefixProvider* dpp, + int64_t off, int64_t end, RGWGetDataCB* cb, + std::map>& part_objs); + int delete_part_objs(const DoutPrefixProvider* dpp); + void set_category(RGWObjCategory _category) {category = _category;} + int get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent); + int update_version_entries(const DoutPrefixProvider *dpp); +}; + +// A placeholder locking class for multipart upload. +// TODO: implement it using Motr object locks. 
+class MPMotrSerializer : public StoreMPSerializer { + + public: + MPMotrSerializer(const DoutPrefixProvider *dpp, MotrStore* store, MotrObject* obj, const std::string& lock_name) {} + + virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override {return 0; } + virtual int unlock() override { return 0;} +}; + +class MotrAtomicWriter : public StoreWriter { + protected: + rgw::sal::MotrStore* store; + const rgw_user& owner; + const rgw_placement_rule *ptail_placement_rule; + uint64_t olh_epoch; + const std::string& unique_tag; + MotrObject obj; + MotrObject old_obj; + uint64_t total_data_size; // for total data being uploaded + bufferlist acc_data; // accumulated data + uint64_t acc_off; // accumulated data offset + + struct m0_bufvec buf; + struct m0_bufvec attr; + struct m0_indexvec ext; + + public: + MotrAtomicWriter(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + MotrStore* _store, + const rgw_user& _owner, + const rgw_placement_rule *_ptail_placement_rule, + uint64_t _olh_epoch, + const std::string& _unique_tag); + ~MotrAtomicWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + int write(); + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) override; + + unsigned populate_bvec(unsigned len, bufferlist::iterator &bi); + void cleanup(); +}; + +class MotrMultipartWriter : public StoreWriter { +protected: + rgw::sal::MotrStore* store; + + // Head object. + rgw::sal::Object* head_obj; + + // Part parameters. 
+ const uint64_t part_num; + const std::string part_num_str; + std::unique_ptr part_obj; + uint64_t actual_part_size = 0; + +public: + MotrMultipartWriter(const DoutPrefixProvider *dpp, + optional_yield y, MultipartUpload* upload, + rgw::sal::Object* obj, + MotrStore* _store, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t _part_num, const std::string& part_num_str) : + StoreWriter(dpp, y), store(_store), head_obj(obj), + part_num(_part_num), part_num_str(part_num_str) + { + } + ~MotrMultipartWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) override; +}; + +// The implementation of multipart upload in POC roughly follows the +// cortx-s3server's design. Parts are stored in separate Motr objects. +// s3server uses a few auxiliary Motr indices to manage multipart +// related metadata: (1) Bucket multipart index (bucket_nnn_multipart_index) +// which contains metadata that answers questions such as which objects have +// started multipart upload and its upload id. This index is created during +// bucket creation. (2) Object part index (object_nnn_part_index) which stores +// metadata of a part's details (size, pvid, oid...). This index is created in +// MotrMultipartUpload::init(). (3) Extended metadata index +// (bucket_nnn_extended_metadata): once parts has been uploaded and their +// metadata saved in the part index, the user may issue multipart completion +// request. 
When processing the completion request, the parts are read from +// object part index and for each part an entry is created in extended index. +// The entry for the object is created in bucket (object list) index. The part +// index is deleted and an entry removed from bucket_nnn_multipart_index. Like +// bucket multipart index, bucket part extened metadata index is created during +// bucket creation. +// +// The extended metadata index is used mainly due to fault tolerant +// considerations (how to handle Motr service crash when uploading an object) +// and to avoid to create too many Motr indices (I am not sure I understand +// why many Motr indices is bad.). In our POC, to keep it simple, only 2 +// indices are maintained: bucket multipart index and object_nnn_part_index. +// +// + +class MotrMultipartPart : public StoreMultipartPart { +protected: + RGWUploadPartInfo info; + +public: + MotrObject::Meta meta; + + MotrMultipartPart(RGWUploadPartInfo _info, MotrObject::Meta _meta) : + info(_info), meta(_meta) {} + virtual ~MotrMultipartPart() = default; + + virtual uint32_t get_num() { return info.num; } + virtual uint64_t get_size() { return info.accounted_size; } + virtual const std::string& get_etag() { return info.etag; } + virtual ceph::real_time& get_mtime() { return info.modified; } + + RGWObjManifest& get_manifest() { return info.manifest; } + + friend class MotrMultipartUpload; +}; + +class MotrMultipartUpload : public StoreMultipartUpload { + MotrStore* store; + RGWMPObj mp_obj; + ACLOwner owner; + ceph::real_time mtime; + rgw_placement_rule placement; + RGWObjManifest manifest; + +public: + MotrMultipartUpload(MotrStore* _store, Bucket* _bucket, const std::string& oid, + std::optional upload_id, ACLOwner _owner, ceph::real_time _mtime) : + StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), owner(_owner), mtime(_mtime) {} + virtual ~MotrMultipartUpload() = default; + + virtual const std::string& get_meta() const { return 
mp_obj.get_meta(); } + virtual const std::string& get_key() const { return mp_obj.get_key(); } + virtual const std::string& get_upload_id() const { return mp_obj.get_upload_id(); } + virtual const ACLOwner& get_owner() const override { return owner; } + virtual ceph::real_time& get_mtime() { return mtime; } + virtual std::unique_ptr get_meta_obj() override; + virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override; + virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct, + int num_parts, int marker, + int* next_marker, bool* truncated, + bool assume_unsorted = false) override; + virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; + virtual int complete(const DoutPrefixProvider* dpp, + optional_yield y, CephContext* cct, + std::map& part_etags, + std::list& remove_objs, + uint64_t& accounted_size, bool& compressed, + RGWCompressionInfo& cs_info, off_t& off, + std::string& tag, ACLOwner& owner, + uint64_t olh_epoch, + rgw::sal::Object* target_obj) override; + virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override; + virtual std::unique_ptr get_writer(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t part_num, + const std::string& part_num_str) override; + int delete_parts(const DoutPrefixProvider *dpp); +}; + +class MotrStore : public StoreDriver { + private: + MotrZone zone; + RGWSyncModuleInstanceRef sync_module; + + MotrMetaCache* obj_meta_cache; + MotrMetaCache* user_cache; + MotrMetaCache* bucket_inst_cache; + + public: + CephContext *cctx; + struct m0_client *instance; + struct m0_container container; + struct m0_realm uber_realm; + struct m0_config conf = {}; + struct m0_idx_dix_config dix_conf = {}; + + MotrStore(CephContext *c): 
zone(this), cctx(c) {} + ~MotrStore() { + delete obj_meta_cache; + delete user_cache; + delete bucket_inst_cache; + } + + virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) { return 0; } + virtual const std::string get_name() const override { + return "motr"; + } + + virtual std::unique_ptr get_user(const rgw_user& u) override; + virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y, std::unique_ptr* user) override; + virtual int get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr* user) override; + virtual int get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) override; + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) override; + virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) override; + virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr* bucket, optional_yield y) override; + virtual bool is_meta_master() override; + virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv, + bufferlist& in_data, JSONParser *jp, req_info& info, + optional_yield y) override; + virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, + bufferlist& in_data, + RGWXMLDecoder::XMLParser* parser, req_info& info, + optional_yield y) override; + virtual Zone* get_zone() { return &zone; } + virtual std::string zone_unique_id(uint64_t unique_num) override; + virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; + virtual 
int get_zonegroup(const std::string& id, std::unique_ptr* zonegroup) override; + virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list& zone_ids) override; + virtual int cluster_stat(RGWClusterStat& stats) override; + virtual std::unique_ptr get_lifecycle(void) override; + virtual std::unique_ptr get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj, + req_state* s, rgw::notify::EventType event_type, optional_yield y, const std::string* object_name=nullptr) override; + virtual std::unique_ptr get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj, + rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, + std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) override; + virtual RGWLC* get_rgwlc(void) override { return NULL; } + virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return NULL; } + + virtual int log_usage(const DoutPrefixProvider *dpp, std::map& usage_info) override; + virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override; + virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, + const std::map& meta) override; + virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override; + virtual void get_quota(RGWQuota& quota) override; + virtual int set_buckets_enabled(const DoutPrefixProvider *dpp, std::vector& buckets, bool enabled) override; + virtual int get_sync_policy_handler(const DoutPrefixProvider *dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef *phandler, + optional_yield y) override; + virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override; + virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { return; } + virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, 
const rgw_zone_id& source_zone, boost::container::flat_map>& shard_ids) override {} + virtual int clear_usage(const DoutPrefixProvider *dpp) override { return 0; } + virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool *is_truncated, + RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + virtual int get_config_key_val(std::string name, bufferlist* bl) override; + virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override; + virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list& keys, bool* truncated) override; + virtual void meta_list_keys_complete(void* handle) override; + virtual std::string meta_get_marker(void *handle) override; + virtual int meta_remove(const DoutPrefixProvider *dpp, std::string& metadata_key, optional_yield y) override; + + virtual const RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; } + virtual std::string get_host_id() { return ""; } + + virtual std::unique_ptr get_lua_manager() override; + virtual std::unique_ptr get_role(std::string name, + std::string tenant, + std::string path="", + std::string trust_policy="", + std::string max_session_duration_str="", + std::multimap tags={}) override; + virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; + virtual std::unique_ptr get_role(std::string id) override; + virtual int get_roles(const DoutPrefixProvider *dpp, + optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + std::vector>& roles) override; + virtual std::unique_ptr get_oidc_provider() override; + virtual int get_oidc_providers(const DoutPrefixProvider *dpp, + const std::string& tenant, + std::vector>& providers) override; + virtual std::unique_ptr 
get_append_writer(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + const std::string& unique_tag, + uint64_t position, + uint64_t *cur_accounted_size) override; + virtual std::unique_ptr get_atomic_writer(const DoutPrefixProvider *dpp, + optional_yield y, + rgw::sal::Object* obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t olh_epoch, + const std::string& unique_tag) override; + virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override; + virtual bool valid_placement(const rgw_placement_rule& rule) override; + + virtual void finalize(void) override; + + virtual CephContext *ctx(void) override { + return cctx; + } + + virtual void register_admin_apis(RGWRESTMgr* mgr) override { }; + + int open_idx(struct m0_uint128 *id, bool create, struct m0_idx *out); + void close_idx(struct m0_idx *idx) { m0_idx_fini(idx); } + int do_idx_op(struct m0_idx *, enum m0_idx_opcode opcode, + std::vector& key, std::vector& val, bool update = false); + + int do_idx_next_op(struct m0_idx *idx, + std::vector>& key_vec, + std::vector>& val_vec); + int next_query_by_name(std::string idx_name, std::vector& key_str_vec, + std::vector& val_bl_vec, + std::string prefix="", std::string delim=""); + + void index_name_to_motr_fid(std::string iname, struct m0_uint128 *fid); + int open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx); + int create_motr_idx_by_name(std::string iname); + int delete_motr_idx_by_name(std::string iname); + int do_idx_op_by_name(std::string idx_name, enum m0_idx_opcode opcode, + std::string key_str, bufferlist &bl, bool update=true); + int check_n_create_global_indices(); + int store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key); + int delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key); + int store_email_info(const 
DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info); + + int init_metadata_cache(const DoutPrefixProvider *dpp, CephContext *cct); + MotrMetaCache* get_obj_meta_cache() {return obj_meta_cache;} + MotrMetaCache* get_user_cache() {return user_cache;} + MotrMetaCache* get_bucket_inst_cache() {return bucket_inst_cache;} +}; + +struct obj_time_weight { + real_time mtime; + uint32_t zone_short_id; + uint64_t pg_ver; + bool high_precision; + + obj_time_weight() : zone_short_id(0), pg_ver(0), high_precision(false) {} + + bool compare_low_precision(const obj_time_weight& rhs) { + struct timespec l = ceph::real_clock::to_timespec(mtime); + struct timespec r = ceph::real_clock::to_timespec(rhs.mtime); + l.tv_nsec = 0; + r.tv_nsec = 0; + if (l > r) { + return false; + } + if (l < r) { + return true; + } + if (!zone_short_id || !rhs.zone_short_id) { + /* don't compare zone ids, if one wasn't provided */ + return false; + } + if (zone_short_id != rhs.zone_short_id) { + return (zone_short_id < rhs.zone_short_id); + } + return (pg_ver < rhs.pg_ver); + + } + + bool operator<(const obj_time_weight& rhs) { + if (!high_precision || !rhs.high_precision) { + return compare_low_precision(rhs); + } + if (mtime > rhs.mtime) { + return false; + } + if (mtime < rhs.mtime) { + return true; + } + if (!zone_short_id || !rhs.zone_short_id) { + /* don't compare zone ids, if one wasn't provided */ + return false; + } + if (zone_short_id != rhs.zone_short_id) { + return (zone_short_id < rhs.zone_short_id); + } + return (pg_ver < rhs.pg_ver); + } + + void init(const real_time& _mtime, uint32_t _short_id, uint64_t _pg_ver) { + mtime = _mtime; + zone_short_id = _short_id; + pg_ver = _pg_ver; + } + + void init(RGWObjState *state) { + mtime = state->mtime; + zone_short_id = state->zone_short_id; + pg_ver = state->pg_ver; + } +}; + +inline std::ostream& operator<<(std::ostream& out, const obj_time_weight &o) { + out << o.mtime; + + if (o.zone_short_id != 0 || o.pg_ver != 0) { + out << 
"[zid=" << o.zone_short_id << ", pgv=" << o.pg_ver << "]"; + } + + return out; +} + +} // namespace rgw::sal diff --git a/src/rgw/rgw_sal.cc b/src/rgw/rgw_sal.cc index b69628911c1a..009eb34e736b 100644 --- a/src/rgw/rgw_sal.cc +++ b/src/rgw/rgw_sal.cc @@ -33,11 +33,11 @@ #endif #ifdef WITH_RADOSGW_MOTR -#include "rgw_sal_motr.h" +#include "driver/motr/rgw_sal_motr.h" #endif #ifdef WITH_RADOSGW_DAOS -#include "rgw_sal_daos.h" +#include "driver/daos/rgw_sal_daos.h" #endif #define dout_subsys ceph_subsys_rgw diff --git a/src/rgw/rgw_sal_daos.cc b/src/rgw/rgw_sal_daos.cc deleted file mode 100644 index a8663805d828..000000000000 --- a/src/rgw/rgw_sal_daos.cc +++ /dev/null @@ -1,2450 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=2 sw=2 expandtab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * SAL implementation for the CORTX DAOS backend - * - * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include "rgw_sal_daos.h" - -#include -#include -#include - -#include -#include - -#include "common/Clock.h" -#include "common/errno.h" -#include "rgw_bucket.h" -#include "rgw_compression.h" -#include "rgw_sal.h" - -#define dout_subsys ceph_subsys_rgw - -using std::list; -using std::map; -using std::set; -using std::string; -using std::vector; - -namespace fs = std::filesystem; - -namespace rgw::sal { - -using ::ceph::decode; -using ::ceph::encode; - -int DaosUser::list_buckets(const DoutPrefixProvider* dpp, const string& marker, - const string& end_marker, uint64_t max, - bool need_stats, BucketList& buckets, - optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: list_user_buckets: marker=" << marker - << " end_marker=" << end_marker << " max=" << max << dendl; - int ret = 0; - bool is_truncated = false; - buckets.clear(); - vector bucket_infos(max); - daos_size_t bcount = bucket_infos.size(); - vector> values(bcount, vector(DS3_MAX_ENCODED_LEN)); - for (daos_size_t i = 0; i < bcount; i++) { - bucket_infos[i].encoded = values[i].data(); - bucket_infos[i].encoded_length = values[i].size(); - } - - char daos_marker[DS3_MAX_BUCKET_NAME]; - std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker)); - ret = ds3_bucket_list(&bcount, bucket_infos.data(), daos_marker, - &is_truncated, store->ds3, nullptr); - ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_list: bcount=" << bcount - << " ret=" << ret << dendl; - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list failed!" 
<< ret << dendl; - return ret; - } - - bucket_infos.resize(bcount); - values.resize(bcount); - - for (const auto& bi : bucket_infos) { - DaosBucketInfo dbinfo; - bufferlist bl; - bl.append(reinterpret_cast(bi.encoded), bi.encoded_length); - auto iter = bl.cbegin(); - dbinfo.decode(iter); - buckets.add(std::make_unique(this->store, dbinfo.info, this)); - } - - buckets.set_truncated(is_truncated); - return 0; -} - -int DaosUser::create_bucket( - const DoutPrefixProvider* dpp, const rgw_bucket& b, - const std::string& zonegroup_id, rgw_placement_rule& placement_rule, - std::string& swift_ver_location, const RGWQuotaInfo* pquota_info, - const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info, - obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, bool* existed, - req_info& req_info, std::unique_ptr* bucket_out, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: create_bucket:" << b.name << dendl; - int ret; - std::unique_ptr bucket; - - // Look up the bucket. Create it if it doesn't exist. - ret = this->store->get_bucket(dpp, this, b, &bucket, y); - if (ret != 0 && ret != -ENOENT) { - return ret; - } - - if (ret != -ENOENT) { - *existed = true; - if (swift_ver_location.empty()) { - swift_ver_location = bucket->get_info().swift_ver_location; - } - placement_rule.inherit_from(bucket->get_info().placement_rule); - - // TODO: ACL policy - // // don't allow changes to the acl policy - // RGWAccessControlPolicy old_policy(ctx()); - // int rc = rgw_op_get_bucket_policy_from_attr( - // dpp, this, u, bucket->get_attrs(), &old_policy, y); - // if (rc >= 0 && old_policy != policy) { - // bucket_out->swap(bucket); - // return -EEXIST; - //} - } else { - placement_rule.name = "default"; - placement_rule.storage_class = "STANDARD"; - bucket = std::make_unique(store, b, this); - bucket->set_attrs(attrs); - - *existed = false; - } - - // TODO: how to handle zone and multi-site. 
- - if (!*existed) { - info.placement_rule = placement_rule; - info.bucket = b; - info.owner = this->get_info().user_id; - info.zonegroup = zonegroup_id; - info.creation_time = ceph::real_clock::now(); - if (obj_lock_enabled) - info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; - bucket->set_version(ep_objv); - bucket->get_info() = info; - - // Create a new bucket: - DaosBucket* daos_bucket = static_cast(bucket.get()); - bufferlist bl; - std::unique_ptr bucket_info = - daos_bucket->get_encoded_info(bl, ceph::real_time()); - ret = ds3_bucket_create(bucket->get_name().c_str(), bucket_info.get(), - nullptr, store->ds3, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_create failed! ret=" << ret - << dendl; - return ret; - } - } else { - bucket->set_version(ep_objv); - bucket->get_info() = info; - } - - bucket_out->swap(bucket); - - return ret; -} - -int DaosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y, - RGWStorageStats* stats, - ceph::real_time* last_stats_sync, - ceph::real_time* last_stats_update) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -/* stats - Not for first pass */ -int DaosUser::read_stats_async(const DoutPrefixProvider* dpp, - RGWGetUserStats_CB* cb) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosUser::complete_flush_stats(const DoutPrefixProvider* dpp, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosUser::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch, uint32_t max_entries, - bool* is_truncated, RGWUsageIter& usage_iter, - map& usage) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosUser::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) { - const 
string name = info.user_id.to_str(); - ldpp_dout(dpp, 20) << "DEBUG: load_user, name=" << name << dendl; - - DaosUserInfo duinfo; - int ret = read_user(dpp, name, &duinfo); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: load_user failed, name=" << name << dendl; - return ret; - } - - info = duinfo.info; - attrs = duinfo.attrs; - objv_tracker.read_version = duinfo.user_version; - return 0; -} - -int DaosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, - Attrs& new_attrs, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs - << dendl; - for (auto& it : new_attrs) { - attrs[it.first] = it.second; - } - return store_user(dpp, y, false); -} - -int DaosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y, - bool exclusive, RGWUserInfo* old_info) { - const string name = info.user_id.to_str(); - ldpp_dout(dpp, 10) << "DEBUG: Store_user(): User name=" << name << dendl; - - // Read user - int ret = 0; - struct DaosUserInfo duinfo; - ret = read_user(dpp, name, &duinfo); - obj_version obj_ver = duinfo.user_version; - std::unique_ptr old_user_info; - std::vector old_access_ids; - - // Check if the user already exists - if (ret == 0 && obj_ver.ver) { - // already exists. - - if (old_info) { - *old_info = duinfo.info; - } - - if (objv_tracker.read_version.ver != obj_ver.ver) { - // Object version mismatch.. 
return ECANCELED - ret = -ECANCELED; - ldpp_dout(dpp, 0) << "User Read version mismatch read_version=" - << objv_tracker.read_version.ver - << " obj_ver=" << obj_ver.ver << dendl; - return ret; - } - - if (exclusive) { - // return - return ret; - } - obj_ver.ver++; - - for (auto const& [id, key] : duinfo.info.access_keys) { - old_access_ids.push_back(id.c_str()); - } - old_user_info.reset( - new ds3_user_info{.name = duinfo.info.user_id.to_str().c_str(), - .email = duinfo.info.user_email.c_str(), - .access_ids = old_access_ids.data(), - .access_ids_nr = old_access_ids.size()}); - } else { - obj_ver.ver = 1; - obj_ver.tag = "UserTAG"; - } - - bufferlist bl; - std::unique_ptr user_info = - get_encoded_info(bl, obj_ver); - - ret = ds3_user_set(name.c_str(), user_info.get(), old_user_info.get(), - store->ds3, nullptr); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name - << " ret=" << ret << dendl; - } - - return ret; -} - -int DaosUser::read_user(const DoutPrefixProvider* dpp, std::string name, - DaosUserInfo* duinfo) { - // Initialize ds3_user_info - bufferlist bl; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), - .encoded_length = size}; - - int ret = ds3_user_get(name.c_str(), &user_info, store->ds3, nullptr); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "Error: ds3_user_get failed, name=" << name - << " ret=" << ret << dendl; - return ret; - } - - // Decode - bufferlist& blr = bl; - auto iter = blr.cbegin(); - duinfo->decode(iter); - return ret; -} - -std::unique_ptr DaosUser::get_encoded_info( - bufferlist& bl, obj_version& obj_ver) { - // Encode user data - struct DaosUserInfo duinfo; - duinfo.info = info; - duinfo.attrs = attrs; - duinfo.user_version = obj_ver; - duinfo.encode(bl); - - // Initialize ds3_user_info - access_ids.clear(); - for (auto const& [id, key] : info.access_keys) { - access_ids.push_back(id.c_str()); - } - return std::unique_ptr( - new 
ds3_user_info{.name = info.user_id.to_str().c_str(), - .email = info.user_email.c_str(), - .access_ids = access_ids.data(), - .access_ids_nr = access_ids.size(), - .encoded = bl.c_str(), - .encoded_length = bl.length()}); -} - -int DaosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) { - const string name = info.user_id.to_str(); - - // TODO: the expectation is that the object version needs to be passed in as a - // method arg see int DB::remove_user(const DoutPrefixProvider *dpp, - // RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv) - obj_version obj_ver; - bufferlist bl; - std::unique_ptr user_info = - get_encoded_info(bl, obj_ver); - - // Remove user - int ret = ds3_user_remove(name.c_str(), user_info.get(), store->ds3, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name - << " ret=" << ret << dendl; - } - return ret; -} - -DaosBucket::~DaosBucket() { close(nullptr); } - -int DaosBucket::open(const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: open, name=" << info.bucket.name.c_str() - << dendl; - // Idempotent - if (is_open()) { - return 0; - } - - int ret = ds3_bucket_open(get_name().c_str(), &ds3b, store->ds3, nullptr); - ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_open, name=" << get_name() - << ", ret=" << ret << dendl; - - return ret; -} - -int DaosBucket::close(const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: close" << dendl; - // Idempotent - if (!is_open()) { - return 0; - } - - int ret = ds3_bucket_close(ds3b, nullptr); - ds3b = nullptr; - ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_close ret=" << ret << dendl; - - return ret; -} - -std::unique_ptr DaosBucket::get_encoded_info( - bufferlist& bl, ceph::real_time _mtime) { - DaosBucketInfo dbinfo; - dbinfo.info = info; - dbinfo.bucket_attrs = attrs; - dbinfo.mtime = _mtime; - dbinfo.bucket_version = bucket_version; - dbinfo.encode(bl); - - auto bucket_info = std::make_unique(); - bucket_info->encoded = bl.c_str(); - 
bucket_info->encoded_length = bl.length(); - std::strncpy(bucket_info->name, get_name().c_str(), sizeof(bucket_info->name)); - return bucket_info; -} - -int DaosBucket::remove_bucket(const DoutPrefixProvider* dpp, - bool delete_children, bool forward_to_master, - req_info* req_info, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: remove_bucket, delete_children=" - - << delete_children - - << " forward_to_master=" << forward_to_master << dendl; - - return ds3_bucket_destroy(get_name().c_str(), delete_children, store->ds3, - nullptr); -} - -int DaosBucket::remove_bucket_bypass_gc(int concurrent_max, - bool keep_index_consistent, - optional_yield y, - const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: remove_bucket_bypass_gc, concurrent_max=" - - << concurrent_max - - << " keep_index_consistent=" << keep_index_consistent - - << dendl; - return ds3_bucket_destroy(get_name().c_str(), true, store->ds3, nullptr); -} - -int DaosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, - ceph::real_time _mtime) { - ldpp_dout(dpp, 20) << "DEBUG: put_info(): bucket name=" << get_name() - << dendl; - - int ret = open(dpp); - if (ret != 0) { - return ret; - } - - bufferlist bl; - std::unique_ptr bucket_info = - get_encoded_info(bl, ceph::real_time()); - - ret = ds3_bucket_set_info(bucket_info.get(), ds3b, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_set_info failed: " << ret << dendl; - } - return ret; -} - -int DaosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y, - bool get_stats) { - ldpp_dout(dpp, 20) << "DEBUG: load_bucket(): bucket name=" << get_name() - << dendl; - int ret = open(dpp); - if (ret != 0) { - return ret; - } - - bufferlist bl; - DaosBucketInfo dbinfo; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_bucket_info bucket_info = {.encoded = bl.append_hole(size).c_str(), - .encoded_length = size}; - - ret = ds3_bucket_get_info(&bucket_info, ds3b, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) 
<< "ERROR: ds3_bucket_get_info failed: " << ret << dendl; - return ret; - } - - auto iter = bl.cbegin(); - dbinfo.decode(iter); - info = dbinfo.info; - rgw_placement_rule placement_rule; - placement_rule.name = "default"; - placement_rule.storage_class = "STANDARD"; - info.placement_rule = placement_rule; - - attrs = dbinfo.bucket_attrs; - mtime = dbinfo.mtime; - bucket_version = dbinfo.bucket_version; - return ret; -} - -/* stats - Not for first pass */ -int DaosBucket::read_stats(const DoutPrefixProvider* dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, std::string* bucket_ver, - std::string* master_ver, - std::map& stats, - std::string* max_marker, bool* syncstopped) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::read_stats_async( - const DoutPrefixProvider* dpp, - const bucket_index_layout_generation& idx_layout, int shard_id, - RGWGetBucketStats_CB* ctx) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::sync_user_stats(const DoutPrefixProvider* dpp, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::update_container_stats(const DoutPrefixProvider* dpp) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::check_bucket_shards(const DoutPrefixProvider* dpp) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::chown(const DoutPrefixProvider* dpp, User& new_user, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -/* Make sure to call load_bucket() if you need it first */ -bool DaosBucket::is_owner(User* user) { - return (info.owner.compare(user->get_id()) == 0); -} - -int DaosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) { - /* XXX: Check if bucket contains any objects */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota, - uint64_t obj_size, optional_yield y, - bool check_size_only) { - /* Not Handled in the first pass as stats are also needed */ 
- return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp, - Attrs& new_attrs, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs - << dendl; - for (auto& it : new_attrs) { - attrs[it.first] = it.second; - } - - return put_info(dpp, y, ceph::real_time()); -} - -int DaosBucket::try_refresh_info(const DoutPrefixProvider* dpp, - ceph::real_time* pmtime) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -/* XXX: usage and stats not supported in the first pass */ -int DaosBucket::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch, uint32_t max_entries, - bool* is_truncated, RGWUsageIter& usage_iter, - map& usage) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::remove_objs_from_index( - const DoutPrefixProvider* dpp, - std::list& objs_to_unlink) { - /* XXX: CHECK: Unlike RadosStore, there is no seperate bucket index table. - * Delete all the object in the list from the object table of this - * bucket - */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::check_index( - const DoutPrefixProvider* dpp, - std::map& existing_stats, - std::map& calculated_stats) { - /* XXX: stats not supported yet */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::rebuild_index(const DoutPrefixProvider* dpp) { - /* there is no index table in DAOS. Not applicable */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::set_tag_timeout(const DoutPrefixProvider* dpp, - uint64_t timeout) { - /* XXX: CHECK: set tag timeout for all the bucket objects? */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::purge_instance(const DoutPrefixProvider* dpp) { - /* XXX: CHECK: for DAOS only single instance supported. - * Remove all the objects for that instance? 
Anything extra needed? - */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosBucket::set_acl(const DoutPrefixProvider* dpp, - RGWAccessControlPolicy& acl, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: set_acl" << dendl; - int ret = 0; - bufferlist aclbl; - - acls = acl; - acl.encode(aclbl); - - Attrs attrs = get_attrs(); - attrs[RGW_ATTR_ACL] = aclbl; - - return ret; -} - -std::unique_ptr DaosBucket::get_object(const rgw_obj_key& k) { - return std::make_unique(this->store, k, this); -} - -bool compare_rgw_bucket_dir_entry(rgw_bucket_dir_entry& entry1, - rgw_bucket_dir_entry& entry2) { - return (entry1.key < entry2.key); -} - -bool compare_multipart_upload(std::unique_ptr& upload1, - std::unique_ptr& upload2) { - return (upload1->get_key() < upload2->get_key()); -} - -int DaosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max, - ListResults& results, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: list bucket=" << get_name() << " max=" << max - << " params=" << params << dendl; - // End - if (max == 0) { - return 0; - } - - int ret = open(dpp); - if (ret != 0) { - return ret; - } - - // Init needed structures - vector object_infos(max); - uint32_t nobj = object_infos.size(); - vector> values(nobj, vector(DS3_MAX_ENCODED_LEN)); - for (uint32_t i = 0; i < nobj; i++) { - object_infos[i].encoded = values[i].data(); - object_infos[i].encoded_length = values[i].size(); - } - - vector common_prefixes(max); - uint32_t ncp = common_prefixes.size(); - - char daos_marker[DS3_MAX_KEY_BUFF]; - std::strncpy(daos_marker, params.marker.get_oid().c_str(), sizeof(daos_marker)); - - ret = ds3_bucket_list_obj(&nobj, object_infos.data(), &ncp, - common_prefixes.data(), params.prefix.c_str(), - params.delim.c_str(), daos_marker, - params.list_versions, &results.is_truncated, ds3b); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list_obj failed, name=" - << get_name() << ", ret=" << ret << dendl; - return ret; - } - - 
object_infos.resize(nobj); - values.resize(nobj); - common_prefixes.resize(ncp); - - // Fill common prefixes - for (auto const& cp : common_prefixes) { - results.common_prefixes[cp.prefix] = true; - } - - // Decode objs - for (auto const& obj : object_infos) { - bufferlist bl; - rgw_bucket_dir_entry ent; - bl.append(reinterpret_cast(obj.encoded), obj.encoded_length); - auto iter = bl.cbegin(); - ent.decode(iter); - if (params.list_versions || ent.is_visible()) { - results.objs.emplace_back(std::move(ent)); - } - } - - if (!params.allow_unordered) { - std::sort(results.objs.begin(), results.objs.end(), - compare_rgw_bucket_dir_entry); - } - - return ret; -} - -int DaosBucket::list_multiparts( - const DoutPrefixProvider* dpp, const string& prefix, string& marker, - const string& delim, const int& max_uploads, - vector>& uploads, - map* common_prefixes, bool* is_truncated) { - ldpp_dout(dpp, 20) << "DEBUG: list_multiparts" << dendl; - // End of uploading - if (max_uploads == 0) { - *is_truncated = false; - return 0; - } - - // Init needed structures - vector multipart_upload_infos(max_uploads); - uint32_t nmp = multipart_upload_infos.size(); - vector> values(nmp, vector(DS3_MAX_ENCODED_LEN)); - for (uint32_t i = 0; i < nmp; i++) { - multipart_upload_infos[i].encoded = values[i].data(); - multipart_upload_infos[i].encoded_length = values[i].size(); - } - - vector cps(max_uploads); - uint32_t ncp = cps.size(); - - char daos_marker[DS3_MAX_KEY_BUFF]; - std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker)); - - int ret = ds3_bucket_list_multipart( - get_name().c_str(), &nmp, multipart_upload_infos.data(), &ncp, cps.data(), - prefix.c_str(), delim.c_str(), daos_marker, is_truncated, store->ds3); - - multipart_upload_infos.resize(nmp); - values.resize(nmp); - cps.resize(ncp); - - // Fill common prefixes - for (auto const& cp : cps) { - (*common_prefixes)[cp.prefix] = true; - } - - for (auto const& mp : multipart_upload_infos) { - // Decode the xattr - bufferlist 
bl; - rgw_bucket_dir_entry ent; - bl.append(reinterpret_cast(mp.encoded), mp.encoded_length); - auto iter = bl.cbegin(); - ent.decode(iter); - string name = ent.key.name; - - ACLOwner owner(rgw_user(ent.meta.owner)); - owner.set_name(ent.meta.owner_display_name); - uploads.push_back(this->get_multipart_upload( - name, mp.upload_id, std::move(owner), ent.meta.mtime)); - } - - // Sort uploads - std::sort(uploads.begin(), uploads.end(), compare_multipart_upload); - - return ret; -} - -int DaosBucket::abort_multiparts(const DoutPrefixProvider* dpp, - CephContext* cct) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -void DaosStore::finalize(void) { - ldout(cctx, 20) << "DEBUG: finalize" << dendl; - int ret; - - ret = ds3_disconnect(ds3, nullptr); - if (ret != 0) { - ldout(cctx, 0) << "ERROR: ds3_disconnect() failed: " << ret << dendl; - } - ds3 = nullptr; - - ret = ds3_fini(); - if (ret != 0) { - ldout(cctx, 0) << "ERROR: daos_fini() failed: " << ret << dendl; - } -} - -int DaosStore::initialize(CephContext* cct, const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: initialize" << dendl; - int ret = ds3_init(); - - // DS3 init failed, allow the case where init is already done - if (ret != 0 && ret != DER_ALREADY) { - ldout(cct, 0) << "ERROR: ds3_init() failed: " << ret << dendl; - return ret; - } - - // XXX: these params should be taken from config settings and - // cct somehow? 
- const auto& daos_pool = cct->_conf.get_val("daos_pool"); - ldout(cct, 20) << "INFO: daos pool: " << daos_pool << dendl; - - ret = ds3_connect(daos_pool.c_str(), nullptr, &ds3, nullptr); - - if (ret != 0) { - ldout(cct, 0) << "ERROR: ds3_connect() failed: " << ret << dendl; - ds3_fini(); - } - - return ret; -} - -const std::string& DaosZoneGroup::get_endpoint() const { - if (!group.endpoints.empty()) { - return group.endpoints.front(); - } else { - // use zonegroup's master zone endpoints - auto z = group.zones.find(group.master_zone); - if (z != group.zones.end() && !z->second.endpoints.empty()) { - return z->second.endpoints.front(); - } - } - return empty; -} - -bool DaosZoneGroup::placement_target_exists(std::string& target) const { - return !!group.placement_targets.count(target); -} - -int DaosZoneGroup::get_placement_target_names( - std::set& names) const { - for (const auto& target : group.placement_targets) { - names.emplace(target.second.name); - } - - return 0; -} - -int DaosZoneGroup::get_placement_tier(const rgw_placement_rule& rule, - std::unique_ptr* tier) { - std::map::const_iterator titer; - titer = group.placement_targets.find(rule.name); - if (titer == group.placement_targets.end()) { - return -ENOENT; - } - - const auto& target_rule = titer->second; - std::map::const_iterator ttier; - ttier = target_rule.tier_targets.find(rule.storage_class); - if (ttier == target_rule.tier_targets.end()) { - // not found - return -ENOENT; - } - - PlacementTier* t; - t = new DaosPlacementTier(store, ttier->second); - if (!t) return -ENOMEM; - - tier->reset(t); - return 0; -} - -ZoneGroup& DaosZone::get_zonegroup() { return zonegroup; } - -int DaosZone::get_zonegroup(const std::string& id, - std::unique_ptr* group) { - /* XXX: for now only one zonegroup supported */ - ZoneGroup* zg; - zg = new DaosZoneGroup(store, zonegroup.get_group()); - - group->reset(zg); - return 0; -} - -const rgw_zone_id& DaosZone::get_id() { return cur_zone_id; } - -const std::string& 
DaosZone::get_name() const { - return zone_params->get_name(); -} - -bool DaosZone::is_writeable() { return true; } - -bool DaosZone::get_redirect_endpoint(std::string* endpoint) { return false; } - -bool DaosZone::has_zonegroup_api(const std::string& api) const { return false; } - -const std::string& DaosZone::get_current_period_id() { - return current_period->get_id(); -} - -std::unique_ptr DaosStore::get_lua_manager() { - return std::make_unique(this); -} - -int DaosObject::get_obj_state(const DoutPrefixProvider* dpp, - RGWObjState** _state, optional_yield y, - bool follow_olh) { - // Get object's metadata (those stored in rgw_bucket_dir_entry) - ldpp_dout(dpp, 20) << "DEBUG: get_obj_state" << dendl; - rgw_bucket_dir_entry ent; - *_state = &state; // state is required even if a failure occurs - - int ret = get_dir_entry_attrs(dpp, &ent); - if (ret != 0) { - return ret; - } - - // Set object state. - state.exists = true; - state.size = ent.meta.size; - state.accounted_size = ent.meta.size; - state.mtime = ent.meta.mtime; - - state.has_attrs = true; - bufferlist etag_bl; - string& etag = ent.meta.etag; - ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag - << dendl; - etag_bl.append(etag); - state.attrset[RGW_ATTR_ETAG] = etag_bl; - return 0; -} - -DaosObject::~DaosObject() { close(nullptr); } - -int DaosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: DaosObject::set_obj_attrs()" << dendl; - // TODO handle target_obj - // Get object's metadata (those stored in rgw_bucket_dir_entry) - rgw_bucket_dir_entry ent; - int ret = get_dir_entry_attrs(dpp, &ent); - if (ret != 0) { - return ret; - } - - // Update object metadata - Attrs updateattrs = setattrs == nullptr ? 
attrs : *setattrs; - if (delattrs) { - for (auto const& [attr, attrval] : *delattrs) { - updateattrs.erase(attr); - } - } - - ret = set_dir_entry_attrs(dpp, &ent, &updateattrs); - return ret; -} - -int DaosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, - rgw_obj* target_obj) { - ldpp_dout(dpp, 20) << "DEBUG: DaosObject::get_obj_attrs()" << dendl; - // TODO handle target_obj - // Get object's metadata (those stored in rgw_bucket_dir_entry) - rgw_bucket_dir_entry ent; - int ret = get_dir_entry_attrs(dpp, &ent, &attrs); - return ret; -} - -int DaosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, - optional_yield y, - const DoutPrefixProvider* dpp) { - // Get object's metadata (those stored in rgw_bucket_dir_entry) - ldpp_dout(dpp, 20) << "DEBUG: modify_obj_attrs" << dendl; - rgw_bucket_dir_entry ent; - int ret = get_dir_entry_attrs(dpp, &ent, &attrs); - if (ret != 0) { - return ret; - } - - // Update object attrs - set_atomic(); - attrs[attr_name] = attr_val; - - ret = set_dir_entry_attrs(dpp, &ent, &attrs); - return ret; -} - -int DaosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, - const char* attr_name, optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: delete_obj_attrs" << dendl; - rgw_obj target = get_obj(); - Attrs rmattr; - bufferlist bl; - - rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); -} - -bool DaosObject::is_expired() { - auto iter = attrs.find(RGW_ATTR_DELETE_AT); - if (iter != attrs.end()) { - utime_t delete_at; - try { - auto bufit = iter->second.cbegin(); - decode(delete_at, bufit); - } catch (buffer::error& err) { - ldout(store->ctx(), 0) - << "ERROR: " << __func__ - << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl; - return false; - } - - if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { - return true; - } - } - - return false; -} - -// Taken from rgw_rados.cc -void DaosObject::gen_rand_obj_instance_name() { - enum { OBJ_INSTANCE_LEN = 32 }; - 
char buf[OBJ_INSTANCE_LEN + 1]; - - gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN); - state.obj.key.set_instance(buf); -} - -int DaosObject::omap_get_vals_by_keys(const DoutPrefixProvider* dpp, - const std::string& oid, - const std::set& keys, - Attrs* vals) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::omap_set_val_by_key(const DoutPrefixProvider* dpp, - const std::string& key, bufferlist& val, - bool must_exist, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) { - return 0; -} - -std::unique_ptr DaosObject::get_serializer( - const DoutPrefixProvider* dpp, const std::string& lock_name) { - return std::make_unique(dpp, store, this, lock_name); -} - -int DaosObject::transition(Bucket* bucket, - const rgw_placement_rule& placement_rule, - const real_time& mtime, uint64_t olh_epoch, - const DoutPrefixProvider* dpp, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::transition_to_cloud( - Bucket* bucket, rgw::sal::PlacementTier* tier, rgw_bucket_dir_entry& o, - std::set& cloud_targets, CephContext* cct, bool update_object, - const DoutPrefixProvider* dpp, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -bool DaosObject::placement_rules_match(rgw_placement_rule& r1, - rgw_placement_rule& r2) { - /* XXX: support single default zone and zonegroup for now */ - return true; -} - -int DaosObject::dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y, - Formatter* f) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::unique_ptr DaosObject::get_read_op() { - return std::make_unique(this); -} - -DaosObject::DaosReadOp::DaosReadOp(DaosObject* _source) : source(_source) {} - -int DaosObject::DaosReadOp::prepare(optional_yield y, - const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << __func__ - << ": bucket=" << source->get_bucket()->get_name() - << dendl; - - if 
(source->get_bucket()->versioned() && !source->have_instance()) { - // If the bucket is versioned and no version is specified, get the latest - // version - source->set_instance(DS3_LATEST_INSTANCE); - } - - rgw_bucket_dir_entry ent; - int ret = source->get_dir_entry_attrs(dpp, &ent); - - // Set source object's attrs. The attrs is key/value map and is used - // in send_response_data() to set attributes, including etag. - bufferlist etag_bl; - string& etag = ent.meta.etag; - ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag - << dendl; - etag_bl.append(etag.c_str(), etag.size()); - source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); - - source->set_key(ent.key); - source->set_obj_size(ent.meta.size); - ldpp_dout(dpp, 20) << __func__ << ": object's size: " << ent.meta.size - << dendl; - - return ret; -} - -int DaosObject::DaosReadOp::read(int64_t off, int64_t end, bufferlist& bl, - optional_yield y, - const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl; - int ret = source->lookup(dpp); - if (ret != 0) { - return ret; - } - - // Calculate size, end is inclusive - uint64_t size = end - off + 1; - - // Read - ret = source->read(dpp, bl, off, size); - if (ret != 0) { - return ret; - } - - return ret; -} - -// RGWGetObj::execute() calls ReadOp::iterate() to read object from 'off' to -// 'end'. The returned data is processed in 'cb' which is a chain of -// post-processing filters such as decompression, de-encryption and sending back -// data to client (RGWGetObj_CB::handle_dta which in turn calls -// RGWGetObj::get_data_cb() to send data back.). -// -// POC implements a simple sync version of iterate() function in which it reads -// a block of data each time and call 'cb' for post-processing. 
-int DaosObject::DaosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, - int64_t end, RGWGetDataCB* cb, - optional_yield y) { - ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl; - int ret = source->lookup(dpp); - if (ret != 0) { - return ret; - } - - // Calculate size, end is inclusive - uint64_t size = end - off + 1; - - // Reserve buffers and read - bufferlist bl; - ret = source->read(dpp, bl, off, size); - if (ret != 0) { - return ret; - } - - // Call cb to process returned data. - ldpp_dout(dpp, 20) << __func__ << ": call cb to process data, actual=" << size - << dendl; - cb->handle_data(bl, off, size); - return ret; -} - -int DaosObject::DaosReadOp::get_attr(const DoutPrefixProvider* dpp, - const char* name, bufferlist& dest, - optional_yield y) { - Attrs attrs; - int ret = source->get_dir_entry_attrs(dpp, nullptr, &attrs); - if (!ret) { - return -ENODATA; - } - - auto search = attrs.find(name); - if (search == attrs.end()) { - return -ENODATA; - } - - dest = search->second; - return 0; -} - -std::unique_ptr DaosObject::get_delete_op() { - return std::make_unique(this); -} - -DaosObject::DaosDeleteOp::DaosDeleteOp(DaosObject* _source) : source(_source) {} - -// Implementation of DELETE OBJ also requires DaosObject::get_obj_state() -// to retrieve and set object's state from object's metadata. -// -// TODO: -// 1. The POC only deletes the Daos objects. It doesn't handle the -// DeleteOp::params. Delete::delete_obj() in rgw_rados.cc shows how rados -// backend process the params. -// 2. Delete an object when its versioning is turned on. -// 3. Handle empty directories -// 4. 
Fail when file doesn't exist -int DaosObject::DaosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, - optional_yield y) { - ldpp_dout(dpp, 20) << "DaosDeleteOp::delete_obj " - << source->get_key().get_oid() << " from " - << source->get_bucket()->get_name() << dendl; - if (source->get_instance() == "null") { - source->clear_instance(); - } - - // Open bucket - int ret = 0; - std::string key = source->get_key().get_oid(); - DaosBucket* daos_bucket = source->get_daos_bucket(); - ret = daos_bucket->open(dpp); - if (ret != 0) { - return ret; - } - - // Remove the daos object - ret = ds3_obj_destroy(key.c_str(), daos_bucket->ds3b); - ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_destroy key=" << key << " ret=" << ret - << dendl; - - // result.delete_marker = parent_op.result.delete_marker; - // result.version_id = parent_op.result.version_id; - - return ret; -} - -int DaosObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, - bool prevent_versioning) { - ldpp_dout(dpp, 20) << "DEBUG: delete_object" << dendl; - DaosObject::DaosDeleteOp del_op(this); - del_op.params.bucket_owner = bucket->get_info().owner; - del_op.params.versioning_status = bucket->get_info().versioning_status(); - - return del_op.delete_obj(dpp, y); -} - -int DaosObject::copy_object( - User* user, req_info* info, const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, ceph::real_time* mtime, - const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, - bool high_precision_time, const char* if_match, const char* if_nomatch, - AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, - RGWObjCategory category, uint64_t olh_epoch, - boost::optional delete_at, std::string* version_id, - std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*), - void* progress_data, const DoutPrefixProvider* dpp, optional_yield y) { - return 
DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::swift_versioning_restore(bool& restored, - const DoutPrefixProvider* dpp) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::swift_versioning_copy(const DoutPrefixProvider* dpp, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosObject::lookup(const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: lookup" << dendl; - if (is_open()) { - return 0; - } - - if (get_instance() == "null") { - clear_instance(); - } - - int ret = 0; - DaosBucket* daos_bucket = get_daos_bucket(); - ret = daos_bucket->open(dpp); - if (ret != 0) { - return ret; - } - - ret = ds3_obj_open(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b); - - if (ret == -ENOENT) { - ldpp_dout(dpp, 20) << "DEBUG: daos object (" << get_bucket()->get_name() - << ", " << get_key().get_oid() - << ") does not exist: ret=" << ret << dendl; - } else if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to open daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosObject::create(const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: create" << dendl; - if (is_open()) { - return 0; - } - - if (get_instance() == "null") { - clear_instance(); - } - - int ret = 0; - DaosBucket* daos_bucket = get_daos_bucket(); - ret = daos_bucket->open(dpp); - if (ret != 0) { - return ret; - } - - ret = ds3_obj_create(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to create daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosObject::close(const DoutPrefixProvider* dpp) { - ldpp_dout(dpp, 20) << "DEBUG: close" << dendl; - if (!is_open()) { - return 0; - } - - int ret = ds3_obj_close(ds3o); - ds3o = nullptr; - ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_close ret=" << ret << dendl; - return ret; 
-} - -int DaosObject::write(const DoutPrefixProvider* dpp, bufferlist&& data, - uint64_t offset) { - ldpp_dout(dpp, 20) << "DEBUG: write" << dendl; - uint64_t size = data.length(); - int ret = ds3_obj_write(data.c_str(), offset, &size, get_daos_bucket()->ds3b, - ds3o, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to write into daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosObject::read(const DoutPrefixProvider* dpp, bufferlist& data, - uint64_t offset, uint64_t& size) { - ldpp_dout(dpp, 20) << "DEBUG: read" << dendl; - int ret = ds3_obj_read(data.append_hole(size).c_str(), offset, &size, - get_daos_bucket()->ds3b, ds3o, nullptr); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read from daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - } - return ret; -} - -// Get the object's dirent and attrs -int DaosObject::get_dir_entry_attrs(const DoutPrefixProvider* dpp, - rgw_bucket_dir_entry* ent, - Attrs* getattrs) { - ldpp_dout(dpp, 20) << "DEBUG: get_dir_entry_attrs" << dendl; - int ret = 0; - vector value(DS3_MAX_ENCODED_LEN); - uint32_t size = value.size(); - - if (get_key().ns == RGW_OBJ_NS_MULTIPART) { - struct ds3_multipart_upload_info ui = {.encoded = value.data(), - .encoded_length = size}; - ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), - get_key().get_oid().c_str(), store->ds3); - } else { - ret = lookup(dpp); - if (ret != 0) { - return ret; - } - - auto object_info = std::make_unique(); - object_info->encoded = value.data(); - object_info->encoded_length = size; - ret = ds3_obj_get_info(object_info.get(), get_daos_bucket()->ds3b, ds3o); - size = object_info->encoded_length; - } - - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to get info of daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - return 
ret; - } - - rgw_bucket_dir_entry dummy_ent; - if (!ent) { - // if ent is not passed, use a dummy ent - ent = &dummy_ent; - } - - bufferlist bl; - bl.append(reinterpret_cast(value.data()), size); - auto iter = bl.cbegin(); - ent->decode(iter); - if (getattrs) { - decode(*getattrs, iter); - } - - return ret; -} -// Set the object's dirent and attrs -int DaosObject::set_dir_entry_attrs(const DoutPrefixProvider* dpp, - rgw_bucket_dir_entry* ent, - Attrs* setattrs) { - ldpp_dout(dpp, 20) << "DEBUG: set_dir_entry_attrs" << dendl; - int ret = lookup(dpp); - if (ret != 0) { - return ret; - } - - // Set defaults - if (!ent) { - // if ent is not passed, return an error - return -EINVAL; - } - - if (!setattrs) { - // if setattrs is not passed, use object attrs - setattrs = &attrs; - } - - bufferlist wbl; - ent->encode(wbl); - encode(*setattrs, wbl); - - // Write rgw_bucket_dir_entry into object xattr - auto object_info = std::make_unique(); - object_info->encoded = wbl.c_str(); - object_info->encoded_length = wbl.length(); - ret = ds3_obj_set_info(object_info.get(), get_daos_bucket()->ds3b, ds3o); - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to set info of daos object (" - << get_bucket()->get_name() << ", " << get_key().get_oid() - << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosObject::mark_as_latest(const DoutPrefixProvider* dpp, - ceph::real_time set_mtime) { - // TODO handle deletion - // TODO understand race conditions - ldpp_dout(dpp, 20) << "DEBUG: mark_as_latest" << dendl; - - // Get latest version so far - std::unique_ptr latest_object = std::make_unique( - store, rgw_obj_key(get_name(), DS3_LATEST_INSTANCE), get_bucket()); - - ldpp_dout(dpp, 20) << __func__ << ": key=" << get_key().get_oid() - << " latest_object_key= " - << latest_object->get_key().get_oid() << dendl; - - int ret = latest_object->lookup(dpp); - if (ret == 0) { - // Get metadata only if file exists - rgw_bucket_dir_entry latest_ent; - Attrs latest_attrs; - ret = 
latest_object->get_dir_entry_attrs(dpp, &latest_ent, &latest_attrs); - if (ret != 0) { - return ret; - } - - // Update flags - latest_ent.flags = rgw_bucket_dir_entry::FLAG_VER; - latest_ent.meta.mtime = set_mtime; - ret = latest_object->set_dir_entry_attrs(dpp, &latest_ent, &latest_attrs); - if (ret != 0) { - return ret; - } - } - - // Get or create the link [latest], make it link to the current latest - // version. - ret = - ds3_obj_mark_latest(get_key().get_oid().c_str(), get_daos_bucket()->ds3b); - ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_mark_latest ret=" << ret << dendl; - return ret; -} - -DaosAtomicWriter::DaosAtomicWriter( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, DaosStore* _store, - const rgw_user& _owner, const rgw_placement_rule* _ptail_placement_rule, - uint64_t _olh_epoch, const std::string& _unique_tag) - : StoreWriter(dpp, y), - store(_store), - owner(_owner), - ptail_placement_rule(_ptail_placement_rule), - olh_epoch(_olh_epoch), - unique_tag(_unique_tag), - obj(_store, obj->get_key(), obj->get_bucket()) {} - -int DaosAtomicWriter::prepare(optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: prepare" << dendl; - int ret = obj.create(dpp); - return ret; -} - -// TODO: Handle concurrent writes, a unique object id is a possible solution, or -// use DAOS transactions -// XXX: Do we need to accumulate writes as motr does? 
-int DaosAtomicWriter::process(bufferlist&& data, uint64_t offset) { - ldpp_dout(dpp, 20) << "DEBUG: process" << dendl; - if (data.length() == 0) { - return 0; - } - - int ret = 0; - if (!obj.is_open()) { - ret = obj.lookup(dpp); - if (ret != 0) { - return ret; - } - } - - // XXX: Combine multiple streams into one as motr does - uint64_t data_size = data.length(); - ret = obj.write(dpp, std::move(data), offset); - if (ret == 0) { - total_data_size += data_size; - } - return ret; -} - -int DaosAtomicWriter::complete( - size_t accounted_size, const std::string& etag, ceph::real_time* mtime, - ceph::real_time set_mtime, std::map& attrs, - ceph::real_time delete_at, const char* if_match, const char* if_nomatch, - const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, - optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl; - bufferlist bl; - rgw_bucket_dir_entry ent; - int ret; - - // Set rgw_bucet_dir_entry. Some of the members of this structure may not - // apply to daos. - // - // Checkout AtomicObjectProcessor::complete() in rgw_putobj_processor.cc - // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what and - // how to set the dir entry. Only set the basic ones for POC, no ACLs and - // other attrs. - obj.get_key().get_index_key(&ent.key); - ent.meta.size = total_data_size; - ent.meta.accounted_size = accounted_size; - ent.meta.mtime = - real_clock::is_zero(set_mtime) ? 
ceph::real_clock::now() : set_mtime; - ent.meta.etag = etag; - ent.meta.owner = owner.to_str(); - ent.meta.owner_display_name = - obj.get_bucket()->get_owner()->get_display_name(); - bool is_versioned = obj.get_bucket()->versioned(); - if (is_versioned) - ent.flags = - rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; - ldpp_dout(dpp, 20) << __func__ << ": key=" << obj.get_key().get_oid() - << " etag: " << etag << dendl; - if (user_data) ent.meta.user_data = *user_data; - - RGWBucketInfo& info = obj.get_bucket()->get_info(); - if (info.obj_lock_enabled() && info.obj_lock.has_rule()) { - auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); - if (iter == attrs.end()) { - real_time lock_until_date = - info.obj_lock.get_lock_until_date(ent.meta.mtime); - string mode = info.obj_lock.get_mode(); - RGWObjectRetention obj_retention(mode, lock_until_date); - bufferlist retention_bl; - obj_retention.encode(retention_bl); - attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl; - } - } - - ret = obj.set_dir_entry_attrs(dpp, &ent, &attrs); - - if (is_versioned) { - ret = obj.mark_as_latest(dpp, set_mtime); - if (ret != 0) { - return ret; - } - } - - return ret; -} - -int DaosMultipartUpload::abort(const DoutPrefixProvider* dpp, - CephContext* cct) { - // Remove upload from bucket multipart index - ldpp_dout(dpp, 20) << "DEBUG: abort" << dendl; - return ds3_upload_remove(bucket->get_name().c_str(), get_upload_id().c_str(), - store->ds3); -} - -std::unique_ptr DaosMultipartUpload::get_meta_obj() { - return bucket->get_object( - rgw_obj_key(get_upload_id(), string(), RGW_OBJ_NS_MULTIPART)); -} - -int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y, - ACLOwner& _owner, - rgw_placement_rule& dest_placement, - rgw::sal::Attrs& attrs) { - ldpp_dout(dpp, 20) << "DEBUG: init" << dendl; - int ret; - std::string oid = mp_obj.get_key(); - - // Create an initial entry in the bucket. 
The entry will be - // updated when multipart upload is completed, for example, - // size, etag etc. - bufferlist bl; - rgw_bucket_dir_entry ent; - ent.key.name = oid; - ent.meta.owner = owner.get_id().to_str(); - ent.meta.category = RGWObjCategory::MultiMeta; - ent.meta.mtime = ceph::real_clock::now(); - - multipart_upload_info upload_info; - upload_info.dest_placement = dest_placement; - - ent.encode(bl); - encode(attrs, bl); - encode(upload_info, bl); - - struct ds3_multipart_upload_info ui; - std::strcpy(ui.upload_id, MULTIPART_UPLOAD_ID_PREFIX); - std::strncpy(ui.key, oid.c_str(), sizeof(ui.key)); - ui.encoded = bl.c_str(); - ui.encoded_length = bl.length(); - int prefix_length = strlen(ui.upload_id); - - do { - gen_rand_alphanumeric(store->ctx(), ui.upload_id + prefix_length, - sizeof(ui.upload_id) - 1 - prefix_length); - mp_obj.init(oid, ui.upload_id); - ret = ds3_upload_init(&ui, bucket->get_name().c_str(), store->ds3); - } while (ret == -EEXIST); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to create multipart upload dir (" - << bucket->get_name() << "/" << get_upload_id() - << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosMultipartUpload::list_parts(const DoutPrefixProvider* dpp, - CephContext* cct, int num_parts, int marker, - int* next_marker, bool* truncated, - bool assume_unsorted) { - ldpp_dout(dpp, 20) << "DEBUG: list_parts" << dendl; - // Init needed structures - vector multipart_part_infos(num_parts); - uint32_t npart = multipart_part_infos.size(); - vector> values(npart, vector(DS3_MAX_ENCODED_LEN)); - for (uint32_t i = 0; i < npart; i++) { - multipart_part_infos[i].encoded = values[i].data(); - multipart_part_infos[i].encoded_length = values[i].size(); - } - - uint32_t daos_marker = marker; - int ret = ds3_upload_list_parts( - bucket->get_name().c_str(), get_upload_id().c_str(), &npart, - multipart_part_infos.data(), &daos_marker, truncated, store->ds3); - - if (ret != 0) { - if (ret == -ENOENT) { - ret = 
-ERR_NO_SUCH_UPLOAD; - } - return ret; - } - - multipart_part_infos.resize(npart); - values.resize(npart); - parts.clear(); - - for (auto const& pi : multipart_part_infos) { - bufferlist bl; - bl.append(reinterpret_cast(pi.encoded), pi.encoded_length); - - std::unique_ptr part = - std::make_unique(); - auto iter = bl.cbegin(); - decode(part->info, iter); - parts[pi.part_num] = std::move(part); - } - - if (next_marker) { - *next_marker = daos_marker; - } - return ret; -} - -// Heavily copied from rgw_sal_rados.cc -int DaosMultipartUpload::complete( - const DoutPrefixProvider* dpp, optional_yield y, CephContext* cct, - map& part_etags, list& remove_objs, - uint64_t& accounted_size, bool& compressed, RGWCompressionInfo& cs_info, - off_t& off, std::string& tag, ACLOwner& owner, uint64_t olh_epoch, - rgw::sal::Object* target_obj) { - ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl; - char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; - std::string etag; - bufferlist etag_bl; - MD5 hash; - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - bool truncated; - int ret; - - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): enter" << dendl; - int total_parts = 0; - int handled_parts = 0; - int max_parts = 1000; - int marker = 0; - uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; - auto etags_iter = part_etags.begin(); - rgw::sal::Attrs attrs = target_obj->get_attrs(); - - do { - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): list_parts()" - << dendl; - ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); - if (ret == -ENOENT) { - ret = -ERR_NO_SUCH_UPLOAD; - } - if (ret != 0) return ret; - - total_parts += parts.size(); - if (!truncated && total_parts != (int)part_etags.size()) { - ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts - << " expected: " << part_etags.size() << dendl; - 
ret = -ERR_INVALID_PART; - return ret; - } - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): parts.size()=" - << parts.size() << dendl; - - for (auto obj_iter = parts.begin(); - etags_iter != part_etags.end() && obj_iter != parts.end(); - ++etags_iter, ++obj_iter, ++handled_parts) { - DaosMultipartPart* part = - dynamic_cast(obj_iter->second.get()); - uint64_t part_size = part->get_size(); - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part_size=" - << part_size << dendl; - if (handled_parts < (int)part_etags.size() - 1 && - part_size < min_part_size) { - ret = -ERR_TOO_SMALL; - return ret; - } - - char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; - if (etags_iter->first != (int)obj_iter->first) { - ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: " - << etags_iter->first - << " next uploaded: " << obj_iter->first << dendl; - ret = -ERR_INVALID_PART; - return ret; - } - string part_etag = rgw_string_unquote(etags_iter->second); - if (part_etag.compare(part->get_etag()) != 0) { - ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " - << etags_iter->first - << " etag: " << etags_iter->second << dendl; - ret = -ERR_INVALID_PART; - return ret; - } - - hex_to_buf(part->get_etag().c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); - hash.Update((const unsigned char*)petag, sizeof(petag)); - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): calc etag " - << dendl; - - RGWUploadPartInfo& obj_part = part->info; - string oid = mp_obj.get_part(obj_part.num); - rgw_obj src_obj; - src_obj.init_ns(bucket->get_key(), oid, RGW_OBJ_NS_MULTIPART); - - bool part_compressed = (obj_part.cs_info.compression_type != "none"); - if ((handled_parts > 0) && - ((part_compressed != compressed) || - (cs_info.compression_type != obj_part.cs_info.compression_type))) { - ldpp_dout(dpp, 0) - << "ERROR: compression type was changed during multipart upload (" - << cs_info.compression_type << ">>" - << obj_part.cs_info.compression_type << ")" << dendl; - ret = 
-ERR_INVALID_PART; - return ret; - } - - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part compression" - << dendl; - if (part_compressed) { - int64_t new_ofs; // offset in compression data for new part - if (cs_info.blocks.size() > 0) - new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len; - else - new_ofs = 0; - for (const auto& block : obj_part.cs_info.blocks) { - compression_block cb; - cb.old_ofs = block.old_ofs + cs_info.orig_size; - cb.new_ofs = new_ofs; - cb.len = block.len; - cs_info.blocks.push_back(cb); - new_ofs = cb.new_ofs + cb.len; - } - if (!compressed) - cs_info.compression_type = obj_part.cs_info.compression_type; - cs_info.orig_size += obj_part.cs_info.orig_size; - compressed = true; - } - - // We may not need to do the following as remove_objs are those - // don't show when listing a bucket. As we store in-progress uploaded - // object's metadata in a separate index, they are not shown when - // listing a bucket. - rgw_obj_index_key remove_key; - src_obj.key.get_index_key(&remove_key); - - remove_objs.push_back(remove_key); - - off += obj_part.size; - accounted_size += obj_part.accounted_size; - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): off=" << off - << ", accounted_size = " << accounted_size << dendl; - } - } while (truncated); - hash.Final((unsigned char*)final_etag); - - buf_to_hex((unsigned char*)final_etag, sizeof(final_etag), final_etag_str); - snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], - sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, "-%lld", - (long long)part_etags.size()); - etag = final_etag_str; - ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; - - etag_bl.append(etag); - - attrs[RGW_ATTR_ETAG] = etag_bl; - - if (compressed) { - // write compression attribute to full object - bufferlist tmp; - encode(cs_info, tmp); - attrs[RGW_ATTR_COMPRESSION] = tmp; - } - - // Different from rgw_sal_rados.cc starts here - // Read the object's multipart info - bufferlist 
bl; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_multipart_upload_info ui = { - .encoded = bl.append_hole(size).c_str(), .encoded_length = size}; - ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), - get_upload_id().c_str(), store->ds3); - ldpp_dout(dpp, 20) << "DEBUG: ds3_upload_get_info entry=" - << bucket->get_name() << "/" << get_upload_id() << dendl; - if (ret != 0) { - if (ret == -ENOENT) { - ret = -ERR_NO_SUCH_UPLOAD; - } - return ret; - } - - rgw_bucket_dir_entry ent; - auto iter = bl.cbegin(); - ent.decode(iter); - - // Update entry data and name - target_obj->get_key().get_index_key(&ent.key); - ent.meta.size = off; - ent.meta.accounted_size = accounted_size; - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): obj size=" - << ent.meta.size - << " obj accounted size=" << ent.meta.accounted_size - << dendl; - ent.meta.category = RGWObjCategory::Main; - ent.meta.mtime = ceph::real_clock::now(); - bool is_versioned = target_obj->get_bucket()->versioned(); - if (is_versioned) - ent.flags = - rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; - ent.meta.etag = etag; - - // Open object - DaosObject* obj = static_cast(target_obj); - ret = obj->create(dpp); - if (ret != 0) { - return ret; - } - - // Copy data from parts to object - uint64_t write_off = 0; - for (auto const& [part_num, part] : get_parts()) { - ds3_part_t* ds3p; - ret = ds3_part_open(get_bucket_name().c_str(), get_upload_id().c_str(), - part_num, false, &ds3p, store->ds3); - if (ret != 0) { - return ret; - } - - // Reserve buffers and read - uint64_t size = part->get_size(); - bufferlist bl; - ret = ds3_part_read(bl.append_hole(size).c_str(), 0, &size, ds3p, - store->ds3, nullptr); - if (ret != 0) { - ds3_part_close(ds3p); - return ret; - } - - ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part " << part_num - << " size is " << size << dendl; - - // write to obj - obj->write(dpp, std::move(bl), write_off); - ds3_part_close(ds3p); - write_off += 
part->get_size(); - } - - // Set attributes - ret = obj->set_dir_entry_attrs(dpp, &ent, &attrs); - - if (is_versioned) { - ret = obj->mark_as_latest(dpp, ent.meta.mtime); - if (ret != 0) { - return ret; - } - } - - // Remove upload from bucket multipart index - ret = ds3_upload_remove(get_bucket_name().c_str(), get_upload_id().c_str(), - store->ds3); - return ret; -} - -int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp, - optional_yield y, rgw_placement_rule** rule, - rgw::sal::Attrs* attrs) { - ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_info(): enter" << dendl; - if (!rule && !attrs) { - return 0; - } - - if (rule) { - if (!placement.empty()) { - *rule = &placement; - if (!attrs) { - // Don't need attrs, done - return 0; - } - } else { - *rule = nullptr; - } - } - - // Read the multipart upload dirent from index - bufferlist bl; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_multipart_upload_info ui = { - .encoded = bl.append_hole(size).c_str(), .encoded_length = size}; - int ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(), - get_upload_id().c_str(), store->ds3); - - if (ret != 0) { - if (ret == -ENOENT) { - ret = -ERR_NO_SUCH_UPLOAD; - } - return ret; - } - - multipart_upload_info upload_info; - rgw_bucket_dir_entry ent; - Attrs decoded_attrs; - auto iter = bl.cbegin(); - ent.decode(iter); - decode(decoded_attrs, iter); - ldpp_dout(dpp, 20) << "DEBUG: decoded_attrs=" << attrs << dendl; - - if (attrs) { - *attrs = decoded_attrs; - if (!rule || *rule != nullptr) { - // placement was cached; don't actually read - return 0; - } - } - - // Now decode the placement rule - decode(upload_info, iter); - placement = upload_info.dest_placement; - *rule = &placement; - - return 0; -} - -std::unique_ptr DaosMultipartUpload::get_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, uint64_t part_num, - const std::string& part_num_str) { - 
ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_writer(): enter part=" - << part_num << " head_obj=" << _head_obj << dendl; - return std::make_unique( - dpp, y, this, obj, store, owner, ptail_placement_rule, - part_num, part_num_str); -} - -DaosMultipartWriter::~DaosMultipartWriter() { - if (is_open()) ds3_part_close(ds3p); -} - -int DaosMultipartWriter::prepare(optional_yield y) { - ldpp_dout(dpp, 20) << "DaosMultipartWriter::prepare(): enter part=" - << part_num_str << dendl; - int ret = ds3_part_open(get_bucket_name().c_str(), upload_id.c_str(), - part_num, true, &ds3p, store->ds3); - if (ret == -ENOENT) { - ret = -ERR_NO_SUCH_UPLOAD; - } - return ret; -} - -const std::string& DaosMultipartWriter::get_bucket_name() { - return static_cast(upload)->get_bucket_name(); -} - -int DaosMultipartWriter::process(bufferlist&& data, uint64_t offset) { - ldpp_dout(dpp, 20) << "DaosMultipartWriter::process(): enter part=" - << part_num_str << " offset=" << offset << dendl; - if (data.length() == 0) { - return 0; - } - - uint64_t size = data.length(); - int ret = - ds3_part_write(data.c_str(), offset, &size, ds3p, store->ds3, nullptr); - if (ret == 0) { - // XXX: Combine multiple streams into one as motr does - actual_part_size += size; - } else { - ldpp_dout(dpp, 0) << "ERROR: failed to write into part (" - << get_bucket_name() << ", " << upload_id << ", " - << part_num << "): ret=" << ret << dendl; - } - return ret; -} - -int DaosMultipartWriter::complete( - size_t accounted_size, const std::string& etag, ceph::real_time* mtime, - ceph::real_time set_mtime, std::map& attrs, - ceph::real_time delete_at, const char* if_match, const char* if_nomatch, - const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, - optional_yield y) { - ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): enter part=" - << part_num_str << dendl; - - // Add an entry into part index - bufferlist bl; - RGWUploadPartInfo info; - info.num = part_num; - info.etag = etag; - info.size = 
actual_part_size; - info.accounted_size = accounted_size; - info.modified = real_clock::now(); - - bool compressed; - int ret = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info); - ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): compression ret=" - << ret << dendl; - if (ret != 0) { - ldpp_dout(dpp, 1) << "cannot get compression info" << dendl; - return ret; - } - encode(info, bl); - encode(attrs, bl); - ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): entry size" - << bl.length() << dendl; - - struct ds3_multipart_part_info part_info = {.part_num = part_num, - .encoded = bl.c_str(), - .encoded_length = bl.length()}; - - ret = ds3_part_set_info(&part_info, ds3p, store->ds3, nullptr); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to set part info (" << get_bucket_name() - << ", " << upload_id << ", " << part_num - << "): ret=" << ret << dendl; - if (ret == ENOENT) { - ret = -ERR_NO_SUCH_UPLOAD; - } - } - - return ret; -} - -std::unique_ptr DaosStore::get_role( - std::string name, std::string tenant, std::string path, - std::string trust_policy, std::string max_session_duration_str, - std::multimap tags) { - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -std::unique_ptr DaosStore::get_role(const RGWRoleInfo& info) { - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -std::unique_ptr DaosStore::get_role(std::string id) { - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -int DaosStore::get_roles(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& path_prefix, - const std::string& tenant, - vector>& roles) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::unique_ptr DaosStore::get_oidc_provider() { - RGWOIDCProvider* p = nullptr; - return std::unique_ptr(p); -} - -int DaosStore::get_oidc_providers( - const DoutPrefixProvider* dpp, const std::string& tenant, - vector>& providers) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::unique_ptr DaosBucket::get_multipart_upload( - 
const std::string& oid, std::optional upload_id, - ACLOwner owner, ceph::real_time mtime) { - return std::make_unique(store, this, oid, upload_id, - owner, mtime); -} - -std::unique_ptr DaosStore::get_append_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, - const std::string& unique_tag, uint64_t position, - uint64_t* cur_accounted_size) { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return nullptr; -} - -std::unique_ptr DaosStore::get_atomic_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch, - const std::string& unique_tag) { - ldpp_dout(dpp, 20) << "get_atomic_writer" << dendl; - return std::make_unique(dpp, y, obj, this, - owner, ptail_placement_rule, - olh_epoch, unique_tag); -} - -const std::string& DaosStore::get_compression_type( - const rgw_placement_rule& rule) { - return zone.zone_params->get_compression_type(rule); -} - -bool DaosStore::valid_placement(const rgw_placement_rule& rule) { - return zone.zone_params->valid_placement(rule); -} - -std::unique_ptr DaosStore::get_user(const rgw_user& u) { - ldout(cctx, 20) << "DEBUG: bucket's user: " << u.to_str() << dendl; - return std::make_unique(this, u); -} - -int DaosStore::get_user_by_access_key(const DoutPrefixProvider* dpp, - const std::string& key, optional_yield y, - std::unique_ptr* user) { - // Initialize ds3_user_info - bufferlist bl; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), - .encoded_length = size}; - - int ret = ds3_user_get_by_key(key.c_str(), &user_info, ds3, nullptr); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_key failed, key=" << key - << " ret=" << ret << dendl; - return ret; - } - - // Decode - DaosUserInfo duinfo; - bufferlist& blr = bl; - auto iter = blr.cbegin(); - 
duinfo.decode(iter); - - User* u = new DaosUser(this, duinfo.info); - if (!u) { - return -ENOMEM; - } - - user->reset(u); - return 0; -} - -int DaosStore::get_user_by_email(const DoutPrefixProvider* dpp, - const std::string& email, optional_yield y, - std::unique_ptr* user) { - // Initialize ds3_user_info - bufferlist bl; - uint64_t size = DS3_MAX_ENCODED_LEN; - struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(), - .encoded_length = size}; - - int ret = ds3_user_get_by_email(email.c_str(), &user_info, ds3, nullptr); - - if (ret != 0) { - ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_email failed, email=" << email - << " ret=" << ret << dendl; - return ret; - } - - // Decode - DaosUserInfo duinfo; - bufferlist& blr = bl; - auto iter = blr.cbegin(); - duinfo.decode(iter); - - User* u = new DaosUser(this, duinfo.info); - if (!u) { - return -ENOMEM; - } - - user->reset(u); - return 0; -} - -int DaosStore::get_user_by_swift(const DoutPrefixProvider* dpp, - const std::string& user_str, optional_yield y, - std::unique_ptr* user) { - /* Swift keys and subusers are not supported for now */ - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::unique_ptr DaosStore::get_object(const rgw_obj_key& k) { - return std::make_unique(this, k); -} - -inline std::ostream& operator<<(std::ostream& out, const rgw_user* u) { - std::string s; - if (u != nullptr) - u->to_str(s); - else - s = "(nullptr)"; - return out << s; -} - -int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, - const rgw_bucket& b, std::unique_ptr* bucket, - optional_yield y) { - ldpp_dout(dpp, 20) << "DEBUG: get_bucket1: User: " << u << dendl; - int ret; - Bucket* bp; - - bp = new DaosBucket(this, b, u); - ret = bp->load_bucket(dpp, y); - if (ret != 0) { - delete bp; - return ret; - } - - bucket->reset(bp); - return 0; -} - -int DaosStore::get_bucket(User* u, const RGWBucketInfo& i, - std::unique_ptr* bucket) { - DaosBucket* bp; - - bp = new DaosBucket(this, i, u); - /* Don't need to 
fetch the bucket info, use the provided one */ - - bucket->reset(bp); - return 0; -} - -int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, - const std::string& tenant, const std::string& name, - std::unique_ptr* bucket, optional_yield y) { - ldpp_dout(dpp, 20) << "get_bucket" << dendl; - rgw_bucket b; - - b.tenant = tenant; - b.name = name; - - return get_bucket(dpp, u, b, bucket, y); -} - -bool DaosStore::is_meta_master() { return true; } - -int DaosStore::forward_request_to_master(const DoutPrefixProvider* dpp, - User* user, obj_version* objv, - bufferlist& in_data, JSONParser* jp, - req_info& info, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::forward_iam_request_to_master(const DoutPrefixProvider* dpp, - const RGWAccessKey& key, - obj_version* objv, - bufferlist& in_data, - RGWXMLDecoder::XMLParser* parser, - req_info& info, optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::string DaosStore::zone_unique_id(uint64_t unique_num) { return ""; } - -std::string DaosStore::zone_unique_trans_id(const uint64_t unique_num) { - return ""; -} - -int DaosStore::cluster_stat(RGWClusterStat& stats) { - return DAOS_NOT_IMPLEMENTED_LOG(nullptr); -} - -std::unique_ptr DaosStore::get_lifecycle(void) { - DAOS_NOT_IMPLEMENTED_LOG(nullptr); - return 0; -} - -std::unique_ptr DaosStore::get_notification( - rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s, - rgw::notify::EventType event_type, const std::string* object_name) { - return std::make_unique(obj, src_obj, event_type); -} - -std::unique_ptr DaosStore::get_notification( - const DoutPrefixProvider* dpp, Object* obj, Object* src_obj, - rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, - std::string& _user_id, std::string& _user_tenant, std::string& _req_id, - optional_yield y) { - ldpp_dout(dpp, 20) << "get_notification" << dendl; - return std::make_unique(obj, src_obj, event_type); -} - -int DaosStore::log_usage(const 
DoutPrefixProvider* dpp, - map& usage_info) { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return 0; -} - -int DaosStore::log_op(const DoutPrefixProvider* dpp, string& oid, - bufferlist& bl) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::register_to_service_map(const DoutPrefixProvider* dpp, - const string& daemon_type, - const map& meta) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -void DaosStore::get_quota(RGWQuota& quota) { - // XXX: Not handled for the first pass - return; -} - -void DaosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, - RGWRateLimitInfo& user_ratelimit, - RGWRateLimitInfo& anon_ratelimit) { - return; -} - -int DaosStore::set_buckets_enabled(const DoutPrefixProvider* dpp, - std::vector& buckets, - bool enabled) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp, - std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef* phandler, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -RGWDataSyncStatusManager* DaosStore::get_data_sync_manager( - const rgw_zone_id& source_zone) { - DAOS_NOT_IMPLEMENTED_LOG(nullptr); - return 0; -} - -int DaosStore::read_all_usage( - const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, - map& usage) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::trim_all_usage(const DoutPrefixProvider* dpp, - uint64_t start_epoch, uint64_t end_epoch) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::get_config_key_val(string name, bufferlist* bl) { - return DAOS_NOT_IMPLEMENTED_LOG(nullptr); -} - -int DaosStore::meta_list_keys_init(const DoutPrefixProvider* dpp, - const string& section, const string& marker, - void** phandle) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -int DaosStore::meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle, - int max, list& keys, - bool* truncated) { - 
return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -void DaosStore::meta_list_keys_complete(void* handle) { return; } - -std::string DaosStore::meta_get_marker(void* handle) { return ""; } - -int DaosStore::meta_remove(const DoutPrefixProvider* dpp, string& metadata_key, - optional_yield y) { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); -} - -std::string DaosStore::get_cluster_id(const DoutPrefixProvider* dpp, - optional_yield y) { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return ""; -} - -} // namespace rgw::sal - -extern "C" { - -void* newDaosStore(CephContext* cct) { - return new rgw::sal::DaosStore(cct); -} -} diff --git a/src/rgw/rgw_sal_daos.h b/src/rgw/rgw_sal_daos.h deleted file mode 100644 index ac7352191f23..000000000000 --- a/src/rgw/rgw_sal_daos.h +++ /dev/null @@ -1,1040 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=2 sw=2 expandtab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * SAL implementation for the CORTX Daos backend - * - * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include - -#include "rgw_multi.h" -#include "rgw_notify.h" -#include "rgw_oidc_provider.h" -#include "rgw_putobj_processor.h" -#include "rgw_rados.h" -#include "rgw_role.h" -#include "rgw_sal_store.h" - -inline bool IsDebuggerAttached() { -#ifdef DEBUG - char buf[4096]; - - const int status_fd = ::open("/proc/self/status", O_RDONLY); - if (status_fd == -1) return false; - - const ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1); - ::close(status_fd); - - if (num_read <= 0) return false; - - buf[num_read] = '\0'; - constexpr char tracerPidString[] = "TracerPid:"; - const auto tracer_pid_ptr = ::strstr(buf, tracerPidString); - if (!tracer_pid_ptr) return false; - - for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1; - characterPtr <= buf + num_read; ++characterPtr) { - if (::isspace(*characterPtr)) - continue; - else - return ::isdigit(*characterPtr) != 0 && *characterPtr != '0'; - } -#endif // DEBUG - return false; -} - -inline void DebugBreak() { -#ifdef DEBUG - // only break into the debugger if the debugger is attached - if (IsDebuggerAttached()) - raise(SIGINT); // breaks into GDB and stops, can be continued -#endif // DEBUG -} - -inline int NotImplementedLog(const DoutPrefixProvider* ldpp, - const char* filename, int linenumber, - const char* functionname) { - if (ldpp) - ldpp_dout(ldpp, 20) << filename << "(" << linenumber << ") " << functionname - << ": Not implemented" << dendl; - return 0; -} - -inline int NotImplementedGdbBreak(const DoutPrefixProvider* ldpp, - const char* filename, int linenumber, - const char* functionname) { - NotImplementedLog(ldpp, filename, linenumber, functionname); - DebugBreak(); - return 0; -} - -#define DAOS_NOT_IMPLEMENTED_GDB_BREAK(ldpp) \ - NotImplementedGdbBreak(ldpp, __FILE__, __LINE__, __FUNCTION__) -#define DAOS_NOT_IMPLEMENTED_LOG(ldpp) \ - NotImplementedLog(ldpp, __FILE__, __LINE__, 
__FUNCTION__) - -namespace rgw::sal { - -class DaosStore; -class DaosObject; - -#ifdef DEBUG -// Prepends each log entry with the "filename(source_line) function_name". Makes -// it simple to -// associate log entries with the source that generated the log entry -#undef ldpp_dout -#define ldpp_dout(dpp, v) \ - if (decltype(auto) pdpp = (dpp); \ - pdpp) /* workaround -Wnonnull-compare for 'this' */ \ - dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \ - pdpp->gen_prefix(*_dout) \ - << __FILE__ << "(" << __LINE__ << ") " << __FUNCTION__ << " - " -#endif // DEBUG - -struct DaosUserInfo { - RGWUserInfo info; - obj_version user_version; - rgw::sal::Attrs attrs; - - void encode(bufferlist& bl) const { - ENCODE_START(3, 3, bl); - encode(info, bl); - encode(user_version, bl); - encode(attrs, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(3, bl); - decode(info, bl); - decode(user_version, bl); - decode(attrs, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(DaosUserInfo); - -class DaosNotification : public StoreNotification { - public: - DaosNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) - : StoreNotification(_obj, _src_obj, _type) {} - ~DaosNotification() = default; - - virtual int publish_reserve(const DoutPrefixProvider* dpp, - RGWObjTags* obj_tags = nullptr) override { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); - } - virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, - const ceph::real_time& mtime, - const std::string& etag, - const std::string& version) override { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); - } -}; - -class DaosUser : public StoreUser { - private: - DaosStore* store; - std::vector access_ids; - - public: - DaosUser(DaosStore* _st, const rgw_user& _u) : StoreUser(_u), store(_st) {} - DaosUser(DaosStore* _st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) {} - DaosUser(DaosStore* _st) : store(_st) {} - DaosUser(DaosUser& _o) = 
default; - DaosUser() {} - - virtual std::unique_ptr clone() override { - return std::make_unique(*this); - } - int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, - const std::string& end_marker, uint64_t max, bool need_stats, - BucketList& buckets, optional_yield y) override; - virtual int create_bucket( - const DoutPrefixProvider* dpp, const rgw_bucket& b, - const std::string& zonegroup_id, rgw_placement_rule& placement_rule, - std::string& swift_ver_location, const RGWQuotaInfo* pquota_info, - const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info, - obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, - bool* existed, req_info& req_info, std::unique_ptr* bucket, - optional_yield y) override; - virtual int read_attrs(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, - Attrs& new_attrs, - optional_yield y) override; - virtual int read_stats(const DoutPrefixProvider* dpp, optional_yield y, - RGWStorageStats* stats, - ceph::real_time* last_stats_sync = nullptr, - ceph::real_time* last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider* dpp, - RGWGetUserStats_CB* cb) override; - virtual int complete_flush_stats(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int read_usage( - const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch) override; - - virtual int load_user(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, - bool exclusive, - RGWUserInfo* old_info = nullptr) override; - virtual int remove_user(const DoutPrefixProvider* dpp, - optional_yield y) override; - - /** Read user info 
without loading it */ - int read_user(const DoutPrefixProvider* dpp, std::string name, - DaosUserInfo* duinfo); - - std::unique_ptr get_encoded_info(bufferlist& bl, - obj_version& obj_ver); - - friend class DaosBucket; -}; - -// RGWBucketInfo and other information that are shown when listing a bucket is -// represented in struct DaosBucketInfo. The structure is encoded and stored -// as the value of the global bucket instance index. -// TODO: compare pros and cons of separating the bucket_attrs (ACLs, tag etc.) -// into a different index. -struct DaosBucketInfo { - RGWBucketInfo info; - - obj_version bucket_version; - ceph::real_time mtime; - - rgw::sal::Attrs bucket_attrs; - - void encode(bufferlist& bl) const { - ENCODE_START(4, 4, bl); - encode(info, bl); - encode(bucket_version, bl); - encode(mtime, bl); - encode(bucket_attrs, bl); // rgw_cache.h example for a map - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(4, bl); - decode(info, bl); - decode(bucket_version, bl); - decode(mtime, bl); - decode(bucket_attrs, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(DaosBucketInfo); - -class DaosBucket : public StoreBucket { - private: - DaosStore* store; - RGWAccessControlPolicy acls; - - public: - /** Container ds3b handle */ - ds3_bucket_t* ds3b = nullptr; - - DaosBucket(DaosStore* _st) : store(_st), acls() {} - - DaosBucket(const DaosBucket& _daos_bucket) - : store(_daos_bucket.store), acls(), ds3b(nullptr) { - // TODO: deep copy all objects - } - - DaosBucket(DaosStore* _st, User* _u) : StoreBucket(_u), store(_st), acls() {} - - DaosBucket(DaosStore* _st, const rgw_bucket& _b) - : StoreBucket(_b), store(_st), acls() {} - - DaosBucket(DaosStore* _st, const RGWBucketEnt& _e) - : StoreBucket(_e), store(_st), acls() {} - - DaosBucket(DaosStore* _st, const RGWBucketInfo& _i) - : StoreBucket(_i), store(_st), acls() {} - - DaosBucket(DaosStore* _st, const rgw_bucket& _b, User* _u) - : StoreBucket(_b, _u), store(_st), 
acls() {} - - DaosBucket(DaosStore* _st, const RGWBucketEnt& _e, User* _u) - : StoreBucket(_e, _u), store(_st), acls() {} - - DaosBucket(DaosStore* _st, const RGWBucketInfo& _i, User* _u) - : StoreBucket(_i, _u), store(_st), acls() {} - - ~DaosBucket(); - - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int list(const DoutPrefixProvider* dpp, ListParams&, int, - ListResults&, optional_yield y) override; - virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children, - bool forward_to_master, req_info* req_info, - optional_yield y) override; - virtual int remove_bucket_bypass_gc(int concurrent_max, - bool keep_index_consistent, - optional_yield y, - const DoutPrefixProvider* dpp) override; - virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } - virtual int set_acl(const DoutPrefixProvider* dpp, - RGWAccessControlPolicy& acl, optional_yield y) override; - virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y, - bool get_stats = false) override; - virtual int read_stats(const DoutPrefixProvider* dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, std::string* bucket_ver, - std::string* master_ver, - std::map& stats, - std::string* max_marker = nullptr, - bool* syncstopped = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider* dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, - RGWGetBucketStats_CB* ctx) override; - virtual int sync_user_stats(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int update_container_stats(const DoutPrefixProvider* dpp) override; - virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override; - virtual int chown(const DoutPrefixProvider* dpp, User& new_user, - optional_yield y) override; - virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive, - ceph::real_time mtime) override; - virtual bool is_owner(User* user) override; - virtual int 
check_empty(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota, - uint64_t obj_size, optional_yield y, - bool check_size_only = false) override; - virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& attrs, - optional_yield y) override; - virtual int try_refresh_info(const DoutPrefixProvider* dpp, - ceph::real_time* pmtime) override; - virtual int read_usage( - const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch, - uint64_t end_epoch) override; - virtual int remove_objs_from_index( - const DoutPrefixProvider* dpp, - std::list& objs_to_unlink) override; - virtual int check_index( - const DoutPrefixProvider* dpp, - std::map& existing_stats, - std::map& calculated_stats) override; - virtual int rebuild_index(const DoutPrefixProvider* dpp) override; - virtual int set_tag_timeout(const DoutPrefixProvider* dpp, - uint64_t timeout) override; - virtual int purge_instance(const DoutPrefixProvider* dpp) override; - virtual std::unique_ptr clone() override { - return std::make_unique(*this); - } - virtual std::unique_ptr get_multipart_upload( - const std::string& oid, - std::optional upload_id = std::nullopt, ACLOwner owner = {}, - ceph::real_time mtime = real_clock::now()) override; - virtual int list_multiparts( - const DoutPrefixProvider* dpp, const std::string& prefix, - std::string& marker, const std::string& delim, const int& max_uploads, - std::vector>& uploads, - std::map* common_prefixes, - bool* is_truncated) override; - virtual int abort_multiparts(const DoutPrefixProvider* dpp, - CephContext* cct) override; - - int open(const DoutPrefixProvider* dpp); - int close(const DoutPrefixProvider* dpp); - bool is_open() { return ds3b != nullptr; } - std::unique_ptr 
get_encoded_info( - bufferlist& bl, ceph::real_time mtime); - - friend class DaosStore; -}; - -class DaosPlacementTier : public StorePlacementTier { - DaosStore* store; - RGWZoneGroupPlacementTier tier; - - public: - DaosPlacementTier(DaosStore* _store, const RGWZoneGroupPlacementTier& _tier) - : store(_store), tier(_tier) {} - virtual ~DaosPlacementTier() = default; - - virtual const std::string& get_tier_type() { return tier.tier_type; } - virtual const std::string& get_storage_class() { return tier.storage_class; } - virtual bool retain_head_object() { return tier.retain_head_object; } - RGWZoneGroupPlacementTier& get_rt() { return tier; } -}; - -class DaosZoneGroup : public StoreZoneGroup { - DaosStore* store; - const RGWZoneGroup group; - std::string empty; - - public: - DaosZoneGroup(DaosStore* _store) : store(_store), group() {} - DaosZoneGroup(DaosStore* _store, const RGWZoneGroup& _group) - : store(_store), group(_group) {} - virtual ~DaosZoneGroup() = default; - - virtual const std::string& get_id() const override { return group.get_id(); }; - virtual const std::string& get_name() const override { - return group.get_name(); - }; - virtual int equals(const std::string& other_zonegroup) const override { - return group.equals(other_zonegroup); - }; - /** Get the endpoint from zonegroup, or from master zone if not set */ - virtual const std::string& get_endpoint() const override; - virtual bool placement_target_exists(std::string& target) const override; - virtual bool is_master_zonegroup() const override { - return group.is_master_zonegroup(); - }; - virtual const std::string& get_api_name() const override { - return group.api_name; - }; - virtual int get_placement_target_names( - std::set& names) const override; - virtual const std::string& get_default_placement_name() const override { - return group.default_placement.name; - }; - virtual int get_hostnames(std::list& names) const override { - names = group.hostnames; - return 0; - }; - virtual int 
get_s3website_hostnames( - std::list& names) const override { - names = group.hostnames_s3website; - return 0; - }; - virtual int get_zone_count() const override { return group.zones.size(); } - virtual int get_placement_tier(const rgw_placement_rule& rule, - std::unique_ptr* tier); - virtual std::unique_ptr clone() override { - return std::make_unique(store, group); - } - const RGWZoneGroup& get_group() { return group; } -}; - -class DaosZone : public StoreZone { - protected: - DaosStore* store; - RGWRealm* realm{nullptr}; - DaosZoneGroup zonegroup; - RGWZone* zone_public_config{ - nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */ - RGWZoneParams* zone_params{ - nullptr}; /* internal zone params, e.g., rados pools */ - RGWPeriod* current_period{nullptr}; - rgw_zone_id cur_zone_id; - - public: - DaosZone(DaosStore* _store) : store(_store), zonegroup(_store) { - realm = new RGWRealm(); - zone_public_config = new RGWZone(); - zone_params = new RGWZoneParams(); - current_period = new RGWPeriod(); - cur_zone_id = rgw_zone_id(zone_params->get_id()); - - // XXX: only default and STANDARD supported for now - RGWZonePlacementInfo info; - RGWZoneStorageClasses sc; - sc.set_storage_class("STANDARD", nullptr, nullptr); - info.storage_classes = sc; - zone_params->placement_pools["default"] = info; - } - DaosZone(DaosStore* _store, DaosZoneGroup _zg) - : store(_store), zonegroup(_zg) { - realm = new RGWRealm(); - zone_public_config = new RGWZone(); - zone_params = new RGWZoneParams(); - current_period = new RGWPeriod(); - cur_zone_id = rgw_zone_id(zone_params->get_id()); - - // XXX: only default and STANDARD supported for now - RGWZonePlacementInfo info; - RGWZoneStorageClasses sc; - sc.set_storage_class("STANDARD", nullptr, nullptr); - info.storage_classes = sc; - zone_params->placement_pools["default"] = info; - } - ~DaosZone() = default; - - virtual std::unique_ptr clone() override { - return std::make_unique(store); - } - virtual ZoneGroup& 
get_zonegroup() override; - virtual int get_zonegroup(const std::string& id, - std::unique_ptr* zonegroup) override; - virtual const rgw_zone_id& get_id() override; - virtual const std::string& get_name() const override; - virtual bool is_writeable() override; - virtual bool get_redirect_endpoint(std::string* endpoint) override; - virtual bool has_zonegroup_api(const std::string& api) const override; - virtual const std::string& get_current_period_id() override; - virtual const RGWAccessKey& get_system_key() { - return zone_params->system_key; - } - virtual const std::string& get_realm_name() { return realm->get_name(); } - virtual const std::string& get_realm_id() { return realm->get_id(); } - virtual const std::string_view get_tier_type() { return "rgw"; } - - friend class DaosStore; -}; - -class DaosLuaManager : public StoreLuaManager { - DaosStore* store; - - public: - DaosLuaManager(DaosStore* _s) : store(_s) {} - virtual ~DaosLuaManager() = default; - - virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& key, std::string& script) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; - - virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& key, - const std::string& script) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; - - virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& key) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; - - virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& package_name) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; - - virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& package_name) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; - - virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, - rgw::lua::packages_t& 
packages) override { - DAOS_NOT_IMPLEMENTED_LOG(dpp); - return -ENOENT; - }; -}; - -class DaosObject : public StoreObject { - private: - DaosStore* store; - RGWAccessControlPolicy acls; - - public: - struct DaosReadOp : public StoreReadOp { - private: - DaosObject* source; - - public: - DaosReadOp(DaosObject* _source); - - virtual int prepare(optional_yield y, - const DoutPrefixProvider* dpp) override; - - /* - * Both `read` and `iterate` read up through index `end` - * *inclusive*. The number of bytes that could be returned is - * `end - ofs + 1`. - */ - virtual int read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, - const DoutPrefixProvider* dpp) override; - virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, - RGWGetDataCB* cb, optional_yield y) override; - - virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, - bufferlist& dest, optional_yield y) override; - }; - - struct DaosDeleteOp : public StoreDeleteOp { - private: - DaosObject* source; - - public: - DaosDeleteOp(DaosObject* _source); - - virtual int delete_obj(const DoutPrefixProvider* dpp, - optional_yield y) override; - }; - - ds3_obj_t* ds3o = nullptr; - - DaosObject() = default; - - DaosObject(DaosStore* _st, const rgw_obj_key& _k) - : StoreObject(_k), store(_st), acls() {} - DaosObject(DaosStore* _st, const rgw_obj_key& _k, Bucket* _b) - : StoreObject(_k, _b), store(_st), acls() {} - - DaosObject(DaosObject& _o) = default; - - virtual ~DaosObject(); - - virtual int delete_object(const DoutPrefixProvider* dpp, optional_yield y, - bool prevent_versioning = false) override; - virtual int copy_object( - User* user, req_info* info, const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, ceph::real_time* mtime, - const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, - bool high_precision_time, 
const char* if_match, const char* if_nomatch, - AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, - RGWObjCategory category, uint64_t olh_epoch, - boost::optional delete_at, std::string* version_id, - std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*), - void* progress_data, const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } - virtual int set_acl(const RGWAccessControlPolicy& acl) override { - acls = acl; - return 0; - } - - virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState** state, - optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) override; - virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, - rgw_obj* target_obj = NULL) override; - virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, - optional_yield y, - const DoutPrefixProvider* dpp) override; - virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, - const char* attr_name, - optional_yield y) override; - virtual bool is_expired() override; - virtual void gen_rand_obj_instance_name() override; - virtual std::unique_ptr clone() override { - return std::make_unique(*this); - } - virtual std::unique_ptr get_serializer( - const DoutPrefixProvider* dpp, const std::string& lock_name) override; - virtual int transition(Bucket* bucket, - const rgw_placement_rule& placement_rule, - const real_time& mtime, uint64_t olh_epoch, - const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int transition_to_cloud(Bucket* bucket, rgw::sal::PlacementTier* tier, - rgw_bucket_dir_entry& o, - std::set& cloud_targets, - CephContext* cct, bool update_object, - const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual bool placement_rules_match(rgw_placement_rule& r1, - rgw_placement_rule& r2) override; - virtual int 
dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y, - Formatter* f) override; - - /* Swift versioning */ - virtual int swift_versioning_restore(bool& restored, - const DoutPrefixProvider* dpp) override; - virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, - optional_yield y) override; - - /* OPs */ - virtual std::unique_ptr get_read_op() override; - virtual std::unique_ptr get_delete_op() override; - - /* OMAP */ - virtual int omap_get_vals_by_keys(const DoutPrefixProvider* dpp, - const std::string& oid, - const std::set& keys, - Attrs* vals) override; - virtual int omap_set_val_by_key(const DoutPrefixProvider* dpp, - const std::string& key, bufferlist& val, - bool must_exist, optional_yield y) override; - virtual int chown(User& new_user, const DoutPrefixProvider* dpp, - optional_yield y) override; - - bool is_open() { return ds3o != nullptr; }; - // Only lookup the object, do not create - int lookup(const DoutPrefixProvider* dpp); - // Create the object, truncate if exists - int create(const DoutPrefixProvider* dpp); - // Release the daos resources - int close(const DoutPrefixProvider* dpp); - // Write to object starting from offset - int write(const DoutPrefixProvider* dpp, bufferlist&& data, uint64_t offset); - // Read size bytes from object starting from offset - int read(const DoutPrefixProvider* dpp, bufferlist& data, uint64_t offset, - uint64_t& size); - // Get the object's dirent and attrs - int get_dir_entry_attrs(const DoutPrefixProvider* dpp, - rgw_bucket_dir_entry* ent, Attrs* getattrs = nullptr); - // Set the object's dirent and attrs - int set_dir_entry_attrs(const DoutPrefixProvider* dpp, - rgw_bucket_dir_entry* ent, Attrs* setattrs = nullptr); - // Marks this DAOS object as being the latest version and unmarks all other - // versions as latest - int mark_as_latest(const DoutPrefixProvider* dpp, ceph::real_time set_mtime); - // get_bucket casted as DaosBucket* - DaosBucket* get_daos_bucket() { - return 
static_cast(get_bucket()); - } -}; - -// A placeholder locking class for multipart upload. -class MPDaosSerializer : public StoreMPSerializer { - public: - MPDaosSerializer(const DoutPrefixProvider* dpp, DaosStore* store, - DaosObject* obj, const std::string& lock_name) {} - - virtual int try_lock(const DoutPrefixProvider* dpp, utime_t dur, - optional_yield y) override { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); - } - virtual int unlock() override { return DAOS_NOT_IMPLEMENTED_LOG(nullptr); } -}; - -class DaosAtomicWriter : public StoreWriter { - protected: - rgw::sal::DaosStore* store; - const rgw_user& owner; - const rgw_placement_rule* ptail_placement_rule; - uint64_t olh_epoch; - const std::string& unique_tag; - DaosObject obj; - uint64_t total_data_size = 0; // for total data being uploaded - - public: - DaosAtomicWriter(const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, - DaosStore* _store, const rgw_user& _owner, - const rgw_placement_rule* _ptail_placement_rule, - uint64_t _olh_epoch, const std::string& _unique_tag); - ~DaosAtomicWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time* mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, const char* if_match, - const char* if_nomatch, const std::string* user_data, - rgw_zone_set* zones_trace, bool* canceled, - optional_yield y) override; -}; - -class DaosMultipartWriter : public StoreWriter { - protected: - rgw::sal::DaosStore* store; - MultipartUpload* upload; - std::string upload_id; - - // Part parameters. 
- const uint64_t part_num; - const std::string part_num_str; - uint64_t actual_part_size = 0; - - ds3_part_t* ds3p = nullptr; - bool is_open() { return ds3p != nullptr; }; - - public: - DaosMultipartWriter(const DoutPrefixProvider* dpp, optional_yield y, - MultipartUpload* _upload, - rgw::sal::Object* obj, - DaosStore* _store, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, - uint64_t _part_num, const std::string& part_num_str) - : StoreWriter(dpp, y), - store(_store), - upload(_upload), - upload_id(_upload->get_upload_id()), - part_num(_part_num), - part_num_str(part_num_str) {} - virtual ~DaosMultipartWriter(); - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time* mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, const char* if_match, - const char* if_nomatch, const std::string* user_data, - rgw_zone_set* zones_trace, bool* canceled, - optional_yield y) override; - - const std::string& get_bucket_name(); -}; - -class DaosMultipartPart : public StoreMultipartPart { - protected: - RGWUploadPartInfo info; - - public: - DaosMultipartPart() = default; - virtual ~DaosMultipartPart() = default; - - virtual uint32_t get_num() { return info.num; } - virtual uint64_t get_size() { return info.accounted_size; } - virtual const std::string& get_etag() { return info.etag; } - virtual ceph::real_time& get_mtime() { return info.modified; } - - friend class DaosMultipartUpload; -}; - -class DaosMultipartUpload : public StoreMultipartUpload { - DaosStore* store; - RGWMPObj mp_obj; - ACLOwner owner; - ceph::real_time mtime; - rgw_placement_rule placement; - RGWObjManifest manifest; - - public: - DaosMultipartUpload(DaosStore* _store, 
Bucket* _bucket, - const std::string& oid, - std::optional upload_id, ACLOwner _owner, - ceph::real_time _mtime) - : StoreMultipartUpload(_bucket), - store(_store), - mp_obj(oid, upload_id), - owner(_owner), - mtime(_mtime) {} - virtual ~DaosMultipartUpload() = default; - - virtual const std::string& get_meta() const { return mp_obj.get_meta(); } - virtual const std::string& get_key() const { return mp_obj.get_key(); } - virtual const std::string& get_upload_id() const { - return mp_obj.get_upload_id(); - } - virtual const ACLOwner& get_owner() const override { return owner; } - virtual ceph::real_time& get_mtime() { return mtime; } - virtual std::unique_ptr get_meta_obj() override; - virtual int init(const DoutPrefixProvider* dpp, optional_yield y, - ACLOwner& owner, rgw_placement_rule& dest_placement, - rgw::sal::Attrs& attrs) override; - virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct, - int num_parts, int marker, int* next_marker, - bool* truncated, - bool assume_unsorted = false) override; - virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; - virtual int complete(const DoutPrefixProvider* dpp, optional_yield y, - CephContext* cct, std::map& part_etags, - std::list& remove_objs, - uint64_t& accounted_size, bool& compressed, - RGWCompressionInfo& cs_info, off_t& off, - std::string& tag, ACLOwner& owner, uint64_t olh_epoch, - rgw::sal::Object* target_obj) override; - virtual int get_info(const DoutPrefixProvider* dpp, optional_yield y, - rgw_placement_rule** rule, - rgw::sal::Attrs* attrs = nullptr) override; - virtual std::unique_ptr get_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, uint64_t part_num, - const std::string& part_num_str) override; - const std::string& get_bucket_name() { return bucket->get_name(); } -}; - -class DaosStore : public StoreDriver { - private: - DaosZone zone; - 
RGWSyncModuleInstanceRef sync_module; - - public: - ds3_t* ds3 = nullptr; - - CephContext* cctx; - - DaosStore(CephContext* c) : zone(this), cctx(c) {} - ~DaosStore() = default; - - virtual const std::string get_name() const override { return "daos"; } - - virtual std::unique_ptr get_user(const rgw_user& u) override; - virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int get_user_by_access_key(const DoutPrefixProvider* dpp, - const std::string& key, optional_yield y, - std::unique_ptr* user) override; - virtual int get_user_by_email(const DoutPrefixProvider* dpp, - const std::string& email, optional_yield y, - std::unique_ptr* user) override; - virtual int get_user_by_swift(const DoutPrefixProvider* dpp, - const std::string& user_str, optional_yield y, - std::unique_ptr* user) override; - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, - const rgw_bucket& b, std::unique_ptr* bucket, - optional_yield y) override; - virtual int get_bucket(User* u, const RGWBucketInfo& i, - std::unique_ptr* bucket) override; - virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, - const std::string& tenant, const std::string& name, - std::unique_ptr* bucket, - optional_yield y) override; - virtual bool is_meta_master() override; - virtual int forward_request_to_master(const DoutPrefixProvider* dpp, - User* user, obj_version* objv, - bufferlist& in_data, JSONParser* jp, - req_info& info, - optional_yield y) override; - virtual int forward_iam_request_to_master( - const DoutPrefixProvider* dpp, const RGWAccessKey& key, obj_version* objv, - bufferlist& in_data, RGWXMLDecoder::XMLParser* parser, req_info& info, - optional_yield y) override; - virtual Zone* get_zone() { return &zone; } - virtual std::string zone_unique_id(uint64_t unique_num) override; - virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; - virtual 
int cluster_stat(RGWClusterStat& stats) override; - virtual std::unique_ptr get_lifecycle(void) override; - virtual std::unique_ptr get_notification( - rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s, - rgw::notify::EventType event_type, optional_yield y, - const std::string* object_name = nullptr) override; - virtual std::unique_ptr get_notification( - const DoutPrefixProvider* dpp, rgw::sal::Object* obj, - rgw::sal::Object* src_obj, rgw::notify::EventType event_type, - rgw::sal::Bucket* _bucket, std::string& _user_id, - std::string& _user_tenant, std::string& _req_id, - optional_yield y) override; - virtual RGWLC* get_rgwlc(void) override { return NULL; } - virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { - return NULL; - } - - virtual int log_usage( - const DoutPrefixProvider* dpp, - std::map& usage_info) override; - virtual int log_op(const DoutPrefixProvider* dpp, std::string& oid, - bufferlist& bl) override; - virtual int register_to_service_map( - const DoutPrefixProvider* dpp, const std::string& daemon_type, - const std::map& meta) override; - virtual void get_quota(RGWQuota& quota) override; - virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, - RGWRateLimitInfo& user_ratelimit, - RGWRateLimitInfo& anon_ratelimit) override; - virtual int set_buckets_enabled(const DoutPrefixProvider* dpp, - std::vector& buckets, - bool enabled) override; - virtual uint64_t get_new_req_id() override { - return DAOS_NOT_IMPLEMENTED_LOG(nullptr); - } - virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp, - std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef* phandler, - optional_yield y) override; - virtual RGWDataSyncStatusManager* get_data_sync_manager( - const rgw_zone_id& source_zone) override; - virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { - return; - } - virtual void wakeup_data_sync_shards( - const DoutPrefixProvider* dpp, const rgw_zone_id& source_zone, - 
boost::container::flat_map< - int, boost::container::flat_set>& shard_ids) - override { - return; - } - virtual int clear_usage(const DoutPrefixProvider* dpp) override { - return DAOS_NOT_IMPLEMENTED_LOG(dpp); - } - virtual int read_all_usage( - const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_all_usage(const DoutPrefixProvider* dpp, - uint64_t start_epoch, uint64_t end_epoch) override; - virtual int get_config_key_val(std::string name, bufferlist* bl) override; - virtual int meta_list_keys_init(const DoutPrefixProvider* dpp, - const std::string& section, - const std::string& marker, - void** phandle) override; - virtual int meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle, - int max, std::list& keys, - bool* truncated) override; - virtual void meta_list_keys_complete(void* handle) override; - virtual std::string meta_get_marker(void* handle) override; - virtual int meta_remove(const DoutPrefixProvider* dpp, - std::string& metadata_key, optional_yield y) override; - - virtual const RGWSyncModuleInstanceRef& get_sync_module() { - return sync_module; - } - virtual std::string get_host_id() { return ""; } - - virtual std::unique_ptr get_lua_manager() override; - virtual std::unique_ptr get_role( - std::string name, std::string tenant, std::string path = "", - std::string trust_policy = "", std::string max_session_duration_str = "", - std::multimap tags = {}) override; - virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; - virtual std::unique_ptr get_role(std::string id) override; - virtual int get_roles(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& path_prefix, - const std::string& tenant, - std::vector>& roles) override; - virtual std::unique_ptr get_oidc_provider() override; - virtual int get_oidc_providers( - const DoutPrefixProvider* dpp, const std::string& tenant, - 
std::vector>& providers) override; - virtual std::unique_ptr get_append_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, - const std::string& unique_tag, uint64_t position, - uint64_t* cur_accounted_size) override; - virtual std::unique_ptr get_atomic_writer( - const DoutPrefixProvider* dpp, optional_yield y, - rgw::sal::Object* obj, const rgw_user& owner, - const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch, - const std::string& unique_tag) override; - virtual const std::string& get_compression_type( - const rgw_placement_rule& rule) override; - virtual bool valid_placement(const rgw_placement_rule& rule) override; - - virtual void finalize(void) override; - - virtual CephContext* ctx(void) override { return cctx; } - - virtual int initialize(CephContext* cct, - const DoutPrefixProvider* dpp) override; -}; - -} // namespace rgw::sal diff --git a/src/rgw/rgw_sal_motr.cc b/src/rgw/rgw_sal_motr.cc deleted file mode 100644 index a1bca8b5696d..000000000000 --- a/src/rgw/rgw_sal_motr.cc +++ /dev/null @@ -1,4005 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=2 sw=2 expandtab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * SAL implementation for the CORTX Motr backend - * - * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include -#include -#include - -extern "C" { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wextern-c-compat" -#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" -#include "motr/config.h" -#include "lib/types.h" -#include "lib/trace.h" // m0_trace_set_mmapped_buffer -#include "motr/layout.h" // M0_OBJ_LAYOUT_ID -#include "helpers/helpers.h" // m0_ufid_next -#pragma clang diagnostic pop -} - -#include "common/Clock.h" -#include "common/errno.h" - -#include "rgw_compression.h" -#include "rgw_sal.h" -#include "rgw_sal_motr.h" -#include "rgw_bucket.h" - -#define dout_subsys ceph_subsys_rgw - -using std::string; -using std::map; -using std::vector; -using std::set; -using std::list; - -static string mp_ns = RGW_OBJ_NS_MULTIPART; -static struct m0_ufid_generator ufid_gr; - -namespace rgw::sal { - -using ::ceph::encode; -using ::ceph::decode; - -static std::string motr_global_indices[] = { - RGW_MOTR_USERS_IDX_NAME, - RGW_MOTR_BUCKET_INST_IDX_NAME, - RGW_MOTR_BUCKET_HD_IDX_NAME, - RGW_IAM_MOTR_ACCESS_KEY, - RGW_IAM_MOTR_EMAIL_KEY -}; - -void MotrMetaCache::invalid(const DoutPrefixProvider *dpp, - const string& name) -{ - cache.invalidate_remove(dpp, name); -} - -int MotrMetaCache::put(const DoutPrefixProvider *dpp, - const string& name, - const bufferlist& data) -{ - ldpp_dout(dpp, 0) << "Put into cache: name = " << name << dendl; - - ObjectCacheInfo info; - info.status = 0; - info.data = data; - info.flags = CACHE_FLAG_DATA; - info.meta.mtime = ceph::real_clock::now(); - info.meta.size = data.length(); - cache.put(dpp, name, info, NULL); - - // Inform other rgw instances. Do nothing if it gets some error? 
- int rc = distribute_cache(dpp, name, info, UPDATE_OBJ); - if (rc < 0) - ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << name << dendl; - - return 0; -} - -int MotrMetaCache::get(const DoutPrefixProvider *dpp, - const string& name, - bufferlist& data) -{ - ObjectCacheInfo info; - uint32_t flags = CACHE_FLAG_DATA; - int rc = cache.get(dpp, name, info, flags, NULL); - if (rc == 0) { - if (info.status < 0) - return info.status; - - bufferlist& bl = info.data; - bufferlist::iterator it = bl.begin(); - data.clear(); - - it.copy_all(data); - ldpp_dout(dpp, 0) << "Cache hit: name = " << name << dendl; - return 0; - } - ldpp_dout(dpp, 0) << "Cache miss: name = " << name << ", rc = "<< rc << dendl; - if(rc == -ENODATA) - return -ENOENT; - - return rc; -} - -int MotrMetaCache::remove(const DoutPrefixProvider *dpp, - const string& name) - -{ - cache.invalidate_remove(dpp, name); - - ObjectCacheInfo info; - int rc = distribute_cache(dpp, name, info, INVALIDATE_OBJ); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: " <<__func__<< "(): failed to distribute cache: rc =" << rc << dendl; - } - - ldpp_dout(dpp, 0) << "Remove from cache: name = " << name << dendl; - return 0; -} - -int MotrMetaCache::distribute_cache(const DoutPrefixProvider *dpp, - const string& normal_name, - ObjectCacheInfo& obj_info, int op) -{ - return 0; -} - -int MotrMetaCache::watch_cb(const DoutPrefixProvider *dpp, - uint64_t notify_id, - uint64_t cookie, - uint64_t notifier_id, - bufferlist& bl) -{ - return 0; -} - -void MotrMetaCache::set_enabled(bool status) -{ - cache.set_enabled(status); -} - -// TODO: properly handle the number of key/value pairs to get in -// one query. Now the POC simply tries to retrieve all `max` number of pairs -// with starting key `marker`. 
-int MotrUser::list_buckets(const DoutPrefixProvider *dpp, const string& marker, - const string& end_marker, uint64_t max, bool need_stats, - BucketList &buckets, optional_yield y) -{ - int rc; - vector keys(max); - vector vals(max); - bool is_truncated = false; - - ldpp_dout(dpp, 20) <<__func__<< ": list_user_buckets: marker=" << marker - << " end_marker=" << end_marker - << " max=" << max << dendl; - - // Retrieve all `max` number of pairs. - buckets.clear(); - string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str(); - keys[0] = marker; - rc = store->next_query_by_name(user_info_iname, keys, vals); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; - return rc; - } - - // Process the returned pairs to add into BucketList. - uint64_t bcount = 0; - for (const auto& bl: vals) { - if (bl.length() == 0) - break; - - RGWBucketEnt ent; - auto iter = bl.cbegin(); - ent.decode(iter); - - std::time_t ctime = ceph::real_clock::to_time_t(ent.creation_time); - ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl; - - if (!end_marker.empty() && - end_marker.compare(ent.bucket.marker) <= 0) - break; - - buckets.add(std::make_unique(this->store, ent, this)); - bcount++; - } - if (bcount == max) - is_truncated = true; - buckets.set_truncated(is_truncated); - - return 0; -} - -int MotrUser::create_bucket(const DoutPrefixProvider* dpp, - const rgw_bucket& b, - const std::string& zonegroup_id, - rgw_placement_rule& placement_rule, - std::string& swift_ver_location, - const RGWQuotaInfo* pquota_info, - const RGWAccessControlPolicy& policy, - Attrs& attrs, - RGWBucketInfo& info, - obj_version& ep_objv, - bool exclusive, - bool obj_lock_enabled, - bool* existed, - req_info& req_info, - std::unique_ptr* bucket_out, - optional_yield y) -{ - int ret; - std::unique_ptr bucket; - - // Look up the bucket. Create it if it doesn't exist. 
- ret = this->store->get_bucket(dpp, this, b, &bucket, y); - if (ret < 0 && ret != -ENOENT) - return ret; - - if (ret != -ENOENT) { - *existed = true; - // if (swift_ver_location.empty()) { - // swift_ver_location = bucket->get_info().swift_ver_location; - // } - // placement_rule.inherit_from(bucket->get_info().placement_rule); - - // TODO: ACL policy - // // don't allow changes to the acl policy - //RGWAccessControlPolicy old_policy(ctx()); - //int rc = rgw_op_get_bucket_policy_from_attr( - // dpp, this, u, bucket->get_attrs(), &old_policy, y); - //if (rc >= 0 && old_policy != policy) { - // bucket_out->swap(bucket); - // return -EEXIST; - //} - } else { - - placement_rule.name = "default"; - placement_rule.storage_class = "STANDARD"; - bucket = std::make_unique(store, b, this); - bucket->set_attrs(attrs); - *existed = false; - } - - if (!*existed){ - // TODO: how to handle zone and multi-site. - info.placement_rule = placement_rule; - info.bucket = b; - info.owner = this->get_info().user_id; - info.zonegroup = zonegroup_id; - if (obj_lock_enabled) - info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; - bucket->set_version(ep_objv); - bucket->get_info() = info; - - // Create a new bucket: (1) Add a key/value pair in the - // bucket instance index. (2) Create a new bucket index. - MotrBucket* mbucket = static_cast(bucket.get()); - ret = mbucket->put_info(dpp, y, ceph::real_time())? : - mbucket->create_bucket_index() ? : - mbucket->create_multipart_indices(); - if (ret < 0) - ldpp_dout(dpp, 0) << "ERROR: failed to create bucket indices! " << ret << dendl; - - // Insert the bucket entry into the user info index. - ret = mbucket->link_user(dpp, this, y); - if (ret < 0) - ldpp_dout(dpp, 0) << "ERROR: failed to add bucket entry! 
" << ret << dendl; - } else { - return -EEXIST; - // bucket->set_version(ep_objv); - // bucket->get_info() = info; - } - - bucket_out->swap(bucket); - - return ret; -} - -int MotrUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) -{ - return 0; -} - -int MotrUser::read_stats(const DoutPrefixProvider *dpp, - optional_yield y, RGWStorageStats* stats, - ceph::real_time *last_stats_sync, - ceph::real_time *last_stats_update) -{ - return 0; -} - -/* stats - Not for first pass */ -int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb) -{ - return 0; -} - -int MotrUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) -{ - return 0; -} - -int MotrUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, - bool *is_truncated, RGWUsageIter& usage_iter, - map& usage) -{ - return 0; -} - -int MotrUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) -{ - return 0; -} - -int MotrUser::load_user_from_idx(const DoutPrefixProvider *dpp, - MotrStore *store, - RGWUserInfo& info, map *attrs, - RGWObjVersionTracker *objv_tr) -{ - struct MotrUserInfo muinfo; - bufferlist bl; - ldpp_dout(dpp, 20) << "info.user_id.id = " << info.user_id.id << dendl; - if (store->get_user_cache()->get(dpp, info.user_id.id, bl)) { - // Cache misses - int rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME, - M0_IC_GET, info.user_id.to_str(), bl); - ldpp_dout(dpp, 20) << "do_idx_op_by_name() = " << rc << dendl; - if (rc < 0) - return rc; - - // Put into cache. 
- store->get_user_cache()->put(dpp, info.user_id.id, bl); - } - - bufferlist& blr = bl; - auto iter = blr.cbegin(); - muinfo.decode(iter); - info = muinfo.info; - if (attrs) - *attrs = muinfo.attrs; - if (objv_tr) - { - objv_tr->read_version = muinfo.user_version; - objv_tracker.read_version = objv_tr->read_version; - } - - if (!info.access_keys.empty()) { - for(auto key : info.access_keys) { - access_key_tracker.insert(key.first); - } - } - - return 0; -} - -int MotrUser::load_user(const DoutPrefixProvider *dpp, - optional_yield y) -{ - ldpp_dout(dpp, 20) << "load user: user id = " << info.user_id.to_str() << dendl; - return load_user_from_idx(dpp, store, info, &attrs, &objv_tracker); -} - -int MotrUser::create_user_info_idx() -{ - string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str(); - return store->create_motr_idx_by_name(user_info_iname); -} - -int MotrUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) -{ - for (auto& it : new_attrs) - attrs[it.first] = it.second; - - return store_user(dpp, y, false); -} - -int MotrUser::store_user(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, RGWUserInfo* old_info) -{ - bufferlist bl; - struct MotrUserInfo muinfo; - RGWUserInfo orig_info; - RGWObjVersionTracker objv_tr = {}; - obj_version& obj_ver = objv_tr.read_version; - - ldpp_dout(dpp, 20) << "Store_user(): User = " << info.user_id.id << dendl; - orig_info.user_id = info.user_id; - // XXX: we open and close motr idx 2 times in this method: - // 1) on load_user_from_idx() here and 2) on do_idx_op_by_name(PUT) below. - // Maybe this can be optimised later somewhow. 
- int rc = load_user_from_idx(dpp, store, orig_info, nullptr, &objv_tr); - ldpp_dout(dpp, 10) << "Get user: rc = " << rc << dendl; - - // Check if the user already exists - if (rc == 0 && obj_ver.ver > 0) { - if (old_info) - *old_info = orig_info; - - if (obj_ver.ver != objv_tracker.read_version.ver) { - rc = -ECANCELED; - ldpp_dout(dpp, 0) << "ERROR: User Read version mismatch" << dendl; - goto out; - } - - if (exclusive) - return rc; - - obj_ver.ver++; - } else { - obj_ver.ver = 1; - obj_ver.tag = "UserTAG"; - } - - // Insert the user to user info index. - muinfo.info = info; - muinfo.attrs = attrs; - muinfo.user_version = obj_ver; - muinfo.encode(bl); - rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME, - M0_IC_PUT, info.user_id.to_str(), bl); - ldpp_dout(dpp, 10) << "Store user to motr index: rc = " << rc << dendl; - if (rc == 0) { - objv_tracker.read_version = obj_ver; - objv_tracker.write_version = obj_ver; - } - - // Store access key in access key index - if (!info.access_keys.empty()) { - std::string access_key; - std::string secret_key; - std::map::const_iterator iter = info.access_keys.begin(); - const RGWAccessKey& k = iter->second; - access_key = k.id; - secret_key = k.key; - MotrAccessKey MGWUserKeys(access_key, secret_key, info.user_id.to_str()); - store->store_access_key(dpp, y, MGWUserKeys); - access_key_tracker.insert(access_key); - } - - // Check if any key need to be deleted - if (access_key_tracker.size() != info.access_keys.size()) { - std::string key_for_deletion; - for (auto key : access_key_tracker) { - if (!info.get_key(key)) { - key_for_deletion = key; - ldpp_dout(dpp, 0) << "Deleting access key: " << key_for_deletion << dendl; - store->delete_access_key(dpp, y, key_for_deletion); - if (rc < 0) { - ldpp_dout(dpp, 0) << "Unable to delete access key" << rc << dendl; - } - } - } - if(rc >= 0){ - access_key_tracker.erase(key_for_deletion); - } - } - - if (!info.user_email.empty()) { - MotrEmailInfo MGWEmailInfo(info.user_id.to_str(), 
info.user_email); - store->store_email_info(dpp, y, MGWEmailInfo); - } - - // Create user info index to store all buckets that are belong - // to this bucket. - rc = create_user_info_idx(); - if (rc < 0 && rc != -EEXIST) { - ldpp_dout(dpp, 0) << "Failed to create user info index: rc = " << rc << dendl; - goto out; - } - - // Put the user info into cache. - rc = store->get_user_cache()->put(dpp, info.user_id.id, bl); - -out: - return rc; -} - -int MotrUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) -{ - // Remove user info from cache - // Delete access keys for user - // Delete user info - // Delete user from user index - // Delete email for user - TODO - bufferlist bl; - int rc; - // Remove the user info from cache. - store->get_user_cache()->remove(dpp, info.user_id.id); - - // Delete all access key of user - if (!info.access_keys.empty()) { - for(auto acc_key = info.access_keys.begin(); acc_key != info.access_keys.end(); acc_key++) { - auto access_key = acc_key->first; - rc = store->delete_access_key(dpp, y, access_key); - // TODO - // Check error code for access_key does not exist - // Continue to next step only if delete failed because key doesn't exists - if (rc < 0){ - ldpp_dout(dpp, 0) << "Unable to delete access key" << rc << dendl; - } - } - } - - //Delete email id - if (!info.user_email.empty()) { - rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, - M0_IC_DEL, info.user_email, bl); - if (rc < 0 && rc != -ENOENT) { - ldpp_dout(dpp, 0) << "Unable to delete email id " << rc << dendl; - } - } - - // Delete user info index - string user_info_iname = "motr.rgw.user.info." 
+ info.user_id.to_str(); - store->delete_motr_idx_by_name(user_info_iname); - ldpp_dout(dpp, 10) << "Deleted user info index - " << user_info_iname << dendl; - - // Delete user from user index - rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME, - M0_IC_DEL, info.user_id.to_str(), bl); - if (rc < 0){ - ldpp_dout(dpp, 0) << "Unable to delete user from user index " << rc << dendl; - return rc; - } - - // TODO - // Delete email for user - // rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, - // M0_IC_DEL, info.user_email, bl); - // if (rc < 0){ - // ldpp_dout(dpp, 0) << "Unable to delete email for user" << rc << dendl; - // return rc; - // } - return 0; -} - -int MotrUser::verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider *dpp, optional_yield y) -{ - *verified = false; - return 0; -} - -int MotrBucket::remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) -{ - int ret; - - ldpp_dout(dpp, 20) << "remove_bucket Entry=" << info.bucket.name << dendl; - - // Refresh info - ret = load_bucket(dpp, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket load_bucket failed rc=" << ret << dendl; - return ret; - } - - ListParams params; - params.list_versions = true; - params.allow_unordered = true; - - ListResults results; - - // 1. Check if Bucket has objects. - // If bucket contains objects and delete_children is true, delete all objects. - // Else throw error that bucket is not empty. - do { - results.objs.clear(); - - // Check if bucket has objects. - ret = list(dpp, params, 1000, results, y); - if (ret < 0) { - return ret; - } - - // If result contains entries, bucket is not empty. 
- if (!results.objs.empty() && !delete_children) { - ldpp_dout(dpp, 0) << "ERROR: could not remove non-empty bucket " << info.bucket.name << dendl; - return -ENOTEMPTY; - } - - for (const auto& obj : results.objs) { - rgw_obj_key key(obj.key); - if (key.instance.empty()) { - key.instance = "null"; - } - - std::unique_ptr object = get_object(key); - - ret = object->delete_object(dpp, null_yield); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket rgw_remove_object failed rc=" << ret << dendl; - return ret; - } - } - } while(results.is_truncated); - - // 2. Abort Mp uploads on the bucket. - ret = abort_multiparts(dpp, store->ctx()); - if (ret < 0) { - return ret; - } - - // 3. Remove mp index?? - string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts"; - ret = store->delete_motr_idx_by_name(bucket_multipart_iname); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove multipart index rc=" << ret << dendl; - return ret; - } - - // 4. Sync user stats. - ret = this->sync_user_stats(dpp, y); - if (ret < 0) { - ldout(store->ctx(), 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl; - } - - // 5. Remove the bucket from user info index. (unlink user) - ret = this->unlink_user(dpp, owner, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl; - return ret; - } - - // 6. Remove bucket index. - string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name; - ret = store->delete_motr_idx_by_name(bucket_index_iname); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl; - return ret; - } - - // 7. Remove bucket instance info. 
- bufferlist bl; - ret = store->get_bucket_inst_cache()->remove(dpp, info.bucket.name); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance from cache rc=" - << ret << dendl; - return ret; - } - - ret = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME, - M0_IC_DEL, info.bucket.name, bl); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance rc=" - << ret << dendl; - return ret; - } - - // TODO : - // 8. Remove Notifications - // if bucket has notification definitions associated with it - // they should be removed (note that any pending notifications on the bucket are still going to be sent) - - // 9. Forward request to master. - if (forward_to_master) { - bufferlist in_data; - ret = store->forward_request_to_master(dpp, owner, &bucket_version, in_data, nullptr, *req_info, y); - if (ret < 0) { - if (ret == -ENOENT) { - /* adjust error, we want to return with NoSuchBucket and not - * NoSuchKey */ - ret = -ERR_NO_SUCH_BUCKET; - } - ldpp_dout(dpp, 0) << "ERROR: Forward to master failed. ret=" << ret << dendl; - return ret; - } - } - - ldpp_dout(dpp, 20) << "remove_bucket Exit=" << info.bucket.name << dendl; - - return ret; -} - -int MotrBucket::remove_bucket_bypass_gc(int concurrent_max, bool - keep_index_consistent, - optional_yield y, const - DoutPrefixProvider *dpp) { - return 0; -} - -int MotrBucket::put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time _mtime) -{ - bufferlist bl; - struct MotrBucketInfo mbinfo; - - ldpp_dout(dpp, 20) << "put_info(): bucket_id=" << info.bucket.bucket_id << dendl; - mbinfo.info = info; - mbinfo.bucket_attrs = attrs; - mbinfo.mtime = _mtime; - mbinfo.bucket_version = bucket_version; - mbinfo.encode(bl); - - // Insert bucket instance using bucket's marker (string). 
- int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME, - M0_IC_PUT, info.bucket.name, bl, !exclusive); - if (rc == 0) - store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl); - - return rc; -} - -int MotrBucket::load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats) -{ - // Get bucket instance using bucket's name (string). or bucket id? - bufferlist bl; - if (store->get_bucket_inst_cache()->get(dpp, info.bucket.name, bl)) { - // Cache misses. - ldpp_dout(dpp, 20) << "load_bucket(): name=" << info.bucket.name << dendl; - int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME, - M0_IC_GET, info.bucket.name, bl); - ldpp_dout(dpp, 20) << "load_bucket(): rc=" << rc << dendl; - if (rc < 0) - return rc; - store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl); - } - - struct MotrBucketInfo mbinfo; - bufferlist& blr = bl; - auto iter =blr.cbegin(); - mbinfo.decode(iter); //Decode into MotrBucketInfo. - - info = mbinfo.info; - ldpp_dout(dpp, 20) << "load_bucket(): bucket_id=" << info.bucket.bucket_id << dendl; - rgw_placement_rule placement_rule; - placement_rule.name = "default"; - placement_rule.storage_class = "STANDARD"; - info.placement_rule = placement_rule; - - attrs = mbinfo.bucket_attrs; - mtime = mbinfo.mtime; - bucket_version = mbinfo.bucket_version; - - return 0; -} - -int MotrBucket::link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y) -{ - bufferlist bl; - RGWBucketEnt new_bucket; - ceph::real_time creation_time = get_creation_time(); - - // RGWBucketEnt or cls_user_bucket_entry is the structure that is stored. 
- new_bucket.bucket = info.bucket; - new_bucket.size = 0; - if (real_clock::is_zero(creation_time)) - creation_time = ceph::real_clock::now(); - new_bucket.creation_time = creation_time; - new_bucket.encode(bl); - std::time_t ctime = ceph::real_clock::to_time_t(new_bucket.creation_time); - ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl; - - // Insert the user into the user info index. - string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str(); - return store->do_idx_op_by_name(user_info_idx_name, - M0_IC_PUT, info.bucket.name, bl); - -} - -int MotrBucket::unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y) -{ - // Remove the user into the user info index. - bufferlist bl; - string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str(); - return store->do_idx_op_by_name(user_info_idx_name, - M0_IC_DEL, info.bucket.name, bl); -} - -/* stats - Not for first pass */ -int MotrBucket::read_stats(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, int shard_id, - std::string *bucket_ver, std::string *master_ver, - std::map& stats, - std::string *max_marker, bool *syncstopped) -{ - return 0; -} - -int MotrBucket::create_bucket_index() -{ - string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name; - return store->create_motr_idx_by_name(bucket_index_iname); -} - -int MotrBucket::create_multipart_indices() -{ - int rc; - - // Bucket multipart index stores in-progress multipart uploads. - // Key is the object name + upload_id, value is a rgw_bucket_dir_entry. - // An entry is inserted when a multipart upload is initialised ( - // MotrMultipartUpload::init()) and will be removed when the upload - // is completed (MotrMultipartUpload::complete()). - // MotrBucket::list_multiparts() will scan this index to return all - // in-progress multipart uploads in the bucket. 
- string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts"; - rc = store->create_motr_idx_by_name(bucket_multipart_iname); - if (rc < 0) { - ldout(store->cctx, 0) << "Failed to create bucket multipart index " << bucket_multipart_iname << dendl; - return rc; - } - - return 0; -} - - -int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB *ctx) -{ - return 0; -} - -int MotrBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) -{ - return 0; -} - -int MotrBucket::update_container_stats(const DoutPrefixProvider *dpp) -{ - return 0; -} - -int MotrBucket::check_bucket_shards(const DoutPrefixProvider *dpp) -{ - return 0; -} - -int MotrBucket::chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) -{ - // TODO: update bucket with new owner - return 0; -} - -/* Make sure to call load_bucket() if you need it first */ -bool MotrBucket::is_owner(User* user) -{ - return (info.owner.compare(user->get_id()) == 0); -} - -int MotrBucket::check_empty(const DoutPrefixProvider *dpp, optional_yield y) -{ - /* XXX: Check if bucket contains any objects */ - return 0; -} - -int MotrBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, - optional_yield y, bool check_size_only) -{ - /* Not Handled in the first pass as stats are also needed */ - return 0; -} - -int MotrBucket::merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& new_attrs, optional_yield y) -{ - for (auto& it : new_attrs) - attrs[it.first] = it.second; - - return put_info(dpp, y, ceph::real_time()); -} - -int MotrBucket::try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) -{ - return 0; -} - -/* XXX: usage and stats not supported in the first pass */ -int MotrBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool *is_truncated, - 
RGWUsageIter& usage_iter, - map& usage) -{ - return 0; -} - -int MotrBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) -{ - return 0; -} - -int MotrBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) -{ - /* XXX: CHECK: Unlike RadosStore, there is no seperate bucket index table. - * Delete all the object in the list from the object table of this - * bucket - */ - return 0; -} - -int MotrBucket::check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) -{ - /* XXX: stats not supported yet */ - return 0; -} - -int MotrBucket::rebuild_index(const DoutPrefixProvider *dpp) -{ - /* there is no index table in dbstore. Not applicable */ - return 0; -} - -int MotrBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) -{ - /* XXX: CHECK: set tag timeout for all the bucket objects? */ - return 0; -} - -int MotrBucket::purge_instance(const DoutPrefixProvider *dpp) -{ - /* XXX: CHECK: for dbstore only single instance supported. - * Remove all the objects for that instance? Anything extra needed? - */ - return 0; -} - -int MotrBucket::set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy &acl, optional_yield y) -{ - int ret = 0; - bufferlist aclbl; - - acls = acl; - acl.encode(aclbl); - - Attrs attrs = get_attrs(); - attrs[RGW_ATTR_ACL] = aclbl; - - // TODO: update bucket entry with the new attrs - - return ret; -} - -std::unique_ptr MotrBucket::get_object(const rgw_obj_key& k) -{ - return std::make_unique(this->store, k, this); -} - -int MotrBucket::list(const DoutPrefixProvider *dpp, ListParams& params, int max, ListResults& results, optional_yield y) -{ - int rc; - vector keys(max); - vector vals(max); - - ldpp_dout(dpp, 20) << "bucket=" << info.bucket.name - << " prefix=" << params.prefix - << " marker=" << params.marker - << " max=" << max << dendl; - - // Retrieve all `max` number of pairs. 
- string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name; - keys[0] = params.marker.empty() ? params.prefix : - params.marker.get_oid(); - rc = store->next_query_by_name(bucket_index_iname, keys, vals, params.prefix, - params.delim); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; - return rc; - } - - // Process the returned pairs to add into ListResults. - int i = 0; - for (; i < rc; ++i) { - if (vals[i].length() == 0) { - results.common_prefixes[keys[i]] = true; - } else { - rgw_bucket_dir_entry ent; - auto iter = vals[i].cbegin(); - ent.decode(iter); - if (params.list_versions || ent.is_visible()) - results.objs.emplace_back(std::move(ent)); - } - } - - if (i == max) { - results.is_truncated = true; - results.next_marker = keys[max - 1] + " "; - } else { - results.is_truncated = false; - } - - return 0; -} - -int MotrBucket::list_multiparts(const DoutPrefixProvider *dpp, - const string& prefix, - string& marker, - const string& delim, - const int& max_uploads, - vector>& uploads, - map *common_prefixes, - bool *is_truncated) -{ - int rc; - vector key_vec(max_uploads); - vector val_vec(max_uploads); - - string bucket_multipart_iname = - "motr.rgw.bucket." + this->get_name() + ".multiparts"; - key_vec[0].clear(); - key_vec[0].assign(marker.begin(), marker.end()); - rc = store->next_query_by_name(bucket_multipart_iname, key_vec, val_vec); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; - return rc; - } - - // Process the returned pairs to add into ListResults. - // The POC can only support listing all objects or selecting - // with prefix. 
- int ocount = 0; - rgw_obj_key last_obj_key; - *is_truncated = false; - for (const auto& bl: val_vec) { - if (bl.length() == 0) - break; - - rgw_bucket_dir_entry ent; - auto iter = bl.cbegin(); - ent.decode(iter); - - if (prefix.size() && - (0 != ent.key.name.compare(0, prefix.size(), prefix))) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << - ": skippping \"" << ent.key << - "\" because doesn't match prefix" << dendl; - continue; - } - - rgw_obj_key key(ent.key); - uploads.push_back(this->get_multipart_upload(key.name)); - last_obj_key = key; - ocount++; - if (ocount == max_uploads) { - *is_truncated = true; - break; - } - } - marker = last_obj_key.name; - - // What is common prefix? We don't handle it for now. - - return 0; - -} - -int MotrBucket::abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) -{ - return 0; -} - -void MotrStore::finalize(void) -{ - // close connection with motr - m0_client_fini(this->instance, true); -} - -const std::string& MotrZoneGroup::get_endpoint() const -{ - if (!group.endpoints.empty()) { - return group.endpoints.front(); - } else { - // use zonegroup's master zone endpoints - auto z = group.zones.find(group.master_zone); - if (z != group.zones.end() && !z->second.endpoints.empty()) { - return z->second.endpoints.front(); - } - } - return empty; -} - -bool MotrZoneGroup::placement_target_exists(std::string& target) const -{ - return !!group.placement_targets.count(target); -} - -int MotrZoneGroup::get_placement_target_names(std::set& names) const -{ - for (const auto& target : group.placement_targets) { - names.emplace(target.second.name); - } - - return 0; -} - -int MotrZoneGroup::get_placement_tier(const rgw_placement_rule& rule, - std::unique_ptr* tier) -{ - std::map::const_iterator titer; - titer = group.placement_targets.find(rule.name); - if (titer == group.placement_targets.end()) { - return -ENOENT; - } - - const auto& target_rule = titer->second; - std::map::const_iterator ttier; - ttier = 
target_rule.tier_targets.find(rule.storage_class); - if (ttier == target_rule.tier_targets.end()) { - // not found - return -ENOENT; - } - - PlacementTier* t; - t = new MotrPlacementTier(store, ttier->second); - if (!t) - return -ENOMEM; - - tier->reset(t); - return 0; -} - -ZoneGroup& MotrZone::get_zonegroup() -{ - return zonegroup; -} - -const std::string& MotrZone::get_id() -{ - return zone_params->get_id(); -} - -const std::string& MotrZone::get_name() const -{ - return zone_params->get_name(); -} - -bool MotrZone::is_writeable() -{ - return true; -} - -bool MotrZone::get_redirect_endpoint(std::string* endpoint) -{ - return false; -} - -bool MotrZone::has_zonegroup_api(const std::string& api) const -{ - return (zonegroup.group.api_name == api); -} - -const std::string& MotrZone::get_current_period_id() -{ - return current_period->get_id(); -} - -std::unique_ptr MotrStore::get_lua_manager() -{ - return std::make_unique(this); -} - -int MotrObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **_state, optional_yield y, bool follow_olh) -{ - // Get object's metadata (those stored in rgw_bucket_dir_entry). - bufferlist bl; - if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) { - // Cache misses. - string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name(); - int rc = this->store->do_idx_op_by_name(bucket_index_iname, - M0_IC_GET, this->get_key().get_oid(), bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl; - return rc; - } - - // Put into cache. - this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl); - } - - rgw_bucket_dir_entry ent; - bufferlist& blr = bl; - auto iter = blr.cbegin(); - ent.decode(iter); - - // Set object's type. - this->category = ent.meta.category; - - // Set object state. 
- state.exists = true; - state.size = ent.meta.size; - state.accounted_size = ent.meta.size; - state.mtime = ent.meta.mtime; - - state.has_attrs = true; - bufferlist etag_bl; - string& etag = ent.meta.etag; - ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl; - etag_bl.append(etag); - state.attrset[RGW_ATTR_ETAG] = etag_bl; - - return 0; -} - -MotrObject::~MotrObject() { - this->close_mobj(); -} - -// int MotrObject::read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj) -// { -// read_op.params.attrs = &attrs; -// read_op.params.target_obj = target_obj; -// read_op.params.obj_size = &obj_size; -// read_op.params.lastmod = &mtime; -// -// return read_op.prepare(dpp); -// } - -int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) -{ - // TODO: implement - ldpp_dout(dpp, 20) <<__func__<< ": MotrObject::set_obj_attrs()" << dendl; - return 0; -} - -int MotrObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj) -{ - if (this->category == RGWObjCategory::MultiMeta) - return 0; - - string bname, key; - if (target_obj) { - bname = target_obj->bucket.name; - key = target_obj->key.get_oid(); - } else { - bname = this->get_bucket()->get_name(); - key = this->get_key().get_oid(); - } - ldpp_dout(dpp, 20) << "MotrObject::get_obj_attrs(): " - << bname << "/" << key << dendl; - - // Get object's metadata (those stored in rgw_bucket_dir_entry). - bufferlist bl; - if (this->store->get_obj_meta_cache()->get(dpp, key, bl)) { - // Cache misses. - string bucket_index_iname = "motr.rgw.bucket.index." + bname; - int rc = this->store->do_idx_op_by_name(bucket_index_iname, M0_IC_GET, key, bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl; - return rc; - } - - // Put into cache. 
- this->store->get_obj_meta_cache()->put(dpp, key, bl); - } - - rgw_bucket_dir_entry ent; - bufferlist& blr = bl; - auto iter = blr.cbegin(); - ent.decode(iter); - decode(state.attrset, iter); - - return 0; -} - -int MotrObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) -{ - rgw_obj target = get_obj(); - int r = get_obj_attrs(y, dpp, &target); - if (r < 0) { - return r; - } - set_atomic(); - state.attrset[attr_name] = attr_val; - return set_obj_attrs(dpp, &state.attrset, nullptr, y); -} - -int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) -{ - rgw_obj target = get_obj(); - Attrs rmattr; - bufferlist bl; - - set_atomic(); - rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); -} - -bool MotrObject::is_expired() { - return false; -} - -// Taken from rgw_rados.cc -void MotrObject::gen_rand_obj_instance_name() -{ - enum {OBJ_INSTANCE_LEN = 32}; - char buf[OBJ_INSTANCE_LEN + 1]; - - gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN); - state.obj.key.set_instance(buf); -} - -int MotrObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, - const std::set& keys, - Attrs* vals) -{ - return 0; -} - -int MotrObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, - bool must_exist, optional_yield y) -{ - return 0; -} - -int MotrObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) -{ - return 0; -} - -std::unique_ptr MotrObject::get_serializer(const DoutPrefixProvider *dpp, - const std::string& lock_name) -{ - return std::make_unique(dpp, store, this, lock_name); -} - -int MotrObject::transition(Bucket* bucket, - const rgw_placement_rule& placement_rule, - const real_time& mtime, - uint64_t olh_epoch, - const DoutPrefixProvider* dpp, - optional_yield y) -{ - return 0; -} - -bool 
MotrObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) -{ - /* XXX: support single default zone and zonegroup for now */ - return true; -} - -int MotrObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) -{ - return 0; -} - -std::unique_ptr MotrObject::get_read_op() -{ - return std::make_unique(this); -} - -MotrObject::MotrReadOp::MotrReadOp(MotrObject *_source) : - source(_source) -{ } - -int MotrObject::MotrReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp) -{ - int rc; - ldpp_dout(dpp, 20) <<__func__<< ": bucket=" << source->get_bucket()->get_name() << dendl; - - rgw_bucket_dir_entry ent; - rc = source->get_bucket_dir_ent(dpp, ent); - if (rc < 0) - return rc; - - // Set source object's attrs. The attrs is key/value map and is used - // in send_response_data() to set attributes, including etag. - bufferlist etag_bl; - string& etag = ent.meta.etag; - ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl; - etag_bl.append(etag.c_str(), etag.size()); - source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); - - source->set_key(ent.key); - source->set_obj_size(ent.meta.size); - source->category = ent.meta.category; - *params.lastmod = ent.meta.mtime; - - if (params.mod_ptr || params.unmod_ptr) { - // Convert all times go GMT to make them compatible - obj_time_weight src_weight; - src_weight.init(*params.lastmod, params.mod_zone_id, params.mod_pg_ver); - src_weight.high_precision = params.high_precision_time; - - obj_time_weight dest_weight; - dest_weight.high_precision = params.high_precision_time; - - // Check if-modified-since condition - if (params.mod_ptr && !params.if_nomatch) { - dest_weight.init(*params.mod_ptr, params.mod_zone_id, params.mod_pg_ver); - ldpp_dout(dpp, 10) << "If-Modified-Since: " << dest_weight << " & " - << "Last-Modified: " << src_weight << dendl; - if (!(dest_weight < src_weight)) { - return -ERR_NOT_MODIFIED; - } - } - - // 
Check if-unmodified-since condition - if (params.unmod_ptr && !params.if_match) { - dest_weight.init(*params.unmod_ptr, params.mod_zone_id, params.mod_pg_ver); - ldpp_dout(dpp, 10) << "If-UnModified-Since: " << dest_weight << " & " - << "Last-Modified: " << src_weight << dendl; - if (dest_weight < src_weight) { - return -ERR_PRECONDITION_FAILED; - } - } - } - // Check if-match condition - if (params.if_match) { - string if_match_str = rgw_string_unquote(params.if_match); - ldpp_dout(dpp, 10) << "ETag: " << etag << " & " - << "If-Match: " << if_match_str << dendl; - if (if_match_str.compare(etag) != 0) { - return -ERR_PRECONDITION_FAILED; - } - } - // Check if-none-match condition - if (params.if_nomatch) { - string if_nomatch_str = rgw_string_unquote(params.if_nomatch); - ldpp_dout(dpp, 10) << "ETag: " << etag << " & " - << "If-NoMatch: " << if_nomatch_str << dendl; - if (if_nomatch_str.compare(etag) == 0) { - return -ERR_NOT_MODIFIED; - } - } - - // Skip opening an empty object. - if(source->get_obj_size() == 0) - return 0; - - // Open the object here. - if (source->category == RGWObjCategory::MultiMeta) { - ldpp_dout(dpp, 20) <<__func__<< ": open obj parts..." << dendl; - rc = source->get_part_objs(dpp, this->part_objs)? : - source->open_part_objs(dpp, this->part_objs); - return rc; - } else { - ldpp_dout(dpp, 20) <<__func__<< ": open object..." << dendl; - return source->open_mobj(dpp); - } -} - -int MotrObject::MotrReadOp::read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp) -{ - ldpp_dout(dpp, 20) << "MotrReadOp::read(): sync read." << dendl; - return 0; -} - -// RGWGetObj::execute() calls ReadOp::iterate() to read object from 'off' to 'end'. -// The returned data is processed in 'cb' which is a chain of post-processing -// filters such as decompression, de-encryption and sending back data to client -// (RGWGetObj_CB::handle_dta which in turn calls RGWGetObj::get_data_cb() to -// send data back.). 
-// -// POC implements a simple sync version of iterate() function in which it reads -// a block of data each time and call 'cb' for post-processing. -int MotrObject::MotrReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb, optional_yield y) -{ - int rc; - - if (source->category == RGWObjCategory::MultiMeta) - rc = source->read_multipart_obj(dpp, off, end, cb, part_objs); - else - rc = source->read_mobj(dpp, off, end, cb); - - return rc; -} - -int MotrObject::MotrReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) -{ - //return 0; - return -ENODATA; -} - -std::unique_ptr MotrObject::get_delete_op() -{ - return std::make_unique(this); -} - -MotrObject::MotrDeleteOp::MotrDeleteOp(MotrObject *_source) : - source(_source) -{ } - -// Implementation of DELETE OBJ also requires MotrObject::get_obj_state() -// to retrieve and set object's state from object's metadata. -// -// TODO: -// 1. The POC only remove the object's entry from bucket index and delete -// corresponding Motr objects. It doesn't handle the DeleteOp::params. -// Delete::delete_obj() in rgw_rados.cc shows how rados backend process the -// params. -// 2. Delete an object when its versioning is turned on. -int MotrObject::MotrDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) -{ - ldpp_dout(dpp, 20) << "delete " << source->get_key().get_oid() << " from " << source->get_bucket()->get_name() << dendl; - - rgw_bucket_dir_entry ent; - int rc = source->get_bucket_dir_ent(dpp, ent); - if (rc < 0) { - return rc; - } - - //TODO: When integrating with background GC for object deletion, - // we should consider adding object entry to GC before deleting the metadata. - // Delete from the cache first. - source->store->get_obj_meta_cache()->remove(dpp, source->get_key().get_oid()); - - // Delete the object's entry from the bucket index. - bufferlist bl; - string bucket_index_iname = "motr.rgw.bucket.index." 
+ source->get_bucket()->get_name(); - rc = source->store->do_idx_op_by_name(bucket_index_iname, - M0_IC_DEL, source->get_key().get_oid(), bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << "Failed to del object's entry from bucket index. " << dendl; - return rc; - } - - if (ent.meta.size == 0) { - ldpp_dout(dpp, 0) << __func__ << ": Object size is 0, not deleting motr object." << dendl; - return 0; - } - // Remove the motr objects. - if (source->category == RGWObjCategory::MultiMeta) - rc = source->delete_part_objs(dpp); - else - rc = source->delete_mobj(dpp); - if (rc < 0) { - ldpp_dout(dpp, 0) << "Failed to delete the object from Motr. " << dendl; - return rc; - } - - //result.delete_marker = parent_op.result.delete_marker; - //result.version_id = parent_op.result.version_id; - return 0; -} - -int MotrObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, bool prevent_versioning) -{ - MotrObject::MotrDeleteOp del_op(this); - del_op.params.bucket_owner = bucket->get_info().owner; - del_op.params.versioning_status = bucket->get_info().versioning_status(); - - return del_op.delete_obj(dpp, y); -} - -int MotrObject::copy_object(User* user, - req_info* info, - const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, - rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, - const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, - ceph::real_time* mtime, - const ceph::real_time* mod_ptr, - const ceph::real_time* unmod_ptr, - bool high_precision_time, - const char* if_match, - const char* if_nomatch, - AttrsMod attrs_mod, - bool copy_if_newer, - Attrs& attrs, - RGWObjCategory category, - uint64_t olh_epoch, - boost::optional delete_at, - std::string* version_id, - std::string* tag, - std::string* etag, - void (*progress_cb)(off_t, void *), - void* progress_data, - const DoutPrefixProvider* dpp, - optional_yield y) -{ - return 0; -} - -int MotrObject::swift_versioning_restore(bool& restored, - const DoutPrefixProvider* dpp) -{ - 
return 0; -} - -int MotrObject::swift_versioning_copy(const DoutPrefixProvider* dpp, - optional_yield y) -{ - return 0; -} - -MotrAtomicWriter::MotrAtomicWriter(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - MotrStore* _store, - const rgw_user& _owner, - const rgw_placement_rule *_ptail_placement_rule, - uint64_t _olh_epoch, - const std::string& _unique_tag) : - StoreWriter(dpp, y), - store(_store), - owner(_owner), - ptail_placement_rule(_ptail_placement_rule), - olh_epoch(_olh_epoch), - unique_tag(_unique_tag), - obj(_store, obj->get_key(), obj->get_bucket()), - old_obj(_store, obj->get_key(), obj->get_bucket()) {} - -static const unsigned MAX_BUFVEC_NR = 256; - -int MotrAtomicWriter::prepare(optional_yield y) -{ - total_data_size = 0; - - if (obj.is_opened()) - return 0; - - rgw_bucket_dir_entry ent; - int rc = old_obj.get_bucket_dir_ent(dpp, ent); - if (rc == 0) { - ldpp_dout(dpp, 20) << __func__ << ": object exists." << dendl; - } - - rc = m0_bufvec_empty_alloc(&buf, MAX_BUFVEC_NR) ?: - m0_bufvec_alloc(&attr, MAX_BUFVEC_NR, 1) ?: - m0_indexvec_alloc(&ext, MAX_BUFVEC_NR); - if (rc != 0) - this->cleanup(); - - return rc; -} - -int MotrObject::create_mobj(const DoutPrefixProvider *dpp, uint64_t sz) -{ - if (mobj != nullptr) { - ldpp_dout(dpp, 0) <<__func__<< "ERROR: object is already opened" << dendl; - return -EINVAL; - } - - int rc = m0_ufid_next(&ufid_gr, 1, &meta.oid); - if (rc != 0) { - ldpp_dout(dpp, 0) <<__func__<< "ERROR: m0_ufid_next() failed: " << rc << dendl; - return rc; - } - - char fid_str[M0_FID_STR_LEN]; - snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); - ldpp_dout(dpp, 20) <<__func__<< ": sz=" << sz << " oid=" << fid_str << dendl; - - int64_t lid = m0_layout_find_by_objsz(store->instance, nullptr, sz); - M0_ASSERT(lid > 0); - - M0_ASSERT(mobj == nullptr); - mobj = new m0_obj(); - m0_obj_init(mobj, &store->container.co_realm, &meta.oid, lid); - - struct m0_op *op = nullptr; - 
mobj->ob_entity.en_flags |= M0_ENF_META; - rc = m0_entity_create(nullptr, &mobj->ob_entity, &op); - if (rc != 0) { - this->close_mobj(); - ldpp_dout(dpp, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; - return rc; - } - ldpp_dout(dpp, 20) <<__func__<< ": call m0_op_launch()..." << dendl; - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc != 0) { - this->close_mobj(); - ldpp_dout(dpp, 0) << "ERROR: failed to create motr object: " << rc << dendl; - return rc; - } - - meta.layout_id = mobj->ob_attr.oa_layout_id; - meta.pver = mobj->ob_attr.oa_pver; - ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id - << std::dec << " rc=" << rc << dendl; - - // TODO: add key:user+bucket+key+obj.meta.oid value:timestamp to - // gc.queue.index. See more at github.com/Seagate/cortx-rgw/issues/7. - - return rc; -} - -int MotrObject::open_mobj(const DoutPrefixProvider *dpp) -{ - char fid_str[M0_FID_STR_LEN]; - snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); - ldpp_dout(dpp, 20) <<__func__<< ": oid=" << fid_str << dendl; - - int rc; - if (meta.layout_id == 0) { - rgw_bucket_dir_entry ent; - rc = this->get_bucket_dir_ent(dpp, ent); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: open_mobj() failed: rc=" << rc << dendl; - return rc; - } - } - - if (meta.layout_id == 0) - return -ENOENT; - - M0_ASSERT(mobj == nullptr); - mobj = new m0_obj(); - memset(mobj, 0, sizeof *mobj); - m0_obj_init(mobj, &store->container.co_realm, &meta.oid, store->conf.mc_layout_id); - - struct m0_op *op = nullptr; - mobj->ob_attr.oa_layout_id = meta.layout_id; - mobj->ob_attr.oa_pver = meta.pver; - mobj->ob_entity.en_flags |= M0_ENF_META; - rc = m0_entity_open(&mobj->ob_entity, &op); - if (rc != 0) { - ldpp_dout(dpp, 0) << "ERROR: m0_entity_open() failed: rc=" << rc << dendl; - this->close_mobj(); - return rc; - } - m0_op_launch(&op, 1); - rc = 
m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc < 0) { - ldpp_dout(dpp, 10) << "ERROR: failed to open motr object: rc=" << rc << dendl; - this->close_mobj(); - return rc; - } - - ldpp_dout(dpp, 20) <<__func__<< ": rc=" << rc << dendl; - - return 0; -} - -int MotrObject::delete_mobj(const DoutPrefixProvider *dpp) -{ - int rc; - char fid_str[M0_FID_STR_LEN]; - snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); - if (!meta.oid.u_hi || !meta.oid.u_lo) { - ldpp_dout(dpp, 20) << __func__ << ": invalid motr object oid=" << fid_str << dendl; - return -EINVAL; - } - ldpp_dout(dpp, 20) << __func__ << ": deleting motr object oid=" << fid_str << dendl; - - // Open the object. - if (mobj == nullptr) { - rc = this->open_mobj(dpp); - if (rc < 0) - return rc; - } - - // Create an DELETE op and execute it (sync version). - struct m0_op *op = nullptr; - mobj->ob_entity.en_flags |= M0_ENF_META; - rc = m0_entity_delete(&mobj->ob_entity, &op); - if (rc != 0) { - ldpp_dout(dpp, 0) << "ERROR: m0_entity_delete() failed: " << rc << dendl; - return rc; - } - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to open motr object: " << rc << dendl; - return rc; - } - - this->close_mobj(); - - return 0; -} - -void MotrObject::close_mobj() -{ - if (mobj == nullptr) - return; - m0_obj_fini(mobj); - delete mobj; mobj = nullptr; -} - -int MotrObject::write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset) -{ - int rc; - unsigned bs, left; - struct m0_op *op; - char *start, *p; - struct m0_bufvec buf; - struct m0_bufvec attr; - struct m0_indexvec ext; - - left = data.length(); - if (left == 0) - return 0; - - rc = m0_bufvec_empty_alloc(&buf, 1) ?: - m0_bufvec_alloc(&attr, 1, 1) ?: - m0_indexvec_alloc(&ext, 1); - if (rc != 0) - 
goto out; - - bs = this->get_optimal_bs(left); - ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl; - - start = data.c_str(); - - for (p = start; left > 0; left -= bs, p += bs, offset += bs) { - if (left < bs) - bs = this->get_optimal_bs(left); - if (left < bs) { - data.append_zero(bs - left); - left = bs; - p = data.c_str(); - } - buf.ov_buf[0] = p; - buf.ov_vec.v_count[0] = bs; - ext.iv_index[0] = offset; - ext.iv_vec.v_count[0] = bs; - attr.ov_vec.v_count[0] = 0; - - op = nullptr; - rc = m0_obj_op(this->mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op); - if (rc != 0) - goto out; - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - if (rc != 0) - goto out; - } - -out: - m0_indexvec_free(&ext); - m0_bufvec_free(&attr); - m0_bufvec_free2(&buf); - return rc; -} - -int MotrObject::read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb) -{ - int rc; - unsigned bs, actual, left; - struct m0_op *op; - struct m0_bufvec buf; - struct m0_bufvec attr; - struct m0_indexvec ext; - - // make end pointer exclusive: - // it's easier to work with it this way - end++; - ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off << - " end=" << end << dendl; - // As `off` may not be parity group size aligned, even using optimal - // buffer block size, simply reading data from offset `off` could come - // across parity group boundary. And Motr only allows page-size aligned - // offset. - // - // The optimal size of each IO should also take into account the data - // transfer size to s3 client. For example, 16MB may be nice to read - // data from motr, but it could be too big for network transfer. - // - // TODO: We leave proper handling of offset in the future. - bs = this->get_optimal_bs(end - off); - ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): bs=" << bs << dendl; - - rc = m0_bufvec_empty_alloc(&buf, 1) ? 
: - m0_bufvec_alloc(&attr, 1, 1) ? : - m0_indexvec_alloc(&ext, 1); - if (rc < 0) - goto out; - - left = end - off; - for (; left > 0; off += actual) { - if (left < bs) - bs = this->get_optimal_bs(left); - actual = bs; - if (left < bs) - actual = left; - ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off << - " actual=" << actual << dendl; - bufferlist bl; - buf.ov_buf[0] = bl.append_hole(bs).c_str(); - buf.ov_vec.v_count[0] = bs; - ext.iv_index[0] = off; - ext.iv_vec.v_count[0] = bs; - attr.ov_vec.v_count[0] = 0; - - left -= actual; - // Read from Motr. - op = nullptr; - rc = m0_obj_op(this->mobj, M0_OC_READ, &ext, &buf, &attr, 0, 0, &op); - ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): init read op rc=" << rc << dendl; - if (rc != 0) { - ldpp_dout(dpp, 0) << __func__ << ": read failed during m0_obj_op, rc=" << rc << dendl; - goto out; - } - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - if (rc != 0) { - ldpp_dout(dpp, 0) << __func__ << ": read failed, m0_op_wait rc=" << rc << dendl; - goto out; - } - // Call `cb` to process returned data. - ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): call cb to process data" << dendl; - cb->handle_data(bl, 0, actual); - } - -out: - m0_indexvec_free(&ext); - m0_bufvec_free(&attr); - m0_bufvec_free2(&buf); - this->close_mobj(); - - return rc; -} - -int MotrObject::get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent) -{ - int rc = 0; - string bucket_index_iname = "motr.rgw.bucket.index." 
+ this->get_bucket()->get_name(); - int max = 1000; - vector keys(max); - vector vals(max); - bufferlist bl; - bufferlist::const_iterator iter; - - if (this->get_bucket()->get_info().versioning_status() == BUCKET_VERSIONED || - this->get_bucket()->get_info().versioning_status() == BUCKET_SUSPENDED) { - - rgw_bucket_dir_entry ent_to_check; - - if (this->store->get_obj_meta_cache()->get(dpp, this->get_name(), bl) == 0) { - iter = bl.cbegin(); - ent_to_check.decode(iter); - if (ent_to_check.is_current()) { - ent = ent_to_check; - rc = 0; - goto out; - } - } - - ldpp_dout(dpp, 20) <<__func__<< ": versioned bucket!" << dendl; - keys[0] = this->get_name(); - rc = store->next_query_by_name(bucket_index_iname, keys, vals); - if (rc < 0) { - ldpp_dout(dpp, 0) << __func__ << "ERROR: NEXT query failed. " << rc << dendl; - return rc; - } - - rc = -ENOENT; - for (const auto& bl: vals) { - if (bl.length() == 0) - break; - - iter = bl.cbegin(); - ent_to_check.decode(iter); - if (ent_to_check.is_current()) { - ldpp_dout(dpp, 20) <<__func__<< ": found current version!" << dendl; - ent = ent_to_check; - rc = 0; - - this->store->get_obj_meta_cache()->put(dpp, this->get_name(), bl); - - break; - } - } - } else { - if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) { - ldpp_dout(dpp, 20) <<__func__<< ": non-versioned bucket!" 
<< dendl; - rc = this->store->do_idx_op_by_name(bucket_index_iname, - M0_IC_GET, this->get_key().get_oid(), bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << __func__ << "ERROR: failed to get object's entry from bucket index: rc=" - << rc << dendl; - return rc; - } - this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl); - } - - bufferlist& blr = bl; - iter = blr.cbegin(); - ent.decode(iter); - } - -out: - if (rc == 0) { - sal::Attrs dummy; - decode(dummy, iter); - meta.decode(iter); - ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id << dendl; - char fid_str[M0_FID_STR_LEN]; - snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid)); - ldpp_dout(dpp, 70) << __func__ << ": oid=" << fid_str << dendl; - } else - ldpp_dout(dpp, 0) <<__func__<< ": rc=" << rc << dendl; - - return rc; -} - -int MotrObject::update_version_entries(const DoutPrefixProvider *dpp) -{ - int rc; - int max = 10; - vector keys(max); - vector vals(max); - - string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name(); - keys[0] = this->get_name(); - rc = store->next_query_by_name(bucket_index_iname, keys, vals); - ldpp_dout(dpp, 20) << "get all versions, name = " << this->get_name() << "rc = " << rc << dendl; - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl; - return rc; - } - - // no entries returned. - if (rc == 0) - return 0; - - for (const auto& bl: vals) { - if (bl.length() == 0) - break; - - rgw_bucket_dir_entry ent; - auto iter = bl.cbegin(); - ent.decode(iter); - - if (0 != ent.key.name.compare(0, this->get_name().size(), this->get_name())) - continue; - - if (!ent.is_current()) - continue; - - // Remove from the cache. 
- store->get_obj_meta_cache()->remove(dpp, this->get_name()); - - rgw::sal::Attrs attrs; - decode(attrs, iter); - MotrObject::Meta meta; - meta.decode(iter); - - ent.flags = rgw_bucket_dir_entry::FLAG_VER; - string key; - if (ent.key.instance.empty()) - key = ent.key.name; - else { - char buf[ent.key.name.size() + ent.key.instance.size() + 16]; - snprintf(buf, sizeof(buf), "%s[%s]", ent.key.name.c_str(), ent.key.instance.c_str()); - key = buf; - } - ldpp_dout(dpp, 20) << "update one version, key = " << key << dendl; - bufferlist ent_bl; - ent.encode(ent_bl); - encode(attrs, ent_bl); - meta.encode(ent_bl); - - rc = store->do_idx_op_by_name(bucket_index_iname, - M0_IC_PUT, key, ent_bl); - if (rc < 0) - break; - } - return rc; -} - -// Scan object_nnn_part_index to get all parts then open their motr objects. -// TODO: all parts are opened in the POC. But for a large object, for example -// a 5GB object will have about 300 parts (for default 15MB part). A better -// way of managing opened object may be needed. -int MotrObject::get_part_objs(const DoutPrefixProvider* dpp, - std::map>& part_objs) -{ - int rc; - int max_parts = 1000; - int marker = 0; - uint64_t off = 0; - bool truncated = false; - std::unique_ptr upload; - - upload = this->get_bucket()->get_multipart_upload(this->get_name(), string()); - - do { - rc = upload->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated); - if (rc == -ENOENT) { - rc = -ERR_NO_SUCH_UPLOAD; - } - if (rc < 0) - return rc; - - std::map>& parts = upload->get_parts(); - for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) { - - MultipartPart *mpart = part_iter->second.get(); - MotrMultipartPart *mmpart = static_cast(mpart); - uint32_t part_num = mmpart->get_num(); - uint64_t part_size = mmpart->get_size(); - - string part_obj_name = this->get_bucket()->get_name() + "." + - this->get_key().get_oid() + - ".part." 
+ std::to_string(part_num); - std::unique_ptr obj; - obj = this->bucket->get_object(rgw_obj_key(part_obj_name)); - std::unique_ptr mobj(static_cast(obj.release())); - - ldpp_dout(dpp, 20) << "get_part_objs: off = " << off << ", size = " << part_size << dendl; - mobj->part_off = off; - mobj->part_size = part_size; - mobj->part_num = part_num; - mobj->meta = mmpart->meta; - - part_objs.emplace(part_num, std::move(mobj)); - - off += part_size; - } - } while (truncated); - - return 0; -} - -int MotrObject::open_part_objs(const DoutPrefixProvider* dpp, - std::map>& part_objs) -{ - //for (auto& iter: part_objs) { - for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) { - MotrObject* obj = static_cast(iter->second.get()); - ldpp_dout(dpp, 20) << "open_part_objs: name = " << obj->get_name() << dendl; - int rc = obj->open_mobj(dpp); - if (rc < 0) - return rc; - } - - return 0; -} - -int MotrObject::delete_part_objs(const DoutPrefixProvider* dpp) -{ - std::unique_ptr upload; - upload = this->get_bucket()->get_multipart_upload(this->get_name(), string()); - std::unique_ptr mupload(static_cast(upload.release())); - return mupload->delete_parts(dpp); -} - -int MotrObject::read_multipart_obj(const DoutPrefixProvider* dpp, - int64_t off, int64_t end, RGWGetDataCB* cb, - std::map>& part_objs) -{ - int64_t cursor = off; - - ldpp_dout(dpp, 20) << "read_multipart_obj: off=" << off << " end=" << end << dendl; - - // Find the parts which are in the (off, end) range and - // read data from it. Note: `end` argument is inclusive. 
- for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) { - MotrObject* obj = static_cast(iter->second.get()); - int64_t part_off = obj->part_off; - int64_t part_size = obj->part_size; - int64_t part_end = obj->part_off + obj->part_size - 1; - ldpp_dout(dpp, 20) << "read_multipart_obj: part_off=" << part_off - << " part_end=" << part_end << dendl; - if (part_end < off) - continue; - - int64_t local_off = cursor - obj->part_off; - int64_t local_end = part_end < end? part_size - 1 : end - part_off; - ldpp_dout(dpp, 20) << "real_multipart_obj: name=" << obj->get_name() - << " local_off=" << local_off - << " local_end=" << local_end << dendl; - int rc = obj->read_mobj(dpp, local_off, local_end, cb); - if (rc < 0) - return rc; - - cursor = part_end + 1; - if (cursor > end) - break; - } - - return 0; -} - -static unsigned roundup(unsigned x, unsigned by) -{ - return ((x - 1) / by + 1) * by; -} - -unsigned MotrObject::get_optimal_bs(unsigned len) -{ - struct m0_pool_version *pver; - - pver = m0_pool_version_find(&store->instance->m0c_pools_common, - &mobj->ob_attr.oa_pver); - M0_ASSERT(pver != nullptr); - struct m0_pdclust_attr *pa = &pver->pv_attr; - uint64_t lid = M0_OBJ_LAYOUT_ID(meta.layout_id); - unsigned unit_sz = m0_obj_layout_id_to_unit_size(lid); - unsigned grp_sz = unit_sz * pa->pa_N; - - // bs should be max 4-times pool-width deep counting by 1MB units, or - // 8-times deep counting by 512K units, 16-times deep by 256K units, - // and so on. Several units to one target will be aggregated to make - // fewer network RPCs, disk i/o operations and BE transactions. - // For unit sizes of 32K or less, the depth is 128, which - // makes it 32K * 128 == 4MB - the maximum amount per target when - // the performance is still good on LNet (which has max 1MB frames). - // TODO: it may be different on libfabric, should be re-measured. 
- unsigned depth = 128 / ((unit_sz + 0x7fff) / 0x8000); - if (depth == 0) - depth = 1; - // P * N / (N + K + S) - number of data units to span the pool-width - unsigned max_bs = depth * unit_sz * pa->pa_P * pa->pa_N / - (pa->pa_N + pa->pa_K + pa->pa_S); - max_bs = roundup(max_bs, grp_sz); // multiple of group size - if (len >= max_bs) - return max_bs; - else if (len <= grp_sz) - return grp_sz; - else - return roundup(len, grp_sz); -} - -void MotrAtomicWriter::cleanup() -{ - m0_indexvec_free(&ext); - m0_bufvec_free(&attr); - m0_bufvec_free2(&buf); - acc_data.clear(); - obj.close_mobj(); - old_obj.close_mobj(); -} - -unsigned MotrAtomicWriter::populate_bvec(unsigned len, bufferlist::iterator &bi) -{ - unsigned i, l, done = 0; - const char *data; - - for (i = 0; i < MAX_BUFVEC_NR && len > 0; ++i) { - l = bi.get_ptr_and_advance(len, &data); - buf.ov_buf[i] = (char*)data; - buf.ov_vec.v_count[i] = l; - ext.iv_index[i] = acc_off; - ext.iv_vec.v_count[i] = l; - attr.ov_vec.v_count[i] = 0; - acc_off += l; - len -= l; - done += l; - } - buf.ov_vec.v_nr = i; - ext.iv_vec.v_nr = i; - - return done; -} - -int MotrAtomicWriter::write() -{ - int rc; - unsigned bs, left; - struct m0_op *op; - bufferlist::iterator bi; - - left = acc_data.length(); - - if (!obj.is_opened()) { - rc = obj.create_mobj(dpp, left); - if (rc == -EEXIST) - rc = obj.open_mobj(dpp); - if (rc != 0) { - char fid_str[M0_FID_STR_LEN]; - snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&obj.meta.oid)); - ldpp_dout(dpp, 0) << "ERROR: failed to create/open motr object " - << fid_str << " (" << obj.get_bucket()->get_name() - << "/" << obj.get_key().get_oid() << "): rc=" << rc - << dendl; - goto err; - } - } - - total_data_size += left; - - bs = obj.get_optimal_bs(left); - ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl; - - bi = acc_data.begin(); - while (left > 0) { - if (left < bs) - bs = obj.get_optimal_bs(left); - if (left < bs) { - acc_data.append_zero(bs - left); - auto off = 
bi.get_off(); - bufferlist tmp; - acc_data.splice(off, bs, &tmp); - acc_data.clear(); - acc_data.append(tmp.c_str(), bs); // make it a single buf - bi = acc_data.begin(); - left = bs; - } - - left -= this->populate_bvec(bs, bi); - - op = nullptr; - rc = m0_obj_op(obj.mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op); - if (rc != 0) - goto err; - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - if (rc != 0) - goto err; - } - acc_data.clear(); - - return 0; - -err: - this->cleanup(); - return rc; -} - -static const unsigned MAX_ACC_SIZE = 32 * 1024 * 1024; - -// Accumulate enough data first to make a reasonable decision about the -// optimal unit size for a new object, or bs for existing object (32M seems -// enough for 4M units in 8+2 parity groups, a common config on wide pools), -// and then launch the write operations. -int MotrAtomicWriter::process(bufferlist&& data, uint64_t offset) -{ - if (data.length() == 0) { // last call, flush data - int rc = 0; - if (acc_data.length() != 0) - rc = this->write(); - this->cleanup(); - return rc; - } - - if (acc_data.length() == 0) - acc_off = offset; - - acc_data.append(std::move(data)); - if (acc_data.length() < MAX_ACC_SIZE) - return 0; - - return this->write(); -} - -int MotrAtomicWriter::complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) -{ - int rc = 0; - - if (acc_data.length() != 0) { // check again, just in case - rc = this->write(); - this->cleanup(); - if (rc != 0) - return rc; - } - - bufferlist bl; - rgw_bucket_dir_entry ent; - - // Set rgw_bucet_dir_entry. Some of the member of this structure may not - // apply to motr. For example the storage_class. 
- // - // Checkout AtomicObjectProcessor::complete() in rgw_putobj_processor.cc - // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what and - // how to set the dir entry. Only set the basic ones for POC, no ACLs and - // other attrs. - obj.get_key().get_index_key(&ent.key); - ent.meta.size = total_data_size; - ent.meta.accounted_size = total_data_size; - ent.meta.mtime = real_clock::is_zero(set_mtime)? ceph::real_clock::now() : set_mtime; - ent.meta.etag = etag; - ent.meta.owner = owner.to_str(); - ent.meta.owner_display_name = obj.get_bucket()->get_owner()->get_display_name(); - bool is_versioned = obj.get_key().have_instance(); - if (is_versioned) - ent.flags = rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT; - ldpp_dout(dpp, 20) <<__func__<< ": key=" << obj.get_key().get_oid() - << " etag: " << etag << " user_data=" << user_data << dendl; - if (user_data) - ent.meta.user_data = *user_data; - ent.encode(bl); - - RGWBucketInfo &info = obj.get_bucket()->get_info(); - if (info.obj_lock_enabled() && info.obj_lock.has_rule()) { - auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); - if (iter == attrs.end()) { - real_time lock_until_date = info.obj_lock.get_lock_until_date(ent.meta.mtime); - string mode = info.obj_lock.get_mode(); - RGWObjectRetention obj_retention(mode, lock_until_date); - bufferlist retention_bl; - obj_retention.encode(retention_bl); - attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl; - } - } - encode(attrs, bl); - obj.meta.encode(bl); - ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << obj.meta.layout_id - << dendl; - if (is_versioned) { - // get the list of all versioned objects with the same key and - // unset their FLAG_CURRENT later, if do_idx_op_by_name() is successful. - // Note: without distributed lock on the index - it is possible that 2 - // CURRENT entries would appear in the bucket. 
For example, consider the - // following scenario when two clients are trying to add the new object - // version concurrently: - // client 1: reads all the CURRENT entries - // client 2: updates the index and sets the new CURRENT - // client 1: updates the index and sets the new CURRENT - // At the step (1) client 1 would not see the new current record from step (2), - // so it won't update it. As a result, two CURRENT version entries will appear - // in the bucket. - // TODO: update the current version (unset the flag) and insert the new current - // version can be launched in one motr op. This requires change at do_idx_op() - // and do_idx_op_by_name(). - rc = obj.update_version_entries(dpp); - if (rc < 0) - return rc; - } - // Insert an entry into bucket index. - string bucket_index_iname = "motr.rgw.bucket.index." + obj.get_bucket()->get_name(); - rc = store->do_idx_op_by_name(bucket_index_iname, - M0_IC_PUT, obj.get_key().get_oid(), bl); - if (rc == 0) - store->get_obj_meta_cache()->put(dpp, obj.get_key().get_oid(), bl); - - if (old_obj.get_bucket()->get_info().versioning_status() != BUCKET_VERSIONED) { - // Delete old object data if exists. - old_obj.delete_mobj(dpp); - } - - // TODO: We need to handle the object leak caused by parallel object upload by - // making use of background gc, which is currently not enabled for motr. - return rc; -} - -int MotrMultipartUpload::delete_parts(const DoutPrefixProvider *dpp) -{ - int rc; - int max_parts = 1000; - int marker = 0; - bool truncated = false; - - // Scan all parts and delete the corresponding motr objects. 
- do { - rc = this->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated); - if (rc == -ENOENT) { - truncated = false; - rc = 0; - } - if (rc < 0) - return rc; - - std::map>& parts = this->get_parts(); - for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) { - - MultipartPart *mpart = part_iter->second.get(); - MotrMultipartPart *mmpart = static_cast(mpart); - uint32_t part_num = mmpart->get_num(); - - // Delete the part object. Note that the part object is not - // inserted into bucket index, only the corresponding motr object - // needs to be delete. That is why we don't call - // MotrObject::delete_object(). - string part_obj_name = bucket->get_name() + "." + - mp_obj.get_key() + - ".part." + std::to_string(part_num); - std::unique_ptr obj; - obj = this->bucket->get_object(rgw_obj_key(part_obj_name)); - std::unique_ptr mobj(static_cast(obj.release())); - mobj->meta = mmpart->meta; - rc = mobj->delete_mobj(dpp); - if (rc < 0) { - ldpp_dout(dpp, 0) << __func__ << ": Failed to delete object from Motr. rc=" << rc << dendl; - return rc; - } - } - } while (truncated); - - // Delete object part index. - std::string oid = mp_obj.get_key(); - string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; - return store->delete_motr_idx_by_name(obj_part_iname); -} - -int MotrMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct) -{ - int rc; - // Check if multipart upload exists - bufferlist bl; - std::unique_ptr meta_obj; - meta_obj = get_meta_obj(); - string bucket_multipart_iname = - "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts"; - rc = store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_GET, meta_obj->get_oid(), bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart upload. rc=" << rc << dendl; - return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc; - } - - // Scan all parts and delete the corresponding motr objects. 
- rc = this->delete_parts(dpp); - if (rc < 0) - return rc; - - bl.clear(); - // Remove the upload from bucket multipart index. - rc = store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_DEL, meta_obj->get_key().get_oid(), bl); - return rc; -} - -std::unique_ptr MotrMultipartUpload::get_meta_obj() -{ - std::unique_ptr obj = bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns)); - std::unique_ptr mobj(static_cast(obj.release())); - mobj->set_category(RGWObjCategory::MultiMeta); - return mobj; -} - -struct motr_multipart_upload_info -{ - rgw_placement_rule dest_placement; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(dest_placement, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(dest_placement, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(motr_multipart_upload_info) - -int MotrMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, - ACLOwner& _owner, - rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) -{ - int rc; - std::string oid = mp_obj.get_key(); - - owner = _owner; - - do { - char buf[33]; - string tmp_obj_name; - gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); - std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */ - upload_id.append(buf); - - mp_obj.init(oid, upload_id); - tmp_obj_name = mp_obj.get_meta(); - - std::unique_ptr obj; - obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns)); - // the meta object will be indexed with 0 size, we c - obj->set_in_extra_data(true); - obj->set_hash_source(oid); - - motr_multipart_upload_info upload_info; - upload_info.dest_placement = dest_placement; - bufferlist mpbl; - encode(upload_info, mpbl); - - // Create an initial entry in the bucket. The entry will be - // updated when multipart upload is completed, for example, - // size, etag etc. 
- bufferlist bl; - rgw_bucket_dir_entry ent; - obj->get_key().get_index_key(&ent.key); - ent.meta.owner = owner.get_id().to_str(); - ent.meta.category = RGWObjCategory::MultiMeta; - ent.meta.mtime = ceph::real_clock::now(); - ent.meta.user_data.assign(mpbl.c_str(), mpbl.c_str() + mpbl.length()); - ent.encode(bl); - - // Insert an entry into bucket multipart index so it is not shown - // when listing a bucket. - string bucket_multipart_iname = - "motr.rgw.bucket." + obj->get_bucket()->get_name() + ".multiparts"; - rc = store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_PUT, obj->get_key().get_oid(), bl); - - } while (rc == -EEXIST); - - if (rc < 0) - return rc; - - // Create object part index. - // TODO: add bucket as part of the name. - string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; - ldpp_dout(dpp, 20) << "MotrMultipartUpload::init(): object part index=" << obj_part_iname << dendl; - rc = store->create_motr_idx_by_name(obj_part_iname); - if (rc == -EEXIST) - rc = 0; - if (rc < 0) - // TODO: clean the bucket index entry - ldpp_dout(dpp, 0) << "Failed to create object multipart index " << obj_part_iname << dendl; - - return rc; -} - -int MotrMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct, - int num_parts, int marker, - int *next_marker, bool *truncated, - bool assume_unsorted) -{ - int rc; - vector key_vec(num_parts); - vector val_vec(num_parts); - - std::string oid = mp_obj.get_key(); - string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts"; - ldpp_dout(dpp, 20) << __func__ << ": object part index = " << obj_part_iname << dendl; - key_vec[0].clear(); - key_vec[0] = "part."; - char buf[32]; - snprintf(buf, sizeof(buf), "%08d", marker + 1); - key_vec[0].append(buf); - rc = store->next_query_by_name(obj_part_iname, key_vec, val_vec); - if (rc < 0) { - ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. 
" << rc << dendl; - return rc; - } - - int last_num = 0; - int part_cnt = 0; - uint32_t expected_next = 0; - ldpp_dout(dpp, 20) << __func__ << ": marker = " << marker << dendl; - for (const auto& bl: val_vec) { - if (bl.length() == 0) - break; - - RGWUploadPartInfo info; - auto iter = bl.cbegin(); - info.decode(iter); - rgw::sal::Attrs attrs_dummy; - decode(attrs_dummy, iter); - MotrObject::Meta meta; - meta.decode(iter); - - ldpp_dout(dpp, 20) << __func__ << ": part_num=" << info.num - << " part_size=" << info.size << dendl; - ldpp_dout(dpp, 20) << __func__ << ": meta:oid=[" << meta.oid.u_hi << "," << meta.oid.u_lo - << "], meta:pvid=[" << meta.pver.f_container << "," << meta.pver.f_key - << "], meta:layout id=" << meta.layout_id << dendl; - - if (!expected_next) - expected_next = info.num + 1; - else if (expected_next && info.num != expected_next) - return -EINVAL; - else expected_next = info.num + 1; - - if ((int)info.num > marker) { - last_num = info.num; - parts.emplace(info.num, std::make_unique(info, meta)); - } - - part_cnt++; - } - - // Does it have more parts? - if (truncated) - *truncated = part_cnt < num_parts? 
false : true; - ldpp_dout(dpp, 20) << __func__ << ": truncated=" << *truncated << dendl; - - if (next_marker) - *next_marker = last_num; - - return 0; -} - -// Heavily copy from rgw_sal_rados.cc -int MotrMultipartUpload::complete(const DoutPrefixProvider *dpp, - optional_yield y, CephContext* cct, - map& part_etags, - list& remove_objs, - uint64_t& accounted_size, bool& compressed, - RGWCompressionInfo& cs_info, off_t& off, - std::string& tag, ACLOwner& owner, - uint64_t olh_epoch, - rgw::sal::Object* target_obj) -{ - char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; - std::string etag; - bufferlist etag_bl; - MD5 hash; - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - bool truncated; - int rc; - - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): enter" << dendl; - int total_parts = 0; - int handled_parts = 0; - int max_parts = 1000; - int marker = 0; - uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; - auto etags_iter = part_etags.begin(); - rgw::sal::Attrs attrs = target_obj->get_attrs(); - - do { - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): list_parts()" << dendl; - rc = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); - if (rc == -ENOENT) { - rc = -ERR_NO_SUCH_UPLOAD; - } - if (rc < 0) - return rc; - - total_parts += parts.size(); - if (!truncated && total_parts != (int)part_etags.size()) { - ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts - << " expected: " << part_etags.size() << dendl; - rc = -ERR_INVALID_PART; - return rc; - } - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): parts.size()=" << parts.size() << dendl; - - for (auto obj_iter = parts.begin(); - etags_iter != part_etags.end() && obj_iter != parts.end(); - ++etags_iter, ++obj_iter, ++handled_parts) { - MultipartPart *mpart = obj_iter->second.get(); - MotrMultipartPart *mmpart = 
static_cast(mpart); - RGWUploadPartInfo *part = &mmpart->info; - - uint64_t part_size = part->accounted_size; - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part_size=" << part_size << dendl; - if (handled_parts < (int)part_etags.size() - 1 && - part_size < min_part_size) { - rc = -ERR_TOO_SMALL; - return rc; - } - - char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; - if (etags_iter->first != (int)obj_iter->first) { - ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: " - << etags_iter->first << " next uploaded: " - << obj_iter->first << dendl; - rc = -ERR_INVALID_PART; - return rc; - } - string part_etag = rgw_string_unquote(etags_iter->second); - if (part_etag.compare(part->etag) != 0) { - ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first - << " etag: " << etags_iter->second << dendl; - rc = -ERR_INVALID_PART; - return rc; - } - - hex_to_buf(part->etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); - hash.Update((const unsigned char *)petag, sizeof(petag)); - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): calc etag " << dendl; - - string oid = mp_obj.get_part(part->num); - rgw_obj src_obj; - src_obj.init_ns(bucket->get_key(), oid, mp_ns); - -#if 0 // does Motr backend need it? 
- /* update manifest for part */ - if (part->manifest.empty()) { - ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj=" - << src_obj << dendl; - rc = -ERR_INVALID_PART; - return rc; - } else { - manifest.append(dpp, part->manifest, store->get_zone()); - } - ldpp_dout(dpp, 0) << "MotrMultipartUpload::complete(): manifest " << dendl; -#endif - - bool part_compressed = (part->cs_info.compression_type != "none"); - if ((handled_parts > 0) && - ((part_compressed != compressed) || - (cs_info.compression_type != part->cs_info.compression_type))) { - ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload (" - << cs_info.compression_type << ">>" << part->cs_info.compression_type << ")" << dendl; - rc = -ERR_INVALID_PART; - return rc; - } - - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part compression" << dendl; - if (part_compressed) { - int64_t new_ofs; // offset in compression data for new part - if (cs_info.blocks.size() > 0) - new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len; - else - new_ofs = 0; - for (const auto& block : part->cs_info.blocks) { - compression_block cb; - cb.old_ofs = block.old_ofs + cs_info.orig_size; - cb.new_ofs = new_ofs; - cb.len = block.len; - cs_info.blocks.push_back(cb); - new_ofs = cb.new_ofs + cb.len; - } - if (!compressed) - cs_info.compression_type = part->cs_info.compression_type; - cs_info.orig_size += part->cs_info.orig_size; - compressed = true; - } - - // We may not need to do the following as remove_objs are those - // don't show when listing a bucket. As we store in-progress uploaded - // object's metadata in a separate index, they are not shown when - // listing a bucket. 
- rgw_obj_index_key remove_key; - src_obj.key.get_index_key(&remove_key); - remove_objs.push_back(remove_key); - - off += part_size; - accounted_size += part->accounted_size; - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): off=" << off << ", accounted_size = " << accounted_size << dendl; - } - } while (truncated); - hash.Final((unsigned char *)final_etag); - - buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str); - snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], - sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, - "-%lld", (long long)part_etags.size()); - etag = final_etag_str; - ldpp_dout(dpp, 20) << "calculated etag: " << etag << dendl; - etag_bl.append(etag); - attrs[RGW_ATTR_ETAG] = etag_bl; - - if (compressed) { - // write compression attribute to full object - bufferlist tmp; - encode(cs_info, tmp); - attrs[RGW_ATTR_COMPRESSION] = tmp; - } - - // Read the object's the multipart_upload_info. - // TODO: all those index name and key constructions should be implemented as - // member functions. - bufferlist bl; - std::unique_ptr meta_obj; - meta_obj = get_meta_obj(); - string bucket_multipart_iname = - "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts"; - rc = this->store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_GET, meta_obj->get_key().get_oid(), bl); - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): read entry from bucket multipart index rc=" << rc << dendl; - if (rc < 0) - return rc; - rgw_bucket_dir_entry ent; - bufferlist& blr = bl; - auto ent_iter = blr.cbegin(); - ent.decode(ent_iter); - - // Update the dir entry and insert it to the bucket index so - // the object will be seen when listing the bucket. 
- bufferlist update_bl; - target_obj->get_key().get_index_key(&ent.key); // Change to offical name :) - ent.meta.size = off; - ent.meta.accounted_size = accounted_size; - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): obj size=" << ent.meta.size - << " obj accounted size=" << ent.meta.accounted_size << dendl; - ent.meta.mtime = ceph::real_clock::now(); - ent.meta.etag = etag; - ent.encode(update_bl); - encode(attrs, update_bl); - MotrObject::Meta meta_dummy; - meta_dummy.encode(update_bl); - - string bucket_index_iname = "motr.rgw.bucket.index." + meta_obj->get_bucket()->get_name(); - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): target_obj name=" << target_obj->get_name() - << " target_obj oid=" << target_obj->get_oid() << dendl; - rc = store->do_idx_op_by_name(bucket_index_iname, M0_IC_PUT, - target_obj->get_name(), update_bl); - if (rc < 0) - return rc; - - // Put into metadata cache. - store->get_obj_meta_cache()->put(dpp, target_obj->get_name(), update_bl); - - // Now we can remove it from bucket multipart index. - ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): remove from bucket multipartindex " << dendl; - return store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_DEL, meta_obj->get_key().get_oid(), bl); -} - -int MotrMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs) -{ - if (!rule && !attrs) { - return 0; - } - - if (rule) { - if (!placement.empty()) { - *rule = &placement; - if (!attrs) { - /* Don't need attrs, done */ - return 0; - } - } else { - *rule = nullptr; - } - } - - std::unique_ptr meta_obj; - meta_obj = get_meta_obj(); - meta_obj->set_in_extra_data(true); - - // Read the object's the multipart_upload_info. - bufferlist bl; - string bucket_multipart_iname = - "motr.rgw.bucket." 
+ meta_obj->get_bucket()->get_name() + ".multiparts"; - int rc = this->store->do_idx_op_by_name(bucket_multipart_iname, - M0_IC_GET, meta_obj->get_key().get_oid(), bl); - if (rc < 0) { - ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart info. rc=" << rc << dendl; - return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc; - } - - rgw_bucket_dir_entry ent; - bufferlist& blr = bl; - auto ent_iter = blr.cbegin(); - ent.decode(ent_iter); - - if (attrs) { - bufferlist etag_bl; - string& etag = ent.meta.etag; - ldpp_dout(dpp, 20) << "object's etag: " << ent.meta.etag << dendl; - etag_bl.append(etag.c_str(), etag.size()); - attrs->emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); - if (!rule || *rule != nullptr) { - /* placement was cached; don't actually read */ - return 0; - } - } - - /* Decode multipart_upload_info */ - motr_multipart_upload_info upload_info; - bufferlist mpbl; - mpbl.append(ent.meta.user_data.c_str(), ent.meta.user_data.size()); - auto mpbl_iter = mpbl.cbegin(); - upload_info.decode(mpbl_iter); - placement = upload_info.dest_placement; - *rule = &placement; - - return 0; -} - -std::unique_ptr MotrMultipartUpload::get_writer( - const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t part_num, - const std::string& part_num_str) -{ - return std::make_unique(dpp, y, this, - obj, store, owner, - ptail_placement_rule, part_num, part_num_str); -} - -int MotrMultipartWriter::prepare(optional_yield y) -{ - string part_obj_name = head_obj->get_bucket()->get_name() + "." + - head_obj->get_key().get_oid() + - ".part." 
+ std::to_string(part_num); - ldpp_dout(dpp, 20) << "bucket=" << head_obj->get_bucket()->get_name() << "part_obj_name=" << part_obj_name << dendl; - part_obj = std::make_unique(this->store, rgw_obj_key(part_obj_name), head_obj->get_bucket()); - if (part_obj == nullptr) - return -ENOMEM; - - // s3 client may retry uploading part, so the part may have already - // been created. - int rc = part_obj->create_mobj(dpp, store->cctx->_conf->rgw_max_chunk_size); - if (rc == -EEXIST) { - rc = part_obj->open_mobj(dpp); - if (rc < 0) - return rc; - } - return rc; -} - -int MotrMultipartWriter::process(bufferlist&& data, uint64_t offset) -{ - int rc = part_obj->write_mobj(dpp, std::move(data), offset); - if (rc == 0) { - actual_part_size += data.length(); - ldpp_dout(dpp, 20) << " write_mobj(): actual_part_size=" << actual_part_size << dendl; - } - return rc; -} - -int MotrMultipartWriter::complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) -{ - // Should the dir entry(object metadata) be updated? For example - // mtime. - - ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): enter" << dendl; - // Add an entry into object_nnn_part_index. 
- bufferlist bl; - RGWUploadPartInfo info; - info.num = part_num; - info.etag = etag; - info.size = actual_part_size; - info.accounted_size = accounted_size; - info.modified = real_clock::now(); - - bool compressed; - int rc = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info); - ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): compression rc=" << rc << dendl; - if (rc < 0) { - ldpp_dout(dpp, 1) << "cannot get compression info" << dendl; - return rc; - } - encode(info, bl); - encode(attrs, bl); - part_obj->meta.encode(bl); - - string p = "part."; - char buf[32]; - snprintf(buf, sizeof(buf), "%08d", (int)part_num); - p.append(buf); - string obj_part_iname = "motr.rgw.object." + head_obj->get_bucket()->get_name() + "." + - head_obj->get_key().get_oid() + ".parts"; - ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): object part index = " << obj_part_iname << dendl; - rc = store->do_idx_op_by_name(obj_part_iname, M0_IC_PUT, p, bl); - if (rc < 0) { - return rc == -ENOENT ? 
-ERR_NO_SUCH_UPLOAD : rc; - } - - return 0; -} - -std::unique_ptr MotrStore::get_role(std::string name, - std::string tenant, - std::string path, - std::string trust_policy, - std::string max_session_duration_str, - std::multimap tags) -{ - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -std::unique_ptr MotrStore::get_role(const RGWRoleInfo& info) -{ - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -std::unique_ptr MotrStore::get_role(std::string id) -{ - RGWRole* p = nullptr; - return std::unique_ptr(p); -} - -int MotrStore::get_roles(const DoutPrefixProvider *dpp, - optional_yield y, - const std::string& path_prefix, - const std::string& tenant, - vector>& roles) -{ - return 0; -} - -std::unique_ptr MotrStore::get_oidc_provider() -{ - RGWOIDCProvider* p = nullptr; - return std::unique_ptr(p); -} - -int MotrStore::get_oidc_providers(const DoutPrefixProvider *dpp, - const std::string& tenant, - vector>& providers) -{ - return 0; -} - -std::unique_ptr MotrBucket::get_multipart_upload(const std::string& oid, - std::optional upload_id, - ACLOwner owner, ceph::real_time mtime) -{ - return std::make_unique(store, this, oid, upload_id, owner, mtime); -} - -std::unique_ptr MotrStore::get_append_writer(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - const std::string& unique_tag, - uint64_t position, - uint64_t *cur_accounted_size) { - return nullptr; -} - -std::unique_ptr MotrStore::get_atomic_writer(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t olh_epoch, - const std::string& unique_tag) { - return std::make_unique(dpp, y, - obj, this, owner, - ptail_placement_rule, olh_epoch, unique_tag); -} - -const std::string& MotrStore::get_compression_type(const rgw_placement_rule& rule) -{ - return zone.zone_params->get_compression_type(rule); -} 
- -bool MotrStore::valid_placement(const rgw_placement_rule& rule) -{ - return zone.zone_params->valid_placement(rule); -} - -std::unique_ptr MotrStore::get_user(const rgw_user &u) -{ - ldout(cctx, 20) << "bucket's user: " << u.to_str() << dendl; - return std::make_unique(this, u); -} - -int MotrStore::get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string &key, optional_yield y, std::unique_ptr *user) -{ - int rc; - User *u; - bufferlist bl; - RGWUserInfo uinfo; - MotrAccessKey access_key; - - rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, - M0_IC_GET, key, bl); - if (rc < 0){ - ldout(cctx, 0) << "Access key not found: rc = " << rc << dendl; - return rc; - } - - bufferlist& blr = bl; - auto iter = blr.cbegin(); - access_key.decode(iter); - - uinfo.user_id.from_str(access_key.user_id); - ldout(cctx, 0) << "Loading user: " << uinfo.user_id.id << dendl; - rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr); - if (rc < 0){ - ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl; - return rc; - } - u = new MotrUser(this, uinfo); - if (!u) - return -ENOMEM; - - user->reset(u); - return 0; -} - -int MotrStore::get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr* user) -{ - int rc; - User *u; - bufferlist bl; - RGWUserInfo uinfo; - MotrEmailInfo email_info; - rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, - M0_IC_GET, email, bl); - if (rc < 0){ - ldout(cctx, 0) << "Email Id not found: rc = " << rc << dendl; - return rc; - } - auto iter = bl.cbegin(); - email_info.decode(iter); - ldout(cctx, 0) << "Loading user: " << email_info.user_id << dendl; - uinfo.user_id.from_str(email_info.user_id); - rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr); - if (rc < 0){ - ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl; - return rc; - } - u = new MotrUser(this, uinfo); - if (!u) - return -ENOMEM; - - user->reset(u); - return 0; -} - -int 
MotrStore::get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) -{ - /* Swift keys and subusers are not supported for now */ - return 0; -} - -int MotrStore::store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key) -{ - int rc; - bufferlist bl; - access_key.encode(bl); - rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, - M0_IC_PUT, access_key.id, bl); - if (rc < 0){ - ldout(cctx, 0) << "Failed to store key: rc = " << rc << dendl; - return rc; - } - return rc; -} - -int MotrStore::delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key) -{ - int rc; - bufferlist bl; - rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, - M0_IC_DEL, access_key, bl); - if (rc < 0){ - ldout(cctx, 0) << "Failed to delete key: rc = " << rc << dendl; - } - return rc; -} - -int MotrStore::store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info ) -{ - int rc; - bufferlist bl; - email_info.encode(bl); - rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY, - M0_IC_PUT, email_info.email_id, bl); - if (rc < 0) { - ldout(cctx, 0) << "Failed to store the user by email as key: rc = " << rc << dendl; - } - return rc; -} - -std::unique_ptr MotrStore::get_object(const rgw_obj_key& k) -{ - return std::make_unique(this, k); -} - - -int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) -{ - int ret; - Bucket* bp; - - bp = new MotrBucket(this, b, u); - ret = bp->load_bucket(dpp, y); - if (ret < 0) { - delete bp; - return ret; - } - - bucket->reset(bp); - return 0; -} - -int MotrStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) -{ - Bucket* bp; - - bp = new MotrBucket(this, i, u); - /* Don't need to fetch the bucket info, use the provided one */ - - bucket->reset(bp); - return 0; -} - -int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* 
u, const std::string& tenant, const std::string& name, std::unique_ptr* bucket, optional_yield y) -{ - rgw_bucket b; - - b.tenant = tenant; - b.name = name; - - return get_bucket(dpp, u, b, bucket, y); -} - -bool MotrStore::is_meta_master() -{ - return true; -} - -int MotrStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version *objv, - bufferlist& in_data, - JSONParser *jp, req_info& info, - optional_yield y) -{ - return 0; -} - -int MotrStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, - bufferlist& in_data, - RGWXMLDecoder::XMLParser* parser, req_info& info, - optional_yield y) -{ - return 0; -} - -std::string MotrStore::zone_unique_id(uint64_t unique_num) -{ - return ""; -} - -std::string MotrStore::zone_unique_trans_id(const uint64_t unique_num) -{ - return ""; -} - -int MotrStore::get_zonegroup(const std::string& id, std::unique_ptr* group) -{ - /* XXX: for now only one zonegroup supported */ - ZoneGroup* zg; - zg = new MotrZoneGroup(this, zone.zonegroup.get_group()); - - group->reset(zg); - return 0; -} - -int MotrStore::list_all_zones(const DoutPrefixProvider* dpp, - std::list& zone_ids) -{ - zone_ids.push_back(zone.get_id()); - return 0; -} - -int MotrStore::cluster_stat(RGWClusterStat& stats) -{ - return 0; -} - -std::unique_ptr MotrStore::get_lifecycle(void) -{ - return 0; -} - -std::unique_ptr MotrStore::get_notification(Object* obj, Object* src_obj, req_state* s, - rgw::notify::EventType event_type, optional_yield y, const string* object_name) -{ - return std::make_unique(obj, src_obj, event_type); -} - -std::unique_ptr MotrStore::get_notification(const DoutPrefixProvider* dpp, Object* obj, - Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, - std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) -{ - return std::make_unique(obj, src_obj, event_type); -} - -int MotrStore::log_usage(const 
DoutPrefixProvider *dpp, map& usage_info) -{ - return 0; -} - -int MotrStore::log_op(const DoutPrefixProvider *dpp, string& oid, bufferlist& bl) -{ - return 0; -} - -int MotrStore::register_to_service_map(const DoutPrefixProvider *dpp, const string& daemon_type, - const map& meta) -{ - return 0; -} - -void MotrStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, - RGWRateLimitInfo& user_ratelimit, - RGWRateLimitInfo& anon_ratelimit) -{ - return; -} - -void MotrStore::get_quota(RGWQuota& quota) -{ - // XXX: Not handled for the first pass - return; -} - -int MotrStore::set_buckets_enabled(const DoutPrefixProvider *dpp, vector& buckets, bool enabled) -{ - return 0; -} - -int MotrStore::get_sync_policy_handler(const DoutPrefixProvider *dpp, - std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef *phandler, - optional_yield y) -{ - return 0; -} - -RGWDataSyncStatusManager* MotrStore::get_data_sync_manager(const rgw_zone_id& source_zone) -{ - return 0; -} - -int MotrStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool *is_truncated, - RGWUsageIter& usage_iter, - map& usage) -{ - return 0; -} - -int MotrStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) -{ - return 0; -} - -int MotrStore::get_config_key_val(string name, bufferlist *bl) -{ - return 0; -} - -int MotrStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const string& section, const string& marker, void** phandle) -{ - return 0; -} - -int MotrStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list& keys, bool* truncated) -{ - return 0; -} - -void MotrStore::meta_list_keys_complete(void* handle) -{ - return; -} - -std::string MotrStore::meta_get_marker(void* handle) -{ - return ""; -} - -int MotrStore::meta_remove(const DoutPrefixProvider *dpp, string& metadata_key, optional_yield y) -{ - return 0; -} - -int MotrStore::open_idx(struct 
m0_uint128 *id, bool create, struct m0_idx *idx) -{ - m0_idx_init(idx, &container.co_realm, id); - - if (!create) - return 0; // nothing to do more - - // create index or make sure it's created - struct m0_op *op = nullptr; - int rc = m0_entity_create(nullptr, &idx->in_entity, &op); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; - goto out; - } - - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc != 0 && rc != -EEXIST) - ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; -out: - return rc; -} - -static void set_m0bufvec(struct m0_bufvec *bv, vector& vec) -{ - *bv->ov_buf = reinterpret_cast(vec.data()); - *bv->ov_vec.v_count = vec.size(); -} - -// idx must be opened with open_idx() beforehand -int MotrStore::do_idx_op(struct m0_idx *idx, enum m0_idx_opcode opcode, - vector& key, vector& val, bool update) -{ - int rc, rc_i; - struct m0_bufvec k, v, *vp = &v; - uint32_t flags = 0; - struct m0_op *op = nullptr; - - if (m0_bufvec_empty_alloc(&k, 1) != 0) { - ldout(cctx, 0) << "ERROR: failed to allocate key bufvec" << dendl; - return -ENOMEM; - } - - if (opcode == M0_IC_PUT || opcode == M0_IC_GET) { - rc = -ENOMEM; - if (m0_bufvec_empty_alloc(&v, 1) != 0) { - ldout(cctx, 0) << "ERROR: failed to allocate value bufvec" << dendl; - goto out; - } - } - - set_m0bufvec(&k, key); - if (opcode == M0_IC_PUT) - set_m0bufvec(&v, val); - - if (opcode == M0_IC_DEL) - vp = nullptr; - - if (opcode == M0_IC_PUT && update) - flags |= M0_OIF_OVERWRITE; - - rc = m0_idx_op(idx, opcode, &k, vp, &rc_i, flags, &op); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl; - goto out; - } - - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc != 0) { - ldout(cctx, 0) << "ERROR: op failed: " << 
rc << dendl; - goto out; - } - - if (rc_i != 0) { - ldout(cctx, 0) << "ERROR: idx op failed: " << rc_i << dendl; - rc = rc_i; - goto out; - } - - if (opcode == M0_IC_GET) { - val.resize(*v.ov_vec.v_count); - memcpy(reinterpret_cast(val.data()), *v.ov_buf, *v.ov_vec.v_count); - } - -out: - m0_bufvec_free2(&k); - if (opcode == M0_IC_GET) - m0_bufvec_free(&v); // cleanup buffer after GET - else if (opcode == M0_IC_PUT) - m0_bufvec_free2(&v); - - return rc; -} - -// Retrieve a range of key/value pairs starting from keys[0]. -int MotrStore::do_idx_next_op(struct m0_idx *idx, - vector>& keys, - vector>& vals) -{ - int rc; - uint32_t i = 0; - int nr_kvp = vals.size(); - int *rcs = new int[nr_kvp]; - struct m0_bufvec k, v; - struct m0_op *op = nullptr; - - rc = m0_bufvec_empty_alloc(&k, nr_kvp)?: - m0_bufvec_empty_alloc(&v, nr_kvp); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: failed to allocate kv bufvecs" << dendl; - return rc; - } - - set_m0bufvec(&k, keys[0]); - - rc = m0_idx_op(idx, M0_IC_NEXT, &k, &v, rcs, 0, &op); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl; - goto out; - } - - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc != 0) { - ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl; - goto out; - } - - for (i = 0; i < v.ov_vec.v_nr; ++i) { - if (rcs[i] < 0) - break; - - vector& key = keys[i]; - vector& val = vals[i]; - key.resize(k.ov_vec.v_count[i]); - val.resize(v.ov_vec.v_count[i]); - memcpy(reinterpret_cast(key.data()), k.ov_buf[i], k.ov_vec.v_count[i]); - memcpy(reinterpret_cast(val.data()), v.ov_buf[i], v.ov_vec.v_count[i]); - } - -out: - k.ov_vec.v_nr = i; - v.ov_vec.v_nr = i; - m0_bufvec_free(&k); - m0_bufvec_free(&v); // cleanup buffer after GET - - delete []rcs; - return rc ?: i; -} - -// Retrieve a number of key/value pairs under the prefix starting -// from the marker at key_out[0]. 
-int MotrStore::next_query_by_name(string idx_name, - vector& key_out, - vector& val_out, - string prefix, string delim) -{ - unsigned nr_kvp = std::min(val_out.size(), 100UL); - struct m0_idx idx = {}; - vector> keys(nr_kvp); - vector> vals(nr_kvp); - struct m0_uint128 idx_id; - int i = 0, j, k = 0; - - index_name_to_motr_fid(idx_name, &idx_id); - int rc = open_motr_idx(&idx_id, &idx); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: next_query_by_name(): failed to open index: rc=" - << rc << dendl; - goto out; - } - - // Only the first element for keys needs to be set for NEXT query. - // The keys will be set will the returned keys from motr index. - ldout(cctx, 20) <<__func__<< ": next_query_by_name(): index=" << idx_name - << " prefix=" << prefix << " delim=" << delim << dendl; - keys[0].assign(key_out[0].begin(), key_out[0].end()); - for (i = 0; i < (int)val_out.size(); i += k, k = 0) { - rc = do_idx_next_op(&idx, keys, vals); - ldout(cctx, 20) << "do_idx_next_op() = " << rc << dendl; - if (rc < 0) { - ldout(cctx, 0) << "ERROR: NEXT query failed. 
" << rc << dendl; - goto out; - } - - string dir; - for (j = 0, k = 0; j < rc; ++j) { - string key(keys[j].begin(), keys[j].end()); - size_t pos = std::string::npos; - if (!delim.empty()) - pos = key.find(delim, prefix.length()); - if (pos != std::string::npos) { // DIR entry - dir.assign(key, 0, pos + 1); - if (dir.compare(0, prefix.length(), prefix) != 0) - goto out; - if (i + k == 0 || dir != key_out[i + k - 1]) // a new one - key_out[i + k++] = dir; - continue; - } - dir = ""; - if (key.compare(0, prefix.length(), prefix) != 0) - goto out; - key_out[i + k] = key; - bufferlist& vbl = val_out[i + k]; - vbl.append(reinterpret_cast(vals[j].data()), vals[j].size()); - ++k; - } - - if (rc < (int)nr_kvp) // there are no more keys to fetch - break; - - string next_key; - if (dir != "") - next_key = dir + "\xff"; // skip all dir content in 1 step - else - next_key = key_out[i + k - 1] + " "; - ldout(cctx, 0) << "do_idx_next_op(): next_key=" << next_key << dendl; - keys[0].assign(next_key.begin(), next_key.end()); - } - -out: - m0_idx_fini(&idx); - return rc < 0 ? rc : i + k; -} - -int MotrStore::delete_motr_idx_by_name(string iname) -{ - struct m0_idx idx; - struct m0_uint128 idx_id; - struct m0_op *op = nullptr; - - ldout(cctx, 20) << "delete_motr_idx_by_name=" << iname << dendl; - - index_name_to_motr_fid(iname, &idx_id); - m0_idx_init(&idx, &container.co_realm, &idx_id); - m0_entity_open(&idx.in_entity, &op); - int rc = m0_entity_delete(&idx.in_entity, &op); - if (rc < 0) - goto out; - - m0_op_launch(&op, 1); - - ldout(cctx, 70) << "waiting for op completion" << dendl; - - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc == -ENOENT) // race deletion?? 
- rc = 0; - else if (rc < 0) - ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; - - ldout(cctx, 20) << "delete_motr_idx_by_name rc=" << rc << dendl; - -out: - m0_idx_fini(&idx); - return rc; -} - -int MotrStore::open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx) -{ - m0_idx_init(idx, &container.co_realm, id); - return 0; -} - -// The following marcos are from dix/fid_convert.h which are not exposed. -enum { - M0_DIX_FID_DEVICE_ID_OFFSET = 32, - M0_DIX_FID_DIX_CONTAINER_MASK = (1ULL << M0_DIX_FID_DEVICE_ID_OFFSET) - - 1, -}; - -// md5 is used here, a more robust way to convert index name to fid is -// needed to avoid collision. -void MotrStore::index_name_to_motr_fid(string iname, struct m0_uint128 *id) -{ - unsigned char md5[16]; // 128/8 = 16 - MD5 hash; - - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - hash.Update((const unsigned char *)iname.c_str(), iname.length()); - hash.Final(md5); - - memcpy(&id->u_hi, md5, 8); - memcpy(&id->u_lo, md5 + 8, 8); - ldout(cctx, 20) << "id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl; - - struct m0_fid *fid = (struct m0_fid*)id; - m0_fid_tset(fid, m0_dix_fid_type.ft_id, - fid->f_container & M0_DIX_FID_DIX_CONTAINER_MASK, fid->f_key); - ldout(cctx, 20) << "converted id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl; -} - -int MotrStore::do_idx_op_by_name(string idx_name, enum m0_idx_opcode opcode, - string key_str, bufferlist &bl, bool update) -{ - struct m0_idx idx; - vector key(key_str.begin(), key_str.end()); - vector val; - struct m0_uint128 idx_id; - - index_name_to_motr_fid(idx_name, &idx_id); - int rc = open_motr_idx(&idx_id, &idx); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: failed to open index: " << rc << dendl; - goto out; - } - - if (opcode == M0_IC_PUT) - val.assign(bl.c_str(), bl.c_str() + bl.length()); - - ldout(cctx, 20) <<__func__<< ": do_idx_op_by_name(): op=" - 
<< (opcode == M0_IC_PUT ? "PUT" : "GET") - << " idx=" << idx_name << " key=" << key_str << dendl; - rc = do_idx_op(&idx, opcode, key, val, update); - if (rc == 0 && opcode == M0_IC_GET) - // Append the returned value (blob) to the bufferlist. - bl.append(reinterpret_cast(val.data()), val.size()); - -out: - m0_idx_fini(&idx); - return rc; -} - -int MotrStore::create_motr_idx_by_name(string iname) -{ - struct m0_idx idx = {}; - struct m0_uint128 id; - - index_name_to_motr_fid(iname, &id); - m0_idx_init(&idx, &container.co_realm, &id); - - // create index or make sure it's created - struct m0_op *op = nullptr; - int rc = m0_entity_create(nullptr, &idx.in_entity, &op); - if (rc != 0) { - ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl; - goto out; - } - - m0_op_launch(&op, 1); - rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?: - m0_rc(op); - m0_op_fini(op); - m0_op_free(op); - - if (rc != 0 && rc != -EEXIST) - ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl; -out: - m0_idx_fini(&idx); - return rc; -} - -// If a global index is checked (if it has been create) every time -// before they're queried (put/get), which takes 2 Motr operations to -// complete the query. As the global indices' name and FID are known -// already when MotrStore is created, we move the check and creation -// in newMotrStore(). -// Similar method is used for per bucket/user index. For example, -// bucket instance index is created when creating the bucket. 
-int MotrStore::check_n_create_global_indices() -{ - int rc = 0; - - for (const auto& iname : motr_global_indices) { - rc = create_motr_idx_by_name(iname); - if (rc < 0 && rc != -EEXIST) - break; - rc = 0; - } - - return rc; -} - -std::string MotrStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) -{ - char id[M0_FID_STR_LEN]; - struct m0_confc *confc = m0_reqh2confc(&instance->m0c_reqh); - - m0_fid_print(id, ARRAY_SIZE(id), &confc->cc_root->co_id); - return std::string(id); -} - -int MotrStore::init_metadata_cache(const DoutPrefixProvider *dpp, - CephContext *cct) -{ - this->obj_meta_cache = new MotrMetaCache(dpp, cct); - this->get_obj_meta_cache()->set_enabled(true); - - this->user_cache = new MotrMetaCache(dpp, cct); - this->get_user_cache()->set_enabled(true); - - this->bucket_inst_cache = new MotrMetaCache(dpp, cct); - this->get_bucket_inst_cache()->set_enabled(true); - - return 0; -} - - int MotrLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) - { - return -ENOENT; - } - - int MotrLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) - { - return -ENOENT; - } - - int MotrLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) - { - return -ENOENT; - } - - int MotrLuaManager::add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) - { - return -ENOENT; - } - - int MotrLuaManager::remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) - { - return -ENOENT; - } - - int MotrLuaManager::list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) - { - return -ENOENT; - } -} // namespace rgw::sal - -extern "C" { - -void *newMotrStore(CephContext *cct) -{ - int rc = -1; - rgw::sal::MotrStore *store = new rgw::sal::MotrStore(cct); - - if (store) { - 
store->conf.mc_is_oostore = true; - // XXX: these params should be taken from config settings and - // cct somehow? - store->instance = nullptr; - const auto& proc_ep = g_conf().get_val("motr_my_endpoint"); - const auto& ha_ep = g_conf().get_val("motr_ha_endpoint"); - const auto& proc_fid = g_conf().get_val("motr_my_fid"); - const auto& profile = g_conf().get_val("motr_profile_fid"); - const auto& admin_proc_ep = g_conf().get_val("motr_admin_endpoint"); - const auto& admin_proc_fid = g_conf().get_val("motr_admin_fid"); - const int init_flags = cct->get_init_flags(); - ldout(cct, 0) << "INFO: motr my endpoint: " << proc_ep << dendl; - ldout(cct, 0) << "INFO: motr ha endpoint: " << ha_ep << dendl; - ldout(cct, 0) << "INFO: motr my fid: " << proc_fid << dendl; - ldout(cct, 0) << "INFO: motr profile fid: " << profile << dendl; - store->conf.mc_local_addr = proc_ep.c_str(); - store->conf.mc_process_fid = proc_fid.c_str(); - - ldout(cct, 0) << "INFO: init flags: " << init_flags << dendl; - ldout(cct, 0) << "INFO: motr admin endpoint: " << admin_proc_ep << dendl; - ldout(cct, 0) << "INFO: motr admin fid: " << admin_proc_fid << dendl; - - // HACK this is so that radosge-admin uses a different client - if (init_flags == 0) { - store->conf.mc_process_fid = admin_proc_fid.c_str(); - store->conf.mc_local_addr = admin_proc_ep.c_str(); - } else { - store->conf.mc_process_fid = proc_fid.c_str(); - store->conf.mc_local_addr = proc_ep.c_str(); - } - store->conf.mc_ha_addr = ha_ep.c_str(); - store->conf.mc_profile = profile.c_str(); - - ldout(cct, 50) << "INFO: motr profile fid: " << store->conf.mc_profile << dendl; - ldout(cct, 50) << "INFO: ha addr: " << store->conf.mc_ha_addr << dendl; - ldout(cct, 50) << "INFO: process fid: " << store->conf.mc_process_fid << dendl; - ldout(cct, 50) << "INFO: motr endpoint: " << store->conf.mc_local_addr << dendl; - - store->conf.mc_tm_recv_queue_min_len = 64; - store->conf.mc_max_rpc_msg_size = 524288; - store->conf.mc_idx_service_id = 
M0_IDX_DIX; - store->dix_conf.kc_create_meta = false; - store->conf.mc_idx_service_conf = &store->dix_conf; - - if (!g_conf().get_val("motr_tracing_enabled")) { - m0_trace_level_allow(M0_WARN); // allow errors and warnings in syslog anyway - m0_trace_set_mmapped_buffer(false); - } - - store->instance = nullptr; - rc = m0_client_init(&store->instance, &store->conf, true); - if (rc != 0) { - ldout(cct, 0) << "ERROR: m0_client_init() failed: " << rc << dendl; - goto out; - } - - m0_container_init(&store->container, nullptr, &M0_UBER_REALM, store->instance); - rc = store->container.co_realm.re_entity.en_sm.sm_rc; - if (rc != 0) { - ldout(cct, 0) << "ERROR: m0_container_init() failed: " << rc << dendl; - goto out; - } - - rc = m0_ufid_init(store->instance, &ufid_gr); - if (rc != 0) { - ldout(cct, 0) << "ERROR: m0_ufid_init() failed: " << rc << dendl; - goto out; - } - - // Create global indices if not yet. - rc = store->check_n_create_global_indices(); - if (rc != 0) { - ldout(cct, 0) << "ERROR: check_n_create_global_indices() failed: " << rc << dendl; - goto out; - } - - } - -out: - if (rc != 0) { - delete store; - return nullptr; - } - return store; -} - -} diff --git a/src/rgw/rgw_sal_motr.h b/src/rgw/rgw_sal_motr.h deleted file mode 100644 index 153ac8abd005..000000000000 --- a/src/rgw/rgw_sal_motr.h +++ /dev/null @@ -1,1195 +0,0 @@ - -// vim: ts=2 sw=2 expandtab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * SAL implementation for the CORTX Motr backend - * - * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#pragma once - -extern "C" { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wextern-c-compat" -#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" -#include "motr/config.h" -#include "motr/client.h" -#pragma clang diagnostic pop -} - -#include "rgw_sal_store.h" -#include "rgw_rados.h" -#include "rgw_notify.h" -#include "rgw_oidc_provider.h" -#include "rgw_role.h" -#include "rgw_multi.h" -#include "rgw_putobj_processor.h" - -namespace rgw::sal { - -class MotrStore; - -// Global Motr indices -#define RGW_MOTR_USERS_IDX_NAME "motr.rgw.users" -#define RGW_MOTR_BUCKET_INST_IDX_NAME "motr.rgw.bucket.instances" -#define RGW_MOTR_BUCKET_HD_IDX_NAME "motr.rgw.bucket.headers" -#define RGW_IAM_MOTR_ACCESS_KEY "motr.rgw.accesskeys" -#define RGW_IAM_MOTR_EMAIL_KEY "motr.rgw.emails" - -//#define RGW_MOTR_BUCKET_ACL_IDX_NAME "motr.rgw.bucket.acls" - -// A simplified metadata cache implementation. -// Note: MotrObjMetaCache doesn't handle the IO operations to Motr. A proxy -// class can be added to handle cache and 'real' ops. -class MotrMetaCache -{ -protected: - // MGW re-uses ObjectCache to cache object's metadata as it has already - // implemented a lru cache: (1) ObjectCache internally uses a map and lru - // list to manage cache entry. POC uses object name, user name or bucket - // name as the key to lookup and insert an entry. (2) ObjectCache::data is - // a bufferlist and can be used to store any metadata structure, such as - // object's bucket dir entry, user info or bucket instance. - // - // Note from RGW: - // The Rados Gateway stores metadata and objects in an internal cache. This - // should be kept consistent by the OSD's relaying notify events between - // multiple watching RGW processes. In the event that this notification - // protocol fails, bounding the length of time that any data in the cache will - // be assumed valid will ensure that any RGW instance that falls out of sync - // will eventually recover. 
This seems to be an issue mostly for large numbers - // of RGW instances under heavy use. If you would like to turn off cache expiry, - // set this value to zero. - // - // Currently POC hasn't implemented the watch-notify menchanism yet. So the - // current implementation is similar to cortx-s3server which is based on expiry - // time. TODO: see comments on distribute_cache). - // - // Beaware: Motr object data is not cached in current POC as RGW! - // RGW caches the first chunk (4MB by default). - ObjectCache cache; - -public: - // Lookup a cache entry. - int get(const DoutPrefixProvider *dpp, const std::string& name, bufferlist& data); - - // Insert a cache entry. - int put(const DoutPrefixProvider *dpp, const std::string& name, const bufferlist& data); - - // Called when an object is deleted. Notification should be sent to other - // RGW instances. - int remove(const DoutPrefixProvider *dpp, const std::string& name); - - // Make the local cache entry invalid. - void invalid(const DoutPrefixProvider *dpp, const std::string& name); - - // TODO: Distribute_cache() and watch_cb() now are only place holder functions. - // Checkout services/svc_sys_obj_cache.h/cc for reference. - // These 2 functions are designed to notify or to act on cache notification. - // It is feasible to implement the functionality using Motr's FDMI after discussing - // with Hua. 
- int distribute_cache(const DoutPrefixProvider *dpp, - const std::string& normal_name, - ObjectCacheInfo& obj_info, int op); - int watch_cb(const DoutPrefixProvider *dpp, - uint64_t notify_id, - uint64_t cookie, - uint64_t notifier_id, - bufferlist& bl); - - void set_enabled(bool status); - - MotrMetaCache(const DoutPrefixProvider *dpp, CephContext *cct) { - cache.set_ctx(cct); - } -}; - -struct MotrUserInfo { - RGWUserInfo info; - obj_version user_version; - rgw::sal::Attrs attrs; - - void encode(bufferlist& bl) const - { - ENCODE_START(3, 3, bl); - encode(info, bl); - encode(user_version, bl); - encode(attrs, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) - { - DECODE_START(3, bl); - decode(info, bl); - decode(user_version, bl); - decode(attrs, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(MotrUserInfo); - -struct MotrEmailInfo { - std::string user_id; - std::string email_id; - - MotrEmailInfo() {} - MotrEmailInfo(std::string _user_id, std::string _email_id ) - : user_id(std::move(_user_id)), email_id(std::move(_email_id)) {} - - void encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); - encode(user_id, bl); - encode(email_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); - decode(user_id, bl); - decode(email_id, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(MotrEmailInfo); - -struct MotrAccessKey { - std::string id; // AccessKey - std::string key; // SecretKey - std::string user_id; // UserID - - MotrAccessKey() {} - MotrAccessKey(std::string _id, std::string _key, std::string _user_id) - : id(std::move(_id)), key(std::move(_key)), user_id(std::move(_user_id)) {} - - void encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); - encode(id, bl); - encode(key, bl); - encode(user_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); - decode(id, 
bl); - decode(key, bl); - decode(user_id, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(MotrAccessKey); - -class MotrNotification : public StoreNotification { - public: - MotrNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) : - StoreNotification(_obj, _src_obj, _type) {} - ~MotrNotification() = default; - - virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override { return 0;} - virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, - const ceph::real_time& mtime, const std::string& etag, const std::string& version) override { return 0; } -}; - -class MotrUser : public StoreUser { - private: - MotrStore *store; - struct m0_uint128 idxID = {0xe5ecb53640d4ecce, 0x6a156cd5a74aa3b8}; // MD5 of “motr.rgw.users“ - struct m0_idx idx; - - public: - std::set access_key_tracker; - MotrUser(MotrStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { } - MotrUser(MotrStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { } - MotrUser(MotrStore *_st) : store(_st) { } - MotrUser(MotrUser& _o) = default; - MotrUser() {} - - virtual std::unique_ptr clone() override { - return std::unique_ptr(new MotrUser(*this)); - } - int list_buckets(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& end_marker, - uint64_t max, bool need_stats, BucketList& buckets, optional_yield y) override; - virtual int create_bucket(const DoutPrefixProvider* dpp, - const rgw_bucket& b, - const std::string& zonegroup_id, - rgw_placement_rule& placement_rule, - std::string& swift_ver_location, - const RGWQuotaInfo* pquota_info, - const RGWAccessControlPolicy& policy, - Attrs& attrs, - RGWBucketInfo& info, - obj_version& ep_objv, - bool exclusive, - bool obj_lock_enabled, - bool* existed, - req_info& req_info, - std::unique_ptr* bucket, - optional_yield y) override; - virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int 
merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override; - virtual int read_stats(const DoutPrefixProvider *dpp, - optional_yield y, RGWStorageStats* stats, - ceph::real_time *last_stats_sync = nullptr, - ceph::real_time *last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; - virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, - bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - - virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override; - virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override; - - int create_user_info_idx(); - int load_user_from_idx(const DoutPrefixProvider *dpp, MotrStore *store, RGWUserInfo& info, std::map *attrs, RGWObjVersionTracker *objv_tr); - - friend class MotrBucket; -}; - -class MotrBucket : public StoreBucket { - private: - MotrStore *store; - RGWAccessControlPolicy acls; - - // RGWBucketInfo and other information that are shown when listing a bucket is - // represented in struct MotrBucketInfo. The structure is encoded and stored - // as the value of the global bucket instance index. - // TODO: compare pros and cons of separating the bucket_attrs (ACLs, tag etc.) - // into a different index. 
- struct MotrBucketInfo { - RGWBucketInfo info; - - obj_version bucket_version; - ceph::real_time mtime; - - rgw::sal::Attrs bucket_attrs; - - void encode(bufferlist& bl) const - { - ENCODE_START(4, 4, bl); - encode(info, bl); - encode(bucket_version, bl); - encode(mtime, bl); - encode(bucket_attrs, bl); //rgw_cache.h example for a map - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) - { - DECODE_START(4, bl); - decode(info, bl); - decode(bucket_version, bl); - decode(mtime, bl); - decode(bucket_attrs, bl); - DECODE_FINISH(bl); - } - }; - WRITE_CLASS_ENCODER(MotrBucketInfo); - - public: - MotrBucket(MotrStore *_st) - : store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, User* _u) - : StoreBucket(_u), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const rgw_bucket& _b) - : StoreBucket(_b), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const RGWBucketEnt& _e) - : StoreBucket(_e), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const RGWBucketInfo& _i) - : StoreBucket(_i), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const rgw_bucket& _b, User* _u) - : StoreBucket(_b, _u), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const RGWBucketEnt& _e, User* _u) - : StoreBucket(_e, _u), - store(_st), - acls() { - } - - MotrBucket(MotrStore *_st, const RGWBucketInfo& _i, User* _u) - : StoreBucket(_i, _u), - store(_st), - acls() { - } - - ~MotrBucket() { } - - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int list(const DoutPrefixProvider *dpp, ListParams&, int, ListResults&, optional_yield y) override; - virtual int remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override; - virtual int remove_bucket_bypass_gc(int concurrent_max, bool - keep_index_consistent, - optional_yield y, const - DoutPrefixProvider *dpp) override; - virtual RGWAccessControlPolicy& get_acl(void) 
override { return acls; } - virtual int set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy& acl, optional_yield y) override; - virtual int load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats = false) override; - int link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y); - int unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y); - int create_bucket_index(); - int create_multipart_indices(); - virtual int read_stats(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, int shard_id, - std::string *bucket_ver, std::string *master_ver, - std::map& stats, - std::string *max_marker = nullptr, - bool *syncstopped = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; - virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int update_container_stats(const DoutPrefixProvider *dpp) override; - virtual int check_bucket_shards(const DoutPrefixProvider *dpp) override; - virtual int chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) override; - virtual int put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time mtime) override; - virtual bool is_owner(User* user) override; - virtual int check_empty(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override; - virtual int merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& attrs, optional_yield y) override; - virtual int try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) override; - virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, - bool *is_truncated, RGWUsageIter& 
usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) override; - virtual int check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) override; - virtual int rebuild_index(const DoutPrefixProvider *dpp) override; - virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override; - virtual int purge_instance(const DoutPrefixProvider *dpp) override; - virtual std::unique_ptr clone() override { - return std::make_unique(*this); - } - virtual std::unique_ptr get_multipart_upload(const std::string& oid, - std::optional upload_id=std::nullopt, - ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override; - virtual int list_multiparts(const DoutPrefixProvider *dpp, - const std::string& prefix, - std::string& marker, - const std::string& delim, - const int& max_uploads, - std::vector>& uploads, - std::map *common_prefixes, - bool *is_truncated) override; - virtual int abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) override; - - friend class MotrStore; -}; - -class MotrPlacementTier: public StorePlacementTier { - MotrStore* store; - RGWZoneGroupPlacementTier tier; -public: - MotrPlacementTier(MotrStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {} - virtual ~MotrPlacementTier() = default; - - virtual const std::string& get_tier_type() { return tier.tier_type; } - virtual const std::string& get_storage_class() { return tier.storage_class; } - virtual bool retain_head_object() { return tier.retain_head_object; } - RGWZoneGroupPlacementTier& get_rt() { return tier; } -}; - -class MotrZoneGroup : public StoreZoneGroup { -protected: - MotrStore* store; - const RGWZoneGroup group; - std::string empty; -public: - MotrZoneGroup(MotrStore* _store) : store(_store), group() 
{} - MotrZoneGroup(MotrStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {} - virtual ~MotrZoneGroup() = default; - - virtual const std::string& get_id() const override { return group.get_id(); }; - virtual const std::string& get_name() const override { return group.get_name(); }; - virtual int equals(const std::string& other_zonegroup) const override { - return group.equals(other_zonegroup); - }; - /** Get the endpoint from zonegroup, or from master zone if not set */ - virtual const std::string& get_endpoint() const override; - virtual bool placement_target_exists(std::string& target) const override; - virtual bool is_master_zonegroup() const override { - return group.is_master_zonegroup(); - }; - virtual const std::string& get_api_name() const override { return group.api_name; }; - virtual int get_placement_target_names(std::set& names) const override; - virtual const std::string& get_default_placement_name() const override { - return group.default_placement.name; }; - virtual int get_hostnames(std::list& names) const override { - names = group.hostnames; - return 0; - }; - virtual int get_s3website_hostnames(std::list& names) const override { - names = group.hostnames_s3website; - return 0; - }; - virtual int get_zone_count() const override { - return group.zones.size(); - } - virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr* tier); - virtual int get_zone_by_id(const std::string& id, std::unique_ptr* zone) override { - return -1; - } - virtual int get_zone_by_name(const std::string& name, std::unique_ptr* zone) override { - return -1; - } - virtual int list_zones(std::list& zone_ids) override { - zone_ids.clear(); - return 0; - } - const RGWZoneGroup& get_group() { return group; } - virtual std::unique_ptr clone() override { - return std::make_unique(store, group); - } - friend class MotrZone; -}; - -class MotrZone : public StoreZone { - protected: - MotrStore* store; - RGWRealm *realm{nullptr}; - 
MotrZoneGroup zonegroup; - RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */ - RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */ - RGWPeriod *current_period{nullptr}; - - public: - MotrZone(MotrStore* _store) : store(_store), zonegroup(_store) { - realm = new RGWRealm(); - zone_public_config = new RGWZone(); - zone_params = new RGWZoneParams(); - current_period = new RGWPeriod(); - - // XXX: only default and STANDARD supported for now - RGWZonePlacementInfo info; - RGWZoneStorageClasses sc; - sc.set_storage_class("STANDARD", nullptr, nullptr); - info.storage_classes = sc; - zone_params->placement_pools["default"] = info; - } - MotrZone(MotrStore* _store, MotrZoneGroup _zg) : store(_store), zonegroup(_zg) { - realm = new RGWRealm(); - // TODO: fetch zonegroup params (eg. id) from provisioner config. - //zonegroup.group.set_id("0956b174-fe14-4f97-8b50-bb7ec5e1cf62"); - //zonegroup.group.api_name = "default"; - zone_public_config = new RGWZone(); - zone_params = new RGWZoneParams(); - current_period = new RGWPeriod(); - - // XXX: only default and STANDARD supported for now - RGWZonePlacementInfo info; - RGWZoneStorageClasses sc; - sc.set_storage_class("STANDARD", nullptr, nullptr); - info.storage_classes = sc; - zone_params->placement_pools["default"] = info; - } - ~MotrZone() = default; - - virtual std::unique_ptr clone() override { - return std::make_unique(store); - } - virtual ZoneGroup& get_zonegroup() override; - virtual const std::string& get_id() override; - virtual const std::string& get_name() const override; - virtual bool is_writeable() override; - virtual bool get_redirect_endpoint(std::string* endpoint) override; - virtual bool has_zonegroup_api(const std::string& api) const override; - virtual const std::string& get_current_period_id() override; - virtual const RGWAccessKey& get_system_key() { return zone_params->system_key; } - virtual const std::string& 
get_realm_name() { return realm->get_name(); } - virtual const std::string& get_realm_id() { return realm->get_id(); } - virtual const std::string_view get_tier_type() { return "rgw"; } - virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() { return nullptr; } - friend class MotrStore; -}; - -class MotrLuaManager : public StoreLuaManager { - MotrStore* store; - - public: - MotrLuaManager(MotrStore* _s) : store(_s) - { - } - virtual ~MotrLuaManager() = default; - - /** Get a script named with the given key from the backing store */ - virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) override; - /** Put a script named with the given key to the backing store */ - virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) override; - /** Delete a script named with the given key from the backing store */ - virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) override; - /** Add a lua package */ - virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override; - /** Remove a lua package */ - virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override; - /** List lua packages */ - virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) override; -}; - -class MotrOIDCProvider : public RGWOIDCProvider { - MotrStore* store; - public: - MotrOIDCProvider(MotrStore* _store) : store(_store) {} - ~MotrOIDCProvider() = default; - - virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override { return 0; } - virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override { return 0; } - virtual int delete_obj(const DoutPrefixProvider 
*dpp, optional_yield y) override { return 0;} - - void encode(bufferlist& bl) const { - RGWOIDCProvider::encode(bl); - } - void decode(bufferlist::const_iterator& bl) { - RGWOIDCProvider::decode(bl); - } -}; - -class MotrObject : public StoreObject { - private: - MotrStore *store; - RGWAccessControlPolicy acls; - RGWObjCategory category; - - // If this object is pat of a multipart uploaded one. - // TODO: do it in another class? MotrPartObject : public MotrObject - uint64_t part_off; - uint64_t part_size; - uint64_t part_num; - - public: - - // motr object metadata stored in index - struct Meta { - struct m0_uint128 oid = {}; - struct m0_fid pver = {}; - uint64_t layout_id = 0; - - void encode(bufferlist& bl) const - { - ENCODE_START(5, 5, bl); - encode(oid.u_hi, bl); - encode(oid.u_lo, bl); - encode(pver.f_container, bl); - encode(pver.f_key, bl); - encode(layout_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) - { - DECODE_START(5, bl); - decode(oid.u_hi, bl); - decode(oid.u_lo, bl); - decode(pver.f_container, bl); - decode(pver.f_key, bl); - decode(layout_id, bl); - DECODE_FINISH(bl); - } - }; - - struct m0_obj *mobj = NULL; - Meta meta; - - struct MotrReadOp : public ReadOp { - private: - MotrObject* source; - - // The set of part objects if the source is - // a multipart uploaded object. - std::map> part_objs; - - public: - MotrReadOp(MotrObject *_source); - - virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override; - - /* - * Both `read` and `iterate` read up through index `end` - * *inclusive*. The number of bytes that could be returned is - * `end - ofs + 1`. 
- */ - virtual int read(int64_t off, int64_t end, bufferlist& bl, - optional_yield y, - const DoutPrefixProvider* dpp) override; - virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, - int64_t end, RGWGetDataCB* cb, - optional_yield y) override; - - virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override; - }; - - struct MotrDeleteOp : public DeleteOp { - private: - MotrObject* source; - - public: - MotrDeleteOp(MotrObject* _source); - - virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override; - }; - - MotrObject() = default; - - MotrObject(MotrStore *_st, const rgw_obj_key& _k) - : StoreObject(_k), store(_st), acls() {} - MotrObject(MotrStore *_st, const rgw_obj_key& _k, Bucket* _b) - : StoreObject(_k, _b), store(_st), acls() {} - - MotrObject(MotrObject& _o) = default; - - virtual ~MotrObject(); - - virtual int delete_object(const DoutPrefixProvider* dpp, - optional_yield y, - bool prevent_versioning = false) override; - virtual int copy_object(User* user, - req_info* info, const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, - const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, ceph::real_time* mtime, - const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, - bool high_precision_time, - const char* if_match, const char* if_nomatch, - AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, - RGWObjCategory category, uint64_t olh_epoch, - boost::optional delete_at, - std::string* version_id, std::string* tag, std::string* etag, - void (*progress_cb)(off_t, void *), void* progress_data, - const DoutPrefixProvider* dpp, optional_yield y) override; - virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } - virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } - virtual int get_obj_state(const DoutPrefixProvider* dpp, 
RGWObjState **state, optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; - virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; - virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; - virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; - virtual bool is_expired() override; - virtual void gen_rand_obj_instance_name() override; - virtual std::unique_ptr clone() override { - return std::unique_ptr(new MotrObject(*this)); - } - virtual std::unique_ptr get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name) override; - virtual int transition(Bucket* bucket, - const rgw_placement_rule& placement_rule, - const real_time& mtime, - uint64_t olh_epoch, - const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override; - virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override; - - /* Swift versioning */ - virtual int swift_versioning_restore(bool& restored, - const DoutPrefixProvider* dpp) override; - virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, - optional_yield y) override; - - /* OPs */ - virtual std::unique_ptr get_read_op() override; - virtual std::unique_ptr get_delete_op() override; - - /* OMAP */ - virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, - const std::set& keys, - Attrs* vals) override; - virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, - bool must_exist, optional_yield y) override; - virtual int chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) override; - 
private: - //int read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr); - - public: - bool is_opened() { return mobj != NULL; } - int create_mobj(const DoutPrefixProvider *dpp, uint64_t sz); - int open_mobj(const DoutPrefixProvider *dpp); - int delete_mobj(const DoutPrefixProvider *dpp); - void close_mobj(); - int write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset); - int read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb); - unsigned get_optimal_bs(unsigned len); - - int get_part_objs(const DoutPrefixProvider *dpp, - std::map>& part_objs); - int open_part_objs(const DoutPrefixProvider* dpp, - std::map>& part_objs); - int read_multipart_obj(const DoutPrefixProvider* dpp, - int64_t off, int64_t end, RGWGetDataCB* cb, - std::map>& part_objs); - int delete_part_objs(const DoutPrefixProvider* dpp); - void set_category(RGWObjCategory _category) {category = _category;} - int get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent); - int update_version_entries(const DoutPrefixProvider *dpp); -}; - -// A placeholder locking class for multipart upload. -// TODO: implement it using Motr object locks. 
-class MPMotrSerializer : public StoreMPSerializer { - - public: - MPMotrSerializer(const DoutPrefixProvider *dpp, MotrStore* store, MotrObject* obj, const std::string& lock_name) {} - - virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override {return 0; } - virtual int unlock() override { return 0;} -}; - -class MotrAtomicWriter : public StoreWriter { - protected: - rgw::sal::MotrStore* store; - const rgw_user& owner; - const rgw_placement_rule *ptail_placement_rule; - uint64_t olh_epoch; - const std::string& unique_tag; - MotrObject obj; - MotrObject old_obj; - uint64_t total_data_size; // for total data being uploaded - bufferlist acc_data; // accumulated data - uint64_t acc_off; // accumulated data offset - - struct m0_bufvec buf; - struct m0_bufvec attr; - struct m0_indexvec ext; - - public: - MotrAtomicWriter(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - MotrStore* _store, - const rgw_user& _owner, - const rgw_placement_rule *_ptail_placement_rule, - uint64_t _olh_epoch, - const std::string& _unique_tag); - ~MotrAtomicWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - int write(); - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; - - unsigned populate_bvec(unsigned len, bufferlist::iterator &bi); - void cleanup(); -}; - -class MotrMultipartWriter : public StoreWriter { -protected: - rgw::sal::MotrStore* store; - - // Head object. - rgw::sal::Object* head_obj; - - // Part parameters. 
- const uint64_t part_num; - const std::string part_num_str; - std::unique_ptr part_obj; - uint64_t actual_part_size = 0; - -public: - MotrMultipartWriter(const DoutPrefixProvider *dpp, - optional_yield y, MultipartUpload* upload, - rgw::sal::Object* obj, - MotrStore* _store, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t _part_num, const std::string& part_num_str) : - StoreWriter(dpp, y), store(_store), head_obj(obj), - part_num(_part_num), part_num_str(part_num_str) - { - } - ~MotrMultipartWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; -}; - -// The implementation of multipart upload in POC roughly follows the -// cortx-s3server's design. Parts are stored in separate Motr objects. -// s3server uses a few auxiliary Motr indices to manage multipart -// related metadata: (1) Bucket multipart index (bucket_nnn_multipart_index) -// which contains metadata that answers questions such as which objects have -// started multipart upload and its upload id. This index is created during -// bucket creation. (2) Object part index (object_nnn_part_index) which stores -// metadata of a part's details (size, pvid, oid...). This index is created in -// MotrMultipartUpload::init(). (3) Extended metadata index -// (bucket_nnn_extended_metadata): once parts has been uploaded and their -// metadata saved in the part index, the user may issue multipart completion -// request. 
When processing the completion request, the parts are read from -// object part index and for each part an entry is created in extended index. -// The entry for the object is created in bucket (object list) index. The part -// index is deleted and an entry removed from bucket_nnn_multipart_index. Like -// bucket multipart index, bucket part extened metadata index is created during -// bucket creation. -// -// The extended metadata index is used mainly due to fault tolerant -// considerations (how to handle Motr service crash when uploading an object) -// and to avoid to create too many Motr indices (I am not sure I understand -// why many Motr indices is bad.). In our POC, to keep it simple, only 2 -// indices are maintained: bucket multipart index and object_nnn_part_index. -// -// - -class MotrMultipartPart : public StoreMultipartPart { -protected: - RGWUploadPartInfo info; - -public: - MotrObject::Meta meta; - - MotrMultipartPart(RGWUploadPartInfo _info, MotrObject::Meta _meta) : - info(_info), meta(_meta) {} - virtual ~MotrMultipartPart() = default; - - virtual uint32_t get_num() { return info.num; } - virtual uint64_t get_size() { return info.accounted_size; } - virtual const std::string& get_etag() { return info.etag; } - virtual ceph::real_time& get_mtime() { return info.modified; } - - RGWObjManifest& get_manifest() { return info.manifest; } - - friend class MotrMultipartUpload; -}; - -class MotrMultipartUpload : public StoreMultipartUpload { - MotrStore* store; - RGWMPObj mp_obj; - ACLOwner owner; - ceph::real_time mtime; - rgw_placement_rule placement; - RGWObjManifest manifest; - -public: - MotrMultipartUpload(MotrStore* _store, Bucket* _bucket, const std::string& oid, - std::optional upload_id, ACLOwner _owner, ceph::real_time _mtime) : - StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), owner(_owner), mtime(_mtime) {} - virtual ~MotrMultipartUpload() = default; - - virtual const std::string& get_meta() const { return 
mp_obj.get_meta(); } - virtual const std::string& get_key() const { return mp_obj.get_key(); } - virtual const std::string& get_upload_id() const { return mp_obj.get_upload_id(); } - virtual const ACLOwner& get_owner() const override { return owner; } - virtual ceph::real_time& get_mtime() { return mtime; } - virtual std::unique_ptr get_meta_obj() override; - virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override; - virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct, - int num_parts, int marker, - int* next_marker, bool* truncated, - bool assume_unsorted = false) override; - virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; - virtual int complete(const DoutPrefixProvider* dpp, - optional_yield y, CephContext* cct, - std::map& part_etags, - std::list& remove_objs, - uint64_t& accounted_size, bool& compressed, - RGWCompressionInfo& cs_info, off_t& off, - std::string& tag, ACLOwner& owner, - uint64_t olh_epoch, - rgw::sal::Object* target_obj) override; - virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override; - virtual std::unique_ptr get_writer(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t part_num, - const std::string& part_num_str) override; - int delete_parts(const DoutPrefixProvider *dpp); -}; - -class MotrStore : public StoreDriver { - private: - MotrZone zone; - RGWSyncModuleInstanceRef sync_module; - - MotrMetaCache* obj_meta_cache; - MotrMetaCache* user_cache; - MotrMetaCache* bucket_inst_cache; - - public: - CephContext *cctx; - struct m0_client *instance; - struct m0_container container; - struct m0_realm uber_realm; - struct m0_config conf = {}; - struct m0_idx_dix_config dix_conf = {}; - - MotrStore(CephContext *c): 
zone(this), cctx(c) {} - ~MotrStore() { - delete obj_meta_cache; - delete user_cache; - delete bucket_inst_cache; - } - - virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) { return 0; } - virtual const std::string get_name() const override { - return "motr"; - } - - virtual std::unique_ptr get_user(const rgw_user& u) override; - virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y, std::unique_ptr* user) override; - virtual int get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr* user) override; - virtual int get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) override; - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) override; - virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) override; - virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr* bucket, optional_yield y) override; - virtual bool is_meta_master() override; - virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv, - bufferlist& in_data, JSONParser *jp, req_info& info, - optional_yield y) override; - virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, - bufferlist& in_data, - RGWXMLDecoder::XMLParser* parser, req_info& info, - optional_yield y) override; - virtual Zone* get_zone() { return &zone; } - virtual std::string zone_unique_id(uint64_t unique_num) override; - virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; - virtual 
int get_zonegroup(const std::string& id, std::unique_ptr* zonegroup) override; - virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list& zone_ids) override; - virtual int cluster_stat(RGWClusterStat& stats) override; - virtual std::unique_ptr get_lifecycle(void) override; - virtual std::unique_ptr get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj, - req_state* s, rgw::notify::EventType event_type, optional_yield y, const std::string* object_name=nullptr) override; - virtual std::unique_ptr get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj, - rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, - std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) override; - virtual RGWLC* get_rgwlc(void) override { return NULL; } - virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return NULL; } - - virtual int log_usage(const DoutPrefixProvider *dpp, std::map& usage_info) override; - virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override; - virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, - const std::map& meta) override; - virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override; - virtual void get_quota(RGWQuota& quota) override; - virtual int set_buckets_enabled(const DoutPrefixProvider *dpp, std::vector& buckets, bool enabled) override; - virtual int get_sync_policy_handler(const DoutPrefixProvider *dpp, - std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef *phandler, - optional_yield y) override; - virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override; - virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { return; } - virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, 
const rgw_zone_id& source_zone, boost::container::flat_map>& shard_ids) override {} - virtual int clear_usage(const DoutPrefixProvider *dpp) override { return 0; } - virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool *is_truncated, - RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - virtual int get_config_key_val(std::string name, bufferlist* bl) override; - virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override; - virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list& keys, bool* truncated) override; - virtual void meta_list_keys_complete(void* handle) override; - virtual std::string meta_get_marker(void *handle) override; - virtual int meta_remove(const DoutPrefixProvider *dpp, std::string& metadata_key, optional_yield y) override; - - virtual const RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; } - virtual std::string get_host_id() { return ""; } - - virtual std::unique_ptr get_lua_manager() override; - virtual std::unique_ptr get_role(std::string name, - std::string tenant, - std::string path="", - std::string trust_policy="", - std::string max_session_duration_str="", - std::multimap tags={}) override; - virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; - virtual std::unique_ptr get_role(std::string id) override; - virtual int get_roles(const DoutPrefixProvider *dpp, - optional_yield y, - const std::string& path_prefix, - const std::string& tenant, - std::vector>& roles) override; - virtual std::unique_ptr get_oidc_provider() override; - virtual int get_oidc_providers(const DoutPrefixProvider *dpp, - const std::string& tenant, - std::vector>& providers) override; - virtual std::unique_ptr 
get_append_writer(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - const std::string& unique_tag, - uint64_t position, - uint64_t *cur_accounted_size) override; - virtual std::unique_ptr get_atomic_writer(const DoutPrefixProvider *dpp, - optional_yield y, - rgw::sal::Object* obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t olh_epoch, - const std::string& unique_tag) override; - virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override; - virtual bool valid_placement(const rgw_placement_rule& rule) override; - - virtual void finalize(void) override; - - virtual CephContext *ctx(void) override { - return cctx; - } - - virtual void register_admin_apis(RGWRESTMgr* mgr) override { }; - - int open_idx(struct m0_uint128 *id, bool create, struct m0_idx *out); - void close_idx(struct m0_idx *idx) { m0_idx_fini(idx); } - int do_idx_op(struct m0_idx *, enum m0_idx_opcode opcode, - std::vector& key, std::vector& val, bool update = false); - - int do_idx_next_op(struct m0_idx *idx, - std::vector>& key_vec, - std::vector>& val_vec); - int next_query_by_name(std::string idx_name, std::vector& key_str_vec, - std::vector& val_bl_vec, - std::string prefix="", std::string delim=""); - - void index_name_to_motr_fid(std::string iname, struct m0_uint128 *fid); - int open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx); - int create_motr_idx_by_name(std::string iname); - int delete_motr_idx_by_name(std::string iname); - int do_idx_op_by_name(std::string idx_name, enum m0_idx_opcode opcode, - std::string key_str, bufferlist &bl, bool update=true); - int check_n_create_global_indices(); - int store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key); - int delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key); - int store_email_info(const 
DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info); - - int init_metadata_cache(const DoutPrefixProvider *dpp, CephContext *cct); - MotrMetaCache* get_obj_meta_cache() {return obj_meta_cache;} - MotrMetaCache* get_user_cache() {return user_cache;} - MotrMetaCache* get_bucket_inst_cache() {return bucket_inst_cache;} -}; - -struct obj_time_weight { - real_time mtime; - uint32_t zone_short_id; - uint64_t pg_ver; - bool high_precision; - - obj_time_weight() : zone_short_id(0), pg_ver(0), high_precision(false) {} - - bool compare_low_precision(const obj_time_weight& rhs) { - struct timespec l = ceph::real_clock::to_timespec(mtime); - struct timespec r = ceph::real_clock::to_timespec(rhs.mtime); - l.tv_nsec = 0; - r.tv_nsec = 0; - if (l > r) { - return false; - } - if (l < r) { - return true; - } - if (!zone_short_id || !rhs.zone_short_id) { - /* don't compare zone ids, if one wasn't provided */ - return false; - } - if (zone_short_id != rhs.zone_short_id) { - return (zone_short_id < rhs.zone_short_id); - } - return (pg_ver < rhs.pg_ver); - - } - - bool operator<(const obj_time_weight& rhs) { - if (!high_precision || !rhs.high_precision) { - return compare_low_precision(rhs); - } - if (mtime > rhs.mtime) { - return false; - } - if (mtime < rhs.mtime) { - return true; - } - if (!zone_short_id || !rhs.zone_short_id) { - /* don't compare zone ids, if one wasn't provided */ - return false; - } - if (zone_short_id != rhs.zone_short_id) { - return (zone_short_id < rhs.zone_short_id); - } - return (pg_ver < rhs.pg_ver); - } - - void init(const real_time& _mtime, uint32_t _short_id, uint64_t _pg_ver) { - mtime = _mtime; - zone_short_id = _short_id; - pg_ver = _pg_ver; - } - - void init(RGWObjState *state) { - mtime = state->mtime; - zone_short_id = state->zone_short_id; - pg_ver = state->pg_ver; - } -}; - -inline std::ostream& operator<<(std::ostream& out, const obj_time_weight &o) { - out << o.mtime; - - if (o.zone_short_id != 0 || o.pg_ver != 0) { - out << 
"[zid=" << o.zone_short_id << ", pgv=" << o.pg_ver << "]"; - } - - return out; -} - -} // namespace rgw::sal