list(APPEND librgw_common_srcs rgw_sal_dbstore.cc)
endif()
if(WITH_RADOSGW_MOTR)
- list(APPEND librgw_common_srcs rgw_sal_motr.cc)
+ list(APPEND librgw_common_srcs driver/motr/rgw_sal_motr.cc)
endif()
if(WITH_RADOSGW_DAOS)
- list(APPEND librgw_common_srcs rgw_sal_daos.cc)
+ list(APPEND librgw_common_srcs driver/daos/rgw_sal_daos.cc)
endif()
if(WITH_JAEGER)
list(APPEND librgw_common_srcs rgw_tracer.cc)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=2 sw=2 expandtab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * SAL implementation for the CORTX DAOS backend
+ *
+ * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_sal_daos.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <filesystem>
+#include <system_error>
+
+#include "common/Clock.h"
+#include "common/errno.h"
+#include "rgw_bucket.h"
+#include "rgw_compression.h"
+#include "rgw_sal.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace fs = std::filesystem;
+
+namespace rgw::sal {
+
+using ::ceph::decode;
+using ::ceph::encode;
+
+int DaosUser::list_buckets(const DoutPrefixProvider* dpp, const string& marker,
+ const string& end_marker, uint64_t max,
+ bool need_stats, BucketList& buckets,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: list_user_buckets: marker=" << marker
+ << " end_marker=" << end_marker << " max=" << max << dendl;
+ int ret = 0;
+ bool is_truncated = false;
+ buckets.clear();
+ vector<struct ds3_bucket_info> bucket_infos(max);
+ daos_size_t bcount = bucket_infos.size();
+ vector<vector<uint8_t>> values(bcount, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
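+ // Bind each bucket_info entry to its preallocated encode buffer so that
+ // ds3_bucket_list can fill the serialized bucket records in place.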
+ for (daos_size_t i = 0; i < bcount; i++) {
+ bucket_infos[i].encoded = values[i].data();
+ bucket_infos[i].encoded_length = values[i].size();
+ }
+
+ char daos_marker[DS3_MAX_BUCKET_NAME];
+ std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker));
+ ret = ds3_bucket_list(&bcount, bucket_infos.data(), daos_marker,
+ &is_truncated, store->ds3, nullptr);
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_list: bcount=" << bcount
+ << " ret=" << ret << dendl;
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list failed!" << ret << dendl;
+ return ret;
+ }
+
+ bucket_infos.resize(bcount);
+ values.resize(bcount);
+
+ for (const auto& bi : bucket_infos) {
+ DaosBucketInfo dbinfo;
+ bufferlist bl;
+ bl.append(reinterpret_cast<char*>(bi.encoded), bi.encoded_length);
+ auto iter = bl.cbegin();
+ dbinfo.decode(iter);
+ buckets.add(std::make_unique<DaosBucket>(this->store, dbinfo.info, this));
+ }
+
+ buckets.set_truncated(is_truncated);
+ return 0;
+}
+
+int DaosUser::create_bucket(
+ const DoutPrefixProvider* dpp, const rgw_bucket& b,
+ const std::string& zonegroup_id, rgw_placement_rule& placement_rule,
+ std::string& swift_ver_location, const RGWQuotaInfo* pquota_info,
+ const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info,
+ obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, bool* existed,
+ req_info& req_info, std::unique_ptr<Bucket>* bucket_out, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: create_bucket:" << b.name << dendl;
+ int ret;
+ std::unique_ptr<Bucket> bucket;
+
+ // Look up the bucket. Create it if it doesn't exist.
+ ret = this->store->get_bucket(dpp, this, b, &bucket, y);
+ if (ret != 0 && ret != -ENOENT) {
+ return ret;
+ }
+
+ if (ret != -ENOENT) {
+ *existed = true;
+ if (swift_ver_location.empty()) {
+ swift_ver_location = bucket->get_info().swift_ver_location;
+ }
+ placement_rule.inherit_from(bucket->get_info().placement_rule);
+
+ // TODO: ACL policy
+ // // don't allow changes to the acl policy
+ // RGWAccessControlPolicy old_policy(ctx());
+ // int rc = rgw_op_get_bucket_policy_from_attr(
+ // dpp, this, u, bucket->get_attrs(), &old_policy, y);
+ // if (rc >= 0 && old_policy != policy) {
+ // bucket_out->swap(bucket);
+ // return -EEXIST;
+ //}
+ } else {
+ placement_rule.name = "default";
+ placement_rule.storage_class = "STANDARD";
+ bucket = std::make_unique<DaosBucket>(store, b, this);
+ bucket->set_attrs(attrs);
+
+ *existed = false;
+ }
+
+ // TODO: how to handle zone and multi-site.
+
+ if (!*existed) {
+ info.placement_rule = placement_rule;
+ info.bucket = b;
+ info.owner = this->get_info().user_id;
+ info.zonegroup = zonegroup_id;
+ info.creation_time = ceph::real_clock::now();
+ if (obj_lock_enabled)
+ info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
+ bucket->set_version(ep_objv);
+ bucket->get_info() = info;
+
+ // Create a new bucket:
+ DaosBucket* daos_bucket = static_cast<DaosBucket*>(bucket.get());
+ bufferlist bl;
+ std::unique_ptr<struct ds3_bucket_info> bucket_info =
+ daos_bucket->get_encoded_info(bl, ceph::real_time());
+ ret = ds3_bucket_create(bucket->get_name().c_str(), bucket_info.get(),
+ nullptr, store->ds3, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_create failed! ret=" << ret
+ << dendl;
+ return ret;
+ }
+ } else {
+ bucket->set_version(ep_objv);
+ bucket->get_info() = info;
+ }
+
+ bucket_out->swap(bucket);
+
+ return ret;
+}
+
+int DaosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y,
+ RGWStorageStats* stats,
+ ceph::real_time* last_stats_sync,
+ ceph::real_time* last_stats_update) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+/* stats - Not for first pass */
+int DaosUser::read_stats_async(const DoutPrefixProvider* dpp,
+ RGWGetUserStats_CB* cb) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosUser::complete_flush_stats(const DoutPrefixProvider* dpp,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosUser::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch, uint32_t max_entries,
+ bool* is_truncated, RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosUser::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) {
+ const string name = info.user_id.to_str();
+ ldpp_dout(dpp, 20) << "DEBUG: load_user, name=" << name << dendl;
+
+ DaosUserInfo duinfo;
+ int ret = read_user(dpp, name, &duinfo);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: load_user failed, name=" << name << dendl;
+ return ret;
+ }
+
+ info = duinfo.info;
+ attrs = duinfo.attrs;
+ objv_tracker.read_version = duinfo.user_version;
+ return 0;
+}
+
+int DaosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp,
+ Attrs& new_attrs, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs
+ << dendl;
+ for (auto& it : new_attrs) {
+ attrs[it.first] = it.second;
+ }
+ return store_user(dpp, y, false);
+}
+
+int DaosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y,
+ bool exclusive, RGWUserInfo* old_info) {
+ const string name = info.user_id.to_str();
+ ldpp_dout(dpp, 10) << "DEBUG: Store_user(): User name=" << name << dendl;
+
+ // Read user
+ int ret = 0;
+ struct DaosUserInfo duinfo;
+ ret = read_user(dpp, name, &duinfo);
+ obj_version obj_ver = duinfo.user_version;
+ std::unique_ptr<struct ds3_user_info> old_user_info;
+ std::vector<const char*> old_access_ids;
+
+ // Check if the user already exists
+ if (ret == 0 && obj_ver.ver) {
+ // already exists.
+
+ if (old_info) {
+ *old_info = duinfo.info;
+ }
+
+ if (objv_tracker.read_version.ver != obj_ver.ver) {
+ // Object version mismatch.. return ECANCELED
+ ret = -ECANCELED;
+ ldpp_dout(dpp, 0) << "User Read version mismatch read_version="
+ << objv_tracker.read_version.ver
+ << " obj_ver=" << obj_ver.ver << dendl;
+ return ret;
+ }
+
+ if (exclusive) {
+ // return
+ return ret;
+ }
+ obj_ver.ver++;
+
+ for (auto const& [id, key] : duinfo.info.access_keys) {
+ old_access_ids.push_back(id.c_str());
+ }
+ old_user_info.reset(
+ new ds3_user_info{.name = duinfo.info.user_id.to_str().c_str(),
+ .email = duinfo.info.user_email.c_str(),
+ .access_ids = old_access_ids.data(),
+ .access_ids_nr = old_access_ids.size()});
+ } else {
+ obj_ver.ver = 1;
+ obj_ver.tag = "UserTAG";
+ }
+
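+ // Re-encode the user record with the updated version and hand it to
+ // ds3_user_set together with the previous record (if any), which carries
+ // the old access ids.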
+ bufferlist bl;
+ std::unique_ptr<struct ds3_user_info> user_info =
+ get_encoded_info(bl, obj_ver);
+
+ ret = ds3_user_set(name.c_str(), user_info.get(), old_user_info.get(),
+ store->ds3, nullptr);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name
+ << " ret=" << ret << dendl;
+ }
+
+ return ret;
+}
+
+int DaosUser::read_user(const DoutPrefixProvider* dpp, std::string name,
+ DaosUserInfo* duinfo) {
+ // Initialize ds3_user_info
+ bufferlist bl;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
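+ // Reserve DS3_MAX_ENCODED_LEN bytes in the bufferlist and let ds3_user_get
+ // write the encoded user record directly into it.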
+ struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
+ .encoded_length = size};
+
+ int ret = ds3_user_get(name.c_str(), &user_info, store->ds3, nullptr);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "Error: ds3_user_get failed, name=" << name
+ << " ret=" << ret << dendl;
+ return ret;
+ }
+
+ // Decode
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ duinfo->decode(iter);
+ return ret;
+}
+
+std::unique_ptr<struct ds3_user_info> DaosUser::get_encoded_info(
+ bufferlist& bl, obj_version& obj_ver) {
+ // Encode user data
+ struct DaosUserInfo duinfo;
+ duinfo.info = info;
+ duinfo.attrs = attrs;
+ duinfo.user_version = obj_ver;
+ duinfo.encode(bl);
+
+ // Initialize ds3_user_info
+ access_ids.clear();
+ for (auto const& [id, key] : info.access_keys) {
+ access_ids.push_back(id.c_str());
+ }
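+ // NOTE: the returned struct holds raw pointers into bl and access_ids, so
+ // both must stay alive until the ds3 call that consumes it has completed.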
+ return std::unique_ptr<struct ds3_user_info>(
+ new ds3_user_info{.name = info.user_id.to_str().c_str(),
+ .email = info.user_email.c_str(),
+ .access_ids = access_ids.data(),
+ .access_ids_nr = access_ids.size(),
+ .encoded = bl.c_str(),
+ .encoded_length = bl.length()});
+}
+
+int DaosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) {
+ const string name = info.user_id.to_str();
+
+ // TODO: the object version is expected to be passed in as a method argument;
+ // see int DB::remove_user(const DoutPrefixProvider *dpp,
+ // RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv)
+ obj_version obj_ver;
+ bufferlist bl;
+ std::unique_ptr<struct ds3_user_info> user_info =
+ get_encoded_info(bl, obj_ver);
+
+ // Remove user
+ int ret = ds3_user_remove(name.c_str(), user_info.get(), store->ds3, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name
+ << " ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+DaosBucket::~DaosBucket() { close(nullptr); }
+
+int DaosBucket::open(const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: open, name=" << info.bucket.name.c_str()
+ << dendl;
+ // Idempotent
+ if (is_open()) {
+ return 0;
+ }
+
+ int ret = ds3_bucket_open(get_name().c_str(), &ds3b, store->ds3, nullptr);
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_open, name=" << get_name()
+ << ", ret=" << ret << dendl;
+
+ return ret;
+}
+
+int DaosBucket::close(const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: close" << dendl;
+ // Idempotent
+ if (!is_open()) {
+ return 0;
+ }
+
+ int ret = ds3_bucket_close(ds3b, nullptr);
+ ds3b = nullptr;
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_close ret=" << ret << dendl;
+
+ return ret;
+}
+
+std::unique_ptr<struct ds3_bucket_info> DaosBucket::get_encoded_info(
+ bufferlist& bl, ceph::real_time _mtime) {
+ DaosBucketInfo dbinfo;
+ dbinfo.info = info;
+ dbinfo.bucket_attrs = attrs;
+ dbinfo.mtime = _mtime;
+ dbinfo.bucket_version = bucket_version;
+ dbinfo.encode(bl);
+
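+ // The returned struct points into bl, so the caller must keep bl alive
+ // until the ds3 call that consumes it has completed.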
+ auto bucket_info = std::make_unique<struct ds3_bucket_info>();
+ bucket_info->encoded = bl.c_str();
+ bucket_info->encoded_length = bl.length();
+ std::strncpy(bucket_info->name, get_name().c_str(), sizeof(bucket_info->name));
+ return bucket_info;
+}
+
+int DaosBucket::remove_bucket(const DoutPrefixProvider* dpp,
+ bool delete_children, bool forward_to_master,
+ req_info* req_info, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: remove_bucket, delete_children="
+
+ << delete_children
+
+ << " forward_to_master=" << forward_to_master << dendl;
+
+ return ds3_bucket_destroy(get_name().c_str(), delete_children, store->ds3,
+ nullptr);
+}
+
+int DaosBucket::remove_bucket_bypass_gc(int concurrent_max,
+ bool keep_index_consistent,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: remove_bucket_bypass_gc, concurrent_max="
+
+ << concurrent_max
+
+ << " keep_index_consistent=" << keep_index_consistent
+
+ << dendl;
+ return ds3_bucket_destroy(get_name().c_str(), true, store->ds3, nullptr);
+}
+
+int DaosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive,
+ ceph::real_time _mtime) {
+ ldpp_dout(dpp, 20) << "DEBUG: put_info(): bucket name=" << get_name()
+ << dendl;
+
+ int ret = open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ bufferlist bl;
+ std::unique_ptr<struct ds3_bucket_info> bucket_info =
+ get_encoded_info(bl, ceph::real_time());
+
+ ret = ds3_bucket_set_info(bucket_info.get(), ds3b, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_set_info failed: " << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y,
+ bool get_stats) {
+ ldpp_dout(dpp, 20) << "DEBUG: load_bucket(): bucket name=" << get_name()
+ << dendl;
+ int ret = open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ bufferlist bl;
+ DaosBucketInfo dbinfo;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
+ struct ds3_bucket_info bucket_info = {.encoded = bl.append_hole(size).c_str(),
+ .encoded_length = size};
+
+ ret = ds3_bucket_get_info(&bucket_info, ds3b, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_get_info failed: " << ret << dendl;
+ return ret;
+ }
+
+ auto iter = bl.cbegin();
+ dbinfo.decode(iter);
+ info = dbinfo.info;
+ rgw_placement_rule placement_rule;
+ placement_rule.name = "default";
+ placement_rule.storage_class = "STANDARD";
+ info.placement_rule = placement_rule;
+
+ attrs = dbinfo.bucket_attrs;
+ mtime = dbinfo.mtime;
+ bucket_version = dbinfo.bucket_version;
+ return ret;
+}
+
+/* stats - Not for first pass */
+int DaosBucket::read_stats(const DoutPrefixProvider* dpp,
+ const bucket_index_layout_generation& idx_layout,
+ int shard_id, std::string* bucket_ver,
+ std::string* master_ver,
+ std::map<RGWObjCategory, RGWStorageStats>& stats,
+ std::string* max_marker, bool* syncstopped) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::read_stats_async(
+ const DoutPrefixProvider* dpp,
+ const bucket_index_layout_generation& idx_layout, int shard_id,
+ RGWGetBucketStats_CB* ctx) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::sync_user_stats(const DoutPrefixProvider* dpp,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::update_container_stats(const DoutPrefixProvider* dpp) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::check_bucket_shards(const DoutPrefixProvider* dpp) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::chown(const DoutPrefixProvider* dpp, User& new_user,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+/* Make sure to call load_bucket() if you need it first */
+bool DaosBucket::is_owner(User* user) {
+ return (info.owner.compare(user->get_id()) == 0);
+}
+
+int DaosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) {
+ /* XXX: Check if bucket contains any objects */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota,
+ uint64_t obj_size, optional_yield y,
+ bool check_size_only) {
+ /* Not Handled in the first pass as stats are also needed */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp,
+ Attrs& new_attrs, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs
+ << dendl;
+ for (auto& it : new_attrs) {
+ attrs[it.first] = it.second;
+ }
+
+ return put_info(dpp, y, ceph::real_time());
+}
+
+int DaosBucket::try_refresh_info(const DoutPrefixProvider* dpp,
+ ceph::real_time* pmtime) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+/* XXX: usage and stats not supported in the first pass */
+int DaosBucket::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch, uint32_t max_entries,
+ bool* is_truncated, RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::remove_objs_from_index(
+ const DoutPrefixProvider* dpp,
+ std::list<rgw_obj_index_key>& objs_to_unlink) {
+ /* XXX: CHECK: Unlike RadosStore, there is no separate bucket index table.
+ * Delete all the objects in the list from the object table of this
+ * bucket
+ */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::check_index(
+ const DoutPrefixProvider* dpp,
+ std::map<RGWObjCategory, RGWStorageStats>& existing_stats,
+ std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) {
+ /* XXX: stats not supported yet */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::rebuild_index(const DoutPrefixProvider* dpp) {
+ /* there is no index table in DAOS. Not applicable */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::set_tag_timeout(const DoutPrefixProvider* dpp,
+ uint64_t timeout) {
+ /* XXX: CHECK: set tag timeout for all the bucket objects? */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::purge_instance(const DoutPrefixProvider* dpp) {
+ /* XXX: CHECK: for DAOS only single instance supported.
+ * Remove all the objects for that instance? Anything extra needed?
+ */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosBucket::set_acl(const DoutPrefixProvider* dpp,
+ RGWAccessControlPolicy& acl, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: set_acl" << dendl;
+ int ret = 0;
+ bufferlist aclbl;
+
+ acls = acl;
+ acl.encode(aclbl);
+
+ Attrs attrs = get_attrs();
+ attrs[RGW_ATTR_ACL] = aclbl;
+
+ return ret;
+}
+
+std::unique_ptr<Object> DaosBucket::get_object(const rgw_obj_key& k) {
+ return std::make_unique<DaosObject>(this->store, k, this);
+}
+
+bool compare_rgw_bucket_dir_entry(rgw_bucket_dir_entry& entry1,
+ rgw_bucket_dir_entry& entry2) {
+ return (entry1.key < entry2.key);
+}
+
+bool compare_multipart_upload(std::unique_ptr<MultipartUpload>& upload1,
+ std::unique_ptr<MultipartUpload>& upload2) {
+ return (upload1->get_key() < upload2->get_key());
+}
+
+int DaosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max,
+ ListResults& results, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: list bucket=" << get_name() << " max=" << max
+ << " params=" << params << dendl;
+ // End
+ if (max == 0) {
+ return 0;
+ }
+
+ int ret = open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Init needed structures
+ vector<struct ds3_object_info> object_infos(max);
+ uint32_t nobj = object_infos.size();
+ vector<vector<uint8_t>> values(nobj, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
+ for (uint32_t i = 0; i < nobj; i++) {
+ object_infos[i].encoded = values[i].data();
+ object_infos[i].encoded_length = values[i].size();
+ }
+
+ vector<struct ds3_common_prefix_info> common_prefixes(max);
+ uint32_t ncp = common_prefixes.size();
+
+ char daos_marker[DS3_MAX_KEY_BUFF];
+ std::strncpy(daos_marker, params.marker.get_oid().c_str(), sizeof(daos_marker));
+
+ ret = ds3_bucket_list_obj(&nobj, object_infos.data(), &ncp,
+ common_prefixes.data(), params.prefix.c_str(),
+ params.delim.c_str(), daos_marker,
+ params.list_versions, &results.is_truncated, ds3b);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list_obj failed, name="
+ << get_name() << ", ret=" << ret << dendl;
+ return ret;
+ }
+
+ object_infos.resize(nobj);
+ values.resize(nobj);
+ common_prefixes.resize(ncp);
+
+ // Fill common prefixes
+ for (auto const& cp : common_prefixes) {
+ results.common_prefixes[cp.prefix] = true;
+ }
+
+ // Decode objs
+ for (auto const& obj : object_infos) {
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+ bl.append(reinterpret_cast<char*>(obj.encoded), obj.encoded_length);
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+ if (params.list_versions || ent.is_visible()) {
+ results.objs.emplace_back(std::move(ent));
+ }
+ }
+
+ if (!params.allow_unordered) {
+ std::sort(results.objs.begin(), results.objs.end(),
+ compare_rgw_bucket_dir_entry);
+ }
+
+ return ret;
+}
+
+int DaosBucket::list_multiparts(
+ const DoutPrefixProvider* dpp, const string& prefix, string& marker,
+ const string& delim, const int& max_uploads,
+ vector<std::unique_ptr<MultipartUpload>>& uploads,
+ map<string, bool>* common_prefixes, bool* is_truncated) {
+ ldpp_dout(dpp, 20) << "DEBUG: list_multiparts" << dendl;
+ // End of uploading
+ if (max_uploads == 0) {
+ *is_truncated = false;
+ return 0;
+ }
+
+ // Init needed structures
+ vector<struct ds3_multipart_upload_info> multipart_upload_infos(max_uploads);
+ uint32_t nmp = multipart_upload_infos.size();
+ vector<vector<uint8_t>> values(nmp, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
+ for (uint32_t i = 0; i < nmp; i++) {
+ multipart_upload_infos[i].encoded = values[i].data();
+ multipart_upload_infos[i].encoded_length = values[i].size();
+ }
+
+ vector<struct ds3_common_prefix_info> cps(max_uploads);
+ uint32_t ncp = cps.size();
+
+ char daos_marker[DS3_MAX_KEY_BUFF];
+ std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker));
+
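+ // List the in-progress uploads for this bucket; each entry comes back as an
+ // encoded rgw_bucket_dir_entry plus the matching common prefixes.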
+ int ret = ds3_bucket_list_multipart(
+ get_name().c_str(), &nmp, multipart_upload_infos.data(), &ncp, cps.data(),
+ prefix.c_str(), delim.c_str(), daos_marker, is_truncated, store->ds3);
+
+ multipart_upload_infos.resize(nmp);
+ values.resize(nmp);
+ cps.resize(ncp);
+
+ // Fill common prefixes
+ for (auto const& cp : cps) {
+ (*common_prefixes)[cp.prefix] = true;
+ }
+
+ for (auto const& mp : multipart_upload_infos) {
+ // Decode the xattr
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+ bl.append(reinterpret_cast<char*>(mp.encoded), mp.encoded_length);
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+ string name = ent.key.name;
+
+ ACLOwner owner(rgw_user(ent.meta.owner));
+ owner.set_name(ent.meta.owner_display_name);
+ uploads.push_back(this->get_multipart_upload(
+ name, mp.upload_id, std::move(owner), ent.meta.mtime));
+ }
+
+ // Sort uploads
+ std::sort(uploads.begin(), uploads.end(), compare_multipart_upload);
+
+ return ret;
+}
+
+int DaosBucket::abort_multiparts(const DoutPrefixProvider* dpp,
+ CephContext* cct) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+void DaosStore::finalize(void) {
+ ldout(cctx, 20) << "DEBUG: finalize" << dendl;
+ int ret;
+
+ ret = ds3_disconnect(ds3, nullptr);
+ if (ret != 0) {
+ ldout(cctx, 0) << "ERROR: ds3_disconnect() failed: " << ret << dendl;
+ }
+ ds3 = nullptr;
+
+ ret = ds3_fini();
+ if (ret != 0) {
+ ldout(cctx, 0) << "ERROR: daos_fini() failed: " << ret << dendl;
+ }
+}
+
+int DaosStore::initialize(CephContext* cct, const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: initialize" << dendl;
+ int ret = ds3_init();
+
+ // If DS3 init failed, tolerate the case where it has already been initialized
+ if (ret != 0 && ret != DER_ALREADY) {
+ ldout(cct, 0) << "ERROR: ds3_init() failed: " << ret << dendl;
+ return ret;
+ }
+
+ // XXX: these params should be taken from config settings and
+ // cct somehow?
+ const auto& daos_pool = cct->_conf.get_val<std::string>("daos_pool");
+ ldout(cct, 20) << "INFO: daos pool: " << daos_pool << dendl;
+
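+ // Connect to the configured DAOS pool; the resulting ds3 handle is shared by
+ // all subsequent store operations.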
+ ret = ds3_connect(daos_pool.c_str(), nullptr, &ds3, nullptr);
+
+ if (ret != 0) {
+ ldout(cct, 0) << "ERROR: ds3_connect() failed: " << ret << dendl;
+ ds3_fini();
+ }
+
+ return ret;
+}
+
+const std::string& DaosZoneGroup::get_endpoint() const {
+ if (!group.endpoints.empty()) {
+ return group.endpoints.front();
+ } else {
+ // use zonegroup's master zone endpoints
+ auto z = group.zones.find(group.master_zone);
+ if (z != group.zones.end() && !z->second.endpoints.empty()) {
+ return z->second.endpoints.front();
+ }
+ }
+ return empty;
+}
+
+bool DaosZoneGroup::placement_target_exists(std::string& target) const {
+ return !!group.placement_targets.count(target);
+}
+
+int DaosZoneGroup::get_placement_target_names(
+ std::set<std::string>& names) const {
+ for (const auto& target : group.placement_targets) {
+ names.emplace(target.second.name);
+ }
+
+ return 0;
+}
+
+int DaosZoneGroup::get_placement_tier(const rgw_placement_rule& rule,
+ std::unique_ptr<PlacementTier>* tier) {
+ std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer;
+ titer = group.placement_targets.find(rule.name);
+ if (titer == group.placement_targets.end()) {
+ return -ENOENT;
+ }
+
+ const auto& target_rule = titer->second;
+ std::map<std::string, RGWZoneGroupPlacementTier>::const_iterator ttier;
+ ttier = target_rule.tier_targets.find(rule.storage_class);
+ if (ttier == target_rule.tier_targets.end()) {
+ // not found
+ return -ENOENT;
+ }
+
+ PlacementTier* t;
+ t = new DaosPlacementTier(store, ttier->second);
+ if (!t) return -ENOMEM;
+
+ tier->reset(t);
+ return 0;
+}
+
+ZoneGroup& DaosZone::get_zonegroup() { return zonegroup; }
+
+int DaosZone::get_zonegroup(const std::string& id,
+ std::unique_ptr<ZoneGroup>* group) {
+ /* XXX: for now only one zonegroup supported */
+ ZoneGroup* zg;
+ zg = new DaosZoneGroup(store, zonegroup.get_group());
+
+ group->reset(zg);
+ return 0;
+}
+
+const rgw_zone_id& DaosZone::get_id() { return cur_zone_id; }
+
+const std::string& DaosZone::get_name() const {
+ return zone_params->get_name();
+}
+
+bool DaosZone::is_writeable() { return true; }
+
+bool DaosZone::get_redirect_endpoint(std::string* endpoint) { return false; }
+
+bool DaosZone::has_zonegroup_api(const std::string& api) const { return false; }
+
+const std::string& DaosZone::get_current_period_id() {
+ return current_period->get_id();
+}
+
+std::unique_ptr<LuaManager> DaosStore::get_lua_manager() {
+ return std::make_unique<DaosLuaManager>(this);
+}
+
+int DaosObject::get_obj_state(const DoutPrefixProvider* dpp,
+ RGWObjState** _state, optional_yield y,
+ bool follow_olh) {
+ // Get object's metadata (those stored in rgw_bucket_dir_entry)
+ ldpp_dout(dpp, 20) << "DEBUG: get_obj_state" << dendl;
+ rgw_bucket_dir_entry ent;
+ *_state = &state; // state is required even if a failure occurs
+
+ int ret = get_dir_entry_attrs(dpp, &ent);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Set object state.
+ state.exists = true;
+ state.size = ent.meta.size;
+ state.accounted_size = ent.meta.size;
+ state.mtime = ent.meta.mtime;
+
+ state.has_attrs = true;
+ bufferlist etag_bl;
+ string& etag = ent.meta.etag;
+ ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag
+ << dendl;
+ etag_bl.append(etag);
+ state.attrset[RGW_ATTR_ETAG] = etag_bl;
+ return 0;
+}
+
+DaosObject::~DaosObject() { close(nullptr); }
+
+int DaosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
+ Attrs* delattrs, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: DaosObject::set_obj_attrs()" << dendl;
+ // TODO handle target_obj
+ // Get object's metadata (those stored in rgw_bucket_dir_entry)
+ rgw_bucket_dir_entry ent;
+ int ret = get_dir_entry_attrs(dpp, &ent);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Update object metadata
+ Attrs updateattrs = setattrs == nullptr ? attrs : *setattrs;
+ if (delattrs) {
+ for (auto const& [attr, attrval] : *delattrs) {
+ updateattrs.erase(attr);
+ }
+ }
+
+ ret = set_dir_entry_attrs(dpp, &ent, &updateattrs);
+ return ret;
+}
+
+int DaosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp,
+ rgw_obj* target_obj) {
+ ldpp_dout(dpp, 20) << "DEBUG: DaosObject::get_obj_attrs()" << dendl;
+ // TODO handle target_obj
+ // Get object's metadata (those stored in rgw_bucket_dir_entry)
+ rgw_bucket_dir_entry ent;
+ int ret = get_dir_entry_attrs(dpp, &ent, &attrs);
+ return ret;
+}
+
+int DaosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) {
+ // Get object's metadata (those stored in rgw_bucket_dir_entry)
+ ldpp_dout(dpp, 20) << "DEBUG: modify_obj_attrs" << dendl;
+ rgw_bucket_dir_entry ent;
+ int ret = get_dir_entry_attrs(dpp, &ent, &attrs);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Update object attrs
+ set_atomic();
+ attrs[attr_name] = attr_val;
+
+ ret = set_dir_entry_attrs(dpp, &ent, &attrs);
+ return ret;
+}
+
+int DaosObject::delete_obj_attrs(const DoutPrefixProvider* dpp,
+ const char* attr_name, optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: delete_obj_attrs" << dendl;
+ rgw_obj target = get_obj();
+ Attrs rmattr;
+ bufferlist bl;
+
+ rmattr[attr_name] = bl;
+ return set_obj_attrs(dpp, nullptr, &rmattr, y);
+}
+
+bool DaosObject::is_expired() {
+ auto iter = attrs.find(RGW_ATTR_DELETE_AT);
+ if (iter != attrs.end()) {
+ utime_t delete_at;
+ try {
+ auto bufit = iter->second.cbegin();
+ decode(delete_at, bufit);
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0)
+ << "ERROR: " << __func__
+ << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl;
+ return false;
+ }
+
+ if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Taken from rgw_rados.cc
+void DaosObject::gen_rand_obj_instance_name() {
+ enum { OBJ_INSTANCE_LEN = 32 };
+ char buf[OBJ_INSTANCE_LEN + 1];
+
+ gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN);
+ state.obj.key.set_instance(buf);
+}
+
+int DaosObject::omap_get_vals_by_keys(const DoutPrefixProvider* dpp,
+ const std::string& oid,
+ const std::set<std::string>& keys,
+ Attrs* vals) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::omap_set_val_by_key(const DoutPrefixProvider* dpp,
+ const std::string& key, bufferlist& val,
+ bool must_exist, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) {
+ return 0;
+}
+
+std::unique_ptr<MPSerializer> DaosObject::get_serializer(
+ const DoutPrefixProvider* dpp, const std::string& lock_name) {
+ return std::make_unique<MPDaosSerializer>(dpp, store, this, lock_name);
+}
+
+int DaosObject::transition(Bucket* bucket,
+ const rgw_placement_rule& placement_rule,
+ const real_time& mtime, uint64_t olh_epoch,
+ const DoutPrefixProvider* dpp, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::transition_to_cloud(
+ Bucket* bucket, rgw::sal::PlacementTier* tier, rgw_bucket_dir_entry& o,
+ std::set<std::string>& cloud_targets, CephContext* cct, bool update_object,
+ const DoutPrefixProvider* dpp, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+bool DaosObject::placement_rules_match(rgw_placement_rule& r1,
+ rgw_placement_rule& r2) {
+ /* XXX: support single default zone and zonegroup for now */
+ return true;
+}
+
+int DaosObject::dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y,
+ Formatter* f) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::unique_ptr<Object::ReadOp> DaosObject::get_read_op() {
+ return std::make_unique<DaosObject::DaosReadOp>(this);
+}
+
+DaosObject::DaosReadOp::DaosReadOp(DaosObject* _source) : source(_source) {}
+
+int DaosObject::DaosReadOp::prepare(optional_yield y,
+ const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << __func__
+ << ": bucket=" << source->get_bucket()->get_name()
+ << dendl;
+
+ if (source->get_bucket()->versioned() && !source->have_instance()) {
+ // If the bucket is versioned and no version is specified, get the latest
+ // version
+ source->set_instance(DS3_LATEST_INSTANCE);
+ }
+
+ rgw_bucket_dir_entry ent;
+ int ret = source->get_dir_entry_attrs(dpp, &ent);
+
+ // Set the source object's attrs. The attrs are a key/value map used
+ // in send_response_data() to set response attributes, including the etag.
+ bufferlist etag_bl;
+ string& etag = ent.meta.etag;
+ ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag
+ << dendl;
+ etag_bl.append(etag.c_str(), etag.size());
+ source->get_attrs().emplace(RGW_ATTR_ETAG, std::move(etag_bl));
+
+ source->set_key(ent.key);
+ source->set_obj_size(ent.meta.size);
+ ldpp_dout(dpp, 20) << __func__ << ": object's size: " << ent.meta.size
+ << dendl;
+
+ return ret;
+}
+
+int DaosObject::DaosReadOp::read(int64_t off, int64_t end, bufferlist& bl,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl;
+ int ret = source->lookup(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Calculate size, end is inclusive
+ uint64_t size = end - off + 1;
+
+ // Read
+ ret = source->read(dpp, bl, off, size);
+ if (ret != 0) {
+ return ret;
+ }
+
+ return ret;
+}
+
+// RGWGetObj::execute() calls ReadOp::iterate() to read the object from 'off' to
+// 'end'. The returned data is processed by 'cb', a chain of post-processing
+// filters such as decompression, decryption and sending the data back to the
+// client (RGWGetObj_CB::handle_data, which in turn calls
+// RGWGetObj::get_data_cb() to send the data back).
+//
+// The POC implements a simple synchronous version of iterate() that reads
+// a block of data at a time and calls 'cb' for post-processing.
+int DaosObject::DaosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off,
+ int64_t end, RGWGetDataCB* cb,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl;
+ int ret = source->lookup(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Calculate size, end is inclusive
+ uint64_t size = end - off + 1;
+
+ // Reserve buffers and read
+ bufferlist bl;
+ ret = source->read(dpp, bl, off, size);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Call cb to process returned data.
+ ldpp_dout(dpp, 20) << __func__ << ": call cb to process data, actual=" << size
+ << dendl;
+ cb->handle_data(bl, off, size);
+ return ret;
+}
+
+int DaosObject::DaosReadOp::get_attr(const DoutPrefixProvider* dpp,
+ const char* name, bufferlist& dest,
+ optional_yield y) {
+ Attrs attrs;
+ int ret = source->get_dir_entry_attrs(dpp, nullptr, &attrs);
+ if (ret != 0) {
+ return -ENODATA;
+ }
+
+ auto search = attrs.find(name);
+ if (search == attrs.end()) {
+ return -ENODATA;
+ }
+
+ dest = search->second;
+ return 0;
+}
+
+std::unique_ptr<Object::DeleteOp> DaosObject::get_delete_op() {
+ return std::make_unique<DaosObject::DaosDeleteOp>(this);
+}
+
+DaosObject::DaosDeleteOp::DaosDeleteOp(DaosObject* _source) : source(_source) {}
+
+// Implementation of DELETE OBJ also requires DaosObject::get_obj_state()
+// to retrieve and set object's state from object's metadata.
+//
+// TODO:
+// 1. The POC only deletes the DAOS objects. It doesn't handle the
+// DeleteOp::params. Delete::delete_obj() in rgw_rados.cc shows how the rados
+// backend processes the params.
+// 2. Delete an object when its versioning is turned on.
+// 3. Handle empty directories
+// 4. Fail when file doesn't exist
+int DaosObject::DaosDeleteOp::delete_obj(const DoutPrefixProvider* dpp,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "DaosDeleteOp::delete_obj "
+ << source->get_key().get_oid() << " from "
+ << source->get_bucket()->get_name() << dendl;
+ if (source->get_instance() == "null") {
+ source->clear_instance();
+ }
+
+ // Open bucket
+ int ret = 0;
+ std::string key = source->get_key().get_oid();
+ DaosBucket* daos_bucket = source->get_daos_bucket();
+ ret = daos_bucket->open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Remove the daos object
+ ret = ds3_obj_destroy(key.c_str(), daos_bucket->ds3b);
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_destroy key=" << key << " ret=" << ret
+ << dendl;
+
+ // result.delete_marker = parent_op.result.delete_marker;
+ // result.version_id = parent_op.result.version_id;
+
+ return ret;
+}
+
+int DaosObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y,
+ bool prevent_versioning) {
+ ldpp_dout(dpp, 20) << "DEBUG: delete_object" << dendl;
+ DaosObject::DaosDeleteOp del_op(this);
+ del_op.params.bucket_owner = bucket->get_info().owner;
+ del_op.params.versioning_status = bucket->get_info().versioning_status();
+
+ return del_op.delete_obj(dpp, y);
+}
+
+int DaosObject::copy_object(
+ User* user, req_info* info, const rgw_zone_id& source_zone,
+ rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
+ rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement,
+ ceph::real_time* src_mtime, ceph::real_time* mtime,
+ const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
+ bool high_precision_time, const char* if_match, const char* if_nomatch,
+ AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
+ RGWObjCategory category, uint64_t olh_epoch,
+ boost::optional<ceph::real_time> delete_at, std::string* version_id,
+ std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*),
+ void* progress_data, const DoutPrefixProvider* dpp, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::swift_versioning_restore(bool& restored,
+ const DoutPrefixProvider* dpp) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::swift_versioning_copy(const DoutPrefixProvider* dpp,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosObject::lookup(const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: lookup" << dendl;
+ if (is_open()) {
+ return 0;
+ }
+
+ if (get_instance() == "null") {
+ clear_instance();
+ }
+
+ int ret = 0;
+ DaosBucket* daos_bucket = get_daos_bucket();
+ ret = daos_bucket->open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ds3_obj_open(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b);
+
+ if (ret == -ENOENT) {
+ ldpp_dout(dpp, 20) << "DEBUG: daos object (" << get_bucket()->get_name()
+ << ", " << get_key().get_oid()
+ << ") does not exist: ret=" << ret << dendl;
+ } else if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to open daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosObject::create(const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: create" << dendl;
+ if (is_open()) {
+ return 0;
+ }
+
+ if (get_instance() == "null") {
+ clear_instance();
+ }
+
+ int ret = 0;
+ DaosBucket* daos_bucket = get_daos_bucket();
+ ret = daos_bucket->open(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ds3_obj_create(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to create daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosObject::close(const DoutPrefixProvider* dpp) {
+ ldpp_dout(dpp, 20) << "DEBUG: close" << dendl;
+ if (!is_open()) {
+ return 0;
+ }
+
+ int ret = ds3_obj_close(ds3o);
+ ds3o = nullptr;
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_close ret=" << ret << dendl;
+ return ret;
+}
+
+int DaosObject::write(const DoutPrefixProvider* dpp, bufferlist&& data,
+ uint64_t offset) {
+ ldpp_dout(dpp, 20) << "DEBUG: write" << dendl;
+ uint64_t size = data.length();
+ int ret = ds3_obj_write(data.c_str(), offset, &size, get_daos_bucket()->ds3b,
+ ds3o, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to write into daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosObject::read(const DoutPrefixProvider* dpp, bufferlist& data,
+ uint64_t offset, uint64_t& size) {
+ ldpp_dout(dpp, 20) << "DEBUG: read" << dendl;
+ int ret = ds3_obj_read(data.append_hole(size).c_str(), offset, &size,
+ get_daos_bucket()->ds3b, ds3o, nullptr);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to read from daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+// Get the object's dirent and attrs
+int DaosObject::get_dir_entry_attrs(const DoutPrefixProvider* dpp,
+ rgw_bucket_dir_entry* ent,
+ Attrs* getattrs) {
+ ldpp_dout(dpp, 20) << "DEBUG: get_dir_entry_attrs" << dendl;
+ int ret = 0;
+ vector<uint8_t> value(DS3_MAX_ENCODED_LEN);
+ uint32_t size = value.size();
+
+ if (get_key().ns == RGW_OBJ_NS_MULTIPART) {
+ struct ds3_multipart_upload_info ui = {.encoded = value.data(),
+ .encoded_length = size};
+ ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
+ get_key().get_oid().c_str(), store->ds3);
+ } else {
+ ret = lookup(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ auto object_info = std::make_unique<struct ds3_object_info>();
+ object_info->encoded = value.data();
+ object_info->encoded_length = size;
+ ret = ds3_obj_get_info(object_info.get(), get_daos_bucket()->ds3b, ds3o);
+ size = object_info->encoded_length;
+ }
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to get info of daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ return ret;
+ }
+
+ rgw_bucket_dir_entry dummy_ent;
+ if (!ent) {
+ // if ent is not passed, use a dummy ent
+ ent = &dummy_ent;
+ }
+
+ bufferlist bl;
+ bl.append(reinterpret_cast<char*>(value.data()), size);
+ auto iter = bl.cbegin();
+ ent->decode(iter);
+ if (getattrs) {
+ decode(*getattrs, iter);
+ }
+
+ return ret;
+}
+
+// Set the object's dirent and attrs
+int DaosObject::set_dir_entry_attrs(const DoutPrefixProvider* dpp,
+ rgw_bucket_dir_entry* ent,
+ Attrs* setattrs) {
+ ldpp_dout(dpp, 20) << "DEBUG: set_dir_entry_attrs" << dendl;
+ int ret = lookup(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Set defaults
+ if (!ent) {
+ // if ent is not passed, return an error
+ return -EINVAL;
+ }
+
+ if (!setattrs) {
+ // if setattrs is not passed, use object attrs
+ setattrs = &attrs;
+ }
+
+ bufferlist wbl;
+ ent->encode(wbl);
+ encode(*setattrs, wbl);
+
+ // Write rgw_bucket_dir_entry into object xattr
+ auto object_info = std::make_unique<struct ds3_object_info>();
+ object_info->encoded = wbl.c_str();
+ object_info->encoded_length = wbl.length();
+ ret = ds3_obj_set_info(object_info.get(), get_daos_bucket()->ds3b, ds3o);
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to set info of daos object ("
+ << get_bucket()->get_name() << ", " << get_key().get_oid()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosObject::mark_as_latest(const DoutPrefixProvider* dpp,
+ ceph::real_time set_mtime) {
+ // TODO handle deletion
+ // TODO understand race conditions
+ ldpp_dout(dpp, 20) << "DEBUG: mark_as_latest" << dendl;
+
+ // Get latest version so far
+ std::unique_ptr<DaosObject> latest_object = std::make_unique<DaosObject>(
+ store, rgw_obj_key(get_name(), DS3_LATEST_INSTANCE), get_bucket());
+
+ ldpp_dout(dpp, 20) << __func__ << ": key=" << get_key().get_oid()
+ << " latest_object_key= "
+ << latest_object->get_key().get_oid() << dendl;
+
+ int ret = latest_object->lookup(dpp);
+ if (ret == 0) {
+ // Get metadata only if file exists
+ rgw_bucket_dir_entry latest_ent;
+ Attrs latest_attrs;
+ ret = latest_object->get_dir_entry_attrs(dpp, &latest_ent, &latest_attrs);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Update flags
+ latest_ent.flags = rgw_bucket_dir_entry::FLAG_VER;
+ latest_ent.meta.mtime = set_mtime;
+ ret = latest_object->set_dir_entry_attrs(dpp, &latest_ent, &latest_attrs);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ // Get or create the link [latest], make it link to the current latest
+ // version.
+ ret =
+ ds3_obj_mark_latest(get_key().get_oid().c_str(), get_daos_bucket()->ds3b);
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_mark_latest ret=" << ret << dendl;
+ return ret;
+}
+
+DaosAtomicWriter::DaosAtomicWriter(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, DaosStore* _store,
+ const rgw_user& _owner, const rgw_placement_rule* _ptail_placement_rule,
+ uint64_t _olh_epoch, const std::string& _unique_tag)
+ : StoreWriter(dpp, y),
+ store(_store),
+ owner(_owner),
+ ptail_placement_rule(_ptail_placement_rule),
+ olh_epoch(_olh_epoch),
+ unique_tag(_unique_tag),
+ obj(_store, obj->get_key(), obj->get_bucket()) {}
+
+int DaosAtomicWriter::prepare(optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: prepare" << dendl;
+ int ret = obj.create(dpp);
+ return ret;
+}
+
+// TODO: Handle concurrent writes, a unique object id is a possible solution, or
+// use DAOS transactions
+// XXX: Do we need to accumulate writes as motr does?
+int DaosAtomicWriter::process(bufferlist&& data, uint64_t offset) {
+ ldpp_dout(dpp, 20) << "DEBUG: process" << dendl;
+ if (data.length() == 0) {
+ return 0;
+ }
+
+ int ret = 0;
+ if (!obj.is_open()) {
+ ret = obj.lookup(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ // XXX: Combine multiple streams into one as motr does
+ uint64_t data_size = data.length();
+ ret = obj.write(dpp, std::move(data), offset);
+ if (ret == 0) {
+ total_data_size += data_size;
+ }
+ return ret;
+}
+
+int DaosAtomicWriter::complete(
+ size_t accounted_size, const std::string& etag, ceph::real_time* mtime,
+ ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at, const char* if_match, const char* if_nomatch,
+ const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl;
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+ int ret;
+
+ // Set rgw_bucket_dir_entry. Some of the members of this structure may not
+ // apply to daos.
+ //
+ // Check out AtomicObjectProcessor::complete() in rgw_putobj_processor.cc
+ // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what to set
+ // in the dir entry and how. Only the basic fields are set for the POC, no
+ // ACLs or other attrs.
+ obj.get_key().get_index_key(&ent.key);
+ ent.meta.size = total_data_size;
+ ent.meta.accounted_size = accounted_size;
+ ent.meta.mtime =
+ real_clock::is_zero(set_mtime) ? ceph::real_clock::now() : set_mtime;
+ ent.meta.etag = etag;
+ ent.meta.owner = owner.to_str();
+ ent.meta.owner_display_name =
+ obj.get_bucket()->get_owner()->get_display_name();
+ bool is_versioned = obj.get_bucket()->versioned();
+ if (is_versioned)
+ ent.flags =
+ rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
+ ldpp_dout(dpp, 20) << __func__ << ": key=" << obj.get_key().get_oid()
+ << " etag: " << etag << dendl;
+ if (user_data) ent.meta.user_data = *user_data;
+
+ RGWBucketInfo& info = obj.get_bucket()->get_info();
+ if (info.obj_lock_enabled() && info.obj_lock.has_rule()) {
+ auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+ if (iter == attrs.end()) {
+ real_time lock_until_date =
+ info.obj_lock.get_lock_until_date(ent.meta.mtime);
+ string mode = info.obj_lock.get_mode();
+ RGWObjectRetention obj_retention(mode, lock_until_date);
+ bufferlist retention_bl;
+ obj_retention.encode(retention_bl);
+ attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl;
+ }
+ }
+
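+ // Persist the dir entry together with the attrs as the object's metadata.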
+ ret = obj.set_dir_entry_attrs(dpp, &ent, &attrs);
+ if (ret != 0) {
+ return ret;
+ }
+
+ if (is_versioned) {
+ ret = obj.mark_as_latest(dpp, set_mtime);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+int DaosMultipartUpload::abort(const DoutPrefixProvider* dpp,
+ CephContext* cct) {
+ // Remove upload from bucket multipart index
+ ldpp_dout(dpp, 20) << "DEBUG: abort" << dendl;
+ return ds3_upload_remove(bucket->get_name().c_str(), get_upload_id().c_str(),
+ store->ds3);
+}
+
+std::unique_ptr<rgw::sal::Object> DaosMultipartUpload::get_meta_obj() {
+ return bucket->get_object(
+ rgw_obj_key(get_upload_id(), string(), RGW_OBJ_NS_MULTIPART));
+}
+
+int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y,
+ ACLOwner& _owner,
+ rgw_placement_rule& dest_placement,
+ rgw::sal::Attrs& attrs) {
+ ldpp_dout(dpp, 20) << "DEBUG: init" << dendl;
+ int ret;
+ std::string oid = mp_obj.get_key();
+
+ // Create an initial entry in the bucket. The entry will be
+ // updated when multipart upload is completed, for example,
+ // size, etag etc.
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+ ent.key.name = oid;
+ ent.meta.owner = owner.get_id().to_str();
+ ent.meta.category = RGWObjCategory::MultiMeta;
+ ent.meta.mtime = ceph::real_clock::now();
+
+ multipart_upload_info upload_info;
+ upload_info.dest_placement = dest_placement;
+
+ ent.encode(bl);
+ encode(attrs, bl);
+ encode(upload_info, bl);
+
+ struct ds3_multipart_upload_info ui;
+ std::strcpy(ui.upload_id, MULTIPART_UPLOAD_ID_PREFIX);
+ std::strncpy(ui.key, oid.c_str(), sizeof(ui.key));
+ ui.encoded = bl.c_str();
+ ui.encoded_length = bl.length();
+ int prefix_length = strlen(ui.upload_id);
+
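+ // Keep generating a random upload-id suffix until ds3_upload_init accepts it,
+ // i.e. until there is no -EEXIST collision in the bucket's multipart index.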
+ do {
+ gen_rand_alphanumeric(store->ctx(), ui.upload_id + prefix_length,
+ sizeof(ui.upload_id) - 1 - prefix_length);
+ mp_obj.init(oid, ui.upload_id);
+ ret = ds3_upload_init(&ui, bucket->get_name().c_str(), store->ds3);
+ } while (ret == -EEXIST);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to create multipart upload dir ("
+ << bucket->get_name() << "/" << get_upload_id()
+ << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int DaosMultipartUpload::list_parts(const DoutPrefixProvider* dpp,
+ CephContext* cct, int num_parts, int marker,
+ int* next_marker, bool* truncated,
+ bool assume_unsorted) {
+ ldpp_dout(dpp, 20) << "DEBUG: list_parts" << dendl;
+ // Init needed structures
+ vector<struct ds3_multipart_part_info> multipart_part_infos(num_parts);
+ uint32_t npart = multipart_part_infos.size();
+ vector<vector<uint8_t>> values(npart, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
+ for (uint32_t i = 0; i < npart; i++) {
+ multipart_part_infos[i].encoded = values[i].data();
+ multipart_part_infos[i].encoded_length = values[i].size();
+ }
+
+ uint32_t daos_marker = marker;
+ int ret = ds3_upload_list_parts(
+ bucket->get_name().c_str(), get_upload_id().c_str(), &npart,
+ multipart_part_infos.data(), &daos_marker, truncated, store->ds3);
+
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ return ret;
+ }
+
+ multipart_part_infos.resize(npart);
+ values.resize(npart);
+ parts.clear();
+
+ for (auto const& pi : multipart_part_infos) {
+ bufferlist bl;
+ bl.append(reinterpret_cast<char*>(pi.encoded), pi.encoded_length);
+
+ std::unique_ptr<DaosMultipartPart> part =
+ std::make_unique<DaosMultipartPart>();
+ auto iter = bl.cbegin();
+ decode(part->info, iter);
+ parts[pi.part_num] = std::move(part);
+ }
+
+ if (next_marker) {
+ *next_marker = daos_marker;
+ }
+ return ret;
+}
+
+// Heavily copied from rgw_sal_rados.cc
+int DaosMultipartUpload::complete(
+ const DoutPrefixProvider* dpp, optional_yield y, CephContext* cct,
+ map<int, string>& part_etags, list<rgw_obj_index_key>& remove_objs,
+ uint64_t& accounted_size, bool& compressed, RGWCompressionInfo& cs_info,
+ off_t& off, std::string& tag, ACLOwner& owner, uint64_t olh_epoch,
+ rgw::sal::Object* target_obj) {
+ ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl;
+ char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+ std::string etag;
+ bufferlist etag_bl;
+ MD5 hash;
+ // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
+ hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+ bool truncated;
+ int ret;
+
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): enter" << dendl;
+ int total_parts = 0;
+ int handled_parts = 0;
+ int max_parts = 1000;
+ int marker = 0;
+ uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size;
+ auto etags_iter = part_etags.begin();
+ rgw::sal::Attrs attrs = target_obj->get_attrs();
+
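+ // Walk the uploaded parts in pages of max_parts, validating each part's ETag
+ // against the client-supplied list and folding it into the final multipart ETag.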
+ do {
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): list_parts()"
+ << dendl;
+ ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated);
+ if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ if (ret != 0) return ret;
+
+ total_parts += parts.size();
+ if (!truncated && total_parts != (int)part_etags.size()) {
+ ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts
+ << " expected: " << part_etags.size() << dendl;
+ ret = -ERR_INVALID_PART;
+ return ret;
+ }
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): parts.size()="
+ << parts.size() << dendl;
+
+ for (auto obj_iter = parts.begin();
+ etags_iter != part_etags.end() && obj_iter != parts.end();
+ ++etags_iter, ++obj_iter, ++handled_parts) {
+ DaosMultipartPart* part =
+ dynamic_cast<rgw::sal::DaosMultipartPart*>(obj_iter->second.get());
+ uint64_t part_size = part->get_size();
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part_size="
+ << part_size << dendl;
+ if (handled_parts < (int)part_etags.size() - 1 &&
+ part_size < min_part_size) {
+ ret = -ERR_TOO_SMALL;
+ return ret;
+ }
+
+ char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ if (etags_iter->first != (int)obj_iter->first) {
+ ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: "
+ << etags_iter->first
+ << " next uploaded: " << obj_iter->first << dendl;
+ ret = -ERR_INVALID_PART;
+ return ret;
+ }
+ string part_etag = rgw_string_unquote(etags_iter->second);
+ if (part_etag.compare(part->get_etag()) != 0) {
+ ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: "
+ << etags_iter->first
+ << " etag: " << etags_iter->second << dendl;
+ ret = -ERR_INVALID_PART;
+ return ret;
+ }
+
+ hex_to_buf(part->get_etag().c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE);
+ hash.Update((const unsigned char*)petag, sizeof(petag));
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): calc etag "
+ << dendl;
+
+ RGWUploadPartInfo& obj_part = part->info;
+ string oid = mp_obj.get_part(obj_part.num);
+ rgw_obj src_obj;
+ src_obj.init_ns(bucket->get_key(), oid, RGW_OBJ_NS_MULTIPART);
+
+ bool part_compressed = (obj_part.cs_info.compression_type != "none");
+ if ((handled_parts > 0) &&
+ ((part_compressed != compressed) ||
+ (cs_info.compression_type != obj_part.cs_info.compression_type))) {
+ ldpp_dout(dpp, 0)
+ << "ERROR: compression type was changed during multipart upload ("
+ << cs_info.compression_type << ">>"
+ << obj_part.cs_info.compression_type << ")" << dendl;
+ ret = -ERR_INVALID_PART;
+ return ret;
+ }
+
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part compression"
+ << dendl;
+ if (part_compressed) {
+ int64_t new_ofs; // offset in compression data for new part
+ if (cs_info.blocks.size() > 0)
+ new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
+ else
+ new_ofs = 0;
+ for (const auto& block : obj_part.cs_info.blocks) {
+ compression_block cb;
+ cb.old_ofs = block.old_ofs + cs_info.orig_size;
+ cb.new_ofs = new_ofs;
+ cb.len = block.len;
+ cs_info.blocks.push_back(cb);
+ new_ofs = cb.new_ofs + cb.len;
+ }
+ if (!compressed)
+ cs_info.compression_type = obj_part.cs_info.compression_type;
+ cs_info.orig_size += obj_part.cs_info.orig_size;
+ compressed = true;
+ }
+
+ // We may not need to do the following, as remove_objs are entries that
+ // should not show up when listing the bucket. Since in-progress upload
+ // metadata is stored in a separate index, those entries are already
+ // hidden from bucket listings.
+ rgw_obj_index_key remove_key;
+ src_obj.key.get_index_key(&remove_key);
+
+ remove_objs.push_back(remove_key);
+
+ off += obj_part.size;
+ accounted_size += obj_part.accounted_size;
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): off=" << off
+ << ", accounted_size = " << accounted_size << dendl;
+ }
+ } while (truncated);
+ hash.Final((unsigned char*)final_etag);
+
+ buf_to_hex((unsigned char*)final_etag, sizeof(final_etag), final_etag_str);
+ snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2],
+ sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, "-%lld",
+ (long long)part_etags.size());
+ etag = final_etag_str;
+ ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl;
+
+ etag_bl.append(etag);
+
+ attrs[RGW_ATTR_ETAG] = etag_bl;
+
+ if (compressed) {
+ // write compression attribute to full object
+ bufferlist tmp;
+ encode(cs_info, tmp);
+ attrs[RGW_ATTR_COMPRESSION] = tmp;
+ }
+
+ // Different from rgw_sal_rados.cc starts here
+ // Read the object's multipart info
+ bufferlist bl;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
+ struct ds3_multipart_upload_info ui = {
+ .encoded = bl.append_hole(size).c_str(), .encoded_length = size};
+ ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
+ get_upload_id().c_str(), store->ds3);
+ ldpp_dout(dpp, 20) << "DEBUG: ds3_upload_get_info entry="
+ << bucket->get_name() << "/" << get_upload_id() << dendl;
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ return ret;
+ }
+
+ rgw_bucket_dir_entry ent;
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+
+ // Update entry data and name
+ target_obj->get_key().get_index_key(&ent.key);
+ ent.meta.size = off;
+ ent.meta.accounted_size = accounted_size;
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): obj size="
+ << ent.meta.size
+ << " obj accounted size=" << ent.meta.accounted_size
+ << dendl;
+ ent.meta.category = RGWObjCategory::Main;
+ ent.meta.mtime = ceph::real_clock::now();
+ bool is_versioned = target_obj->get_bucket()->versioned();
+ if (is_versioned)
+ ent.flags =
+ rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
+ ent.meta.etag = etag;
+
+ // Open object
+ DaosObject* obj = static_cast<DaosObject*>(target_obj);
+ ret = obj->create(dpp);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Copy data from parts to object
+ uint64_t write_off = 0;
+ for (auto const& [part_num, part] : get_parts()) {
+ ds3_part_t* ds3p;
+ ret = ds3_part_open(get_bucket_name().c_str(), get_upload_id().c_str(),
+ part_num, false, &ds3p, store->ds3);
+ if (ret != 0) {
+ return ret;
+ }
+
+ // Reserve buffers and read
+ uint64_t size = part->get_size();
+ bufferlist bl;
+ ret = ds3_part_read(bl.append_hole(size).c_str(), 0, &size, ds3p,
+ store->ds3, nullptr);
+ if (ret != 0) {
+ ds3_part_close(ds3p);
+ return ret;
+ }
+
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part " << part_num
+ << " size is " << size << dendl;
+
+    // Write the part's data into the target object at the running offset
+    ret = obj->write(dpp, std::move(bl), write_off);
+    ds3_part_close(ds3p);
+    if (ret != 0) {
+      return ret;
+    }
+    write_off += part->get_size();
+ }
+
+ // Set attributes
+ ret = obj->set_dir_entry_attrs(dpp, &ent, &attrs);
+
+ if (is_versioned) {
+ ret = obj->mark_as_latest(dpp, ent.meta.mtime);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ // Remove upload from bucket multipart index
+ ret = ds3_upload_remove(get_bucket_name().c_str(), get_upload_id().c_str(),
+ store->ds3);
+ return ret;
+}
+
+int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp,
+ optional_yield y, rgw_placement_rule** rule,
+ rgw::sal::Attrs* attrs) {
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_info(): enter" << dendl;
+ if (!rule && !attrs) {
+ return 0;
+ }
+
+ if (rule) {
+ if (!placement.empty()) {
+ *rule = &placement;
+ if (!attrs) {
+ // Don't need attrs, done
+ return 0;
+ }
+ } else {
+ *rule = nullptr;
+ }
+ }
+
+ // Read the multipart upload dirent from index
+ bufferlist bl;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
+ struct ds3_multipart_upload_info ui = {
+ .encoded = bl.append_hole(size).c_str(), .encoded_length = size};
+ int ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
+ get_upload_id().c_str(), store->ds3);
+
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ return ret;
+ }
+
+ multipart_upload_info upload_info;
+ rgw_bucket_dir_entry ent;
+ Attrs decoded_attrs;
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+ decode(decoded_attrs, iter);
+ ldpp_dout(dpp, 20) << "DEBUG: decoded_attrs=" << attrs << dendl;
+
+ if (attrs) {
+ *attrs = decoded_attrs;
+ if (!rule || *rule != nullptr) {
+ // placement was cached; don't actually read
+ return 0;
+ }
+ }
+
+ // Now decode the placement rule
+ decode(upload_info, iter);
+ placement = upload_info.dest_placement;
+ *rule = &placement;
+
+ return 0;
+}
+
+std::unique_ptr<Writer> DaosMultipartUpload::get_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule, uint64_t part_num,
+ const std::string& part_num_str) {
+ ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_writer(): enter part="
+ << part_num << " head_obj=" << _head_obj << dendl;
+ return std::make_unique<DaosMultipartWriter>(
+ dpp, y, this, obj, store, owner, ptail_placement_rule,
+ part_num, part_num_str);
+}
+
+DaosMultipartWriter::~DaosMultipartWriter() {
+ if (is_open()) ds3_part_close(ds3p);
+}
+
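+// Open the DAOS part handle that process() writes into; the destructor
+// closes it once the part is done.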
+int DaosMultipartWriter::prepare(optional_yield y) {
+ ldpp_dout(dpp, 20) << "DaosMultipartWriter::prepare(): enter part="
+ << part_num_str << dendl;
+ int ret = ds3_part_open(get_bucket_name().c_str(), upload_id.c_str(),
+ part_num, true, &ds3p, store->ds3);
+ if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ return ret;
+}
+
+const std::string& DaosMultipartWriter::get_bucket_name() {
+ return static_cast<DaosMultipartUpload*>(upload)->get_bucket_name();
+}
+
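+// Write a chunk of the uploaded part at the given offset and track the
+// actual number of bytes stored.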
+int DaosMultipartWriter::process(bufferlist&& data, uint64_t offset) {
+ ldpp_dout(dpp, 20) << "DaosMultipartWriter::process(): enter part="
+ << part_num_str << " offset=" << offset << dendl;
+ if (data.length() == 0) {
+ return 0;
+ }
+
+ uint64_t size = data.length();
+ int ret =
+ ds3_part_write(data.c_str(), offset, &size, ds3p, store->ds3, nullptr);
+ if (ret == 0) {
+ // XXX: Combine multiple streams into one as motr does
+ actual_part_size += size;
+ } else {
+ ldpp_dout(dpp, 0) << "ERROR: failed to write into part ("
+ << get_bucket_name() << ", " << upload_id << ", "
+ << part_num << "): ret=" << ret << dendl;
+ }
+ return ret;
+}
+
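+// Record the finished part (size, etag, mtime, compression info and attrs)
+// in the multipart part index.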
+int DaosMultipartWriter::complete(
+ size_t accounted_size, const std::string& etag, ceph::real_time* mtime,
+ ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at, const char* if_match, const char* if_nomatch,
+ const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): enter part="
+ << part_num_str << dendl;
+
+ // Add an entry into part index
+ bufferlist bl;
+ RGWUploadPartInfo info;
+ info.num = part_num;
+ info.etag = etag;
+ info.size = actual_part_size;
+ info.accounted_size = accounted_size;
+ info.modified = real_clock::now();
+
+ bool compressed;
+ int ret = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info);
+ ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): compression ret="
+ << ret << dendl;
+ if (ret != 0) {
+ ldpp_dout(dpp, 1) << "cannot get compression info" << dendl;
+ return ret;
+ }
+ encode(info, bl);
+ encode(attrs, bl);
+ ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): entry size"
+ << bl.length() << dendl;
+
+ struct ds3_multipart_part_info part_info = {.part_num = part_num,
+ .encoded = bl.c_str(),
+ .encoded_length = bl.length()};
+
+ ret = ds3_part_set_info(&part_info, ds3p, store->ds3, nullptr);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to set part info (" << get_bucket_name()
+ << ", " << upload_id << ", " << part_num
+ << "): ret=" << ret << dendl;
+    if (ret == -ENOENT) {
+ ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ }
+
+ return ret;
+}
+
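+// IAM roles and OIDC providers are not supported by the DAOS backend yet.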
+std::unique_ptr<RGWRole> DaosStore::get_role(
+ std::string name, std::string tenant, std::string path,
+ std::string trust_policy, std::string max_session_duration_str,
+ std::multimap<std::string, std::string> tags) {
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+std::unique_ptr<RGWRole> DaosStore::get_role(const RGWRoleInfo& info) {
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+std::unique_ptr<RGWRole> DaosStore::get_role(std::string id) {
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+int DaosStore::get_roles(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& path_prefix,
+ const std::string& tenant,
+ vector<std::unique_ptr<RGWRole>>& roles) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::unique_ptr<RGWOIDCProvider> DaosStore::get_oidc_provider() {
+ RGWOIDCProvider* p = nullptr;
+ return std::unique_ptr<RGWOIDCProvider>(p);
+}
+
+int DaosStore::get_oidc_providers(
+ const DoutPrefixProvider* dpp, const std::string& tenant,
+ vector<std::unique_ptr<RGWOIDCProvider>>& providers) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::unique_ptr<MultipartUpload> DaosBucket::get_multipart_upload(
+ const std::string& oid, std::optional<std::string> upload_id,
+ ACLOwner owner, ceph::real_time mtime) {
+ return std::make_unique<DaosMultipartUpload>(store, this, oid, upload_id,
+ owner, mtime);
+}
+
+std::unique_ptr<Writer> DaosStore::get_append_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule,
+ const std::string& unique_tag, uint64_t position,
+ uint64_t* cur_accounted_size) {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return nullptr;
+}
+
+std::unique_ptr<Writer> DaosStore::get_atomic_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch,
+ const std::string& unique_tag) {
+ ldpp_dout(dpp, 20) << "get_atomic_writer" << dendl;
+ return std::make_unique<DaosAtomicWriter>(dpp, y, obj, this,
+ owner, ptail_placement_rule,
+ olh_epoch, unique_tag);
+}
+
+const std::string& DaosStore::get_compression_type(
+ const rgw_placement_rule& rule) {
+ return zone.zone_params->get_compression_type(rule);
+}
+
+bool DaosStore::valid_placement(const rgw_placement_rule& rule) {
+ return zone.zone_params->valid_placement(rule);
+}
+
+std::unique_ptr<User> DaosStore::get_user(const rgw_user& u) {
+ ldout(cctx, 20) << "DEBUG: bucket's user: " << u.to_str() << dendl;
+ return std::make_unique<DaosUser>(this, u);
+}
+
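+// Look up a user by S3 access key via libds3 and decode the stored
+// DaosUserInfo into a DaosUser.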
+int DaosStore::get_user_by_access_key(const DoutPrefixProvider* dpp,
+ const std::string& key, optional_yield y,
+ std::unique_ptr<User>* user) {
+ // Initialize ds3_user_info
+ bufferlist bl;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
+ struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
+ .encoded_length = size};
+
+ int ret = ds3_user_get_by_key(key.c_str(), &user_info, ds3, nullptr);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_key failed, key=" << key
+ << " ret=" << ret << dendl;
+ return ret;
+ }
+
+ // Decode
+ DaosUserInfo duinfo;
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ duinfo.decode(iter);
+
+  user->reset(new DaosUser(this, duinfo.info));
+  return 0;
+}
+
+int DaosStore::get_user_by_email(const DoutPrefixProvider* dpp,
+ const std::string& email, optional_yield y,
+ std::unique_ptr<User>* user) {
+ // Initialize ds3_user_info
+ bufferlist bl;
+ uint64_t size = DS3_MAX_ENCODED_LEN;
+ struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
+ .encoded_length = size};
+
+ int ret = ds3_user_get_by_email(email.c_str(), &user_info, ds3, nullptr);
+
+ if (ret != 0) {
+ ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_email failed, email=" << email
+ << " ret=" << ret << dendl;
+ return ret;
+ }
+
+ // Decode
+ DaosUserInfo duinfo;
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ duinfo.decode(iter);
+
+  user->reset(new DaosUser(this, duinfo.info));
+  return 0;
+}
+
+int DaosStore::get_user_by_swift(const DoutPrefixProvider* dpp,
+ const std::string& user_str, optional_yield y,
+ std::unique_ptr<User>* user) {
+ /* Swift keys and subusers are not supported for now */
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::unique_ptr<Object> DaosStore::get_object(const rgw_obj_key& k) {
+ return std::make_unique<DaosObject>(this, k);
+}
+
+inline std::ostream& operator<<(std::ostream& out, const rgw_user* u) {
+ std::string s;
+ if (u != nullptr)
+ u->to_str(s);
+ else
+ s = "(nullptr)";
+ return out << s;
+}
+
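+// Instantiate a DaosBucket and load its metadata; fails if the bucket does
+// not exist.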
+int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u,
+ const rgw_bucket& b, std::unique_ptr<Bucket>* bucket,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "DEBUG: get_bucket1: User: " << u << dendl;
+ int ret;
+ Bucket* bp;
+
+ bp = new DaosBucket(this, b, u);
+ ret = bp->load_bucket(dpp, y);
+ if (ret != 0) {
+ delete bp;
+ return ret;
+ }
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int DaosStore::get_bucket(User* u, const RGWBucketInfo& i,
+ std::unique_ptr<Bucket>* bucket) {
+ DaosBucket* bp;
+
+ bp = new DaosBucket(this, i, u);
+ /* Don't need to fetch the bucket info, use the provided one */
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u,
+ const std::string& tenant, const std::string& name,
+ std::unique_ptr<Bucket>* bucket, optional_yield y) {
+ ldpp_dout(dpp, 20) << "get_bucket" << dendl;
+ rgw_bucket b;
+
+ b.tenant = tenant;
+ b.name = name;
+
+ return get_bucket(dpp, u, b, bucket, y);
+}
+
+bool DaosStore::is_meta_master() { return true; }
+
+int DaosStore::forward_request_to_master(const DoutPrefixProvider* dpp,
+ User* user, obj_version* objv,
+ bufferlist& in_data, JSONParser* jp,
+ req_info& info, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::forward_iam_request_to_master(const DoutPrefixProvider* dpp,
+ const RGWAccessKey& key,
+ obj_version* objv,
+ bufferlist& in_data,
+ RGWXMLDecoder::XMLParser* parser,
+ req_info& info, optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::string DaosStore::zone_unique_id(uint64_t unique_num) { return ""; }
+
+std::string DaosStore::zone_unique_trans_id(const uint64_t unique_num) {
+ return "";
+}
+
+int DaosStore::cluster_stat(RGWClusterStat& stats) {
+ return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
+}
+
+std::unique_ptr<Lifecycle> DaosStore::get_lifecycle(void) {
+ DAOS_NOT_IMPLEMENTED_LOG(nullptr);
+  return nullptr;
+}
+
+std::unique_ptr<Notification> DaosStore::get_notification(
+    rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s,
+    rgw::notify::EventType event_type, optional_yield y,
+    const std::string* object_name) {
+ return std::make_unique<DaosNotification>(obj, src_obj, event_type);
+}
+
+std::unique_ptr<Notification> DaosStore::get_notification(
+ const DoutPrefixProvider* dpp, Object* obj, Object* src_obj,
+ rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
+ std::string& _user_id, std::string& _user_tenant, std::string& _req_id,
+ optional_yield y) {
+ ldpp_dout(dpp, 20) << "get_notification" << dendl;
+ return std::make_unique<DaosNotification>(obj, src_obj, event_type);
+}
+
+int DaosStore::log_usage(const DoutPrefixProvider* dpp,
+ map<rgw_user_bucket, RGWUsageBatch>& usage_info) {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return 0;
+}
+
+int DaosStore::log_op(const DoutPrefixProvider* dpp, string& oid,
+ bufferlist& bl) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::register_to_service_map(const DoutPrefixProvider* dpp,
+ const string& daemon_type,
+ const map<string, string>& meta) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+void DaosStore::get_quota(RGWQuota& quota) {
+ // XXX: Not handled for the first pass
+ return;
+}
+
+void DaosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
+ RGWRateLimitInfo& user_ratelimit,
+ RGWRateLimitInfo& anon_ratelimit) {
+ return;
+}
+
+int DaosStore::set_buckets_enabled(const DoutPrefixProvider* dpp,
+ std::vector<rgw_bucket>& buckets,
+ bool enabled) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp,
+ std::optional<rgw_zone_id> zone,
+ std::optional<rgw_bucket> bucket,
+ RGWBucketSyncPolicyHandlerRef* phandler,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+RGWDataSyncStatusManager* DaosStore::get_data_sync_manager(
+ const rgw_zone_id& source_zone) {
+ DAOS_NOT_IMPLEMENTED_LOG(nullptr);
+  return nullptr;
+}
+
+int DaosStore::read_all_usage(
+ const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::trim_all_usage(const DoutPrefixProvider* dpp,
+ uint64_t start_epoch, uint64_t end_epoch) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::get_config_key_val(string name, bufferlist* bl) {
+ return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
+}
+
+int DaosStore::meta_list_keys_init(const DoutPrefixProvider* dpp,
+ const string& section, const string& marker,
+ void** phandle) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+int DaosStore::meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle,
+ int max, list<string>& keys,
+ bool* truncated) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+void DaosStore::meta_list_keys_complete(void* handle) { return; }
+
+std::string DaosStore::meta_get_marker(void* handle) { return ""; }
+
+int DaosStore::meta_remove(const DoutPrefixProvider* dpp, string& metadata_key,
+ optional_yield y) {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+}
+
+std::string DaosStore::get_cluster_id(const DoutPrefixProvider* dpp,
+ optional_yield y) {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return "";
+}
+
+} // namespace rgw::sal
+
+extern "C" {
+
+void* newDaosStore(CephContext* cct) {
+ return new rgw::sal::DaosStore(cct);
+}
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=2 sw=2 expandtab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * SAL implementation for the CORTX Daos backend
+ *
+ * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <daos.h>
+#include <daos_s3.h>
+#include <uuid/uuid.h>
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "rgw_multi.h"
+#include "rgw_notify.h"
+#include "rgw_oidc_provider.h"
+#include "rgw_putobj_processor.h"
+#include "rgw_rados.h"
+#include "rgw_role.h"
+#include "rgw_sal_store.h"
+
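+// Debug-build helper: returns true if a debugger is attached, detected via
+// the TracerPid field in /proc/self/status.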
+inline bool IsDebuggerAttached() {
+#ifdef DEBUG
+ char buf[4096];
+
+ const int status_fd = ::open("/proc/self/status", O_RDONLY);
+ if (status_fd == -1) return false;
+
+ const ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1);
+ ::close(status_fd);
+
+ if (num_read <= 0) return false;
+
+ buf[num_read] = '\0';
+ constexpr char tracerPidString[] = "TracerPid:";
+ const auto tracer_pid_ptr = ::strstr(buf, tracerPidString);
+ if (!tracer_pid_ptr) return false;
+
+ for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1;
+ characterPtr <= buf + num_read; ++characterPtr) {
+ if (::isspace(*characterPtr))
+ continue;
+ else
+ return ::isdigit(*characterPtr) != 0 && *characterPtr != '0';
+ }
+#endif // DEBUG
+ return false;
+}
+
+inline void DebugBreak() {
+#ifdef DEBUG
+ // only break into the debugger if the debugger is attached
+ if (IsDebuggerAttached())
+ raise(SIGINT); // breaks into GDB and stops, can be continued
+#endif // DEBUG
+}
+
+inline int NotImplementedLog(const DoutPrefixProvider* ldpp,
+ const char* filename, int linenumber,
+ const char* functionname) {
+ if (ldpp)
+ ldpp_dout(ldpp, 20) << filename << "(" << linenumber << ") " << functionname
+ << ": Not implemented" << dendl;
+ return 0;
+}
+
+inline int NotImplementedGdbBreak(const DoutPrefixProvider* ldpp,
+ const char* filename, int linenumber,
+ const char* functionname) {
+ NotImplementedLog(ldpp, filename, linenumber, functionname);
+ DebugBreak();
+ return 0;
+}
+
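+// Log (and optionally break into the debugger) whenever an unimplemented SAL
+// method is hit.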
+#define DAOS_NOT_IMPLEMENTED_GDB_BREAK(ldpp) \
+ NotImplementedGdbBreak(ldpp, __FILE__, __LINE__, __FUNCTION__)
+#define DAOS_NOT_IMPLEMENTED_LOG(ldpp) \
+ NotImplementedLog(ldpp, __FILE__, __LINE__, __FUNCTION__)
+
+namespace rgw::sal {
+
+class DaosStore;
+class DaosObject;
+
+#ifdef DEBUG
+// Prepends each log entry with "filename(source_line) function_name", which
+// makes it simple to associate log entries with the source that generated them.
+#undef ldpp_dout
+#define ldpp_dout(dpp, v) \
+ if (decltype(auto) pdpp = (dpp); \
+ pdpp) /* workaround -Wnonnull-compare for 'this' */ \
+ dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \
+ pdpp->gen_prefix(*_dout) \
+ << __FILE__ << "(" << __LINE__ << ") " << __FUNCTION__ << " - "
+#endif // DEBUG
+
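+// User record persisted by the DAOS backend: RGWUserInfo plus its version
+// and attributes.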
+struct DaosUserInfo {
+ RGWUserInfo info;
+ obj_version user_version;
+ rgw::sal::Attrs attrs;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(3, 3, bl);
+ encode(info, bl);
+ encode(user_version, bl);
+ encode(attrs, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(3, bl);
+ decode(info, bl);
+ decode(user_version, bl);
+ decode(attrs, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(DaosUserInfo);
+
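+// Bucket notifications are not implemented; reserve/commit only log the call.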
+class DaosNotification : public StoreNotification {
+ public:
+ DaosNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type)
+ : StoreNotification(_obj, _src_obj, _type) {}
+ ~DaosNotification() = default;
+
+ virtual int publish_reserve(const DoutPrefixProvider* dpp,
+ RGWObjTags* obj_tags = nullptr) override {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ }
+ virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size,
+ const ceph::real_time& mtime,
+ const std::string& etag,
+ const std::string& version) override {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ }
+};
+
+class DaosUser : public StoreUser {
+ private:
+ DaosStore* store;
+ std::vector<const char*> access_ids;
+
+ public:
+ DaosUser(DaosStore* _st, const rgw_user& _u) : StoreUser(_u), store(_st) {}
+ DaosUser(DaosStore* _st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) {}
+ DaosUser(DaosStore* _st) : store(_st) {}
+ DaosUser(DaosUser& _o) = default;
+ DaosUser() {}
+
+ virtual std::unique_ptr<User> clone() override {
+ return std::make_unique<DaosUser>(*this);
+ }
+ int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker,
+ const std::string& end_marker, uint64_t max, bool need_stats,
+ BucketList& buckets, optional_yield y) override;
+ virtual int create_bucket(
+ const DoutPrefixProvider* dpp, const rgw_bucket& b,
+ const std::string& zonegroup_id, rgw_placement_rule& placement_rule,
+ std::string& swift_ver_location, const RGWQuotaInfo* pquota_info,
+ const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info,
+ obj_version& ep_objv, bool exclusive, bool obj_lock_enabled,
+ bool* existed, req_info& req_info, std::unique_ptr<Bucket>* bucket,
+ optional_yield y) override;
+ virtual int read_attrs(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp,
+ Attrs& new_attrs,
+ optional_yield y) override;
+ virtual int read_stats(const DoutPrefixProvider* dpp, optional_yield y,
+ RGWStorageStats* stats,
+ ceph::real_time* last_stats_sync = nullptr,
+ ceph::real_time* last_stats_update = nullptr) override;
+ virtual int read_stats_async(const DoutPrefixProvider* dpp,
+ RGWGetUserStats_CB* cb) override;
+ virtual int complete_flush_stats(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int read_usage(
+ const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch) override;
+
+ virtual int load_user(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y,
+ bool exclusive,
+ RGWUserInfo* old_info = nullptr) override;
+ virtual int remove_user(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+
+ /** Read user info without loading it */
+ int read_user(const DoutPrefixProvider* dpp, std::string name,
+ DaosUserInfo* duinfo);
+
+ std::unique_ptr<struct ds3_user_info> get_encoded_info(bufferlist& bl,
+ obj_version& obj_ver);
+
+ friend class DaosBucket;
+};
+
+// RGWBucketInfo and the other information shown when listing a bucket are
+// represented by struct DaosBucketInfo. The structure is encoded and stored
+// as the value of the global bucket instance index.
+// TODO: compare pros and cons of separating the bucket_attrs (ACLs, tag etc.)
+// into a different index.
+struct DaosBucketInfo {
+ RGWBucketInfo info;
+
+ obj_version bucket_version;
+ ceph::real_time mtime;
+
+ rgw::sal::Attrs bucket_attrs;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(4, 4, bl);
+ encode(info, bl);
+ encode(bucket_version, bl);
+ encode(mtime, bl);
+ encode(bucket_attrs, bl); // rgw_cache.h example for a map
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(4, bl);
+ decode(info, bl);
+ decode(bucket_version, bl);
+ decode(mtime, bl);
+ decode(bucket_attrs, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(DaosBucketInfo);
+
+class DaosBucket : public StoreBucket {
+ private:
+ DaosStore* store;
+ RGWAccessControlPolicy acls;
+
+ public:
+ /** Container ds3b handle */
+ ds3_bucket_t* ds3b = nullptr;
+
+ DaosBucket(DaosStore* _st) : store(_st), acls() {}
+
+ DaosBucket(const DaosBucket& _daos_bucket)
+ : store(_daos_bucket.store), acls(), ds3b(nullptr) {
+ // TODO: deep copy all objects
+ }
+
+ DaosBucket(DaosStore* _st, User* _u) : StoreBucket(_u), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const rgw_bucket& _b)
+ : StoreBucket(_b), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const RGWBucketEnt& _e)
+ : StoreBucket(_e), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const RGWBucketInfo& _i)
+ : StoreBucket(_i), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const rgw_bucket& _b, User* _u)
+ : StoreBucket(_b, _u), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const RGWBucketEnt& _e, User* _u)
+ : StoreBucket(_e, _u), store(_st), acls() {}
+
+ DaosBucket(DaosStore* _st, const RGWBucketInfo& _i, User* _u)
+ : StoreBucket(_i, _u), store(_st), acls() {}
+
+ ~DaosBucket();
+
+ virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
+ virtual int list(const DoutPrefixProvider* dpp, ListParams&, int,
+ ListResults&, optional_yield y) override;
+ virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children,
+ bool forward_to_master, req_info* req_info,
+ optional_yield y) override;
+ virtual int remove_bucket_bypass_gc(int concurrent_max,
+ bool keep_index_consistent,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) override;
+ virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
+ virtual int set_acl(const DoutPrefixProvider* dpp,
+ RGWAccessControlPolicy& acl, optional_yield y) override;
+ virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y,
+ bool get_stats = false) override;
+ virtual int read_stats(const DoutPrefixProvider* dpp,
+ const bucket_index_layout_generation& idx_layout,
+ int shard_id, std::string* bucket_ver,
+ std::string* master_ver,
+ std::map<RGWObjCategory, RGWStorageStats>& stats,
+ std::string* max_marker = nullptr,
+ bool* syncstopped = nullptr) override;
+ virtual int read_stats_async(const DoutPrefixProvider* dpp,
+ const bucket_index_layout_generation& idx_layout,
+ int shard_id,
+ RGWGetBucketStats_CB* ctx) override;
+ virtual int sync_user_stats(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int update_container_stats(const DoutPrefixProvider* dpp) override;
+ virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override;
+ virtual int chown(const DoutPrefixProvider* dpp, User& new_user,
+ optional_yield y) override;
+ virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive,
+ ceph::real_time mtime) override;
+ virtual bool is_owner(User* user) override;
+ virtual int check_empty(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota,
+ uint64_t obj_size, optional_yield y,
+ bool check_size_only = false) override;
+ virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& attrs,
+ optional_yield y) override;
+ virtual int try_refresh_info(const DoutPrefixProvider* dpp,
+ ceph::real_time* pmtime) override;
+ virtual int read_usage(
+ const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
+ uint64_t end_epoch) override;
+ virtual int remove_objs_from_index(
+ const DoutPrefixProvider* dpp,
+ std::list<rgw_obj_index_key>& objs_to_unlink) override;
+ virtual int check_index(
+ const DoutPrefixProvider* dpp,
+ std::map<RGWObjCategory, RGWStorageStats>& existing_stats,
+ std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) override;
+ virtual int rebuild_index(const DoutPrefixProvider* dpp) override;
+ virtual int set_tag_timeout(const DoutPrefixProvider* dpp,
+ uint64_t timeout) override;
+ virtual int purge_instance(const DoutPrefixProvider* dpp) override;
+ virtual std::unique_ptr<Bucket> clone() override {
+ return std::make_unique<DaosBucket>(*this);
+ }
+ virtual std::unique_ptr<MultipartUpload> get_multipart_upload(
+ const std::string& oid,
+ std::optional<std::string> upload_id = std::nullopt, ACLOwner owner = {},
+ ceph::real_time mtime = real_clock::now()) override;
+ virtual int list_multiparts(
+ const DoutPrefixProvider* dpp, const std::string& prefix,
+ std::string& marker, const std::string& delim, const int& max_uploads,
+ std::vector<std::unique_ptr<MultipartUpload>>& uploads,
+ std::map<std::string, bool>* common_prefixes,
+ bool* is_truncated) override;
+ virtual int abort_multiparts(const DoutPrefixProvider* dpp,
+ CephContext* cct) override;
+
+ int open(const DoutPrefixProvider* dpp);
+ int close(const DoutPrefixProvider* dpp);
+ bool is_open() { return ds3b != nullptr; }
+ std::unique_ptr<struct ds3_bucket_info> get_encoded_info(
+ bufferlist& bl, ceph::real_time mtime);
+
+ friend class DaosStore;
+};
+
+class DaosPlacementTier : public StorePlacementTier {
+ DaosStore* store;
+ RGWZoneGroupPlacementTier tier;
+
+ public:
+ DaosPlacementTier(DaosStore* _store, const RGWZoneGroupPlacementTier& _tier)
+ : store(_store), tier(_tier) {}
+ virtual ~DaosPlacementTier() = default;
+
+ virtual const std::string& get_tier_type() { return tier.tier_type; }
+ virtual const std::string& get_storage_class() { return tier.storage_class; }
+ virtual bool retain_head_object() { return tier.retain_head_object; }
+ RGWZoneGroupPlacementTier& get_rt() { return tier; }
+};
+
+class DaosZoneGroup : public StoreZoneGroup {
+ DaosStore* store;
+ const RGWZoneGroup group;
+ std::string empty;
+
+ public:
+ DaosZoneGroup(DaosStore* _store) : store(_store), group() {}
+ DaosZoneGroup(DaosStore* _store, const RGWZoneGroup& _group)
+ : store(_store), group(_group) {}
+ virtual ~DaosZoneGroup() = default;
+
+ virtual const std::string& get_id() const override { return group.get_id(); };
+ virtual const std::string& get_name() const override {
+ return group.get_name();
+ };
+ virtual int equals(const std::string& other_zonegroup) const override {
+ return group.equals(other_zonegroup);
+ };
+ /** Get the endpoint from zonegroup, or from master zone if not set */
+ virtual const std::string& get_endpoint() const override;
+ virtual bool placement_target_exists(std::string& target) const override;
+ virtual bool is_master_zonegroup() const override {
+ return group.is_master_zonegroup();
+ };
+ virtual const std::string& get_api_name() const override {
+ return group.api_name;
+ };
+ virtual int get_placement_target_names(
+ std::set<std::string>& names) const override;
+ virtual const std::string& get_default_placement_name() const override {
+ return group.default_placement.name;
+ };
+ virtual int get_hostnames(std::list<std::string>& names) const override {
+ names = group.hostnames;
+ return 0;
+ };
+ virtual int get_s3website_hostnames(
+ std::list<std::string>& names) const override {
+ names = group.hostnames_s3website;
+ return 0;
+ };
+ virtual int get_zone_count() const override { return group.zones.size(); }
+ virtual int get_placement_tier(const rgw_placement_rule& rule,
+ std::unique_ptr<PlacementTier>* tier);
+ virtual std::unique_ptr<ZoneGroup> clone() override {
+ return std::make_unique<DaosZoneGroup>(store, group);
+ }
+ const RGWZoneGroup& get_group() { return group; }
+};
+
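+// Minimal single-zone setup built from default realm/zonegroup/period
+// objects; only the "default" placement with the STANDARD storage class is
+// configured.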
+class DaosZone : public StoreZone {
+ protected:
+ DaosStore* store;
+ RGWRealm* realm{nullptr};
+ DaosZoneGroup zonegroup;
+ RGWZone* zone_public_config{
+ nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
+ RGWZoneParams* zone_params{
+ nullptr}; /* internal zone params, e.g., rados pools */
+ RGWPeriod* current_period{nullptr};
+ rgw_zone_id cur_zone_id;
+
+ public:
+ DaosZone(DaosStore* _store) : store(_store), zonegroup(_store) {
+ realm = new RGWRealm();
+ zone_public_config = new RGWZone();
+ zone_params = new RGWZoneParams();
+ current_period = new RGWPeriod();
+ cur_zone_id = rgw_zone_id(zone_params->get_id());
+
+ // XXX: only default and STANDARD supported for now
+ RGWZonePlacementInfo info;
+ RGWZoneStorageClasses sc;
+ sc.set_storage_class("STANDARD", nullptr, nullptr);
+ info.storage_classes = sc;
+ zone_params->placement_pools["default"] = info;
+ }
+ DaosZone(DaosStore* _store, DaosZoneGroup _zg)
+ : store(_store), zonegroup(_zg) {
+ realm = new RGWRealm();
+ zone_public_config = new RGWZone();
+ zone_params = new RGWZoneParams();
+ current_period = new RGWPeriod();
+ cur_zone_id = rgw_zone_id(zone_params->get_id());
+
+ // XXX: only default and STANDARD supported for now
+ RGWZonePlacementInfo info;
+ RGWZoneStorageClasses sc;
+ sc.set_storage_class("STANDARD", nullptr, nullptr);
+ info.storage_classes = sc;
+ zone_params->placement_pools["default"] = info;
+ }
+ ~DaosZone() = default;
+
+ virtual std::unique_ptr<Zone> clone() override {
+ return std::make_unique<DaosZone>(store);
+ }
+ virtual ZoneGroup& get_zonegroup() override;
+ virtual int get_zonegroup(const std::string& id,
+ std::unique_ptr<ZoneGroup>* zonegroup) override;
+ virtual const rgw_zone_id& get_id() override;
+ virtual const std::string& get_name() const override;
+ virtual bool is_writeable() override;
+ virtual bool get_redirect_endpoint(std::string* endpoint) override;
+ virtual bool has_zonegroup_api(const std::string& api) const override;
+ virtual const std::string& get_current_period_id() override;
+ virtual const RGWAccessKey& get_system_key() {
+ return zone_params->system_key;
+ }
+ virtual const std::string& get_realm_name() { return realm->get_name(); }
+ virtual const std::string& get_realm_id() { return realm->get_id(); }
+ virtual const std::string_view get_tier_type() { return "rgw"; }
+
+ friend class DaosStore;
+};
+
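+// Lua scripting is not supported; every operation logs and returns -ENOENT.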
+class DaosLuaManager : public StoreLuaManager {
+ DaosStore* store;
+
+ public:
+ DaosLuaManager(DaosStore* _s) : store(_s) {}
+ virtual ~DaosLuaManager() = default;
+
+ virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& key, std::string& script) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+
+ virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& key,
+ const std::string& script) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+
+ virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& key) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+
+ virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& package_name) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+
+ virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& package_name) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+
+ virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::lua::packages_t& packages) override {
+ DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ return -ENOENT;
+ };
+};
+
+class DaosObject : public StoreObject {
+ private:
+ DaosStore* store;
+ RGWAccessControlPolicy acls;
+
+ public:
+ struct DaosReadOp : public StoreReadOp {
+ private:
+ DaosObject* source;
+
+ public:
+ DaosReadOp(DaosObject* _source);
+
+ virtual int prepare(optional_yield y,
+ const DoutPrefixProvider* dpp) override;
+
+ /*
+ * Both `read` and `iterate` read up through index `end`
+ * *inclusive*. The number of bytes that could be returned is
+ * `end - ofs + 1`.
+ */
+ virtual int read(int64_t off, int64_t end, bufferlist& bl, optional_yield y,
+ const DoutPrefixProvider* dpp) override;
+ virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end,
+ RGWGetDataCB* cb, optional_yield y) override;
+
+ virtual int get_attr(const DoutPrefixProvider* dpp, const char* name,
+ bufferlist& dest, optional_yield y) override;
+ };
+
+ struct DaosDeleteOp : public StoreDeleteOp {
+ private:
+ DaosObject* source;
+
+ public:
+ DaosDeleteOp(DaosObject* _source);
+
+ virtual int delete_obj(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ };
+
+ ds3_obj_t* ds3o = nullptr;
+
+ DaosObject() = default;
+
+ DaosObject(DaosStore* _st, const rgw_obj_key& _k)
+ : StoreObject(_k), store(_st), acls() {}
+ DaosObject(DaosStore* _st, const rgw_obj_key& _k, Bucket* _b)
+ : StoreObject(_k, _b), store(_st), acls() {}
+
+ DaosObject(DaosObject& _o) = default;
+
+ virtual ~DaosObject();
+
+ virtual int delete_object(const DoutPrefixProvider* dpp, optional_yield y,
+ bool prevent_versioning = false) override;
+ virtual int copy_object(
+ User* user, req_info* info, const rgw_zone_id& source_zone,
+ rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
+ rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement,
+ ceph::real_time* src_mtime, ceph::real_time* mtime,
+ const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
+ bool high_precision_time, const char* if_match, const char* if_nomatch,
+ AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
+ RGWObjCategory category, uint64_t olh_epoch,
+ boost::optional<ceph::real_time> delete_at, std::string* version_id,
+ std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*),
+ void* progress_data, const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
+ virtual int set_acl(const RGWAccessControlPolicy& acl) override {
+ acls = acl;
+ return 0;
+ }
+
+ virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState** state,
+ optional_yield y, bool follow_olh = true) override;
+ virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
+ Attrs* delattrs, optional_yield y) override;
+ virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp,
+ rgw_obj* target_obj = NULL) override;
+ virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) override;
+ virtual int delete_obj_attrs(const DoutPrefixProvider* dpp,
+ const char* attr_name,
+ optional_yield y) override;
+ virtual bool is_expired() override;
+ virtual void gen_rand_obj_instance_name() override;
+ virtual std::unique_ptr<Object> clone() override {
+ return std::make_unique<DaosObject>(*this);
+ }
+ virtual std::unique_ptr<MPSerializer> get_serializer(
+ const DoutPrefixProvider* dpp, const std::string& lock_name) override;
+ virtual int transition(Bucket* bucket,
+ const rgw_placement_rule& placement_rule,
+ const real_time& mtime, uint64_t olh_epoch,
+ const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int transition_to_cloud(Bucket* bucket, rgw::sal::PlacementTier* tier,
+ rgw_bucket_dir_entry& o,
+ std::set<std::string>& cloud_targets,
+ CephContext* cct, bool update_object,
+ const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual bool placement_rules_match(rgw_placement_rule& r1,
+ rgw_placement_rule& r2) override;
+ virtual int dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y,
+ Formatter* f) override;
+
+ /* Swift versioning */
+ virtual int swift_versioning_restore(bool& restored,
+ const DoutPrefixProvider* dpp) override;
+ virtual int swift_versioning_copy(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+
+ /* OPs */
+ virtual std::unique_ptr<ReadOp> get_read_op() override;
+ virtual std::unique_ptr<DeleteOp> get_delete_op() override;
+
+ /* OMAP */
+ virtual int omap_get_vals_by_keys(const DoutPrefixProvider* dpp,
+ const std::string& oid,
+ const std::set<std::string>& keys,
+ Attrs* vals) override;
+ virtual int omap_set_val_by_key(const DoutPrefixProvider* dpp,
+ const std::string& key, bufferlist& val,
+ bool must_exist, optional_yield y) override;
+ virtual int chown(User& new_user, const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+
+ bool is_open() { return ds3o != nullptr; };
+ // Only lookup the object, do not create
+ int lookup(const DoutPrefixProvider* dpp);
+ // Create the object, truncate if exists
+ int create(const DoutPrefixProvider* dpp);
+ // Release the daos resources
+ int close(const DoutPrefixProvider* dpp);
+ // Write to object starting from offset
+ int write(const DoutPrefixProvider* dpp, bufferlist&& data, uint64_t offset);
+ // Read size bytes from object starting from offset
+ int read(const DoutPrefixProvider* dpp, bufferlist& data, uint64_t offset,
+ uint64_t& size);
+ // Get the object's dirent and attrs
+ int get_dir_entry_attrs(const DoutPrefixProvider* dpp,
+ rgw_bucket_dir_entry* ent, Attrs* getattrs = nullptr);
+ // Set the object's dirent and attrs
+ int set_dir_entry_attrs(const DoutPrefixProvider* dpp,
+ rgw_bucket_dir_entry* ent, Attrs* setattrs = nullptr);
+ // Marks this DAOS object as being the latest version and unmarks all other
+ // versions as latest
+ int mark_as_latest(const DoutPrefixProvider* dpp, ceph::real_time set_mtime);
+ // get_bucket casted as DaosBucket*
+ DaosBucket* get_daos_bucket() {
+ return static_cast<DaosBucket*>(get_bucket());
+ }
+};
+
+// A placeholder locking class for multipart upload.
+class MPDaosSerializer : public StoreMPSerializer {
+ public:
+ MPDaosSerializer(const DoutPrefixProvider* dpp, DaosStore* store,
+ DaosObject* obj, const std::string& lock_name) {}
+
+ virtual int try_lock(const DoutPrefixProvider* dpp, utime_t dur,
+ optional_yield y) override {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ }
+ virtual int unlock() override { return DAOS_NOT_IMPLEMENTED_LOG(nullptr); }
+};
+
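+// Writer used for regular (non-multipart) object uploads.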
+class DaosAtomicWriter : public StoreWriter {
+ protected:
+ rgw::sal::DaosStore* store;
+ const rgw_user& owner;
+ const rgw_placement_rule* ptail_placement_rule;
+ uint64_t olh_epoch;
+ const std::string& unique_tag;
+ DaosObject obj;
+ uint64_t total_data_size = 0; // for total data being uploaded
+
+ public:
+ DaosAtomicWriter(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj,
+ DaosStore* _store, const rgw_user& _owner,
+ const rgw_placement_rule* _ptail_placement_rule,
+ uint64_t _olh_epoch, const std::string& _unique_tag);
+ ~DaosAtomicWriter() = default;
+
+ // prepare to start processing object data
+ virtual int prepare(optional_yield y) override;
+
+ // Process a bufferlist
+ virtual int process(bufferlist&& data, uint64_t offset) override;
+
+ // complete the operation and make its result visible to clients
+ virtual int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time* mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at, const char* if_match,
+ const char* if_nomatch, const std::string* user_data,
+ rgw_zone_set* zones_trace, bool* canceled,
+ optional_yield y) override;
+};
+
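+// Writer for a single multipart part; data is streamed into a per-part DAOS
+// handle (ds3p).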
+class DaosMultipartWriter : public StoreWriter {
+ protected:
+ rgw::sal::DaosStore* store;
+ MultipartUpload* upload;
+ std::string upload_id;
+
+ // Part parameters.
+ const uint64_t part_num;
+ const std::string part_num_str;
+ uint64_t actual_part_size = 0;
+
+ ds3_part_t* ds3p = nullptr;
+ bool is_open() { return ds3p != nullptr; };
+
+ public:
+ DaosMultipartWriter(const DoutPrefixProvider* dpp, optional_yield y,
+ MultipartUpload* _upload,
+ rgw::sal::Object* obj,
+ DaosStore* _store, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule,
+ uint64_t _part_num, const std::string& part_num_str)
+ : StoreWriter(dpp, y),
+ store(_store),
+ upload(_upload),
+ upload_id(_upload->get_upload_id()),
+ part_num(_part_num),
+ part_num_str(part_num_str) {}
+ virtual ~DaosMultipartWriter();
+
+ // prepare to start processing object data
+ virtual int prepare(optional_yield y) override;
+
+ // Process a bufferlist
+ virtual int process(bufferlist&& data, uint64_t offset) override;
+
+ // complete the operation and make its result visible to clients
+ virtual int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time* mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at, const char* if_match,
+ const char* if_nomatch, const std::string* user_data,
+ rgw_zone_set* zones_trace, bool* canceled,
+ optional_yield y) override;
+
+ const std::string& get_bucket_name();
+};
+
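+// A completed upload part, backed by the RGWUploadPartInfo stored in the
+// part index.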
+class DaosMultipartPart : public StoreMultipartPart {
+ protected:
+ RGWUploadPartInfo info;
+
+ public:
+ DaosMultipartPart() = default;
+ virtual ~DaosMultipartPart() = default;
+
+ virtual uint32_t get_num() { return info.num; }
+ virtual uint64_t get_size() { return info.accounted_size; }
+ virtual const std::string& get_etag() { return info.etag; }
+ virtual ceph::real_time& get_mtime() { return info.modified; }
+
+ friend class DaosMultipartUpload;
+};
+
+class DaosMultipartUpload : public StoreMultipartUpload {
+ DaosStore* store;
+ RGWMPObj mp_obj;
+ ACLOwner owner;
+ ceph::real_time mtime;
+ rgw_placement_rule placement;
+ RGWObjManifest manifest;
+
+ public:
+ DaosMultipartUpload(DaosStore* _store, Bucket* _bucket,
+ const std::string& oid,
+ std::optional<std::string> upload_id, ACLOwner _owner,
+ ceph::real_time _mtime)
+ : StoreMultipartUpload(_bucket),
+ store(_store),
+ mp_obj(oid, upload_id),
+ owner(_owner),
+ mtime(_mtime) {}
+ virtual ~DaosMultipartUpload() = default;
+
+ virtual const std::string& get_meta() const { return mp_obj.get_meta(); }
+ virtual const std::string& get_key() const { return mp_obj.get_key(); }
+ virtual const std::string& get_upload_id() const {
+ return mp_obj.get_upload_id();
+ }
+ virtual const ACLOwner& get_owner() const override { return owner; }
+ virtual ceph::real_time& get_mtime() { return mtime; }
+ virtual std::unique_ptr<rgw::sal::Object> get_meta_obj() override;
+ virtual int init(const DoutPrefixProvider* dpp, optional_yield y,
+ ACLOwner& owner, rgw_placement_rule& dest_placement,
+ rgw::sal::Attrs& attrs) override;
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int num_parts, int marker, int* next_marker,
+ bool* truncated,
+ bool assume_unsorted = false) override;
+ virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override;
+ virtual int complete(const DoutPrefixProvider* dpp, optional_yield y,
+ CephContext* cct, std::map<int, std::string>& part_etags,
+ std::list<rgw_obj_index_key>& remove_objs,
+ uint64_t& accounted_size, bool& compressed,
+ RGWCompressionInfo& cs_info, off_t& off,
+ std::string& tag, ACLOwner& owner, uint64_t olh_epoch,
+ rgw::sal::Object* target_obj) override;
+ virtual int get_info(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw_placement_rule** rule,
+ rgw::sal::Attrs* attrs = nullptr) override;
+ virtual std::unique_ptr<Writer> get_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule, uint64_t part_num,
+ const std::string& part_num_str) override;
+ const std::string& get_bucket_name() { return bucket->get_name(); }
+};
+
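+// Top-level SAL driver for DAOS; owns the ds3 handle shared by all SAL
+// users, buckets and objects.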
+class DaosStore : public StoreDriver {
+ private:
+ DaosZone zone;
+ RGWSyncModuleInstanceRef sync_module;
+
+ public:
+ ds3_t* ds3 = nullptr;
+
+ CephContext* cctx;
+
+ DaosStore(CephContext* c) : zone(this), cctx(c) {}
+ ~DaosStore() = default;
+
+ virtual const std::string get_name() const override { return "daos"; }
+
+ virtual std::unique_ptr<User> get_user(const rgw_user& u) override;
+ virtual std::string get_cluster_id(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual int get_user_by_access_key(const DoutPrefixProvider* dpp,
+ const std::string& key, optional_yield y,
+ std::unique_ptr<User>* user) override;
+ virtual int get_user_by_email(const DoutPrefixProvider* dpp,
+ const std::string& email, optional_yield y,
+ std::unique_ptr<User>* user) override;
+ virtual int get_user_by_swift(const DoutPrefixProvider* dpp,
+ const std::string& user_str, optional_yield y,
+ std::unique_ptr<User>* user) override;
+ virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
+ virtual int get_bucket(const DoutPrefixProvider* dpp, User* u,
+ const rgw_bucket& b, std::unique_ptr<Bucket>* bucket,
+ optional_yield y) override;
+ virtual int get_bucket(User* u, const RGWBucketInfo& i,
+ std::unique_ptr<Bucket>* bucket) override;
+ virtual int get_bucket(const DoutPrefixProvider* dpp, User* u,
+ const std::string& tenant, const std::string& name,
+ std::unique_ptr<Bucket>* bucket,
+ optional_yield y) override;
+ virtual bool is_meta_master() override;
+ virtual int forward_request_to_master(const DoutPrefixProvider* dpp,
+ User* user, obj_version* objv,
+ bufferlist& in_data, JSONParser* jp,
+ req_info& info,
+ optional_yield y) override;
+ virtual int forward_iam_request_to_master(
+ const DoutPrefixProvider* dpp, const RGWAccessKey& key, obj_version* objv,
+ bufferlist& in_data, RGWXMLDecoder::XMLParser* parser, req_info& info,
+ optional_yield y) override;
+ virtual Zone* get_zone() { return &zone; }
+ virtual std::string zone_unique_id(uint64_t unique_num) override;
+ virtual std::string zone_unique_trans_id(const uint64_t unique_num) override;
+ virtual int cluster_stat(RGWClusterStat& stats) override;
+ virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
+ virtual std::unique_ptr<Notification> get_notification(
+ rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s,
+ rgw::notify::EventType event_type, optional_yield y,
+ const std::string* object_name = nullptr) override;
+ virtual std::unique_ptr<Notification> get_notification(
+ const DoutPrefixProvider* dpp, rgw::sal::Object* obj,
+ rgw::sal::Object* src_obj, rgw::notify::EventType event_type,
+ rgw::sal::Bucket* _bucket, std::string& _user_id,
+ std::string& _user_tenant, std::string& _req_id,
+ optional_yield y) override;
+ virtual RGWLC* get_rgwlc(void) override { return NULL; }
+ virtual RGWCoroutinesManagerRegistry* get_cr_registry() override {
+ return NULL;
+ }
+
+ virtual int log_usage(
+ const DoutPrefixProvider* dpp,
+ std::map<rgw_user_bucket, RGWUsageBatch>& usage_info) override;
+ virtual int log_op(const DoutPrefixProvider* dpp, std::string& oid,
+ bufferlist& bl) override;
+ virtual int register_to_service_map(
+ const DoutPrefixProvider* dpp, const std::string& daemon_type,
+ const std::map<std::string, std::string>& meta) override;
+ virtual void get_quota(RGWQuota& quota) override;
+ virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
+ RGWRateLimitInfo& user_ratelimit,
+ RGWRateLimitInfo& anon_ratelimit) override;
+ virtual int set_buckets_enabled(const DoutPrefixProvider* dpp,
+ std::vector<rgw_bucket>& buckets,
+ bool enabled) override;
+ virtual uint64_t get_new_req_id() override {
+ return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
+ }
+ virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp,
+ std::optional<rgw_zone_id> zone,
+ std::optional<rgw_bucket> bucket,
+ RGWBucketSyncPolicyHandlerRef* phandler,
+ optional_yield y) override;
+ virtual RGWDataSyncStatusManager* get_data_sync_manager(
+ const rgw_zone_id& source_zone) override;
+ virtual void wakeup_meta_sync_shards(std::set<int>& shard_ids) override {
+ return;
+ }
+ virtual void wakeup_data_sync_shards(
+ const DoutPrefixProvider* dpp, const rgw_zone_id& source_zone,
+ boost::container::flat_map<
+ int, boost::container::flat_set<rgw_data_notify_entry>>& shard_ids)
+ override {
+ return;
+ }
+ virtual int clear_usage(const DoutPrefixProvider* dpp) override {
+ return DAOS_NOT_IMPLEMENTED_LOG(dpp);
+ }
+ virtual int read_all_usage(
+ const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_all_usage(const DoutPrefixProvider* dpp,
+ uint64_t start_epoch, uint64_t end_epoch) override;
+ virtual int get_config_key_val(std::string name, bufferlist* bl) override;
+ virtual int meta_list_keys_init(const DoutPrefixProvider* dpp,
+ const std::string& section,
+ const std::string& marker,
+ void** phandle) override;
+ virtual int meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle,
+ int max, std::list<std::string>& keys,
+ bool* truncated) override;
+ virtual void meta_list_keys_complete(void* handle) override;
+ virtual std::string meta_get_marker(void* handle) override;
+ virtual int meta_remove(const DoutPrefixProvider* dpp,
+ std::string& metadata_key, optional_yield y) override;
+
+ virtual const RGWSyncModuleInstanceRef& get_sync_module() {
+ return sync_module;
+ }
+ virtual std::string get_host_id() { return ""; }
+
+ virtual std::unique_ptr<LuaManager> get_lua_manager() override;
+ virtual std::unique_ptr<RGWRole> get_role(
+ std::string name, std::string tenant, std::string path = "",
+ std::string trust_policy = "", std::string max_session_duration_str = "",
+ std::multimap<std::string, std::string> tags = {}) override;
+ virtual std::unique_ptr<RGWRole> get_role(const RGWRoleInfo& info) override;
+ virtual std::unique_ptr<RGWRole> get_role(std::string id) override;
+ virtual int get_roles(const DoutPrefixProvider* dpp, optional_yield y,
+ const std::string& path_prefix,
+ const std::string& tenant,
+ std::vector<std::unique_ptr<RGWRole>>& roles) override;
+ virtual std::unique_ptr<RGWOIDCProvider> get_oidc_provider() override;
+ virtual int get_oidc_providers(
+ const DoutPrefixProvider* dpp, const std::string& tenant,
+ std::vector<std::unique_ptr<RGWOIDCProvider>>& providers) override;
+ virtual std::unique_ptr<Writer> get_append_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule,
+ const std::string& unique_tag, uint64_t position,
+ uint64_t* cur_accounted_size) override;
+ virtual std::unique_ptr<Writer> get_atomic_writer(
+ const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Object* obj, const rgw_user& owner,
+ const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch,
+ const std::string& unique_tag) override;
+ virtual const std::string& get_compression_type(
+ const rgw_placement_rule& rule) override;
+ virtual bool valid_placement(const rgw_placement_rule& rule) override;
+
+ virtual void finalize(void) override;
+
+ virtual CephContext* ctx(void) override { return cctx; }
+
+ virtual int initialize(CephContext* cct,
+ const DoutPrefixProvider* dpp) override;
+};
+
+} // namespace rgw::sal
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=2 sw=2 expandtab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * SAL implementation for the CORTX Motr backend
+ *
+ * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+extern "C" {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wextern-c-compat"
+#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
+#include "motr/config.h"
+#include "lib/types.h"
+#include "lib/trace.h" // m0_trace_set_mmapped_buffer
+#include "motr/layout.h" // M0_OBJ_LAYOUT_ID
+#include "helpers/helpers.h" // m0_ufid_next
+#pragma clang diagnostic pop
+}
+
+#include "common/Clock.h"
+#include "common/errno.h"
+
+#include "rgw_compression.h"
+#include "rgw_sal.h"
+#include "rgw_sal_motr.h"
+#include "rgw_bucket.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using std::string;
+using std::map;
+using std::vector;
+using std::set;
+using std::list;
+
+static string mp_ns = RGW_OBJ_NS_MULTIPART;
+static struct m0_ufid_generator ufid_gr;
+
+namespace rgw::sal {
+
+using ::ceph::encode;
+using ::ceph::decode;
+
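+// Names of the global Motr indices that hold user, bucket and IAM
+// access-key/email records.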
+static std::string motr_global_indices[] = {
+ RGW_MOTR_USERS_IDX_NAME,
+ RGW_MOTR_BUCKET_INST_IDX_NAME,
+ RGW_MOTR_BUCKET_HD_IDX_NAME,
+ RGW_IAM_MOTR_ACCESS_KEY,
+ RGW_IAM_MOTR_EMAIL_KEY
+};
+
+void MotrMetaCache::invalid(const DoutPrefixProvider *dpp,
+ const string& name)
+{
+ cache.invalidate_remove(dpp, name);
+}
+
+int MotrMetaCache::put(const DoutPrefixProvider *dpp,
+ const string& name,
+ const bufferlist& data)
+{
+ ldpp_dout(dpp, 0) << "Put into cache: name = " << name << dendl;
+
+ ObjectCacheInfo info;
+ info.status = 0;
+ info.data = data;
+ info.flags = CACHE_FLAG_DATA;
+ info.meta.mtime = ceph::real_clock::now();
+ info.meta.size = data.length();
+ cache.put(dpp, name, info, NULL);
+
+  // Inform other RGW instances. Errors are only logged; the local put still succeeds.
+ int rc = distribute_cache(dpp, name, info, UPDATE_OBJ);
+ if (rc < 0)
+ ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << name << dendl;
+
+ return 0;
+}
+
+int MotrMetaCache::get(const DoutPrefixProvider *dpp,
+ const string& name,
+ bufferlist& data)
+{
+ ObjectCacheInfo info;
+ uint32_t flags = CACHE_FLAG_DATA;
+ int rc = cache.get(dpp, name, info, flags, NULL);
+ if (rc == 0) {
+ if (info.status < 0)
+ return info.status;
+
+ bufferlist& bl = info.data;
+ bufferlist::iterator it = bl.begin();
+ data.clear();
+
+ it.copy_all(data);
+ ldpp_dout(dpp, 0) << "Cache hit: name = " << name << dendl;
+ return 0;
+ }
+ ldpp_dout(dpp, 0) << "Cache miss: name = " << name << ", rc = "<< rc << dendl;
+ if(rc == -ENODATA)
+ return -ENOENT;
+
+ return rc;
+}
+
+int MotrMetaCache::remove(const DoutPrefixProvider *dpp,
+ const string& name)
+{
+ cache.invalidate_remove(dpp, name);
+
+ ObjectCacheInfo info;
+ int rc = distribute_cache(dpp, name, info, INVALIDATE_OBJ);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: " <<__func__<< "(): failed to distribute cache: rc =" << rc << dendl;
+ }
+
+ ldpp_dout(dpp, 0) << "Remove from cache: name = " << name << dendl;
+ return 0;
+}
+
+int MotrMetaCache::distribute_cache(const DoutPrefixProvider *dpp,
+ const string& normal_name,
+ ObjectCacheInfo& obj_info, int op)
+{
+ return 0;
+}
+
+int MotrMetaCache::watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl)
+{
+ return 0;
+}
+
+void MotrMetaCache::set_enabled(bool status)
+{
+ cache.set_enabled(status);
+}
+
+// TODO: properly handle the number of key/value pairs to fetch in
+// one query. For now the POC simply tries to retrieve up to `max` pairs
+// starting from the key `marker`.
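+// The per-user bucket list lives in the index "motr.rgw.user.info.<user_id>",
+// where each key is a bucket name and each value is an encoded RGWBucketEnt
+// (see MotrBucket::link_user() below), e.g.:
+//   "motr.rgw.user.info.tester" : { "mybucket" -> RGWBucketEnt, ... }
+// (the "tester"/"mybucket" names are illustrative only).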
+int MotrUser::list_buckets(const DoutPrefixProvider *dpp, const string& marker,
+ const string& end_marker, uint64_t max, bool need_stats,
+ BucketList &buckets, optional_yield y)
+{
+ int rc;
+ vector<string> keys(max);
+ vector<bufferlist> vals(max);
+ bool is_truncated = false;
+
+ ldpp_dout(dpp, 20) <<__func__<< ": list_user_buckets: marker=" << marker
+ << " end_marker=" << end_marker
+ << " max=" << max << dendl;
+
+  // Retrieve up to `max` key/value pairs.
+ buckets.clear();
+ string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
+ keys[0] = marker;
+ rc = store->next_query_by_name(user_info_iname, keys, vals);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ // Process the returned pairs to add into BucketList.
+ uint64_t bcount = 0;
+ for (const auto& bl: vals) {
+ if (bl.length() == 0)
+ break;
+
+ RGWBucketEnt ent;
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+
+ std::time_t ctime = ceph::real_clock::to_time_t(ent.creation_time);
+ ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl;
+
+ if (!end_marker.empty() &&
+ end_marker.compare(ent.bucket.marker) <= 0)
+ break;
+
+ buckets.add(std::make_unique<MotrBucket>(this->store, ent, this));
+ bcount++;
+ }
+ if (bcount == max)
+ is_truncated = true;
+ buckets.set_truncated(is_truncated);
+
+ return 0;
+}
+
+int MotrUser::create_bucket(const DoutPrefixProvider* dpp,
+ const rgw_bucket& b,
+ const std::string& zonegroup_id,
+ rgw_placement_rule& placement_rule,
+ std::string& swift_ver_location,
+ const RGWQuotaInfo* pquota_info,
+ const RGWAccessControlPolicy& policy,
+ Attrs& attrs,
+ RGWBucketInfo& info,
+ obj_version& ep_objv,
+ bool exclusive,
+ bool obj_lock_enabled,
+ bool* existed,
+ req_info& req_info,
+ std::unique_ptr<Bucket>* bucket_out,
+ optional_yield y)
+{
+ int ret;
+ std::unique_ptr<Bucket> bucket;
+
+ // Look up the bucket. Create it if it doesn't exist.
+ ret = this->store->get_bucket(dpp, this, b, &bucket, y);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+
+ if (ret != -ENOENT) {
+ *existed = true;
+ // if (swift_ver_location.empty()) {
+ // swift_ver_location = bucket->get_info().swift_ver_location;
+ // }
+ // placement_rule.inherit_from(bucket->get_info().placement_rule);
+
+ // TODO: ACL policy
+ // // don't allow changes to the acl policy
+ //RGWAccessControlPolicy old_policy(ctx());
+ //int rc = rgw_op_get_bucket_policy_from_attr(
+ // dpp, this, u, bucket->get_attrs(), &old_policy, y);
+ //if (rc >= 0 && old_policy != policy) {
+ // bucket_out->swap(bucket);
+ // return -EEXIST;
+ //}
+ } else {
+
+ placement_rule.name = "default";
+ placement_rule.storage_class = "STANDARD";
+ bucket = std::make_unique<MotrBucket>(store, b, this);
+ bucket->set_attrs(attrs);
+ *existed = false;
+ }
+
+ if (!*existed){
+ // TODO: how to handle zone and multi-site.
+ info.placement_rule = placement_rule;
+ info.bucket = b;
+ info.owner = this->get_info().user_id;
+ info.zonegroup = zonegroup_id;
+ if (obj_lock_enabled)
+ info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
+ bucket->set_version(ep_objv);
+ bucket->get_info() = info;
+
+ // Create a new bucket: (1) Add a key/value pair in the
+ // bucket instance index. (2) Create a new bucket index.
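+    // Index naming used below (see create_bucket_index() and
+    // create_multipart_indices()):
+    //   bucket instance entry:        RGW_MOTR_BUCKET_INST_IDX_NAME, keyed by bucket name
+    //   per-bucket object index:      "motr.rgw.bucket.index.<bucket>"
+    //   in-progress multipart index:  "motr.rgw.bucket.<bucket>.multiparts"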
+ MotrBucket* mbucket = static_cast<MotrBucket*>(bucket.get());
+ ret = mbucket->put_info(dpp, y, ceph::real_time())? :
+ mbucket->create_bucket_index() ? :
+ mbucket->create_multipart_indices();
+ if (ret < 0)
+ ldpp_dout(dpp, 0) << "ERROR: failed to create bucket indices! " << ret << dendl;
+
+ // Insert the bucket entry into the user info index.
+ ret = mbucket->link_user(dpp, this, y);
+ if (ret < 0)
+ ldpp_dout(dpp, 0) << "ERROR: failed to add bucket entry! " << ret << dendl;
+ } else {
+ return -EEXIST;
+ // bucket->set_version(ep_objv);
+ // bucket->get_info() = info;
+ }
+
+ bucket_out->swap(bucket);
+
+ return ret;
+}
+
+int MotrUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y)
+{
+ return 0;
+}
+
+int MotrUser::read_stats(const DoutPrefixProvider *dpp,
+ optional_yield y, RGWStorageStats* stats,
+ ceph::real_time *last_stats_sync,
+ ceph::real_time *last_stats_update)
+{
+ return 0;
+}
+
+/* stats - Not for first pass */
+int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb)
+{
+ return 0;
+}
+
+int MotrUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y)
+{
+ return 0;
+}
+
+int MotrUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
+ bool *is_truncated, RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage)
+{
+ return 0;
+}
+
+int MotrUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
+{
+ return 0;
+}
+
+int MotrUser::load_user_from_idx(const DoutPrefixProvider *dpp,
+ MotrStore *store,
+ RGWUserInfo& info, map<string, bufferlist> *attrs,
+ RGWObjVersionTracker *objv_tr)
+{
+ struct MotrUserInfo muinfo;
+ bufferlist bl;
+ ldpp_dout(dpp, 20) << "info.user_id.id = " << info.user_id.id << dendl;
+ if (store->get_user_cache()->get(dpp, info.user_id.id, bl)) {
+ // Cache misses
+ int rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+ M0_IC_GET, info.user_id.to_str(), bl);
+ ldpp_dout(dpp, 20) << "do_idx_op_by_name() = " << rc << dendl;
+ if (rc < 0)
+ return rc;
+
+ // Put into cache.
+ store->get_user_cache()->put(dpp, info.user_id.id, bl);
+ }
+
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ muinfo.decode(iter);
+ info = muinfo.info;
+ if (attrs)
+ *attrs = muinfo.attrs;
+ if (objv_tr)
+ {
+ objv_tr->read_version = muinfo.user_version;
+ objv_tracker.read_version = objv_tr->read_version;
+ }
+
+ if (!info.access_keys.empty()) {
+ for(auto key : info.access_keys) {
+ access_key_tracker.insert(key.first);
+ }
+ }
+
+ return 0;
+}
+
+int MotrUser::load_user(const DoutPrefixProvider *dpp,
+ optional_yield y)
+{
+ ldpp_dout(dpp, 20) << "load user: user id = " << info.user_id.to_str() << dendl;
+ return load_user_from_idx(dpp, store, info, &attrs, &objv_tracker);
+}
+
+int MotrUser::create_user_info_idx()
+{
+ string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
+ return store->create_motr_idx_by_name(user_info_iname);
+}
+
+int MotrUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y)
+{
+ for (auto& it : new_attrs)
+ attrs[it.first] = it.second;
+
+ return store_user(dpp, y, false);
+}
+
+int MotrUser::store_user(const DoutPrefixProvider* dpp,
+ optional_yield y, bool exclusive, RGWUserInfo* old_info)
+{
+ bufferlist bl;
+ struct MotrUserInfo muinfo;
+ RGWUserInfo orig_info;
+ RGWObjVersionTracker objv_tr = {};
+ obj_version& obj_ver = objv_tr.read_version;
+
+ ldpp_dout(dpp, 20) << "Store_user(): User = " << info.user_id.id << dendl;
+ orig_info.user_id = info.user_id;
+ // XXX: we open and close motr idx 2 times in this method:
+ // 1) on load_user_from_idx() here and 2) on do_idx_op_by_name(PUT) below.
+  // Maybe this can be optimised later somehow.
+ int rc = load_user_from_idx(dpp, store, orig_info, nullptr, &objv_tr);
+ ldpp_dout(dpp, 10) << "Get user: rc = " << rc << dendl;
+
+ // Check if the user already exists
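+  // The stored record carries a user version (muinfo.user_version). The update
+  // only proceeds if the stored version matches this handle's cached
+  // read_version; otherwise -ECANCELED is returned below, acting as a simple
+  // optimistic-concurrency check. With `exclusive` set, an existing user is
+  // left untouched.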
+ if (rc == 0 && obj_ver.ver > 0) {
+ if (old_info)
+ *old_info = orig_info;
+
+ if (obj_ver.ver != objv_tracker.read_version.ver) {
+ rc = -ECANCELED;
+ ldpp_dout(dpp, 0) << "ERROR: User Read version mismatch" << dendl;
+ goto out;
+ }
+
+ if (exclusive)
+ return rc;
+
+ obj_ver.ver++;
+ } else {
+ obj_ver.ver = 1;
+ obj_ver.tag = "UserTAG";
+ }
+
+ // Insert the user to user info index.
+ muinfo.info = info;
+ muinfo.attrs = attrs;
+ muinfo.user_version = obj_ver;
+ muinfo.encode(bl);
+ rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+ M0_IC_PUT, info.user_id.to_str(), bl);
+ ldpp_dout(dpp, 10) << "Store user to motr index: rc = " << rc << dendl;
+ if (rc == 0) {
+ objv_tracker.read_version = obj_ver;
+ objv_tracker.write_version = obj_ver;
+ }
+
+ // Store access key in access key index
+ if (!info.access_keys.empty()) {
+ std::string access_key;
+ std::string secret_key;
+ std::map<std::string, RGWAccessKey>::const_iterator iter = info.access_keys.begin();
+ const RGWAccessKey& k = iter->second;
+ access_key = k.id;
+ secret_key = k.key;
+ MotrAccessKey MGWUserKeys(access_key, secret_key, info.user_id.to_str());
+ store->store_access_key(dpp, y, MGWUserKeys);
+ access_key_tracker.insert(access_key);
+ }
+
+  // Check if any keys need to be deleted
+ if (access_key_tracker.size() != info.access_keys.size()) {
+ std::string key_for_deletion;
+ for (auto key : access_key_tracker) {
+ if (!info.get_key(key)) {
+ key_for_deletion = key;
+ ldpp_dout(dpp, 0) << "Deleting access key: " << key_for_deletion << dendl;
+        rc = store->delete_access_key(dpp, y, key_for_deletion);
+        if (rc < 0) {
+          ldpp_dout(dpp, 0) << "Unable to delete access key: rc = " << rc << dendl;
+        }
+ }
+ }
+    if (rc >= 0) {
+ access_key_tracker.erase(key_for_deletion);
+ }
+ }
+
+ if (!info.user_email.empty()) {
+ MotrEmailInfo MGWEmailInfo(info.user_id.to_str(), info.user_email);
+ store->store_email_info(dpp, y, MGWEmailInfo);
+ }
+
+  // Create the user info index to store all buckets that belong
+  // to this user.
+ rc = create_user_info_idx();
+ if (rc < 0 && rc != -EEXIST) {
+ ldpp_dout(dpp, 0) << "Failed to create user info index: rc = " << rc << dendl;
+ goto out;
+ }
+
+ // Put the user info into cache.
+ rc = store->get_user_cache()->put(dpp, info.user_id.id, bl);
+
+out:
+ return rc;
+}
+
+int MotrUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y)
+{
+ // Remove user info from cache
+ // Delete access keys for user
+ // Delete user info
+ // Delete user from user index
+ // Delete email for user - TODO
+ bufferlist bl;
+ int rc;
+ // Remove the user info from cache.
+ store->get_user_cache()->remove(dpp, info.user_id.id);
+
+ // Delete all access key of user
+ if (!info.access_keys.empty()) {
+ for(auto acc_key = info.access_keys.begin(); acc_key != info.access_keys.end(); acc_key++) {
+ auto access_key = acc_key->first;
+ rc = store->delete_access_key(dpp, y, access_key);
+      // TODO
+      // Check the error code for the case where the access key does not exist;
+      // continue to the next step only if the delete failed because the key doesn't exist.
+      if (rc < 0) {
+        ldpp_dout(dpp, 0) << "Unable to delete access key: rc = " << rc << dendl;
+      }
+ }
+ }
+
+  // Delete email id
+ if (!info.user_email.empty()) {
+ rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+ M0_IC_DEL, info.user_email, bl);
+ if (rc < 0 && rc != -ENOENT) {
+ ldpp_dout(dpp, 0) << "Unable to delete email id " << rc << dendl;
+ }
+ }
+
+ // Delete user info index
+ string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
+ store->delete_motr_idx_by_name(user_info_iname);
+ ldpp_dout(dpp, 10) << "Deleted user info index - " << user_info_iname << dendl;
+
+ // Delete user from user index
+ rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+ M0_IC_DEL, info.user_id.to_str(), bl);
+ if (rc < 0){
+ ldpp_dout(dpp, 0) << "Unable to delete user from user index " << rc << dendl;
+ return rc;
+ }
+
+ // TODO
+ // Delete email for user
+ // rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+ // M0_IC_DEL, info.user_email, bl);
+ // if (rc < 0){
+ // ldpp_dout(dpp, 0) << "Unable to delete email for user" << rc << dendl;
+ // return rc;
+ // }
+ return 0;
+}
+
+int MotrUser::verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider *dpp, optional_yield y)
+{
+ *verified = false;
+ return 0;
+}
+
+int MotrBucket::remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y)
+{
+ int ret;
+
+ ldpp_dout(dpp, 20) << "remove_bucket Entry=" << info.bucket.name << dendl;
+
+ // Refresh info
+ ret = load_bucket(dpp, y);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket load_bucket failed rc=" << ret << dendl;
+ return ret;
+ }
+
+ ListParams params;
+ params.list_versions = true;
+ params.allow_unordered = true;
+
+ ListResults results;
+
+ // 1. Check if Bucket has objects.
+ // If bucket contains objects and delete_children is true, delete all objects.
+ // Else throw error that bucket is not empty.
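+  // A sketch of the loop below: list entries in batches of up to 1000, delete
+  // each object (keys with an empty instance are addressed as the "null"
+  // version), and repeat while the listing is truncated.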
+ do {
+ results.objs.clear();
+
+ // Check if bucket has objects.
+ ret = list(dpp, params, 1000, results, y);
+ if (ret < 0) {
+ return ret;
+ }
+
+ // If result contains entries, bucket is not empty.
+ if (!results.objs.empty() && !delete_children) {
+ ldpp_dout(dpp, 0) << "ERROR: could not remove non-empty bucket " << info.bucket.name << dendl;
+ return -ENOTEMPTY;
+ }
+
+ for (const auto& obj : results.objs) {
+ rgw_obj_key key(obj.key);
+ if (key.instance.empty()) {
+ key.instance = "null";
+ }
+
+ std::unique_ptr<rgw::sal::Object> object = get_object(key);
+
+ ret = object->delete_object(dpp, null_yield);
+ if (ret < 0 && ret != -ENOENT) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket rgw_remove_object failed rc=" << ret << dendl;
+ return ret;
+ }
+ }
+ } while(results.is_truncated);
+
+ // 2. Abort Mp uploads on the bucket.
+ ret = abort_multiparts(dpp, store->ctx());
+ if (ret < 0) {
+ return ret;
+ }
+
+  // 3. Remove the multipart index.
+ string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts";
+ ret = store->delete_motr_idx_by_name(bucket_multipart_iname);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove multipart index rc=" << ret << dendl;
+ return ret;
+ }
+
+ // 4. Sync user stats.
+ ret = this->sync_user_stats(dpp, y);
+ if (ret < 0) {
+ ldout(store->ctx(), 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
+ }
+
+ // 5. Remove the bucket from user info index. (unlink user)
+ ret = this->unlink_user(dpp, owner, y);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl;
+ return ret;
+ }
+
+ // 6. Remove bucket index.
+ string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
+ ret = store->delete_motr_idx_by_name(bucket_index_iname);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl;
+ return ret;
+ }
+
+ // 7. Remove bucket instance info.
+ bufferlist bl;
+ ret = store->get_bucket_inst_cache()->remove(dpp, info.bucket.name);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance from cache rc="
+ << ret << dendl;
+ return ret;
+ }
+
+ ret = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
+ M0_IC_DEL, info.bucket.name, bl);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance rc="
+ << ret << dendl;
+ return ret;
+ }
+
+ // TODO :
+ // 8. Remove Notifications
+ // if bucket has notification definitions associated with it
+ // they should be removed (note that any pending notifications on the bucket are still going to be sent)
+
+ // 9. Forward request to master.
+ if (forward_to_master) {
+ bufferlist in_data;
+ ret = store->forward_request_to_master(dpp, owner, &bucket_version, in_data, nullptr, *req_info, y);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ /* adjust error, we want to return with NoSuchBucket and not
+ * NoSuchKey */
+ ret = -ERR_NO_SUCH_BUCKET;
+ }
+ ldpp_dout(dpp, 0) << "ERROR: Forward to master failed. ret=" << ret << dendl;
+ return ret;
+ }
+ }
+
+ ldpp_dout(dpp, 20) << "remove_bucket Exit=" << info.bucket.name << dendl;
+
+ return ret;
+}
+
+int MotrBucket::remove_bucket_bypass_gc(int concurrent_max,
+        bool keep_index_consistent,
+        optional_yield y,
+        const DoutPrefixProvider *dpp) {
+ return 0;
+}
+
+int MotrBucket::put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time _mtime)
+{
+ bufferlist bl;
+ struct MotrBucketInfo mbinfo;
+
+ ldpp_dout(dpp, 20) << "put_info(): bucket_id=" << info.bucket.bucket_id << dendl;
+ mbinfo.info = info;
+ mbinfo.bucket_attrs = attrs;
+ mbinfo.mtime = _mtime;
+ mbinfo.bucket_version = bucket_version;
+ mbinfo.encode(bl);
+
+ // Insert bucket instance using bucket's marker (string).
+ int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
+ M0_IC_PUT, info.bucket.name, bl, !exclusive);
+ if (rc == 0)
+ store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl);
+
+ return rc;
+}
+
+int MotrBucket::load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats)
+{
+  // Get the bucket instance using the bucket's name (string). TODO: or should the bucket id be used?
+ bufferlist bl;
+ if (store->get_bucket_inst_cache()->get(dpp, info.bucket.name, bl)) {
+ // Cache misses.
+ ldpp_dout(dpp, 20) << "load_bucket(): name=" << info.bucket.name << dendl;
+ int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
+ M0_IC_GET, info.bucket.name, bl);
+ ldpp_dout(dpp, 20) << "load_bucket(): rc=" << rc << dendl;
+ if (rc < 0)
+ return rc;
+ store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl);
+ }
+
+ struct MotrBucketInfo mbinfo;
+ bufferlist& blr = bl;
+  auto iter = blr.cbegin();
+  mbinfo.decode(iter); // Decode into MotrBucketInfo.
+
+ info = mbinfo.info;
+ ldpp_dout(dpp, 20) << "load_bucket(): bucket_id=" << info.bucket.bucket_id << dendl;
+ rgw_placement_rule placement_rule;
+ placement_rule.name = "default";
+ placement_rule.storage_class = "STANDARD";
+ info.placement_rule = placement_rule;
+
+ attrs = mbinfo.bucket_attrs;
+ mtime = mbinfo.mtime;
+ bucket_version = mbinfo.bucket_version;
+
+ return 0;
+}
+
+int MotrBucket::link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y)
+{
+ bufferlist bl;
+ RGWBucketEnt new_bucket;
+ ceph::real_time creation_time = get_creation_time();
+
+ // RGWBucketEnt or cls_user_bucket_entry is the structure that is stored.
+ new_bucket.bucket = info.bucket;
+ new_bucket.size = 0;
+ if (real_clock::is_zero(creation_time))
+ creation_time = ceph::real_clock::now();
+ new_bucket.creation_time = creation_time;
+ new_bucket.encode(bl);
+ std::time_t ctime = ceph::real_clock::to_time_t(new_bucket.creation_time);
+ ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl;
+
+  // Insert the bucket entry into the user info index.
+ string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str();
+ return store->do_idx_op_by_name(user_info_idx_name,
+ M0_IC_PUT, info.bucket.name, bl);
+
+}
+
+int MotrBucket::unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y)
+{
+  // Remove the bucket entry from the user info index.
+ bufferlist bl;
+ string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str();
+ return store->do_idx_op_by_name(user_info_idx_name,
+ M0_IC_DEL, info.bucket.name, bl);
+}
+
+/* stats - Not for first pass */
+int MotrBucket::read_stats(const DoutPrefixProvider *dpp,
+ const bucket_index_layout_generation& idx_layout, int shard_id,
+ std::string *bucket_ver, std::string *master_ver,
+ std::map<RGWObjCategory, RGWStorageStats>& stats,
+ std::string *max_marker, bool *syncstopped)
+{
+ return 0;
+}
+
+int MotrBucket::create_bucket_index()
+{
+ string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
+ return store->create_motr_idx_by_name(bucket_index_iname);
+}
+
+int MotrBucket::create_multipart_indices()
+{
+ int rc;
+
+ // Bucket multipart index stores in-progress multipart uploads.
+ // Key is the object name + upload_id, value is a rgw_bucket_dir_entry.
+ // An entry is inserted when a multipart upload is initialised (
+ // MotrMultipartUpload::init()) and will be removed when the upload
+ // is completed (MotrMultipartUpload::complete()).
+ // MotrBucket::list_multiparts() will scan this index to return all
+ // in-progress multipart uploads in the bucket.
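+  // Illustrative example: an in-progress upload of "photos/cat.jpg" in bucket
+  // "b1" would get an entry in "motr.rgw.bucket.b1.multiparts" keyed by the
+  // object name combined with its upload id (the exact key format is defined
+  // where the upload is initialised).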
+ string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts";
+ rc = store->create_motr_idx_by_name(bucket_multipart_iname);
+ if (rc < 0) {
+ ldout(store->cctx, 0) << "Failed to create bucket multipart index " << bucket_multipart_iname << dendl;
+ return rc;
+ }
+
+ return 0;
+}
+
+
+int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp,
+ const bucket_index_layout_generation& idx_layout,
+ int shard_id, RGWGetBucketStats_CB *ctx)
+{
+ return 0;
+}
+
+int MotrBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y)
+{
+ return 0;
+}
+
+int MotrBucket::update_container_stats(const DoutPrefixProvider *dpp)
+{
+ return 0;
+}
+
+int MotrBucket::check_bucket_shards(const DoutPrefixProvider *dpp)
+{
+ return 0;
+}
+
+int MotrBucket::chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y)
+{
+ // TODO: update bucket with new owner
+ return 0;
+}
+
+/* Make sure to call load_bucket() if you need it first */
+bool MotrBucket::is_owner(User* user)
+{
+ return (info.owner.compare(user->get_id()) == 0);
+}
+
+int MotrBucket::check_empty(const DoutPrefixProvider *dpp, optional_yield y)
+{
+ /* XXX: Check if bucket contains any objects */
+ return 0;
+}
+
+int MotrBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size,
+ optional_yield y, bool check_size_only)
+{
+ /* Not Handled in the first pass as stats are also needed */
+ return 0;
+}
+
+int MotrBucket::merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& new_attrs, optional_yield y)
+{
+ for (auto& it : new_attrs)
+ attrs[it.first] = it.second;
+
+ return put_info(dpp, y, ceph::real_time());
+}
+
+int MotrBucket::try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime)
+{
+ return 0;
+}
+
+/* XXX: usage and stats not supported in the first pass */
+int MotrBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool *is_truncated,
+ RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage)
+{
+ return 0;
+}
+
+int MotrBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
+{
+ return 0;
+}
+
+int MotrBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list<rgw_obj_index_key>& objs_to_unlink)
+{
+  /* XXX: CHECK: Unlike RadosStore, there is no separate bucket index table.
+   * Delete all the objects in the list from the object table of this
+   * bucket.
+   */
+ return 0;
+}
+
+int MotrBucket::check_index(const DoutPrefixProvider *dpp, std::map<RGWObjCategory, RGWStorageStats>& existing_stats, std::map<RGWObjCategory, RGWStorageStats>& calculated_stats)
+{
+ /* XXX: stats not supported yet */
+ return 0;
+}
+
+int MotrBucket::rebuild_index(const DoutPrefixProvider *dpp)
+{
+  /* XXX: rebuilding the bucket index is not supported yet. */
+ return 0;
+}
+
+int MotrBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout)
+{
+ /* XXX: CHECK: set tag timeout for all the bucket objects? */
+ return 0;
+}
+
+int MotrBucket::purge_instance(const DoutPrefixProvider *dpp)
+{
+  /* XXX: CHECK: only a single instance is supported for now.
+   * Remove all the objects for that instance? Anything extra needed?
+   */
+ return 0;
+}
+
+int MotrBucket::set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy &acl, optional_yield y)
+{
+ int ret = 0;
+ bufferlist aclbl;
+
+ acls = acl;
+ acl.encode(aclbl);
+
+ Attrs attrs = get_attrs();
+ attrs[RGW_ATTR_ACL] = aclbl;
+
+ // TODO: update bucket entry with the new attrs
+
+ return ret;
+}
+
+std::unique_ptr<Object> MotrBucket::get_object(const rgw_obj_key& k)
+{
+ return std::make_unique<MotrObject>(this->store, k, this);
+}
+
+int MotrBucket::list(const DoutPrefixProvider *dpp, ListParams& params, int max, ListResults& results, optional_yield y)
+{
+ int rc;
+ vector<string> keys(max);
+ vector<bufferlist> vals(max);
+
+ ldpp_dout(dpp, 20) << "bucket=" << info.bucket.name
+ << " prefix=" << params.prefix
+ << " marker=" << params.marker
+ << " max=" << max << dendl;
+
+  // Retrieve up to `max` key/value pairs.
+ string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
+ keys[0] = params.marker.empty() ? params.prefix :
+ params.marker.get_oid();
+ rc = store->next_query_by_name(bucket_index_iname, keys, vals, params.prefix,
+ params.delim);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ // Process the returned pairs to add into ListResults.
+ int i = 0;
+ for (; i < rc; ++i) {
+ if (vals[i].length() == 0) {
+ results.common_prefixes[keys[i]] = true;
+ } else {
+ rgw_bucket_dir_entry ent;
+ auto iter = vals[i].cbegin();
+ ent.decode(iter);
+ if (params.list_versions || ent.is_visible())
+ results.objs.emplace_back(std::move(ent));
+ }
+ }
+
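+  // If the whole batch was filled, assume there may be more entries: mark the
+  // result as truncated and set next_marker to the last returned key with a
+  // trailing space appended, so the next NEXT query resumes just after that
+  // key (an assumption based on the marker handling below).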
+ if (i == max) {
+ results.is_truncated = true;
+ results.next_marker = keys[max - 1] + " ";
+ } else {
+ results.is_truncated = false;
+ }
+
+ return 0;
+}
+
+int MotrBucket::list_multiparts(const DoutPrefixProvider *dpp,
+ const string& prefix,
+ string& marker,
+ const string& delim,
+ const int& max_uploads,
+ vector<std::unique_ptr<MultipartUpload>>& uploads,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated)
+{
+ int rc;
+ vector<string> key_vec(max_uploads);
+ vector<bufferlist> val_vec(max_uploads);
+
+ string bucket_multipart_iname =
+ "motr.rgw.bucket." + this->get_name() + ".multiparts";
+ key_vec[0].clear();
+ key_vec[0].assign(marker.begin(), marker.end());
+ rc = store->next_query_by_name(bucket_multipart_iname, key_vec, val_vec);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ // Process the returned pairs to add into ListResults.
+ // The POC can only support listing all objects or selecting
+ // with prefix.
+ int ocount = 0;
+ rgw_obj_key last_obj_key;
+ *is_truncated = false;
+ for (const auto& bl: val_vec) {
+ if (bl.length() == 0)
+ break;
+
+ rgw_bucket_dir_entry ent;
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+
+ if (prefix.size() &&
+ (0 != ent.key.name.compare(0, prefix.size(), prefix))) {
+ ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ <<
+ ": skippping \"" << ent.key <<
+ "\" because doesn't match prefix" << dendl;
+ continue;
+ }
+
+ rgw_obj_key key(ent.key);
+ uploads.push_back(this->get_multipart_upload(key.name));
+ last_obj_key = key;
+ ocount++;
+ if (ocount == max_uploads) {
+ *is_truncated = true;
+ break;
+ }
+ }
+ marker = last_obj_key.name;
+
+  // Common prefixes are not handled for now.
+
+  return 0;
+}
+
+int MotrBucket::abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct)
+{
+ return 0;
+}
+
+void MotrStore::finalize(void)
+{
+ // close connection with motr
+ m0_client_fini(this->instance, true);
+}
+
+const std::string& MotrZoneGroup::get_endpoint() const
+{
+ if (!group.endpoints.empty()) {
+ return group.endpoints.front();
+ } else {
+ // use zonegroup's master zone endpoints
+ auto z = group.zones.find(group.master_zone);
+ if (z != group.zones.end() && !z->second.endpoints.empty()) {
+ return z->second.endpoints.front();
+ }
+ }
+ return empty;
+}
+
+bool MotrZoneGroup::placement_target_exists(std::string& target) const
+{
+ return !!group.placement_targets.count(target);
+}
+
+int MotrZoneGroup::get_placement_target_names(std::set<std::string>& names) const
+{
+ for (const auto& target : group.placement_targets) {
+ names.emplace(target.second.name);
+ }
+
+ return 0;
+}
+
+int MotrZoneGroup::get_placement_tier(const rgw_placement_rule& rule,
+ std::unique_ptr<PlacementTier>* tier)
+{
+ std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer;
+ titer = group.placement_targets.find(rule.name);
+ if (titer == group.placement_targets.end()) {
+ return -ENOENT;
+ }
+
+ const auto& target_rule = titer->second;
+ std::map<std::string, RGWZoneGroupPlacementTier>::const_iterator ttier;
+ ttier = target_rule.tier_targets.find(rule.storage_class);
+ if (ttier == target_rule.tier_targets.end()) {
+ // not found
+ return -ENOENT;
+ }
+
+  // Note: operator new throws on allocation failure, so no nullptr check is needed.
+  tier->reset(new MotrPlacementTier(store, ttier->second));
+  return 0;
+}
+
+ZoneGroup& MotrZone::get_zonegroup()
+{
+ return zonegroup;
+}
+
+const std::string& MotrZone::get_id()
+{
+ return zone_params->get_id();
+}
+
+const std::string& MotrZone::get_name() const
+{
+ return zone_params->get_name();
+}
+
+bool MotrZone::is_writeable()
+{
+ return true;
+}
+
+bool MotrZone::get_redirect_endpoint(std::string* endpoint)
+{
+ return false;
+}
+
+bool MotrZone::has_zonegroup_api(const std::string& api) const
+{
+ return (zonegroup.group.api_name == api);
+}
+
+const std::string& MotrZone::get_current_period_id()
+{
+ return current_period->get_id();
+}
+
+std::unique_ptr<LuaManager> MotrStore::get_lua_manager()
+{
+ return std::make_unique<MotrLuaManager>(this);
+}
+
+int MotrObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **_state, optional_yield y, bool follow_olh)
+{
+ // Get object's metadata (those stored in rgw_bucket_dir_entry).
+ bufferlist bl;
+ if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) {
+ // Cache misses.
+ string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
+ int rc = this->store->do_idx_op_by_name(bucket_index_iname,
+ M0_IC_GET, this->get_key().get_oid(), bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl;
+ return rc;
+ }
+
+ // Put into cache.
+ this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl);
+ }
+
+ rgw_bucket_dir_entry ent;
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ ent.decode(iter);
+
+ // Set object's type.
+ this->category = ent.meta.category;
+
+ // Set object state.
+ state.exists = true;
+ state.size = ent.meta.size;
+ state.accounted_size = ent.meta.size;
+ state.mtime = ent.meta.mtime;
+
+ state.has_attrs = true;
+ bufferlist etag_bl;
+ string& etag = ent.meta.etag;
+ ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl;
+ etag_bl.append(etag);
+ state.attrset[RGW_ATTR_ETAG] = etag_bl;
+
+ return 0;
+}
+
+MotrObject::~MotrObject() {
+ this->close_mobj();
+}
+
+// int MotrObject::read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj)
+// {
+// read_op.params.attrs = &attrs;
+// read_op.params.target_obj = target_obj;
+// read_op.params.obj_size = &obj_size;
+// read_op.params.lastmod = &mtime;
+//
+// return read_op.prepare(dpp);
+// }
+
+int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y)
+{
+ // TODO: implement
+ ldpp_dout(dpp, 20) <<__func__<< ": MotrObject::set_obj_attrs()" << dendl;
+ return 0;
+}
+
+int MotrObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj)
+{
+ if (this->category == RGWObjCategory::MultiMeta)
+ return 0;
+
+ string bname, key;
+ if (target_obj) {
+ bname = target_obj->bucket.name;
+ key = target_obj->key.get_oid();
+ } else {
+ bname = this->get_bucket()->get_name();
+ key = this->get_key().get_oid();
+ }
+ ldpp_dout(dpp, 20) << "MotrObject::get_obj_attrs(): "
+ << bname << "/" << key << dendl;
+
+ // Get object's metadata (those stored in rgw_bucket_dir_entry).
+ bufferlist bl;
+ if (this->store->get_obj_meta_cache()->get(dpp, key, bl)) {
+ // Cache misses.
+ string bucket_index_iname = "motr.rgw.bucket.index." + bname;
+ int rc = this->store->do_idx_op_by_name(bucket_index_iname, M0_IC_GET, key, bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl;
+ return rc;
+ }
+
+ // Put into cache.
+ this->store->get_obj_meta_cache()->put(dpp, key, bl);
+ }
+
+ rgw_bucket_dir_entry ent;
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ ent.decode(iter);
+ decode(state.attrset, iter);
+
+ return 0;
+}
+
+int MotrObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp)
+{
+ rgw_obj target = get_obj();
+ int r = get_obj_attrs(y, dpp, &target);
+ if (r < 0) {
+ return r;
+ }
+ set_atomic();
+ state.attrset[attr_name] = attr_val;
+ return set_obj_attrs(dpp, &state.attrset, nullptr, y);
+}
+
+int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y)
+{
+ rgw_obj target = get_obj();
+ Attrs rmattr;
+ bufferlist bl;
+
+ set_atomic();
+ rmattr[attr_name] = bl;
+ return set_obj_attrs(dpp, nullptr, &rmattr, y);
+}
+
+bool MotrObject::is_expired() {
+ return false;
+}
+
+// Taken from rgw_rados.cc
+void MotrObject::gen_rand_obj_instance_name()
+{
+ enum {OBJ_INSTANCE_LEN = 32};
+ char buf[OBJ_INSTANCE_LEN + 1];
+
+ gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN);
+ state.obj.key.set_instance(buf);
+}
+
+int MotrObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
+ const std::set<std::string>& keys,
+ Attrs* vals)
+{
+ return 0;
+}
+
+int MotrObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val,
+ bool must_exist, optional_yield y)
+{
+ return 0;
+}
+
+int MotrObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y)
+{
+ return 0;
+}
+
+std::unique_ptr<MPSerializer> MotrObject::get_serializer(const DoutPrefixProvider *dpp,
+ const std::string& lock_name)
+{
+ return std::make_unique<MPMotrSerializer>(dpp, store, this, lock_name);
+}
+
+int MotrObject::transition(Bucket* bucket,
+ const rgw_placement_rule& placement_rule,
+ const real_time& mtime,
+ uint64_t olh_epoch,
+ const DoutPrefixProvider* dpp,
+ optional_yield y)
+{
+ return 0;
+}
+
+bool MotrObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2)
+{
+ /* XXX: support single default zone and zonegroup for now */
+ return true;
+}
+
+int MotrObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f)
+{
+ return 0;
+}
+
+std::unique_ptr<Object::ReadOp> MotrObject::get_read_op()
+{
+ return std::make_unique<MotrObject::MotrReadOp>(this);
+}
+
+MotrObject::MotrReadOp::MotrReadOp(MotrObject *_source) :
+ source(_source)
+{ }
+
+int MotrObject::MotrReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp)
+{
+ int rc;
+ ldpp_dout(dpp, 20) <<__func__<< ": bucket=" << source->get_bucket()->get_name() << dendl;
+
+ rgw_bucket_dir_entry ent;
+ rc = source->get_bucket_dir_ent(dpp, ent);
+ if (rc < 0)
+ return rc;
+
+ // Set source object's attrs. The attrs is key/value map and is used
+ // in send_response_data() to set attributes, including etag.
+ bufferlist etag_bl;
+ string& etag = ent.meta.etag;
+ ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl;
+ etag_bl.append(etag.c_str(), etag.size());
+ source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl));
+
+ source->set_key(ent.key);
+ source->set_obj_size(ent.meta.size);
+ source->category = ent.meta.category;
+ *params.lastmod = ent.meta.mtime;
+
+ if (params.mod_ptr || params.unmod_ptr) {
+    // Convert all times to GMT to make them compatible
+ obj_time_weight src_weight;
+ src_weight.init(*params.lastmod, params.mod_zone_id, params.mod_pg_ver);
+ src_weight.high_precision = params.high_precision_time;
+
+ obj_time_weight dest_weight;
+ dest_weight.high_precision = params.high_precision_time;
+
+ // Check if-modified-since condition
+ if (params.mod_ptr && !params.if_nomatch) {
+ dest_weight.init(*params.mod_ptr, params.mod_zone_id, params.mod_pg_ver);
+ ldpp_dout(dpp, 10) << "If-Modified-Since: " << dest_weight << " & "
+ << "Last-Modified: " << src_weight << dendl;
+ if (!(dest_weight < src_weight)) {
+ return -ERR_NOT_MODIFIED;
+ }
+ }
+
+ // Check if-unmodified-since condition
+ if (params.unmod_ptr && !params.if_match) {
+ dest_weight.init(*params.unmod_ptr, params.mod_zone_id, params.mod_pg_ver);
+ ldpp_dout(dpp, 10) << "If-UnModified-Since: " << dest_weight << " & "
+ << "Last-Modified: " << src_weight << dendl;
+ if (dest_weight < src_weight) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+ }
+ // Check if-match condition
+ if (params.if_match) {
+ string if_match_str = rgw_string_unquote(params.if_match);
+ ldpp_dout(dpp, 10) << "ETag: " << etag << " & "
+ << "If-Match: " << if_match_str << dendl;
+ if (if_match_str.compare(etag) != 0) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+ // Check if-none-match condition
+ if (params.if_nomatch) {
+ string if_nomatch_str = rgw_string_unquote(params.if_nomatch);
+ ldpp_dout(dpp, 10) << "ETag: " << etag << " & "
+ << "If-NoMatch: " << if_nomatch_str << dendl;
+ if (if_nomatch_str.compare(etag) == 0) {
+ return -ERR_NOT_MODIFIED;
+ }
+ }
+
+ // Skip opening an empty object.
+ if(source->get_obj_size() == 0)
+ return 0;
+
+ // Open the object here.
+ if (source->category == RGWObjCategory::MultiMeta) {
+ ldpp_dout(dpp, 20) <<__func__<< ": open obj parts..." << dendl;
+ rc = source->get_part_objs(dpp, this->part_objs)? :
+ source->open_part_objs(dpp, this->part_objs);
+ return rc;
+ } else {
+ ldpp_dout(dpp, 20) <<__func__<< ": open object..." << dendl;
+ return source->open_mobj(dpp);
+ }
+}
+
+int MotrObject::MotrReadOp::read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp)
+{
+ ldpp_dout(dpp, 20) << "MotrReadOp::read(): sync read." << dendl;
+ return 0;
+}
+
+// RGWGetObj::execute() calls ReadOp::iterate() to read the object from 'off' to 'end'.
+// The returned data is processed in 'cb', which is a chain of post-processing
+// filters such as decompression, decryption and sending data back to the client
+// (RGWGetObj_CB::handle_data, which in turn calls RGWGetObj::get_data_cb() to
+// send the data back).
+//
+// The POC implements a simple sync version of iterate() which reads a block of
+// data at a time and calls 'cb' for post-processing.
+int MotrObject::MotrReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb, optional_yield y)
+{
+ int rc;
+
+ if (source->category == RGWObjCategory::MultiMeta)
+ rc = source->read_multipart_obj(dpp, off, end, cb, part_objs);
+ else
+ rc = source->read_mobj(dpp, off, end, cb);
+
+ return rc;
+}
+
+int MotrObject::MotrReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y)
+{
+ //return 0;
+ return -ENODATA;
+}
+
+std::unique_ptr<Object::DeleteOp> MotrObject::get_delete_op()
+{
+ return std::make_unique<MotrObject::MotrDeleteOp>(this);
+}
+
+MotrObject::MotrDeleteOp::MotrDeleteOp(MotrObject *_source) :
+ source(_source)
+{ }
+
+// Implementation of DELETE OBJ also requires MotrObject::get_obj_state()
+// to retrieve and set object's state from object's metadata.
+//
+// TODO:
+// 1. The POC only removes the object's entry from the bucket index and deletes
+//    the corresponding Motr objects. It doesn't handle the DeleteOp::params.
+//    Delete::delete_obj() in rgw_rados.cc shows how the rados backend processes
+//    the params.
+// 2. Delete an object when its versioning is turned on.
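+//
+// Roughly, delete_obj() below: (1) drops the entry from the in-memory metadata
+// cache, (2) removes the object's entry from the bucket index
+// "motr.rgw.bucket.index.<bucket>", and (3) deletes the Motr object(s):
+// all parts for a multipart object, a single object otherwise.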
+int MotrObject::MotrDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y)
+{
+ ldpp_dout(dpp, 20) << "delete " << source->get_key().get_oid() << " from " << source->get_bucket()->get_name() << dendl;
+
+ rgw_bucket_dir_entry ent;
+ int rc = source->get_bucket_dir_ent(dpp, ent);
+ if (rc < 0) {
+ return rc;
+ }
+
+ //TODO: When integrating with background GC for object deletion,
+ // we should consider adding object entry to GC before deleting the metadata.
+ // Delete from the cache first.
+ source->store->get_obj_meta_cache()->remove(dpp, source->get_key().get_oid());
+
+ // Delete the object's entry from the bucket index.
+ bufferlist bl;
+ string bucket_index_iname = "motr.rgw.bucket.index." + source->get_bucket()->get_name();
+ rc = source->store->do_idx_op_by_name(bucket_index_iname,
+ M0_IC_DEL, source->get_key().get_oid(), bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "Failed to del object's entry from bucket index. " << dendl;
+ return rc;
+ }
+
+ if (ent.meta.size == 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": Object size is 0, not deleting motr object." << dendl;
+ return 0;
+ }
+ // Remove the motr objects.
+ if (source->category == RGWObjCategory::MultiMeta)
+ rc = source->delete_part_objs(dpp);
+ else
+ rc = source->delete_mobj(dpp);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "Failed to delete the object from Motr. " << dendl;
+ return rc;
+ }
+
+ //result.delete_marker = parent_op.result.delete_marker;
+ //result.version_id = parent_op.result.version_id;
+ return 0;
+}
+
+int MotrObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, bool prevent_versioning)
+{
+ MotrObject::MotrDeleteOp del_op(this);
+ del_op.params.bucket_owner = bucket->get_info().owner;
+ del_op.params.versioning_status = bucket->get_info().versioning_status();
+
+ return del_op.delete_obj(dpp, y);
+}
+
+int MotrObject::copy_object(User* user,
+ req_info* info,
+ const rgw_zone_id& source_zone,
+ rgw::sal::Object* dest_object,
+ rgw::sal::Bucket* dest_bucket,
+ rgw::sal::Bucket* src_bucket,
+ const rgw_placement_rule& dest_placement,
+ ceph::real_time* src_mtime,
+ ceph::real_time* mtime,
+ const ceph::real_time* mod_ptr,
+ const ceph::real_time* unmod_ptr,
+ bool high_precision_time,
+ const char* if_match,
+ const char* if_nomatch,
+ AttrsMod attrs_mod,
+ bool copy_if_newer,
+ Attrs& attrs,
+ RGWObjCategory category,
+ uint64_t olh_epoch,
+ boost::optional<ceph::real_time> delete_at,
+ std::string* version_id,
+ std::string* tag,
+ std::string* etag,
+ void (*progress_cb)(off_t, void *),
+ void* progress_data,
+ const DoutPrefixProvider* dpp,
+ optional_yield y)
+{
+ return 0;
+}
+
+int MotrObject::swift_versioning_restore(bool& restored,
+ const DoutPrefixProvider* dpp)
+{
+ return 0;
+}
+
+int MotrObject::swift_versioning_copy(const DoutPrefixProvider* dpp,
+ optional_yield y)
+{
+ return 0;
+}
+
+MotrAtomicWriter::MotrAtomicWriter(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ MotrStore* _store,
+ const rgw_user& _owner,
+ const rgw_placement_rule *_ptail_placement_rule,
+ uint64_t _olh_epoch,
+ const std::string& _unique_tag) :
+ StoreWriter(dpp, y),
+ store(_store),
+ owner(_owner),
+ ptail_placement_rule(_ptail_placement_rule),
+ olh_epoch(_olh_epoch),
+ unique_tag(_unique_tag),
+ obj(_store, obj->get_key(), obj->get_bucket()),
+ old_obj(_store, obj->get_key(), obj->get_bucket()) {}
+
+static const unsigned MAX_BUFVEC_NR = 256;
+
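+// A sketch of prepare()'s intent, based only on the code visible here: it
+// pre-allocates empty Motr buffer/attr/extent vectors with MAX_BUFVEC_NR slots
+// each; data buffers are presumably attached in the write path (not shown),
+// and cleanup() is expected to release the vectors if allocation fails.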
+int MotrAtomicWriter::prepare(optional_yield y)
+{
+ total_data_size = 0;
+
+ if (obj.is_opened())
+ return 0;
+
+ rgw_bucket_dir_entry ent;
+ int rc = old_obj.get_bucket_dir_ent(dpp, ent);
+ if (rc == 0) {
+ ldpp_dout(dpp, 20) << __func__ << ": object exists." << dendl;
+ }
+
+ rc = m0_bufvec_empty_alloc(&buf, MAX_BUFVEC_NR) ?:
+ m0_bufvec_alloc(&attr, MAX_BUFVEC_NR, 1) ?:
+ m0_indexvec_alloc(&ext, MAX_BUFVEC_NR);
+ if (rc != 0)
+ this->cleanup();
+
+ return rc;
+}
+
+int MotrObject::create_mobj(const DoutPrefixProvider *dpp, uint64_t sz)
+{
+ if (mobj != nullptr) {
+ ldpp_dout(dpp, 0) <<__func__<< "ERROR: object is already opened" << dendl;
+ return -EINVAL;
+ }
+
+ int rc = m0_ufid_next(&ufid_gr, 1, &meta.oid);
+ if (rc != 0) {
+ ldpp_dout(dpp, 0) <<__func__<< "ERROR: m0_ufid_next() failed: " << rc << dendl;
+ return rc;
+ }
+
+ char fid_str[M0_FID_STR_LEN];
+ snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
+ ldpp_dout(dpp, 20) <<__func__<< ": sz=" << sz << " oid=" << fid_str << dendl;
+
+ int64_t lid = m0_layout_find_by_objsz(store->instance, nullptr, sz);
+ M0_ASSERT(lid > 0);
+
+ M0_ASSERT(mobj == nullptr);
+ mobj = new m0_obj();
+ m0_obj_init(mobj, &store->container.co_realm, &meta.oid, lid);
+
+ struct m0_op *op = nullptr;
+ mobj->ob_entity.en_flags |= M0_ENF_META;
+ rc = m0_entity_create(nullptr, &mobj->ob_entity, &op);
+ if (rc != 0) {
+ this->close_mobj();
+ ldpp_dout(dpp, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
+ return rc;
+ }
+ ldpp_dout(dpp, 20) <<__func__<< ": call m0_op_launch()..." << dendl;
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc != 0) {
+ this->close_mobj();
+ ldpp_dout(dpp, 0) << "ERROR: failed to create motr object: " << rc << dendl;
+ return rc;
+ }
+
+ meta.layout_id = mobj->ob_attr.oa_layout_id;
+ meta.pver = mobj->ob_attr.oa_pver;
+ ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id
+ << std::dec << " rc=" << rc << dendl;
+
+ // TODO: add key:user+bucket+key+obj.meta.oid value:timestamp to
+ // gc.queue.index. See more at github.com/Seagate/cortx-rgw/issues/7.
+
+ return rc;
+}
+
+int MotrObject::open_mobj(const DoutPrefixProvider *dpp)
+{
+ char fid_str[M0_FID_STR_LEN];
+ snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
+ ldpp_dout(dpp, 20) <<__func__<< ": oid=" << fid_str << dendl;
+
+ int rc;
+ if (meta.layout_id == 0) {
+ rgw_bucket_dir_entry ent;
+ rc = this->get_bucket_dir_ent(dpp, ent);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: open_mobj() failed: rc=" << rc << dendl;
+ return rc;
+ }
+ }
+
+ if (meta.layout_id == 0)
+ return -ENOENT;
+
+ M0_ASSERT(mobj == nullptr);
+ mobj = new m0_obj();
+ memset(mobj, 0, sizeof *mobj);
+ m0_obj_init(mobj, &store->container.co_realm, &meta.oid, store->conf.mc_layout_id);
+
+ struct m0_op *op = nullptr;
+ mobj->ob_attr.oa_layout_id = meta.layout_id;
+ mobj->ob_attr.oa_pver = meta.pver;
+ mobj->ob_entity.en_flags |= M0_ENF_META;
+ rc = m0_entity_open(&mobj->ob_entity, &op);
+ if (rc != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: m0_entity_open() failed: rc=" << rc << dendl;
+ this->close_mobj();
+ return rc;
+ }
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc < 0) {
+ ldpp_dout(dpp, 10) << "ERROR: failed to open motr object: rc=" << rc << dendl;
+ this->close_mobj();
+ return rc;
+ }
+
+ ldpp_dout(dpp, 20) <<__func__<< ": rc=" << rc << dendl;
+
+ return 0;
+}
+
+int MotrObject::delete_mobj(const DoutPrefixProvider *dpp)
+{
+ int rc;
+ char fid_str[M0_FID_STR_LEN];
+ snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
+ if (!meta.oid.u_hi || !meta.oid.u_lo) {
+ ldpp_dout(dpp, 20) << __func__ << ": invalid motr object oid=" << fid_str << dendl;
+ return -EINVAL;
+ }
+ ldpp_dout(dpp, 20) << __func__ << ": deleting motr object oid=" << fid_str << dendl;
+
+ // Open the object.
+ if (mobj == nullptr) {
+ rc = this->open_mobj(dpp);
+ if (rc < 0)
+ return rc;
+ }
+
+  // Create a DELETE op and execute it (sync version).
+ struct m0_op *op = nullptr;
+ mobj->ob_entity.en_flags |= M0_ENF_META;
+ rc = m0_entity_delete(&mobj->ob_entity, &op);
+ if (rc != 0) {
+ ldpp_dout(dpp, 0) << "ERROR: m0_entity_delete() failed: " << rc << dendl;
+ return rc;
+ }
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to open motr object: " << rc << dendl;
+ return rc;
+ }
+
+ this->close_mobj();
+
+ return 0;
+}
+
+void MotrObject::close_mobj()
+{
+ if (mobj == nullptr)
+ return;
+ m0_obj_fini(mobj);
+ delete mobj; mobj = nullptr;
+}
+
+int MotrObject::write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset)
+{
+ int rc;
+ unsigned bs, left;
+ struct m0_op *op;
+ char *start, *p;
+ struct m0_bufvec buf;
+ struct m0_bufvec attr;
+ struct m0_indexvec ext;
+
+ left = data.length();
+ if (left == 0)
+ return 0;
+
+ rc = m0_bufvec_empty_alloc(&buf, 1) ?:
+ m0_bufvec_alloc(&attr, 1, 1) ?:
+ m0_indexvec_alloc(&ext, 1);
+ if (rc != 0)
+ goto out;
+
+ bs = this->get_optimal_bs(left);
+ ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl;
+
+ start = data.c_str();
+
+ for (p = start; left > 0; left -= bs, p += bs, offset += bs) {
+ if (left < bs)
+ bs = this->get_optimal_bs(left);
+ if (left < bs) {
+ data.append_zero(bs - left);
+ left = bs;
+ p = data.c_str();
+ }
+ buf.ov_buf[0] = p;
+ buf.ov_vec.v_count[0] = bs;
+ ext.iv_index[0] = offset;
+ ext.iv_vec.v_count[0] = bs;
+ attr.ov_vec.v_count[0] = 0;
+
+ op = nullptr;
+ rc = m0_obj_op(this->mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op);
+ if (rc != 0)
+ goto out;
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+ if (rc != 0)
+ goto out;
+ }
+
+out:
+ m0_indexvec_free(&ext);
+ m0_bufvec_free(&attr);
+ m0_bufvec_free2(&buf);
+ return rc;
+}
+
+int MotrObject::read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb)
+{
+ int rc;
+ unsigned bs, actual, left;
+ struct m0_op *op;
+ struct m0_bufvec buf;
+ struct m0_bufvec attr;
+ struct m0_indexvec ext;
+
+ // make end pointer exclusive:
+ // it's easier to work with it this way
+ end++;
+ ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off <<
+ " end=" << end << dendl;
+  // As `off` may not be aligned to the parity group size, even with an
+  // optimal buffer block size, reading data from offset `off` could cross
+  // a parity group boundary. And Motr only allows page-size aligned
+  // offsets.
+  //
+  // The optimal size of each IO should also take into account the data
+  // transfer size to the S3 client. For example, 16MB may be nice for
+  // reading data from Motr, but it could be too big for network transfer.
+  //
+  // TODO: proper handling of the offset is left for the future.
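+  //
+  // Sketch of the loop below: each iteration reads one block of `bs` bytes
+  // from Motr; near the end of the range the block may be larger than the
+  // remaining `actual` bytes, in which case only the first `actual` bytes of
+  // the buffer are handed to cb->handle_data().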
+ bs = this->get_optimal_bs(end - off);
+ ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): bs=" << bs << dendl;
+
+ rc = m0_bufvec_empty_alloc(&buf, 1) ? :
+ m0_bufvec_alloc(&attr, 1, 1) ? :
+ m0_indexvec_alloc(&ext, 1);
+ if (rc < 0)
+ goto out;
+
+ left = end - off;
+ for (; left > 0; off += actual) {
+ if (left < bs)
+ bs = this->get_optimal_bs(left);
+ actual = bs;
+ if (left < bs)
+ actual = left;
+ ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off <<
+ " actual=" << actual << dendl;
+ bufferlist bl;
+ buf.ov_buf[0] = bl.append_hole(bs).c_str();
+ buf.ov_vec.v_count[0] = bs;
+ ext.iv_index[0] = off;
+ ext.iv_vec.v_count[0] = bs;
+ attr.ov_vec.v_count[0] = 0;
+
+ left -= actual;
+ // Read from Motr.
+ op = nullptr;
+ rc = m0_obj_op(this->mobj, M0_OC_READ, &ext, &buf, &attr, 0, 0, &op);
+ ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): init read op rc=" << rc << dendl;
+ if (rc != 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": read failed during m0_obj_op, rc=" << rc << dendl;
+ goto out;
+ }
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+ if (rc != 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": read failed, m0_op_wait rc=" << rc << dendl;
+ goto out;
+ }
+ // Call `cb` to process returned data.
+ ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): call cb to process data" << dendl;
+ cb->handle_data(bl, 0, actual);
+ }
+
+out:
+ m0_indexvec_free(&ext);
+ m0_bufvec_free(&attr);
+ m0_bufvec_free2(&buf);
+ this->close_mobj();
+
+ return rc;
+}
+
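+// Each bucket-index value decoded below is a concatenation of three encoded
+// parts: an rgw_bucket_dir_entry, the object's attrs (rgw::sal::Attrs) and a
+// MotrObject::Meta (oid/layout/pver); see the matching encode sequence in
+// update_version_entries().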
+int MotrObject::get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent)
+{
+ int rc = 0;
+ string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
+ int max = 1000;
+ vector<string> keys(max);
+ vector<bufferlist> vals(max);
+ bufferlist bl;
+ bufferlist::const_iterator iter;
+
+ if (this->get_bucket()->get_info().versioning_status() == BUCKET_VERSIONED ||
+ this->get_bucket()->get_info().versioning_status() == BUCKET_SUSPENDED) {
+
+ rgw_bucket_dir_entry ent_to_check;
+
+ if (this->store->get_obj_meta_cache()->get(dpp, this->get_name(), bl) == 0) {
+ iter = bl.cbegin();
+ ent_to_check.decode(iter);
+ if (ent_to_check.is_current()) {
+ ent = ent_to_check;
+ rc = 0;
+ goto out;
+ }
+ }
+
+ ldpp_dout(dpp, 20) <<__func__<< ": versioned bucket!" << dendl;
+ keys[0] = this->get_name();
+ rc = store->next_query_by_name(bucket_index_iname, keys, vals);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << __func__ << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ rc = -ENOENT;
+ for (const auto& bl: vals) {
+ if (bl.length() == 0)
+ break;
+
+ iter = bl.cbegin();
+ ent_to_check.decode(iter);
+ if (ent_to_check.is_current()) {
+ ldpp_dout(dpp, 20) <<__func__<< ": found current version!" << dendl;
+ ent = ent_to_check;
+ rc = 0;
+
+ this->store->get_obj_meta_cache()->put(dpp, this->get_name(), bl);
+
+ break;
+ }
+ }
+ } else {
+ if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) {
+ ldpp_dout(dpp, 20) <<__func__<< ": non-versioned bucket!" << dendl;
+ rc = this->store->do_idx_op_by_name(bucket_index_iname,
+ M0_IC_GET, this->get_key().get_oid(), bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << __func__ << "ERROR: failed to get object's entry from bucket index: rc="
+ << rc << dendl;
+ return rc;
+ }
+ this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl);
+ }
+
+ bufferlist& blr = bl;
+ iter = blr.cbegin();
+ ent.decode(iter);
+ }
+
+out:
+ if (rc == 0) {
+ sal::Attrs dummy;
+ decode(dummy, iter);
+ meta.decode(iter);
+ ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id << dendl;
+ char fid_str[M0_FID_STR_LEN];
+ snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
+ ldpp_dout(dpp, 70) << __func__ << ": oid=" << fid_str << dendl;
+ } else
+ ldpp_dout(dpp, 0) <<__func__<< ": rc=" << rc << dendl;
+
+ return rc;
+}
+
+int MotrObject::update_version_entries(const DoutPrefixProvider *dpp)
+{
+ int rc;
+ int max = 10;
+ vector<string> keys(max);
+ vector<bufferlist> vals(max);
+
+ string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
+ keys[0] = this->get_name();
+ rc = store->next_query_by_name(bucket_index_iname, keys, vals);
+ ldpp_dout(dpp, 20) << "get all versions, name = " << this->get_name() << "rc = " << rc << dendl;
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ // no entries returned.
+ if (rc == 0)
+ return 0;
+
+ for (const auto& bl: vals) {
+ if (bl.length() == 0)
+ break;
+
+ rgw_bucket_dir_entry ent;
+ auto iter = bl.cbegin();
+ ent.decode(iter);
+
+ if (0 != ent.key.name.compare(0, this->get_name().size(), this->get_name()))
+ continue;
+
+ if (!ent.is_current())
+ continue;
+
+ // Remove from the cache.
+ store->get_obj_meta_cache()->remove(dpp, this->get_name());
+
+ rgw::sal::Attrs attrs;
+ decode(attrs, iter);
+ MotrObject::Meta meta;
+ meta.decode(iter);
+
+ ent.flags = rgw_bucket_dir_entry::FLAG_VER;
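+    // Demote the previously "current" entry: drop its current flag and store
+    // it back under "<name>[<instance>]" (or the plain name if it has no
+    // instance), presumably so a new current version can take over the
+    // plain-name slot (an assumption based on the key format built below).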
+ string key;
+ if (ent.key.instance.empty())
+ key = ent.key.name;
+ else {
+ char buf[ent.key.name.size() + ent.key.instance.size() + 16];
+ snprintf(buf, sizeof(buf), "%s[%s]", ent.key.name.c_str(), ent.key.instance.c_str());
+ key = buf;
+ }
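+ // For example (names purely illustrative): an entry with name "doc.txt"
+ // and instance "v2abc" is keyed as "doc.txt[v2abc]", while the
+ // null-instance entry is keyed simply as "doc.txt".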
+ ldpp_dout(dpp, 20) << "update one version, key = " << key << dendl;
+ bufferlist ent_bl;
+ ent.encode(ent_bl);
+ encode(attrs, ent_bl);
+ meta.encode(ent_bl);
+
+ rc = store->do_idx_op_by_name(bucket_index_iname,
+ M0_IC_PUT, key, ent_bl);
+ if (rc < 0)
+ break;
+ }
+ return rc;
+}
+
+// Scan object_nnn_part_index to get all parts then open their motr objects.
+// TODO: all parts are opened in the POC. But for a large object, for example
+// a 5GB object with the default 15MB part size, there will be about 300 parts.
+// A better way of managing opened objects may be needed.
+int MotrObject::get_part_objs(const DoutPrefixProvider* dpp,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs)
+{
+ int rc;
+ int max_parts = 1000;
+ int marker = 0;
+ uint64_t off = 0;
+ bool truncated = false;
+ std::unique_ptr<rgw::sal::MultipartUpload> upload;
+
+ upload = this->get_bucket()->get_multipart_upload(this->get_name(), string());
+
+ do {
+ rc = upload->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated);
+ if (rc == -ENOENT) {
+ rc = -ERR_NO_SUCH_UPLOAD;
+ }
+ if (rc < 0)
+ return rc;
+
+ std::map<uint32_t, std::unique_ptr<MultipartPart>>& parts = upload->get_parts();
+ for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) {
+
+ MultipartPart *mpart = part_iter->second.get();
+ MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
+ uint32_t part_num = mmpart->get_num();
+ uint64_t part_size = mmpart->get_size();
+
+ string part_obj_name = this->get_bucket()->get_name() + "." +
+ this->get_key().get_oid() +
+ ".part." + std::to_string(part_num);
+ std::unique_ptr<rgw::sal::Object> obj;
+ obj = this->bucket->get_object(rgw_obj_key(part_obj_name));
+ std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
+
+ ldpp_dout(dpp, 20) << "get_part_objs: off = " << off << ", size = " << part_size << dendl;
+ mobj->part_off = off;
+ mobj->part_size = part_size;
+ mobj->part_num = part_num;
+ mobj->meta = mmpart->meta;
+
+ part_objs.emplace(part_num, std::move(mobj));
+
+ off += part_size;
+ }
+ } while (truncated);
+
+ return 0;
+}
+
+int MotrObject::open_part_objs(const DoutPrefixProvider* dpp,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs)
+{
+ //for (auto& iter: part_objs) {
+ for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) {
+ MotrObject* obj = static_cast<MotrObject *>(iter->second.get());
+ ldpp_dout(dpp, 20) << "open_part_objs: name = " << obj->get_name() << dendl;
+ int rc = obj->open_mobj(dpp);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+int MotrObject::delete_part_objs(const DoutPrefixProvider* dpp)
+{
+ std::unique_ptr<rgw::sal::MultipartUpload> upload;
+ upload = this->get_bucket()->get_multipart_upload(this->get_name(), string());
+ std::unique_ptr<rgw::sal::MotrMultipartUpload> mupload(static_cast<rgw::sal::MotrMultipartUpload *>(upload.release()));
+ return mupload->delete_parts(dpp);
+}
+
+int MotrObject::read_multipart_obj(const DoutPrefixProvider* dpp,
+ int64_t off, int64_t end, RGWGetDataCB* cb,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs)
+{
+ int64_t cursor = off;
+
+ ldpp_dout(dpp, 20) << "read_multipart_obj: off=" << off << " end=" << end << dendl;
+
+ // Find the parts which are in the (off, end) range and
+ // read data from it. Note: `end` argument is inclusive.
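+ // A worked example with illustrative sizes: three 10-byte parts covering
+ // [0,9], [10,19] and [20,29], with off=5 and end=24, read bytes [5,9] of
+ // the first part, the whole second part, and bytes [0,4] of the third.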
+ for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) {
+ MotrObject* obj = static_cast<MotrObject *>(iter->second.get());
+ int64_t part_off = obj->part_off;
+ int64_t part_size = obj->part_size;
+ int64_t part_end = obj->part_off + obj->part_size - 1;
+ ldpp_dout(dpp, 20) << "read_multipart_obj: part_off=" << part_off
+ << " part_end=" << part_end << dendl;
+ if (part_end < off)
+ continue;
+
+ int64_t local_off = cursor - obj->part_off;
+ int64_t local_end = part_end < end? part_size - 1 : end - part_off;
+ ldpp_dout(dpp, 20) << "real_multipart_obj: name=" << obj->get_name()
+ << " local_off=" << local_off
+ << " local_end=" << local_end << dendl;
+ int rc = obj->read_mobj(dpp, local_off, local_end, cb);
+ if (rc < 0)
+ return rc;
+
+ cursor = part_end + 1;
+ if (cursor > end)
+ break;
+ }
+
+ return 0;
+}
+
+static unsigned roundup(unsigned x, unsigned by)
+{
+ return ((x - 1) / by + 1) * by;
+}
+
+unsigned MotrObject::get_optimal_bs(unsigned len)
+{
+ struct m0_pool_version *pver;
+
+ pver = m0_pool_version_find(&store->instance->m0c_pools_common,
+ &mobj->ob_attr.oa_pver);
+ M0_ASSERT(pver != nullptr);
+ struct m0_pdclust_attr *pa = &pver->pv_attr;
+ uint64_t lid = M0_OBJ_LAYOUT_ID(meta.layout_id);
+ unsigned unit_sz = m0_obj_layout_id_to_unit_size(lid);
+ unsigned grp_sz = unit_sz * pa->pa_N;
+
+ // bs should be max 4-times pool-width deep counting by 1MB units, or
+ // 8-times deep counting by 512K units, 16-times deep by 256K units,
+ // and so on. Several units to one target will be aggregated to make
+ // fewer network RPCs, disk i/o operations and BE transactions.
+ // For unit sizes of 32K or less, the depth is 128, which
+ // makes it 32K * 128 == 4MB - the maximum amount per target when
+ // the performance is still good on LNet (which has max 1MB frames).
+ // TODO: it may be different on libfabric, should be re-measured.
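+ // A numeric sketch (pool parameters below are illustrative only): with 1MB
+ // units the depth is 128/32 = 4; for a hypothetical 8+2 layout on a
+ // 16-device pool (N=8, K=2, S=0, P=16) the group size is 8MB and
+ // max_bs = 4 * 1MB * 16 * 8 / 10 ~= 51.2MB, rounded up to 56MB.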
+ unsigned depth = 128 / ((unit_sz + 0x7fff) / 0x8000);
+ if (depth == 0)
+ depth = 1;
+ // P * N / (N + K + S) - number of data units to span the pool-width
+ unsigned max_bs = depth * unit_sz * pa->pa_P * pa->pa_N /
+ (pa->pa_N + pa->pa_K + pa->pa_S);
+ max_bs = roundup(max_bs, grp_sz); // multiple of group size
+ if (len >= max_bs)
+ return max_bs;
+ else if (len <= grp_sz)
+ return grp_sz;
+ else
+ return roundup(len, grp_sz);
+}
+
+void MotrAtomicWriter::cleanup()
+{
+ m0_indexvec_free(&ext);
+ m0_bufvec_free(&attr);
+ m0_bufvec_free2(&buf);
+ acc_data.clear();
+ obj.close_mobj();
+ old_obj.close_mobj();
+}
+
+unsigned MotrAtomicWriter::populate_bvec(unsigned len, bufferlist::iterator &bi)
+{
+ unsigned i, l, done = 0;
+ const char *data;
+
+ for (i = 0; i < MAX_BUFVEC_NR && len > 0; ++i) {
+ l = bi.get_ptr_and_advance(len, &data);
+ buf.ov_buf[i] = (char*)data;
+ buf.ov_vec.v_count[i] = l;
+ ext.iv_index[i] = acc_off;
+ ext.iv_vec.v_count[i] = l;
+ attr.ov_vec.v_count[i] = 0;
+ acc_off += l;
+ len -= l;
+ done += l;
+ }
+ buf.ov_vec.v_nr = i;
+ ext.iv_vec.v_nr = i;
+
+ return done;
+}
+
+int MotrAtomicWriter::write()
+{
+ int rc;
+ unsigned bs, left;
+ struct m0_op *op;
+ bufferlist::iterator bi;
+
+ left = acc_data.length();
+
+ if (!obj.is_opened()) {
+ rc = obj.create_mobj(dpp, left);
+ if (rc == -EEXIST)
+ rc = obj.open_mobj(dpp);
+ if (rc != 0) {
+ char fid_str[M0_FID_STR_LEN];
+ snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&obj.meta.oid));
+ ldpp_dout(dpp, 0) << "ERROR: failed to create/open motr object "
+ << fid_str << " (" << obj.get_bucket()->get_name()
+ << "/" << obj.get_key().get_oid() << "): rc=" << rc
+ << dendl;
+ goto err;
+ }
+ }
+
+ total_data_size += left;
+
+ bs = obj.get_optimal_bs(left);
+ ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl;
+
+ bi = acc_data.begin();
+ while (left > 0) {
+ if (left < bs)
+ bs = obj.get_optimal_bs(left);
+ if (left < bs) {
+ acc_data.append_zero(bs - left);
+ auto off = bi.get_off();
+ bufferlist tmp;
+ acc_data.splice(off, bs, &tmp);
+ acc_data.clear();
+ acc_data.append(tmp.c_str(), bs); // make it a single buf
+ bi = acc_data.begin();
+ left = bs;
+ }
+
+ left -= this->populate_bvec(bs, bi);
+
+ op = nullptr;
+ rc = m0_obj_op(obj.mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op);
+ if (rc != 0)
+ goto err;
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+ if (rc != 0)
+ goto err;
+ }
+ acc_data.clear();
+
+ return 0;
+
+err:
+ this->cleanup();
+ return rc;
+}
+
+static const unsigned MAX_ACC_SIZE = 32 * 1024 * 1024;
+
+// Accumulate enough data first to make a reasonable decision about the
+// optimal unit size for a new object, or bs for existing object (32M seems
+// enough for 4M units in 8+2 parity groups, a common config on wide pools),
+// and then launch the write operations.
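+// A sketch of the typical call sequence (chunk sizes are illustrative): the
+// frontend calls process() repeatedly with, say, 4MB chunks; nothing is
+// written until 32MB has accumulated, then write() flushes it; a final
+// process() call with an empty bufferlist flushes the remainder and
+// releases the buffers via cleanup().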
+int MotrAtomicWriter::process(bufferlist&& data, uint64_t offset)
+{
+ if (data.length() == 0) { // last call, flush data
+ int rc = 0;
+ if (acc_data.length() != 0)
+ rc = this->write();
+ this->cleanup();
+ return rc;
+ }
+
+ if (acc_data.length() == 0)
+ acc_off = offset;
+
+ acc_data.append(std::move(data));
+ if (acc_data.length() < MAX_ACC_SIZE)
+ return 0;
+
+ return this->write();
+}
+
+int MotrAtomicWriter::complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled,
+ optional_yield y)
+{
+ int rc = 0;
+
+ if (acc_data.length() != 0) { // check again, just in case
+ rc = this->write();
+ this->cleanup();
+ if (rc != 0)
+ return rc;
+ }
+
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+
+ // Set rgw_bucket_dir_entry. Some of the members of this structure may not
+ // apply to motr, for example the storage_class.
+ //
+ // Check out AtomicObjectProcessor::complete() in rgw_putobj_processor.cc
+ // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what to
+ // set in the dir entry and how. Only set the basic ones for the POC, no
+ // ACLs and other attrs.
+ obj.get_key().get_index_key(&ent.key);
+ ent.meta.size = total_data_size;
+ ent.meta.accounted_size = total_data_size;
+ ent.meta.mtime = real_clock::is_zero(set_mtime)? ceph::real_clock::now() : set_mtime;
+ ent.meta.etag = etag;
+ ent.meta.owner = owner.to_str();
+ ent.meta.owner_display_name = obj.get_bucket()->get_owner()->get_display_name();
+ bool is_versioned = obj.get_key().have_instance();
+ if (is_versioned)
+ ent.flags = rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
+ ldpp_dout(dpp, 20) <<__func__<< ": key=" << obj.get_key().get_oid()
+ << " etag: " << etag << " user_data=" << user_data << dendl;
+ if (user_data)
+ ent.meta.user_data = *user_data;
+ ent.encode(bl);
+
+ RGWBucketInfo &info = obj.get_bucket()->get_info();
+ if (info.obj_lock_enabled() && info.obj_lock.has_rule()) {
+ auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+ if (iter == attrs.end()) {
+ real_time lock_until_date = info.obj_lock.get_lock_until_date(ent.meta.mtime);
+ string mode = info.obj_lock.get_mode();
+ RGWObjectRetention obj_retention(mode, lock_until_date);
+ bufferlist retention_bl;
+ obj_retention.encode(retention_bl);
+ attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl;
+ }
+ }
+ encode(attrs, bl);
+ obj.meta.encode(bl);
+ ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << obj.meta.layout_id
+ << dendl;
+ if (is_versioned) {
+ // Get the list of all versioned objects with the same key and
+ // unset their FLAG_CURRENT later, if do_idx_op_by_name() is successful.
+ // Note: without a distributed lock on the index it is possible that 2
+ // CURRENT entries appear in the bucket. For example, consider the
+ // following scenario when two clients are trying to add a new object
+ // version concurrently:
+ //   (1) client 1: reads all the CURRENT entries
+ //   (2) client 2: updates the index and sets the new CURRENT
+ //   (3) client 1: updates the index and sets the new CURRENT
+ // At step (1) client 1 does not see the new current record from step (2),
+ // so it won't update it. As a result, two CURRENT version entries will
+ // appear in the bucket.
+ // TODO: updating the current version (unsetting the flag) and inserting the
+ // new current version could be launched in one motr op. This requires
+ // changes to do_idx_op() and do_idx_op_by_name().
+ rc = obj.update_version_entries(dpp);
+ if (rc < 0)
+ return rc;
+ }
+ // Insert an entry into bucket index.
+ string bucket_index_iname = "motr.rgw.bucket.index." + obj.get_bucket()->get_name();
+ rc = store->do_idx_op_by_name(bucket_index_iname,
+ M0_IC_PUT, obj.get_key().get_oid(), bl);
+ if (rc == 0)
+ store->get_obj_meta_cache()->put(dpp, obj.get_key().get_oid(), bl);
+
+ if (old_obj.get_bucket()->get_info().versioning_status() != BUCKET_VERSIONED) {
+ // Delete old object data if exists.
+ old_obj.delete_mobj(dpp);
+ }
+
+ // TODO: We need to handle the object leak caused by parallel object upload by
+ // making use of background gc, which is currently not enabled for motr.
+ return rc;
+}
+
+int MotrMultipartUpload::delete_parts(const DoutPrefixProvider *dpp)
+{
+ int rc;
+ int max_parts = 1000;
+ int marker = 0;
+ bool truncated = false;
+
+ // Scan all parts and delete the corresponding motr objects.
+ do {
+ rc = this->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated);
+ if (rc == -ENOENT) {
+ truncated = false;
+ rc = 0;
+ }
+ if (rc < 0)
+ return rc;
+
+ std::map<uint32_t, std::unique_ptr<MultipartPart>>& parts = this->get_parts();
+ for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) {
+
+ MultipartPart *mpart = part_iter->second.get();
+ MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
+ uint32_t part_num = mmpart->get_num();
+
+ // Delete the part object. Note that the part object is not
+ // inserted into the bucket index, so only the corresponding motr object
+ // needs to be deleted. That is why we don't call
+ // MotrObject::delete_object().
+ string part_obj_name = bucket->get_name() + "." +
+ mp_obj.get_key() +
+ ".part." + std::to_string(part_num);
+ std::unique_ptr<rgw::sal::Object> obj;
+ obj = this->bucket->get_object(rgw_obj_key(part_obj_name));
+ std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
+ mobj->meta = mmpart->meta;
+ rc = mobj->delete_mobj(dpp);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": Failed to delete object from Motr. rc=" << rc << dendl;
+ return rc;
+ }
+ }
+ } while (truncated);
+
+ // Delete object part index.
+ std::string oid = mp_obj.get_key();
+ string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
+ return store->delete_motr_idx_by_name(obj_part_iname);
+}
+
+int MotrMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct)
+{
+ int rc;
+ // Check if multipart upload exists
+ bufferlist bl;
+ std::unique_ptr<rgw::sal::Object> meta_obj;
+ meta_obj = get_meta_obj();
+ string bucket_multipart_iname =
+ "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
+ rc = store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_GET, meta_obj->get_oid(), bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart upload. rc=" << rc << dendl;
+ return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
+ }
+
+ // Scan all parts and delete the corresponding motr objects.
+ rc = this->delete_parts(dpp);
+ if (rc < 0)
+ return rc;
+
+ bl.clear();
+ // Remove the upload from bucket multipart index.
+ rc = store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_DEL, meta_obj->get_key().get_oid(), bl);
+ return rc;
+}
+
+std::unique_ptr<rgw::sal::Object> MotrMultipartUpload::get_meta_obj()
+{
+ std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns));
+ std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
+ mobj->set_category(RGWObjCategory::MultiMeta);
+ return mobj;
+}
+
+struct motr_multipart_upload_info
+{
+ rgw_placement_rule dest_placement;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(dest_placement, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(dest_placement, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(motr_multipart_upload_info)
+
+int MotrMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y,
+ ACLOwner& _owner,
+ rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs)
+{
+ int rc;
+ std::string oid = mp_obj.get_key();
+
+ owner = _owner;
+
+ do {
+ char buf[33];
+ string tmp_obj_name;
+ gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
+ std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */
+ upload_id.append(buf);
+
+ mp_obj.init(oid, upload_id);
+ tmp_obj_name = mp_obj.get_meta();
+
+ std::unique_ptr<rgw::sal::Object> obj;
+ obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns));
+ // the meta object will be indexed with 0 size, we c
+ obj->set_in_extra_data(true);
+ obj->set_hash_source(oid);
+
+ motr_multipart_upload_info upload_info;
+ upload_info.dest_placement = dest_placement;
+ bufferlist mpbl;
+ encode(upload_info, mpbl);
+
+ // Create an initial entry in the bucket. The entry (size, etag, etc.)
+ // will be updated when the multipart upload is completed.
+ bufferlist bl;
+ rgw_bucket_dir_entry ent;
+ obj->get_key().get_index_key(&ent.key);
+ ent.meta.owner = owner.get_id().to_str();
+ ent.meta.category = RGWObjCategory::MultiMeta;
+ ent.meta.mtime = ceph::real_clock::now();
+ ent.meta.user_data.assign(mpbl.c_str(), mpbl.c_str() + mpbl.length());
+ ent.encode(bl);
+
+ // Insert the entry into the bucket multipart index (not the bucket
+ // index) so the in-progress upload is not shown when listing the bucket.
+ string bucket_multipart_iname =
+ "motr.rgw.bucket." + obj->get_bucket()->get_name() + ".multiparts";
+ rc = store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_PUT, obj->get_key().get_oid(), bl);
+
+ } while (rc == -EEXIST);
+
+ if (rc < 0)
+ return rc;
+
+ // Create object part index.
+ // TODO: add bucket as part of the name.
+ string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::init(): object part index=" << obj_part_iname << dendl;
+ rc = store->create_motr_idx_by_name(obj_part_iname);
+ if (rc == -EEXIST)
+ rc = 0;
+ if (rc < 0)
+ // TODO: clean the bucket index entry
+ ldpp_dout(dpp, 0) << "Failed to create object multipart index " << obj_part_iname << dendl;
+
+ return rc;
+}
+
+int MotrMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct,
+ int num_parts, int marker,
+ int *next_marker, bool *truncated,
+ bool assume_unsorted)
+{
+ int rc;
+ vector<string> key_vec(num_parts);
+ vector<bufferlist> val_vec(num_parts);
+
+ std::string oid = mp_obj.get_key();
+ string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
+ ldpp_dout(dpp, 20) << __func__ << ": object part index = " << obj_part_iname << dendl;
+ key_vec[0].clear();
+ key_vec[0] = "part.";
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%08d", marker + 1);
+ key_vec[0].append(buf);
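+ // For example, with marker=0 the NEXT query starts from the key
+ // "part.00000001"; part entries are keyed as "part.NNNNNNNN", see
+ // MotrMultipartWriter::complete() below.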
+ rc = store->next_query_by_name(obj_part_iname, key_vec, val_vec);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ return rc;
+ }
+
+ int last_num = 0;
+ int part_cnt = 0;
+ uint32_t expected_next = 0;
+ ldpp_dout(dpp, 20) << __func__ << ": marker = " << marker << dendl;
+ for (const auto& bl: val_vec) {
+ if (bl.length() == 0)
+ break;
+
+ RGWUploadPartInfo info;
+ auto iter = bl.cbegin();
+ info.decode(iter);
+ rgw::sal::Attrs attrs_dummy;
+ decode(attrs_dummy, iter);
+ MotrObject::Meta meta;
+ meta.decode(iter);
+
+ ldpp_dout(dpp, 20) << __func__ << ": part_num=" << info.num
+ << " part_size=" << info.size << dendl;
+ ldpp_dout(dpp, 20) << __func__ << ": meta:oid=[" << meta.oid.u_hi << "," << meta.oid.u_lo
+ << "], meta:pvid=[" << meta.pver.f_container << "," << meta.pver.f_key
+ << "], meta:layout id=" << meta.layout_id << dendl;
+
+ if (!expected_next)
+ expected_next = info.num + 1;
+ else if (info.num != expected_next)
+ return -EINVAL;
+ else
+ expected_next = info.num + 1;
+
+ if ((int)info.num > marker) {
+ last_num = info.num;
+ parts.emplace(info.num, std::make_unique<MotrMultipartPart>(info, meta));
+ }
+
+ part_cnt++;
+ }
+
+ // Does it have more parts?
+ if (truncated) {
+ *truncated = part_cnt >= num_parts;
+ ldpp_dout(dpp, 20) << __func__ << ": truncated=" << *truncated << dendl;
+ }
+
+ if (next_marker)
+ *next_marker = last_num;
+
+ return 0;
+}
+
+// Heavily copy from rgw_sal_rados.cc
+int MotrMultipartUpload::complete(const DoutPrefixProvider *dpp,
+ optional_yield y, CephContext* cct,
+ map<int, string>& part_etags,
+ list<rgw_obj_index_key>& remove_objs,
+ uint64_t& accounted_size, bool& compressed,
+ RGWCompressionInfo& cs_info, off_t& off,
+ std::string& tag, ACLOwner& owner,
+ uint64_t olh_epoch,
+ rgw::sal::Object* target_obj)
+{
+ char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+ std::string etag;
+ bufferlist etag_bl;
+ MD5 hash;
+ // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
+ hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+ bool truncated;
+ int rc;
+
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): enter" << dendl;
+ int total_parts = 0;
+ int handled_parts = 0;
+ int max_parts = 1000;
+ int marker = 0;
+ uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size;
+ auto etags_iter = part_etags.begin();
+ rgw::sal::Attrs attrs = target_obj->get_attrs();
+
+ do {
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): list_parts()" << dendl;
+ rc = list_parts(dpp, cct, max_parts, marker, &marker, &truncated);
+ if (rc == -ENOENT) {
+ rc = -ERR_NO_SUCH_UPLOAD;
+ }
+ if (rc < 0)
+ return rc;
+
+ total_parts += parts.size();
+ if (!truncated && total_parts != (int)part_etags.size()) {
+ ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts
+ << " expected: " << part_etags.size() << dendl;
+ rc = -ERR_INVALID_PART;
+ return rc;
+ }
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): parts.size()=" << parts.size() << dendl;
+
+ for (auto obj_iter = parts.begin();
+ etags_iter != part_etags.end() && obj_iter != parts.end();
+ ++etags_iter, ++obj_iter, ++handled_parts) {
+ MultipartPart *mpart = obj_iter->second.get();
+ MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
+ RGWUploadPartInfo *part = &mmpart->info;
+
+ uint64_t part_size = part->accounted_size;
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part_size=" << part_size << dendl;
+ if (handled_parts < (int)part_etags.size() - 1 &&
+ part_size < min_part_size) {
+ rc = -ERR_TOO_SMALL;
+ return rc;
+ }
+
+ char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ if (etags_iter->first != (int)obj_iter->first) {
+ ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: "
+ << etags_iter->first << " next uploaded: "
+ << obj_iter->first << dendl;
+ rc = -ERR_INVALID_PART;
+ return rc;
+ }
+ string part_etag = rgw_string_unquote(etags_iter->second);
+ if (part_etag.compare(part->etag) != 0) {
+ ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first
+ << " etag: " << etags_iter->second << dendl;
+ rc = -ERR_INVALID_PART;
+ return rc;
+ }
+
+ hex_to_buf(part->etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE);
+ hash.Update((const unsigned char *)petag, sizeof(petag));
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): calc etag " << dendl;
+
+ string oid = mp_obj.get_part(part->num);
+ rgw_obj src_obj;
+ src_obj.init_ns(bucket->get_key(), oid, mp_ns);
+
+#if 0 // does Motr backend need it?
+ /* update manifest for part */
+ if (part->manifest.empty()) {
+ ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj="
+ << src_obj << dendl;
+ rc = -ERR_INVALID_PART;
+ return rc;
+ } else {
+ manifest.append(dpp, part->manifest, store->get_zone());
+ }
+ ldpp_dout(dpp, 0) << "MotrMultipartUpload::complete(): manifest " << dendl;
+#endif
+
+ bool part_compressed = (part->cs_info.compression_type != "none");
+ if ((handled_parts > 0) &&
+ ((part_compressed != compressed) ||
+ (cs_info.compression_type != part->cs_info.compression_type))) {
+ ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload ("
+ << cs_info.compression_type << ">>" << part->cs_info.compression_type << ")" << dendl;
+ rc = -ERR_INVALID_PART;
+ return rc;
+ }
+
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part compression" << dendl;
+ if (part_compressed) {
+ int64_t new_ofs; // offset in compression data for new part
+ if (cs_info.blocks.size() > 0)
+ new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
+ else
+ new_ofs = 0;
+ for (const auto& block : part->cs_info.blocks) {
+ compression_block cb;
+ cb.old_ofs = block.old_ofs + cs_info.orig_size;
+ cb.new_ofs = new_ofs;
+ cb.len = block.len;
+ cs_info.blocks.push_back(cb);
+ new_ofs = cb.new_ofs + cb.len;
+ }
+ if (!compressed)
+ cs_info.compression_type = part->cs_info.compression_type;
+ cs_info.orig_size += part->cs_info.orig_size;
+ compressed = true;
+ }
+
+ // We may not need to do the following, as remove_objs are the entries
+ // that should not show up when listing a bucket. Since we store the
+ // in-progress upload's metadata in a separate index, those entries are
+ // not shown when listing a bucket anyway.
+ rgw_obj_index_key remove_key;
+ src_obj.key.get_index_key(&remove_key);
+ remove_objs.push_back(remove_key);
+
+ off += part_size;
+ accounted_size += part->accounted_size;
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): off=" << off << ", accounted_size = " << accounted_size << dendl;
+ }
+ } while (truncated);
+ hash.Final((unsigned char *)final_etag);
+
+ buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str);
+ snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2],
+ sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2,
+ "-%lld", (long long)part_etags.size());
+ etag = final_etag_str;
+ ldpp_dout(dpp, 20) << "calculated etag: " << etag << dendl;
+ etag_bl.append(etag);
+ attrs[RGW_ATTR_ETAG] = etag_bl;
+
+ if (compressed) {
+ // write compression attribute to full object
+ bufferlist tmp;
+ encode(cs_info, tmp);
+ attrs[RGW_ATTR_COMPRESSION] = tmp;
+ }
+
+ // Read the object's multipart_upload_info.
+ // TODO: all those index name and key constructions should be implemented as
+ // member functions.
+ bufferlist bl;
+ std::unique_ptr<rgw::sal::Object> meta_obj;
+ meta_obj = get_meta_obj();
+ string bucket_multipart_iname =
+ "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
+ rc = this->store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_GET, meta_obj->get_key().get_oid(), bl);
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): read entry from bucket multipart index rc=" << rc << dendl;
+ if (rc < 0)
+ return rc;
+ rgw_bucket_dir_entry ent;
+ bufferlist& blr = bl;
+ auto ent_iter = blr.cbegin();
+ ent.decode(ent_iter);
+
+ // Update the dir entry and insert it to the bucket index so
+ // the object will be seen when listing the bucket.
+ bufferlist update_bl;
+ target_obj->get_key().get_index_key(&ent.key); // Change to the official name :)
+ ent.meta.size = off;
+ ent.meta.accounted_size = accounted_size;
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): obj size=" << ent.meta.size
+ << " obj accounted size=" << ent.meta.accounted_size << dendl;
+ ent.meta.mtime = ceph::real_clock::now();
+ ent.meta.etag = etag;
+ ent.encode(update_bl);
+ encode(attrs, update_bl);
+ MotrObject::Meta meta_dummy;
+ meta_dummy.encode(update_bl);
+
+ string bucket_index_iname = "motr.rgw.bucket.index." + meta_obj->get_bucket()->get_name();
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): target_obj name=" << target_obj->get_name()
+ << " target_obj oid=" << target_obj->get_oid() << dendl;
+ rc = store->do_idx_op_by_name(bucket_index_iname, M0_IC_PUT,
+ target_obj->get_name(), update_bl);
+ if (rc < 0)
+ return rc;
+
+ // Put into metadata cache.
+ store->get_obj_meta_cache()->put(dpp, target_obj->get_name(), update_bl);
+
+ // Now we can remove it from bucket multipart index.
+ ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): remove from bucket multipartindex " << dendl;
+ return store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_DEL, meta_obj->get_key().get_oid(), bl);
+}
+
+int MotrMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs)
+{
+ if (!rule && !attrs) {
+ return 0;
+ }
+
+ if (rule) {
+ if (!placement.empty()) {
+ *rule = &placement;
+ if (!attrs) {
+ /* Don't need attrs, done */
+ return 0;
+ }
+ } else {
+ *rule = nullptr;
+ }
+ }
+
+ std::unique_ptr<rgw::sal::Object> meta_obj;
+ meta_obj = get_meta_obj();
+ meta_obj->set_in_extra_data(true);
+
+ // Read the object's multipart_upload_info.
+ bufferlist bl;
+ string bucket_multipart_iname =
+ "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
+ int rc = this->store->do_idx_op_by_name(bucket_multipart_iname,
+ M0_IC_GET, meta_obj->get_key().get_oid(), bl);
+ if (rc < 0) {
+ ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart info. rc=" << rc << dendl;
+ return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
+ }
+
+ rgw_bucket_dir_entry ent;
+ bufferlist& blr = bl;
+ auto ent_iter = blr.cbegin();
+ ent.decode(ent_iter);
+
+ if (attrs) {
+ bufferlist etag_bl;
+ string& etag = ent.meta.etag;
+ ldpp_dout(dpp, 20) << "object's etag: " << ent.meta.etag << dendl;
+ etag_bl.append(etag.c_str(), etag.size());
+ attrs->emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl));
+ if (!rule || *rule != nullptr) {
+ /* placement was cached; don't actually read */
+ return 0;
+ }
+ }
+
+ /* Decode multipart_upload_info */
+ motr_multipart_upload_info upload_info;
+ bufferlist mpbl;
+ mpbl.append(ent.meta.user_data.c_str(), ent.meta.user_data.size());
+ auto mpbl_iter = mpbl.cbegin();
+ upload_info.decode(mpbl_iter);
+ placement = upload_info.dest_placement;
+ *rule = &placement;
+
+ return 0;
+}
+
+std::unique_ptr<Writer> MotrMultipartUpload::get_writer(
+ const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ uint64_t part_num,
+ const std::string& part_num_str)
+{
+ return std::make_unique<MotrMultipartWriter>(dpp, y, this,
+ obj, store, owner,
+ ptail_placement_rule, part_num, part_num_str);
+}
+
+int MotrMultipartWriter::prepare(optional_yield y)
+{
+ string part_obj_name = head_obj->get_bucket()->get_name() + "." +
+ head_obj->get_key().get_oid() +
+ ".part." + std::to_string(part_num);
+ ldpp_dout(dpp, 20) << "bucket=" << head_obj->get_bucket()->get_name() << "part_obj_name=" << part_obj_name << dendl;
+ part_obj = std::make_unique<MotrObject>(this->store, rgw_obj_key(part_obj_name), head_obj->get_bucket());
+ if (part_obj == nullptr)
+ return -ENOMEM;
+
+ // s3 client may retry uploading part, so the part may have already
+ // been created.
+ int rc = part_obj->create_mobj(dpp, store->cctx->_conf->rgw_max_chunk_size);
+ if (rc == -EEXIST) {
+ rc = part_obj->open_mobj(dpp);
+ if (rc < 0)
+ return rc;
+ }
+ return rc;
+}
+
+int MotrMultipartWriter::process(bufferlist&& data, uint64_t offset)
+{
+ int rc = part_obj->write_mobj(dpp, std::move(data), offset);
+ if (rc == 0) {
+ actual_part_size += data.length();
+ ldpp_dout(dpp, 20) << " write_mobj(): actual_part_size=" << actual_part_size << dendl;
+ }
+ return rc;
+}
+
+int MotrMultipartWriter::complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled,
+ optional_yield y)
+{
+ // Should the dir entry (object metadata) be updated? For example,
+ // the mtime.
+
+ ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): enter" << dendl;
+ // Add an entry into object_nnn_part_index.
+ bufferlist bl;
+ RGWUploadPartInfo info;
+ info.num = part_num;
+ info.etag = etag;
+ info.size = actual_part_size;
+ info.accounted_size = accounted_size;
+ info.modified = real_clock::now();
+
+ bool compressed;
+ int rc = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info);
+ ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): compression rc=" << rc << dendl;
+ if (rc < 0) {
+ ldpp_dout(dpp, 1) << "cannot get compression info" << dendl;
+ return rc;
+ }
+ encode(info, bl);
+ encode(attrs, bl);
+ part_obj->meta.encode(bl);
+
+ string p = "part.";
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%08d", (int)part_num);
+ p.append(buf);
+ string obj_part_iname = "motr.rgw.object." + head_obj->get_bucket()->get_name() + "." +
+ head_obj->get_key().get_oid() + ".parts";
+ ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): object part index = " << obj_part_iname << dendl;
+ rc = store->do_idx_op_by_name(obj_part_iname, M0_IC_PUT, p, bl);
+ if (rc < 0) {
+ return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
+ }
+
+ return 0;
+}
+
+std::unique_ptr<RGWRole> MotrStore::get_role(std::string name,
+ std::string tenant,
+ std::string path,
+ std::string trust_policy,
+ std::string max_session_duration_str,
+ std::multimap<std::string,std::string> tags)
+{
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+std::unique_ptr<RGWRole> MotrStore::get_role(const RGWRoleInfo& info)
+{
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+std::unique_ptr<RGWRole> MotrStore::get_role(std::string id)
+{
+ RGWRole* p = nullptr;
+ return std::unique_ptr<RGWRole>(p);
+}
+
+int MotrStore::get_roles(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ const std::string& path_prefix,
+ const std::string& tenant,
+ vector<std::unique_ptr<RGWRole>>& roles)
+{
+ return 0;
+}
+
+std::unique_ptr<RGWOIDCProvider> MotrStore::get_oidc_provider()
+{
+ RGWOIDCProvider* p = nullptr;
+ return std::unique_ptr<RGWOIDCProvider>(p);
+}
+
+int MotrStore::get_oidc_providers(const DoutPrefixProvider *dpp,
+ const std::string& tenant,
+ vector<std::unique_ptr<RGWOIDCProvider>>& providers)
+{
+ return 0;
+}
+
+std::unique_ptr<MultipartUpload> MotrBucket::get_multipart_upload(const std::string& oid,
+ std::optional<std::string> upload_id,
+ ACLOwner owner, ceph::real_time mtime)
+{
+ return std::make_unique<MotrMultipartUpload>(store, this, oid, upload_id, owner, mtime);
+}
+
+std::unique_ptr<Writer> MotrStore::get_append_writer(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ const std::string& unique_tag,
+ uint64_t position,
+ uint64_t *cur_accounted_size) {
+ return nullptr;
+}
+
+std::unique_ptr<Writer> MotrStore::get_atomic_writer(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ uint64_t olh_epoch,
+ const std::string& unique_tag) {
+ return std::make_unique<MotrAtomicWriter>(dpp, y,
+ obj, this, owner,
+ ptail_placement_rule, olh_epoch, unique_tag);
+}
+
+const std::string& MotrStore::get_compression_type(const rgw_placement_rule& rule)
+{
+ return zone.zone_params->get_compression_type(rule);
+}
+
+bool MotrStore::valid_placement(const rgw_placement_rule& rule)
+{
+ return zone.zone_params->valid_placement(rule);
+}
+
+std::unique_ptr<User> MotrStore::get_user(const rgw_user &u)
+{
+ ldout(cctx, 20) << "bucket's user: " << u.to_str() << dendl;
+ return std::make_unique<MotrUser>(this, u);
+}
+
+int MotrStore::get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string &key, optional_yield y, std::unique_ptr<User> *user)
+{
+ int rc;
+ User *u;
+ bufferlist bl;
+ RGWUserInfo uinfo;
+ MotrAccessKey access_key;
+
+ rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
+ M0_IC_GET, key, bl);
+ if (rc < 0){
+ ldout(cctx, 0) << "Access key not found: rc = " << rc << dendl;
+ return rc;
+ }
+
+ bufferlist& blr = bl;
+ auto iter = blr.cbegin();
+ access_key.decode(iter);
+
+ uinfo.user_id.from_str(access_key.user_id);
+ ldout(cctx, 0) << "Loading user: " << uinfo.user_id.id << dendl;
+ rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr);
+ if (rc < 0){
+ ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl;
+ return rc;
+ }
+ u = new MotrUser(this, uinfo);
+ if (!u)
+ return -ENOMEM;
+
+ user->reset(u);
+ return 0;
+}
+
+int MotrStore::get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr<User>* user)
+{
+ int rc;
+ User *u;
+ bufferlist bl;
+ RGWUserInfo uinfo;
+ MotrEmailInfo email_info;
+ rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+ M0_IC_GET, email, bl);
+ if (rc < 0){
+ ldout(cctx, 0) << "Email Id not found: rc = " << rc << dendl;
+ return rc;
+ }
+ auto iter = bl.cbegin();
+ email_info.decode(iter);
+ ldout(cctx, 0) << "Loading user: " << email_info.user_id << dendl;
+ uinfo.user_id.from_str(email_info.user_id);
+ rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr);
+ if (rc < 0){
+ ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl;
+ return rc;
+ }
+ u = new MotrUser(this, uinfo);
+ if (!u)
+ return -ENOMEM;
+
+ user->reset(u);
+ return 0;
+}
+
+int MotrStore::get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr<User>* user)
+{
+ /* Swift keys and subusers are not supported for now */
+ return 0;
+}
+
+int MotrStore::store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key)
+{
+ int rc;
+ bufferlist bl;
+ access_key.encode(bl);
+ rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
+ M0_IC_PUT, access_key.id, bl);
+ if (rc < 0){
+ ldout(cctx, 0) << "Failed to store key: rc = " << rc << dendl;
+ return rc;
+ }
+ return rc;
+}
+
+int MotrStore::delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key)
+{
+ int rc;
+ bufferlist bl;
+ rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
+ M0_IC_DEL, access_key, bl);
+ if (rc < 0){
+ ldout(cctx, 0) << "Failed to delete key: rc = " << rc << dendl;
+ }
+ return rc;
+}
+
+int MotrStore::store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info )
+{
+ int rc;
+ bufferlist bl;
+ email_info.encode(bl);
+ rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
+ M0_IC_PUT, email_info.email_id, bl);
+ if (rc < 0) {
+ ldout(cctx, 0) << "Failed to store the user by email as key: rc = " << rc << dendl;
+ }
+ return rc;
+}
+
+std::unique_ptr<Object> MotrStore::get_object(const rgw_obj_key& k)
+{
+ return std::make_unique<MotrObject>(this, k);
+}
+
+
+int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y)
+{
+ int ret;
+ Bucket* bp;
+
+ bp = new MotrBucket(this, b, u);
+ ret = bp->load_bucket(dpp, y);
+ if (ret < 0) {
+ delete bp;
+ return ret;
+ }
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int MotrStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket)
+{
+ Bucket* bp;
+
+ bp = new MotrBucket(this, i, u);
+ /* Don't need to fetch the bucket info, use the provided one */
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string& name, std::unique_ptr<Bucket>* bucket, optional_yield y)
+{
+ rgw_bucket b;
+
+ b.tenant = tenant;
+ b.name = name;
+
+ return get_bucket(dpp, u, b, bucket, y);
+}
+
+bool MotrStore::is_meta_master()
+{
+ return true;
+}
+
+int MotrStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version *objv,
+ bufferlist& in_data,
+ JSONParser *jp, req_info& info,
+ optional_yield y)
+{
+ return 0;
+}
+
+int MotrStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
+ bufferlist& in_data,
+ RGWXMLDecoder::XMLParser* parser, req_info& info,
+ optional_yield y)
+{
+ return 0;
+}
+
+std::string MotrStore::zone_unique_id(uint64_t unique_num)
+{
+ return "";
+}
+
+std::string MotrStore::zone_unique_trans_id(const uint64_t unique_num)
+{
+ return "";
+}
+
+int MotrStore::get_zonegroup(const std::string& id, std::unique_ptr<ZoneGroup>* group)
+{
+ /* XXX: for now only one zonegroup supported */
+ ZoneGroup* zg;
+ zg = new MotrZoneGroup(this, zone.zonegroup.get_group());
+
+ group->reset(zg);
+ return 0;
+}
+
+int MotrStore::list_all_zones(const DoutPrefixProvider* dpp,
+ std::list<std::string>& zone_ids)
+{
+ zone_ids.push_back(zone.get_id());
+ return 0;
+}
+
+int MotrStore::cluster_stat(RGWClusterStat& stats)
+{
+ return 0;
+}
+
+std::unique_ptr<Lifecycle> MotrStore::get_lifecycle(void)
+{
+ return nullptr;
+}
+
+std::unique_ptr<Notification> MotrStore::get_notification(Object* obj, Object* src_obj, req_state* s,
+ rgw::notify::EventType event_type, optional_yield y, const string* object_name)
+{
+ return std::make_unique<MotrNotification>(obj, src_obj, event_type);
+}
+
+std::unique_ptr<Notification> MotrStore::get_notification(const DoutPrefixProvider* dpp, Object* obj,
+ Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
+ std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y)
+{
+ return std::make_unique<MotrNotification>(obj, src_obj, event_type);
+}
+
+int MotrStore::log_usage(const DoutPrefixProvider *dpp, map<rgw_user_bucket, RGWUsageBatch>& usage_info)
+{
+ return 0;
+}
+
+int MotrStore::log_op(const DoutPrefixProvider *dpp, string& oid, bufferlist& bl)
+{
+ return 0;
+}
+
+int MotrStore::register_to_service_map(const DoutPrefixProvider *dpp, const string& daemon_type,
+ const map<string, string>& meta)
+{
+ return 0;
+}
+
+void MotrStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
+ RGWRateLimitInfo& user_ratelimit,
+ RGWRateLimitInfo& anon_ratelimit)
+{
+ return;
+}
+
+void MotrStore::get_quota(RGWQuota& quota)
+{
+ // XXX: Not handled for the first pass
+ return;
+}
+
+int MotrStore::set_buckets_enabled(const DoutPrefixProvider *dpp, vector<rgw_bucket>& buckets, bool enabled)
+{
+ return 0;
+}
+
+int MotrStore::get_sync_policy_handler(const DoutPrefixProvider *dpp,
+ std::optional<rgw_zone_id> zone,
+ std::optional<rgw_bucket> bucket,
+ RGWBucketSyncPolicyHandlerRef *phandler,
+ optional_yield y)
+{
+ return 0;
+}
+
+RGWDataSyncStatusManager* MotrStore::get_data_sync_manager(const rgw_zone_id& source_zone)
+{
+ return 0;
+}
+
+int MotrStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool *is_truncated,
+ RGWUsageIter& usage_iter,
+ map<rgw_user_bucket, rgw_usage_log_entry>& usage)
+{
+ return 0;
+}
+
+int MotrStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
+{
+ return 0;
+}
+
+int MotrStore::get_config_key_val(string name, bufferlist *bl)
+{
+ return 0;
+}
+
+int MotrStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const string& section, const string& marker, void** phandle)
+{
+ return 0;
+}
+
+int MotrStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list<string>& keys, bool* truncated)
+{
+ return 0;
+}
+
+void MotrStore::meta_list_keys_complete(void* handle)
+{
+ return;
+}
+
+std::string MotrStore::meta_get_marker(void* handle)
+{
+ return "";
+}
+
+int MotrStore::meta_remove(const DoutPrefixProvider *dpp, string& metadata_key, optional_yield y)
+{
+ return 0;
+}
+
+int MotrStore::open_idx(struct m0_uint128 *id, bool create, struct m0_idx *idx)
+{
+ m0_idx_init(idx, &container.co_realm, id);
+
+ if (!create)
+ return 0; // nothing to do more
+
+ // create index or make sure it's created
+ struct m0_op *op = nullptr;
+ int rc = m0_entity_create(nullptr, &idx->in_entity, &op);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
+ goto out;
+ }
+
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc != 0 && rc != -EEXIST)
+ ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
+out:
+ return rc;
+}
+
+static void set_m0bufvec(struct m0_bufvec *bv, vector<uint8_t>& vec)
+{
+ *bv->ov_buf = reinterpret_cast<char*>(vec.data());
+ *bv->ov_vec.v_count = vec.size();
+}
+
+// idx must be opened with open_idx() beforehand
+int MotrStore::do_idx_op(struct m0_idx *idx, enum m0_idx_opcode opcode,
+ vector<uint8_t>& key, vector<uint8_t>& val, bool update)
+{
+ int rc, rc_i;
+ struct m0_bufvec k, v, *vp = &v;
+ uint32_t flags = 0;
+ struct m0_op *op = nullptr;
+
+ if (m0_bufvec_empty_alloc(&k, 1) != 0) {
+ ldout(cctx, 0) << "ERROR: failed to allocate key bufvec" << dendl;
+ return -ENOMEM;
+ }
+
+ if (opcode == M0_IC_PUT || opcode == M0_IC_GET) {
+ rc = -ENOMEM;
+ if (m0_bufvec_empty_alloc(&v, 1) != 0) {
+ ldout(cctx, 0) << "ERROR: failed to allocate value bufvec" << dendl;
+ goto out;
+ }
+ }
+
+ set_m0bufvec(&k, key);
+ if (opcode == M0_IC_PUT)
+ set_m0bufvec(&v, val);
+
+ if (opcode == M0_IC_DEL)
+ vp = nullptr;
+
+ if (opcode == M0_IC_PUT && update)
+ flags |= M0_OIF_OVERWRITE;
+
+ rc = m0_idx_op(idx, opcode, &k, vp, &rc_i, flags, &op);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl;
+ goto out;
+ }
+
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl;
+ goto out;
+ }
+
+ if (rc_i != 0) {
+ ldout(cctx, 0) << "ERROR: idx op failed: " << rc_i << dendl;
+ rc = rc_i;
+ goto out;
+ }
+
+ if (opcode == M0_IC_GET) {
+ val.resize(*v.ov_vec.v_count);
+ memcpy(reinterpret_cast<char*>(val.data()), *v.ov_buf, *v.ov_vec.v_count);
+ }
+
+out:
+ m0_bufvec_free2(&k);
+ if (opcode == M0_IC_GET)
+ m0_bufvec_free(&v); // cleanup buffer after GET
+ else if (opcode == M0_IC_PUT)
+ m0_bufvec_free2(&v);
+
+ return rc;
+}
+
+// Retrieve a range of key/value pairs starting from keys[0].
+int MotrStore::do_idx_next_op(struct m0_idx *idx,
+ vector<vector<uint8_t>>& keys,
+ vector<vector<uint8_t>>& vals)
+{
+ int rc;
+ uint32_t i = 0;
+ int nr_kvp = vals.size();
+ int *rcs = new int[nr_kvp];
+ struct m0_bufvec k, v;
+ struct m0_op *op = nullptr;
+
+ rc = m0_bufvec_empty_alloc(&k, nr_kvp)?:
+ m0_bufvec_empty_alloc(&v, nr_kvp);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: failed to allocate kv bufvecs" << dendl;
+ delete []rcs;
+ return rc;
+ }
+
+ set_m0bufvec(&k, keys[0]);
+
+ rc = m0_idx_op(idx, M0_IC_NEXT, &k, &v, rcs, 0, &op);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl;
+ goto out;
+ }
+
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl;
+ goto out;
+ }
+
+ for (i = 0; i < v.ov_vec.v_nr; ++i) {
+ if (rcs[i] < 0)
+ break;
+
+ vector<uint8_t>& key = keys[i];
+ vector<uint8_t>& val = vals[i];
+ key.resize(k.ov_vec.v_count[i]);
+ val.resize(v.ov_vec.v_count[i]);
+ memcpy(reinterpret_cast<char*>(key.data()), k.ov_buf[i], k.ov_vec.v_count[i]);
+ memcpy(reinterpret_cast<char*>(val.data()), v.ov_buf[i], v.ov_vec.v_count[i]);
+ }
+
+out:
+ k.ov_vec.v_nr = i;
+ v.ov_vec.v_nr = i;
+ m0_bufvec_free(&k);
+ m0_bufvec_free(&v); // cleanup buffer after GET
+
+ delete []rcs;
+ return rc ?: i;
+}
+
+// Retrieve a number of key/value pairs under the prefix starting
+// from the marker at key_out[0].
+int MotrStore::next_query_by_name(string idx_name,
+ vector<string>& key_out,
+ vector<bufferlist>& val_out,
+ string prefix, string delim)
+{
+ unsigned nr_kvp = std::min(val_out.size(), 100UL);
+ struct m0_idx idx = {};
+ vector<vector<uint8_t>> keys(nr_kvp);
+ vector<vector<uint8_t>> vals(nr_kvp);
+ struct m0_uint128 idx_id;
+ int i = 0, j, k = 0;
+
+ index_name_to_motr_fid(idx_name, &idx_id);
+ int rc = open_motr_idx(&idx_id, &idx);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: next_query_by_name(): failed to open index: rc="
+ << rc << dendl;
+ goto out;
+ }
+
+ // Only the first element of keys needs to be set for the NEXT query.
+ // The keys will be filled with the keys returned from the motr index.
+ ldout(cctx, 20) <<__func__<< ": next_query_by_name(): index=" << idx_name
+ << " prefix=" << prefix << " delim=" << delim << dendl;
+ keys[0].assign(key_out[0].begin(), key_out[0].end());
+ for (i = 0; i < (int)val_out.size(); i += k, k = 0) {
+ rc = do_idx_next_op(&idx, keys, vals);
+ ldout(cctx, 20) << "do_idx_next_op() = " << rc << dendl;
+ if (rc < 0) {
+ ldout(cctx, 0) << "ERROR: NEXT query failed. " << rc << dendl;
+ goto out;
+ }
+
+ string dir;
+ for (j = 0, k = 0; j < rc; ++j) {
+ string key(keys[j].begin(), keys[j].end());
+ size_t pos = std::string::npos;
+ if (!delim.empty())
+ pos = key.find(delim, prefix.length());
+ if (pos != std::string::npos) { // DIR entry
+ dir.assign(key, 0, pos + 1);
+ if (dir.compare(0, prefix.length(), prefix) != 0)
+ goto out;
+ if (i + k == 0 || dir != key_out[i + k - 1]) // a new one
+ key_out[i + k++] = dir;
+ continue;
+ }
+ dir = "";
+ if (key.compare(0, prefix.length(), prefix) != 0)
+ goto out;
+ key_out[i + k] = key;
+ bufferlist& vbl = val_out[i + k];
+ vbl.append(reinterpret_cast<char*>(vals[j].data()), vals[j].size());
+ ++k;
+ }
+
+ if (rc < (int)nr_kvp) // there are no more keys to fetch
+ break;
+
+ string next_key;
+ if (dir != "")
+ next_key = dir + "\xff"; // skip all dir content in 1 step
+ else
+ next_key = key_out[i + k - 1] + " ";
+ ldout(cctx, 0) << "do_idx_next_op(): next_key=" << next_key << dendl;
+ keys[0].assign(next_key.begin(), next_key.end());
+ }
+
+out:
+ m0_idx_fini(&idx);
+ return rc < 0 ? rc : i + k;
+}
+
+int MotrStore::delete_motr_idx_by_name(string iname)
+{
+ struct m0_idx idx;
+ struct m0_uint128 idx_id;
+ struct m0_op *op = nullptr;
+
+ ldout(cctx, 20) << "delete_motr_idx_by_name=" << iname << dendl;
+
+ index_name_to_motr_fid(iname, &idx_id);
+ m0_idx_init(&idx, &container.co_realm, &idx_id);
+ m0_entity_open(&idx.in_entity, &op);
+ int rc = m0_entity_delete(&idx.in_entity, &op);
+ if (rc < 0)
+ goto out;
+
+ m0_op_launch(&op, 1);
+
+ ldout(cctx, 70) << "waiting for op completion" << dendl;
+
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc == -ENOENT) // race deletion??
+ rc = 0;
+ else if (rc < 0)
+ ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
+
+ ldout(cctx, 20) << "delete_motr_idx_by_name rc=" << rc << dendl;
+
+out:
+ m0_idx_fini(&idx);
+ return rc;
+}
+
+int MotrStore::open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx)
+{
+ m0_idx_init(idx, &container.co_realm, id);
+ return 0;
+}
+
+// The following macros are from dix/fid_convert.h, which are not exposed.
+enum {
+ M0_DIX_FID_DEVICE_ID_OFFSET = 32,
+ M0_DIX_FID_DIX_CONTAINER_MASK = (1ULL << M0_DIX_FID_DEVICE_ID_OFFSET)
+ - 1,
+};
+
+// MD5 is used here; a more robust way to convert an index name to a fid
+// may be needed to avoid collisions.
+void MotrStore::index_name_to_motr_fid(string iname, struct m0_uint128 *id)
+{
+ unsigned char md5[16]; // 128/8 = 16
+ MD5 hash;
+
+ // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
+ hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+ hash.Update((const unsigned char *)iname.c_str(), iname.length());
+ hash.Final(md5);
+
+ memcpy(&id->u_hi, md5, 8);
+ memcpy(&id->u_lo, md5 + 8, 8);
+ ldout(cctx, 20) << "id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl;
+
+ struct m0_fid *fid = (struct m0_fid*)id;
+ m0_fid_tset(fid, m0_dix_fid_type.ft_id,
+ fid->f_container & M0_DIX_FID_DIX_CONTAINER_MASK, fid->f_key);
+ ldout(cctx, 20) << "converted id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl;
+}
+
+int MotrStore::do_idx_op_by_name(string idx_name, enum m0_idx_opcode opcode,
+ string key_str, bufferlist &bl, bool update)
+{
+ struct m0_idx idx;
+ vector<uint8_t> key(key_str.begin(), key_str.end());
+ vector<uint8_t> val;
+ struct m0_uint128 idx_id;
+
+ index_name_to_motr_fid(idx_name, &idx_id);
+ int rc = open_motr_idx(&idx_id, &idx);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: failed to open index: " << rc << dendl;
+ goto out;
+ }
+
+ if (opcode == M0_IC_PUT)
+ val.assign(bl.c_str(), bl.c_str() + bl.length());
+
+ ldout(cctx, 20) <<__func__<< ": do_idx_op_by_name(): op="
+ << (opcode == M0_IC_PUT ? "PUT" : "GET")
+ << " idx=" << idx_name << " key=" << key_str << dendl;
+ rc = do_idx_op(&idx, opcode, key, val, update);
+ if (rc == 0 && opcode == M0_IC_GET)
+ // Append the returned value (blob) to the bufferlist.
+ bl.append(reinterpret_cast<char*>(val.data()), val.size());
+
+out:
+ m0_idx_fini(&idx);
+ return rc;
+}
+
+int MotrStore::create_motr_idx_by_name(string iname)
+{
+ struct m0_idx idx = {};
+ struct m0_uint128 id;
+
+ index_name_to_motr_fid(iname, &id);
+ m0_idx_init(&idx, &container.co_realm, &id);
+
+ // create index or make sure it's created
+ struct m0_op *op = nullptr;
+ int rc = m0_entity_create(nullptr, &idx.in_entity, &op);
+ if (rc != 0) {
+ ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
+ goto out;
+ }
+
+ m0_op_launch(&op, 1);
+ rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
+ m0_rc(op);
+ m0_op_fini(op);
+ m0_op_free(op);
+
+ if (rc != 0 && rc != -EEXIST)
+ ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
+out:
+ m0_idx_fini(&idx);
+ return rc;
+}
+
+// If a global index were checked (to see whether it has been created) every
+// time before it is queried (put/get), each query would take 2 Motr
+// operations. As the global indices' names and FIDs are already known when
+// MotrStore is created, we move the check and creation into newMotrStore().
+// A similar approach is used for per-bucket/user indices. For example, the
+// bucket instance index is created when creating the bucket.
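+// (motr_global_indices is expected to hold the global index names declared
+// in rgw_sal_motr.h, e.g. "motr.rgw.users" and "motr.rgw.bucket.instances".)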
+int MotrStore::check_n_create_global_indices()
+{
+ int rc = 0;
+
+ for (const auto& iname : motr_global_indices) {
+ rc = create_motr_idx_by_name(iname);
+ if (rc < 0 && rc != -EEXIST)
+ break;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+std::string MotrStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y)
+{
+ char id[M0_FID_STR_LEN];
+ struct m0_confc *confc = m0_reqh2confc(&instance->m0c_reqh);
+
+ m0_fid_print(id, ARRAY_SIZE(id), &confc->cc_root->co_id);
+ return std::string(id);
+}
+
+int MotrStore::init_metadata_cache(const DoutPrefixProvider *dpp,
+ CephContext *cct)
+{
+ this->obj_meta_cache = new MotrMetaCache(dpp, cct);
+ this->get_obj_meta_cache()->set_enabled(true);
+
+ this->user_cache = new MotrMetaCache(dpp, cct);
+ this->get_user_cache()->set_enabled(true);
+
+ this->bucket_inst_cache = new MotrMetaCache(dpp, cct);
+ this->get_bucket_inst_cache()->set_enabled(true);
+
+ return 0;
+}
+
+ int MotrLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script)
+ {
+ return -ENOENT;
+ }
+
+ int MotrLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script)
+ {
+ return -ENOENT;
+ }
+
+ int MotrLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key)
+ {
+ return -ENOENT;
+ }
+
+ int MotrLuaManager::add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name)
+ {
+ return -ENOENT;
+ }
+
+ int MotrLuaManager::remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name)
+ {
+ return -ENOENT;
+ }
+
+ int MotrLuaManager::list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages)
+ {
+ return -ENOENT;
+ }
+} // namespace rgw::sal
+
+extern "C" {
+
+void *newMotrStore(CephContext *cct)
+{
+ int rc = -1;
+ rgw::sal::MotrStore *store = new rgw::sal::MotrStore(cct);
+
+ if (store) {
+ store->conf.mc_is_oostore = true;
+ // XXX: these params should be taken from config settings and
+ // cct somehow?
+ store->instance = nullptr;
+ const auto& proc_ep = g_conf().get_val<std::string>("motr_my_endpoint");
+ const auto& ha_ep = g_conf().get_val<std::string>("motr_ha_endpoint");
+ const auto& proc_fid = g_conf().get_val<std::string>("motr_my_fid");
+ const auto& profile = g_conf().get_val<std::string>("motr_profile_fid");
+ const auto& admin_proc_ep = g_conf().get_val<std::string>("motr_admin_endpoint");
+ const auto& admin_proc_fid = g_conf().get_val<std::string>("motr_admin_fid");
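+ // A minimal sketch of the corresponding ceph.conf keys (placeholders only;
+ // the actual endpoint/FID formats depend on the Motr deployment):
+ //   motr_my_endpoint    = <this RGW process' Motr endpoint>
+ //   motr_ha_endpoint    = <Motr HA endpoint>
+ //   motr_my_fid         = <this RGW process' FID>
+ //   motr_profile_fid    = <Motr profile FID>
+ //   motr_admin_endpoint = <endpoint used by radosgw-admin>
+ //   motr_admin_fid      = <process FID used by radosgw-admin>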
+ const int init_flags = cct->get_init_flags();
+ ldout(cct, 0) << "INFO: motr my endpoint: " << proc_ep << dendl;
+ ldout(cct, 0) << "INFO: motr ha endpoint: " << ha_ep << dendl;
+ ldout(cct, 0) << "INFO: motr my fid: " << proc_fid << dendl;
+ ldout(cct, 0) << "INFO: motr profile fid: " << profile << dendl;
+ store->conf.mc_local_addr = proc_ep.c_str();
+ store->conf.mc_process_fid = proc_fid.c_str();
+
+ ldout(cct, 0) << "INFO: init flags: " << init_flags << dendl;
+ ldout(cct, 0) << "INFO: motr admin endpoint: " << admin_proc_ep << dendl;
+ ldout(cct, 0) << "INFO: motr admin fid: " << admin_proc_fid << dendl;
+
+ // HACK: this is so that radosgw-admin uses a different client
+ if (init_flags == 0) {
+ store->conf.mc_process_fid = admin_proc_fid.c_str();
+ store->conf.mc_local_addr = admin_proc_ep.c_str();
+ } else {
+ store->conf.mc_process_fid = proc_fid.c_str();
+ store->conf.mc_local_addr = proc_ep.c_str();
+ }
+ store->conf.mc_ha_addr = ha_ep.c_str();
+ store->conf.mc_profile = profile.c_str();
+
+ ldout(cct, 50) << "INFO: motr profile fid: " << store->conf.mc_profile << dendl;
+ ldout(cct, 50) << "INFO: ha addr: " << store->conf.mc_ha_addr << dendl;
+ ldout(cct, 50) << "INFO: process fid: " << store->conf.mc_process_fid << dendl;
+ ldout(cct, 50) << "INFO: motr endpoint: " << store->conf.mc_local_addr << dendl;
+
+ store->conf.mc_tm_recv_queue_min_len = 64;
+ store->conf.mc_max_rpc_msg_size = 524288;
+ store->conf.mc_idx_service_id = M0_IDX_DIX;
+ store->dix_conf.kc_create_meta = false;
+ store->conf.mc_idx_service_conf = &store->dix_conf;
+
+ if (!g_conf().get_val<bool>("motr_tracing_enabled")) {
+ m0_trace_level_allow(M0_WARN); // allow errors and warnings in syslog anyway
+ m0_trace_set_mmapped_buffer(false);
+ }
+
+ store->instance = nullptr;
+ rc = m0_client_init(&store->instance, &store->conf, true);
+ if (rc != 0) {
+ ldout(cct, 0) << "ERROR: m0_client_init() failed: " << rc << dendl;
+ goto out;
+ }
+
+ m0_container_init(&store->container, nullptr, &M0_UBER_REALM, store->instance);
+ rc = store->container.co_realm.re_entity.en_sm.sm_rc;
+ if (rc != 0) {
+ ldout(cct, 0) << "ERROR: m0_container_init() failed: " << rc << dendl;
+ goto out;
+ }
+
+ rc = m0_ufid_init(store->instance, &ufid_gr);
+ if (rc != 0) {
+ ldout(cct, 0) << "ERROR: m0_ufid_init() failed: " << rc << dendl;
+ goto out;
+ }
+
+ // Create the global indices if they don't exist yet.
+ rc = store->check_n_create_global_indices();
+ if (rc != 0) {
+ ldout(cct, 0) << "ERROR: check_n_create_global_indices() failed: " << rc << dendl;
+ goto out;
+ }
+
+ }
+
+out:
+ if (rc != 0) {
+ delete store;
+ return nullptr;
+ }
+ return store;
+}
+
+}
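+
+// Example configuration (illustration only): the option names below match the
+// g_conf().get_val() calls in newMotrStore(); the values are placeholders and
+// the section name depends on how the local RGW instance is named.
+//
+//   [client.rgw.8000]
+//   motr_my_endpoint      = <local Motr endpoint>
+//   motr_ha_endpoint      = <HA (hax) endpoint>
+//   motr_my_fid           = <FID of this RGW's Motr process>
+//   motr_profile_fid      = <cluster profile FID>
+//   motr_admin_endpoint   = <endpoint used by radosgw-admin>
+//   motr_admin_fid        = <process FID used by radosgw-admin>
+//   motr_tracing_enabled  = false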
--- /dev/null
+
+// vim: ts=2 sw=2 expandtab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * SAL implementation for the CORTX Motr backend
+ *
+ * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+extern "C" {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wextern-c-compat"
+#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
+#include "motr/config.h"
+#include "motr/client.h"
+#pragma clang diagnostic pop
+}
+
+#include "rgw_sal_store.h"
+#include "rgw_rados.h"
+#include "rgw_notify.h"
+#include "rgw_oidc_provider.h"
+#include "rgw_role.h"
+#include "rgw_multi.h"
+#include "rgw_putobj_processor.h"
+
+namespace rgw::sal {
+
+class MotrStore;
+
+// Global Motr indices
+#define RGW_MOTR_USERS_IDX_NAME "motr.rgw.users"
+#define RGW_MOTR_BUCKET_INST_IDX_NAME "motr.rgw.bucket.instances"
+#define RGW_MOTR_BUCKET_HD_IDX_NAME "motr.rgw.bucket.headers"
+#define RGW_IAM_MOTR_ACCESS_KEY "motr.rgw.accesskeys"
+#define RGW_IAM_MOTR_EMAIL_KEY "motr.rgw.emails"
+
+//#define RGW_MOTR_BUCKET_ACL_IDX_NAME "motr.rgw.bucket.acls"
+
+// A simplified metadata cache implementation.
+// Note: MotrMetaCache doesn't handle the IO operations to Motr. A proxy
+// class can be added to handle both the cache and the 'real' ops.
+class MotrMetaCache
+{
+protected:
+ // MGW re-uses ObjectCache to cache an object's metadata, as it already
+ // implements an LRU cache: (1) ObjectCache internally uses a map and an LRU
+ // list to manage cache entries. The POC uses the object name, user name or
+ // bucket name as the key to look up and insert an entry. (2) ObjectCache::data
+ // is a bufferlist and can be used to store any metadata structure, such as an
+ // object's bucket dir entry, user info or bucket instance.
+ //
+ // Note from RGW:
+ // The Rados Gateway stores metadata and objects in an internal cache. This
+ // should be kept consistent by the OSD's relaying notify events between
+ // multiple watching RGW processes. In the event that this notification
+ // protocol fails, bounding the length of time that any data in the cache will
+ // be assumed valid will ensure that any RGW instance that falls out of sync
+ // will eventually recover. This seems to be an issue mostly for large numbers
+ // of RGW instances under heavy use. If you would like to turn off cache expiry,
+ // set this value to zero.
+ //
+ // Currently the POC hasn't implemented the watch-notify mechanism yet, so the
+ // current implementation is similar to cortx-s3server, which is based on
+ // expiry time (TODO: see the comments on distribute_cache).
+ //
+ // Be aware: unlike RGW, the current POC does not cache Motr object data!
+ // RGW caches the first chunk (4MB by default).
+ ObjectCache cache;
+
+public:
+ // Lookup a cache entry.
+ int get(const DoutPrefixProvider *dpp, const std::string& name, bufferlist& data);
+
+ // Insert a cache entry.
+ int put(const DoutPrefixProvider *dpp, const std::string& name, const bufferlist& data);
+
+ // Called when an object is deleted. Notification should be sent to other
+ // RGW instances.
+ int remove(const DoutPrefixProvider *dpp, const std::string& name);
+
+ // Make the local cache entry invalid.
+ void invalid(const DoutPrefixProvider *dpp, const std::string& name);
+
+ // TODO: distribute_cache() and watch_cb() are currently only placeholder
+ // functions. Check out services/svc_sys_obj_cache.h/.cc for reference.
+ // These 2 functions are designed to send, or to act on, cache notifications.
+ // It is feasible to implement the functionality using Motr's FDMI after
+ // discussing with Hua.
+ int distribute_cache(const DoutPrefixProvider *dpp,
+ const std::string& normal_name,
+ ObjectCacheInfo& obj_info, int op);
+ int watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl);
+
+ void set_enabled(bool status);
+
+ MotrMetaCache(const DoutPrefixProvider *dpp, CephContext *cct) {
+ cache.set_ctx(cct);
+ }
+};
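+
+// Illustrative sketch only (not used by the driver): the two hypothetical
+// helpers below spell out the pattern described above, i.e. encode an
+// encodable metadata struct (e.g. MotrUserInfo, defined below) into a
+// bufferlist and cache it under its object/user/bucket name.
+template <typename T>
+inline int motr_cache_put_sketch(const DoutPrefixProvider* dpp,
+                                 MotrMetaCache& cache,
+                                 const std::string& name, const T& meta)
+{
+  bufferlist bl;
+  meta.encode(bl);                  // serialize the metadata struct
+  return cache.put(dpp, name, bl);  // insert/update the LRU entry
+}
+
+template <typename T>
+inline int motr_cache_get_sketch(const DoutPrefixProvider* dpp,
+                                 MotrMetaCache& cache,
+                                 const std::string& name, T& meta)
+{
+  bufferlist bl;
+  int rc = cache.get(dpp, name, bl);
+  if (rc != 0)
+    return rc;                      // cache miss: caller falls back to Motr
+  auto it = bl.cbegin();
+  meta.decode(it);                  // rebuild the struct from the cached bytes
+  return 0;
+}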
+
+struct MotrUserInfo {
+ RGWUserInfo info;
+ obj_version user_version;
+ rgw::sal::Attrs attrs;
+
+ void encode(bufferlist& bl) const
+ {
+ ENCODE_START(3, 3, bl);
+ encode(info, bl);
+ encode(user_version, bl);
+ encode(attrs, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl)
+ {
+ DECODE_START(3, bl);
+ decode(info, bl);
+ decode(user_version, bl);
+ decode(attrs, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(MotrUserInfo);
+
+struct MotrEmailInfo {
+ std::string user_id;
+ std::string email_id;
+
+ MotrEmailInfo() {}
+ MotrEmailInfo(std::string _user_id, std::string _email_id )
+ : user_id(std::move(_user_id)), email_id(std::move(_email_id)) {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(user_id, bl);
+ encode(email_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
+ decode(user_id, bl);
+ decode(email_id, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(MotrEmailInfo);
+
+struct MotrAccessKey {
+ std::string id; // AccessKey
+ std::string key; // SecretKey
+ std::string user_id; // UserID
+
+ MotrAccessKey() {}
+ MotrAccessKey(std::string _id, std::string _key, std::string _user_id)
+ : id(std::move(_id)), key(std::move(_key)), user_id(std::move(_user_id)) {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(id, bl);
+ encode(key, bl);
+ encode(user_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
+ decode(id, bl);
+ decode(key, bl);
+ decode(user_id, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(MotrAccessKey);
+
+class MotrNotification : public StoreNotification {
+ public:
+ MotrNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) :
+ StoreNotification(_obj, _src_obj, _type) {}
+ ~MotrNotification() = default;
+
+ virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override { return 0;}
+ virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size,
+ const ceph::real_time& mtime, const std::string& etag, const std::string& version) override { return 0; }
+};
+
+class MotrUser : public StoreUser {
+ private:
+ MotrStore *store;
+ struct m0_uint128 idxID = {0xe5ecb53640d4ecce, 0x6a156cd5a74aa3b8}; // MD5 of "motr.rgw.users"
+ struct m0_idx idx;
+
+ public:
+ std::set<std::string> access_key_tracker;
+ MotrUser(MotrStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { }
+ MotrUser(MotrStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { }
+ MotrUser(MotrStore *_st) : store(_st) { }
+ MotrUser(MotrUser& _o) = default;
+ MotrUser() {}
+
+ virtual std::unique_ptr<User> clone() override {
+ return std::unique_ptr<User>(new MotrUser(*this));
+ }
+ int list_buckets(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& end_marker,
+ uint64_t max, bool need_stats, BucketList& buckets, optional_yield y) override;
+ virtual int create_bucket(const DoutPrefixProvider* dpp,
+ const rgw_bucket& b,
+ const std::string& zonegroup_id,
+ rgw_placement_rule& placement_rule,
+ std::string& swift_ver_location,
+ const RGWQuotaInfo* pquota_info,
+ const RGWAccessControlPolicy& policy,
+ Attrs& attrs,
+ RGWBucketInfo& info,
+ obj_version& ep_objv,
+ bool exclusive,
+ bool obj_lock_enabled,
+ bool* existed,
+ req_info& req_info,
+ std::unique_ptr<Bucket>* bucket,
+ optional_yield y) override;
+ virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override;
+ virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override;
+ virtual int read_stats(const DoutPrefixProvider *dpp,
+ optional_yield y, RGWStorageStats* stats,
+ ceph::real_time *last_stats_sync = nullptr,
+ ceph::real_time *last_stats_update = nullptr) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override;
+ virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
+ virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
+ bool* is_truncated, RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
+
+ virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override;
+ virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override;
+ virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override;
+ virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override;
+
+ int create_user_info_idx();
+ int load_user_from_idx(const DoutPrefixProvider *dpp, MotrStore *store, RGWUserInfo& info, std::map<std::string,
+ bufferlist> *attrs, RGWObjVersionTracker *objv_tr);
+
+ friend class MotrBucket;
+};
+
+class MotrBucket : public StoreBucket {
+ private:
+ MotrStore *store;
+ RGWAccessControlPolicy acls;
+
+ // RGWBucketInfo and the other information shown when listing a bucket are
+ // represented in struct MotrBucketInfo. The structure is encoded and stored
+ // as the value of the global bucket instance index.
+ // TODO: compare the pros and cons of separating the bucket_attrs (ACLs, tags,
+ // etc.) into a different index.
+ struct MotrBucketInfo {
+ RGWBucketInfo info;
+
+ obj_version bucket_version;
+ ceph::real_time mtime;
+
+ rgw::sal::Attrs bucket_attrs;
+
+ void encode(bufferlist& bl) const
+ {
+ ENCODE_START(4, 4, bl);
+ encode(info, bl);
+ encode(bucket_version, bl);
+ encode(mtime, bl);
+ encode(bucket_attrs, bl); //rgw_cache.h example for a map
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl)
+ {
+ DECODE_START(4, bl);
+ decode(info, bl);
+ decode(bucket_version, bl);
+ decode(mtime, bl);
+ decode(bucket_attrs, bl);
+ DECODE_FINISH(bl);
+ }
+ };
+ WRITE_CLASS_ENCODER(MotrBucketInfo);
+
+ public:
+ MotrBucket(MotrStore *_st)
+ : store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, User* _u)
+ : StoreBucket(_u),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const rgw_bucket& _b)
+ : StoreBucket(_b),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const RGWBucketEnt& _e)
+ : StoreBucket(_e),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const RGWBucketInfo& _i)
+ : StoreBucket(_i),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const rgw_bucket& _b, User* _u)
+ : StoreBucket(_b, _u),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const RGWBucketEnt& _e, User* _u)
+ : StoreBucket(_e, _u),
+ store(_st),
+ acls() {
+ }
+
+ MotrBucket(MotrStore *_st, const RGWBucketInfo& _i, User* _u)
+ : StoreBucket(_i, _u),
+ store(_st),
+ acls() {
+ }
+
+ ~MotrBucket() { }
+
+ virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
+ virtual int list(const DoutPrefixProvider *dpp, ListParams&, int, ListResults&, optional_yield y) override;
+ virtual int remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override;
+ virtual int remove_bucket_bypass_gc(int concurrent_max, bool
+ keep_index_consistent,
+ optional_yield y, const
+ DoutPrefixProvider *dpp) override;
+ virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
+ virtual int set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy& acl, optional_yield y) override;
+ virtual int load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats = false) override;
+ int link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y);
+ int unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y);
+ int create_bucket_index();
+ int create_multipart_indices();
+ virtual int read_stats(const DoutPrefixProvider *dpp,
+ const bucket_index_layout_generation& idx_layout, int shard_id,
+ std::string *bucket_ver, std::string *master_ver,
+ std::map<RGWObjCategory, RGWStorageStats>& stats,
+ std::string *max_marker = nullptr,
+ bool *syncstopped = nullptr) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp,
+ const bucket_index_layout_generation& idx_layout,
+ int shard_id, RGWGetBucketStats_CB* ctx) override;
+ virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
+ virtual int update_container_stats(const DoutPrefixProvider *dpp) override;
+ virtual int check_bucket_shards(const DoutPrefixProvider *dpp) override;
+ virtual int chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) override;
+ virtual int put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time mtime) override;
+ virtual bool is_owner(User* user) override;
+ virtual int check_empty(const DoutPrefixProvider *dpp, optional_yield y) override;
+ virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override;
+ virtual int merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& attrs, optional_yield y) override;
+ virtual int try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) override;
+ virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
+ bool *is_truncated, RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
+ virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list<rgw_obj_index_key>& objs_to_unlink) override;
+ virtual int check_index(const DoutPrefixProvider *dpp, std::map<RGWObjCategory, RGWStorageStats>& existing_stats, std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) override;
+ virtual int rebuild_index(const DoutPrefixProvider *dpp) override;
+ virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override;
+ virtual int purge_instance(const DoutPrefixProvider *dpp) override;
+ virtual std::unique_ptr<Bucket> clone() override {
+ return std::make_unique<MotrBucket>(*this);
+ }
+ virtual std::unique_ptr<MultipartUpload> get_multipart_upload(const std::string& oid,
+ std::optional<std::string> upload_id=std::nullopt,
+ ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override;
+ virtual int list_multiparts(const DoutPrefixProvider *dpp,
+ const std::string& prefix,
+ std::string& marker,
+ const std::string& delim,
+ const int& max_uploads,
+ std::vector<std::unique_ptr<MultipartUpload>>& uploads,
+ std::map<std::string, bool> *common_prefixes,
+ bool *is_truncated) override;
+ virtual int abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) override;
+
+ friend class MotrStore;
+};
+
+class MotrPlacementTier: public StorePlacementTier {
+ MotrStore* store;
+ RGWZoneGroupPlacementTier tier;
+public:
+ MotrPlacementTier(MotrStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {}
+ virtual ~MotrPlacementTier() = default;
+
+ virtual const std::string& get_tier_type() { return tier.tier_type; }
+ virtual const std::string& get_storage_class() { return tier.storage_class; }
+ virtual bool retain_head_object() { return tier.retain_head_object; }
+ RGWZoneGroupPlacementTier& get_rt() { return tier; }
+};
+
+class MotrZoneGroup : public StoreZoneGroup {
+protected:
+ MotrStore* store;
+ const RGWZoneGroup group;
+ std::string empty;
+public:
+ MotrZoneGroup(MotrStore* _store) : store(_store), group() {}
+ MotrZoneGroup(MotrStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {}
+ virtual ~MotrZoneGroup() = default;
+
+ virtual const std::string& get_id() const override { return group.get_id(); };
+ virtual const std::string& get_name() const override { return group.get_name(); };
+ virtual int equals(const std::string& other_zonegroup) const override {
+ return group.equals(other_zonegroup);
+ };
+ /** Get the endpoint from zonegroup, or from master zone if not set */
+ virtual const std::string& get_endpoint() const override;
+ virtual bool placement_target_exists(std::string& target) const override;
+ virtual bool is_master_zonegroup() const override {
+ return group.is_master_zonegroup();
+ };
+ virtual const std::string& get_api_name() const override { return group.api_name; };
+ virtual int get_placement_target_names(std::set<std::string>& names) const override;
+ virtual const std::string& get_default_placement_name() const override {
+ return group.default_placement.name; };
+ virtual int get_hostnames(std::list<std::string>& names) const override {
+ names = group.hostnames;
+ return 0;
+ };
+ virtual int get_s3website_hostnames(std::list<std::string>& names) const override {
+ names = group.hostnames_s3website;
+ return 0;
+ };
+ virtual int get_zone_count() const override {
+ return group.zones.size();
+ }
+ virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr<PlacementTier>* tier);
+ virtual int get_zone_by_id(const std::string& id, std::unique_ptr<Zone>* zone) override {
+ return -1;
+ }
+ virtual int get_zone_by_name(const std::string& name, std::unique_ptr<Zone>* zone) override {
+ return -1;
+ }
+ virtual int list_zones(std::list<std::string>& zone_ids) override {
+ zone_ids.clear();
+ return 0;
+ }
+ const RGWZoneGroup& get_group() { return group; }
+ virtual std::unique_ptr<ZoneGroup> clone() override {
+ return std::make_unique<MotrZoneGroup>(store, group);
+ }
+ friend class MotrZone;
+};
+
+class MotrZone : public StoreZone {
+ protected:
+ MotrStore* store;
+ RGWRealm *realm{nullptr};
+ MotrZoneGroup zonegroup;
+ RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
+ RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */
+ RGWPeriod *current_period{nullptr};
+
+ public:
+ MotrZone(MotrStore* _store) : store(_store), zonegroup(_store) {
+ realm = new RGWRealm();
+ zone_public_config = new RGWZone();
+ zone_params = new RGWZoneParams();
+ current_period = new RGWPeriod();
+
+ // XXX: only default and STANDARD supported for now
+ RGWZonePlacementInfo info;
+ RGWZoneStorageClasses sc;
+ sc.set_storage_class("STANDARD", nullptr, nullptr);
+ info.storage_classes = sc;
+ zone_params->placement_pools["default"] = info;
+ }
+ MotrZone(MotrStore* _store, MotrZoneGroup _zg) : store(_store), zonegroup(_zg) {
+ realm = new RGWRealm();
+ // TODO: fetch zonegroup params (eg. id) from provisioner config.
+ //zonegroup.group.set_id("0956b174-fe14-4f97-8b50-bb7ec5e1cf62");
+ //zonegroup.group.api_name = "default";
+ zone_public_config = new RGWZone();
+ zone_params = new RGWZoneParams();
+ current_period = new RGWPeriod();
+
+ // XXX: only default and STANDARD supported for now
+ RGWZonePlacementInfo info;
+ RGWZoneStorageClasses sc;
+ sc.set_storage_class("STANDARD", nullptr, nullptr);
+ info.storage_classes = sc;
+ zone_params->placement_pools["default"] = info;
+ }
+ ~MotrZone() = default;
+
+ virtual std::unique_ptr<Zone> clone() override {
+ return std::make_unique<MotrZone>(store);
+ }
+ virtual ZoneGroup& get_zonegroup() override;
+ virtual const std::string& get_id() override;
+ virtual const std::string& get_name() const override;
+ virtual bool is_writeable() override;
+ virtual bool get_redirect_endpoint(std::string* endpoint) override;
+ virtual bool has_zonegroup_api(const std::string& api) const override;
+ virtual const std::string& get_current_period_id() override;
+ virtual const RGWAccessKey& get_system_key() { return zone_params->system_key; }
+ virtual const std::string& get_realm_name() { return realm->get_name(); }
+ virtual const std::string& get_realm_id() { return realm->get_id(); }
+ virtual const std::string_view get_tier_type() { return "rgw"; }
+ virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() { return nullptr; }
+ friend class MotrStore;
+};
+
+class MotrLuaManager : public StoreLuaManager {
+ MotrStore* store;
+
+ public:
+ MotrLuaManager(MotrStore* _s) : store(_s)
+ {
+ }
+ virtual ~MotrLuaManager() = default;
+
+ /** Get a script named with the given key from the backing store */
+ virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) override;
+ /** Put a script named with the given key to the backing store */
+ virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) override;
+ /** Delete a script named with the given key from the backing store */
+ virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) override;
+ /** Add a lua package */
+ virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override;
+ /** Remove a lua package */
+ virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override;
+ /** List lua packages */
+ virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) override;
+};
+
+class MotrOIDCProvider : public RGWOIDCProvider {
+ MotrStore* store;
+ public:
+ MotrOIDCProvider(MotrStore* _store) : store(_store) {}
+ ~MotrOIDCProvider() = default;
+
+ virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override { return 0; }
+ virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override { return 0; }
+ virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override { return 0;}
+
+ void encode(bufferlist& bl) const {
+ RGWOIDCProvider::encode(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ RGWOIDCProvider::decode(bl);
+ }
+};
+
+class MotrObject : public StoreObject {
+ private:
+ MotrStore *store;
+ RGWAccessControlPolicy acls;
+ RGWObjCategory category;
+
+ // Set if this object is part of a multipart-uploaded object.
+ // TODO: handle it in another class? MotrPartObject : public MotrObject
+ uint64_t part_off;
+ uint64_t part_size;
+ uint64_t part_num;
+
+ public:
+
+ // motr object metadata stored in index
+ struct Meta {
+ struct m0_uint128 oid = {};
+ struct m0_fid pver = {};
+ uint64_t layout_id = 0;
+
+ void encode(bufferlist& bl) const
+ {
+ ENCODE_START(5, 5, bl);
+ encode(oid.u_hi, bl);
+ encode(oid.u_lo, bl);
+ encode(pver.f_container, bl);
+ encode(pver.f_key, bl);
+ encode(layout_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl)
+ {
+ DECODE_START(5, bl);
+ decode(oid.u_hi, bl);
+ decode(oid.u_lo, bl);
+ decode(pver.f_container, bl);
+ decode(pver.f_key, bl);
+ decode(layout_id, bl);
+ DECODE_FINISH(bl);
+ }
+ };
+
+ struct m0_obj *mobj = NULL;
+ Meta meta;
+
+ struct MotrReadOp : public ReadOp {
+ private:
+ MotrObject* source;
+
+ // The set of part objects if the source is
+ // a multipart uploaded object.
+ std::map<int, std::unique_ptr<MotrObject>> part_objs;
+
+ public:
+ MotrReadOp(MotrObject *_source);
+
+ virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override;
+
+ /*
+ * Both `read` and `iterate` read up through index `end`
+ * *inclusive*. The number of bytes that could be returned is
+ * `end - off + 1`.
+ */
+ virtual int read(int64_t off, int64_t end, bufferlist& bl,
+ optional_yield y,
+ const DoutPrefixProvider* dpp) override;
+ virtual int iterate(const DoutPrefixProvider* dpp, int64_t off,
+ int64_t end, RGWGetDataCB* cb,
+ optional_yield y) override;
+
+ virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override;
+ };
+
+ struct MotrDeleteOp : public DeleteOp {
+ private:
+ MotrObject* source;
+
+ public:
+ MotrDeleteOp(MotrObject* _source);
+
+ virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override;
+ };
+
+ MotrObject() = default;
+
+ MotrObject(MotrStore *_st, const rgw_obj_key& _k)
+ : StoreObject(_k), store(_st), acls() {}
+ MotrObject(MotrStore *_st, const rgw_obj_key& _k, Bucket* _b)
+ : StoreObject(_k, _b), store(_st), acls() {}
+
+ MotrObject(MotrObject& _o) = default;
+
+ virtual ~MotrObject();
+
+ virtual int delete_object(const DoutPrefixProvider* dpp,
+ optional_yield y,
+ bool prevent_versioning = false) override;
+ virtual int copy_object(User* user,
+ req_info* info, const rgw_zone_id& source_zone,
+ rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
+ rgw::sal::Bucket* src_bucket,
+ const rgw_placement_rule& dest_placement,
+ ceph::real_time* src_mtime, ceph::real_time* mtime,
+ const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
+ bool high_precision_time,
+ const char* if_match, const char* if_nomatch,
+ AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
+ RGWObjCategory category, uint64_t olh_epoch,
+ boost::optional<ceph::real_time> delete_at,
+ std::string* version_id, std::string* tag, std::string* etag,
+ void (*progress_cb)(off_t, void *), void* progress_data,
+ const DoutPrefixProvider* dpp, optional_yield y) override;
+ virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
+ virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; }
+ virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **state, optional_yield y, bool follow_olh = true) override;
+ virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override;
+ virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override;
+ virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override;
+ virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override;
+ virtual bool is_expired() override;
+ virtual void gen_rand_obj_instance_name() override;
+ virtual std::unique_ptr<Object> clone() override {
+ return std::unique_ptr<Object>(new MotrObject(*this));
+ }
+ virtual std::unique_ptr<MPSerializer> get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name) override;
+ virtual int transition(Bucket* bucket,
+ const rgw_placement_rule& placement_rule,
+ const real_time& mtime,
+ uint64_t olh_epoch,
+ const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+ virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override;
+ virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override;
+
+ /* Swift versioning */
+ virtual int swift_versioning_restore(bool& restored,
+ const DoutPrefixProvider* dpp) override;
+ virtual int swift_versioning_copy(const DoutPrefixProvider* dpp,
+ optional_yield y) override;
+
+ /* OPs */
+ virtual std::unique_ptr<ReadOp> get_read_op() override;
+ virtual std::unique_ptr<DeleteOp> get_delete_op() override;
+
+ /* OMAP */
+ virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
+ const std::set<std::string>& keys,
+ Attrs* vals) override;
+ virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val,
+ bool must_exist, optional_yield y) override;
+ virtual int chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) override;
+ private:
+ //int read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr);
+
+ public:
+ bool is_opened() { return mobj != NULL; }
+ int create_mobj(const DoutPrefixProvider *dpp, uint64_t sz);
+ int open_mobj(const DoutPrefixProvider *dpp);
+ int delete_mobj(const DoutPrefixProvider *dpp);
+ void close_mobj();
+ int write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset);
+ int read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb);
+ unsigned get_optimal_bs(unsigned len);
+
+ int get_part_objs(const DoutPrefixProvider *dpp,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs);
+ int open_part_objs(const DoutPrefixProvider* dpp,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs);
+ int read_multipart_obj(const DoutPrefixProvider* dpp,
+ int64_t off, int64_t end, RGWGetDataCB* cb,
+ std::map<int, std::unique_ptr<MotrObject>>& part_objs);
+ int delete_part_objs(const DoutPrefixProvider* dpp);
+ void set_category(RGWObjCategory _category) {category = _category;}
+ int get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent);
+ int update_version_entries(const DoutPrefixProvider *dpp);
+};
+
+// A placeholder locking class for multipart upload.
+// TODO: implement it using Motr object locks.
+class MPMotrSerializer : public StoreMPSerializer {
+
+ public:
+ MPMotrSerializer(const DoutPrefixProvider *dpp, MotrStore* store, MotrObject* obj, const std::string& lock_name) {}
+
+ virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override {return 0; }
+ virtual int unlock() override { return 0;}
+};
+
+class MotrAtomicWriter : public StoreWriter {
+ protected:
+ rgw::sal::MotrStore* store;
+ const rgw_user& owner;
+ const rgw_placement_rule *ptail_placement_rule;
+ uint64_t olh_epoch;
+ const std::string& unique_tag;
+ MotrObject obj;
+ MotrObject old_obj;
+ uint64_t total_data_size; // for total data being uploaded
+ bufferlist acc_data; // accumulated data
+ uint64_t acc_off; // accumulated data offset
+
+ struct m0_bufvec buf;
+ struct m0_bufvec attr;
+ struct m0_indexvec ext;
+
+ public:
+ MotrAtomicWriter(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ MotrStore* _store,
+ const rgw_user& _owner,
+ const rgw_placement_rule *_ptail_placement_rule,
+ uint64_t _olh_epoch,
+ const std::string& _unique_tag);
+ ~MotrAtomicWriter() = default;
+
+ // prepare to start processing object data
+ virtual int prepare(optional_yield y) override;
+
+ // Process a bufferlist
+ virtual int process(bufferlist&& data, uint64_t offset) override;
+
+ int write();
+
+ // complete the operation and make its result visible to clients
+ virtual int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled,
+ optional_yield y) override;
+
+ unsigned populate_bvec(unsigned len, bufferlist::iterator &bi);
+ void cleanup();
+};
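+
+// Call-sequence sketch (illustration only): RGW drives every SAL Writer, and
+// therefore MotrAtomicWriter, roughly as follows; an empty bufferlist is
+// typically passed to process() at the end to flush any buffered data before
+// complete() is called.
+//
+//   writer->prepare(y);                           // set up the write
+//   for each chunk of the incoming object body:
+//     writer->process(std::move(chunk), offset);  // accumulated in acc_data
+//   writer->process({}, offset);                  // flush the tail
+//   writer->complete(accounted_size, etag, ...);  // persist object metadata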
+
+class MotrMultipartWriter : public StoreWriter {
+protected:
+ rgw::sal::MotrStore* store;
+
+ // Head object.
+ rgw::sal::Object* head_obj;
+
+ // Part parameters.
+ const uint64_t part_num;
+ const std::string part_num_str;
+ std::unique_ptr<MotrObject> part_obj;
+ uint64_t actual_part_size = 0;
+
+public:
+ MotrMultipartWriter(const DoutPrefixProvider *dpp,
+ optional_yield y, MultipartUpload* upload,
+ rgw::sal::Object* obj,
+ MotrStore* _store,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ uint64_t _part_num, const std::string& part_num_str) :
+ StoreWriter(dpp, y), store(_store), head_obj(obj),
+ part_num(_part_num), part_num_str(part_num_str)
+ {
+ }
+ ~MotrMultipartWriter() = default;
+
+ // prepare to start processing object data
+ virtual int prepare(optional_yield y) override;
+
+ // Process a bufferlist
+ virtual int process(bufferlist&& data, uint64_t offset) override;
+
+ // complete the operation and make its result visible to clients
+ virtual int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled,
+ optional_yield y) override;
+};
+
+// The implementation of multipart upload in the POC roughly follows
+// cortx-s3server's design. Parts are stored in separate Motr objects.
+// s3server uses a few auxiliary Motr indices to manage multipart-related
+// metadata: (1) The bucket multipart index (bucket_nnn_multipart_index),
+// which contains metadata answering questions such as which objects have
+// started a multipart upload and what their upload ids are. This index is
+// created during bucket creation. (2) The object part index
+// (object_nnn_part_index), which stores the metadata of a part's details
+// (size, pvid, oid...). This index is created in MotrMultipartUpload::init().
+// (3) The extended metadata index (bucket_nnn_extended_metadata): once the
+// parts have been uploaded and their metadata saved in the part index, the
+// user may issue a multipart completion request. When processing the
+// completion request, the parts are read from the object part index and, for
+// each part, an entry is created in the extended index. The entry for the
+// object itself is created in the bucket (object list) index. The part index
+// is then deleted and its entry removed from bucket_nnn_multipart_index. Like
+// the bucket multipart index, the bucket extended metadata index is created
+// during bucket creation.
+//
+// The extended metadata index exists mainly for fault-tolerance reasons (how
+// to handle a Motr service crash while an object is being uploaded) and to
+// avoid creating too many Motr indices (it is not entirely clear why a large
+// number of Motr indices would be a problem). In our POC, to keep things
+// simple, only 2 indices are maintained: the bucket multipart index and
+// object_nnn_part_index.
+
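+// Call-flow sketch (illustration only) of the design above, in terms of the
+// interfaces declared below:
+//
+//   upload = bucket->get_multipart_upload(oid);       // MotrMultipartUpload
+//   upload->init(dpp, y, owner, placement, attrs);    // creates the object
+//                                                     // part index
+//   for each part:
+//     writer = upload->get_writer(dpp, y, obj, owner, rule, num, num_str);
+//     writer->prepare() / process() / complete();     // MotrMultipartWriter
+//   upload->complete(...);                            // reads the part index,
+//                                                     // writes the bucket
+//                                                     // (object list) entry
+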
+class MotrMultipartPart : public StoreMultipartPart {
+protected:
+ RGWUploadPartInfo info;
+
+public:
+ MotrObject::Meta meta;
+
+ MotrMultipartPart(RGWUploadPartInfo _info, MotrObject::Meta _meta) :
+ info(_info), meta(_meta) {}
+ virtual ~MotrMultipartPart() = default;
+
+ virtual uint32_t get_num() { return info.num; }
+ virtual uint64_t get_size() { return info.accounted_size; }
+ virtual const std::string& get_etag() { return info.etag; }
+ virtual ceph::real_time& get_mtime() { return info.modified; }
+
+ RGWObjManifest& get_manifest() { return info.manifest; }
+
+ friend class MotrMultipartUpload;
+};
+
+class MotrMultipartUpload : public StoreMultipartUpload {
+ MotrStore* store;
+ RGWMPObj mp_obj;
+ ACLOwner owner;
+ ceph::real_time mtime;
+ rgw_placement_rule placement;
+ RGWObjManifest manifest;
+
+public:
+ MotrMultipartUpload(MotrStore* _store, Bucket* _bucket, const std::string& oid,
+ std::optional<std::string> upload_id, ACLOwner _owner, ceph::real_time _mtime) :
+ StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), owner(_owner), mtime(_mtime) {}
+ virtual ~MotrMultipartUpload() = default;
+
+ virtual const std::string& get_meta() const { return mp_obj.get_meta(); }
+ virtual const std::string& get_key() const { return mp_obj.get_key(); }
+ virtual const std::string& get_upload_id() const { return mp_obj.get_upload_id(); }
+ virtual const ACLOwner& get_owner() const override { return owner; }
+ virtual ceph::real_time& get_mtime() { return mtime; }
+ virtual std::unique_ptr<rgw::sal::Object> get_meta_obj() override;
+ virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override;
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int num_parts, int marker,
+ int* next_marker, bool* truncated,
+ bool assume_unsorted = false) override;
+ virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override;
+ virtual int complete(const DoutPrefixProvider* dpp,
+ optional_yield y, CephContext* cct,
+ std::map<int, std::string>& part_etags,
+ std::list<rgw_obj_index_key>& remove_objs,
+ uint64_t& accounted_size, bool& compressed,
+ RGWCompressionInfo& cs_info, off_t& off,
+ std::string& tag, ACLOwner& owner,
+ uint64_t olh_epoch,
+ rgw::sal::Object* target_obj) override;
+ virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override;
+ virtual std::unique_ptr<Writer> get_writer(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ uint64_t part_num,
+ const std::string& part_num_str) override;
+ int delete_parts(const DoutPrefixProvider *dpp);
+};
+
+class MotrStore : public StoreDriver {
+ private:
+ MotrZone zone;
+ RGWSyncModuleInstanceRef sync_module;
+
+ MotrMetaCache* obj_meta_cache;
+ MotrMetaCache* user_cache;
+ MotrMetaCache* bucket_inst_cache;
+
+ public:
+ CephContext *cctx;
+ struct m0_client *instance;
+ struct m0_container container;
+ struct m0_realm uber_realm;
+ struct m0_config conf = {};
+ struct m0_idx_dix_config dix_conf = {};
+
+ MotrStore(CephContext *c): zone(this), cctx(c) {}
+ ~MotrStore() {
+ delete obj_meta_cache;
+ delete user_cache;
+ delete bucket_inst_cache;
+ }
+
+ virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) { return 0; }
+ virtual const std::string get_name() const override {
+ return "motr";
+ }
+
+ virtual std::unique_ptr<User> get_user(const rgw_user& u) override;
+ virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override;
+ virtual int get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y, std::unique_ptr<User>* user) override;
+ virtual int get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr<User>* user) override;
+ virtual int get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr<User>* user) override;
+ virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
+ virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y) override;
+ virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket) override;
+ virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr<Bucket>* bucket, optional_yield y) override;
+ virtual bool is_meta_master() override;
+ virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv,
+ bufferlist& in_data, JSONParser *jp, req_info& info,
+ optional_yield y) override;
+ virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
+ bufferlist& in_data,
+ RGWXMLDecoder::XMLParser* parser, req_info& info,
+ optional_yield y) override;
+ virtual Zone* get_zone() { return &zone; }
+ virtual std::string zone_unique_id(uint64_t unique_num) override;
+ virtual std::string zone_unique_trans_id(const uint64_t unique_num) override;
+ virtual int get_zonegroup(const std::string& id, std::unique_ptr<ZoneGroup>* zonegroup) override;
+ virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list<std::string>& zone_ids) override;
+ virtual int cluster_stat(RGWClusterStat& stats) override;
+ virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
+ virtual std::unique_ptr<Notification> get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj,
+ req_state* s, rgw::notify::EventType event_type, optional_yield y, const std::string* object_name=nullptr) override;
+ virtual std::unique_ptr<Notification> get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj,
+ rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
+ std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) override;
+ virtual RGWLC* get_rgwlc(void) override { return NULL; }
+ virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return NULL; }
+
+ virtual int log_usage(const DoutPrefixProvider *dpp, std::map<rgw_user_bucket, RGWUsageBatch>& usage_info) override;
+ virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override;
+ virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type,
+ const std::map<std::string, std::string>& meta) override;
+ virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override;
+ virtual void get_quota(RGWQuota& quota) override;
+ virtual int set_buckets_enabled(const DoutPrefixProvider *dpp, std::vector<rgw_bucket>& buckets, bool enabled) override;
+ virtual int get_sync_policy_handler(const DoutPrefixProvider *dpp,
+ std::optional<rgw_zone_id> zone,
+ std::optional<rgw_bucket> bucket,
+ RGWBucketSyncPolicyHandlerRef *phandler,
+ optional_yield y) override;
+ virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override;
+ virtual void wakeup_meta_sync_shards(std::set<int>& shard_ids) override { return; }
+ virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, const rgw_zone_id& source_zone, boost::container::flat_map<int, boost::container::flat_set<rgw_data_notify_entry>>& shard_ids) override {}
+ virtual int clear_usage(const DoutPrefixProvider *dpp) override { return 0; }
+ virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool *is_truncated,
+ RGWUsageIter& usage_iter,
+ std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
+ virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
+ virtual int get_config_key_val(std::string name, bufferlist* bl) override;
+ virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override;
+ virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list<std::string>& keys, bool* truncated) override;
+ virtual void meta_list_keys_complete(void* handle) override;
+ virtual std::string meta_get_marker(void *handle) override;
+ virtual int meta_remove(const DoutPrefixProvider *dpp, std::string& metadata_key, optional_yield y) override;
+
+ virtual const RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; }
+ virtual std::string get_host_id() { return ""; }
+
+ virtual std::unique_ptr<LuaManager> get_lua_manager() override;
+ virtual std::unique_ptr<RGWRole> get_role(std::string name,
+ std::string tenant,
+ std::string path="",
+ std::string trust_policy="",
+ std::string max_session_duration_str="",
+ std::multimap<std::string, std::string> tags={}) override;
+ virtual std::unique_ptr<RGWRole> get_role(const RGWRoleInfo& info) override;
+ virtual std::unique_ptr<RGWRole> get_role(std::string id) override;
+ virtual int get_roles(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ const std::string& path_prefix,
+ const std::string& tenant,
+ std::vector<std::unique_ptr<RGWRole>>& roles) override;
+ virtual std::unique_ptr<RGWOIDCProvider> get_oidc_provider() override;
+ virtual int get_oidc_providers(const DoutPrefixProvider *dpp,
+ const std::string& tenant,
+ std::vector<std::unique_ptr<RGWOIDCProvider>>& providers) override;
+ virtual std::unique_ptr<Writer> get_append_writer(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ const std::string& unique_tag,
+ uint64_t position,
+ uint64_t *cur_accounted_size) override;
+ virtual std::unique_ptr<Writer> get_atomic_writer(const DoutPrefixProvider *dpp,
+ optional_yield y,
+ rgw::sal::Object* obj,
+ const rgw_user& owner,
+ const rgw_placement_rule *ptail_placement_rule,
+ uint64_t olh_epoch,
+ const std::string& unique_tag) override;
+ virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override;
+ virtual bool valid_placement(const rgw_placement_rule& rule) override;
+
+ virtual void finalize(void) override;
+
+ virtual CephContext *ctx(void) override {
+ return cctx;
+ }
+
+ virtual void register_admin_apis(RGWRESTMgr* mgr) override { };
+
+ int open_idx(struct m0_uint128 *id, bool create, struct m0_idx *out);
+ void close_idx(struct m0_idx *idx) { m0_idx_fini(idx); }
+ int do_idx_op(struct m0_idx *, enum m0_idx_opcode opcode,
+ std::vector<uint8_t>& key, std::vector<uint8_t>& val, bool update = false);
+
+ int do_idx_next_op(struct m0_idx *idx,
+ std::vector<std::vector<uint8_t>>& key_vec,
+ std::vector<std::vector<uint8_t>>& val_vec);
+ int next_query_by_name(std::string idx_name, std::vector<std::string>& key_str_vec,
+ std::vector<bufferlist>& val_bl_vec,
+ std::string prefix="", std::string delim="");
+
+ void index_name_to_motr_fid(std::string iname, struct m0_uint128 *fid);
+ int open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx);
+ int create_motr_idx_by_name(std::string iname);
+ int delete_motr_idx_by_name(std::string iname);
+ int do_idx_op_by_name(std::string idx_name, enum m0_idx_opcode opcode,
+ std::string key_str, bufferlist &bl, bool update=true);
+ int check_n_create_global_indices();
+ int store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key);
+ int delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key);
+ int store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info);
+
+ int init_metadata_cache(const DoutPrefixProvider *dpp, CephContext *cct);
+ MotrMetaCache* get_obj_meta_cache() {return obj_meta_cache;}
+ MotrMetaCache* get_user_cache() {return user_cache;}
+ MotrMetaCache* get_bucket_inst_cache() {return bucket_inst_cache;}
+};
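+
+// Index-access sketch (illustration only; see rgw_sal_motr.cc for the actual
+// call sites): metadata is encoded into a bufferlist and written to one of
+// the global indices defined at the top of this header, keyed by name, e.g.
+//
+//   bufferlist bl;
+//   muinfo.encode(bl);                                 // a MotrUserInfo
+//   store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
+//                            M0_IC_PUT, user_id, bl);  // write the record
+//
+// and read back with M0_IC_GET under the same key; the M0_IC_* opcodes come
+// from the Motr client API (enum m0_idx_opcode).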
+
+struct obj_time_weight {
+ real_time mtime;
+ uint32_t zone_short_id;
+ uint64_t pg_ver;
+ bool high_precision;
+
+ obj_time_weight() : zone_short_id(0), pg_ver(0), high_precision(false) {}
+
+ bool compare_low_precision(const obj_time_weight& rhs) {
+ struct timespec l = ceph::real_clock::to_timespec(mtime);
+ struct timespec r = ceph::real_clock::to_timespec(rhs.mtime);
+ l.tv_nsec = 0;
+ r.tv_nsec = 0;
+ if (l > r) {
+ return false;
+ }
+ if (l < r) {
+ return true;
+ }
+ if (!zone_short_id || !rhs.zone_short_id) {
+ /* don't compare zone ids, if one wasn't provided */
+ return false;
+ }
+ if (zone_short_id != rhs.zone_short_id) {
+ return (zone_short_id < rhs.zone_short_id);
+ }
+ return (pg_ver < rhs.pg_ver);
+
+ }
+
+ bool operator<(const obj_time_weight& rhs) {
+ if (!high_precision || !rhs.high_precision) {
+ return compare_low_precision(rhs);
+ }
+ if (mtime > rhs.mtime) {
+ return false;
+ }
+ if (mtime < rhs.mtime) {
+ return true;
+ }
+ if (!zone_short_id || !rhs.zone_short_id) {
+ /* don't compare zone ids, if one wasn't provided */
+ return false;
+ }
+ if (zone_short_id != rhs.zone_short_id) {
+ return (zone_short_id < rhs.zone_short_id);
+ }
+ return (pg_ver < rhs.pg_ver);
+ }
+
+ void init(const real_time& _mtime, uint32_t _short_id, uint64_t _pg_ver) {
+ mtime = _mtime;
+ zone_short_id = _short_id;
+ pg_ver = _pg_ver;
+ }
+
+ void init(RGWObjState *state) {
+ mtime = state->mtime;
+ zone_short_id = state->zone_short_id;
+ pg_ver = state->pg_ver;
+ }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const obj_time_weight &o) {
+ out << o.mtime;
+
+ if (o.zone_short_id != 0 || o.pg_ver != 0) {
+ out << "[zid=" << o.zone_short_id << ", pgv=" << o.pg_ver << "]";
+ }
+
+ return out;
+}
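+
+// Usage sketch (illustration only): obj_time_weight orders object versions by
+// mtime (optionally at full precision), then zone short id, then pg version,
+// so a copy/sync "is the source newer?" decision can be written as:
+//
+//   obj_time_weight dest, src;
+//   dest.init(dest_state);                            // RGWObjState of target
+//   src.init(src_mtime, src_zone_short_id, src_pg_ver);
+//   if (dest < src) { /* source is newer: overwrite the destination */ }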
+
+} // namespace rgw::sal
#endif
#ifdef WITH_RADOSGW_MOTR
-#include "rgw_sal_motr.h"
+#include "driver/motr/rgw_sal_motr.h"
#endif
#ifdef WITH_RADOSGW_DAOS
-#include "rgw_sal_daos.h"
+#include "driver/daos/rgw_sal_daos.h"
#endif
#define dout_subsys ceph_subsys_rgw
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=2 sw=2 expandtab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * SAL implementation for the CORTX DAOS backend
- *
- * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "rgw_sal_daos.h"
-
-#include <errno.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <filesystem>
-#include <system_error>
-
-#include "common/Clock.h"
-#include "common/errno.h"
-#include "rgw_bucket.h"
-#include "rgw_compression.h"
-#include "rgw_sal.h"
-
-#define dout_subsys ceph_subsys_rgw
-
-using std::list;
-using std::map;
-using std::set;
-using std::string;
-using std::vector;
-
-namespace fs = std::filesystem;
-
-namespace rgw::sal {
-
-using ::ceph::decode;
-using ::ceph::encode;
-
-int DaosUser::list_buckets(const DoutPrefixProvider* dpp, const string& marker,
- const string& end_marker, uint64_t max,
- bool need_stats, BucketList& buckets,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: list_user_buckets: marker=" << marker
- << " end_marker=" << end_marker << " max=" << max << dendl;
- int ret = 0;
- bool is_truncated = false;
- buckets.clear();
- vector<struct ds3_bucket_info> bucket_infos(max);
- daos_size_t bcount = bucket_infos.size();
- vector<vector<uint8_t>> values(bcount, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
- for (daos_size_t i = 0; i < bcount; i++) {
- bucket_infos[i].encoded = values[i].data();
- bucket_infos[i].encoded_length = values[i].size();
- }
-
- char daos_marker[DS3_MAX_BUCKET_NAME];
- std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker));
- ret = ds3_bucket_list(&bcount, bucket_infos.data(), daos_marker,
- &is_truncated, store->ds3, nullptr);
- ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_list: bcount=" << bcount
- << " ret=" << ret << dendl;
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list failed!" << ret << dendl;
- return ret;
- }
-
- bucket_infos.resize(bcount);
- values.resize(bcount);
-
- for (const auto& bi : bucket_infos) {
- DaosBucketInfo dbinfo;
- bufferlist bl;
- bl.append(reinterpret_cast<char*>(bi.encoded), bi.encoded_length);
- auto iter = bl.cbegin();
- dbinfo.decode(iter);
- buckets.add(std::make_unique<DaosBucket>(this->store, dbinfo.info, this));
- }
-
- buckets.set_truncated(is_truncated);
- return 0;
-}
-
-int DaosUser::create_bucket(
- const DoutPrefixProvider* dpp, const rgw_bucket& b,
- const std::string& zonegroup_id, rgw_placement_rule& placement_rule,
- std::string& swift_ver_location, const RGWQuotaInfo* pquota_info,
- const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info,
- obj_version& ep_objv, bool exclusive, bool obj_lock_enabled, bool* existed,
- req_info& req_info, std::unique_ptr<Bucket>* bucket_out, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: create_bucket:" << b.name << dendl;
- int ret;
- std::unique_ptr<Bucket> bucket;
-
- // Look up the bucket. Create it if it doesn't exist.
- ret = this->store->get_bucket(dpp, this, b, &bucket, y);
- if (ret != 0 && ret != -ENOENT) {
- return ret;
- }
-
- if (ret != -ENOENT) {
- *existed = true;
- if (swift_ver_location.empty()) {
- swift_ver_location = bucket->get_info().swift_ver_location;
- }
- placement_rule.inherit_from(bucket->get_info().placement_rule);
-
- // TODO: ACL policy
- // // don't allow changes to the acl policy
- // RGWAccessControlPolicy old_policy(ctx());
- // int rc = rgw_op_get_bucket_policy_from_attr(
- // dpp, this, u, bucket->get_attrs(), &old_policy, y);
- // if (rc >= 0 && old_policy != policy) {
- // bucket_out->swap(bucket);
- // return -EEXIST;
- //}
- } else {
- placement_rule.name = "default";
- placement_rule.storage_class = "STANDARD";
- bucket = std::make_unique<DaosBucket>(store, b, this);
- bucket->set_attrs(attrs);
-
- *existed = false;
- }
-
- // TODO: how to handle zone and multi-site.
-
- if (!*existed) {
- info.placement_rule = placement_rule;
- info.bucket = b;
- info.owner = this->get_info().user_id;
- info.zonegroup = zonegroup_id;
- info.creation_time = ceph::real_clock::now();
- if (obj_lock_enabled)
- info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
- bucket->set_version(ep_objv);
- bucket->get_info() = info;
-
- // Create a new bucket:
- DaosBucket* daos_bucket = static_cast<DaosBucket*>(bucket.get());
- bufferlist bl;
- std::unique_ptr<struct ds3_bucket_info> bucket_info =
- daos_bucket->get_encoded_info(bl, ceph::real_time());
- ret = ds3_bucket_create(bucket->get_name().c_str(), bucket_info.get(),
- nullptr, store->ds3, nullptr);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_create failed! ret=" << ret
- << dendl;
- return ret;
- }
- } else {
- bucket->set_version(ep_objv);
- bucket->get_info() = info;
- }
-
- bucket_out->swap(bucket);
-
- return ret;
-}
-
-int DaosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y,
- RGWStorageStats* stats,
- ceph::real_time* last_stats_sync,
- ceph::real_time* last_stats_update) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-/* stats - Not for first pass */
-int DaosUser::read_stats_async(const DoutPrefixProvider* dpp,
- RGWGetUserStats_CB* cb) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosUser::complete_flush_stats(const DoutPrefixProvider* dpp,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosUser::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch, uint32_t max_entries,
- bool* is_truncated, RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosUser::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) {
- const string name = info.user_id.to_str();
- ldpp_dout(dpp, 20) << "DEBUG: load_user, name=" << name << dendl;
-
- DaosUserInfo duinfo;
- int ret = read_user(dpp, name, &duinfo);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: load_user failed, name=" << name << dendl;
- return ret;
- }
-
- info = duinfo.info;
- attrs = duinfo.attrs;
- objv_tracker.read_version = duinfo.user_version;
- return 0;
-}
-
-int DaosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp,
- Attrs& new_attrs, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs
- << dendl;
- for (auto& it : new_attrs) {
- attrs[it.first] = it.second;
- }
- return store_user(dpp, y, false);
-}
-
-int DaosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y,
- bool exclusive, RGWUserInfo* old_info) {
- const string name = info.user_id.to_str();
- ldpp_dout(dpp, 10) << "DEBUG: Store_user(): User name=" << name << dendl;
-
- // Read user
- int ret = 0;
- struct DaosUserInfo duinfo;
- ret = read_user(dpp, name, &duinfo);
- obj_version obj_ver = duinfo.user_version;
- std::unique_ptr<struct ds3_user_info> old_user_info;
- std::vector<const char*> old_access_ids;
-
- // Check if the user already exists
- if (ret == 0 && obj_ver.ver) {
- // already exists.
-
- if (old_info) {
- *old_info = duinfo.info;
- }
-
- if (objv_tracker.read_version.ver != obj_ver.ver) {
- // Object version mismatch.. return ECANCELED
- ret = -ECANCELED;
- ldpp_dout(dpp, 0) << "User Read version mismatch read_version="
- << objv_tracker.read_version.ver
- << " obj_ver=" << obj_ver.ver << dendl;
- return ret;
- }
-
- if (exclusive) {
- // return
- return ret;
- }
- obj_ver.ver++;
-
- for (auto const& [id, key] : duinfo.info.access_keys) {
- old_access_ids.push_back(id.c_str());
- }
- old_user_info.reset(
- new ds3_user_info{.name = duinfo.info.user_id.to_str().c_str(),
- .email = duinfo.info.user_email.c_str(),
- .access_ids = old_access_ids.data(),
- .access_ids_nr = old_access_ids.size()});
- } else {
- obj_ver.ver = 1;
- obj_ver.tag = "UserTAG";
- }
-
- bufferlist bl;
- std::unique_ptr<struct ds3_user_info> user_info =
- get_encoded_info(bl, obj_ver);
-
- ret = ds3_user_set(name.c_str(), user_info.get(), old_user_info.get(),
- store->ds3, nullptr);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "Error: ds3_user_set failed, name=" << name
- << " ret=" << ret << dendl;
- }
-
- return ret;
-}
-
-int DaosUser::read_user(const DoutPrefixProvider* dpp, std::string name,
- DaosUserInfo* duinfo) {
- // Initialize ds3_user_info
- bufferlist bl;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
- .encoded_length = size};
-
- int ret = ds3_user_get(name.c_str(), &user_info, store->ds3, nullptr);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "Error: ds3_user_get failed, name=" << name
- << " ret=" << ret << dendl;
- return ret;
- }
-
- // Decode
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- duinfo->decode(iter);
- return ret;
-}
-
-std::unique_ptr<struct ds3_user_info> DaosUser::get_encoded_info(
- bufferlist& bl, obj_version& obj_ver) {
- // Encode user data
- struct DaosUserInfo duinfo;
- duinfo.info = info;
- duinfo.attrs = attrs;
- duinfo.user_version = obj_ver;
- duinfo.encode(bl);
-
- // Initialize ds3_user_info
- access_ids.clear();
- for (auto const& [id, key] : info.access_keys) {
- access_ids.push_back(id.c_str());
- }
- return std::unique_ptr<struct ds3_user_info>(
- new ds3_user_info{.name = info.user_id.to_str().c_str(),
- .email = info.user_email.c_str(),
- .access_ids = access_ids.data(),
- .access_ids_nr = access_ids.size(),
- .encoded = bl.c_str(),
- .encoded_length = bl.length()});
-}
-
-int DaosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) {
- const string name = info.user_id.to_str();
-
-  // TODO: the expectation is that the object version needs to be passed in as
-  // a method arg; see int DB::remove_user(const DoutPrefixProvider *dpp,
-  // RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv)
- obj_version obj_ver;
- bufferlist bl;
- std::unique_ptr<struct ds3_user_info> user_info =
- get_encoded_info(bl, obj_ver);
-
- // Remove user
- int ret = ds3_user_remove(name.c_str(), user_info.get(), store->ds3, nullptr);
- if (ret != 0) {
-    ldpp_dout(dpp, 0) << "Error: ds3_user_remove failed, name=" << name
- << " ret=" << ret << dendl;
- }
- return ret;
-}
-
-DaosBucket::~DaosBucket() { close(nullptr); }
-
-int DaosBucket::open(const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: open, name=" << info.bucket.name.c_str()
- << dendl;
- // Idempotent
- if (is_open()) {
- return 0;
- }
-
- int ret = ds3_bucket_open(get_name().c_str(), &ds3b, store->ds3, nullptr);
- ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_open, name=" << get_name()
- << ", ret=" << ret << dendl;
-
- return ret;
-}
-
-int DaosBucket::close(const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: close" << dendl;
- // Idempotent
- if (!is_open()) {
- return 0;
- }
-
- int ret = ds3_bucket_close(ds3b, nullptr);
- ds3b = nullptr;
- ldpp_dout(dpp, 20) << "DEBUG: ds3_bucket_close ret=" << ret << dendl;
-
- return ret;
-}
-
-std::unique_ptr<struct ds3_bucket_info> DaosBucket::get_encoded_info(
- bufferlist& bl, ceph::real_time _mtime) {
- DaosBucketInfo dbinfo;
- dbinfo.info = info;
- dbinfo.bucket_attrs = attrs;
- dbinfo.mtime = _mtime;
- dbinfo.bucket_version = bucket_version;
- dbinfo.encode(bl);
-
- auto bucket_info = std::make_unique<struct ds3_bucket_info>();
- bucket_info->encoded = bl.c_str();
- bucket_info->encoded_length = bl.length();
- std::strncpy(bucket_info->name, get_name().c_str(), sizeof(bucket_info->name));
- return bucket_info;
-}
-
-int DaosBucket::remove_bucket(const DoutPrefixProvider* dpp,
- bool delete_children, bool forward_to_master,
- req_info* req_info, optional_yield y) {
-  ldpp_dout(dpp, 20) << "DEBUG: remove_bucket, delete_children="
-                     << delete_children
-                     << " forward_to_master=" << forward_to_master << dendl;
-
- return ds3_bucket_destroy(get_name().c_str(), delete_children, store->ds3,
- nullptr);
-}
-
-int DaosBucket::remove_bucket_bypass_gc(int concurrent_max,
- bool keep_index_consistent,
- optional_yield y,
- const DoutPrefixProvider* dpp) {
-  ldpp_dout(dpp, 20) << "DEBUG: remove_bucket_bypass_gc, concurrent_max="
-                     << concurrent_max
-                     << " keep_index_consistent=" << keep_index_consistent
-                     << dendl;
- return ds3_bucket_destroy(get_name().c_str(), true, store->ds3, nullptr);
-}
-
-int DaosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive,
- ceph::real_time _mtime) {
- ldpp_dout(dpp, 20) << "DEBUG: put_info(): bucket name=" << get_name()
- << dendl;
-
- int ret = open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- bufferlist bl;
- std::unique_ptr<struct ds3_bucket_info> bucket_info =
- get_encoded_info(bl, ceph::real_time());
-
- ret = ds3_bucket_set_info(bucket_info.get(), ds3b, nullptr);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_set_info failed: " << ret << dendl;
- }
- return ret;
-}
-
-int DaosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y,
- bool get_stats) {
- ldpp_dout(dpp, 20) << "DEBUG: load_bucket(): bucket name=" << get_name()
- << dendl;
- int ret = open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- bufferlist bl;
- DaosBucketInfo dbinfo;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_bucket_info bucket_info = {.encoded = bl.append_hole(size).c_str(),
- .encoded_length = size};
-
- ret = ds3_bucket_get_info(&bucket_info, ds3b, nullptr);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_get_info failed: " << ret << dendl;
- return ret;
- }
-
- auto iter = bl.cbegin();
- dbinfo.decode(iter);
- info = dbinfo.info;
- rgw_placement_rule placement_rule;
- placement_rule.name = "default";
- placement_rule.storage_class = "STANDARD";
- info.placement_rule = placement_rule;
-
- attrs = dbinfo.bucket_attrs;
- mtime = dbinfo.mtime;
- bucket_version = dbinfo.bucket_version;
- return ret;
-}
-
-/* stats - Not for first pass */
-int DaosBucket::read_stats(const DoutPrefixProvider* dpp,
- const bucket_index_layout_generation& idx_layout,
- int shard_id, std::string* bucket_ver,
- std::string* master_ver,
- std::map<RGWObjCategory, RGWStorageStats>& stats,
- std::string* max_marker, bool* syncstopped) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::read_stats_async(
- const DoutPrefixProvider* dpp,
- const bucket_index_layout_generation& idx_layout, int shard_id,
- RGWGetBucketStats_CB* ctx) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::sync_user_stats(const DoutPrefixProvider* dpp,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::update_container_stats(const DoutPrefixProvider* dpp) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::check_bucket_shards(const DoutPrefixProvider* dpp) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::chown(const DoutPrefixProvider* dpp, User& new_user,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-/* Make sure to call load_bucket() if you need it first */
-bool DaosBucket::is_owner(User* user) {
- return (info.owner.compare(user->get_id()) == 0);
-}
-
-int DaosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) {
- /* XXX: Check if bucket contains any objects */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota,
- uint64_t obj_size, optional_yield y,
- bool check_size_only) {
- /* Not Handled in the first pass as stats are also needed */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp,
- Attrs& new_attrs, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: merge_and_store_attrs, new_attrs=" << new_attrs
- << dendl;
- for (auto& it : new_attrs) {
- attrs[it.first] = it.second;
- }
-
- return put_info(dpp, y, ceph::real_time());
-}
-
-int DaosBucket::try_refresh_info(const DoutPrefixProvider* dpp,
- ceph::real_time* pmtime) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-/* XXX: usage and stats not supported in the first pass */
-int DaosBucket::read_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch, uint32_t max_entries,
- bool* is_truncated, RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::remove_objs_from_index(
- const DoutPrefixProvider* dpp,
- std::list<rgw_obj_index_key>& objs_to_unlink) {
-  /* XXX: CHECK: Unlike RadosStore, there is no separate bucket index table.
-   * Delete all the objects in the list from the object table of this
- * bucket
- */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::check_index(
- const DoutPrefixProvider* dpp,
- std::map<RGWObjCategory, RGWStorageStats>& existing_stats,
- std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) {
- /* XXX: stats not supported yet */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::rebuild_index(const DoutPrefixProvider* dpp) {
- /* there is no index table in DAOS. Not applicable */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::set_tag_timeout(const DoutPrefixProvider* dpp,
- uint64_t timeout) {
- /* XXX: CHECK: set tag timeout for all the bucket objects? */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::purge_instance(const DoutPrefixProvider* dpp) {
- /* XXX: CHECK: for DAOS only single instance supported.
- * Remove all the objects for that instance? Anything extra needed?
- */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosBucket::set_acl(const DoutPrefixProvider* dpp,
- RGWAccessControlPolicy& acl, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: set_acl" << dendl;
- int ret = 0;
- bufferlist aclbl;
-
- acls = acl;
- acl.encode(aclbl);
-
- Attrs attrs = get_attrs();
- attrs[RGW_ATTR_ACL] = aclbl;
-
- return ret;
-}
-
-std::unique_ptr<Object> DaosBucket::get_object(const rgw_obj_key& k) {
- return std::make_unique<DaosObject>(this->store, k, this);
-}
-
-bool compare_rgw_bucket_dir_entry(rgw_bucket_dir_entry& entry1,
- rgw_bucket_dir_entry& entry2) {
- return (entry1.key < entry2.key);
-}
-
-bool compare_multipart_upload(std::unique_ptr<MultipartUpload>& upload1,
- std::unique_ptr<MultipartUpload>& upload2) {
- return (upload1->get_key() < upload2->get_key());
-}
-
-int DaosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max,
- ListResults& results, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: list bucket=" << get_name() << " max=" << max
- << " params=" << params << dendl;
- // End
- if (max == 0) {
- return 0;
- }
-
- int ret = open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Init needed structures
- vector<struct ds3_object_info> object_infos(max);
- uint32_t nobj = object_infos.size();
- vector<vector<uint8_t>> values(nobj, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
- for (uint32_t i = 0; i < nobj; i++) {
- object_infos[i].encoded = values[i].data();
- object_infos[i].encoded_length = values[i].size();
- }
-
- vector<struct ds3_common_prefix_info> common_prefixes(max);
- uint32_t ncp = common_prefixes.size();
-
- char daos_marker[DS3_MAX_KEY_BUFF];
- std::strncpy(daos_marker, params.marker.get_oid().c_str(), sizeof(daos_marker));
-
- ret = ds3_bucket_list_obj(&nobj, object_infos.data(), &ncp,
- common_prefixes.data(), params.prefix.c_str(),
- params.delim.c_str(), daos_marker,
- params.list_versions, &results.is_truncated, ds3b);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: ds3_bucket_list_obj failed, name="
- << get_name() << ", ret=" << ret << dendl;
- return ret;
- }
-
- object_infos.resize(nobj);
- values.resize(nobj);
- common_prefixes.resize(ncp);
-
- // Fill common prefixes
- for (auto const& cp : common_prefixes) {
- results.common_prefixes[cp.prefix] = true;
- }
-
- // Decode objs
- for (auto const& obj : object_infos) {
- bufferlist bl;
- rgw_bucket_dir_entry ent;
- bl.append(reinterpret_cast<char*>(obj.encoded), obj.encoded_length);
- auto iter = bl.cbegin();
- ent.decode(iter);
- if (params.list_versions || ent.is_visible()) {
- results.objs.emplace_back(std::move(ent));
- }
- }
-
- if (!params.allow_unordered) {
- std::sort(results.objs.begin(), results.objs.end(),
- compare_rgw_bucket_dir_entry);
- }
-
- return ret;
-}
-
-int DaosBucket::list_multiparts(
- const DoutPrefixProvider* dpp, const string& prefix, string& marker,
- const string& delim, const int& max_uploads,
- vector<std::unique_ptr<MultipartUpload>>& uploads,
- map<string, bool>* common_prefixes, bool* is_truncated) {
- ldpp_dout(dpp, 20) << "DEBUG: list_multiparts" << dendl;
- // End of uploading
- if (max_uploads == 0) {
- *is_truncated = false;
- return 0;
- }
-
- // Init needed structures
- vector<struct ds3_multipart_upload_info> multipart_upload_infos(max_uploads);
- uint32_t nmp = multipart_upload_infos.size();
- vector<vector<uint8_t>> values(nmp, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
- for (uint32_t i = 0; i < nmp; i++) {
- multipart_upload_infos[i].encoded = values[i].data();
- multipart_upload_infos[i].encoded_length = values[i].size();
- }
-
- vector<struct ds3_common_prefix_info> cps(max_uploads);
- uint32_t ncp = cps.size();
-
- char daos_marker[DS3_MAX_KEY_BUFF];
- std::strncpy(daos_marker, marker.c_str(), sizeof(daos_marker));
-
- int ret = ds3_bucket_list_multipart(
- get_name().c_str(), &nmp, multipart_upload_infos.data(), &ncp, cps.data(),
- prefix.c_str(), delim.c_str(), daos_marker, is_truncated, store->ds3);
-
- multipart_upload_infos.resize(nmp);
- values.resize(nmp);
- cps.resize(ncp);
-
- // Fill common prefixes
- for (auto const& cp : cps) {
- (*common_prefixes)[cp.prefix] = true;
- }
-
- for (auto const& mp : multipart_upload_infos) {
- // Decode the xattr
- bufferlist bl;
- rgw_bucket_dir_entry ent;
- bl.append(reinterpret_cast<char*>(mp.encoded), mp.encoded_length);
- auto iter = bl.cbegin();
- ent.decode(iter);
- string name = ent.key.name;
-
- ACLOwner owner(rgw_user(ent.meta.owner));
- owner.set_name(ent.meta.owner_display_name);
- uploads.push_back(this->get_multipart_upload(
- name, mp.upload_id, std::move(owner), ent.meta.mtime));
- }
-
- // Sort uploads
- std::sort(uploads.begin(), uploads.end(), compare_multipart_upload);
-
- return ret;
-}
-
-int DaosBucket::abort_multiparts(const DoutPrefixProvider* dpp,
- CephContext* cct) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-void DaosStore::finalize(void) {
- ldout(cctx, 20) << "DEBUG: finalize" << dendl;
- int ret;
-
- ret = ds3_disconnect(ds3, nullptr);
- if (ret != 0) {
- ldout(cctx, 0) << "ERROR: ds3_disconnect() failed: " << ret << dendl;
- }
- ds3 = nullptr;
-
- ret = ds3_fini();
- if (ret != 0) {
-    ldout(cctx, 0) << "ERROR: ds3_fini() failed: " << ret << dendl;
- }
-}
-
-int DaosStore::initialize(CephContext* cct, const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: initialize" << dendl;
- int ret = ds3_init();
-
-  // If ds3_init() failed, tolerate the case where init was already done
- if (ret != 0 && ret != DER_ALREADY) {
- ldout(cct, 0) << "ERROR: ds3_init() failed: " << ret << dendl;
- return ret;
- }
-
- // XXX: these params should be taken from config settings and
- // cct somehow?
- const auto& daos_pool = cct->_conf.get_val<std::string>("daos_pool");
- ldout(cct, 20) << "INFO: daos pool: " << daos_pool << dendl;
-
- ret = ds3_connect(daos_pool.c_str(), nullptr, &ds3, nullptr);
-
- if (ret != 0) {
- ldout(cct, 0) << "ERROR: ds3_connect() failed: " << ret << dendl;
- ds3_fini();
- }
-
- return ret;
-}
-
-const std::string& DaosZoneGroup::get_endpoint() const {
- if (!group.endpoints.empty()) {
- return group.endpoints.front();
- } else {
- // use zonegroup's master zone endpoints
- auto z = group.zones.find(group.master_zone);
- if (z != group.zones.end() && !z->second.endpoints.empty()) {
- return z->second.endpoints.front();
- }
- }
- return empty;
-}
-
-bool DaosZoneGroup::placement_target_exists(std::string& target) const {
- return !!group.placement_targets.count(target);
-}
-
-int DaosZoneGroup::get_placement_target_names(
- std::set<std::string>& names) const {
- for (const auto& target : group.placement_targets) {
- names.emplace(target.second.name);
- }
-
- return 0;
-}
-
-int DaosZoneGroup::get_placement_tier(const rgw_placement_rule& rule,
- std::unique_ptr<PlacementTier>* tier) {
- std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer;
- titer = group.placement_targets.find(rule.name);
- if (titer == group.placement_targets.end()) {
- return -ENOENT;
- }
-
- const auto& target_rule = titer->second;
- std::map<std::string, RGWZoneGroupPlacementTier>::const_iterator ttier;
- ttier = target_rule.tier_targets.find(rule.storage_class);
- if (ttier == target_rule.tier_targets.end()) {
- // not found
- return -ENOENT;
- }
-
- PlacementTier* t;
- t = new DaosPlacementTier(store, ttier->second);
- if (!t) return -ENOMEM;
-
- tier->reset(t);
- return 0;
-}
-
-ZoneGroup& DaosZone::get_zonegroup() { return zonegroup; }
-
-int DaosZone::get_zonegroup(const std::string& id,
- std::unique_ptr<ZoneGroup>* group) {
- /* XXX: for now only one zonegroup supported */
- ZoneGroup* zg;
- zg = new DaosZoneGroup(store, zonegroup.get_group());
-
- group->reset(zg);
- return 0;
-}
-
-const rgw_zone_id& DaosZone::get_id() { return cur_zone_id; }
-
-const std::string& DaosZone::get_name() const {
- return zone_params->get_name();
-}
-
-bool DaosZone::is_writeable() { return true; }
-
-bool DaosZone::get_redirect_endpoint(std::string* endpoint) { return false; }
-
-bool DaosZone::has_zonegroup_api(const std::string& api) const { return false; }
-
-const std::string& DaosZone::get_current_period_id() {
- return current_period->get_id();
-}
-
-std::unique_ptr<LuaManager> DaosStore::get_lua_manager() {
- return std::make_unique<DaosLuaManager>(this);
-}
-
-int DaosObject::get_obj_state(const DoutPrefixProvider* dpp,
- RGWObjState** _state, optional_yield y,
- bool follow_olh) {
- // Get object's metadata (those stored in rgw_bucket_dir_entry)
- ldpp_dout(dpp, 20) << "DEBUG: get_obj_state" << dendl;
- rgw_bucket_dir_entry ent;
- *_state = &state; // state is required even if a failure occurs
-
- int ret = get_dir_entry_attrs(dpp, &ent);
- if (ret != 0) {
- return ret;
- }
-
- // Set object state.
- state.exists = true;
- state.size = ent.meta.size;
- state.accounted_size = ent.meta.size;
- state.mtime = ent.meta.mtime;
-
- state.has_attrs = true;
- bufferlist etag_bl;
- string& etag = ent.meta.etag;
- ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag
- << dendl;
- etag_bl.append(etag);
- state.attrset[RGW_ATTR_ETAG] = etag_bl;
- return 0;
-}
-
-DaosObject::~DaosObject() { close(nullptr); }
-
-int DaosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
- Attrs* delattrs, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: DaosObject::set_obj_attrs()" << dendl;
- // TODO handle target_obj
- // Get object's metadata (those stored in rgw_bucket_dir_entry)
- rgw_bucket_dir_entry ent;
- int ret = get_dir_entry_attrs(dpp, &ent);
- if (ret != 0) {
- return ret;
- }
-
- // Update object metadata
- Attrs updateattrs = setattrs == nullptr ? attrs : *setattrs;
- if (delattrs) {
- for (auto const& [attr, attrval] : *delattrs) {
- updateattrs.erase(attr);
- }
- }
-
- ret = set_dir_entry_attrs(dpp, &ent, &updateattrs);
- return ret;
-}
-
-int DaosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp,
- rgw_obj* target_obj) {
- ldpp_dout(dpp, 20) << "DEBUG: DaosObject::get_obj_attrs()" << dendl;
- // TODO handle target_obj
- // Get object's metadata (those stored in rgw_bucket_dir_entry)
- rgw_bucket_dir_entry ent;
- int ret = get_dir_entry_attrs(dpp, &ent, &attrs);
- return ret;
-}
-
-int DaosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val,
- optional_yield y,
- const DoutPrefixProvider* dpp) {
- // Get object's metadata (those stored in rgw_bucket_dir_entry)
- ldpp_dout(dpp, 20) << "DEBUG: modify_obj_attrs" << dendl;
- rgw_bucket_dir_entry ent;
- int ret = get_dir_entry_attrs(dpp, &ent, &attrs);
- if (ret != 0) {
- return ret;
- }
-
- // Update object attrs
- set_atomic();
- attrs[attr_name] = attr_val;
-
- ret = set_dir_entry_attrs(dpp, &ent, &attrs);
- return ret;
-}
-
-int DaosObject::delete_obj_attrs(const DoutPrefixProvider* dpp,
- const char* attr_name, optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: delete_obj_attrs" << dendl;
- rgw_obj target = get_obj();
- Attrs rmattr;
- bufferlist bl;
-
- rmattr[attr_name] = bl;
- return set_obj_attrs(dpp, nullptr, &rmattr, y);
-}
-
-bool DaosObject::is_expired() {
- auto iter = attrs.find(RGW_ATTR_DELETE_AT);
- if (iter != attrs.end()) {
- utime_t delete_at;
- try {
- auto bufit = iter->second.cbegin();
- decode(delete_at, bufit);
- } catch (buffer::error& err) {
- ldout(store->ctx(), 0)
- << "ERROR: " << __func__
- << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl;
- return false;
- }
-
- if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) {
- return true;
- }
- }
-
- return false;
-}
-
-// Taken from rgw_rados.cc
-void DaosObject::gen_rand_obj_instance_name() {
- enum { OBJ_INSTANCE_LEN = 32 };
- char buf[OBJ_INSTANCE_LEN + 1];
-
- gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN);
- state.obj.key.set_instance(buf);
-}
-
-int DaosObject::omap_get_vals_by_keys(const DoutPrefixProvider* dpp,
- const std::string& oid,
- const std::set<std::string>& keys,
- Attrs* vals) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::omap_set_val_by_key(const DoutPrefixProvider* dpp,
- const std::string& key, bufferlist& val,
- bool must_exist, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) {
- return 0;
-}
-
-std::unique_ptr<MPSerializer> DaosObject::get_serializer(
- const DoutPrefixProvider* dpp, const std::string& lock_name) {
- return std::make_unique<MPDaosSerializer>(dpp, store, this, lock_name);
-}
-
-int DaosObject::transition(Bucket* bucket,
- const rgw_placement_rule& placement_rule,
- const real_time& mtime, uint64_t olh_epoch,
- const DoutPrefixProvider* dpp, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::transition_to_cloud(
- Bucket* bucket, rgw::sal::PlacementTier* tier, rgw_bucket_dir_entry& o,
- std::set<std::string>& cloud_targets, CephContext* cct, bool update_object,
- const DoutPrefixProvider* dpp, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-bool DaosObject::placement_rules_match(rgw_placement_rule& r1,
- rgw_placement_rule& r2) {
- /* XXX: support single default zone and zonegroup for now */
- return true;
-}
-
-int DaosObject::dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y,
- Formatter* f) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::unique_ptr<Object::ReadOp> DaosObject::get_read_op() {
- return std::make_unique<DaosObject::DaosReadOp>(this);
-}
-
-DaosObject::DaosReadOp::DaosReadOp(DaosObject* _source) : source(_source) {}
-
-int DaosObject::DaosReadOp::prepare(optional_yield y,
- const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << __func__
- << ": bucket=" << source->get_bucket()->get_name()
- << dendl;
-
- if (source->get_bucket()->versioned() && !source->have_instance()) {
- // If the bucket is versioned and no version is specified, get the latest
- // version
- source->set_instance(DS3_LATEST_INSTANCE);
- }
-
- rgw_bucket_dir_entry ent;
- int ret = source->get_dir_entry_attrs(dpp, &ent);
-
-  // Set the source object's attrs. attrs is a key/value map used in
-  // send_response_data() to set attributes, including the etag.
- bufferlist etag_bl;
- string& etag = ent.meta.etag;
- ldpp_dout(dpp, 20) << __func__ << ": object's etag: " << ent.meta.etag
- << dendl;
- etag_bl.append(etag.c_str(), etag.size());
-  source->get_attrs().emplace(RGW_ATTR_ETAG, std::move(etag_bl));
-
- source->set_key(ent.key);
- source->set_obj_size(ent.meta.size);
- ldpp_dout(dpp, 20) << __func__ << ": object's size: " << ent.meta.size
- << dendl;
-
- return ret;
-}
-
-int DaosObject::DaosReadOp::read(int64_t off, int64_t end, bufferlist& bl,
- optional_yield y,
- const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl;
- int ret = source->lookup(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Calculate size, end is inclusive
- uint64_t size = end - off + 1;
-
- // Read
- ret = source->read(dpp, bl, off, size);
- if (ret != 0) {
- return ret;
- }
-
- return ret;
-}
-
-// RGWGetObj::execute() calls ReadOp::iterate() to read the object from 'off'
-// to 'end'. The returned data is processed by 'cb', which is a chain of
-// post-processing filters such as decompression, decryption and sending the
-// data back to the client (RGWGetObj_CB::handle_data, which in turn calls
-// RGWGetObj::get_data_cb() to send the data back).
-//
-// The POC implements a simple synchronous version of iterate() that reads a
-// block of data at a time and calls 'cb' for post-processing.
-int DaosObject::DaosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off,
- int64_t end, RGWGetDataCB* cb,
- optional_yield y) {
- ldpp_dout(dpp, 20) << __func__ << ": off=" << off << " end=" << end << dendl;
- int ret = source->lookup(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Calculate size, end is inclusive
- uint64_t size = end - off + 1;
-
- // Reserve buffers and read
- bufferlist bl;
- ret = source->read(dpp, bl, off, size);
- if (ret != 0) {
- return ret;
- }
-
- // Call cb to process returned data.
- ldpp_dout(dpp, 20) << __func__ << ": call cb to process data, actual=" << size
- << dendl;
- cb->handle_data(bl, off, size);
- return ret;
-}
-
-int DaosObject::DaosReadOp::get_attr(const DoutPrefixProvider* dpp,
- const char* name, bufferlist& dest,
- optional_yield y) {
- Attrs attrs;
- int ret = source->get_dir_entry_attrs(dpp, nullptr, &attrs);
-  if (ret != 0) {
- return -ENODATA;
- }
-
- auto search = attrs.find(name);
- if (search == attrs.end()) {
- return -ENODATA;
- }
-
- dest = search->second;
- return 0;
-}
-
-std::unique_ptr<Object::DeleteOp> DaosObject::get_delete_op() {
- return std::make_unique<DaosObject::DaosDeleteOp>(this);
-}
-
-DaosObject::DaosDeleteOp::DaosDeleteOp(DaosObject* _source) : source(_source) {}
-
-// Implementation of DELETE OBJ also requires DaosObject::get_obj_state()
-// to retrieve and set object's state from object's metadata.
-//
-// TODO:
-// 1. The POC only deletes the Daos objects. It doesn't handle the
-// DeleteOp::params. Delete::delete_obj() in rgw_rados.cc shows how the rados
-// backend processes the params.
-// 2. Delete an object when its versioning is turned on.
-// 3. Handle empty directories
-// 4. Fail when file doesn't exist
-int DaosObject::DaosDeleteOp::delete_obj(const DoutPrefixProvider* dpp,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "DaosDeleteOp::delete_obj "
- << source->get_key().get_oid() << " from "
- << source->get_bucket()->get_name() << dendl;
- if (source->get_instance() == "null") {
- source->clear_instance();
- }
-
- // Open bucket
- int ret = 0;
- std::string key = source->get_key().get_oid();
- DaosBucket* daos_bucket = source->get_daos_bucket();
- ret = daos_bucket->open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Remove the daos object
- ret = ds3_obj_destroy(key.c_str(), daos_bucket->ds3b);
- ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_destroy key=" << key << " ret=" << ret
- << dendl;
-
- // result.delete_marker = parent_op.result.delete_marker;
- // result.version_id = parent_op.result.version_id;
-
- return ret;
-}
-
-int DaosObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y,
- bool prevent_versioning) {
- ldpp_dout(dpp, 20) << "DEBUG: delete_object" << dendl;
- DaosObject::DaosDeleteOp del_op(this);
- del_op.params.bucket_owner = bucket->get_info().owner;
- del_op.params.versioning_status = bucket->get_info().versioning_status();
-
- return del_op.delete_obj(dpp, y);
-}
-
-int DaosObject::copy_object(
- User* user, req_info* info, const rgw_zone_id& source_zone,
- rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
- rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement,
- ceph::real_time* src_mtime, ceph::real_time* mtime,
- const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
- bool high_precision_time, const char* if_match, const char* if_nomatch,
- AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
- RGWObjCategory category, uint64_t olh_epoch,
- boost::optional<ceph::real_time> delete_at, std::string* version_id,
- std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*),
- void* progress_data, const DoutPrefixProvider* dpp, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::swift_versioning_restore(bool& restored,
- const DoutPrefixProvider* dpp) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::swift_versioning_copy(const DoutPrefixProvider* dpp,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosObject::lookup(const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: lookup" << dendl;
- if (is_open()) {
- return 0;
- }
-
- if (get_instance() == "null") {
- clear_instance();
- }
-
- int ret = 0;
- DaosBucket* daos_bucket = get_daos_bucket();
- ret = daos_bucket->open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- ret = ds3_obj_open(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b);
-
- if (ret == -ENOENT) {
- ldpp_dout(dpp, 20) << "DEBUG: daos object (" << get_bucket()->get_name()
- << ", " << get_key().get_oid()
- << ") does not exist: ret=" << ret << dendl;
- } else if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to open daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosObject::create(const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: create" << dendl;
- if (is_open()) {
- return 0;
- }
-
- if (get_instance() == "null") {
- clear_instance();
- }
-
- int ret = 0;
- DaosBucket* daos_bucket = get_daos_bucket();
- ret = daos_bucket->open(dpp);
- if (ret != 0) {
- return ret;
- }
-
- ret = ds3_obj_create(get_key().get_oid().c_str(), &ds3o, daos_bucket->ds3b);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to create daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosObject::close(const DoutPrefixProvider* dpp) {
- ldpp_dout(dpp, 20) << "DEBUG: close" << dendl;
- if (!is_open()) {
- return 0;
- }
-
- int ret = ds3_obj_close(ds3o);
- ds3o = nullptr;
- ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_close ret=" << ret << dendl;
- return ret;
-}
-
-int DaosObject::write(const DoutPrefixProvider* dpp, bufferlist&& data,
- uint64_t offset) {
- ldpp_dout(dpp, 20) << "DEBUG: write" << dendl;
- uint64_t size = data.length();
- int ret = ds3_obj_write(data.c_str(), offset, &size, get_daos_bucket()->ds3b,
- ds3o, nullptr);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to write into daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosObject::read(const DoutPrefixProvider* dpp, bufferlist& data,
- uint64_t offset, uint64_t& size) {
- ldpp_dout(dpp, 20) << "DEBUG: read" << dendl;
- int ret = ds3_obj_read(data.append_hole(size).c_str(), offset, &size,
- get_daos_bucket()->ds3b, ds3o, nullptr);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to read from daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-// Get the object's dirent and attrs
-int DaosObject::get_dir_entry_attrs(const DoutPrefixProvider* dpp,
- rgw_bucket_dir_entry* ent,
- Attrs* getattrs) {
- ldpp_dout(dpp, 20) << "DEBUG: get_dir_entry_attrs" << dendl;
- int ret = 0;
- vector<uint8_t> value(DS3_MAX_ENCODED_LEN);
- uint32_t size = value.size();
-
- if (get_key().ns == RGW_OBJ_NS_MULTIPART) {
- struct ds3_multipart_upload_info ui = {.encoded = value.data(),
- .encoded_length = size};
- ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
- get_key().get_oid().c_str(), store->ds3);
- } else {
- ret = lookup(dpp);
- if (ret != 0) {
- return ret;
- }
-
- auto object_info = std::make_unique<struct ds3_object_info>();
- object_info->encoded = value.data();
- object_info->encoded_length = size;
- ret = ds3_obj_get_info(object_info.get(), get_daos_bucket()->ds3b, ds3o);
- size = object_info->encoded_length;
- }
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to get info of daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- return ret;
- }
-
- rgw_bucket_dir_entry dummy_ent;
- if (!ent) {
- // if ent is not passed, use a dummy ent
- ent = &dummy_ent;
- }
-
- bufferlist bl;
- bl.append(reinterpret_cast<char*>(value.data()), size);
- auto iter = bl.cbegin();
- ent->decode(iter);
- if (getattrs) {
- decode(*getattrs, iter);
- }
-
- return ret;
-}
-// Set the object's dirent and attrs
-int DaosObject::set_dir_entry_attrs(const DoutPrefixProvider* dpp,
- rgw_bucket_dir_entry* ent,
- Attrs* setattrs) {
- ldpp_dout(dpp, 20) << "DEBUG: set_dir_entry_attrs" << dendl;
- int ret = lookup(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Set defaults
- if (!ent) {
- // if ent is not passed, return an error
- return -EINVAL;
- }
-
- if (!setattrs) {
- // if setattrs is not passed, use object attrs
- setattrs = &attrs;
- }
-
- bufferlist wbl;
- ent->encode(wbl);
- encode(*setattrs, wbl);
-
- // Write rgw_bucket_dir_entry into object xattr
- auto object_info = std::make_unique<struct ds3_object_info>();
- object_info->encoded = wbl.c_str();
- object_info->encoded_length = wbl.length();
- ret = ds3_obj_set_info(object_info.get(), get_daos_bucket()->ds3b, ds3o);
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to set info of daos object ("
- << get_bucket()->get_name() << ", " << get_key().get_oid()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosObject::mark_as_latest(const DoutPrefixProvider* dpp,
- ceph::real_time set_mtime) {
- // TODO handle deletion
- // TODO understand race conditions
- ldpp_dout(dpp, 20) << "DEBUG: mark_as_latest" << dendl;
-
- // Get latest version so far
- std::unique_ptr<DaosObject> latest_object = std::make_unique<DaosObject>(
- store, rgw_obj_key(get_name(), DS3_LATEST_INSTANCE), get_bucket());
-
- ldpp_dout(dpp, 20) << __func__ << ": key=" << get_key().get_oid()
- << " latest_object_key= "
- << latest_object->get_key().get_oid() << dendl;
-
- int ret = latest_object->lookup(dpp);
- if (ret == 0) {
- // Get metadata only if file exists
- rgw_bucket_dir_entry latest_ent;
- Attrs latest_attrs;
- ret = latest_object->get_dir_entry_attrs(dpp, &latest_ent, &latest_attrs);
- if (ret != 0) {
- return ret;
- }
-
- // Update flags
- latest_ent.flags = rgw_bucket_dir_entry::FLAG_VER;
- latest_ent.meta.mtime = set_mtime;
- ret = latest_object->set_dir_entry_attrs(dpp, &latest_ent, &latest_attrs);
- if (ret != 0) {
- return ret;
- }
- }
-
- // Get or create the link [latest], make it link to the current latest
- // version.
- ret =
- ds3_obj_mark_latest(get_key().get_oid().c_str(), get_daos_bucket()->ds3b);
- ldpp_dout(dpp, 20) << "DEBUG: ds3_obj_mark_latest ret=" << ret << dendl;
- return ret;
-}
-
-DaosAtomicWriter::DaosAtomicWriter(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, DaosStore* _store,
- const rgw_user& _owner, const rgw_placement_rule* _ptail_placement_rule,
- uint64_t _olh_epoch, const std::string& _unique_tag)
- : StoreWriter(dpp, y),
- store(_store),
- owner(_owner),
- ptail_placement_rule(_ptail_placement_rule),
- olh_epoch(_olh_epoch),
- unique_tag(_unique_tag),
- obj(_store, obj->get_key(), obj->get_bucket()) {}
-
-int DaosAtomicWriter::prepare(optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: prepare" << dendl;
- int ret = obj.create(dpp);
- return ret;
-}
-
-// TODO: Handle concurrent writes; a unique object id or DAOS transactions are
-// possible solutions.
-// XXX: Do we need to accumulate writes as motr does?
-int DaosAtomicWriter::process(bufferlist&& data, uint64_t offset) {
- ldpp_dout(dpp, 20) << "DEBUG: process" << dendl;
- if (data.length() == 0) {
- return 0;
- }
-
- int ret = 0;
- if (!obj.is_open()) {
- ret = obj.lookup(dpp);
- if (ret != 0) {
- return ret;
- }
- }
-
- // XXX: Combine multiple streams into one as motr does
- uint64_t data_size = data.length();
- ret = obj.write(dpp, std::move(data), offset);
- if (ret == 0) {
- total_data_size += data_size;
- }
- return ret;
-}
-
-int DaosAtomicWriter::complete(
- size_t accounted_size, const std::string& etag, ceph::real_time* mtime,
- ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at, const char* if_match, const char* if_nomatch,
- const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl;
- bufferlist bl;
- rgw_bucket_dir_entry ent;
- int ret;
-
-  // Set rgw_bucket_dir_entry. Some of the members of this structure may not
-  // apply to daos.
-  //
-  // Check out AtomicObjectProcessor::complete() in rgw_putobj_processor.cc
-  // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what to set
-  // in the dir entry and how. Only the basic ones are set for the POC; no
-  // ACLs or other attrs.
- obj.get_key().get_index_key(&ent.key);
- ent.meta.size = total_data_size;
- ent.meta.accounted_size = accounted_size;
- ent.meta.mtime =
- real_clock::is_zero(set_mtime) ? ceph::real_clock::now() : set_mtime;
- ent.meta.etag = etag;
- ent.meta.owner = owner.to_str();
- ent.meta.owner_display_name =
- obj.get_bucket()->get_owner()->get_display_name();
- bool is_versioned = obj.get_bucket()->versioned();
- if (is_versioned)
- ent.flags =
- rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
- ldpp_dout(dpp, 20) << __func__ << ": key=" << obj.get_key().get_oid()
- << " etag: " << etag << dendl;
- if (user_data) ent.meta.user_data = *user_data;
-
- RGWBucketInfo& info = obj.get_bucket()->get_info();
- if (info.obj_lock_enabled() && info.obj_lock.has_rule()) {
- auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
- if (iter == attrs.end()) {
- real_time lock_until_date =
- info.obj_lock.get_lock_until_date(ent.meta.mtime);
- string mode = info.obj_lock.get_mode();
- RGWObjectRetention obj_retention(mode, lock_until_date);
- bufferlist retention_bl;
- obj_retention.encode(retention_bl);
- attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl;
- }
- }
-
- ret = obj.set_dir_entry_attrs(dpp, &ent, &attrs);
-
- if (is_versioned) {
- ret = obj.mark_as_latest(dpp, set_mtime);
- if (ret != 0) {
- return ret;
- }
- }
-
- return ret;
-}
-
-int DaosMultipartUpload::abort(const DoutPrefixProvider* dpp,
- CephContext* cct) {
- // Remove upload from bucket multipart index
- ldpp_dout(dpp, 20) << "DEBUG: abort" << dendl;
- return ds3_upload_remove(bucket->get_name().c_str(), get_upload_id().c_str(),
- store->ds3);
-}
-
-std::unique_ptr<rgw::sal::Object> DaosMultipartUpload::get_meta_obj() {
- return bucket->get_object(
- rgw_obj_key(get_upload_id(), string(), RGW_OBJ_NS_MULTIPART));
-}
-
-int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y,
- ACLOwner& _owner,
- rgw_placement_rule& dest_placement,
- rgw::sal::Attrs& attrs) {
- ldpp_dout(dpp, 20) << "DEBUG: init" << dendl;
- int ret;
- std::string oid = mp_obj.get_key();
-
- // Create an initial entry in the bucket. The entry will be
- // updated when multipart upload is completed, for example,
- // size, etag etc.
- bufferlist bl;
- rgw_bucket_dir_entry ent;
- ent.key.name = oid;
- ent.meta.owner = owner.get_id().to_str();
- ent.meta.category = RGWObjCategory::MultiMeta;
- ent.meta.mtime = ceph::real_clock::now();
-
- multipart_upload_info upload_info;
- upload_info.dest_placement = dest_placement;
-
- ent.encode(bl);
- encode(attrs, bl);
- encode(upload_info, bl);
-
- struct ds3_multipart_upload_info ui;
- std::strcpy(ui.upload_id, MULTIPART_UPLOAD_ID_PREFIX);
- std::strncpy(ui.key, oid.c_str(), sizeof(ui.key));
- ui.encoded = bl.c_str();
- ui.encoded_length = bl.length();
- int prefix_length = strlen(ui.upload_id);
-
- do {
- gen_rand_alphanumeric(store->ctx(), ui.upload_id + prefix_length,
- sizeof(ui.upload_id) - 1 - prefix_length);
- mp_obj.init(oid, ui.upload_id);
- ret = ds3_upload_init(&ui, bucket->get_name().c_str(), store->ds3);
- } while (ret == -EEXIST);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to create multipart upload dir ("
- << bucket->get_name() << "/" << get_upload_id()
- << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosMultipartUpload::list_parts(const DoutPrefixProvider* dpp,
- CephContext* cct, int num_parts, int marker,
- int* next_marker, bool* truncated,
- bool assume_unsorted) {
- ldpp_dout(dpp, 20) << "DEBUG: list_parts" << dendl;
- // Init needed structures
- vector<struct ds3_multipart_part_info> multipart_part_infos(num_parts);
- uint32_t npart = multipart_part_infos.size();
- vector<vector<uint8_t>> values(npart, vector<uint8_t>(DS3_MAX_ENCODED_LEN));
- for (uint32_t i = 0; i < npart; i++) {
- multipart_part_infos[i].encoded = values[i].data();
- multipart_part_infos[i].encoded_length = values[i].size();
- }
-
- uint32_t daos_marker = marker;
- int ret = ds3_upload_list_parts(
- bucket->get_name().c_str(), get_upload_id().c_str(), &npart,
- multipart_part_infos.data(), &daos_marker, truncated, store->ds3);
-
- if (ret != 0) {
- if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- return ret;
- }
-
- multipart_part_infos.resize(npart);
- values.resize(npart);
- parts.clear();
-
- for (auto const& pi : multipart_part_infos) {
- bufferlist bl;
- bl.append(reinterpret_cast<char*>(pi.encoded), pi.encoded_length);
-
- std::unique_ptr<DaosMultipartPart> part =
- std::make_unique<DaosMultipartPart>();
- auto iter = bl.cbegin();
- decode(part->info, iter);
- parts[pi.part_num] = std::move(part);
- }
-
- if (next_marker) {
- *next_marker = daos_marker;
- }
- return ret;
-}
-
-// Heavily copied from rgw_sal_rados.cc
-int DaosMultipartUpload::complete(
- const DoutPrefixProvider* dpp, optional_yield y, CephContext* cct,
- map<int, string>& part_etags, list<rgw_obj_index_key>& remove_objs,
- uint64_t& accounted_size, bool& compressed, RGWCompressionInfo& cs_info,
- off_t& off, std::string& tag, ACLOwner& owner, uint64_t olh_epoch,
- rgw::sal::Object* target_obj) {
- ldpp_dout(dpp, 20) << "DEBUG: complete" << dendl;
- char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
- char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
- std::string etag;
- bufferlist etag_bl;
- MD5 hash;
- // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
- hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
- bool truncated;
- int ret;
-
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): enter" << dendl;
- int total_parts = 0;
- int handled_parts = 0;
- int max_parts = 1000;
- int marker = 0;
- uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size;
- auto etags_iter = part_etags.begin();
- rgw::sal::Attrs attrs = target_obj->get_attrs();
-
- do {
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): list_parts()"
- << dendl;
- ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated);
- if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- if (ret != 0) return ret;
-
- total_parts += parts.size();
- if (!truncated && total_parts != (int)part_etags.size()) {
- ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts
- << " expected: " << part_etags.size() << dendl;
- ret = -ERR_INVALID_PART;
- return ret;
- }
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): parts.size()="
- << parts.size() << dendl;
-
- for (auto obj_iter = parts.begin();
- etags_iter != part_etags.end() && obj_iter != parts.end();
- ++etags_iter, ++obj_iter, ++handled_parts) {
- DaosMultipartPart* part =
- dynamic_cast<rgw::sal::DaosMultipartPart*>(obj_iter->second.get());
- uint64_t part_size = part->get_size();
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part_size="
- << part_size << dendl;
- if (handled_parts < (int)part_etags.size() - 1 &&
- part_size < min_part_size) {
- ret = -ERR_TOO_SMALL;
- return ret;
- }
-
- char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
- if (etags_iter->first != (int)obj_iter->first) {
- ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: "
- << etags_iter->first
- << " next uploaded: " << obj_iter->first << dendl;
- ret = -ERR_INVALID_PART;
- return ret;
- }
- string part_etag = rgw_string_unquote(etags_iter->second);
- if (part_etag.compare(part->get_etag()) != 0) {
- ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: "
- << etags_iter->first
- << " etag: " << etags_iter->second << dendl;
- ret = -ERR_INVALID_PART;
- return ret;
- }
-
- hex_to_buf(part->get_etag().c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE);
- hash.Update((const unsigned char*)petag, sizeof(petag));
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): calc etag "
- << dendl;
-
- RGWUploadPartInfo& obj_part = part->info;
- string oid = mp_obj.get_part(obj_part.num);
- rgw_obj src_obj;
- src_obj.init_ns(bucket->get_key(), oid, RGW_OBJ_NS_MULTIPART);
-
- bool part_compressed = (obj_part.cs_info.compression_type != "none");
- if ((handled_parts > 0) &&
- ((part_compressed != compressed) ||
- (cs_info.compression_type != obj_part.cs_info.compression_type))) {
- ldpp_dout(dpp, 0)
- << "ERROR: compression type was changed during multipart upload ("
- << cs_info.compression_type << ">>"
- << obj_part.cs_info.compression_type << ")" << dendl;
- ret = -ERR_INVALID_PART;
- return ret;
- }
-
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part compression"
- << dendl;
- if (part_compressed) {
- int64_t new_ofs; // offset in compression data for new part
- if (cs_info.blocks.size() > 0)
- new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
- else
- new_ofs = 0;
- for (const auto& block : obj_part.cs_info.blocks) {
- compression_block cb;
- cb.old_ofs = block.old_ofs + cs_info.orig_size;
- cb.new_ofs = new_ofs;
- cb.len = block.len;
- cs_info.blocks.push_back(cb);
- new_ofs = cb.new_ofs + cb.len;
- }
- if (!compressed)
- cs_info.compression_type = obj_part.cs_info.compression_type;
- cs_info.orig_size += obj_part.cs_info.orig_size;
- compressed = true;
- }
-
-      // We may not need to do the following, as remove_objs are the entries
-      // that don't show when listing a bucket. Since we store the metadata of
-      // in-progress uploads in a separate index, they are not shown when
-      // listing a bucket.
- rgw_obj_index_key remove_key;
- src_obj.key.get_index_key(&remove_key);
-
- remove_objs.push_back(remove_key);
-
- off += obj_part.size;
- accounted_size += obj_part.accounted_size;
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): off=" << off
- << ", accounted_size = " << accounted_size << dendl;
- }
- } while (truncated);
- hash.Final((unsigned char*)final_etag);
-
- buf_to_hex((unsigned char*)final_etag, sizeof(final_etag), final_etag_str);
- snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2],
- sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, "-%lld",
- (long long)part_etags.size());
- etag = final_etag_str;
- ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl;
-
- etag_bl.append(etag);
-
- attrs[RGW_ATTR_ETAG] = etag_bl;
-
- if (compressed) {
- // write compression attribute to full object
- bufferlist tmp;
- encode(cs_info, tmp);
- attrs[RGW_ATTR_COMPRESSION] = tmp;
- }
-
- // Different from rgw_sal_rados.cc starts here
- // Read the object's multipart info
- bufferlist bl;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_multipart_upload_info ui = {
- .encoded = bl.append_hole(size).c_str(), .encoded_length = size};
- ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
- get_upload_id().c_str(), store->ds3);
- ldpp_dout(dpp, 20) << "DEBUG: ds3_upload_get_info entry="
- << bucket->get_name() << "/" << get_upload_id() << dendl;
- if (ret != 0) {
- if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- return ret;
- }
-
- rgw_bucket_dir_entry ent;
- auto iter = bl.cbegin();
- ent.decode(iter);
-
- // Update entry data and name
- target_obj->get_key().get_index_key(&ent.key);
- ent.meta.size = off;
- ent.meta.accounted_size = accounted_size;
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): obj size="
- << ent.meta.size
- << " obj accounted size=" << ent.meta.accounted_size
- << dendl;
- ent.meta.category = RGWObjCategory::Main;
- ent.meta.mtime = ceph::real_clock::now();
- bool is_versioned = target_obj->get_bucket()->versioned();
- if (is_versioned)
- ent.flags =
- rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
- ent.meta.etag = etag;
-
- // Open object
- DaosObject* obj = static_cast<DaosObject*>(target_obj);
- ret = obj->create(dpp);
- if (ret != 0) {
- return ret;
- }
-
- // Copy data from parts to object
- uint64_t write_off = 0;
- for (auto const& [part_num, part] : get_parts()) {
- ds3_part_t* ds3p;
- ret = ds3_part_open(get_bucket_name().c_str(), get_upload_id().c_str(),
- part_num, false, &ds3p, store->ds3);
- if (ret != 0) {
- return ret;
- }
-
- // Reserve buffers and read
- uint64_t size = part->get_size();
- bufferlist bl;
- ret = ds3_part_read(bl.append_hole(size).c_str(), 0, &size, ds3p,
- store->ds3, nullptr);
- if (ret != 0) {
- ds3_part_close(ds3p);
- return ret;
- }
-
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::complete(): part " << part_num
- << " size is " << size << dendl;
-
- // write to obj
- obj->write(dpp, std::move(bl), write_off);
- ds3_part_close(ds3p);
- write_off += part->get_size();
- }
-
- // Set attributes
- ret = obj->set_dir_entry_attrs(dpp, &ent, &attrs);
-
- if (is_versioned) {
- ret = obj->mark_as_latest(dpp, ent.meta.mtime);
- if (ret != 0) {
- return ret;
- }
- }
-
- // Remove upload from bucket multipart index
- ret = ds3_upload_remove(get_bucket_name().c_str(), get_upload_id().c_str(),
- store->ds3);
- return ret;
-}
-
-int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp,
- optional_yield y, rgw_placement_rule** rule,
- rgw::sal::Attrs* attrs) {
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_info(): enter" << dendl;
- if (!rule && !attrs) {
- return 0;
- }
-
- if (rule) {
- if (!placement.empty()) {
- *rule = &placement;
- if (!attrs) {
- // Don't need attrs, done
- return 0;
- }
- } else {
- *rule = nullptr;
- }
- }
-
- // Read the multipart upload dirent from index
- bufferlist bl;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_multipart_upload_info ui = {
- .encoded = bl.append_hole(size).c_str(), .encoded_length = size};
- int ret = ds3_upload_get_info(&ui, bucket->get_name().c_str(),
- get_upload_id().c_str(), store->ds3);
-
- if (ret != 0) {
- if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- return ret;
- }
-
- multipart_upload_info upload_info;
- rgw_bucket_dir_entry ent;
- Attrs decoded_attrs;
- auto iter = bl.cbegin();
- ent.decode(iter);
- decode(decoded_attrs, iter);
-  ldpp_dout(dpp, 20) << "DEBUG: decoded_attrs=" << decoded_attrs << dendl;
-
- if (attrs) {
- *attrs = decoded_attrs;
- if (!rule || *rule != nullptr) {
- // placement was cached; don't actually read
- return 0;
- }
- }
-
- // Now decode the placement rule
- decode(upload_info, iter);
- placement = upload_info.dest_placement;
- *rule = &placement;
-
- return 0;
-}
-
-std::unique_ptr<Writer> DaosMultipartUpload::get_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule, uint64_t part_num,
- const std::string& part_num_str) {
- ldpp_dout(dpp, 20) << "DaosMultipartUpload::get_writer(): enter part="
-                     << part_num << " head_obj=" << obj << dendl;
- return std::make_unique<DaosMultipartWriter>(
- dpp, y, this, obj, store, owner, ptail_placement_rule,
- part_num, part_num_str);
-}
-
-DaosMultipartWriter::~DaosMultipartWriter() {
- if (is_open()) ds3_part_close(ds3p);
-}
-
-int DaosMultipartWriter::prepare(optional_yield y) {
- ldpp_dout(dpp, 20) << "DaosMultipartWriter::prepare(): enter part="
- << part_num_str << dendl;
- int ret = ds3_part_open(get_bucket_name().c_str(), upload_id.c_str(),
- part_num, true, &ds3p, store->ds3);
- if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- return ret;
-}
-
-const std::string& DaosMultipartWriter::get_bucket_name() {
- return static_cast<DaosMultipartUpload*>(upload)->get_bucket_name();
-}
-
-int DaosMultipartWriter::process(bufferlist&& data, uint64_t offset) {
- ldpp_dout(dpp, 20) << "DaosMultipartWriter::process(): enter part="
- << part_num_str << " offset=" << offset << dendl;
- if (data.length() == 0) {
- return 0;
- }
-
- uint64_t size = data.length();
- int ret =
- ds3_part_write(data.c_str(), offset, &size, ds3p, store->ds3, nullptr);
- if (ret == 0) {
- // XXX: Combine multiple streams into one as motr does
- actual_part_size += size;
- } else {
- ldpp_dout(dpp, 0) << "ERROR: failed to write into part ("
- << get_bucket_name() << ", " << upload_id << ", "
- << part_num << "): ret=" << ret << dendl;
- }
- return ret;
-}
-
-int DaosMultipartWriter::complete(
- size_t accounted_size, const std::string& etag, ceph::real_time* mtime,
- ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at, const char* if_match, const char* if_nomatch,
- const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): enter part="
- << part_num_str << dendl;
-
- // Add an entry into part index
- bufferlist bl;
- RGWUploadPartInfo info;
- info.num = part_num;
- info.etag = etag;
- info.size = actual_part_size;
- info.accounted_size = accounted_size;
- info.modified = real_clock::now();
-
- bool compressed;
- int ret = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info);
- ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): compression ret="
- << ret << dendl;
- if (ret != 0) {
- ldpp_dout(dpp, 1) << "cannot get compression info" << dendl;
- return ret;
- }
- encode(info, bl);
- encode(attrs, bl);
-  ldpp_dout(dpp, 20) << "DaosMultipartWriter::complete(): entry size="
- << bl.length() << dendl;
-
- struct ds3_multipart_part_info part_info = {.part_num = part_num,
- .encoded = bl.c_str(),
- .encoded_length = bl.length()};
-
- ret = ds3_part_set_info(&part_info, ds3p, store->ds3, nullptr);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to set part info (" << get_bucket_name()
- << ", " << upload_id << ", " << part_num
- << "): ret=" << ret << dendl;
-    if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_UPLOAD;
- }
- }
-
- return ret;
-}
-
-std::unique_ptr<RGWRole> DaosStore::get_role(
- std::string name, std::string tenant, std::string path,
- std::string trust_policy, std::string max_session_duration_str,
- std::multimap<std::string, std::string> tags) {
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-std::unique_ptr<RGWRole> DaosStore::get_role(const RGWRoleInfo& info) {
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-std::unique_ptr<RGWRole> DaosStore::get_role(std::string id) {
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-int DaosStore::get_roles(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& path_prefix,
- const std::string& tenant,
- vector<std::unique_ptr<RGWRole>>& roles) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::unique_ptr<RGWOIDCProvider> DaosStore::get_oidc_provider() {
- RGWOIDCProvider* p = nullptr;
- return std::unique_ptr<RGWOIDCProvider>(p);
-}
-
-int DaosStore::get_oidc_providers(
- const DoutPrefixProvider* dpp, const std::string& tenant,
- vector<std::unique_ptr<RGWOIDCProvider>>& providers) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::unique_ptr<MultipartUpload> DaosBucket::get_multipart_upload(
- const std::string& oid, std::optional<std::string> upload_id,
- ACLOwner owner, ceph::real_time mtime) {
- return std::make_unique<DaosMultipartUpload>(store, this, oid, upload_id,
- owner, mtime);
-}
-
-std::unique_ptr<Writer> DaosStore::get_append_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule,
- const std::string& unique_tag, uint64_t position,
- uint64_t* cur_accounted_size) {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return nullptr;
-}
-
-std::unique_ptr<Writer> DaosStore::get_atomic_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch,
- const std::string& unique_tag) {
- ldpp_dout(dpp, 20) << "get_atomic_writer" << dendl;
- return std::make_unique<DaosAtomicWriter>(dpp, y, obj, this,
- owner, ptail_placement_rule,
- olh_epoch, unique_tag);
-}
-
-const std::string& DaosStore::get_compression_type(
- const rgw_placement_rule& rule) {
- return zone.zone_params->get_compression_type(rule);
-}
-
-bool DaosStore::valid_placement(const rgw_placement_rule& rule) {
- return zone.zone_params->valid_placement(rule);
-}
-
-std::unique_ptr<User> DaosStore::get_user(const rgw_user& u) {
-  ldout(cctx, 20) << "DEBUG: get_user: user=" << u.to_str() << dendl;
- return std::make_unique<DaosUser>(this, u);
-}
-
-int DaosStore::get_user_by_access_key(const DoutPrefixProvider* dpp,
- const std::string& key, optional_yield y,
- std::unique_ptr<User>* user) {
- // Initialize ds3_user_info
- bufferlist bl;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
- .encoded_length = size};
-
- int ret = ds3_user_get_by_key(key.c_str(), &user_info, ds3, nullptr);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_key failed, key=" << key
- << " ret=" << ret << dendl;
- return ret;
- }
-
- // Decode
- DaosUserInfo duinfo;
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- duinfo.decode(iter);
-
- User* u = new DaosUser(this, duinfo.info);
- if (!u) {
- return -ENOMEM;
- }
-
- user->reset(u);
- return 0;
-}
-
-int DaosStore::get_user_by_email(const DoutPrefixProvider* dpp,
- const std::string& email, optional_yield y,
- std::unique_ptr<User>* user) {
- // Initialize ds3_user_info
- bufferlist bl;
- uint64_t size = DS3_MAX_ENCODED_LEN;
- struct ds3_user_info user_info = {.encoded = bl.append_hole(size).c_str(),
- .encoded_length = size};
-
- int ret = ds3_user_get_by_email(email.c_str(), &user_info, ds3, nullptr);
-
- if (ret != 0) {
- ldpp_dout(dpp, 0) << "Error: ds3_user_get_by_email failed, email=" << email
- << " ret=" << ret << dendl;
- return ret;
- }
-
- // Decode
- DaosUserInfo duinfo;
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- duinfo.decode(iter);
-
- User* u = new DaosUser(this, duinfo.info);
- if (!u) {
- return -ENOMEM;
- }
-
- user->reset(u);
- return 0;
-}
-
-int DaosStore::get_user_by_swift(const DoutPrefixProvider* dpp,
- const std::string& user_str, optional_yield y,
- std::unique_ptr<User>* user) {
- /* Swift keys and subusers are not supported for now */
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::unique_ptr<Object> DaosStore::get_object(const rgw_obj_key& k) {
- return std::make_unique<DaosObject>(this, k);
-}
-
-inline std::ostream& operator<<(std::ostream& out, const rgw_user* u) {
- std::string s;
- if (u != nullptr)
- u->to_str(s);
- else
- s = "(nullptr)";
- return out << s;
-}
-
-int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u,
- const rgw_bucket& b, std::unique_ptr<Bucket>* bucket,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "DEBUG: get_bucket1: User: " << u << dendl;
- int ret;
- Bucket* bp;
-
- bp = new DaosBucket(this, b, u);
- ret = bp->load_bucket(dpp, y);
- if (ret != 0) {
- delete bp;
- return ret;
- }
-
- bucket->reset(bp);
- return 0;
-}
-
-int DaosStore::get_bucket(User* u, const RGWBucketInfo& i,
- std::unique_ptr<Bucket>* bucket) {
- DaosBucket* bp;
-
- bp = new DaosBucket(this, i, u);
- /* Don't need to fetch the bucket info, use the provided one */
-
- bucket->reset(bp);
- return 0;
-}
-
-int DaosStore::get_bucket(const DoutPrefixProvider* dpp, User* u,
- const std::string& tenant, const std::string& name,
- std::unique_ptr<Bucket>* bucket, optional_yield y) {
- ldpp_dout(dpp, 20) << "get_bucket" << dendl;
- rgw_bucket b;
-
- b.tenant = tenant;
- b.name = name;
-
- return get_bucket(dpp, u, b, bucket, y);
-}
-
-bool DaosStore::is_meta_master() { return true; }
-
-int DaosStore::forward_request_to_master(const DoutPrefixProvider* dpp,
- User* user, obj_version* objv,
- bufferlist& in_data, JSONParser* jp,
- req_info& info, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::forward_iam_request_to_master(const DoutPrefixProvider* dpp,
- const RGWAccessKey& key,
- obj_version* objv,
- bufferlist& in_data,
- RGWXMLDecoder::XMLParser* parser,
- req_info& info, optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::string DaosStore::zone_unique_id(uint64_t unique_num) { return ""; }
-
-std::string DaosStore::zone_unique_trans_id(const uint64_t unique_num) {
- return "";
-}
-
-int DaosStore::cluster_stat(RGWClusterStat& stats) {
- return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
-}
-
-std::unique_ptr<Lifecycle> DaosStore::get_lifecycle(void) {
- DAOS_NOT_IMPLEMENTED_LOG(nullptr);
-  return nullptr;
-}
-
-std::unique_ptr<Notification> DaosStore::get_notification(
- rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s,
- rgw::notify::EventType event_type, const std::string* object_name) {
- return std::make_unique<DaosNotification>(obj, src_obj, event_type);
-}
-
-std::unique_ptr<Notification> DaosStore::get_notification(
- const DoutPrefixProvider* dpp, Object* obj, Object* src_obj,
- rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
- std::string& _user_id, std::string& _user_tenant, std::string& _req_id,
- optional_yield y) {
- ldpp_dout(dpp, 20) << "get_notification" << dendl;
- return std::make_unique<DaosNotification>(obj, src_obj, event_type);
-}
-
-int DaosStore::log_usage(const DoutPrefixProvider* dpp,
- map<rgw_user_bucket, RGWUsageBatch>& usage_info) {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return 0;
-}
-
-int DaosStore::log_op(const DoutPrefixProvider* dpp, string& oid,
- bufferlist& bl) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::register_to_service_map(const DoutPrefixProvider* dpp,
- const string& daemon_type,
- const map<string, string>& meta) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-void DaosStore::get_quota(RGWQuota& quota) {
- // XXX: Not handled for the first pass
- return;
-}
-
-void DaosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
- RGWRateLimitInfo& user_ratelimit,
- RGWRateLimitInfo& anon_ratelimit) {
- return;
-}
-
-int DaosStore::set_buckets_enabled(const DoutPrefixProvider* dpp,
- std::vector<rgw_bucket>& buckets,
- bool enabled) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp,
- std::optional<rgw_zone_id> zone,
- std::optional<rgw_bucket> bucket,
- RGWBucketSyncPolicyHandlerRef* phandler,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-RGWDataSyncStatusManager* DaosStore::get_data_sync_manager(
- const rgw_zone_id& source_zone) {
- DAOS_NOT_IMPLEMENTED_LOG(nullptr);
-  return nullptr;
-}
-
-int DaosStore::read_all_usage(
- const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::trim_all_usage(const DoutPrefixProvider* dpp,
- uint64_t start_epoch, uint64_t end_epoch) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::get_config_key_val(string name, bufferlist* bl) {
- return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
-}
-
-int DaosStore::meta_list_keys_init(const DoutPrefixProvider* dpp,
- const string& section, const string& marker,
- void** phandle) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-int DaosStore::meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle,
- int max, list<string>& keys,
- bool* truncated) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-void DaosStore::meta_list_keys_complete(void* handle) { return; }
-
-std::string DaosStore::meta_get_marker(void* handle) { return ""; }
-
-int DaosStore::meta_remove(const DoutPrefixProvider* dpp, string& metadata_key,
- optional_yield y) {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
-}
-
-std::string DaosStore::get_cluster_id(const DoutPrefixProvider* dpp,
- optional_yield y) {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return "";
-}
-
-} // namespace rgw::sal
-
-extern "C" {
-
-void* newDaosStore(CephContext* cct) {
- return new rgw::sal::DaosStore(cct);
-}
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=2 sw=2 expandtab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * SAL implementation for the CORTX DAOS backend
- *
- * Copyright (C) 2022 Seagate Technology LLC and/or its Affiliates
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include <daos.h>
-#include <daos_s3.h>
-#include <uuid/uuid.h>
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "rgw_multi.h"
-#include "rgw_notify.h"
-#include "rgw_oidc_provider.h"
-#include "rgw_putobj_processor.h"
-#include "rgw_rados.h"
-#include "rgw_role.h"
-#include "rgw_sal_store.h"
-
-inline bool IsDebuggerAttached() {
-#ifdef DEBUG
- char buf[4096];
-
- const int status_fd = ::open("/proc/self/status", O_RDONLY);
- if (status_fd == -1) return false;
-
- const ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1);
- ::close(status_fd);
-
- if (num_read <= 0) return false;
-
- buf[num_read] = '\0';
- constexpr char tracerPidString[] = "TracerPid:";
- const auto tracer_pid_ptr = ::strstr(buf, tracerPidString);
- if (!tracer_pid_ptr) return false;
-
- for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1;
- characterPtr <= buf + num_read; ++characterPtr) {
- if (::isspace(*characterPtr))
- continue;
- else
- return ::isdigit(*characterPtr) != 0 && *characterPtr != '0';
- }
-#endif // DEBUG
- return false;
-}
-
-inline void DebugBreak() {
-#ifdef DEBUG
- // only break into the debugger if the debugger is attached
- if (IsDebuggerAttached())
- raise(SIGINT); // breaks into GDB and stops, can be continued
-#endif // DEBUG
-}
-
-inline int NotImplementedLog(const DoutPrefixProvider* ldpp,
- const char* filename, int linenumber,
- const char* functionname) {
- if (ldpp)
- ldpp_dout(ldpp, 20) << filename << "(" << linenumber << ") " << functionname
- << ": Not implemented" << dendl;
- return 0;
-}
-
-inline int NotImplementedGdbBreak(const DoutPrefixProvider* ldpp,
- const char* filename, int linenumber,
- const char* functionname) {
- NotImplementedLog(ldpp, filename, linenumber, functionname);
- DebugBreak();
- return 0;
-}
-
-#define DAOS_NOT_IMPLEMENTED_GDB_BREAK(ldpp) \
- NotImplementedGdbBreak(ldpp, __FILE__, __LINE__, __FUNCTION__)
-#define DAOS_NOT_IMPLEMENTED_LOG(ldpp) \
- NotImplementedLog(ldpp, __FILE__, __LINE__, __FUNCTION__)
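-
-// Typical usage: `return DAOS_NOT_IMPLEMENTED_LOG(dpp);`, which logs the call
-// site at debug level 20 and returns 0 so callers treat the call as a no-op.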
-
-namespace rgw::sal {
-
-class DaosStore;
-class DaosObject;
-
-#ifdef DEBUG
-// Prepends each log entry with "filename(source_line) function_name", which
-// makes it simple to associate log entries with the source that generated them.
-#undef ldpp_dout
-#define ldpp_dout(dpp, v) \
- if (decltype(auto) pdpp = (dpp); \
- pdpp) /* workaround -Wnonnull-compare for 'this' */ \
- dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \
- pdpp->gen_prefix(*_dout) \
- << __FILE__ << "(" << __LINE__ << ") " << __FUNCTION__ << " - "
-#endif // DEBUG
-
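-// Encoded user record: RGWUserInfo plus its version and attributes, packed
-// into a single buffer when exchanged with the ds3 user API.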
-struct DaosUserInfo {
- RGWUserInfo info;
- obj_version user_version;
- rgw::sal::Attrs attrs;
-
- void encode(bufferlist& bl) const {
- ENCODE_START(3, 3, bl);
- encode(info, bl);
- encode(user_version, bl);
- encode(attrs, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl) {
- DECODE_START(3, bl);
- decode(info, bl);
- decode(user_version, bl);
- decode(attrs, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(DaosUserInfo);
-
-class DaosNotification : public StoreNotification {
- public:
- DaosNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type)
- : StoreNotification(_obj, _src_obj, _type) {}
- ~DaosNotification() = default;
-
- virtual int publish_reserve(const DoutPrefixProvider* dpp,
- RGWObjTags* obj_tags = nullptr) override {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
- }
- virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size,
- const ceph::real_time& mtime,
- const std::string& etag,
- const std::string& version) override {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
- }
-};
-
-class DaosUser : public StoreUser {
- private:
- DaosStore* store;
- std::vector<const char*> access_ids;
-
- public:
- DaosUser(DaosStore* _st, const rgw_user& _u) : StoreUser(_u), store(_st) {}
- DaosUser(DaosStore* _st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) {}
- DaosUser(DaosStore* _st) : store(_st) {}
- DaosUser(DaosUser& _o) = default;
- DaosUser() {}
-
- virtual std::unique_ptr<User> clone() override {
- return std::make_unique<DaosUser>(*this);
- }
- int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker,
- const std::string& end_marker, uint64_t max, bool need_stats,
- BucketList& buckets, optional_yield y) override;
- virtual int create_bucket(
- const DoutPrefixProvider* dpp, const rgw_bucket& b,
- const std::string& zonegroup_id, rgw_placement_rule& placement_rule,
- std::string& swift_ver_location, const RGWQuotaInfo* pquota_info,
- const RGWAccessControlPolicy& policy, Attrs& attrs, RGWBucketInfo& info,
- obj_version& ep_objv, bool exclusive, bool obj_lock_enabled,
- bool* existed, req_info& req_info, std::unique_ptr<Bucket>* bucket,
- optional_yield y) override;
- virtual int read_attrs(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp,
- Attrs& new_attrs,
- optional_yield y) override;
- virtual int read_stats(const DoutPrefixProvider* dpp, optional_yield y,
- RGWStorageStats* stats,
- ceph::real_time* last_stats_sync = nullptr,
- ceph::real_time* last_stats_update = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider* dpp,
- RGWGetUserStats_CB* cb) override;
- virtual int complete_flush_stats(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int read_usage(
- const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch) override;
-
- virtual int load_user(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y,
- bool exclusive,
- RGWUserInfo* old_info = nullptr) override;
- virtual int remove_user(const DoutPrefixProvider* dpp,
- optional_yield y) override;
-
- /** Read user info without loading it */
- int read_user(const DoutPrefixProvider* dpp, std::string name,
- DaosUserInfo* duinfo);
-
- std::unique_ptr<struct ds3_user_info> get_encoded_info(bufferlist& bl,
- obj_version& obj_ver);
-
- friend class DaosBucket;
-};
-
-// DaosBucketInfo holds the RGWBucketInfo plus the other information shown when
-// listing a bucket. The structure is encoded and stored as the value of the
-// global bucket instance index.
-// TODO: compare pros and cons of separating the bucket_attrs (ACLs, tags, etc.)
-// into a different index.
-struct DaosBucketInfo {
- RGWBucketInfo info;
-
- obj_version bucket_version;
- ceph::real_time mtime;
-
- rgw::sal::Attrs bucket_attrs;
-
- void encode(bufferlist& bl) const {
- ENCODE_START(4, 4, bl);
- encode(info, bl);
- encode(bucket_version, bl);
- encode(mtime, bl);
- encode(bucket_attrs, bl); // rgw_cache.h example for a map
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl) {
- DECODE_START(4, bl);
- decode(info, bl);
- decode(bucket_version, bl);
- decode(mtime, bl);
- decode(bucket_attrs, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(DaosBucketInfo);
-
-class DaosBucket : public StoreBucket {
- private:
- DaosStore* store;
- RGWAccessControlPolicy acls;
-
- public:
- /** Container ds3b handle */
- ds3_bucket_t* ds3b = nullptr;
-
- DaosBucket(DaosStore* _st) : store(_st), acls() {}
-
- DaosBucket(const DaosBucket& _daos_bucket)
- : store(_daos_bucket.store), acls(), ds3b(nullptr) {
- // TODO: deep copy all objects
- }
-
- DaosBucket(DaosStore* _st, User* _u) : StoreBucket(_u), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const rgw_bucket& _b)
- : StoreBucket(_b), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const RGWBucketEnt& _e)
- : StoreBucket(_e), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const RGWBucketInfo& _i)
- : StoreBucket(_i), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const rgw_bucket& _b, User* _u)
- : StoreBucket(_b, _u), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const RGWBucketEnt& _e, User* _u)
- : StoreBucket(_e, _u), store(_st), acls() {}
-
- DaosBucket(DaosStore* _st, const RGWBucketInfo& _i, User* _u)
- : StoreBucket(_i, _u), store(_st), acls() {}
-
- ~DaosBucket();
-
- virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
- virtual int list(const DoutPrefixProvider* dpp, ListParams&, int,
- ListResults&, optional_yield y) override;
- virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children,
- bool forward_to_master, req_info* req_info,
- optional_yield y) override;
- virtual int remove_bucket_bypass_gc(int concurrent_max,
- bool keep_index_consistent,
- optional_yield y,
- const DoutPrefixProvider* dpp) override;
- virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
- virtual int set_acl(const DoutPrefixProvider* dpp,
- RGWAccessControlPolicy& acl, optional_yield y) override;
- virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y,
- bool get_stats = false) override;
- virtual int read_stats(const DoutPrefixProvider* dpp,
- const bucket_index_layout_generation& idx_layout,
- int shard_id, std::string* bucket_ver,
- std::string* master_ver,
- std::map<RGWObjCategory, RGWStorageStats>& stats,
- std::string* max_marker = nullptr,
- bool* syncstopped = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider* dpp,
- const bucket_index_layout_generation& idx_layout,
- int shard_id,
- RGWGetBucketStats_CB* ctx) override;
- virtual int sync_user_stats(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int update_container_stats(const DoutPrefixProvider* dpp) override;
- virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override;
- virtual int chown(const DoutPrefixProvider* dpp, User& new_user,
- optional_yield y) override;
- virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive,
- ceph::real_time mtime) override;
- virtual bool is_owner(User* user) override;
- virtual int check_empty(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int check_quota(const DoutPrefixProvider* dpp, RGWQuota& quota,
- uint64_t obj_size, optional_yield y,
- bool check_size_only = false) override;
- virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& attrs,
- optional_yield y) override;
- virtual int try_refresh_info(const DoutPrefixProvider* dpp,
- ceph::real_time* pmtime) override;
- virtual int read_usage(
- const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_usage(const DoutPrefixProvider* dpp, uint64_t start_epoch,
- uint64_t end_epoch) override;
- virtual int remove_objs_from_index(
- const DoutPrefixProvider* dpp,
- std::list<rgw_obj_index_key>& objs_to_unlink) override;
- virtual int check_index(
- const DoutPrefixProvider* dpp,
- std::map<RGWObjCategory, RGWStorageStats>& existing_stats,
- std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) override;
- virtual int rebuild_index(const DoutPrefixProvider* dpp) override;
- virtual int set_tag_timeout(const DoutPrefixProvider* dpp,
- uint64_t timeout) override;
- virtual int purge_instance(const DoutPrefixProvider* dpp) override;
- virtual std::unique_ptr<Bucket> clone() override {
- return std::make_unique<DaosBucket>(*this);
- }
- virtual std::unique_ptr<MultipartUpload> get_multipart_upload(
- const std::string& oid,
- std::optional<std::string> upload_id = std::nullopt, ACLOwner owner = {},
- ceph::real_time mtime = real_clock::now()) override;
- virtual int list_multiparts(
- const DoutPrefixProvider* dpp, const std::string& prefix,
- std::string& marker, const std::string& delim, const int& max_uploads,
- std::vector<std::unique_ptr<MultipartUpload>>& uploads,
- std::map<std::string, bool>* common_prefixes,
- bool* is_truncated) override;
- virtual int abort_multiparts(const DoutPrefixProvider* dpp,
- CephContext* cct) override;
-
- int open(const DoutPrefixProvider* dpp);
- int close(const DoutPrefixProvider* dpp);
- bool is_open() { return ds3b != nullptr; }
- std::unique_ptr<struct ds3_bucket_info> get_encoded_info(
- bufferlist& bl, ceph::real_time mtime);
-
- friend class DaosStore;
-};
-
-class DaosPlacementTier : public StorePlacementTier {
- DaosStore* store;
- RGWZoneGroupPlacementTier tier;
-
- public:
- DaosPlacementTier(DaosStore* _store, const RGWZoneGroupPlacementTier& _tier)
- : store(_store), tier(_tier) {}
- virtual ~DaosPlacementTier() = default;
-
- virtual const std::string& get_tier_type() { return tier.tier_type; }
- virtual const std::string& get_storage_class() { return tier.storage_class; }
- virtual bool retain_head_object() { return tier.retain_head_object; }
- RGWZoneGroupPlacementTier& get_rt() { return tier; }
-};
-
-class DaosZoneGroup : public StoreZoneGroup {
- DaosStore* store;
- const RGWZoneGroup group;
- std::string empty;
-
- public:
- DaosZoneGroup(DaosStore* _store) : store(_store), group() {}
- DaosZoneGroup(DaosStore* _store, const RGWZoneGroup& _group)
- : store(_store), group(_group) {}
- virtual ~DaosZoneGroup() = default;
-
- virtual const std::string& get_id() const override { return group.get_id(); };
- virtual const std::string& get_name() const override {
- return group.get_name();
- };
- virtual int equals(const std::string& other_zonegroup) const override {
- return group.equals(other_zonegroup);
- };
- /** Get the endpoint from zonegroup, or from master zone if not set */
- virtual const std::string& get_endpoint() const override;
- virtual bool placement_target_exists(std::string& target) const override;
- virtual bool is_master_zonegroup() const override {
- return group.is_master_zonegroup();
- };
- virtual const std::string& get_api_name() const override {
- return group.api_name;
- };
- virtual int get_placement_target_names(
- std::set<std::string>& names) const override;
- virtual const std::string& get_default_placement_name() const override {
- return group.default_placement.name;
- };
- virtual int get_hostnames(std::list<std::string>& names) const override {
- names = group.hostnames;
- return 0;
- };
- virtual int get_s3website_hostnames(
- std::list<std::string>& names) const override {
- names = group.hostnames_s3website;
- return 0;
- };
- virtual int get_zone_count() const override { return group.zones.size(); }
- virtual int get_placement_tier(const rgw_placement_rule& rule,
- std::unique_ptr<PlacementTier>* tier);
- virtual std::unique_ptr<ZoneGroup> clone() override {
- return std::make_unique<DaosZoneGroup>(store, group);
- }
- const RGWZoneGroup& get_group() { return group; }
-};
-
-class DaosZone : public StoreZone {
- protected:
- DaosStore* store;
- RGWRealm* realm{nullptr};
- DaosZoneGroup zonegroup;
- RGWZone* zone_public_config{
- nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
- RGWZoneParams* zone_params{
- nullptr}; /* internal zone params, e.g., rados pools */
- RGWPeriod* current_period{nullptr};
- rgw_zone_id cur_zone_id;
-
- public:
- DaosZone(DaosStore* _store) : store(_store), zonegroup(_store) {
- realm = new RGWRealm();
- zone_public_config = new RGWZone();
- zone_params = new RGWZoneParams();
- current_period = new RGWPeriod();
- cur_zone_id = rgw_zone_id(zone_params->get_id());
-
- // XXX: only default and STANDARD supported for now
- RGWZonePlacementInfo info;
- RGWZoneStorageClasses sc;
- sc.set_storage_class("STANDARD", nullptr, nullptr);
- info.storage_classes = sc;
- zone_params->placement_pools["default"] = info;
- }
- DaosZone(DaosStore* _store, DaosZoneGroup _zg)
- : store(_store), zonegroup(_zg) {
- realm = new RGWRealm();
- zone_public_config = new RGWZone();
- zone_params = new RGWZoneParams();
- current_period = new RGWPeriod();
- cur_zone_id = rgw_zone_id(zone_params->get_id());
-
- // XXX: only default and STANDARD supported for now
- RGWZonePlacementInfo info;
- RGWZoneStorageClasses sc;
- sc.set_storage_class("STANDARD", nullptr, nullptr);
- info.storage_classes = sc;
- zone_params->placement_pools["default"] = info;
- }
- ~DaosZone() = default;
-
- virtual std::unique_ptr<Zone> clone() override {
- return std::make_unique<DaosZone>(store);
- }
- virtual ZoneGroup& get_zonegroup() override;
- virtual int get_zonegroup(const std::string& id,
- std::unique_ptr<ZoneGroup>* zonegroup) override;
- virtual const rgw_zone_id& get_id() override;
- virtual const std::string& get_name() const override;
- virtual bool is_writeable() override;
- virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
- virtual const std::string& get_current_period_id() override;
- virtual const RGWAccessKey& get_system_key() {
- return zone_params->system_key;
- }
- virtual const std::string& get_realm_name() { return realm->get_name(); }
- virtual const std::string& get_realm_id() { return realm->get_id(); }
- virtual const std::string_view get_tier_type() { return "rgw"; }
-
- friend class DaosStore;
-};
-
-class DaosLuaManager : public StoreLuaManager {
- DaosStore* store;
-
- public:
- DaosLuaManager(DaosStore* _s) : store(_s) {}
- virtual ~DaosLuaManager() = default;
-
- virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& key, std::string& script) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-
- virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& key,
- const std::string& script) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-
- virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& key) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-
- virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& package_name) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-
- virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& package_name) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-
- virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y,
- rgw::lua::packages_t& packages) override {
- DAOS_NOT_IMPLEMENTED_LOG(dpp);
- return -ENOENT;
- };
-};
-
-class DaosObject : public StoreObject {
- private:
- DaosStore* store;
- RGWAccessControlPolicy acls;
-
- public:
- struct DaosReadOp : public StoreReadOp {
- private:
- DaosObject* source;
-
- public:
- DaosReadOp(DaosObject* _source);
-
- virtual int prepare(optional_yield y,
- const DoutPrefixProvider* dpp) override;
-
- /*
- * Both `read` and `iterate` read up through index `end`
- * *inclusive*. The number of bytes that could be returned is
- * `end - ofs + 1`.
- */
- virtual int read(int64_t off, int64_t end, bufferlist& bl, optional_yield y,
- const DoutPrefixProvider* dpp) override;
- virtual int iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end,
- RGWGetDataCB* cb, optional_yield y) override;
-
- virtual int get_attr(const DoutPrefixProvider* dpp, const char* name,
- bufferlist& dest, optional_yield y) override;
- };
-
- struct DaosDeleteOp : public StoreDeleteOp {
- private:
- DaosObject* source;
-
- public:
- DaosDeleteOp(DaosObject* _source);
-
- virtual int delete_obj(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- };
-
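-  // Underlying ds3 object handle; non-null only while the object is open.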
- ds3_obj_t* ds3o = nullptr;
-
- DaosObject() = default;
-
- DaosObject(DaosStore* _st, const rgw_obj_key& _k)
- : StoreObject(_k), store(_st), acls() {}
- DaosObject(DaosStore* _st, const rgw_obj_key& _k, Bucket* _b)
- : StoreObject(_k, _b), store(_st), acls() {}
-
- DaosObject(DaosObject& _o) = default;
-
- virtual ~DaosObject();
-
- virtual int delete_object(const DoutPrefixProvider* dpp, optional_yield y,
- bool prevent_versioning = false) override;
- virtual int copy_object(
- User* user, req_info* info, const rgw_zone_id& source_zone,
- rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
- rgw::sal::Bucket* src_bucket, const rgw_placement_rule& dest_placement,
- ceph::real_time* src_mtime, ceph::real_time* mtime,
- const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
- bool high_precision_time, const char* if_match, const char* if_nomatch,
- AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
- RGWObjCategory category, uint64_t olh_epoch,
- boost::optional<ceph::real_time> delete_at, std::string* version_id,
- std::string* tag, std::string* etag, void (*progress_cb)(off_t, void*),
- void* progress_data, const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
- virtual int set_acl(const RGWAccessControlPolicy& acl) override {
- acls = acl;
- return 0;
- }
-
- virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState** state,
- optional_yield y, bool follow_olh = true) override;
- virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
- Attrs* delattrs, optional_yield y) override;
- virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp,
- rgw_obj* target_obj = NULL) override;
- virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val,
- optional_yield y,
- const DoutPrefixProvider* dpp) override;
- virtual int delete_obj_attrs(const DoutPrefixProvider* dpp,
- const char* attr_name,
- optional_yield y) override;
- virtual bool is_expired() override;
- virtual void gen_rand_obj_instance_name() override;
- virtual std::unique_ptr<Object> clone() override {
- return std::make_unique<DaosObject>(*this);
- }
- virtual std::unique_ptr<MPSerializer> get_serializer(
- const DoutPrefixProvider* dpp, const std::string& lock_name) override;
- virtual int transition(Bucket* bucket,
- const rgw_placement_rule& placement_rule,
- const real_time& mtime, uint64_t olh_epoch,
- const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int transition_to_cloud(Bucket* bucket, rgw::sal::PlacementTier* tier,
- rgw_bucket_dir_entry& o,
- std::set<std::string>& cloud_targets,
- CephContext* cct, bool update_object,
- const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual bool placement_rules_match(rgw_placement_rule& r1,
- rgw_placement_rule& r2) override;
- virtual int dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y,
- Formatter* f) override;
-
- /* Swift versioning */
- virtual int swift_versioning_restore(bool& restored,
- const DoutPrefixProvider* dpp) override;
- virtual int swift_versioning_copy(const DoutPrefixProvider* dpp,
- optional_yield y) override;
-
- /* OPs */
- virtual std::unique_ptr<ReadOp> get_read_op() override;
- virtual std::unique_ptr<DeleteOp> get_delete_op() override;
-
- /* OMAP */
- virtual int omap_get_vals_by_keys(const DoutPrefixProvider* dpp,
- const std::string& oid,
- const std::set<std::string>& keys,
- Attrs* vals) override;
- virtual int omap_set_val_by_key(const DoutPrefixProvider* dpp,
- const std::string& key, bufferlist& val,
- bool must_exist, optional_yield y) override;
- virtual int chown(User& new_user, const DoutPrefixProvider* dpp,
- optional_yield y) override;
-
- bool is_open() { return ds3o != nullptr; };
- // Only lookup the object, do not create
- int lookup(const DoutPrefixProvider* dpp);
- // Create the object, truncate if exists
- int create(const DoutPrefixProvider* dpp);
- // Release the daos resources
- int close(const DoutPrefixProvider* dpp);
- // Write to object starting from offset
- int write(const DoutPrefixProvider* dpp, bufferlist&& data, uint64_t offset);
- // Read size bytes from object starting from offset
- int read(const DoutPrefixProvider* dpp, bufferlist& data, uint64_t offset,
- uint64_t& size);
- // Get the object's dirent and attrs
- int get_dir_entry_attrs(const DoutPrefixProvider* dpp,
- rgw_bucket_dir_entry* ent, Attrs* getattrs = nullptr);
- // Set the object's dirent and attrs
- int set_dir_entry_attrs(const DoutPrefixProvider* dpp,
- rgw_bucket_dir_entry* ent, Attrs* setattrs = nullptr);
- // Marks this DAOS object as being the latest version and unmarks all other
- // versions as latest
- int mark_as_latest(const DoutPrefixProvider* dpp, ceph::real_time set_mtime);
- // get_bucket casted as DaosBucket*
- DaosBucket* get_daos_bucket() {
- return static_cast<DaosBucket*>(get_bucket());
- }
-};
-
-// A placeholder locking class for multipart upload.
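-// Its try_lock() and unlock() are not implemented and always report success.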
-class MPDaosSerializer : public StoreMPSerializer {
- public:
- MPDaosSerializer(const DoutPrefixProvider* dpp, DaosStore* store,
- DaosObject* obj, const std::string& lock_name) {}
-
- virtual int try_lock(const DoutPrefixProvider* dpp, utime_t dur,
- optional_yield y) override {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
- }
- virtual int unlock() override { return DAOS_NOT_IMPLEMENTED_LOG(nullptr); }
-};
-
-class DaosAtomicWriter : public StoreWriter {
- protected:
- rgw::sal::DaosStore* store;
- const rgw_user& owner;
- const rgw_placement_rule* ptail_placement_rule;
- uint64_t olh_epoch;
- const std::string& unique_tag;
- DaosObject obj;
- uint64_t total_data_size = 0; // for total data being uploaded
-
- public:
- DaosAtomicWriter(const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj,
- DaosStore* _store, const rgw_user& _owner,
- const rgw_placement_rule* _ptail_placement_rule,
- uint64_t _olh_epoch, const std::string& _unique_tag);
- ~DaosAtomicWriter() = default;
-
- // prepare to start processing object data
- virtual int prepare(optional_yield y) override;
-
- // Process a bufferlist
- virtual int process(bufferlist&& data, uint64_t offset) override;
-
- // complete the operation and make its result visible to clients
- virtual int complete(size_t accounted_size, const std::string& etag,
- ceph::real_time* mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at, const char* if_match,
- const char* if_nomatch, const std::string* user_data,
- rgw_zone_set* zones_trace, bool* canceled,
- optional_yield y) override;
-};
-
-class DaosMultipartWriter : public StoreWriter {
- protected:
- rgw::sal::DaosStore* store;
- MultipartUpload* upload;
- std::string upload_id;
-
- // Part parameters.
- const uint64_t part_num;
- const std::string part_num_str;
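-  // Bytes actually written to this part so far (accumulated by process()).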
- uint64_t actual_part_size = 0;
-
- ds3_part_t* ds3p = nullptr;
- bool is_open() { return ds3p != nullptr; };
-
- public:
- DaosMultipartWriter(const DoutPrefixProvider* dpp, optional_yield y,
- MultipartUpload* _upload,
- rgw::sal::Object* obj,
- DaosStore* _store, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule,
- uint64_t _part_num, const std::string& part_num_str)
- : StoreWriter(dpp, y),
- store(_store),
- upload(_upload),
- upload_id(_upload->get_upload_id()),
- part_num(_part_num),
- part_num_str(part_num_str) {}
- virtual ~DaosMultipartWriter();
-
- // prepare to start processing object data
- virtual int prepare(optional_yield y) override;
-
- // Process a bufferlist
- virtual int process(bufferlist&& data, uint64_t offset) override;
-
- // complete the operation and make its result visible to clients
- virtual int complete(size_t accounted_size, const std::string& etag,
- ceph::real_time* mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at, const char* if_match,
- const char* if_nomatch, const std::string* user_data,
- rgw_zone_set* zones_trace, bool* canceled,
- optional_yield y) override;
-
- const std::string& get_bucket_name();
-};
-
-class DaosMultipartPart : public StoreMultipartPart {
- protected:
- RGWUploadPartInfo info;
-
- public:
- DaosMultipartPart() = default;
- virtual ~DaosMultipartPart() = default;
-
- virtual uint32_t get_num() { return info.num; }
- virtual uint64_t get_size() { return info.accounted_size; }
- virtual const std::string& get_etag() { return info.etag; }
- virtual ceph::real_time& get_mtime() { return info.modified; }
-
- friend class DaosMultipartUpload;
-};
-
-class DaosMultipartUpload : public StoreMultipartUpload {
- DaosStore* store;
- RGWMPObj mp_obj;
- ACLOwner owner;
- ceph::real_time mtime;
- rgw_placement_rule placement;
- RGWObjManifest manifest;
-
- public:
- DaosMultipartUpload(DaosStore* _store, Bucket* _bucket,
- const std::string& oid,
- std::optional<std::string> upload_id, ACLOwner _owner,
- ceph::real_time _mtime)
- : StoreMultipartUpload(_bucket),
- store(_store),
- mp_obj(oid, upload_id),
- owner(_owner),
- mtime(_mtime) {}
- virtual ~DaosMultipartUpload() = default;
-
- virtual const std::string& get_meta() const { return mp_obj.get_meta(); }
- virtual const std::string& get_key() const { return mp_obj.get_key(); }
- virtual const std::string& get_upload_id() const {
- return mp_obj.get_upload_id();
- }
- virtual const ACLOwner& get_owner() const override { return owner; }
- virtual ceph::real_time& get_mtime() { return mtime; }
- virtual std::unique_ptr<rgw::sal::Object> get_meta_obj() override;
- virtual int init(const DoutPrefixProvider* dpp, optional_yield y,
- ACLOwner& owner, rgw_placement_rule& dest_placement,
- rgw::sal::Attrs& attrs) override;
- virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
- int num_parts, int marker, int* next_marker,
- bool* truncated,
- bool assume_unsorted = false) override;
- virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override;
- virtual int complete(const DoutPrefixProvider* dpp, optional_yield y,
- CephContext* cct, std::map<int, std::string>& part_etags,
- std::list<rgw_obj_index_key>& remove_objs,
- uint64_t& accounted_size, bool& compressed,
- RGWCompressionInfo& cs_info, off_t& off,
- std::string& tag, ACLOwner& owner, uint64_t olh_epoch,
- rgw::sal::Object* target_obj) override;
- virtual int get_info(const DoutPrefixProvider* dpp, optional_yield y,
- rgw_placement_rule** rule,
- rgw::sal::Attrs* attrs = nullptr) override;
- virtual std::unique_ptr<Writer> get_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule, uint64_t part_num,
- const std::string& part_num_str) override;
- const std::string& get_bucket_name() { return bucket->get_name(); }
-};
-
-class DaosStore : public StoreDriver {
- private:
- DaosZone zone;
- RGWSyncModuleInstanceRef sync_module;
-
- public:
- ds3_t* ds3 = nullptr;
-
- CephContext* cctx;
-
- DaosStore(CephContext* c) : zone(this), cctx(c) {}
- ~DaosStore() = default;
-
- virtual const std::string get_name() const override { return "daos"; }
-
- virtual std::unique_ptr<User> get_user(const rgw_user& u) override;
- virtual std::string get_cluster_id(const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual int get_user_by_access_key(const DoutPrefixProvider* dpp,
- const std::string& key, optional_yield y,
- std::unique_ptr<User>* user) override;
- virtual int get_user_by_email(const DoutPrefixProvider* dpp,
- const std::string& email, optional_yield y,
- std::unique_ptr<User>* user) override;
- virtual int get_user_by_swift(const DoutPrefixProvider* dpp,
- const std::string& user_str, optional_yield y,
- std::unique_ptr<User>* user) override;
- virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
- virtual int get_bucket(const DoutPrefixProvider* dpp, User* u,
- const rgw_bucket& b, std::unique_ptr<Bucket>* bucket,
- optional_yield y) override;
- virtual int get_bucket(User* u, const RGWBucketInfo& i,
- std::unique_ptr<Bucket>* bucket) override;
- virtual int get_bucket(const DoutPrefixProvider* dpp, User* u,
- const std::string& tenant, const std::string& name,
- std::unique_ptr<Bucket>* bucket,
- optional_yield y) override;
- virtual bool is_meta_master() override;
- virtual int forward_request_to_master(const DoutPrefixProvider* dpp,
- User* user, obj_version* objv,
- bufferlist& in_data, JSONParser* jp,
- req_info& info,
- optional_yield y) override;
- virtual int forward_iam_request_to_master(
- const DoutPrefixProvider* dpp, const RGWAccessKey& key, obj_version* objv,
- bufferlist& in_data, RGWXMLDecoder::XMLParser* parser, req_info& info,
- optional_yield y) override;
- virtual Zone* get_zone() { return &zone; }
- virtual std::string zone_unique_id(uint64_t unique_num) override;
- virtual std::string zone_unique_trans_id(const uint64_t unique_num) override;
- virtual int cluster_stat(RGWClusterStat& stats) override;
- virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
- virtual std::unique_ptr<Notification> get_notification(
- rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s,
- rgw::notify::EventType event_type, optional_yield y,
- const std::string* object_name = nullptr) override;
- virtual std::unique_ptr<Notification> get_notification(
- const DoutPrefixProvider* dpp, rgw::sal::Object* obj,
- rgw::sal::Object* src_obj, rgw::notify::EventType event_type,
- rgw::sal::Bucket* _bucket, std::string& _user_id,
- std::string& _user_tenant, std::string& _req_id,
- optional_yield y) override;
- virtual RGWLC* get_rgwlc(void) override { return NULL; }
- virtual RGWCoroutinesManagerRegistry* get_cr_registry() override {
- return NULL;
- }
-
- virtual int log_usage(
- const DoutPrefixProvider* dpp,
- std::map<rgw_user_bucket, RGWUsageBatch>& usage_info) override;
- virtual int log_op(const DoutPrefixProvider* dpp, std::string& oid,
- bufferlist& bl) override;
- virtual int register_to_service_map(
- const DoutPrefixProvider* dpp, const std::string& daemon_type,
- const std::map<std::string, std::string>& meta) override;
- virtual void get_quota(RGWQuota& quota) override;
- virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
- RGWRateLimitInfo& user_ratelimit,
- RGWRateLimitInfo& anon_ratelimit) override;
- virtual int set_buckets_enabled(const DoutPrefixProvider* dpp,
- std::vector<rgw_bucket>& buckets,
- bool enabled) override;
- virtual uint64_t get_new_req_id() override {
- return DAOS_NOT_IMPLEMENTED_LOG(nullptr);
- }
- virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp,
- std::optional<rgw_zone_id> zone,
- std::optional<rgw_bucket> bucket,
- RGWBucketSyncPolicyHandlerRef* phandler,
- optional_yield y) override;
- virtual RGWDataSyncStatusManager* get_data_sync_manager(
- const rgw_zone_id& source_zone) override;
- virtual void wakeup_meta_sync_shards(std::set<int>& shard_ids) override {
- return;
- }
- virtual void wakeup_data_sync_shards(
- const DoutPrefixProvider* dpp, const rgw_zone_id& source_zone,
- boost::container::flat_map<
- int, boost::container::flat_set<rgw_data_notify_entry>>& shard_ids)
- override {
- return;
- }
- virtual int clear_usage(const DoutPrefixProvider* dpp) override {
- return DAOS_NOT_IMPLEMENTED_LOG(dpp);
- }
- virtual int read_all_usage(
- const DoutPrefixProvider* dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_all_usage(const DoutPrefixProvider* dpp,
- uint64_t start_epoch, uint64_t end_epoch) override;
- virtual int get_config_key_val(std::string name, bufferlist* bl) override;
- virtual int meta_list_keys_init(const DoutPrefixProvider* dpp,
- const std::string& section,
- const std::string& marker,
- void** phandle) override;
- virtual int meta_list_keys_next(const DoutPrefixProvider* dpp, void* handle,
- int max, std::list<std::string>& keys,
- bool* truncated) override;
- virtual void meta_list_keys_complete(void* handle) override;
- virtual std::string meta_get_marker(void* handle) override;
- virtual int meta_remove(const DoutPrefixProvider* dpp,
- std::string& metadata_key, optional_yield y) override;
-
- virtual const RGWSyncModuleInstanceRef& get_sync_module() {
- return sync_module;
- }
- virtual std::string get_host_id() { return ""; }
-
- virtual std::unique_ptr<LuaManager> get_lua_manager() override;
- virtual std::unique_ptr<RGWRole> get_role(
- std::string name, std::string tenant, std::string path = "",
- std::string trust_policy = "", std::string max_session_duration_str = "",
- std::multimap<std::string, std::string> tags = {}) override;
- virtual std::unique_ptr<RGWRole> get_role(const RGWRoleInfo& info) override;
- virtual std::unique_ptr<RGWRole> get_role(std::string id) override;
- virtual int get_roles(const DoutPrefixProvider* dpp, optional_yield y,
- const std::string& path_prefix,
- const std::string& tenant,
- std::vector<std::unique_ptr<RGWRole>>& roles) override;
- virtual std::unique_ptr<RGWOIDCProvider> get_oidc_provider() override;
- virtual int get_oidc_providers(
- const DoutPrefixProvider* dpp, const std::string& tenant,
- std::vector<std::unique_ptr<RGWOIDCProvider>>& providers) override;
- virtual std::unique_ptr<Writer> get_append_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule,
- const std::string& unique_tag, uint64_t position,
- uint64_t* cur_accounted_size) override;
- virtual std::unique_ptr<Writer> get_atomic_writer(
- const DoutPrefixProvider* dpp, optional_yield y,
- rgw::sal::Object* obj, const rgw_user& owner,
- const rgw_placement_rule* ptail_placement_rule, uint64_t olh_epoch,
- const std::string& unique_tag) override;
- virtual const std::string& get_compression_type(
- const rgw_placement_rule& rule) override;
- virtual bool valid_placement(const rgw_placement_rule& rule) override;
-
- virtual void finalize(void) override;
-
- virtual CephContext* ctx(void) override { return cctx; }
-
- virtual int initialize(CephContext* cct,
- const DoutPrefixProvider* dpp) override;
-};
-
-} // namespace rgw::sal
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=2 sw=2 expandtab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * SAL implementation for the CORTX Motr backend
- *
- * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include <errno.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-extern "C" {
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wextern-c-compat"
-#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
-#include "motr/config.h"
-#include "lib/types.h"
-#include "lib/trace.h" // m0_trace_set_mmapped_buffer
-#include "motr/layout.h" // M0_OBJ_LAYOUT_ID
-#include "helpers/helpers.h" // m0_ufid_next
-#pragma clang diagnostic pop
-}
-
-#include "common/Clock.h"
-#include "common/errno.h"
-
-#include "rgw_compression.h"
-#include "rgw_sal.h"
-#include "rgw_sal_motr.h"
-#include "rgw_bucket.h"
-
-#define dout_subsys ceph_subsys_rgw
-
-using std::string;
-using std::map;
-using std::vector;
-using std::set;
-using std::list;
-
-static string mp_ns = RGW_OBJ_NS_MULTIPART;
-static struct m0_ufid_generator ufid_gr;
-
-namespace rgw::sal {
-
-using ::ceph::encode;
-using ::ceph::decode;
-
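-// Names of the global motr indices used for user, bucket and IAM
-// access-key/email records.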
-static std::string motr_global_indices[] = {
- RGW_MOTR_USERS_IDX_NAME,
- RGW_MOTR_BUCKET_INST_IDX_NAME,
- RGW_MOTR_BUCKET_HD_IDX_NAME,
- RGW_IAM_MOTR_ACCESS_KEY,
- RGW_IAM_MOTR_EMAIL_KEY
-};
-
-void MotrMetaCache::invalid(const DoutPrefixProvider *dpp,
- const string& name)
-{
- cache.invalidate_remove(dpp, name);
-}
-
-int MotrMetaCache::put(const DoutPrefixProvider *dpp,
- const string& name,
- const bufferlist& data)
-{
- ldpp_dout(dpp, 0) << "Put into cache: name = " << name << dendl;
-
- ObjectCacheInfo info;
- info.status = 0;
- info.data = data;
- info.flags = CACHE_FLAG_DATA;
- info.meta.mtime = ceph::real_clock::now();
- info.meta.size = data.length();
- cache.put(dpp, name, info, NULL);
-
-  // Inform other rgw instances; an error here is only logged and then ignored.
- int rc = distribute_cache(dpp, name, info, UPDATE_OBJ);
- if (rc < 0)
- ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << name << dendl;
-
- return 0;
-}
-
-int MotrMetaCache::get(const DoutPrefixProvider *dpp,
- const string& name,
- bufferlist& data)
-{
- ObjectCacheInfo info;
- uint32_t flags = CACHE_FLAG_DATA;
- int rc = cache.get(dpp, name, info, flags, NULL);
- if (rc == 0) {
- if (info.status < 0)
- return info.status;
-
- bufferlist& bl = info.data;
- bufferlist::iterator it = bl.begin();
- data.clear();
-
- it.copy_all(data);
- ldpp_dout(dpp, 0) << "Cache hit: name = " << name << dendl;
- return 0;
- }
-  ldpp_dout(dpp, 0) << "Cache miss: name = " << name << ", rc = " << rc << dendl;
-  if (rc == -ENODATA)
- return -ENOENT;
-
- return rc;
-}
-
-int MotrMetaCache::remove(const DoutPrefixProvider *dpp,
- const string& name)
-
-{
- cache.invalidate_remove(dpp, name);
-
- ObjectCacheInfo info;
- int rc = distribute_cache(dpp, name, info, INVALIDATE_OBJ);
- if (rc < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to distribute cache: rc = " << rc << dendl;
- }
-
- ldpp_dout(dpp, 0) << "Remove from cache: name = " << name << dendl;
- return 0;
-}
-
-int MotrMetaCache::distribute_cache(const DoutPrefixProvider *dpp,
- const string& normal_name,
- ObjectCacheInfo& obj_info, int op)
-{
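-  // Not implemented yet: cache updates are not propagated to other RGW instances.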
- return 0;
-}
-
-int MotrMetaCache::watch_cb(const DoutPrefixProvider *dpp,
- uint64_t notify_id,
- uint64_t cookie,
- uint64_t notifier_id,
- bufferlist& bl)
-{
- return 0;
-}
-
-void MotrMetaCache::set_enabled(bool status)
-{
- cache.set_enabled(status);
-}
-
-// TODO: properly handle the number of key/value pairs to fetch per query.
-// For now the POC simply tries to retrieve up to `max` pairs starting at
-// key `marker`.
-int MotrUser::list_buckets(const DoutPrefixProvider *dpp, const string& marker,
- const string& end_marker, uint64_t max, bool need_stats,
- BucketList &buckets, optional_yield y)
-{
- int rc;
- vector<string> keys(max);
- vector<bufferlist> vals(max);
- bool is_truncated = false;
-
-  ldpp_dout(dpp, 20) << __func__ << ": list_user_buckets: marker=" << marker
- << " end_marker=" << end_marker
- << " max=" << max << dendl;
-
-  // Retrieve up to `max` key/value pairs starting at `marker`.
- buckets.clear();
- string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
- keys[0] = marker;
- rc = store->next_query_by_name(user_info_iname, keys, vals);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- // Process the returned pairs to add into BucketList.
- uint64_t bcount = 0;
- for (const auto& bl: vals) {
- if (bl.length() == 0)
- break;
-
- RGWBucketEnt ent;
- auto iter = bl.cbegin();
- ent.decode(iter);
-
- std::time_t ctime = ceph::real_clock::to_time_t(ent.creation_time);
-    ldpp_dout(dpp, 20) << "got creation time: " << std::put_time(std::localtime(&ctime), "%F %T") << dendl;
-
- if (!end_marker.empty() &&
- end_marker.compare(ent.bucket.marker) <= 0)
- break;
-
- buckets.add(std::make_unique<MotrBucket>(this->store, ent, this));
- bcount++;
- }
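-  // Heuristic: if the whole batch was filled, assume more entries may follow.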
- if (bcount == max)
- is_truncated = true;
- buckets.set_truncated(is_truncated);
-
- return 0;
-}
-
-int MotrUser::create_bucket(const DoutPrefixProvider* dpp,
- const rgw_bucket& b,
- const std::string& zonegroup_id,
- rgw_placement_rule& placement_rule,
- std::string& swift_ver_location,
- const RGWQuotaInfo* pquota_info,
- const RGWAccessControlPolicy& policy,
- Attrs& attrs,
- RGWBucketInfo& info,
- obj_version& ep_objv,
- bool exclusive,
- bool obj_lock_enabled,
- bool* existed,
- req_info& req_info,
- std::unique_ptr<Bucket>* bucket_out,
- optional_yield y)
-{
- int ret;
- std::unique_ptr<Bucket> bucket;
-
- // Look up the bucket. Create it if it doesn't exist.
- ret = this->store->get_bucket(dpp, this, b, &bucket, y);
- if (ret < 0 && ret != -ENOENT)
- return ret;
-
- if (ret != -ENOENT) {
- *existed = true;
- // if (swift_ver_location.empty()) {
- // swift_ver_location = bucket->get_info().swift_ver_location;
- // }
- // placement_rule.inherit_from(bucket->get_info().placement_rule);
-
- // TODO: ACL policy
- // // don't allow changes to the acl policy
- //RGWAccessControlPolicy old_policy(ctx());
- //int rc = rgw_op_get_bucket_policy_from_attr(
- // dpp, this, u, bucket->get_attrs(), &old_policy, y);
- //if (rc >= 0 && old_policy != policy) {
- // bucket_out->swap(bucket);
- // return -EEXIST;
- //}
- } else {
-
- placement_rule.name = "default";
- placement_rule.storage_class = "STANDARD";
- bucket = std::make_unique<MotrBucket>(store, b, this);
- bucket->set_attrs(attrs);
- *existed = false;
- }
-
- if (!*existed){
- // TODO: how to handle zone and multi-site.
- info.placement_rule = placement_rule;
- info.bucket = b;
- info.owner = this->get_info().user_id;
- info.zonegroup = zonegroup_id;
- if (obj_lock_enabled)
- info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
- bucket->set_version(ep_objv);
- bucket->get_info() = info;
-
- // Create a new bucket: (1) Add a key/value pair in the
- // bucket instance index. (2) Create a new bucket index.
- MotrBucket* mbucket = static_cast<MotrBucket*>(bucket.get());
- ret = mbucket->put_info(dpp, y, ceph::real_time())? :
- mbucket->create_bucket_index() ? :
- mbucket->create_multipart_indices();
- if (ret < 0)
- ldpp_dout(dpp, 0) << "ERROR: failed to create bucket indices! " << ret << dendl;
-
- // Insert the bucket entry into the user info index.
- ret = mbucket->link_user(dpp, this, y);
- if (ret < 0)
- ldpp_dout(dpp, 0) << "ERROR: failed to add bucket entry! " << ret << dendl;
- } else {
- return -EEXIST;
- // bucket->set_version(ep_objv);
- // bucket->get_info() = info;
- }
-
- bucket_out->swap(bucket);
-
- return ret;
-}
-
-int MotrUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y)
-{
- return 0;
-}
-
-int MotrUser::read_stats(const DoutPrefixProvider *dpp,
- optional_yield y, RGWStorageStats* stats,
- ceph::real_time *last_stats_sync,
- ceph::real_time *last_stats_update)
-{
- return 0;
-}
-
-/* stats - Not for first pass */
-int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb)
-{
- return 0;
-}
-
-int MotrUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y)
-{
- return 0;
-}
-
-int MotrUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
- bool *is_truncated, RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage)
-{
- return 0;
-}
-
-int MotrUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
-{
- return 0;
-}
-
-int MotrUser::load_user_from_idx(const DoutPrefixProvider *dpp,
- MotrStore *store,
- RGWUserInfo& info, map<string, bufferlist> *attrs,
- RGWObjVersionTracker *objv_tr)
-{
- struct MotrUserInfo muinfo;
- bufferlist bl;
- ldpp_dout(dpp, 20) << "info.user_id.id = " << info.user_id.id << dendl;
- if (store->get_user_cache()->get(dpp, info.user_id.id, bl)) {
- // Cache misses
- int rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
- M0_IC_GET, info.user_id.to_str(), bl);
- ldpp_dout(dpp, 20) << "do_idx_op_by_name() = " << rc << dendl;
- if (rc < 0)
- return rc;
-
- // Put into cache.
- store->get_user_cache()->put(dpp, info.user_id.id, bl);
- }
-
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- muinfo.decode(iter);
- info = muinfo.info;
- if (attrs)
- *attrs = muinfo.attrs;
- if (objv_tr)
- {
- objv_tr->read_version = muinfo.user_version;
- objv_tracker.read_version = objv_tr->read_version;
- }
-
- if (!info.access_keys.empty()) {
- for(auto key : info.access_keys) {
- access_key_tracker.insert(key.first);
- }
- }
-
- return 0;
-}
-
-int MotrUser::load_user(const DoutPrefixProvider *dpp,
- optional_yield y)
-{
- ldpp_dout(dpp, 20) << "load user: user id = " << info.user_id.to_str() << dendl;
- return load_user_from_idx(dpp, store, info, &attrs, &objv_tracker);
-}
-
-int MotrUser::create_user_info_idx()
-{
- string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
- return store->create_motr_idx_by_name(user_info_iname);
-}
-
-int MotrUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y)
-{
- for (auto& it : new_attrs)
- attrs[it.first] = it.second;
-
- return store_user(dpp, y, false);
-}
-
-int MotrUser::store_user(const DoutPrefixProvider* dpp,
- optional_yield y, bool exclusive, RGWUserInfo* old_info)
-{
- bufferlist bl;
- struct MotrUserInfo muinfo;
- RGWUserInfo orig_info;
- RGWObjVersionTracker objv_tr = {};
- obj_version& obj_ver = objv_tr.read_version;
-
- ldpp_dout(dpp, 20) << "Store_user(): User = " << info.user_id.id << dendl;
- orig_info.user_id = info.user_id;
- // XXX: we open and close motr idx 2 times in this method:
- // 1) on load_user_from_idx() here and 2) on do_idx_op_by_name(PUT) below.
- // Maybe this can be optimised later somehow.
- int rc = load_user_from_idx(dpp, store, orig_info, nullptr, &objv_tr);
- ldpp_dout(dpp, 10) << "Get user: rc = " << rc << dendl;
-
- // Check if the user already exists
- if (rc == 0 && obj_ver.ver > 0) {
- if (old_info)
- *old_info = orig_info;
-
- if (obj_ver.ver != objv_tracker.read_version.ver) {
- rc = -ECANCELED;
- ldpp_dout(dpp, 0) << "ERROR: User Read version mismatch" << dendl;
- goto out;
- }
-
- if (exclusive)
- return rc;
-
- obj_ver.ver++;
- } else {
- obj_ver.ver = 1;
- obj_ver.tag = "UserTAG";
- }
-
- // Insert the user to user info index.
- muinfo.info = info;
- muinfo.attrs = attrs;
- muinfo.user_version = obj_ver;
- muinfo.encode(bl);
- rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
- M0_IC_PUT, info.user_id.to_str(), bl);
- ldpp_dout(dpp, 10) << "Store user to motr index: rc = " << rc << dendl;
- if (rc == 0) {
- objv_tracker.read_version = obj_ver;
- objv_tracker.write_version = obj_ver;
- }
-
- // Store access key in access key index
- if (!info.access_keys.empty()) {
- std::string access_key;
- std::string secret_key;
- std::map<std::string, RGWAccessKey>::const_iterator iter = info.access_keys.begin();
- const RGWAccessKey& k = iter->second;
- access_key = k.id;
- secret_key = k.key;
- MotrAccessKey MGWUserKeys(access_key, secret_key, info.user_id.to_str());
- store->store_access_key(dpp, y, MGWUserKeys);
- access_key_tracker.insert(access_key);
- }
-
- // Check if any key needs to be deleted
- if (access_key_tracker.size() != info.access_keys.size()) {
- std::string key_for_deletion;
- for (auto key : access_key_tracker) {
- if (!info.get_key(key)) {
- key_for_deletion = key;
- ldpp_dout(dpp, 0) << "Deleting access key: " << key_for_deletion << dendl;
- rc = store->delete_access_key(dpp, y, key_for_deletion);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Unable to delete access key: " << rc << dendl;
- }
- }
- }
- if(rc >= 0){
- access_key_tracker.erase(key_for_deletion);
- }
- }
-
- if (!info.user_email.empty()) {
- MotrEmailInfo MGWEmailInfo(info.user_id.to_str(), info.user_email);
- store->store_email_info(dpp, y, MGWEmailInfo);
- }
-
- // Create the user info index, which stores all buckets that belong
- // to this user.
- rc = create_user_info_idx();
- if (rc < 0 && rc != -EEXIST) {
- ldpp_dout(dpp, 0) << "Failed to create user info index: rc = " << rc << dendl;
- goto out;
- }
-
- // Put the user info into cache.
- rc = store->get_user_cache()->put(dpp, info.user_id.id, bl);
-
-out:
- return rc;
-}
-
-int MotrUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y)
-{
- // Remove user info from cache
- // Delete access keys for user
- // Delete user info
- // Delete user from user index
- // Delete email for user - TODO
- bufferlist bl;
- int rc;
- // Remove the user info from cache.
- store->get_user_cache()->remove(dpp, info.user_id.id);
-
- // Delete all access key of user
- if (!info.access_keys.empty()) {
- for(auto acc_key = info.access_keys.begin(); acc_key != info.access_keys.end(); acc_key++) {
- auto access_key = acc_key->first;
- rc = store->delete_access_key(dpp, y, access_key);
- // TODO
- // Check error code for access_key does not exist
- // Continue to the next step only if the delete failed because the key doesn't exist
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Unable to delete access key: " << rc << dendl;
- }
- }
- }
-
- //Delete email id
- if (!info.user_email.empty()) {
- rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
- M0_IC_DEL, info.user_email, bl);
- if (rc < 0 && rc != -ENOENT) {
- ldpp_dout(dpp, 0) << "Unable to delete email id " << rc << dendl;
- }
- }
-
- // Delete user info index
- string user_info_iname = "motr.rgw.user.info." + info.user_id.to_str();
- store->delete_motr_idx_by_name(user_info_iname);
- ldpp_dout(dpp, 10) << "Deleted user info index - " << user_info_iname << dendl;
-
- // Delete user from user index
- rc = store->do_idx_op_by_name(RGW_MOTR_USERS_IDX_NAME,
- M0_IC_DEL, info.user_id.to_str(), bl);
- if (rc < 0){
- ldpp_dout(dpp, 0) << "Unable to delete user from user index " << rc << dendl;
- return rc;
- }
-
- // TODO
- // Delete email for user
- // rc = store->do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
- // M0_IC_DEL, info.user_email, bl);
- // if (rc < 0){
- // ldpp_dout(dpp, 0) << "Unable to delete email for user" << rc << dendl;
- // return rc;
- // }
- return 0;
-}
-
-int MotrUser::verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider *dpp, optional_yield y)
-{
- *verified = false;
- return 0;
-}
-
-int MotrBucket::remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y)
-{
- int ret;
-
- ldpp_dout(dpp, 20) << "remove_bucket Entry=" << info.bucket.name << dendl;
-
- // Refresh info
- ret = load_bucket(dpp, y);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket load_bucket failed rc=" << ret << dendl;
- return ret;
- }
-
- ListParams params;
- params.list_versions = true;
- params.allow_unordered = true;
-
- ListResults results;
-
- // 1. Check if the bucket has objects.
- // If the bucket contains objects and delete_children is true, delete all objects.
- // Otherwise return an error indicating the bucket is not empty.
- do {
- results.objs.clear();
-
- // Check if bucket has objects.
- ret = list(dpp, params, 1000, results, y);
- if (ret < 0) {
- return ret;
- }
-
- // If result contains entries, bucket is not empty.
- if (!results.objs.empty() && !delete_children) {
- ldpp_dout(dpp, 0) << "ERROR: could not remove non-empty bucket " << info.bucket.name << dendl;
- return -ENOTEMPTY;
- }
-
- for (const auto& obj : results.objs) {
- rgw_obj_key key(obj.key);
- if (key.instance.empty()) {
- key.instance = "null";
- }
-
- std::unique_ptr<rgw::sal::Object> object = get_object(key);
-
- ret = object->delete_object(dpp, null_yield);
- if (ret < 0 && ret != -ENOENT) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket rgw_remove_object failed rc=" << ret << dendl;
- return ret;
- }
- }
- } while(results.is_truncated);
-
- // 2. Abort Mp uploads on the bucket.
- ret = abort_multiparts(dpp, store->ctx());
- if (ret < 0) {
- return ret;
- }
-
- // 3. Remove the multipart index.
- string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts";
- ret = store->delete_motr_idx_by_name(bucket_multipart_iname);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove multipart index rc=" << ret << dendl;
- return ret;
- }
-
- // 4. Sync user stats.
- ret = this->sync_user_stats(dpp, y);
- if (ret < 0) {
- ldout(store->ctx(), 1) << "WARNING: failed to sync user stats before bucket delete. ret=" << ret << dendl;
- }
-
- // 5. Remove the bucket from user info index. (unlink user)
- ret = this->unlink_user(dpp, owner, y);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl;
- return ret;
- }
-
- // 6. Remove bucket index.
- string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
- ret = store->delete_motr_idx_by_name(bucket_index_iname);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket unlink_user failed rc=" << ret << dendl;
- return ret;
- }
-
- // 7. Remove bucket instance info.
- bufferlist bl;
- ret = store->get_bucket_inst_cache()->remove(dpp, info.bucket.name);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance from cache rc="
- << ret << dendl;
- return ret;
- }
-
- ret = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
- M0_IC_DEL, info.bucket.name, bl);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: remove_bucket failed to remove bucket instance rc="
- << ret << dendl;
- return ret;
- }
-
- // TODO :
- // 8. Remove Notifications
- // if bucket has notification definitions associated with it
- // they should be removed (note that any pending notifications on the bucket are still going to be sent)
-
- // 9. Forward request to master.
- if (forward_to_master) {
- bufferlist in_data;
- ret = store->forward_request_to_master(dpp, owner, &bucket_version, in_data, nullptr, *req_info, y);
- if (ret < 0) {
- if (ret == -ENOENT) {
- /* adjust error, we want to return with NoSuchBucket and not
- * NoSuchKey */
- ret = -ERR_NO_SUCH_BUCKET;
- }
- ldpp_dout(dpp, 0) << "ERROR: Forward to master failed. ret=" << ret << dendl;
- return ret;
- }
- }
-
- ldpp_dout(dpp, 20) << "remove_bucket Exit=" << info.bucket.name << dendl;
-
- return ret;
-}
-
-int MotrBucket::remove_bucket_bypass_gc(int concurrent_max,
- bool keep_index_consistent,
- optional_yield y,
- const DoutPrefixProvider *dpp) {
- return 0;
-}
-
-int MotrBucket::put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time _mtime)
-{
- bufferlist bl;
- struct MotrBucketInfo mbinfo;
-
- ldpp_dout(dpp, 20) << "put_info(): bucket_id=" << info.bucket.bucket_id << dendl;
- mbinfo.info = info;
- mbinfo.bucket_attrs = attrs;
- mbinfo.mtime = _mtime;
- mbinfo.bucket_version = bucket_version;
- mbinfo.encode(bl);
-
- // Insert the bucket instance using the bucket's name (string) as the key.
- int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
- M0_IC_PUT, info.bucket.name, bl, !exclusive);
- if (rc == 0)
- store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl);
-
- return rc;
-}
-
-int MotrBucket::load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats)
-{
- // Get the bucket instance using the bucket's name (string). TODO: or by bucket id?
- bufferlist bl;
- if (store->get_bucket_inst_cache()->get(dpp, info.bucket.name, bl)) {
- // Cache misses.
- ldpp_dout(dpp, 20) << "load_bucket(): name=" << info.bucket.name << dendl;
- int rc = store->do_idx_op_by_name(RGW_MOTR_BUCKET_INST_IDX_NAME,
- M0_IC_GET, info.bucket.name, bl);
- ldpp_dout(dpp, 20) << "load_bucket(): rc=" << rc << dendl;
- if (rc < 0)
- return rc;
- store->get_bucket_inst_cache()->put(dpp, info.bucket.name, bl);
- }
-
- struct MotrBucketInfo mbinfo;
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- mbinfo.decode(iter); //Decode into MotrBucketInfo.
-
- info = mbinfo.info;
- ldpp_dout(dpp, 20) << "load_bucket(): bucket_id=" << info.bucket.bucket_id << dendl;
- rgw_placement_rule placement_rule;
- placement_rule.name = "default";
- placement_rule.storage_class = "STANDARD";
- info.placement_rule = placement_rule;
-
- attrs = mbinfo.bucket_attrs;
- mtime = mbinfo.mtime;
- bucket_version = mbinfo.bucket_version;
-
- return 0;
-}
-
-int MotrBucket::link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y)
-{
- bufferlist bl;
- RGWBucketEnt new_bucket;
- ceph::real_time creation_time = get_creation_time();
-
- // RGWBucketEnt or cls_user_bucket_entry is the structure that is stored.
- new_bucket.bucket = info.bucket;
- new_bucket.size = 0;
- if (real_clock::is_zero(creation_time))
- creation_time = ceph::real_clock::now();
- new_bucket.creation_time = creation_time;
- new_bucket.encode(bl);
- std::time_t ctime = ceph::real_clock::to_time_t(new_bucket.creation_time);
- ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl;
-
- // Insert the bucket entry into the user info index.
- string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str();
- return store->do_idx_op_by_name(user_info_idx_name,
- M0_IC_PUT, info.bucket.name, bl);
-
-}
-
-int MotrBucket::unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y)
-{
- // Remove the bucket entry from the user info index.
- bufferlist bl;
- string user_info_idx_name = "motr.rgw.user.info." + new_user->get_info().user_id.to_str();
- return store->do_idx_op_by_name(user_info_idx_name,
- M0_IC_DEL, info.bucket.name, bl);
-}
-
-/* stats - Not for first pass */
-int MotrBucket::read_stats(const DoutPrefixProvider *dpp,
- const bucket_index_layout_generation& idx_layout, int shard_id,
- std::string *bucket_ver, std::string *master_ver,
- std::map<RGWObjCategory, RGWStorageStats>& stats,
- std::string *max_marker, bool *syncstopped)
-{
- return 0;
-}
-
-int MotrBucket::create_bucket_index()
-{
- string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
- return store->create_motr_idx_by_name(bucket_index_iname);
-}
-
-int MotrBucket::create_multipart_indices()
-{
- int rc;
-
- // Bucket multipart index stores in-progress multipart uploads.
- // Key is the object name + upload_id, value is a rgw_bucket_dir_entry.
- // An entry is inserted when a multipart upload is initialised (
- // MotrMultipartUpload::init()) and will be removed when the upload
- // is completed (MotrMultipartUpload::complete()).
- // MotrBucket::list_multiparts() will scan this index to return all
- // in-progress multipart uploads in the bucket.
- string bucket_multipart_iname = "motr.rgw.bucket." + info.bucket.name + ".multiparts";
- rc = store->create_motr_idx_by_name(bucket_multipart_iname);
- if (rc < 0) {
- ldout(store->cctx, 0) << "Failed to create bucket multipart index " << bucket_multipart_iname << dendl;
- return rc;
- }
-
- return 0;
-}
-
-
-int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp,
- const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB *ctx)
-{
- return 0;
-}
-
-int MotrBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y)
-{
- return 0;
-}
-
-int MotrBucket::update_container_stats(const DoutPrefixProvider *dpp)
-{
- return 0;
-}
-
-int MotrBucket::check_bucket_shards(const DoutPrefixProvider *dpp)
-{
- return 0;
-}
-
-int MotrBucket::chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y)
-{
- // TODO: update bucket with new owner
- return 0;
-}
-
-/* Make sure to call load_bucket() if you need it first */
-bool MotrBucket::is_owner(User* user)
-{
- return (info.owner.compare(user->get_id()) == 0);
-}
-
-int MotrBucket::check_empty(const DoutPrefixProvider *dpp, optional_yield y)
-{
- /* XXX: Check if bucket contains any objects */
- return 0;
-}
-
-int MotrBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size,
- optional_yield y, bool check_size_only)
-{
- /* Not Handled in the first pass as stats are also needed */
- return 0;
-}
-
-int MotrBucket::merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& new_attrs, optional_yield y)
-{
- for (auto& it : new_attrs)
- attrs[it.first] = it.second;
-
- return put_info(dpp, y, ceph::real_time());
-}
-
-int MotrBucket::try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime)
-{
- return 0;
-}
-
-/* XXX: usage and stats not supported in the first pass */
-int MotrBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool *is_truncated,
- RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage)
-{
- return 0;
-}
-
-int MotrBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
-{
- return 0;
-}
-
-int MotrBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list<rgw_obj_index_key>& objs_to_unlink)
-{
- /* XXX: CHECK: Unlike RadosStore, there is no separate bucket index table.
- * Delete all the objects in the list from the object table of this
- * bucket.
- */
- return 0;
-}
-
-int MotrBucket::check_index(const DoutPrefixProvider *dpp, std::map<RGWObjCategory, RGWStorageStats>& existing_stats, std::map<RGWObjCategory, RGWStorageStats>& calculated_stats)
-{
- /* XXX: stats not supported yet */
- return 0;
-}
-
-int MotrBucket::rebuild_index(const DoutPrefixProvider *dpp)
-{
- /* Not applicable; not supported in the first pass. */
- return 0;
-}
-
-int MotrBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout)
-{
- /* XXX: CHECK: set tag timeout for all the bucket objects? */
- return 0;
-}
-
-int MotrBucket::purge_instance(const DoutPrefixProvider *dpp)
-{
- /* XXX: CHECK: only a single instance is supported for now.
- * Remove all the objects for that instance? Anything extra needed?
- */
- return 0;
-}
-
-int MotrBucket::set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy &acl, optional_yield y)
-{
- int ret = 0;
- bufferlist aclbl;
-
- acls = acl;
- acl.encode(aclbl);
-
- Attrs attrs = get_attrs();
- attrs[RGW_ATTR_ACL] = aclbl;
-
- // TODO: update bucket entry with the new attrs
-
- return ret;
-}
-
-std::unique_ptr<Object> MotrBucket::get_object(const rgw_obj_key& k)
-{
- return std::make_unique<MotrObject>(this->store, k, this);
-}
-
-int MotrBucket::list(const DoutPrefixProvider *dpp, ListParams& params, int max, ListResults& results, optional_yield y)
-{
- int rc;
- vector<string> keys(max);
- vector<bufferlist> vals(max);
-
- ldpp_dout(dpp, 20) << "bucket=" << info.bucket.name
- << " prefix=" << params.prefix
- << " marker=" << params.marker
- << " max=" << max << dendl;
-
- // Retrieve at most `max` key/value pairs from the bucket index, starting
- // at the marker (or at the prefix when no marker is given).
- string bucket_index_iname = "motr.rgw.bucket.index." + info.bucket.name;
- keys[0] = params.marker.empty() ? params.prefix :
- params.marker.get_oid();
- rc = store->next_query_by_name(bucket_index_iname, keys, vals, params.prefix,
- params.delim);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- // Process the returned pairs to add into ListResults.
- int i = 0;
- for (; i < rc; ++i) {
- if (vals[i].length() == 0) {
- results.common_prefixes[keys[i]] = true;
- } else {
- rgw_bucket_dir_entry ent;
- auto iter = vals[i].cbegin();
- ent.decode(iter);
- if (params.list_versions || ent.is_visible())
- results.objs.emplace_back(std::move(ent));
- }
- }
-
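- // A full page was returned: mark the listing as truncated and set next_marker
- // to the smallest string greater than the last returned key (hence the
- // trailing space), so the next query resumes right after it.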
- if (i == max) {
- results.is_truncated = true;
- results.next_marker = keys[max - 1] + " ";
- } else {
- results.is_truncated = false;
- }
-
- return 0;
-}
-
-int MotrBucket::list_multiparts(const DoutPrefixProvider *dpp,
- const string& prefix,
- string& marker,
- const string& delim,
- const int& max_uploads,
- vector<std::unique_ptr<MultipartUpload>>& uploads,
- map<string, bool> *common_prefixes,
- bool *is_truncated)
-{
- int rc;
- vector<string> key_vec(max_uploads);
- vector<bufferlist> val_vec(max_uploads);
-
- string bucket_multipart_iname =
- "motr.rgw.bucket." + this->get_name() + ".multiparts";
- key_vec[0].clear();
- key_vec[0].assign(marker.begin(), marker.end());
- rc = store->next_query_by_name(bucket_multipart_iname, key_vec, val_vec);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- // Process the returned pairs and add them to the uploads list.
- // The POC only supports listing all uploads or filtering
- // them by prefix.
- int ocount = 0;
- rgw_obj_key last_obj_key;
- *is_truncated = false;
- for (const auto& bl: val_vec) {
- if (bl.length() == 0)
- break;
-
- rgw_bucket_dir_entry ent;
- auto iter = bl.cbegin();
- ent.decode(iter);
-
- if (prefix.size() &&
- (0 != ent.key.name.compare(0, prefix.size(), prefix))) {
- ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ <<
- ": skippping \"" << ent.key <<
- "\" because doesn't match prefix" << dendl;
- continue;
- }
-
- rgw_obj_key key(ent.key);
- uploads.push_back(this->get_multipart_upload(key.name));
- last_obj_key = key;
- ocount++;
- if (ocount == max_uploads) {
- *is_truncated = true;
- break;
- }
- }
- marker = last_obj_key.name;
-
- // Common prefixes are not handled for now.
-
- return 0;
-
-}
-
-int MotrBucket::abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct)
-{
- return 0;
-}
-
-void MotrStore::finalize(void)
-{
- // close connection with motr
- m0_client_fini(this->instance, true);
-}
-
-const std::string& MotrZoneGroup::get_endpoint() const
-{
- if (!group.endpoints.empty()) {
- return group.endpoints.front();
- } else {
- // use zonegroup's master zone endpoints
- auto z = group.zones.find(group.master_zone);
- if (z != group.zones.end() && !z->second.endpoints.empty()) {
- return z->second.endpoints.front();
- }
- }
- return empty;
-}
-
-bool MotrZoneGroup::placement_target_exists(std::string& target) const
-{
- return !!group.placement_targets.count(target);
-}
-
-int MotrZoneGroup::get_placement_target_names(std::set<std::string>& names) const
-{
- for (const auto& target : group.placement_targets) {
- names.emplace(target.second.name);
- }
-
- return 0;
-}
-
-int MotrZoneGroup::get_placement_tier(const rgw_placement_rule& rule,
- std::unique_ptr<PlacementTier>* tier)
-{
- std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer;
- titer = group.placement_targets.find(rule.name);
- if (titer == group.placement_targets.end()) {
- return -ENOENT;
- }
-
- const auto& target_rule = titer->second;
- std::map<std::string, RGWZoneGroupPlacementTier>::const_iterator ttier;
- ttier = target_rule.tier_targets.find(rule.storage_class);
- if (ttier == target_rule.tier_targets.end()) {
- // not found
- return -ENOENT;
- }
-
- PlacementTier* t;
- t = new MotrPlacementTier(store, ttier->second);
- if (!t)
- return -ENOMEM;
-
- tier->reset(t);
- return 0;
-}
-
-ZoneGroup& MotrZone::get_zonegroup()
-{
- return zonegroup;
-}
-
-const std::string& MotrZone::get_id()
-{
- return zone_params->get_id();
-}
-
-const std::string& MotrZone::get_name() const
-{
- return zone_params->get_name();
-}
-
-bool MotrZone::is_writeable()
-{
- return true;
-}
-
-bool MotrZone::get_redirect_endpoint(std::string* endpoint)
-{
- return false;
-}
-
-bool MotrZone::has_zonegroup_api(const std::string& api) const
-{
- return (zonegroup.group.api_name == api);
-}
-
-const std::string& MotrZone::get_current_period_id()
-{
- return current_period->get_id();
-}
-
-std::unique_ptr<LuaManager> MotrStore::get_lua_manager()
-{
- return std::make_unique<MotrLuaManager>(this);
-}
-
-int MotrObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **_state, optional_yield y, bool follow_olh)
-{
- // Get object's metadata (those stored in rgw_bucket_dir_entry).
- bufferlist bl;
- if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) {
- // Cache misses.
- string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
- int rc = this->store->do_idx_op_by_name(bucket_index_iname,
- M0_IC_GET, this->get_key().get_oid(), bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl;
- return rc;
- }
-
- // Put into cache.
- this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl);
- }
-
- rgw_bucket_dir_entry ent;
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- ent.decode(iter);
-
- // Set object's type.
- this->category = ent.meta.category;
-
- // Set object state.
- state.exists = true;
- state.size = ent.meta.size;
- state.accounted_size = ent.meta.size;
- state.mtime = ent.meta.mtime;
-
- state.has_attrs = true;
- bufferlist etag_bl;
- string& etag = ent.meta.etag;
- ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl;
- etag_bl.append(etag);
- state.attrset[RGW_ATTR_ETAG] = etag_bl;
-
- return 0;
-}
-
-MotrObject::~MotrObject() {
- this->close_mobj();
-}
-
-// int MotrObject::read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj)
-// {
-// read_op.params.attrs = &attrs;
-// read_op.params.target_obj = target_obj;
-// read_op.params.obj_size = &obj_size;
-// read_op.params.lastmod = &mtime;
-//
-// return read_op.prepare(dpp);
-// }
-
-int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y)
-{
- // TODO: implement
- ldpp_dout(dpp, 20) <<__func__<< ": MotrObject::set_obj_attrs()" << dendl;
- return 0;
-}
-
-int MotrObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj)
-{
- if (this->category == RGWObjCategory::MultiMeta)
- return 0;
-
- string bname, key;
- if (target_obj) {
- bname = target_obj->bucket.name;
- key = target_obj->key.get_oid();
- } else {
- bname = this->get_bucket()->get_name();
- key = this->get_key().get_oid();
- }
- ldpp_dout(dpp, 20) << "MotrObject::get_obj_attrs(): "
- << bname << "/" << key << dendl;
-
- // Get object's metadata (those stored in rgw_bucket_dir_entry).
- bufferlist bl;
- if (this->store->get_obj_meta_cache()->get(dpp, key, bl)) {
- // Cache misses.
- string bucket_index_iname = "motr.rgw.bucket.index." + bname;
- int rc = this->store->do_idx_op_by_name(bucket_index_iname, M0_IC_GET, key, bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Failed to get object's entry from bucket index. " << dendl;
- return rc;
- }
-
- // Put into cache.
- this->store->get_obj_meta_cache()->put(dpp, key, bl);
- }
-
- rgw_bucket_dir_entry ent;
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- ent.decode(iter);
- decode(state.attrset, iter);
-
- return 0;
-}
-
-int MotrObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp)
-{
- rgw_obj target = get_obj();
- int r = get_obj_attrs(y, dpp, &target);
- if (r < 0) {
- return r;
- }
- set_atomic();
- state.attrset[attr_name] = attr_val;
- return set_obj_attrs(dpp, &state.attrset, nullptr, y);
-}
-
-int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y)
-{
- rgw_obj target = get_obj();
- Attrs rmattr;
- bufferlist bl;
-
- set_atomic();
- rmattr[attr_name] = bl;
- return set_obj_attrs(dpp, nullptr, &rmattr, y);
-}
-
-bool MotrObject::is_expired() {
- return false;
-}
-
-// Taken from rgw_rados.cc
-void MotrObject::gen_rand_obj_instance_name()
-{
- enum {OBJ_INSTANCE_LEN = 32};
- char buf[OBJ_INSTANCE_LEN + 1];
-
- gen_rand_alphanumeric_no_underscore(store->ctx(), buf, OBJ_INSTANCE_LEN);
- state.obj.key.set_instance(buf);
-}
-
-int MotrObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
- const std::set<std::string>& keys,
- Attrs* vals)
-{
- return 0;
-}
-
-int MotrObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val,
- bool must_exist, optional_yield y)
-{
- return 0;
-}
-
-int MotrObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y)
-{
- return 0;
-}
-
-std::unique_ptr<MPSerializer> MotrObject::get_serializer(const DoutPrefixProvider *dpp,
- const std::string& lock_name)
-{
- return std::make_unique<MPMotrSerializer>(dpp, store, this, lock_name);
-}
-
-int MotrObject::transition(Bucket* bucket,
- const rgw_placement_rule& placement_rule,
- const real_time& mtime,
- uint64_t olh_epoch,
- const DoutPrefixProvider* dpp,
- optional_yield y)
-{
- return 0;
-}
-
-bool MotrObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2)
-{
- /* XXX: support single default zone and zonegroup for now */
- return true;
-}
-
-int MotrObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f)
-{
- return 0;
-}
-
-std::unique_ptr<Object::ReadOp> MotrObject::get_read_op()
-{
- return std::make_unique<MotrObject::MotrReadOp>(this);
-}
-
-MotrObject::MotrReadOp::MotrReadOp(MotrObject *_source) :
- source(_source)
-{ }
-
-int MotrObject::MotrReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp)
-{
- int rc;
- ldpp_dout(dpp, 20) <<__func__<< ": bucket=" << source->get_bucket()->get_name() << dendl;
-
- rgw_bucket_dir_entry ent;
- rc = source->get_bucket_dir_ent(dpp, ent);
- if (rc < 0)
- return rc;
-
- // Set the source object's attrs. attrs is a key/value map used
- // in send_response_data() to set response attributes, including the etag.
- bufferlist etag_bl;
- string& etag = ent.meta.etag;
- ldpp_dout(dpp, 20) <<__func__<< ": object's etag: " << ent.meta.etag << dendl;
- etag_bl.append(etag.c_str(), etag.size());
- source->get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl));
-
- source->set_key(ent.key);
- source->set_obj_size(ent.meta.size);
- source->category = ent.meta.category;
- *params.lastmod = ent.meta.mtime;
-
- if (params.mod_ptr || params.unmod_ptr) {
- // Convert all times to GMT to make them compatible
- obj_time_weight src_weight;
- src_weight.init(*params.lastmod, params.mod_zone_id, params.mod_pg_ver);
- src_weight.high_precision = params.high_precision_time;
-
- obj_time_weight dest_weight;
- dest_weight.high_precision = params.high_precision_time;
-
- // Check if-modified-since condition
- if (params.mod_ptr && !params.if_nomatch) {
- dest_weight.init(*params.mod_ptr, params.mod_zone_id, params.mod_pg_ver);
- ldpp_dout(dpp, 10) << "If-Modified-Since: " << dest_weight << " & "
- << "Last-Modified: " << src_weight << dendl;
- if (!(dest_weight < src_weight)) {
- return -ERR_NOT_MODIFIED;
- }
- }
-
- // Check if-unmodified-since condition
- if (params.unmod_ptr && !params.if_match) {
- dest_weight.init(*params.unmod_ptr, params.mod_zone_id, params.mod_pg_ver);
- ldpp_dout(dpp, 10) << "If-UnModified-Since: " << dest_weight << " & "
- << "Last-Modified: " << src_weight << dendl;
- if (dest_weight < src_weight) {
- return -ERR_PRECONDITION_FAILED;
- }
- }
- }
- // Check if-match condition
- if (params.if_match) {
- string if_match_str = rgw_string_unquote(params.if_match);
- ldpp_dout(dpp, 10) << "ETag: " << etag << " & "
- << "If-Match: " << if_match_str << dendl;
- if (if_match_str.compare(etag) != 0) {
- return -ERR_PRECONDITION_FAILED;
- }
- }
- // Check if-none-match condition
- if (params.if_nomatch) {
- string if_nomatch_str = rgw_string_unquote(params.if_nomatch);
- ldpp_dout(dpp, 10) << "ETag: " << etag << " & "
- << "If-NoMatch: " << if_nomatch_str << dendl;
- if (if_nomatch_str.compare(etag) == 0) {
- return -ERR_NOT_MODIFIED;
- }
- }
-
- // Skip opening an empty object.
- if(source->get_obj_size() == 0)
- return 0;
-
- // Open the object here.
- if (source->category == RGWObjCategory::MultiMeta) {
- ldpp_dout(dpp, 20) <<__func__<< ": open obj parts..." << dendl;
- rc = source->get_part_objs(dpp, this->part_objs)? :
- source->open_part_objs(dpp, this->part_objs);
- return rc;
- } else {
- ldpp_dout(dpp, 20) <<__func__<< ": open object..." << dendl;
- return source->open_mobj(dpp);
- }
-}
-
-int MotrObject::MotrReadOp::read(int64_t off, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp)
-{
- ldpp_dout(dpp, 20) << "MotrReadOp::read(): sync read." << dendl;
- return 0;
-}
-
-// RGWGetObj::execute() calls ReadOp::iterate() to read the object from 'off' to 'end'.
-// The returned data is processed by 'cb', a chain of post-processing
-// filters such as decompression, decryption and sending the data back to the client
-// (RGWGetObj_CB::handle_data, which in turn calls RGWGetObj::get_data_cb() to
-// send the data back).
-//
-// The POC implements a simple sync version of iterate() which reads
-// a block of data at a time and calls 'cb' for post-processing.
-int MotrObject::MotrReadOp::iterate(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb, optional_yield y)
-{
- int rc;
-
- if (source->category == RGWObjCategory::MultiMeta)
- rc = source->read_multipart_obj(dpp, off, end, cb, part_objs);
- else
- rc = source->read_mobj(dpp, off, end, cb);
-
- return rc;
-}
-
-int MotrObject::MotrReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y)
-{
- //return 0;
- return -ENODATA;
-}
-
-std::unique_ptr<Object::DeleteOp> MotrObject::get_delete_op()
-{
- return std::make_unique<MotrObject::MotrDeleteOp>(this);
-}
-
-MotrObject::MotrDeleteOp::MotrDeleteOp(MotrObject *_source) :
- source(_source)
-{ }
-
-// Implementation of DELETE OBJ also requires MotrObject::get_obj_state()
-// to retrieve and set object's state from object's metadata.
-//
-// TODO:
-// 1. The POC only removes the object's entry from the bucket index and deletes
-// the corresponding Motr objects. It doesn't handle DeleteOp::params.
-// Delete::delete_obj() in rgw_rados.cc shows how the rados backend processes the
-// params.
-// 2. Delete an object when its versioning is turned on.
-int MotrObject::MotrDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y)
-{
- ldpp_dout(dpp, 20) << "delete " << source->get_key().get_oid() << " from " << source->get_bucket()->get_name() << dendl;
-
- rgw_bucket_dir_entry ent;
- int rc = source->get_bucket_dir_ent(dpp, ent);
- if (rc < 0) {
- return rc;
- }
-
- //TODO: When integrating with background GC for object deletion,
- // we should consider adding object entry to GC before deleting the metadata.
- // Delete from the cache first.
- source->store->get_obj_meta_cache()->remove(dpp, source->get_key().get_oid());
-
- // Delete the object's entry from the bucket index.
- bufferlist bl;
- string bucket_index_iname = "motr.rgw.bucket.index." + source->get_bucket()->get_name();
- rc = source->store->do_idx_op_by_name(bucket_index_iname,
- M0_IC_DEL, source->get_key().get_oid(), bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Failed to del object's entry from bucket index. " << dendl;
- return rc;
- }
-
- if (ent.meta.size == 0) {
- ldpp_dout(dpp, 0) << __func__ << ": Object size is 0, not deleting motr object." << dendl;
- return 0;
- }
- // Remove the motr objects.
- if (source->category == RGWObjCategory::MultiMeta)
- rc = source->delete_part_objs(dpp);
- else
- rc = source->delete_mobj(dpp);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "Failed to delete the object from Motr. " << dendl;
- return rc;
- }
-
- //result.delete_marker = parent_op.result.delete_marker;
- //result.version_id = parent_op.result.version_id;
- return 0;
-}
-
-int MotrObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, bool prevent_versioning)
-{
- MotrObject::MotrDeleteOp del_op(this);
- del_op.params.bucket_owner = bucket->get_info().owner;
- del_op.params.versioning_status = bucket->get_info().versioning_status();
-
- return del_op.delete_obj(dpp, y);
-}
-
-int MotrObject::copy_object(User* user,
- req_info* info,
- const rgw_zone_id& source_zone,
- rgw::sal::Object* dest_object,
- rgw::sal::Bucket* dest_bucket,
- rgw::sal::Bucket* src_bucket,
- const rgw_placement_rule& dest_placement,
- ceph::real_time* src_mtime,
- ceph::real_time* mtime,
- const ceph::real_time* mod_ptr,
- const ceph::real_time* unmod_ptr,
- bool high_precision_time,
- const char* if_match,
- const char* if_nomatch,
- AttrsMod attrs_mod,
- bool copy_if_newer,
- Attrs& attrs,
- RGWObjCategory category,
- uint64_t olh_epoch,
- boost::optional<ceph::real_time> delete_at,
- std::string* version_id,
- std::string* tag,
- std::string* etag,
- void (*progress_cb)(off_t, void *),
- void* progress_data,
- const DoutPrefixProvider* dpp,
- optional_yield y)
-{
- return 0;
-}
-
-int MotrObject::swift_versioning_restore(bool& restored,
- const DoutPrefixProvider* dpp)
-{
- return 0;
-}
-
-int MotrObject::swift_versioning_copy(const DoutPrefixProvider* dpp,
- optional_yield y)
-{
- return 0;
-}
-
-MotrAtomicWriter::MotrAtomicWriter(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- MotrStore* _store,
- const rgw_user& _owner,
- const rgw_placement_rule *_ptail_placement_rule,
- uint64_t _olh_epoch,
- const std::string& _unique_tag) :
- StoreWriter(dpp, y),
- store(_store),
- owner(_owner),
- ptail_placement_rule(_ptail_placement_rule),
- olh_epoch(_olh_epoch),
- unique_tag(_unique_tag),
- obj(_store, obj->get_key(), obj->get_bucket()),
- old_obj(_store, obj->get_key(), obj->get_bucket()) {}
-
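-// Maximum number of buffer/extent fragments staged for a single Motr I/O op.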
-static const unsigned MAX_BUFVEC_NR = 256;
-
-int MotrAtomicWriter::prepare(optional_yield y)
-{
- total_data_size = 0;
-
- if (obj.is_opened())
- return 0;
-
- rgw_bucket_dir_entry ent;
- int rc = old_obj.get_bucket_dir_ent(dpp, ent);
- if (rc == 0) {
- ldpp_dout(dpp, 20) << __func__ << ": object exists." << dendl;
- }
-
- rc = m0_bufvec_empty_alloc(&buf, MAX_BUFVEC_NR) ?:
- m0_bufvec_alloc(&attr, MAX_BUFVEC_NR, 1) ?:
- m0_indexvec_alloc(&ext, MAX_BUFVEC_NR);
- if (rc != 0)
- this->cleanup();
-
- return rc;
-}
-
-int MotrObject::create_mobj(const DoutPrefixProvider *dpp, uint64_t sz)
-{
- if (mobj != nullptr) {
- ldpp_dout(dpp, 0) <<__func__<< "ERROR: object is already opened" << dendl;
- return -EINVAL;
- }
-
- int rc = m0_ufid_next(&ufid_gr, 1, &meta.oid);
- if (rc != 0) {
- ldpp_dout(dpp, 0) <<__func__<< "ERROR: m0_ufid_next() failed: " << rc << dendl;
- return rc;
- }
-
- char fid_str[M0_FID_STR_LEN];
- snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
- ldpp_dout(dpp, 20) <<__func__<< ": sz=" << sz << " oid=" << fid_str << dendl;
-
- int64_t lid = m0_layout_find_by_objsz(store->instance, nullptr, sz);
- M0_ASSERT(lid > 0);
-
- M0_ASSERT(mobj == nullptr);
- mobj = new m0_obj();
- m0_obj_init(mobj, &store->container.co_realm, &meta.oid, lid);
-
- struct m0_op *op = nullptr;
- mobj->ob_entity.en_flags |= M0_ENF_META;
- rc = m0_entity_create(nullptr, &mobj->ob_entity, &op);
- if (rc != 0) {
- this->close_mobj();
- ldpp_dout(dpp, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
- return rc;
- }
- ldpp_dout(dpp, 20) <<__func__<< ": call m0_op_launch()..." << dendl;
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc != 0) {
- this->close_mobj();
- ldpp_dout(dpp, 0) << "ERROR: failed to create motr object: " << rc << dendl;
- return rc;
- }
-
- meta.layout_id = mobj->ob_attr.oa_layout_id;
- meta.pver = mobj->ob_attr.oa_pver;
- ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id
- << std::dec << " rc=" << rc << dendl;
-
- // TODO: add key:user+bucket+key+obj.meta.oid value:timestamp to
- // gc.queue.index. See more at github.com/Seagate/cortx-rgw/issues/7.
-
- return rc;
-}
-
-int MotrObject::open_mobj(const DoutPrefixProvider *dpp)
-{
- char fid_str[M0_FID_STR_LEN];
- snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
- ldpp_dout(dpp, 20) <<__func__<< ": oid=" << fid_str << dendl;
-
- int rc;
- if (meta.layout_id == 0) {
- rgw_bucket_dir_entry ent;
- rc = this->get_bucket_dir_ent(dpp, ent);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: open_mobj() failed: rc=" << rc << dendl;
- return rc;
- }
- }
-
- if (meta.layout_id == 0)
- return -ENOENT;
-
- M0_ASSERT(mobj == nullptr);
- mobj = new m0_obj();
- memset(mobj, 0, sizeof *mobj);
- m0_obj_init(mobj, &store->container.co_realm, &meta.oid, store->conf.mc_layout_id);
-
- struct m0_op *op = nullptr;
- mobj->ob_attr.oa_layout_id = meta.layout_id;
- mobj->ob_attr.oa_pver = meta.pver;
- mobj->ob_entity.en_flags |= M0_ENF_META;
- rc = m0_entity_open(&mobj->ob_entity, &op);
- if (rc != 0) {
- ldpp_dout(dpp, 0) << "ERROR: m0_entity_open() failed: rc=" << rc << dendl;
- this->close_mobj();
- return rc;
- }
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc < 0) {
- ldpp_dout(dpp, 10) << "ERROR: failed to open motr object: rc=" << rc << dendl;
- this->close_mobj();
- return rc;
- }
-
- ldpp_dout(dpp, 20) <<__func__<< ": rc=" << rc << dendl;
-
- return 0;
-}
-
-int MotrObject::delete_mobj(const DoutPrefixProvider *dpp)
-{
- int rc;
- char fid_str[M0_FID_STR_LEN];
- snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
- if (!meta.oid.u_hi || !meta.oid.u_lo) {
- ldpp_dout(dpp, 20) << __func__ << ": invalid motr object oid=" << fid_str << dendl;
- return -EINVAL;
- }
- ldpp_dout(dpp, 20) << __func__ << ": deleting motr object oid=" << fid_str << dendl;
-
- // Open the object.
- if (mobj == nullptr) {
- rc = this->open_mobj(dpp);
- if (rc < 0)
- return rc;
- }
-
- // Create a DELETE op and execute it (sync version).
- struct m0_op *op = nullptr;
- mobj->ob_entity.en_flags |= M0_ENF_META;
- rc = m0_entity_delete(&mobj->ob_entity, &op);
- if (rc != 0) {
- ldpp_dout(dpp, 0) << "ERROR: m0_entity_delete() failed: " << rc << dendl;
- return rc;
- }
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to open motr object: " << rc << dendl;
- return rc;
- }
-
- this->close_mobj();
-
- return 0;
-}
-
-void MotrObject::close_mobj()
-{
- if (mobj == nullptr)
- return;
- m0_obj_fini(mobj);
- delete mobj; mobj = nullptr;
-}
-
-int MotrObject::write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset)
-{
- int rc;
- unsigned bs, left;
- struct m0_op *op;
- char *start, *p;
- struct m0_bufvec buf;
- struct m0_bufvec attr;
- struct m0_indexvec ext;
-
- left = data.length();
- if (left == 0)
- return 0;
-
- rc = m0_bufvec_empty_alloc(&buf, 1) ?:
- m0_bufvec_alloc(&attr, 1, 1) ?:
- m0_indexvec_alloc(&ext, 1);
- if (rc != 0)
- goto out;
-
- bs = this->get_optimal_bs(left);
- ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl;
-
- start = data.c_str();
-
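- // Write the data in optimally-sized blocks; the final short block is
- // zero-padded up to the block size so every Motr write is unit-aligned.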
- for (p = start; left > 0; left -= bs, p += bs, offset += bs) {
- if (left < bs)
- bs = this->get_optimal_bs(left);
- if (left < bs) {
- data.append_zero(bs - left);
- left = bs;
- p = data.c_str();
- }
- buf.ov_buf[0] = p;
- buf.ov_vec.v_count[0] = bs;
- ext.iv_index[0] = offset;
- ext.iv_vec.v_count[0] = bs;
- attr.ov_vec.v_count[0] = 0;
-
- op = nullptr;
- rc = m0_obj_op(this->mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op);
- if (rc != 0)
- goto out;
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
- if (rc != 0)
- goto out;
- }
-
-out:
- m0_indexvec_free(&ext);
- m0_bufvec_free(&attr);
- m0_bufvec_free2(&buf);
- return rc;
-}
-
-int MotrObject::read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb)
-{
- int rc;
- unsigned bs, actual, left;
- struct m0_op *op;
- struct m0_bufvec buf;
- struct m0_bufvec attr;
- struct m0_indexvec ext;
-
- // make end pointer exclusive:
- // it's easier to work with it this way
- end++;
- ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off <<
- " end=" << end << dendl;
- // As `off` may not be parity group size aligned, even using optimal
- // buffer block size, simply reading data from offset `off` could come
- // across parity group boundary. And Motr only allows page-size aligned
- // offset.
- //
- // The optimal size of each IO should also take into account the data
- // transfer size to s3 client. For example, 16MB may be nice to read
- // data from motr, but it could be too big for network transfer.
- //
- // TODO: We leave proper handling of the offset for the future.
- bs = this->get_optimal_bs(end - off);
- ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): bs=" << bs << dendl;
-
- rc = m0_bufvec_empty_alloc(&buf, 1) ? :
- m0_bufvec_alloc(&attr, 1, 1) ? :
- m0_indexvec_alloc(&ext, 1);
- if (rc < 0)
- goto out;
-
- left = end - off;
- for (; left > 0; off += actual) {
- if (left < bs)
- bs = this->get_optimal_bs(left);
- actual = bs;
- if (left < bs)
- actual = left;
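- // Read a full aligned block of `bs` bytes, but pass only the `actual`
- // bytes that belong to the object on to the callback.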
- ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): off=" << off <<
- " actual=" << actual << dendl;
- bufferlist bl;
- buf.ov_buf[0] = bl.append_hole(bs).c_str();
- buf.ov_vec.v_count[0] = bs;
- ext.iv_index[0] = off;
- ext.iv_vec.v_count[0] = bs;
- attr.ov_vec.v_count[0] = 0;
-
- left -= actual;
- // Read from Motr.
- op = nullptr;
- rc = m0_obj_op(this->mobj, M0_OC_READ, &ext, &buf, &attr, 0, 0, &op);
- ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): init read op rc=" << rc << dendl;
- if (rc != 0) {
- ldpp_dout(dpp, 0) << __func__ << ": read failed during m0_obj_op, rc=" << rc << dendl;
- goto out;
- }
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
- if (rc != 0) {
- ldpp_dout(dpp, 0) << __func__ << ": read failed, m0_op_wait rc=" << rc << dendl;
- goto out;
- }
- // Call `cb` to process returned data.
- ldpp_dout(dpp, 20) << "MotrObject::read_mobj(): call cb to process data" << dendl;
- cb->handle_data(bl, 0, actual);
- }
-
-out:
- m0_indexvec_free(&ext);
- m0_bufvec_free(&attr);
- m0_bufvec_free2(&buf);
- this->close_mobj();
-
- return rc;
-}
-
-int MotrObject::get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent)
-{
- int rc = 0;
- string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
- int max = 1000;
- vector<string> keys(max);
- vector<bufferlist> vals(max);
- bufferlist bl;
- bufferlist::const_iterator iter;
-
- if (this->get_bucket()->get_info().versioning_status() == BUCKET_VERSIONED ||
- this->get_bucket()->get_info().versioning_status() == BUCKET_SUSPENDED) {
-
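- // For a versioned bucket the index may hold several entries for the same
- // object name; find the one flagged as current, checking the cache first.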
- rgw_bucket_dir_entry ent_to_check;
-
- if (this->store->get_obj_meta_cache()->get(dpp, this->get_name(), bl) == 0) {
- iter = bl.cbegin();
- ent_to_check.decode(iter);
- if (ent_to_check.is_current()) {
- ent = ent_to_check;
- rc = 0;
- goto out;
- }
- }
-
- ldpp_dout(dpp, 20) <<__func__<< ": versioned bucket!" << dendl;
- keys[0] = this->get_name();
- rc = store->next_query_by_name(bucket_index_iname, keys, vals);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << __func__ << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- rc = -ENOENT;
- for (const auto& bl: vals) {
- if (bl.length() == 0)
- break;
-
- iter = bl.cbegin();
- ent_to_check.decode(iter);
- if (ent_to_check.is_current()) {
- ldpp_dout(dpp, 20) <<__func__<< ": found current version!" << dendl;
- ent = ent_to_check;
- rc = 0;
-
- this->store->get_obj_meta_cache()->put(dpp, this->get_name(), bl);
-
- break;
- }
- }
- } else {
- if (this->store->get_obj_meta_cache()->get(dpp, this->get_key().get_oid(), bl)) {
- ldpp_dout(dpp, 20) <<__func__<< ": non-versioned bucket!" << dendl;
- rc = this->store->do_idx_op_by_name(bucket_index_iname,
- M0_IC_GET, this->get_key().get_oid(), bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << __func__ << "ERROR: failed to get object's entry from bucket index: rc="
- << rc << dendl;
- return rc;
- }
- this->store->get_obj_meta_cache()->put(dpp, this->get_key().get_oid(), bl);
- }
-
- bufferlist& blr = bl;
- iter = blr.cbegin();
- ent.decode(iter);
- }
-
-out:
- if (rc == 0) {
- sal::Attrs dummy;
- decode(dummy, iter);
- meta.decode(iter);
- ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << meta.layout_id << dendl;
- char fid_str[M0_FID_STR_LEN];
- snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&meta.oid));
- ldpp_dout(dpp, 70) << __func__ << ": oid=" << fid_str << dendl;
- } else
- ldpp_dout(dpp, 0) <<__func__<< ": rc=" << rc << dendl;
-
- return rc;
-}
-
-int MotrObject::update_version_entries(const DoutPrefixProvider *dpp)
-{
- int rc;
- int max = 10;
- vector<string> keys(max);
- vector<bufferlist> vals(max);
-
- string bucket_index_iname = "motr.rgw.bucket.index." + this->get_bucket()->get_name();
- keys[0] = this->get_name();
- rc = store->next_query_by_name(bucket_index_iname, keys, vals);
- ldpp_dout(dpp, 20) << "get all versions, name = " << this->get_name() << "rc = " << rc << dendl;
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- // no entries returned.
- if (rc == 0)
- return 0;
-
- for (const auto& bl: vals) {
- if (bl.length() == 0)
- break;
-
- rgw_bucket_dir_entry ent;
- auto iter = bl.cbegin();
- ent.decode(iter);
-
- if (0 != ent.key.name.compare(0, this->get_name().size(), this->get_name()))
- continue;
-
- if (!ent.is_current())
- continue;
-
- // Remove from the cache.
- store->get_obj_meta_cache()->remove(dpp, this->get_name());
-
- rgw::sal::Attrs attrs;
- decode(attrs, iter);
- MotrObject::Meta meta;
- meta.decode(iter);
-
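- // Re-encode the entry with only FLAG_VER set (FLAG_CURRENT cleared), so
- // the existing entry is no longer marked as the current version.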
- ent.flags = rgw_bucket_dir_entry::FLAG_VER;
- string key;
- if (ent.key.instance.empty())
- key = ent.key.name;
- else {
- char buf[ent.key.name.size() + ent.key.instance.size() + 16];
- snprintf(buf, sizeof(buf), "%s[%s]", ent.key.name.c_str(), ent.key.instance.c_str());
- key = buf;
- }
- ldpp_dout(dpp, 20) << "update one version, key = " << key << dendl;
- bufferlist ent_bl;
- ent.encode(ent_bl);
- encode(attrs, ent_bl);
- meta.encode(ent_bl);
-
- rc = store->do_idx_op_by_name(bucket_index_iname,
- M0_IC_PUT, key, ent_bl);
- if (rc < 0)
- break;
- }
- return rc;
-}
-
-// Scan object_nnn_part_index to get all parts, then open their motr objects.
-// TODO: all parts are opened in the POC. But for a large object, for example,
-// a 5GB object will have about 300 parts (with the default 15MB part size). A better
-// way of managing opened objects may be needed.
-int MotrObject::get_part_objs(const DoutPrefixProvider* dpp,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs)
-{
- int rc;
- int max_parts = 1000;
- int marker = 0;
- uint64_t off = 0;
- bool truncated = false;
- std::unique_ptr<rgw::sal::MultipartUpload> upload;
-
- upload = this->get_bucket()->get_multipart_upload(this->get_name(), string());
-
- do {
- rc = upload->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated);
- if (rc == -ENOENT) {
- rc = -ERR_NO_SUCH_UPLOAD;
- }
- if (rc < 0)
- return rc;
-
- std::map<uint32_t, std::unique_ptr<MultipartPart>>& parts = upload->get_parts();
- for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) {
-
- MultipartPart *mpart = part_iter->second.get();
- MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
- uint32_t part_num = mmpart->get_num();
- uint64_t part_size = mmpart->get_size();
-
- string part_obj_name = this->get_bucket()->get_name() + "." +
- this->get_key().get_oid() +
- ".part." + std::to_string(part_num);
- std::unique_ptr<rgw::sal::Object> obj;
- obj = this->bucket->get_object(rgw_obj_key(part_obj_name));
- std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
-
- ldpp_dout(dpp, 20) << "get_part_objs: off = " << off << ", size = " << part_size << dendl;
- mobj->part_off = off;
- mobj->part_size = part_size;
- mobj->part_num = part_num;
- mobj->meta = mmpart->meta;
-
- part_objs.emplace(part_num, std::move(mobj));
-
- off += part_size;
- }
- } while (truncated);
-
- return 0;
-}
-
-int MotrObject::open_part_objs(const DoutPrefixProvider* dpp,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs)
-{
- //for (auto& iter: part_objs) {
- for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) {
- MotrObject* obj = static_cast<MotrObject *>(iter->second.get());
- ldpp_dout(dpp, 20) << "open_part_objs: name = " << obj->get_name() << dendl;
- int rc = obj->open_mobj(dpp);
- if (rc < 0)
- return rc;
- }
-
- return 0;
-}
-
-int MotrObject::delete_part_objs(const DoutPrefixProvider* dpp)
-{
- std::unique_ptr<rgw::sal::MultipartUpload> upload;
- upload = this->get_bucket()->get_multipart_upload(this->get_name(), string());
- std::unique_ptr<rgw::sal::MotrMultipartUpload> mupload(static_cast<rgw::sal::MotrMultipartUpload *>(upload.release()));
- return mupload->delete_parts(dpp);
-}
-
-int MotrObject::read_multipart_obj(const DoutPrefixProvider* dpp,
- int64_t off, int64_t end, RGWGetDataCB* cb,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs)
-{
- int64_t cursor = off;
-
- ldpp_dout(dpp, 20) << "read_multipart_obj: off=" << off << " end=" << end << dendl;
-
- // Find the parts which are in the (off, end) range and
- // read data from them. Note: the `end` argument is inclusive.
- for (auto iter = part_objs.begin(); iter != part_objs.end(); ++iter) {
- MotrObject* obj = static_cast<MotrObject *>(iter->second.get());
- int64_t part_off = obj->part_off;
- int64_t part_size = obj->part_size;
- int64_t part_end = obj->part_off + obj->part_size - 1;
- ldpp_dout(dpp, 20) << "read_multipart_obj: part_off=" << part_off
- << " part_end=" << part_end << dendl;
- if (part_end < off)
- continue;
-
- int64_t local_off = cursor - obj->part_off;
- int64_t local_end = part_end < end? part_size - 1 : end - part_off;
- ldpp_dout(dpp, 20) << "real_multipart_obj: name=" << obj->get_name()
- << " local_off=" << local_off
- << " local_end=" << local_end << dendl;
- int rc = obj->read_mobj(dpp, local_off, local_end, cb);
- if (rc < 0)
- return rc;
-
- cursor = part_end + 1;
- if (cursor > end)
- break;
- }
-
- return 0;
-}
-
-static unsigned roundup(unsigned x, unsigned by)
-{
- return ((x - 1) / by + 1) * by;
-}
-
-unsigned MotrObject::get_optimal_bs(unsigned len)
-{
- struct m0_pool_version *pver;
-
- pver = m0_pool_version_find(&store->instance->m0c_pools_common,
- &mobj->ob_attr.oa_pver);
- M0_ASSERT(pver != nullptr);
- struct m0_pdclust_attr *pa = &pver->pv_attr;
- uint64_t lid = M0_OBJ_LAYOUT_ID(meta.layout_id);
- unsigned unit_sz = m0_obj_layout_id_to_unit_size(lid);
- unsigned grp_sz = unit_sz * pa->pa_N;
-
- // bs should be max 4-times pool-width deep counting by 1MB units, or
- // 8-times deep counting by 512K units, 16-times deep by 256K units,
- // and so on. Several units to one target will be aggregated to make
- // fewer network RPCs, disk i/o operations and BE transactions.
- // For unit sizes of 32K or less, the depth is 128, which
- // makes it 32K * 128 == 4MB - the maximum amount per target when
- // the performance is still good on LNet (which has max 1MB frames).
- // TODO: it may be different on libfabric, should be re-measured.
- unsigned depth = 128 / ((unit_sz + 0x7fff) / 0x8000);
- if (depth == 0)
- depth = 1;
- // P * N / (N + K + S) - number of data units to span the pool-width
- unsigned max_bs = depth * unit_sz * pa->pa_P * pa->pa_N /
- (pa->pa_N + pa->pa_K + pa->pa_S);
- max_bs = roundup(max_bs, grp_sz); // multiple of group size
- if (len >= max_bs)
- return max_bs;
- else if (len <= grp_sz)
- return grp_sz;
- else
- return roundup(len, grp_sz);
-}
-
-void MotrAtomicWriter::cleanup()
-{
- m0_indexvec_free(&ext);
- m0_bufvec_free(&attr);
- m0_bufvec_free2(&buf);
- acc_data.clear();
- obj.close_mobj();
- old_obj.close_mobj();
-}
-
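-// Fill the Motr buffer, extent and attribute vectors with up to MAX_BUFVEC_NR
-// fragments taken from the accumulated data, starting at acc_off.
-// Returns the number of bytes staged for the next write op.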
-unsigned MotrAtomicWriter::populate_bvec(unsigned len, bufferlist::iterator &bi)
-{
- unsigned i, l, done = 0;
- const char *data;
-
- for (i = 0; i < MAX_BUFVEC_NR && len > 0; ++i) {
- l = bi.get_ptr_and_advance(len, &data);
- buf.ov_buf[i] = (char*)data;
- buf.ov_vec.v_count[i] = l;
- ext.iv_index[i] = acc_off;
- ext.iv_vec.v_count[i] = l;
- attr.ov_vec.v_count[i] = 0;
- acc_off += l;
- len -= l;
- done += l;
- }
- buf.ov_vec.v_nr = i;
- ext.iv_vec.v_nr = i;
-
- return done;
-}
-
-int MotrAtomicWriter::write()
-{
- int rc;
- unsigned bs, left;
- struct m0_op *op;
- bufferlist::iterator bi;
-
- left = acc_data.length();
-
- if (!obj.is_opened()) {
- rc = obj.create_mobj(dpp, left);
- if (rc == -EEXIST)
- rc = obj.open_mobj(dpp);
- if (rc != 0) {
- char fid_str[M0_FID_STR_LEN];
- snprintf(fid_str, ARRAY_SIZE(fid_str), U128X_F, U128_P(&obj.meta.oid));
- ldpp_dout(dpp, 0) << "ERROR: failed to create/open motr object "
- << fid_str << " (" << obj.get_bucket()->get_name()
- << "/" << obj.get_key().get_oid() << "): rc=" << rc
- << dendl;
- goto err;
- }
- }
-
- total_data_size += left;
-
- bs = obj.get_optimal_bs(left);
- ldpp_dout(dpp, 20) <<__func__<< ": left=" << left << " bs=" << bs << dendl;
-
- bi = acc_data.begin();
- while (left > 0) {
- if (left < bs)
- bs = obj.get_optimal_bs(left);
- if (left < bs) {
- acc_data.append_zero(bs - left);
- auto off = bi.get_off();
- bufferlist tmp;
- acc_data.splice(off, bs, &tmp);
- acc_data.clear();
- acc_data.append(tmp.c_str(), bs); // make it a single buf
- bi = acc_data.begin();
- left = bs;
- }
-
- left -= this->populate_bvec(bs, bi);
-
- op = nullptr;
- rc = m0_obj_op(obj.mobj, M0_OC_WRITE, &ext, &buf, &attr, 0, 0, &op);
- if (rc != 0)
- goto err;
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
- if (rc != 0)
- goto err;
- }
- acc_data.clear();
-
- return 0;
-
-err:
- this->cleanup();
- return rc;
-}
-
-static const unsigned MAX_ACC_SIZE = 32 * 1024 * 1024;
-
-// Accumulate enough data first to make a reasonable decision about the
-// optimal unit size for a new object, or the block size (bs) for an existing
-// object (32M seems enough for 4M units in 8+2 parity groups, a common
-// config on wide pools), and then launch the write operations.
-int MotrAtomicWriter::process(bufferlist&& data, uint64_t offset)
-{
- if (data.length() == 0) { // last call, flush data
- int rc = 0;
- if (acc_data.length() != 0)
- rc = this->write();
- this->cleanup();
- return rc;
- }
-
- if (acc_data.length() == 0)
- acc_off = offset;
-
- acc_data.append(std::move(data));
- if (acc_data.length() < MAX_ACC_SIZE)
- return 0;
-
- return this->write();
-}
-
-int MotrAtomicWriter::complete(size_t accounted_size, const std::string& etag,
- ceph::real_time *mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch,
- const std::string *user_data,
- rgw_zone_set *zones_trace, bool *canceled,
- optional_yield y)
-{
- int rc = 0;
-
- if (acc_data.length() != 0) { // check again, just in case
- rc = this->write();
- this->cleanup();
- if (rc != 0)
- return rc;
- }
-
- bufferlist bl;
- rgw_bucket_dir_entry ent;
-
- // Set rgw_bucket_dir_entry. Some of the members of this structure may not
- // apply to motr, for example the storage_class.
- //
- // Check out AtomicObjectProcessor::complete() in rgw_putobj_processor.cc
- // and RGWRados::Object::Write::write_meta() in rgw_rados.cc for what to
- // set in the dir entry and how. Only the basic fields are set for the POC,
- // no ACLs or other attrs.
- obj.get_key().get_index_key(&ent.key);
- ent.meta.size = total_data_size;
- ent.meta.accounted_size = total_data_size;
- ent.meta.mtime = real_clock::is_zero(set_mtime)? ceph::real_clock::now() : set_mtime;
- ent.meta.etag = etag;
- ent.meta.owner = owner.to_str();
- ent.meta.owner_display_name = obj.get_bucket()->get_owner()->get_display_name();
- bool is_versioned = obj.get_key().have_instance();
- if (is_versioned)
- ent.flags = rgw_bucket_dir_entry::FLAG_VER | rgw_bucket_dir_entry::FLAG_CURRENT;
- ldpp_dout(dpp, 20) <<__func__<< ": key=" << obj.get_key().get_oid()
- << " etag: " << etag << " user_data=" << user_data << dendl;
- if (user_data)
- ent.meta.user_data = *user_data;
- ent.encode(bl);
-
- RGWBucketInfo &info = obj.get_bucket()->get_info();
- if (info.obj_lock_enabled() && info.obj_lock.has_rule()) {
- auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
- if (iter == attrs.end()) {
- real_time lock_until_date = info.obj_lock.get_lock_until_date(ent.meta.mtime);
- string mode = info.obj_lock.get_mode();
- RGWObjectRetention obj_retention(mode, lock_until_date);
- bufferlist retention_bl;
- obj_retention.encode(retention_bl);
- attrs[RGW_ATTR_OBJECT_RETENTION] = retention_bl;
- }
- }
- encode(attrs, bl);
- obj.meta.encode(bl);
- ldpp_dout(dpp, 20) <<__func__<< ": lid=0x" << std::hex << obj.meta.layout_id
- << dendl;
- if (is_versioned) {
- // Get the list of all versioned objects with the same key and
- // unset their FLAG_CURRENT later, if do_idx_op_by_name() is successful.
- // Note: without a distributed lock on the index it is possible that two
- // CURRENT entries appear in the bucket. For example, consider the
- // following scenario when two clients try to add a new object
- // version concurrently:
- //   (1) client 1 reads all the CURRENT entries
- //   (2) client 2 updates the index and sets the new CURRENT
- //   (3) client 1 updates the index and sets the new CURRENT
- // At step (1) client 1 does not yet see the new CURRENT record from step (2),
- // so it won't update it. As a result, two CURRENT version entries will appear
- // in the bucket.
- // TODO: updating the current version (unsetting the flag) and inserting the
- // new current version could be launched in one motr op. This requires changes
- // to do_idx_op() and do_idx_op_by_name().
- rc = obj.update_version_entries(dpp);
- if (rc < 0)
- return rc;
- }
- // Insert an entry into bucket index.
- string bucket_index_iname = "motr.rgw.bucket.index." + obj.get_bucket()->get_name();
- rc = store->do_idx_op_by_name(bucket_index_iname,
- M0_IC_PUT, obj.get_key().get_oid(), bl);
- if (rc == 0)
- store->get_obj_meta_cache()->put(dpp, obj.get_key().get_oid(), bl);
-
- if (old_obj.get_bucket()->get_info().versioning_status() != BUCKET_VERSIONED) {
- // Delete the old object data if it exists.
- old_obj.delete_mobj(dpp);
- }
-
- // TODO: We need to handle the object leak caused by parallel object upload by
- // making use of background gc, which is currently not enabled for motr.
- return rc;
-}
-
-int MotrMultipartUpload::delete_parts(const DoutPrefixProvider *dpp)
-{
- int rc;
- int max_parts = 1000;
- int marker = 0;
- bool truncated = false;
-
- // Scan all parts and delete the corresponding motr objects.
- do {
- rc = this->list_parts(dpp, store->ctx(), max_parts, marker, &marker, &truncated);
- if (rc == -ENOENT) {
- truncated = false;
- rc = 0;
- }
- if (rc < 0)
- return rc;
-
- std::map<uint32_t, std::unique_ptr<MultipartPart>>& parts = this->get_parts();
- for (auto part_iter = parts.begin(); part_iter != parts.end(); ++part_iter) {
-
- MultipartPart *mpart = part_iter->second.get();
- MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
- uint32_t part_num = mmpart->get_num();
-
- // Delete the part object. Note that the part object is not
- // inserted into the bucket index; only the corresponding motr object
- // needs to be deleted. That is why we don't call
- // MotrObject::delete_object().
- string part_obj_name = bucket->get_name() + "." +
- mp_obj.get_key() +
- ".part." + std::to_string(part_num);
- std::unique_ptr<rgw::sal::Object> obj;
- obj = this->bucket->get_object(rgw_obj_key(part_obj_name));
- std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
- mobj->meta = mmpart->meta;
- rc = mobj->delete_mobj(dpp);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << __func__ << ": Failed to delete object from Motr. rc=" << rc << dendl;
- return rc;
- }
- }
- } while (truncated);
-
- // Delete object part index.
- std::string oid = mp_obj.get_key();
- string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
- return store->delete_motr_idx_by_name(obj_part_iname);
-}
-
-int MotrMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct)
-{
- int rc;
- // Check if multipart upload exists
- bufferlist bl;
- std::unique_ptr<rgw::sal::Object> meta_obj;
- meta_obj = get_meta_obj();
- string bucket_multipart_iname =
- "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
- rc = store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_GET, meta_obj->get_oid(), bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart upload. rc=" << rc << dendl;
- return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
- }
-
- // Scan all parts and delete the corresponding motr objects.
- rc = this->delete_parts(dpp);
- if (rc < 0)
- return rc;
-
- bl.clear();
- // Remove the upload from bucket multipart index.
- rc = store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_DEL, meta_obj->get_key().get_oid(), bl);
- return rc;
-}
-
-std::unique_ptr<rgw::sal::Object> MotrMultipartUpload::get_meta_obj()
-{
- std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns));
- std::unique_ptr<rgw::sal::MotrObject> mobj(static_cast<rgw::sal::MotrObject *>(obj.release()));
- mobj->set_category(RGWObjCategory::MultiMeta);
- return mobj;
-}
-
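-// Placement information for an in-progress multipart upload. It is encoded
-// into the meta object's dir-entry user_data in MotrMultipartUpload::init()
-// and decoded again in MotrMultipartUpload::get_info().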
-struct motr_multipart_upload_info
-{
- rgw_placement_rule dest_placement;
-
- void encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
- encode(dest_placement, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl) {
- DECODE_START(1, bl);
- decode(dest_placement, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(motr_multipart_upload_info)
-
-int MotrMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y,
- ACLOwner& _owner,
- rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs)
-{
- int rc;
- std::string oid = mp_obj.get_key();
-
- owner = _owner;
-
- do {
- char buf[33];
- string tmp_obj_name;
- gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
- std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */
- upload_id.append(buf);
-
- mp_obj.init(oid, upload_id);
- tmp_obj_name = mp_obj.get_meta();
-
- std::unique_ptr<rgw::sal::Object> obj;
- obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns));
- // the meta object will be indexed with 0 size, we c
- obj->set_in_extra_data(true);
- obj->set_hash_source(oid);
-
- motr_multipart_upload_info upload_info;
- upload_info.dest_placement = dest_placement;
- bufferlist mpbl;
- encode(upload_info, mpbl);
-
- // Create an initial entry in the bucket. The entry will be
- // updated when the multipart upload is completed, e.g. with the
- // final size and etag.
- bufferlist bl;
- rgw_bucket_dir_entry ent;
- obj->get_key().get_index_key(&ent.key);
- ent.meta.owner = owner.get_id().to_str();
- ent.meta.category = RGWObjCategory::MultiMeta;
- ent.meta.mtime = ceph::real_clock::now();
- ent.meta.user_data.assign(mpbl.c_str(), mpbl.c_str() + mpbl.length());
- ent.encode(bl);
-
- // Insert an entry into the bucket multipart index so the in-progress
- // upload is not shown when listing the bucket.
- string bucket_multipart_iname =
- "motr.rgw.bucket." + obj->get_bucket()->get_name() + ".multiparts";
- rc = store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_PUT, obj->get_key().get_oid(), bl);
-
- } while (rc == -EEXIST);
-
- if (rc < 0)
- return rc;
-
- // Create object part index.
- // TODO: add bucket as part of the name.
- string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::init(): object part index=" << obj_part_iname << dendl;
- rc = store->create_motr_idx_by_name(obj_part_iname);
- if (rc == -EEXIST)
- rc = 0;
- if (rc < 0)
- // TODO: clean the bucket index entry
- ldpp_dout(dpp, 0) << "Failed to create object multipart index " << obj_part_iname << dendl;
-
- return rc;
-}
-
-int MotrMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct,
- int num_parts, int marker,
- int *next_marker, bool *truncated,
- bool assume_unsorted)
-{
- int rc;
- vector<string> key_vec(num_parts);
- vector<bufferlist> val_vec(num_parts);
-
- std::string oid = mp_obj.get_key();
- string obj_part_iname = "motr.rgw.object." + bucket->get_name() + "." + oid + ".parts";
- ldpp_dout(dpp, 20) << __func__ << ": object part index = " << obj_part_iname << dendl;
- key_vec[0].clear();
- key_vec[0] = "part.";
- char buf[32];
- snprintf(buf, sizeof(buf), "%08d", marker + 1);
- key_vec[0].append(buf);
- rc = store->next_query_by_name(obj_part_iname, key_vec, val_vec);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- return rc;
- }
-
- int last_num = 0;
- int part_cnt = 0;
- uint32_t expected_next = 0;
- ldpp_dout(dpp, 20) << __func__ << ": marker = " << marker << dendl;
- for (const auto& bl: val_vec) {
- if (bl.length() == 0)
- break;
-
- RGWUploadPartInfo info;
- auto iter = bl.cbegin();
- info.decode(iter);
- rgw::sal::Attrs attrs_dummy;
- decode(attrs_dummy, iter);
- MotrObject::Meta meta;
- meta.decode(iter);
-
- ldpp_dout(dpp, 20) << __func__ << ": part_num=" << info.num
- << " part_size=" << info.size << dendl;
- ldpp_dout(dpp, 20) << __func__ << ": meta:oid=[" << meta.oid.u_hi << "," << meta.oid.u_lo
- << "], meta:pvid=[" << meta.pver.f_container << "," << meta.pver.f_key
- << "], meta:layout id=" << meta.layout_id << dendl;
-
- if (!expected_next)
- expected_next = info.num + 1;
- else if (expected_next && info.num != expected_next)
- return -EINVAL;
- else expected_next = info.num + 1;
-
- if ((int)info.num > marker) {
- last_num = info.num;
- parts.emplace(info.num, std::make_unique<MotrMultipartPart>(info, meta));
- }
-
- part_cnt++;
- }
-
- // Does it have more parts?
- if (truncated) {
- *truncated = part_cnt >= num_parts;
- ldpp_dout(dpp, 20) << __func__ << ": truncated=" << *truncated << dendl;
- }
-
- if (next_marker)
- *next_marker = last_num;
-
- return 0;
-}
-
-// Heavily copied from rgw_sal_rados.cc
-int MotrMultipartUpload::complete(const DoutPrefixProvider *dpp,
- optional_yield y, CephContext* cct,
- map<int, string>& part_etags,
- list<rgw_obj_index_key>& remove_objs,
- uint64_t& accounted_size, bool& compressed,
- RGWCompressionInfo& cs_info, off_t& off,
- std::string& tag, ACLOwner& owner,
- uint64_t olh_epoch,
- rgw::sal::Object* target_obj)
-{
- char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
- char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
- std::string etag;
- bufferlist etag_bl;
- MD5 hash;
- // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
- hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
- bool truncated;
- int rc;
-
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): enter" << dendl;
- int total_parts = 0;
- int handled_parts = 0;
- int max_parts = 1000;
- int marker = 0;
- uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size;
- auto etags_iter = part_etags.begin();
- rgw::sal::Attrs attrs = target_obj->get_attrs();
-
- do {
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): list_parts()" << dendl;
- rc = list_parts(dpp, cct, max_parts, marker, &marker, &truncated);
- if (rc == -ENOENT) {
- rc = -ERR_NO_SUCH_UPLOAD;
- }
- if (rc < 0)
- return rc;
-
- total_parts += parts.size();
- if (!truncated && total_parts != (int)part_etags.size()) {
- ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts
- << " expected: " << part_etags.size() << dendl;
- rc = -ERR_INVALID_PART;
- return rc;
- }
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): parts.size()=" << parts.size() << dendl;
-
- for (auto obj_iter = parts.begin();
- etags_iter != part_etags.end() && obj_iter != parts.end();
- ++etags_iter, ++obj_iter, ++handled_parts) {
- MultipartPart *mpart = obj_iter->second.get();
- MotrMultipartPart *mmpart = static_cast<MotrMultipartPart *>(mpart);
- RGWUploadPartInfo *part = &mmpart->info;
-
- uint64_t part_size = part->accounted_size;
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part_size=" << part_size << dendl;
- if (handled_parts < (int)part_etags.size() - 1 &&
- part_size < min_part_size) {
- rc = -ERR_TOO_SMALL;
- return rc;
- }
-
- char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
- if (etags_iter->first != (int)obj_iter->first) {
- ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: "
- << etags_iter->first << " next uploaded: "
- << obj_iter->first << dendl;
- rc = -ERR_INVALID_PART;
- return rc;
- }
- string part_etag = rgw_string_unquote(etags_iter->second);
- if (part_etag.compare(part->etag) != 0) {
- ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first
- << " etag: " << etags_iter->second << dendl;
- rc = -ERR_INVALID_PART;
- return rc;
- }
-
- hex_to_buf(part->etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE);
- hash.Update((const unsigned char *)petag, sizeof(petag));
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): calc etag " << dendl;
-
- string oid = mp_obj.get_part(part->num);
- rgw_obj src_obj;
- src_obj.init_ns(bucket->get_key(), oid, mp_ns);
-
-#if 0 // does Motr backend need it?
- /* update manifest for part */
- if (part->manifest.empty()) {
- ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj="
- << src_obj << dendl;
- rc = -ERR_INVALID_PART;
- return rc;
- } else {
- manifest.append(dpp, part->manifest, store->get_zone());
- }
- ldpp_dout(dpp, 0) << "MotrMultipartUpload::complete(): manifest " << dendl;
-#endif
-
- bool part_compressed = (part->cs_info.compression_type != "none");
- if ((handled_parts > 0) &&
- ((part_compressed != compressed) ||
- (cs_info.compression_type != part->cs_info.compression_type))) {
- ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload ("
- << cs_info.compression_type << ">>" << part->cs_info.compression_type << ")" << dendl;
- rc = -ERR_INVALID_PART;
- return rc;
- }
-
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): part compression" << dendl;
- if (part_compressed) {
- int64_t new_ofs; // offset in compression data for new part
- if (cs_info.blocks.size() > 0)
- new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
- else
- new_ofs = 0;
- for (const auto& block : part->cs_info.blocks) {
- compression_block cb;
- cb.old_ofs = block.old_ofs + cs_info.orig_size;
- cb.new_ofs = new_ofs;
- cb.len = block.len;
- cs_info.blocks.push_back(cb);
- new_ofs = cb.new_ofs + cb.len;
- }
- if (!compressed)
- cs_info.compression_type = part->cs_info.compression_type;
- cs_info.orig_size += part->cs_info.orig_size;
- compressed = true;
- }
-
- // We may not need to do the following, as remove_objs lists the entries
- // that should not show up when listing a bucket. Since we store the
- // in-progress upload's metadata in a separate index, it is not shown
- // when listing a bucket anyway.
- rgw_obj_index_key remove_key;
- src_obj.key.get_index_key(&remove_key);
- remove_objs.push_back(remove_key);
-
- off += part_size;
- accounted_size += part->accounted_size;
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): off=" << off << ", accounted_size = " << accounted_size << dendl;
- }
- } while (truncated);
- hash.Final((unsigned char *)final_etag);
-
- buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str);
- snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2],
- sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2,
- "-%lld", (long long)part_etags.size());
- etag = final_etag_str;
- ldpp_dout(dpp, 20) << "calculated etag: " << etag << dendl;
- etag_bl.append(etag);
- attrs[RGW_ATTR_ETAG] = etag_bl;
-
- if (compressed) {
- // write compression attribute to full object
- bufferlist tmp;
- encode(cs_info, tmp);
- attrs[RGW_ATTR_COMPRESSION] = tmp;
- }
-
- // Read the object's multipart_upload_info.
- // TODO: all those index name and key constructions should be implemented as
- // member functions.
- bufferlist bl;
- std::unique_ptr<rgw::sal::Object> meta_obj;
- meta_obj = get_meta_obj();
- string bucket_multipart_iname =
- "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
- rc = this->store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_GET, meta_obj->get_key().get_oid(), bl);
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): read entry from bucket multipart index rc=" << rc << dendl;
- if (rc < 0)
- return rc;
- rgw_bucket_dir_entry ent;
- bufferlist& blr = bl;
- auto ent_iter = blr.cbegin();
- ent.decode(ent_iter);
-
- // Update the dir entry and insert it to the bucket index so
- // the object will be seen when listing the bucket.
- bufferlist update_bl;
- target_obj->get_key().get_index_key(&ent.key); // Change to official name :)
- ent.meta.size = off;
- ent.meta.accounted_size = accounted_size;
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): obj size=" << ent.meta.size
- << " obj accounted size=" << ent.meta.accounted_size << dendl;
- ent.meta.mtime = ceph::real_clock::now();
- ent.meta.etag = etag;
- ent.encode(update_bl);
- encode(attrs, update_bl);
- MotrObject::Meta meta_dummy;
- meta_dummy.encode(update_bl);
-
- string bucket_index_iname = "motr.rgw.bucket.index." + meta_obj->get_bucket()->get_name();
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): target_obj name=" << target_obj->get_name()
- << " target_obj oid=" << target_obj->get_oid() << dendl;
- rc = store->do_idx_op_by_name(bucket_index_iname, M0_IC_PUT,
- target_obj->get_name(), update_bl);
- if (rc < 0)
- return rc;
-
- // Put into metadata cache.
- store->get_obj_meta_cache()->put(dpp, target_obj->get_name(), update_bl);
-
- // Now we can remove it from bucket multipart index.
- ldpp_dout(dpp, 20) << "MotrMultipartUpload::complete(): remove from bucket multipartindex " << dendl;
- return store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_DEL, meta_obj->get_key().get_oid(), bl);
-}
-
-int MotrMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs)
-{
- if (!rule && !attrs) {
- return 0;
- }
-
- if (rule) {
- if (!placement.empty()) {
- *rule = &placement;
- if (!attrs) {
- /* Don't need attrs, done */
- return 0;
- }
- } else {
- *rule = nullptr;
- }
- }
-
- std::unique_ptr<rgw::sal::Object> meta_obj;
- meta_obj = get_meta_obj();
- meta_obj->set_in_extra_data(true);
-
- // Read the object's multipart_upload_info.
- bufferlist bl;
- string bucket_multipart_iname =
- "motr.rgw.bucket." + meta_obj->get_bucket()->get_name() + ".multiparts";
- int rc = this->store->do_idx_op_by_name(bucket_multipart_iname,
- M0_IC_GET, meta_obj->get_key().get_oid(), bl);
- if (rc < 0) {
- ldpp_dout(dpp, 0) << __func__ << ": Failed to get multipart info. rc=" << rc << dendl;
- return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
- }
-
- rgw_bucket_dir_entry ent;
- bufferlist& blr = bl;
- auto ent_iter = blr.cbegin();
- ent.decode(ent_iter);
-
- if (attrs) {
- bufferlist etag_bl;
- string& etag = ent.meta.etag;
- ldpp_dout(dpp, 20) << "object's etag: " << ent.meta.etag << dendl;
- etag_bl.append(etag.c_str(), etag.size());
- attrs->emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl));
- if (!rule || *rule != nullptr) {
- /* placement was cached; don't actually read */
- return 0;
- }
- }
-
- /* Decode multipart_upload_info */
- motr_multipart_upload_info upload_info;
- bufferlist mpbl;
- mpbl.append(ent.meta.user_data.c_str(), ent.meta.user_data.size());
- auto mpbl_iter = mpbl.cbegin();
- upload_info.decode(mpbl_iter);
- placement = upload_info.dest_placement;
- *rule = &placement;
-
- return 0;
-}
-
-std::unique_ptr<Writer> MotrMultipartUpload::get_writer(
- const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- uint64_t part_num,
- const std::string& part_num_str)
-{
- return std::make_unique<MotrMultipartWriter>(dpp, y, this,
- obj, store, owner,
- ptail_placement_rule, part_num, part_num_str);
-}
-
-int MotrMultipartWriter::prepare(optional_yield y)
-{
- string part_obj_name = head_obj->get_bucket()->get_name() + "." +
- head_obj->get_key().get_oid() +
- ".part." + std::to_string(part_num);
- ldpp_dout(dpp, 20) << "bucket=" << head_obj->get_bucket()->get_name() << "part_obj_name=" << part_obj_name << dendl;
- part_obj = std::make_unique<MotrObject>(this->store, rgw_obj_key(part_obj_name), head_obj->get_bucket());
- if (part_obj == nullptr)
- return -ENOMEM;
-
- // s3 client may retry uploading part, so the part may have already
- // been created.
- int rc = part_obj->create_mobj(dpp, store->cctx->_conf->rgw_max_chunk_size);
- if (rc == -EEXIST) {
- rc = part_obj->open_mobj(dpp);
- if (rc < 0)
- return rc;
- }
- return rc;
-}
-
-int MotrMultipartWriter::process(bufferlist&& data, uint64_t offset)
-{
- int rc = part_obj->write_mobj(dpp, std::move(data), offset);
- if (rc == 0) {
- actual_part_size += data.length();
- ldpp_dout(dpp, 20) << " write_mobj(): actual_part_size=" << actual_part_size << dendl;
- }
- return rc;
-}
-
-int MotrMultipartWriter::complete(size_t accounted_size, const std::string& etag,
- ceph::real_time *mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch,
- const std::string *user_data,
- rgw_zone_set *zones_trace, bool *canceled,
- optional_yield y)
-{
- // Should the dir entry (object metadata) be updated? For example,
- // the mtime.
-
- ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): enter" << dendl;
- // Add an entry into object_nnn_part_index.
- bufferlist bl;
- RGWUploadPartInfo info;
- info.num = part_num;
- info.etag = etag;
- info.size = actual_part_size;
- info.accounted_size = accounted_size;
- info.modified = real_clock::now();
-
- bool compressed;
- int rc = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info);
- ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): compression rc=" << rc << dendl;
- if (rc < 0) {
- ldpp_dout(dpp, 1) << "cannot get compression info" << dendl;
- return rc;
- }
- encode(info, bl);
- encode(attrs, bl);
- part_obj->meta.encode(bl);
-
- string p = "part.";
- char buf[32];
- snprintf(buf, sizeof(buf), "%08d", (int)part_num);
- p.append(buf);
- string obj_part_iname = "motr.rgw.object." + head_obj->get_bucket()->get_name() + "." +
- head_obj->get_key().get_oid() + ".parts";
- ldpp_dout(dpp, 20) << "MotrMultipartWriter::complete(): object part index = " << obj_part_iname << dendl;
- rc = store->do_idx_op_by_name(obj_part_iname, M0_IC_PUT, p, bl);
- if (rc < 0) {
- return rc == -ENOENT ? -ERR_NO_SUCH_UPLOAD : rc;
- }
-
- return 0;
-}
-
-std::unique_ptr<RGWRole> MotrStore::get_role(std::string name,
- std::string tenant,
- std::string path,
- std::string trust_policy,
- std::string max_session_duration_str,
- std::multimap<std::string,std::string> tags)
-{
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-std::unique_ptr<RGWRole> MotrStore::get_role(const RGWRoleInfo& info)
-{
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-std::unique_ptr<RGWRole> MotrStore::get_role(std::string id)
-{
- RGWRole* p = nullptr;
- return std::unique_ptr<RGWRole>(p);
-}
-
-int MotrStore::get_roles(const DoutPrefixProvider *dpp,
- optional_yield y,
- const std::string& path_prefix,
- const std::string& tenant,
- vector<std::unique_ptr<RGWRole>>& roles)
-{
- return 0;
-}
-
-std::unique_ptr<RGWOIDCProvider> MotrStore::get_oidc_provider()
-{
- RGWOIDCProvider* p = nullptr;
- return std::unique_ptr<RGWOIDCProvider>(p);
-}
-
-int MotrStore::get_oidc_providers(const DoutPrefixProvider *dpp,
- const std::string& tenant,
- vector<std::unique_ptr<RGWOIDCProvider>>& providers)
-{
- return 0;
-}
-
-std::unique_ptr<MultipartUpload> MotrBucket::get_multipart_upload(const std::string& oid,
- std::optional<std::string> upload_id,
- ACLOwner owner, ceph::real_time mtime)
-{
- return std::make_unique<MotrMultipartUpload>(store, this, oid, upload_id, owner, mtime);
-}
-
-std::unique_ptr<Writer> MotrStore::get_append_writer(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- const std::string& unique_tag,
- uint64_t position,
- uint64_t *cur_accounted_size) {
- return nullptr;
-}
-
-std::unique_ptr<Writer> MotrStore::get_atomic_writer(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- uint64_t olh_epoch,
- const std::string& unique_tag) {
- return std::make_unique<MotrAtomicWriter>(dpp, y,
- obj, this, owner,
- ptail_placement_rule, olh_epoch, unique_tag);
-}
-
-const std::string& MotrStore::get_compression_type(const rgw_placement_rule& rule)
-{
- return zone.zone_params->get_compression_type(rule);
-}
-
-bool MotrStore::valid_placement(const rgw_placement_rule& rule)
-{
- return zone.zone_params->valid_placement(rule);
-}
-
-std::unique_ptr<User> MotrStore::get_user(const rgw_user &u)
-{
- ldout(cctx, 20) << "bucket's user: " << u.to_str() << dendl;
- return std::make_unique<MotrUser>(this, u);
-}
-
-int MotrStore::get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string &key, optional_yield y, std::unique_ptr<User> *user)
-{
- int rc;
- User *u;
- bufferlist bl;
- RGWUserInfo uinfo;
- MotrAccessKey access_key;
-
- rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
- M0_IC_GET, key, bl);
- if (rc < 0){
- ldout(cctx, 0) << "Access key not found: rc = " << rc << dendl;
- return rc;
- }
-
- bufferlist& blr = bl;
- auto iter = blr.cbegin();
- access_key.decode(iter);
-
- uinfo.user_id.from_str(access_key.user_id);
- ldout(cctx, 0) << "Loading user: " << uinfo.user_id.id << dendl;
- rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr);
- if (rc < 0){
- ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl;
- return rc;
- }
- u = new MotrUser(this, uinfo);
- if (!u)
- return -ENOMEM;
-
- user->reset(u);
- return 0;
-}
-
-int MotrStore::get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr<User>* user)
-{
- int rc;
- User *u;
- bufferlist bl;
- RGWUserInfo uinfo;
- MotrEmailInfo email_info;
- rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
- M0_IC_GET, email, bl);
- if (rc < 0){
- ldout(cctx, 0) << "Email Id not found: rc = " << rc << dendl;
- return rc;
- }
- auto iter = bl.cbegin();
- email_info.decode(iter);
- ldout(cctx, 0) << "Loading user: " << email_info.user_id << dendl;
- uinfo.user_id.from_str(email_info.user_id);
- rc = MotrUser().load_user_from_idx(dpp, this, uinfo, nullptr, nullptr);
- if (rc < 0){
- ldout(cctx, 0) << "Failed to load user: rc = " << rc << dendl;
- return rc;
- }
- u = new MotrUser(this, uinfo);
- if (!u)
- return -ENOMEM;
-
- user->reset(u);
- return 0;
-}
-
-int MotrStore::get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr<User>* user)
-{
- /* Swift keys and subusers are not supported for now */
- return 0;
-}
-
-int MotrStore::store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key)
-{
- int rc;
- bufferlist bl;
- access_key.encode(bl);
- rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
- M0_IC_PUT, access_key.id, bl);
- if (rc < 0){
- ldout(cctx, 0) << "Failed to store key: rc = " << rc << dendl;
- return rc;
- }
- return rc;
-}
-
-int MotrStore::delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key)
-{
- int rc;
- bufferlist bl;
- rc = do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY,
- M0_IC_DEL, access_key, bl);
- if (rc < 0){
- ldout(cctx, 0) << "Failed to delete key: rc = " << rc << dendl;
- }
- return rc;
-}
-
-int MotrStore::store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info )
-{
- int rc;
- bufferlist bl;
- email_info.encode(bl);
- rc = do_idx_op_by_name(RGW_IAM_MOTR_EMAIL_KEY,
- M0_IC_PUT, email_info.email_id, bl);
- if (rc < 0) {
- ldout(cctx, 0) << "Failed to store the user by email as key: rc = " << rc << dendl;
- }
- return rc;
-}
-
-std::unique_ptr<Object> MotrStore::get_object(const rgw_obj_key& k)
-{
- return std::make_unique<MotrObject>(this, k);
-}
-
-
-int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y)
-{
- int ret;
- Bucket* bp;
-
- bp = new MotrBucket(this, b, u);
- ret = bp->load_bucket(dpp, y);
- if (ret < 0) {
- delete bp;
- return ret;
- }
-
- bucket->reset(bp);
- return 0;
-}
-
-int MotrStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket)
-{
- Bucket* bp;
-
- bp = new MotrBucket(this, i, u);
- /* Don't need to fetch the bucket info, use the provided one */
-
- bucket->reset(bp);
- return 0;
-}
-
-int MotrStore::get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string& name, std::unique_ptr<Bucket>* bucket, optional_yield y)
-{
- rgw_bucket b;
-
- b.tenant = tenant;
- b.name = name;
-
- return get_bucket(dpp, u, b, bucket, y);
-}
-
-bool MotrStore::is_meta_master()
-{
- return true;
-}
-
-int MotrStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version *objv,
- bufferlist& in_data,
- JSONParser *jp, req_info& info,
- optional_yield y)
-{
- return 0;
-}
-
-int MotrStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
- bufferlist& in_data,
- RGWXMLDecoder::XMLParser* parser, req_info& info,
- optional_yield y)
-{
- return 0;
-}
-
-std::string MotrStore::zone_unique_id(uint64_t unique_num)
-{
- return "";
-}
-
-std::string MotrStore::zone_unique_trans_id(const uint64_t unique_num)
-{
- return "";
-}
-
-int MotrStore::get_zonegroup(const std::string& id, std::unique_ptr<ZoneGroup>* group)
-{
- /* XXX: for now only one zonegroup supported */
- ZoneGroup* zg;
- zg = new MotrZoneGroup(this, zone.zonegroup.get_group());
-
- group->reset(zg);
- return 0;
-}
-
-int MotrStore::list_all_zones(const DoutPrefixProvider* dpp,
- std::list<std::string>& zone_ids)
-{
- zone_ids.push_back(zone.get_id());
- return 0;
-}
-
-int MotrStore::cluster_stat(RGWClusterStat& stats)
-{
- return 0;
-}
-
-std::unique_ptr<Lifecycle> MotrStore::get_lifecycle(void)
-{
- return 0;
-}
-
-std::unique_ptr<Notification> MotrStore::get_notification(Object* obj, Object* src_obj, req_state* s,
- rgw::notify::EventType event_type, optional_yield y, const string* object_name)
-{
- return std::make_unique<MotrNotification>(obj, src_obj, event_type);
-}
-
-std::unique_ptr<Notification> MotrStore::get_notification(const DoutPrefixProvider* dpp, Object* obj,
- Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
- std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y)
-{
- return std::make_unique<MotrNotification>(obj, src_obj, event_type);
-}
-
-int MotrStore::log_usage(const DoutPrefixProvider *dpp, map<rgw_user_bucket, RGWUsageBatch>& usage_info)
-{
- return 0;
-}
-
-int MotrStore::log_op(const DoutPrefixProvider *dpp, string& oid, bufferlist& bl)
-{
- return 0;
-}
-
-int MotrStore::register_to_service_map(const DoutPrefixProvider *dpp, const string& daemon_type,
- const map<string, string>& meta)
-{
- return 0;
-}
-
-void MotrStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit,
- RGWRateLimitInfo& user_ratelimit,
- RGWRateLimitInfo& anon_ratelimit)
-{
- return;
-}
-
-void MotrStore::get_quota(RGWQuota& quota)
-{
- // XXX: Not handled for the first pass
- return;
-}
-
-int MotrStore::set_buckets_enabled(const DoutPrefixProvider *dpp, vector<rgw_bucket>& buckets, bool enabled)
-{
- return 0;
-}
-
-int MotrStore::get_sync_policy_handler(const DoutPrefixProvider *dpp,
- std::optional<rgw_zone_id> zone,
- std::optional<rgw_bucket> bucket,
- RGWBucketSyncPolicyHandlerRef *phandler,
- optional_yield y)
-{
- return 0;
-}
-
-RGWDataSyncStatusManager* MotrStore::get_data_sync_manager(const rgw_zone_id& source_zone)
-{
- return 0;
-}
-
-int MotrStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool *is_truncated,
- RGWUsageIter& usage_iter,
- map<rgw_user_bucket, rgw_usage_log_entry>& usage)
-{
- return 0;
-}
-
-int MotrStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
-{
- return 0;
-}
-
-int MotrStore::get_config_key_val(string name, bufferlist *bl)
-{
- return 0;
-}
-
-int MotrStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const string& section, const string& marker, void** phandle)
-{
- return 0;
-}
-
-int MotrStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list<string>& keys, bool* truncated)
-{
- return 0;
-}
-
-void MotrStore::meta_list_keys_complete(void* handle)
-{
- return;
-}
-
-std::string MotrStore::meta_get_marker(void* handle)
-{
- return "";
-}
-
-int MotrStore::meta_remove(const DoutPrefixProvider *dpp, string& metadata_key, optional_yield y)
-{
- return 0;
-}
-
-int MotrStore::open_idx(struct m0_uint128 *id, bool create, struct m0_idx *idx)
-{
- m0_idx_init(idx, &container.co_realm, id);
-
- if (!create)
- return 0; // nothing to do more
-
- // create index or make sure it's created
- struct m0_op *op = nullptr;
- int rc = m0_entity_create(nullptr, &idx->in_entity, &op);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
- goto out;
- }
-
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc != 0 && rc != -EEXIST)
- ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
-out:
- return rc;
-}
-
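-// Point the first segment of the bufvec at the caller's vector. No data is
-// copied, so the vector must stay alive until the index op completes.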
-static void set_m0bufvec(struct m0_bufvec *bv, vector<uint8_t>& vec)
-{
- *bv->ov_buf = reinterpret_cast<char*>(vec.data());
- *bv->ov_vec.v_count = vec.size();
-}
-
-// idx must be opened with open_idx() beforehand
-int MotrStore::do_idx_op(struct m0_idx *idx, enum m0_idx_opcode opcode,
- vector<uint8_t>& key, vector<uint8_t>& val, bool update)
-{
- int rc, rc_i;
- struct m0_bufvec k, v, *vp = &v;
- uint32_t flags = 0;
- struct m0_op *op = nullptr;
-
- if (m0_bufvec_empty_alloc(&k, 1) != 0) {
- ldout(cctx, 0) << "ERROR: failed to allocate key bufvec" << dendl;
- return -ENOMEM;
- }
-
- if (opcode == M0_IC_PUT || opcode == M0_IC_GET) {
- rc = -ENOMEM;
- if (m0_bufvec_empty_alloc(&v, 1) != 0) {
- ldout(cctx, 0) << "ERROR: failed to allocate value bufvec" << dendl;
- goto out;
- }
- }
-
- set_m0bufvec(&k, key);
- if (opcode == M0_IC_PUT)
- set_m0bufvec(&v, val);
-
- if (opcode == M0_IC_DEL)
- vp = nullptr;
-
- if (opcode == M0_IC_PUT && update)
- flags |= M0_OIF_OVERWRITE;
-
- rc = m0_idx_op(idx, opcode, &k, vp, &rc_i, flags, &op);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl;
- goto out;
- }
-
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl;
- goto out;
- }
-
- if (rc_i != 0) {
- ldout(cctx, 0) << "ERROR: idx op failed: " << rc_i << dendl;
- rc = rc_i;
- goto out;
- }
-
- if (opcode == M0_IC_GET) {
- val.resize(*v.ov_vec.v_count);
- memcpy(reinterpret_cast<char*>(val.data()), *v.ov_buf, *v.ov_vec.v_count);
- }
-
-out:
- m0_bufvec_free2(&k);
- if (opcode == M0_IC_GET)
- m0_bufvec_free(&v); // cleanup buffer after GET
- else if (opcode == M0_IC_PUT)
- m0_bufvec_free2(&v);
-
- return rc;
-}
-
-// Retrieve a range of key/value pairs starting from keys[0].
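-// Returns the number of pairs actually fetched, or a negative error code.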
-int MotrStore::do_idx_next_op(struct m0_idx *idx,
- vector<vector<uint8_t>>& keys,
- vector<vector<uint8_t>>& vals)
-{
- int rc;
- uint32_t i = 0;
- int nr_kvp = vals.size();
- int *rcs = new int[nr_kvp];
- struct m0_bufvec k, v;
- struct m0_op *op = nullptr;
-
- rc = m0_bufvec_empty_alloc(&k, nr_kvp)?:
- m0_bufvec_empty_alloc(&v, nr_kvp);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: failed to allocate kv bufvecs" << dendl;
- return rc;
- }
-
- set_m0bufvec(&k, keys[0]);
-
- rc = m0_idx_op(idx, M0_IC_NEXT, &k, &v, rcs, 0, &op);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: failed to init index op: " << rc << dendl;
- goto out;
- }
-
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: op failed: " << rc << dendl;
- goto out;
- }
-
- for (i = 0; i < v.ov_vec.v_nr; ++i) {
- if (rcs[i] < 0)
- break;
-
- vector<uint8_t>& key = keys[i];
- vector<uint8_t>& val = vals[i];
- key.resize(k.ov_vec.v_count[i]);
- val.resize(v.ov_vec.v_count[i]);
- memcpy(reinterpret_cast<char*>(key.data()), k.ov_buf[i], k.ov_vec.v_count[i]);
- memcpy(reinterpret_cast<char*>(val.data()), v.ov_buf[i], v.ov_vec.v_count[i]);
- }
-
-out:
- k.ov_vec.v_nr = i;
- v.ov_vec.v_nr = i;
- m0_bufvec_free(&k);
- m0_bufvec_free(&v); // cleanup buffer after GET
-
- delete []rcs;
- return rc ?: i;
-}
-
-// Retrieve a number of key/value pairs under the prefix starting
-// from the marker at key_out[0].
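-// Keys that share a common prefix up to the delimiter are collapsed into a
-// single "directory" entry. Returns the number of entries stored into
-// key_out/val_out, or a negative error code.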
-int MotrStore::next_query_by_name(string idx_name,
- vector<string>& key_out,
- vector<bufferlist>& val_out,
- string prefix, string delim)
-{
- unsigned nr_kvp = std::min(val_out.size(), 100UL);
- struct m0_idx idx = {};
- vector<vector<uint8_t>> keys(nr_kvp);
- vector<vector<uint8_t>> vals(nr_kvp);
- struct m0_uint128 idx_id;
- int i = 0, j, k = 0;
-
- index_name_to_motr_fid(idx_name, &idx_id);
- int rc = open_motr_idx(&idx_id, &idx);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: next_query_by_name(): failed to open index: rc="
- << rc << dendl;
- goto out;
- }
-
- // Only the first element for keys needs to be set for NEXT query.
- // The keys will be filled with the returned keys from the motr index.
- ldout(cctx, 20) <<__func__<< ": next_query_by_name(): index=" << idx_name
- << " prefix=" << prefix << " delim=" << delim << dendl;
- keys[0].assign(key_out[0].begin(), key_out[0].end());
- for (i = 0; i < (int)val_out.size(); i += k, k = 0) {
- rc = do_idx_next_op(&idx, keys, vals);
- ldout(cctx, 20) << "do_idx_next_op() = " << rc << dendl;
- if (rc < 0) {
- ldout(cctx, 0) << "ERROR: NEXT query failed. " << rc << dendl;
- goto out;
- }
-
- string dir;
- for (j = 0, k = 0; j < rc; ++j) {
- string key(keys[j].begin(), keys[j].end());
- size_t pos = std::string::npos;
- if (!delim.empty())
- pos = key.find(delim, prefix.length());
- if (pos != std::string::npos) { // DIR entry
- dir.assign(key, 0, pos + 1);
- if (dir.compare(0, prefix.length(), prefix) != 0)
- goto out;
- if (i + k == 0 || dir != key_out[i + k - 1]) // a new one
- key_out[i + k++] = dir;
- continue;
- }
- dir = "";
- if (key.compare(0, prefix.length(), prefix) != 0)
- goto out;
- key_out[i + k] = key;
- bufferlist& vbl = val_out[i + k];
- vbl.append(reinterpret_cast<char*>(vals[j].data()), vals[j].size());
- ++k;
- }
-
- if (rc < (int)nr_kvp) // there are no more keys to fetch
- break;
-
- string next_key;
- if (dir != "")
- next_key = dir + "\xff"; // skip all dir content in 1 step
- else
- next_key = key_out[i + k - 1] + " ";
- ldout(cctx, 0) << "do_idx_next_op(): next_key=" << next_key << dendl;
- keys[0].assign(next_key.begin(), next_key.end());
- }
-
-out:
- m0_idx_fini(&idx);
- return rc < 0 ? rc : i + k;
-}
-
-int MotrStore::delete_motr_idx_by_name(string iname)
-{
- struct m0_idx idx;
- struct m0_uint128 idx_id;
- struct m0_op *op = nullptr;
-
- ldout(cctx, 20) << "delete_motr_idx_by_name=" << iname << dendl;
-
- index_name_to_motr_fid(iname, &idx_id);
- m0_idx_init(&idx, &container.co_realm, &idx_id);
- m0_entity_open(&idx.in_entity, &op);
- int rc = m0_entity_delete(&idx.in_entity, &op);
- if (rc < 0)
- goto out;
-
- m0_op_launch(&op, 1);
-
- ldout(cctx, 70) << "waiting for op completion" << dendl;
-
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc == -ENOENT) // race deletion??
- rc = 0;
- else if (rc < 0)
- ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
-
- ldout(cctx, 20) << "delete_motr_idx_by_name rc=" << rc << dendl;
-
-out:
- m0_idx_fini(&idx);
- return rc;
-}
-
-int MotrStore::open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx)
-{
- m0_idx_init(idx, &container.co_realm, id);
- return 0;
-}
-
-// The following constants are from dix/fid_convert.h, which is not exposed.
-enum {
- M0_DIX_FID_DEVICE_ID_OFFSET = 32,
- M0_DIX_FID_DIX_CONTAINER_MASK = (1ULL << M0_DIX_FID_DEVICE_ID_OFFSET)
- - 1,
-};
-
-// md5 is used here; a more robust way to convert an index name to a fid is
-// needed to avoid collisions.
-void MotrStore::index_name_to_motr_fid(string iname, struct m0_uint128 *id)
-{
- unsigned char md5[16]; // 128/8 = 16
- MD5 hash;
-
- // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
- hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
- hash.Update((const unsigned char *)iname.c_str(), iname.length());
- hash.Final(md5);
-
- memcpy(&id->u_hi, md5, 8);
- memcpy(&id->u_lo, md5 + 8, 8);
- ldout(cctx, 20) << "id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl;
-
- struct m0_fid *fid = (struct m0_fid*)id;
- m0_fid_tset(fid, m0_dix_fid_type.ft_id,
- fid->f_container & M0_DIX_FID_DIX_CONTAINER_MASK, fid->f_key);
- ldout(cctx, 20) << "converted id = 0x" << std::hex << id->u_hi << ":0x" << std::hex << id->u_lo << dendl;
-}
-
-int MotrStore::do_idx_op_by_name(string idx_name, enum m0_idx_opcode opcode,
- string key_str, bufferlist &bl, bool update)
-{
- struct m0_idx idx;
- vector<uint8_t> key(key_str.begin(), key_str.end());
- vector<uint8_t> val;
- struct m0_uint128 idx_id;
-
- index_name_to_motr_fid(idx_name, &idx_id);
- int rc = open_motr_idx(&idx_id, &idx);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: failed to open index: " << rc << dendl;
- goto out;
- }
-
- if (opcode == M0_IC_PUT)
- val.assign(bl.c_str(), bl.c_str() + bl.length());
-
- ldout(cctx, 20) <<__func__<< ": do_idx_op_by_name(): op="
- << (opcode == M0_IC_PUT ? "PUT" : "GET")
- << " idx=" << idx_name << " key=" << key_str << dendl;
- rc = do_idx_op(&idx, opcode, key, val, update);
- if (rc == 0 && opcode == M0_IC_GET)
- // Append the returned value (blob) to the bufferlist.
- bl.append(reinterpret_cast<char*>(val.data()), val.size());
-
-out:
- m0_idx_fini(&idx);
- return rc;
-}
-
-int MotrStore::create_motr_idx_by_name(string iname)
-{
- struct m0_idx idx = {};
- struct m0_uint128 id;
-
- index_name_to_motr_fid(iname, &id);
- m0_idx_init(&idx, &container.co_realm, &id);
-
- // create index or make sure it's created
- struct m0_op *op = nullptr;
- int rc = m0_entity_create(nullptr, &idx.in_entity, &op);
- if (rc != 0) {
- ldout(cctx, 0) << "ERROR: m0_entity_create() failed: " << rc << dendl;
- goto out;
- }
-
- m0_op_launch(&op, 1);
- rc = m0_op_wait(op, M0_BITS(M0_OS_FAILED, M0_OS_STABLE), M0_TIME_NEVER) ?:
- m0_rc(op);
- m0_op_fini(op);
- m0_op_free(op);
-
- if (rc != 0 && rc != -EEXIST)
- ldout(cctx, 0) << "ERROR: index create failed: " << rc << dendl;
-out:
- m0_idx_fini(&idx);
- return rc;
-}
-
-// If a global index were checked (i.e. whether it has been created) every
-// time before it is queried (put/get), each query would take 2 Motr
-// operations. As the global indices' names and FIDs are already known when
-// MotrStore is created, we move the check and creation into newMotrStore().
-// A similar method is used for per-bucket/user indices. For example, the
-// bucket instance index is created when creating the bucket.
-int MotrStore::check_n_create_global_indices()
-{
- int rc = 0;
-
- for (const auto& iname : motr_global_indices) {
- rc = create_motr_idx_by_name(iname);
- if (rc < 0 && rc != -EEXIST)
- break;
- rc = 0;
- }
-
- return rc;
-}
-
-std::string MotrStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y)
-{
- char id[M0_FID_STR_LEN];
- struct m0_confc *confc = m0_reqh2confc(&instance->m0c_reqh);
-
- m0_fid_print(id, ARRAY_SIZE(id), &confc->cc_root->co_id);
- return std::string(id);
-}
-
-int MotrStore::init_metadata_cache(const DoutPrefixProvider *dpp,
- CephContext *cct)
-{
- this->obj_meta_cache = new MotrMetaCache(dpp, cct);
- this->get_obj_meta_cache()->set_enabled(true);
-
- this->user_cache = new MotrMetaCache(dpp, cct);
- this->get_user_cache()->set_enabled(true);
-
- this->bucket_inst_cache = new MotrMetaCache(dpp, cct);
- this->get_bucket_inst_cache()->set_enabled(true);
-
- return 0;
-}
-
- int MotrLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script)
- {
- return -ENOENT;
- }
-
- int MotrLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script)
- {
- return -ENOENT;
- }
-
- int MotrLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key)
- {
- return -ENOENT;
- }
-
- int MotrLuaManager::add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name)
- {
- return -ENOENT;
- }
-
- int MotrLuaManager::remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name)
- {
- return -ENOENT;
- }
-
- int MotrLuaManager::list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages)
- {
- return -ENOENT;
- }
-} // namespace rgw::sal
-
-extern "C" {
-
-void *newMotrStore(CephContext *cct)
-{
- int rc = -1;
- rgw::sal::MotrStore *store = new rgw::sal::MotrStore(cct);
-
- if (store) {
- store->conf.mc_is_oostore = true;
- // XXX: these params should be taken from config settings and
- // cct somehow?
- store->instance = nullptr;
- const auto& proc_ep = g_conf().get_val<std::string>("motr_my_endpoint");
- const auto& ha_ep = g_conf().get_val<std::string>("motr_ha_endpoint");
- const auto& proc_fid = g_conf().get_val<std::string>("motr_my_fid");
- const auto& profile = g_conf().get_val<std::string>("motr_profile_fid");
- const auto& admin_proc_ep = g_conf().get_val<std::string>("motr_admin_endpoint");
- const auto& admin_proc_fid = g_conf().get_val<std::string>("motr_admin_fid");
- const int init_flags = cct->get_init_flags();
- ldout(cct, 0) << "INFO: motr my endpoint: " << proc_ep << dendl;
- ldout(cct, 0) << "INFO: motr ha endpoint: " << ha_ep << dendl;
- ldout(cct, 0) << "INFO: motr my fid: " << proc_fid << dendl;
- ldout(cct, 0) << "INFO: motr profile fid: " << profile << dendl;
- store->conf.mc_local_addr = proc_ep.c_str();
- store->conf.mc_process_fid = proc_fid.c_str();
-
- ldout(cct, 0) << "INFO: init flags: " << init_flags << dendl;
- ldout(cct, 0) << "INFO: motr admin endpoint: " << admin_proc_ep << dendl;
- ldout(cct, 0) << "INFO: motr admin fid: " << admin_proc_fid << dendl;
-
- // HACK this is so that radosgw-admin uses a different client
- if (init_flags == 0) {
- store->conf.mc_process_fid = admin_proc_fid.c_str();
- store->conf.mc_local_addr = admin_proc_ep.c_str();
- } else {
- store->conf.mc_process_fid = proc_fid.c_str();
- store->conf.mc_local_addr = proc_ep.c_str();
- }
- store->conf.mc_ha_addr = ha_ep.c_str();
- store->conf.mc_profile = profile.c_str();
-
- ldout(cct, 50) << "INFO: motr profile fid: " << store->conf.mc_profile << dendl;
- ldout(cct, 50) << "INFO: ha addr: " << store->conf.mc_ha_addr << dendl;
- ldout(cct, 50) << "INFO: process fid: " << store->conf.mc_process_fid << dendl;
- ldout(cct, 50) << "INFO: motr endpoint: " << store->conf.mc_local_addr << dendl;
-
- store->conf.mc_tm_recv_queue_min_len = 64;
- store->conf.mc_max_rpc_msg_size = 524288;
- store->conf.mc_idx_service_id = M0_IDX_DIX;
- store->dix_conf.kc_create_meta = false;
- store->conf.mc_idx_service_conf = &store->dix_conf;
-
- if (!g_conf().get_val<bool>("motr_tracing_enabled")) {
- m0_trace_level_allow(M0_WARN); // allow errors and warnings in syslog anyway
- m0_trace_set_mmapped_buffer(false);
- }
-
- store->instance = nullptr;
- rc = m0_client_init(&store->instance, &store->conf, true);
- if (rc != 0) {
- ldout(cct, 0) << "ERROR: m0_client_init() failed: " << rc << dendl;
- goto out;
- }
-
- m0_container_init(&store->container, nullptr, &M0_UBER_REALM, store->instance);
- rc = store->container.co_realm.re_entity.en_sm.sm_rc;
- if (rc != 0) {
- ldout(cct, 0) << "ERROR: m0_container_init() failed: " << rc << dendl;
- goto out;
- }
-
- rc = m0_ufid_init(store->instance, &ufid_gr);
- if (rc != 0) {
- ldout(cct, 0) << "ERROR: m0_ufid_init() failed: " << rc << dendl;
- goto out;
- }
-
- // Create global indices if not yet.
- rc = store->check_n_create_global_indices();
- if (rc != 0) {
- ldout(cct, 0) << "ERROR: check_n_create_global_indices() failed: " << rc << dendl;
- goto out;
- }
-
- }
-
-out:
- if (rc != 0) {
- delete store;
- return nullptr;
- }
- return store;
-}
-
-}
+++ /dev/null
-
-// vim: ts=2 sw=2 expandtab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * SAL implementation for the CORTX Motr backend
- *
- * Copyright (C) 2021 Seagate Technology LLC and/or its Affiliates
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-extern "C" {
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wextern-c-compat"
-#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
-#include "motr/config.h"
-#include "motr/client.h"
-#pragma clang diagnostic pop
-}
-
-#include "rgw_sal_store.h"
-#include "rgw_rados.h"
-#include "rgw_notify.h"
-#include "rgw_oidc_provider.h"
-#include "rgw_role.h"
-#include "rgw_multi.h"
-#include "rgw_putobj_processor.h"
-
-namespace rgw::sal {
-
-class MotrStore;
-
-// Global Motr indices
-#define RGW_MOTR_USERS_IDX_NAME "motr.rgw.users"
-#define RGW_MOTR_BUCKET_INST_IDX_NAME "motr.rgw.bucket.instances"
-#define RGW_MOTR_BUCKET_HD_IDX_NAME "motr.rgw.bucket.headers"
-#define RGW_IAM_MOTR_ACCESS_KEY "motr.rgw.accesskeys"
-#define RGW_IAM_MOTR_EMAIL_KEY "motr.rgw.emails"
-
-//#define RGW_MOTR_BUCKET_ACL_IDX_NAME "motr.rgw.bucket.acls"
-
-// A simplified metadata cache implementation.
-// Note: MotrObjMetaCache doesn't handle the IO operations to Motr. A proxy
-// class can be added to handle cache and 'real' ops.
-class MotrMetaCache
-{
-protected:
- // MGW re-uses ObjectCache to cache an object's metadata as it has already
- // implemented an lru cache: (1) ObjectCache internally uses a map and lru
- // list to manage cache entries. The POC uses the object name, user name or
- // bucket name as the key to look up and insert an entry. (2) ObjectCache::data
- // is a bufferlist and can be used to store any metadata structure, such as an
- // object's bucket dir entry, user info or bucket instance.
- //
- // Note from RGW:
- // The Rados Gateway stores metadata and objects in an internal cache. This
- // should be kept consistent by the OSD's relaying notify events between
- // multiple watching RGW processes. In the event that this notification
- // protocol fails, bounding the length of time that any data in the cache will
- // be assumed valid will ensure that any RGW instance that falls out of sync
- // will eventually recover. This seems to be an issue mostly for large numbers
- // of RGW instances under heavy use. If you would like to turn off cache expiry,
- // set this value to zero.
- //
- // Currently the POC hasn't implemented the watch-notify mechanism yet, so the
- // current implementation is similar to cortx-s3server, which is based on
- // expiry time. (TODO: see the comments on distribute_cache().)
- //
- // Be aware: unlike RGW, Motr object data is not cached in the current POC!
- // RGW caches the first chunk (4MB by default).
- ObjectCache cache;
-
-public:
- // Lookup a cache entry.
- int get(const DoutPrefixProvider *dpp, const std::string& name, bufferlist& data);
-
- // Insert a cache entry.
- int put(const DoutPrefixProvider *dpp, const std::string& name, const bufferlist& data);
-
- // Called when an object is deleted. Notification should be sent to other
- // RGW instances.
- int remove(const DoutPrefixProvider *dpp, const std::string& name);
-
- // Make the local cache entry invalid.
- void invalid(const DoutPrefixProvider *dpp, const std::string& name);
-
- // TODO: distribute_cache() and watch_cb() are currently only placeholder functions.
- // Check out services/svc_sys_obj_cache.h/cc for reference.
- // These 2 functions are designed to send or to act on cache notifications.
- // It is feasible to implement the functionality using Motr's FDMI, after
- // discussing with Hua.
- int distribute_cache(const DoutPrefixProvider *dpp,
- const std::string& normal_name,
- ObjectCacheInfo& obj_info, int op);
- int watch_cb(const DoutPrefixProvider *dpp,
- uint64_t notify_id,
- uint64_t cookie,
- uint64_t notifier_id,
- bufferlist& bl);
-
- void set_enabled(bool status);
-
- MotrMetaCache(const DoutPrefixProvider *dpp, CephContext *cct) {
- cache.set_ctx(cct);
- }
-};
-
-struct MotrUserInfo {
- RGWUserInfo info;
- obj_version user_version;
- rgw::sal::Attrs attrs;
-
- void encode(bufferlist& bl) const
- {
- ENCODE_START(3, 3, bl);
- encode(info, bl);
- encode(user_version, bl);
- encode(attrs, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl)
- {
- DECODE_START(3, bl);
- decode(info, bl);
- decode(user_version, bl);
- decode(attrs, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(MotrUserInfo);
-
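-// Maps a user's email address to the user id. Stored in the
-// RGW_IAM_MOTR_EMAIL_KEY global index and looked up by
-// MotrStore::get_user_by_email().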
-struct MotrEmailInfo {
- std::string user_id;
- std::string email_id;
-
- MotrEmailInfo() {}
- MotrEmailInfo(std::string _user_id, std::string _email_id )
- : user_id(std::move(_user_id)), email_id(std::move(_email_id)) {}
-
- void encode(bufferlist& bl) const {
- ENCODE_START(2, 2, bl);
- encode(user_id, bl);
- encode(email_id, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl) {
- DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
- decode(user_id, bl);
- decode(email_id, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(MotrEmailInfo);
-
-struct MotrAccessKey {
- std::string id; // AccessKey
- std::string key; // SecretKey
- std::string user_id; // UserID
-
- MotrAccessKey() {}
- MotrAccessKey(std::string _id, std::string _key, std::string _user_id)
- : id(std::move(_id)), key(std::move(_key)), user_id(std::move(_user_id)) {}
-
- void encode(bufferlist& bl) const {
- ENCODE_START(2, 2, bl);
- encode(id, bl);
- encode(key, bl);
- encode(user_id, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl) {
- DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
- decode(id, bl);
- decode(key, bl);
- decode(user_id, bl);
- DECODE_FINISH(bl);
- }
-};
-WRITE_CLASS_ENCODER(MotrAccessKey);
-
-class MotrNotification : public StoreNotification {
- public:
- MotrNotification(Object* _obj, Object* _src_obj, rgw::notify::EventType _type) :
- StoreNotification(_obj, _src_obj, _type) {}
- ~MotrNotification() = default;
-
- virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override { return 0;}
- virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size,
- const ceph::real_time& mtime, const std::string& etag, const std::string& version) override { return 0; }
-};
-
-class MotrUser : public StoreUser {
- private:
- MotrStore *store;
-    struct m0_uint128 idxID = {0xe5ecb53640d4ecce, 0x6a156cd5a74aa3b8}; // MD5 of "motr.rgw.users"
- struct m0_idx idx;
-
- public:
- std::set<std::string> access_key_tracker;
- MotrUser(MotrStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { }
- MotrUser(MotrStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { }
- MotrUser(MotrStore *_st) : store(_st) { }
- MotrUser(MotrUser& _o) = default;
- MotrUser() {}
-
- virtual std::unique_ptr<User> clone() override {
- return std::unique_ptr<User>(new MotrUser(*this));
- }
- int list_buckets(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& end_marker,
- uint64_t max, bool need_stats, BucketList& buckets, optional_yield y) override;
- virtual int create_bucket(const DoutPrefixProvider* dpp,
- const rgw_bucket& b,
- const std::string& zonegroup_id,
- rgw_placement_rule& placement_rule,
- std::string& swift_ver_location,
- const RGWQuotaInfo* pquota_info,
- const RGWAccessControlPolicy& policy,
- Attrs& attrs,
- RGWBucketInfo& info,
- obj_version& ep_objv,
- bool exclusive,
- bool obj_lock_enabled,
- bool* existed,
- req_info& req_info,
- std::unique_ptr<Bucket>* bucket,
- optional_yield y) override;
- virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override;
- virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override;
- virtual int read_stats(const DoutPrefixProvider *dpp,
- optional_yield y, RGWStorageStats* stats,
- ceph::real_time *last_stats_sync = nullptr,
- ceph::real_time *last_stats_update = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override;
- virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
- virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
- bool* is_truncated, RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
-
- virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override;
- virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override;
- virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override;
- virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override;
-
- int create_user_info_idx();
- int load_user_from_idx(const DoutPrefixProvider *dpp, MotrStore *store, RGWUserInfo& info, std::map<std::string,
- bufferlist> *attrs, RGWObjVersionTracker *objv_tr);
-
- friend class MotrBucket;
-};
-
-class MotrBucket : public StoreBucket {
- private:
- MotrStore *store;
- RGWAccessControlPolicy acls;
-
-  // RGWBucketInfo and the other information shown when listing a bucket are
-  // represented in struct MotrBucketInfo. The structure is encoded and stored
-  // as the value of the global bucket instance index.
-  // TODO: compare the pros and cons of separating the bucket_attrs (ACLs,
-  // tags, etc.) into a different index.
- struct MotrBucketInfo {
- RGWBucketInfo info;
-
- obj_version bucket_version;
- ceph::real_time mtime;
-
- rgw::sal::Attrs bucket_attrs;
-
- void encode(bufferlist& bl) const
- {
- ENCODE_START(4, 4, bl);
- encode(info, bl);
- encode(bucket_version, bl);
- encode(mtime, bl);
- encode(bucket_attrs, bl); //rgw_cache.h example for a map
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl)
- {
- DECODE_START(4, bl);
- decode(info, bl);
- decode(bucket_version, bl);
- decode(mtime, bl);
- decode(bucket_attrs, bl);
- DECODE_FINISH(bl);
- }
- };
- WRITE_CLASS_ENCODER(MotrBucketInfo);
-
- public:
- MotrBucket(MotrStore *_st)
- : store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, User* _u)
- : StoreBucket(_u),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const rgw_bucket& _b)
- : StoreBucket(_b),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const RGWBucketEnt& _e)
- : StoreBucket(_e),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const RGWBucketInfo& _i)
- : StoreBucket(_i),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const rgw_bucket& _b, User* _u)
- : StoreBucket(_b, _u),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const RGWBucketEnt& _e, User* _u)
- : StoreBucket(_e, _u),
- store(_st),
- acls() {
- }
-
- MotrBucket(MotrStore *_st, const RGWBucketInfo& _i, User* _u)
- : StoreBucket(_i, _u),
- store(_st),
- acls() {
- }
-
- ~MotrBucket() { }
-
- virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
- virtual int list(const DoutPrefixProvider *dpp, ListParams&, int, ListResults&, optional_yield y) override;
- virtual int remove_bucket(const DoutPrefixProvider *dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override;
-    virtual int remove_bucket_bypass_gc(int concurrent_max,
-        bool keep_index_consistent,
-        optional_yield y,
-        const DoutPrefixProvider *dpp) override;
- virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
- virtual int set_acl(const DoutPrefixProvider *dpp, RGWAccessControlPolicy& acl, optional_yield y) override;
- virtual int load_bucket(const DoutPrefixProvider *dpp, optional_yield y, bool get_stats = false) override;
- int link_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y);
- int unlink_user(const DoutPrefixProvider* dpp, User* new_user, optional_yield y);
- int create_bucket_index();
- int create_multipart_indices();
- virtual int read_stats(const DoutPrefixProvider *dpp,
- const bucket_index_layout_generation& idx_layout, int shard_id,
- std::string *bucket_ver, std::string *master_ver,
- std::map<RGWObjCategory, RGWStorageStats>& stats,
- std::string *max_marker = nullptr,
- bool *syncstopped = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp,
- const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) override;
- virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
- virtual int update_container_stats(const DoutPrefixProvider *dpp) override;
- virtual int check_bucket_shards(const DoutPrefixProvider *dpp) override;
- virtual int chown(const DoutPrefixProvider *dpp, User& new_user, optional_yield y) override;
- virtual int put_info(const DoutPrefixProvider *dpp, bool exclusive, ceph::real_time mtime) override;
- virtual bool is_owner(User* user) override;
- virtual int check_empty(const DoutPrefixProvider *dpp, optional_yield y) override;
- virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override;
- virtual int merge_and_store_attrs(const DoutPrefixProvider *dpp, Attrs& attrs, optional_yield y) override;
- virtual int try_refresh_info(const DoutPrefixProvider *dpp, ceph::real_time *pmtime) override;
- virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
- bool *is_truncated, RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
- virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list<rgw_obj_index_key>& objs_to_unlink) override;
- virtual int check_index(const DoutPrefixProvider *dpp, std::map<RGWObjCategory, RGWStorageStats>& existing_stats, std::map<RGWObjCategory, RGWStorageStats>& calculated_stats) override;
- virtual int rebuild_index(const DoutPrefixProvider *dpp) override;
- virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override;
- virtual int purge_instance(const DoutPrefixProvider *dpp) override;
- virtual std::unique_ptr<Bucket> clone() override {
- return std::make_unique<MotrBucket>(*this);
- }
- virtual std::unique_ptr<MultipartUpload> get_multipart_upload(const std::string& oid,
- std::optional<std::string> upload_id=std::nullopt,
- ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override;
- virtual int list_multiparts(const DoutPrefixProvider *dpp,
- const std::string& prefix,
- std::string& marker,
- const std::string& delim,
- const int& max_uploads,
- std::vector<std::unique_ptr<MultipartUpload>>& uploads,
- std::map<std::string, bool> *common_prefixes,
- bool *is_truncated) override;
- virtual int abort_multiparts(const DoutPrefixProvider *dpp, CephContext *cct) override;
-
- friend class MotrStore;
-};
-
-class MotrPlacementTier: public StorePlacementTier {
- MotrStore* store;
- RGWZoneGroupPlacementTier tier;
-public:
- MotrPlacementTier(MotrStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {}
- virtual ~MotrPlacementTier() = default;
-
- virtual const std::string& get_tier_type() { return tier.tier_type; }
- virtual const std::string& get_storage_class() { return tier.storage_class; }
- virtual bool retain_head_object() { return tier.retain_head_object; }
- RGWZoneGroupPlacementTier& get_rt() { return tier; }
-};
-
-class MotrZoneGroup : public StoreZoneGroup {
-protected:
- MotrStore* store;
- const RGWZoneGroup group;
- std::string empty;
-public:
- MotrZoneGroup(MotrStore* _store) : store(_store), group() {}
- MotrZoneGroup(MotrStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {}
- virtual ~MotrZoneGroup() = default;
-
- virtual const std::string& get_id() const override { return group.get_id(); };
- virtual const std::string& get_name() const override { return group.get_name(); };
- virtual int equals(const std::string& other_zonegroup) const override {
- return group.equals(other_zonegroup);
- };
- /** Get the endpoint from zonegroup, or from master zone if not set */
- virtual const std::string& get_endpoint() const override;
- virtual bool placement_target_exists(std::string& target) const override;
- virtual bool is_master_zonegroup() const override {
- return group.is_master_zonegroup();
- };
- virtual const std::string& get_api_name() const override { return group.api_name; };
- virtual int get_placement_target_names(std::set<std::string>& names) const override;
- virtual const std::string& get_default_placement_name() const override {
- return group.default_placement.name; };
- virtual int get_hostnames(std::list<std::string>& names) const override {
- names = group.hostnames;
- return 0;
- };
- virtual int get_s3website_hostnames(std::list<std::string>& names) const override {
- names = group.hostnames_s3website;
- return 0;
- };
- virtual int get_zone_count() const override {
- return group.zones.size();
- }
- virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr<PlacementTier>* tier);
- virtual int get_zone_by_id(const std::string& id, std::unique_ptr<Zone>* zone) override {
- return -1;
- }
- virtual int get_zone_by_name(const std::string& name, std::unique_ptr<Zone>* zone) override {
- return -1;
- }
- virtual int list_zones(std::list<std::string>& zone_ids) override {
- zone_ids.clear();
- return 0;
- }
- const RGWZoneGroup& get_group() { return group; }
- virtual std::unique_ptr<ZoneGroup> clone() override {
- return std::make_unique<MotrZoneGroup>(store, group);
- }
- friend class MotrZone;
-};
-
-class MotrZone : public StoreZone {
- protected:
- MotrStore* store;
- RGWRealm *realm{nullptr};
- MotrZoneGroup zonegroup;
- RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
- RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */
- RGWPeriod *current_period{nullptr};
-
- public:
- MotrZone(MotrStore* _store) : store(_store), zonegroup(_store) {
- realm = new RGWRealm();
- zone_public_config = new RGWZone();
- zone_params = new RGWZoneParams();
- current_period = new RGWPeriod();
-
- // XXX: only default and STANDARD supported for now
- RGWZonePlacementInfo info;
- RGWZoneStorageClasses sc;
- sc.set_storage_class("STANDARD", nullptr, nullptr);
- info.storage_classes = sc;
- zone_params->placement_pools["default"] = info;
- }
- MotrZone(MotrStore* _store, MotrZoneGroup _zg) : store(_store), zonegroup(_zg) {
- realm = new RGWRealm();
-    // TODO: fetch zonegroup params (e.g. id) from provisioner config.
- //zonegroup.group.set_id("0956b174-fe14-4f97-8b50-bb7ec5e1cf62");
- //zonegroup.group.api_name = "default";
- zone_public_config = new RGWZone();
- zone_params = new RGWZoneParams();
- current_period = new RGWPeriod();
-
- // XXX: only default and STANDARD supported for now
- RGWZonePlacementInfo info;
- RGWZoneStorageClasses sc;
- sc.set_storage_class("STANDARD", nullptr, nullptr);
- info.storage_classes = sc;
- zone_params->placement_pools["default"] = info;
- }
- ~MotrZone() = default;
-
- virtual std::unique_ptr<Zone> clone() override {
- return std::make_unique<MotrZone>(store);
- }
- virtual ZoneGroup& get_zonegroup() override;
- virtual const std::string& get_id() override;
- virtual const std::string& get_name() const override;
- virtual bool is_writeable() override;
- virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
- virtual const std::string& get_current_period_id() override;
- virtual const RGWAccessKey& get_system_key() { return zone_params->system_key; }
- virtual const std::string& get_realm_name() { return realm->get_name(); }
- virtual const std::string& get_realm_id() { return realm->get_id(); }
- virtual const std::string_view get_tier_type() { return "rgw"; }
- virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() { return nullptr; }
- friend class MotrStore;
-};
-
-class MotrLuaManager : public StoreLuaManager {
- MotrStore* store;
-
- public:
- MotrLuaManager(MotrStore* _s) : store(_s)
- {
- }
- virtual ~MotrLuaManager() = default;
-
- /** Get a script named with the given key from the backing store */
- virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) override;
- /** Put a script named with the given key to the backing store */
- virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) override;
- /** Delete a script named with the given key from the backing store */
- virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) override;
- /** Add a lua package */
- virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override;
- /** Remove a lua package */
- virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override;
- /** List lua packages */
- virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) override;
-};
-
-class MotrOIDCProvider : public RGWOIDCProvider {
- MotrStore* store;
- public:
- MotrOIDCProvider(MotrStore* _store) : store(_store) {}
- ~MotrOIDCProvider() = default;
-
- virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override { return 0; }
- virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override { return 0; }
- virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override { return 0;}
-
- void encode(bufferlist& bl) const {
- RGWOIDCProvider::encode(bl);
- }
- void decode(bufferlist::const_iterator& bl) {
- RGWOIDCProvider::decode(bl);
- }
-};
-
-class MotrObject : public StoreObject {
- private:
- MotrStore *store;
- RGWAccessControlPolicy acls;
- RGWObjCategory category;
-
-  // Set if this object is part of a multipart-uploaded object.
-  // TODO: handle this in a separate class? MotrPartObject : public MotrObject
- uint64_t part_off;
- uint64_t part_size;
- uint64_t part_num;
-
- public:
-
- // motr object metadata stored in index
- struct Meta {
- struct m0_uint128 oid = {};
- struct m0_fid pver = {};
- uint64_t layout_id = 0;
-
- void encode(bufferlist& bl) const
- {
- ENCODE_START(5, 5, bl);
- encode(oid.u_hi, bl);
- encode(oid.u_lo, bl);
- encode(pver.f_container, bl);
- encode(pver.f_key, bl);
- encode(layout_id, bl);
- ENCODE_FINISH(bl);
- }
-
- void decode(bufferlist::const_iterator& bl)
- {
- DECODE_START(5, bl);
- decode(oid.u_hi, bl);
- decode(oid.u_lo, bl);
- decode(pver.f_container, bl);
- decode(pver.f_key, bl);
- decode(layout_id, bl);
- DECODE_FINISH(bl);
- }
- };
-
- struct m0_obj *mobj = NULL;
- Meta meta;
-
- struct MotrReadOp : public ReadOp {
- private:
- MotrObject* source;
-
- // The set of part objects if the source is
- // a multipart uploaded object.
- std::map<int, std::unique_ptr<MotrObject>> part_objs;
-
- public:
- MotrReadOp(MotrObject *_source);
-
- virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override;
-
-      /*
-       * Both `read` and `iterate` read up through index `end`
-       * *inclusive*. The number of bytes that can be returned is
-       * `end - off + 1` (see the range sketch after this class).
-       */
- virtual int read(int64_t off, int64_t end, bufferlist& bl,
- optional_yield y,
- const DoutPrefixProvider* dpp) override;
- virtual int iterate(const DoutPrefixProvider* dpp, int64_t off,
- int64_t end, RGWGetDataCB* cb,
- optional_yield y) override;
-
- virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override;
- };
-
- struct MotrDeleteOp : public DeleteOp {
- private:
- MotrObject* source;
-
- public:
- MotrDeleteOp(MotrObject* _source);
-
- virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override;
- };
-
- MotrObject() = default;
-
- MotrObject(MotrStore *_st, const rgw_obj_key& _k)
- : StoreObject(_k), store(_st), acls() {}
- MotrObject(MotrStore *_st, const rgw_obj_key& _k, Bucket* _b)
- : StoreObject(_k, _b), store(_st), acls() {}
-
- MotrObject(MotrObject& _o) = default;
-
- virtual ~MotrObject();
-
- virtual int delete_object(const DoutPrefixProvider* dpp,
- optional_yield y,
- bool prevent_versioning = false) override;
- virtual int copy_object(User* user,
- req_info* info, const rgw_zone_id& source_zone,
- rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket,
- rgw::sal::Bucket* src_bucket,
- const rgw_placement_rule& dest_placement,
- ceph::real_time* src_mtime, ceph::real_time* mtime,
- const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr,
- bool high_precision_time,
- const char* if_match, const char* if_nomatch,
- AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs,
- RGWObjCategory category, uint64_t olh_epoch,
- boost::optional<ceph::real_time> delete_at,
- std::string* version_id, std::string* tag, std::string* etag,
- void (*progress_cb)(off_t, void *), void* progress_data,
- const DoutPrefixProvider* dpp, optional_yield y) override;
- virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
- virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; }
- virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **state, optional_yield y, bool follow_olh = true) override;
- virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override;
- virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override;
- virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override;
- virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override;
- virtual bool is_expired() override;
- virtual void gen_rand_obj_instance_name() override;
- virtual std::unique_ptr<Object> clone() override {
- return std::unique_ptr<Object>(new MotrObject(*this));
- }
- virtual std::unique_ptr<MPSerializer> get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name) override;
- virtual int transition(Bucket* bucket,
- const rgw_placement_rule& placement_rule,
- const real_time& mtime,
- uint64_t olh_epoch,
- const DoutPrefixProvider* dpp,
- optional_yield y) override;
- virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override;
- virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override;
-
- /* Swift versioning */
- virtual int swift_versioning_restore(bool& restored,
- const DoutPrefixProvider* dpp) override;
- virtual int swift_versioning_copy(const DoutPrefixProvider* dpp,
- optional_yield y) override;
-
- /* OPs */
- virtual std::unique_ptr<ReadOp> get_read_op() override;
- virtual std::unique_ptr<DeleteOp> get_delete_op() override;
-
- /* OMAP */
- virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
- const std::set<std::string>& keys,
- Attrs* vals) override;
- virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val,
- bool must_exist, optional_yield y) override;
- virtual int chown(User& new_user, const DoutPrefixProvider* dpp, optional_yield y) override;
- private:
- //int read_attrs(const DoutPrefixProvider* dpp, Motr::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr);
-
- public:
- bool is_opened() { return mobj != NULL; }
- int create_mobj(const DoutPrefixProvider *dpp, uint64_t sz);
- int open_mobj(const DoutPrefixProvider *dpp);
- int delete_mobj(const DoutPrefixProvider *dpp);
- void close_mobj();
- int write_mobj(const DoutPrefixProvider *dpp, bufferlist&& data, uint64_t offset);
- int read_mobj(const DoutPrefixProvider* dpp, int64_t off, int64_t end, RGWGetDataCB* cb);
- unsigned get_optimal_bs(unsigned len);
-
- int get_part_objs(const DoutPrefixProvider *dpp,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs);
- int open_part_objs(const DoutPrefixProvider* dpp,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs);
- int read_multipart_obj(const DoutPrefixProvider* dpp,
- int64_t off, int64_t end, RGWGetDataCB* cb,
- std::map<int, std::unique_ptr<MotrObject>>& part_objs);
- int delete_part_objs(const DoutPrefixProvider* dpp);
- void set_category(RGWObjCategory _category) {category = _category;}
- int get_bucket_dir_ent(const DoutPrefixProvider *dpp, rgw_bucket_dir_entry& ent);
- int update_version_entries(const DoutPrefixProvider *dpp);
-};
-
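-// Illustrative only: a sketch of the read-range convention documented on
-// MotrReadOp, assuming an already-constructed MotrObject. `end` is inclusive,
-// so reading `len` bytes starting at `off` passes end = off + len - 1. The
-// helper name is hypothetical.
-inline int motr_read_range_sketch(const DoutPrefixProvider* dpp,
-                                  optional_yield y, MotrObject& obj,
-                                  int64_t off, int64_t len, bufferlist& bl)
-{
-  auto rop = obj.get_read_op();
-  int rc = rop->prepare(y, dpp);
-  if (rc < 0) {
-    return rc;
-  }
-  return rop->read(off, off + len - 1, bl, y, dpp);  // inclusive end index
-}
-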
-// A placeholder locking class for multipart upload.
-// TODO: implement it using Motr object locks.
-class MPMotrSerializer : public StoreMPSerializer {
-
- public:
- MPMotrSerializer(const DoutPrefixProvider *dpp, MotrStore* store, MotrObject* obj, const std::string& lock_name) {}
-
- virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override {return 0; }
- virtual int unlock() override { return 0;}
-};
-
-class MotrAtomicWriter : public StoreWriter {
- protected:
- rgw::sal::MotrStore* store;
- const rgw_user& owner;
- const rgw_placement_rule *ptail_placement_rule;
- uint64_t olh_epoch;
- const std::string& unique_tag;
- MotrObject obj;
- MotrObject old_obj;
- uint64_t total_data_size; // for total data being uploaded
- bufferlist acc_data; // accumulated data
- uint64_t acc_off; // accumulated data offset
-
- struct m0_bufvec buf;
- struct m0_bufvec attr;
- struct m0_indexvec ext;
-
- public:
- MotrAtomicWriter(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- MotrStore* _store,
- const rgw_user& _owner,
- const rgw_placement_rule *_ptail_placement_rule,
- uint64_t _olh_epoch,
- const std::string& _unique_tag);
- ~MotrAtomicWriter() = default;
-
- // prepare to start processing object data
- virtual int prepare(optional_yield y) override;
-
- // Process a bufferlist
- virtual int process(bufferlist&& data, uint64_t offset) override;
-
- int write();
-
- // complete the operation and make its result visible to clients
- virtual int complete(size_t accounted_size, const std::string& etag,
- ceph::real_time *mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch,
- const std::string *user_data,
- rgw_zone_set *zones_trace, bool *canceled,
- optional_yield y) override;
-
- unsigned populate_bvec(unsigned len, bufferlist::iterator &bi);
- void cleanup();
-};
-
-class MotrMultipartWriter : public StoreWriter {
-protected:
- rgw::sal::MotrStore* store;
-
- // Head object.
- rgw::sal::Object* head_obj;
-
- // Part parameters.
- const uint64_t part_num;
- const std::string part_num_str;
- std::unique_ptr<MotrObject> part_obj;
- uint64_t actual_part_size = 0;
-
-public:
- MotrMultipartWriter(const DoutPrefixProvider *dpp,
- optional_yield y, MultipartUpload* upload,
- rgw::sal::Object* obj,
- MotrStore* _store,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- uint64_t _part_num, const std::string& part_num_str) :
- StoreWriter(dpp, y), store(_store), head_obj(obj),
- part_num(_part_num), part_num_str(part_num_str)
- {
- }
- ~MotrMultipartWriter() = default;
-
- // prepare to start processing object data
- virtual int prepare(optional_yield y) override;
-
- // Process a bufferlist
- virtual int process(bufferlist&& data, uint64_t offset) override;
-
- // complete the operation and make its result visible to clients
- virtual int complete(size_t accounted_size, const std::string& etag,
- ceph::real_time *mtime, ceph::real_time set_mtime,
- std::map<std::string, bufferlist>& attrs,
- ceph::real_time delete_at,
- const char *if_match, const char *if_nomatch,
- const std::string *user_data,
- rgw_zone_set *zones_trace, bool *canceled,
- optional_yield y) override;
-};
-
-// The implementation of multipart upload in the POC roughly follows
-// cortx-s3server's design. Parts are stored in separate Motr objects.
-// s3server uses a few auxiliary Motr indices to manage multipart-related
-// metadata: (1) Bucket multipart index (bucket_nnn_multipart_index), which
-// contains metadata answering questions such as which objects have started
-// a multipart upload and what their upload ids are. This index is created
-// during bucket creation. (2) Object part index (object_nnn_part_index),
-// which stores each part's details (size, pvid, oid, ...). This index is
-// created in MotrMultipartUpload::init(). (3) Extended metadata index
-// (bucket_nnn_extended_metadata): once the parts have been uploaded and their
-// metadata saved in the part index, the user may issue a multipart completion
-// request. When processing the completion request, the parts are read from the
-// object part index and, for each part, an entry is created in the extended
-// index. The entry for the object is created in the bucket (object list)
-// index. The part index is then deleted and its entry removed from
-// bucket_nnn_multipart_index. Like the bucket multipart index, the bucket
-// extended metadata index is created during bucket creation.
-//
-// The extended metadata index exists mainly for fault-tolerance reasons (how
-// to handle a Motr service crash while uploading an object) and to avoid
-// creating too many Motr indices (the rationale for limiting the number of
-// Motr indices is not entirely clear). To keep the POC simple, only two
-// indices are maintained: the bucket multipart index and
-// object_nnn_part_index. (See the index-helper sketch after MotrStore below.)
-
-class MotrMultipartPart : public StoreMultipartPart {
-protected:
- RGWUploadPartInfo info;
-
-public:
- MotrObject::Meta meta;
-
- MotrMultipartPart(RGWUploadPartInfo _info, MotrObject::Meta _meta) :
- info(_info), meta(_meta) {}
- virtual ~MotrMultipartPart() = default;
-
- virtual uint32_t get_num() { return info.num; }
- virtual uint64_t get_size() { return info.accounted_size; }
- virtual const std::string& get_etag() { return info.etag; }
- virtual ceph::real_time& get_mtime() { return info.modified; }
-
- RGWObjManifest& get_manifest() { return info.manifest; }
-
- friend class MotrMultipartUpload;
-};
-
-class MotrMultipartUpload : public StoreMultipartUpload {
- MotrStore* store;
- RGWMPObj mp_obj;
- ACLOwner owner;
- ceph::real_time mtime;
- rgw_placement_rule placement;
- RGWObjManifest manifest;
-
-public:
- MotrMultipartUpload(MotrStore* _store, Bucket* _bucket, const std::string& oid,
- std::optional<std::string> upload_id, ACLOwner _owner, ceph::real_time _mtime) :
- StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), owner(_owner), mtime(_mtime) {}
- virtual ~MotrMultipartUpload() = default;
-
- virtual const std::string& get_meta() const { return mp_obj.get_meta(); }
- virtual const std::string& get_key() const { return mp_obj.get_key(); }
- virtual const std::string& get_upload_id() const { return mp_obj.get_upload_id(); }
- virtual const ACLOwner& get_owner() const override { return owner; }
- virtual ceph::real_time& get_mtime() { return mtime; }
- virtual std::unique_ptr<rgw::sal::Object> get_meta_obj() override;
- virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override;
- virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
- int num_parts, int marker,
- int* next_marker, bool* truncated,
- bool assume_unsorted = false) override;
- virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override;
- virtual int complete(const DoutPrefixProvider* dpp,
- optional_yield y, CephContext* cct,
- std::map<int, std::string>& part_etags,
- std::list<rgw_obj_index_key>& remove_objs,
- uint64_t& accounted_size, bool& compressed,
- RGWCompressionInfo& cs_info, off_t& off,
- std::string& tag, ACLOwner& owner,
- uint64_t olh_epoch,
- rgw::sal::Object* target_obj) override;
- virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override;
- virtual std::unique_ptr<Writer> get_writer(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- uint64_t part_num,
- const std::string& part_num_str) override;
- int delete_parts(const DoutPrefixProvider *dpp);
-};
-
-class MotrStore : public StoreDriver {
- private:
- MotrZone zone;
- RGWSyncModuleInstanceRef sync_module;
-
- MotrMetaCache* obj_meta_cache;
- MotrMetaCache* user_cache;
- MotrMetaCache* bucket_inst_cache;
-
- public:
- CephContext *cctx;
- struct m0_client *instance;
- struct m0_container container;
- struct m0_realm uber_realm;
- struct m0_config conf = {};
- struct m0_idx_dix_config dix_conf = {};
-
- MotrStore(CephContext *c): zone(this), cctx(c) {}
- ~MotrStore() {
- delete obj_meta_cache;
- delete user_cache;
- delete bucket_inst_cache;
- }
-
- virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) { return 0; }
- virtual const std::string get_name() const override {
- return "motr";
- }
-
- virtual std::unique_ptr<User> get_user(const rgw_user& u) override;
- virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override;
- virtual int get_user_by_access_key(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y, std::unique_ptr<User>* user) override;
- virtual int get_user_by_email(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y, std::unique_ptr<User>* user) override;
- virtual int get_user_by_swift(const DoutPrefixProvider *dpp, const std::string& user_str, optional_yield y, std::unique_ptr<User>* user) override;
- virtual std::unique_ptr<Object> get_object(const rgw_obj_key& k) override;
- virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y) override;
- virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket) override;
- virtual int get_bucket(const DoutPrefixProvider *dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr<Bucket>* bucket, optional_yield y) override;
- virtual bool is_meta_master() override;
- virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv,
- bufferlist& in_data, JSONParser *jp, req_info& info,
- optional_yield y) override;
- virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
- bufferlist& in_data,
- RGWXMLDecoder::XMLParser* parser, req_info& info,
- optional_yield y) override;
- virtual Zone* get_zone() { return &zone; }
- virtual std::string zone_unique_id(uint64_t unique_num) override;
- virtual std::string zone_unique_trans_id(const uint64_t unique_num) override;
- virtual int get_zonegroup(const std::string& id, std::unique_ptr<ZoneGroup>* zonegroup) override;
- virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list<std::string>& zone_ids) override;
- virtual int cluster_stat(RGWClusterStat& stats) override;
- virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
- virtual std::unique_ptr<Notification> get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj,
- req_state* s, rgw::notify::EventType event_type, optional_yield y, const std::string* object_name=nullptr) override;
- virtual std::unique_ptr<Notification> get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj,
- rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket,
- std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) override;
- virtual RGWLC* get_rgwlc(void) override { return NULL; }
- virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return NULL; }
-
- virtual int log_usage(const DoutPrefixProvider *dpp, std::map<rgw_user_bucket, RGWUsageBatch>& usage_info) override;
- virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override;
- virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type,
- const std::map<std::string, std::string>& meta) override;
- virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override;
- virtual void get_quota(RGWQuota& quota) override;
- virtual int set_buckets_enabled(const DoutPrefixProvider *dpp, std::vector<rgw_bucket>& buckets, bool enabled) override;
- virtual int get_sync_policy_handler(const DoutPrefixProvider *dpp,
- std::optional<rgw_zone_id> zone,
- std::optional<rgw_bucket> bucket,
- RGWBucketSyncPolicyHandlerRef *phandler,
- optional_yield y) override;
- virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override;
- virtual void wakeup_meta_sync_shards(std::set<int>& shard_ids) override { return; }
- virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, const rgw_zone_id& source_zone, boost::container::flat_map<int, boost::container::flat_set<rgw_data_notify_entry>>& shard_ids) override {}
- virtual int clear_usage(const DoutPrefixProvider *dpp) override { return 0; }
- virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
- uint32_t max_entries, bool *is_truncated,
- RGWUsageIter& usage_iter,
- std::map<rgw_user_bucket, rgw_usage_log_entry>& usage) override;
- virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override;
- virtual int get_config_key_val(std::string name, bufferlist* bl) override;
- virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override;
- virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list<std::string>& keys, bool* truncated) override;
- virtual void meta_list_keys_complete(void* handle) override;
- virtual std::string meta_get_marker(void *handle) override;
- virtual int meta_remove(const DoutPrefixProvider *dpp, std::string& metadata_key, optional_yield y) override;
-
- virtual const RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; }
- virtual std::string get_host_id() { return ""; }
-
- virtual std::unique_ptr<LuaManager> get_lua_manager() override;
- virtual std::unique_ptr<RGWRole> get_role(std::string name,
- std::string tenant,
- std::string path="",
- std::string trust_policy="",
- std::string max_session_duration_str="",
- std::multimap<std::string, std::string> tags={}) override;
- virtual std::unique_ptr<RGWRole> get_role(const RGWRoleInfo& info) override;
- virtual std::unique_ptr<RGWRole> get_role(std::string id) override;
- virtual int get_roles(const DoutPrefixProvider *dpp,
- optional_yield y,
- const std::string& path_prefix,
- const std::string& tenant,
- std::vector<std::unique_ptr<RGWRole>>& roles) override;
- virtual std::unique_ptr<RGWOIDCProvider> get_oidc_provider() override;
- virtual int get_oidc_providers(const DoutPrefixProvider *dpp,
- const std::string& tenant,
- std::vector<std::unique_ptr<RGWOIDCProvider>>& providers) override;
- virtual std::unique_ptr<Writer> get_append_writer(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- const std::string& unique_tag,
- uint64_t position,
- uint64_t *cur_accounted_size) override;
- virtual std::unique_ptr<Writer> get_atomic_writer(const DoutPrefixProvider *dpp,
- optional_yield y,
- rgw::sal::Object* obj,
- const rgw_user& owner,
- const rgw_placement_rule *ptail_placement_rule,
- uint64_t olh_epoch,
- const std::string& unique_tag) override;
- virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override;
- virtual bool valid_placement(const rgw_placement_rule& rule) override;
-
- virtual void finalize(void) override;
-
- virtual CephContext *ctx(void) override {
- return cctx;
- }
-
- virtual void register_admin_apis(RGWRESTMgr* mgr) override { };
-
- int open_idx(struct m0_uint128 *id, bool create, struct m0_idx *out);
- void close_idx(struct m0_idx *idx) { m0_idx_fini(idx); }
- int do_idx_op(struct m0_idx *, enum m0_idx_opcode opcode,
- std::vector<uint8_t>& key, std::vector<uint8_t>& val, bool update = false);
-
- int do_idx_next_op(struct m0_idx *idx,
- std::vector<std::vector<uint8_t>>& key_vec,
- std::vector<std::vector<uint8_t>>& val_vec);
- int next_query_by_name(std::string idx_name, std::vector<std::string>& key_str_vec,
- std::vector<bufferlist>& val_bl_vec,
- std::string prefix="", std::string delim="");
-
- void index_name_to_motr_fid(std::string iname, struct m0_uint128 *fid);
- int open_motr_idx(struct m0_uint128 *id, struct m0_idx *idx);
- int create_motr_idx_by_name(std::string iname);
- int delete_motr_idx_by_name(std::string iname);
- int do_idx_op_by_name(std::string idx_name, enum m0_idx_opcode opcode,
- std::string key_str, bufferlist &bl, bool update=true);
- int check_n_create_global_indices();
- int store_access_key(const DoutPrefixProvider *dpp, optional_yield y, MotrAccessKey access_key);
- int delete_access_key(const DoutPrefixProvider *dpp, optional_yield y, std::string access_key);
- int store_email_info(const DoutPrefixProvider *dpp, optional_yield y, MotrEmailInfo& email_info);
-
- int init_metadata_cache(const DoutPrefixProvider *dpp, CephContext *cct);
- MotrMetaCache* get_obj_meta_cache() {return obj_meta_cache;}
- MotrMetaCache* get_user_cache() {return user_cache;}
- MotrMetaCache* get_bucket_inst_cache() {return bucket_inst_cache;}
-};
-
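-// Illustrative only: a sketch of the MotrStore index helpers declared above.
-// The access-key values, the "object_foo_part_index" name and the assumption
-// that 0 means success are hypothetical; M0_IC_PUT is Motr's index put opcode,
-// and the index layout follows the comments before MotrMultipartPart.
-inline void motr_index_helpers_sketch(MotrStore* store)
-{
-  // Store an access key record, keyed by the access key id, in the global
-  // access-key index.
-  MotrAccessKey ak("AKIA_EXAMPLE", "example-secret-key", "testid");
-  bufferlist bl;
-  encode(ak, bl);
-  int rc = store->do_idx_op_by_name(RGW_IAM_MOTR_ACCESS_KEY, M0_IC_PUT, ak.id, bl);
-  if (rc != 0)
-    return;
-
-  // Read back up to 8 entries of a (hypothetical) per-object part index.
-  std::vector<std::string> keys(8);
-  std::vector<bufferlist> vals(8);
-  store->next_query_by_name("object_foo_part_index", keys, vals);
-}
-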
-struct obj_time_weight {
- real_time mtime;
- uint32_t zone_short_id;
- uint64_t pg_ver;
- bool high_precision;
-
- obj_time_weight() : zone_short_id(0), pg_ver(0), high_precision(false) {}
-
- bool compare_low_precision(const obj_time_weight& rhs) {
- struct timespec l = ceph::real_clock::to_timespec(mtime);
- struct timespec r = ceph::real_clock::to_timespec(rhs.mtime);
- l.tv_nsec = 0;
- r.tv_nsec = 0;
- if (l > r) {
- return false;
- }
- if (l < r) {
- return true;
- }
- if (!zone_short_id || !rhs.zone_short_id) {
- /* don't compare zone ids, if one wasn't provided */
- return false;
- }
- if (zone_short_id != rhs.zone_short_id) {
- return (zone_short_id < rhs.zone_short_id);
- }
- return (pg_ver < rhs.pg_ver);
-
- }
-
- bool operator<(const obj_time_weight& rhs) {
- if (!high_precision || !rhs.high_precision) {
- return compare_low_precision(rhs);
- }
- if (mtime > rhs.mtime) {
- return false;
- }
- if (mtime < rhs.mtime) {
- return true;
- }
- if (!zone_short_id || !rhs.zone_short_id) {
- /* don't compare zone ids, if one wasn't provided */
- return false;
- }
- if (zone_short_id != rhs.zone_short_id) {
- return (zone_short_id < rhs.zone_short_id);
- }
- return (pg_ver < rhs.pg_ver);
- }
-
- void init(const real_time& _mtime, uint32_t _short_id, uint64_t _pg_ver) {
- mtime = _mtime;
- zone_short_id = _short_id;
- pg_ver = _pg_ver;
- }
-
- void init(RGWObjState *state) {
- mtime = state->mtime;
- zone_short_id = state->zone_short_id;
- pg_ver = state->pg_ver;
- }
-};
-
-inline std::ostream& operator<<(std::ostream& out, const obj_time_weight &o) {
- out << o.mtime;
-
- if (o.zone_short_id != 0 || o.pg_ver != 0) {
- out << "[zid=" << o.zone_short_id << ", pgv=" << o.pg_ver << "]";
- }
-
- return out;
-}
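-
-// Illustrative only: a sketch of how obj_time_weight is meant to be used when
-// deciding whether a source object is newer than the destination during a
-// conditional copy. Both RGWObjState pointers are assumed to be already
-// loaded; the helper name is hypothetical.
-inline bool motr_source_is_newer_sketch(RGWObjState* dest, RGWObjState* src)
-{
-  obj_time_weight dest_weight;
-  obj_time_weight src_weight;
-  dest_weight.init(dest);  // takes mtime, zone_short_id and pg_ver from state
-  src_weight.init(src);
-  // operator< falls back to second-granularity comparison unless both sides
-  // advertise high-precision times.
-  return dest_weight < src_weight;
-}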
-
-} // namespace rgw::sal