From: Daniel Gryniewicz Date: Fri, 2 Dec 2022 16:34:44 +0000 (-0500) Subject: RGW - Zipper - move rgw/store to rgw/driver X-Git-Tag: v18.1.0~698^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=644a4881905b4cf88eae2c2e19b62aa8c07b9cc6;p=ceph-ci.git RGW - Zipper - move rgw/store to rgw/driver Signed-off-by: Daniel Gryniewicz --- diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 7b6ac917f85..8d663be01fb 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -142,57 +142,57 @@ set(librgw_common_srcs rgw_bucket_encryption.cc rgw_tracer.cc rgw_lua_background.cc - store/rados/cls_fifo_legacy.cc - store/rados/rgw_bucket.cc - store/rados/rgw_bucket_sync.cc - store/rados/rgw_cr_rados.cc - store/rados/rgw_cr_tools.cc - store/rados/rgw_d3n_datacache.cc - store/rados/rgw_datalog.cc - store/rados/rgw_datalog_notify.cc - store/rados/rgw_data_sync.cc - store/rados/rgw_etag_verifier.cc - store/rados/rgw_gc.cc - store/rados/rgw_gc_log.cc - store/rados/rgw_lc_tier.cc - store/rados/rgw_log_backing.cc - store/rados/rgw_metadata.cc - store/rados/rgw_notify.cc - store/rados/rgw_obj_manifest.cc - store/rados/rgw_object_expirer_core.cc - store/rados/rgw_otp.cc - store/rados/rgw_period.cc - store/rados/rgw_rest_pubsub.cc - store/rados/rgw_rest_realm.cc - store/rados/rgw_rest_user.cc - store/rados/rgw_sal_rados.cc - store/rados/rgw_service.cc - store/rados/rgw_sync.cc - store/rados/rgw_sync_counters.cc - store/rados/rgw_sync_error_repo.cc - store/rados/rgw_sync_module.cc - store/rados/rgw_sync_module_aws.cc - store/rados/rgw_sync_module_es.cc - store/rados/rgw_sync_module_es_rest.cc - store/rados/rgw_sync_module_log.cc - store/rados/rgw_sync_trace.cc - store/rados/rgw_tools.cc - store/rados/rgw_trim_bilog.cc - store/rados/rgw_trim_datalog.cc - store/rados/rgw_trim_mdlog.cc - store/rados/rgw_user.cc - store/rados/rgw_zone.cc) + driver/rados/cls_fifo_legacy.cc + driver/rados/rgw_bucket.cc + driver/rados/rgw_bucket_sync.cc + driver/rados/rgw_cr_rados.cc + driver/rados/rgw_cr_tools.cc + driver/rados/rgw_d3n_datacache.cc + driver/rados/rgw_datalog.cc + driver/rados/rgw_datalog_notify.cc + driver/rados/rgw_data_sync.cc + driver/rados/rgw_etag_verifier.cc + driver/rados/rgw_gc.cc + driver/rados/rgw_gc_log.cc + driver/rados/rgw_lc_tier.cc + driver/rados/rgw_log_backing.cc + driver/rados/rgw_metadata.cc + driver/rados/rgw_notify.cc + driver/rados/rgw_obj_manifest.cc + driver/rados/rgw_object_expirer_core.cc + driver/rados/rgw_otp.cc + driver/rados/rgw_period.cc + driver/rados/rgw_rest_pubsub.cc + driver/rados/rgw_rest_realm.cc + driver/rados/rgw_rest_user.cc + driver/rados/rgw_sal_rados.cc + driver/rados/rgw_service.cc + driver/rados/rgw_sync.cc + driver/rados/rgw_sync_counters.cc + driver/rados/rgw_sync_error_repo.cc + driver/rados/rgw_sync_module.cc + driver/rados/rgw_sync_module_aws.cc + driver/rados/rgw_sync_module_es.cc + driver/rados/rgw_sync_module_es_rest.cc + driver/rados/rgw_sync_module_log.cc + driver/rados/rgw_sync_trace.cc + driver/rados/rgw_tools.cc + driver/rados/rgw_trim_bilog.cc + driver/rados/rgw_trim_datalog.cc + driver/rados/rgw_trim_mdlog.cc + driver/rados/rgw_user.cc + driver/rados/rgw_zone.cc) list(APPEND librgw_common_srcs - store/immutable_config/store.cc - store/json_config/store.cc - store/rados/config/impl.cc - store/rados/config/period.cc - store/rados/config/period_config.cc - store/rados/config/realm.cc - store/rados/config/store.cc - store/rados/config/zone.cc - store/rados/config/zonegroup.cc) + driver/immutable_config/store.cc + 
driver/json_config/store.cc
+  driver/rados/config/impl.cc
+  driver/rados/config/period.cc
+  driver/rados/config/period_config.cc
+  driver/rados/config/realm.cc
+  driver/rados/config/store.cc
+  driver/rados/config/zone.cc
+  driver/rados/config/zonegroup.cc)
 if(WITH_RADOSGW_AMQP_ENDPOINT)
   list(APPEND librgw_common_srcs rgw_amqp.cc)
@@ -201,7 +201,7 @@ if(WITH_RADOSGW_KAFKA_ENDPOINT)
   list(APPEND librgw_common_srcs rgw_kafka.cc)
 endif()
 if(WITH_RADOSGW_DBSTORE)
-  add_subdirectory(store/dbstore)
+  add_subdirectory(driver/dbstore)
   list(APPEND librgw_common_srcs rgw_sal_dbstore.cc)
 endif()
 if(WITH_RADOSGW_MOTR)
@@ -253,7 +253,7 @@ target_link_libraries(rgw_common spawn)
 target_include_directories(rgw_common
   PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/services"
-  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/store/rados"
+  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
   PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
   PUBLIC "${LUA_INCLUDE_DIR}")
 if(WITH_RADOSGW_KAFKA_ENDPOINT)
@@ -334,7 +334,7 @@ set(rgw_a_srcs
   rgw_rest_log.cc
   rgw_rest_metadata.cc
   rgw_rest_ratelimit.cc
-  store/rados/rgw_rest_realm.cc
+  driver/rados/rgw_rest_realm.cc
   rgw_rest_sts.cc
   rgw_rest_swift.cc
   rgw_rest_usage.cc
@@ -359,7 +359,7 @@ target_include_directories(rgw_a SYSTEM
   PUBLIC "../rapidjson/include"
   PUBLIC "${CMAKE_SOURCE_DIR}/src/dmclock/support/src"
   PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
-  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/store/rados"
+  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
   PRIVATE "${CMAKE_SOURCE_DIR}/src/libkmip")
 if(WITH_RADOSGW_AMQP_ENDPOINT)
diff --git a/src/rgw/driver/daos/README.md b/src/rgw/driver/daos/README.md
new file mode 100644
index 00000000000..de6d215a016
--- /dev/null
+++ b/src/rgw/driver/daos/README.md
@@ -0,0 +1,47 @@
+# DAOS
+
+Standalone RADOS Gateway (RGW) on [DAOS](http://daos.io/) (Experimental)
+
+## CMake Option
+
+Add the CMake option below:
+
+```bash
+ -DWITH_RADOSGW_DAOS=ON
+```
+
+## Build
+
+```bash
+ cd build
+ ninja [vstart]
+```
+
+## Running Test cluster
+
+Edit ceph.conf to add the option below:
+
+```conf
+ [client]
+ rgw backend store = daos
+```
+
+Restart the vstart cluster (or just the RGW server):
+
+```bash
+ [..] RGW=1 ../src/vstart.sh -d
+```
+
+The above configuration brings up an RGW server on DAOS.
+
+## Creating a test user
+
+To create a `testid` user to be used for S3 operations, use the following command:
+
+```bash
+akey='0555b35654ad1656d804'
+skey='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=='
+radosgw-admin user create --uid testid \
+  --access-key $akey --secret $skey \
+  --display-name 'M. Tester' --email tester@ceph.com --no-mon-config
+```
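+
+To verify the new user afterwards, query it with the standard admin command (shown here against the same vstart setup):
+
+```bash
+ radosgw-admin user info --uid testid --no-mon-config
+```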
diff --git a/src/rgw/driver/dbstore/CMakeLists.txt b/src/rgw/driver/dbstore/CMakeLists.txt
new file mode 100644
index 00000000000..0d34d32970b
--- /dev/null
+++ b/src/rgw/driver/dbstore/CMakeLists.txt
@@ -0,0 +1,72 @@
+# TODO: update the cmake version requirement
+cmake_minimum_required(VERSION 3.14.0)
+project(dbstore)
+
+option(USE_SQLITE "Enable SQLITE DB" ON)
+
+set(CMAKE_INCLUDE_DIR ${CMAKE_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/common")
+
+set(dbstore_srcs
+  common/dbstore_log.h
+  common/dbstore.h
+  common/dbstore.cc
+  config/store.cc)
+if(USE_SQLITE)
+  list(APPEND dbstore_srcs
+    config/sqlite.cc
+    sqlite/connection.cc
+    sqlite/error.cc
+    sqlite/statement.cc)
+endif()
+
+set(dbstore_mgr_srcs
+  dbstore_mgr.h
+  dbstore_mgr.cc
+  )
+
+add_library(dbstore_lib ${dbstore_srcs})
+target_include_directories(dbstore_lib
+  PUBLIC "${CMAKE_SOURCE_DIR}/src/fmt/include"
+  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
+  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
+  PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
+set(link_targets spawn)
+if(WITH_JAEGER)
+  list(APPEND link_targets jaeger_base)
+endif()
+list(APPEND link_targets rgw_common)
+target_link_libraries(dbstore_lib PUBLIC ${link_targets})
+
+set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} dbstore_lib)
+
+if(USE_SQLITE)
+  add_subdirectory(sqlite)
+  set(CMAKE_INCLUDE_DIR ${CMAKE_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/sqlite")
+  add_compile_definitions(SQLITE_ENABLED=1)
+  set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} rgw_common)
+  set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} sqlite_db)
+  add_dependencies(sqlite_db dbstore_lib)
+endif()
+
+# add pthread library
+set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} pthread)
+
+find_package(gtest QUIET)
+if(WITH_TESTS)
+  add_subdirectory(tests)
+else()
+  message(WARNING "WITH_TESTS is disabled; dbstore tests will not be built")
+endif()
+
+include_directories(${CMAKE_INCLUDE_DIR})
+add_library(dbstore STATIC ${dbstore_mgr_srcs})
+target_link_libraries(dbstore ${CMAKE_LINK_LIBRARIES})
+
+# dbstore-bin is a standalone binary for testing
+set(dbstore_main_srcs
+  dbstore_main.cc)
+
+set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} dbstore)
+add_executable(dbstore-bin ${dbstore_main_srcs})
+add_dependencies(dbstore-bin dbstore)
+target_link_libraries(dbstore-bin ${CMAKE_LINK_LIBRARIES})
diff --git a/src/rgw/driver/dbstore/README.md b/src/rgw/driver/dbstore/README.md
new file mode 100644
index 00000000000..0867bc2cca4
--- /dev/null
+++ b/src/rgw/driver/dbstore/README.md
@@ -0,0 +1,53 @@
+# DBStore
+Standalone RADOS Gateway (RGW) on DBStore (Experimental)
+
+
+## CMake Option
+Add the CMake option below (enabled by default):
+
+    -DWITH_RADOSGW_DBSTORE=ON
+
+
+## Build
+
+    cd build
+    ninja [vstart]
+
+
+## Running Test cluster
+Edit ceph.conf to add the option below:
+
+    [client]
+    rgw backend store = dbstore
+
+Start the vstart cluster:
+
+    [..] RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -n -d
+
+The above vstart command brings up an RGW server on DBStore and creates a few default users (e.g., `testid`) to be used for S3 operations.
+
+`radosgw-admin` can be used to create and remove other users.
+
+
+By default, dbstore creates the .db file *'/var/lib/ceph/radosgw/dbstore-default_ns.db'* to store the data. This can be configured using the options below in ceph.conf:
+
+    [client]
+    dbstore db dir = <directory under which the .db files are created>
+    dbstore db name prefix = <prefix for the .db file name>
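+
+For example, to keep the database under a scratch directory while testing (values here are illustrative, not defaults):
+
+    [client]
+    dbstore db dir = /tmp/rgw-dbstore
+    dbstore db name prefix = mystore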
+
+
+## DBStore Unit Tests
+To execute the DBStore unit tests (written with the GTest framework), run from the build directory:
+
+    ninja unittest_dbstore_tests
+    ./bin/unittest_dbstore_tests [logfile] [loglevel]
+    (default logfile: rgw_dbstore_tests.log, loglevel: 20)
+    ninja unittest_dbstore_mgr_tests
+    ./bin/unittest_dbstore_mgr_tests
+
+To execute the sample test binary:
+
+    ninja src/rgw/driver/dbstore/install
+    ./bin/dbstore-bin [logfile] [loglevel]
+    (default logfile: rgw_dbstore_bin.log, loglevel: 20)
+
diff --git a/src/rgw/driver/dbstore/common/connection_pool.h b/src/rgw/driver/dbstore/common/connection_pool.h
new file mode 100644
index 00000000000..07f3c81c3df
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/connection_pool.h
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <concepts>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <boost/circular_buffer.hpp>
+#include "common/dout.h"
+
+namespace rgw::dbstore {
+
+template <typename Connection>
+class ConnectionHandle;
+
+/// A thread-safe base class that manages a fixed-size pool of generic database
+/// connections and supports the reclamation of ConnectionHandles. This class
+/// is the subset of ConnectionPool which doesn't depend on the Factory type.
+template <typename Connection>
+class ConnectionPoolBase {
+ public:
+  ConnectionPoolBase(std::size_t max_connections)
+      : connections(max_connections)
+  {}
+ private:
+  friend class ConnectionHandle<Connection>;
+
+  // TODO: the caller may detect a connection error that prevents the connection
+  // from being reused. allow them to indicate these errors here
+  void put(std::unique_ptr<Connection> connection)
+  {
+    auto lock = std::scoped_lock{mutex};
+    connections.push_back(std::move(connection));
+
+    if (connections.size() == 1) { // was empty
+      cond.notify_one();
+    }
+  }
+ protected:
+  std::mutex mutex;
+  std::condition_variable cond;
+  boost::circular_buffer<std::unique_ptr<Connection>> connections;
+};
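+
+// Example usage (illustrative only: SqliteConnection and execute() are
+// hypothetical stand-ins for a concrete connection type; the pool itself
+// only requires a factory returning std::unique_ptr<Connection>):
+//
+//   auto factory = [](const DoutPrefixProvider* dpp) {
+//     return std::make_unique<SqliteConnection>();
+//   };
+//   ConnectionPool<SqliteConnection, decltype(factory)> pool(factory, 8);
+//   {
+//     auto conn = pool.get(dpp);  // reuses, opens, or waits for a connection
+//     conn->execute("...");
+//   }  // handle destroyed here; the connection returns to the pool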
+
+/// Handle to a database connection borrowed from the pool. Automatically
+/// returns the connection to its pool on the handle's destruction.
+template <typename Connection>
+class ConnectionHandle {
+  ConnectionPoolBase<Connection>* pool = nullptr;
+  std::unique_ptr<Connection> conn;
+ public:
+  ConnectionHandle() noexcept = default;
+  ConnectionHandle(ConnectionPoolBase<Connection>* pool,
+                   std::unique_ptr<Connection> conn) noexcept
+      : pool(pool), conn(std::move(conn)) {}
+
+  ~ConnectionHandle() {
+    if (conn) {
+      pool->put(std::move(conn));
+    }
+  }
+
+  ConnectionHandle(ConnectionHandle&&) = default;
+  ConnectionHandle& operator=(ConnectionHandle&& o) noexcept {
+    if (conn) {
+      pool->put(std::move(conn));
+    }
+    conn = std::move(o.conn);
+    pool = o.pool;
+    return *this;
+  }
+
+  explicit operator bool() const noexcept { return static_cast<bool>(conn); }
+  Connection& operator*() const noexcept { return *conn; }
+  Connection* operator->() const noexcept { return conn.get(); }
+  Connection* get() const noexcept { return conn.get(); }
+};
+
+
+// factory_of concept requires the function signature:
+// F(const DoutPrefixProvider*) -> std::unique_ptr<Connection>
+template <typename F, typename Connection>
+concept factory_of = requires (F factory, const DoutPrefixProvider* dpp) {
+  { factory(dpp) } -> std::same_as<std::unique_ptr<Connection>>;
+  requires std::move_constructible<F>;
+};
+
+
+/// Generic database connection pool that enforces a limit on open connections.
+template <typename Connection, factory_of<Connection> Factory>
+class ConnectionPool : public ConnectionPoolBase<Connection> {
+ public:
+  ConnectionPool(Factory factory, std::size_t max_connections)
+      : ConnectionPoolBase<Connection>(max_connections),
+        factory(std::move(factory))
+  {}
+
+  /// Borrow a connection from the pool. If all existing connections are in use,
+  /// use the connection factory to create another one. If we've reached the
+  /// limit on open connections, wait on a condition variable for the next one
+  /// returned to the pool.
+  auto get(const DoutPrefixProvider* dpp)
+      -> ConnectionHandle<Connection>
+  {
+    auto lock = std::unique_lock{this->mutex};
+    std::unique_ptr<Connection> conn;
+
+    if (!this->connections.empty()) {
+      // take an existing connection
+      conn = std::move(this->connections.front());
+      this->connections.pop_front();
+    } else if (total < this->connections.capacity()) {
+      // add another connection to the pool
+      conn = factory(dpp);
+      ++total;
+    } else {
+      // wait for the next put()
+      // TODO: support optional_yield
+      ldpp_dout(dpp, 4) << "ConnectionPool waiting on a connection" << dendl;
+      this->cond.wait(lock, [&] { return !this->connections.empty(); });
+      ldpp_dout(dpp, 4) << "ConnectionPool done waiting" << dendl;
+      conn = std::move(this->connections.front());
+      this->connections.pop_front();
+    }
+
+    return {this, std::move(conn)};
+  }
+ private:
+  Factory factory;
+  std::size_t total = 0;
+};
+
+} // namespace rgw::dbstore
diff --git a/src/rgw/driver/dbstore/common/dbstore.cc b/src/rgw/driver/dbstore/common/dbstore.cc
new file mode 100644
index 00000000000..3936368e6f7
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/dbstore.cc
@@ -0,0 +1,2245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "dbstore.h"
+
+using namespace std;
+
+namespace rgw { namespace store {
+
+map<string, class ObjectOp*> DB::objectmap = {};
+
+map<string, class ObjectOp*> DB::getObjectMap() {
+  return DB::objectmap;
+}
+
+int DB::Initialize(string logfile, int loglevel)
+{
+  int ret = -1;
+  const DoutPrefixProvider *dpp = get_def_dpp();
+
+  if (!cct) {
+    cout << "Failed to Initialize. 
No ceph Context \n"; + return -1; + } + + if (loglevel > 0) { + cct->_conf->subsys.set_log_level(ceph_subsys_rgw, loglevel); + } + if (!logfile.empty()) { + cct->_log->set_log_file(logfile); + cct->_log->reopen_log_file(); + } + + + db = openDB(dpp); + + if (!db) { + ldpp_dout(dpp, 0) <<"Failed to open database " << dendl; + return ret; + } + + ret = InitializeDBOps(dpp); + + if (ret) { + ldpp_dout(dpp, 0) <<"InitializeDBOps failed " << dendl; + closeDB(dpp); + db = NULL; + return ret; + } + + ldpp_dout(dpp, 0) << "DB successfully initialized - name:" \ + << db_name << "" << dendl; + + return ret; +} + +int DB::createGC(const DoutPrefixProvider *dpp) { + int ret = 0; + /* create gc thread */ + + gc_worker = std::make_unique(dpp, this); + gc_worker->create("db_gc"); + + return ret; +} + +int DB::stopGC() { + if (gc_worker) { + gc_worker->signal_stop(); + gc_worker->join(); + } + return 0; +} + +int DB::Destroy(const DoutPrefixProvider *dpp) +{ + if (!db) + return 0; + + stopGC(); + + closeDB(dpp); + + + ldpp_dout(dpp, 20)<<"DB successfully destroyed - name:" \ + < DB::getDBOp(const DoutPrefixProvider *dpp, std::string_view Op, + const DBOpParams *params) +{ + if (!Op.compare("InsertUser")) + return dbops.InsertUser; + if (!Op.compare("RemoveUser")) + return dbops.RemoveUser; + if (!Op.compare("GetUser")) + return dbops.GetUser; + if (!Op.compare("InsertBucket")) + return dbops.InsertBucket; + if (!Op.compare("UpdateBucket")) + return dbops.UpdateBucket; + if (!Op.compare("RemoveBucket")) + return dbops.RemoveBucket; + if (!Op.compare("GetBucket")) + return dbops.GetBucket; + if (!Op.compare("ListUserBuckets")) + return dbops.ListUserBuckets; + if (!Op.compare("InsertLCEntry")) + return dbops.InsertLCEntry; + if (!Op.compare("RemoveLCEntry")) + return dbops.RemoveLCEntry; + if (!Op.compare("GetLCEntry")) + return dbops.GetLCEntry; + if (!Op.compare("ListLCEntries")) + return dbops.ListLCEntries; + if (!Op.compare("InsertLCHead")) + return dbops.InsertLCHead; + if (!Op.compare("RemoveLCHead")) + return dbops.RemoveLCHead; + if (!Op.compare("GetLCHead")) + return dbops.GetLCHead; + + /* Object Operations */ + map::iterator iter; + class ObjectOp* Ob; + + { + const std::lock_guard lk(mtx); + iter = DB::objectmap.find(params->op.bucket.info.bucket.name); + } + + if (iter == DB::objectmap.end()) { + ldpp_dout(dpp, 30)<<"No objectmap found for bucket: " \ + <op.bucket.info.bucket.name << dendl; + /* not found */ + return nullptr; + } + + Ob = iter->second; + + if (!Op.compare("PutObject")) + return Ob->PutObject; + if (!Op.compare("DeleteObject")) + return Ob->DeleteObject; + if (!Op.compare("GetObject")) + return Ob->GetObject; + if (!Op.compare("UpdateObject")) + return Ob->UpdateObject; + if (!Op.compare("ListBucketObjects")) + return Ob->ListBucketObjects; + if (!Op.compare("ListVersionedObjects")) + return Ob->ListVersionedObjects; + if (!Op.compare("PutObjectData")) + return Ob->PutObjectData; + if (!Op.compare("UpdateObjectData")) + return Ob->UpdateObjectData; + if (!Op.compare("GetObjectData")) + return Ob->GetObjectData; + if (!Op.compare("DeleteObjectData")) + return Ob->DeleteObjectData; + if (!Op.compare("DeleteStaleObjectData")) + return Ob->DeleteStaleObjectData; + + return nullptr; +} + +int DB::objectmapInsert(const DoutPrefixProvider *dpp, string bucket, class ObjectOp* ptr) +{ + map::iterator iter; + class ObjectOp *Ob; + + const std::lock_guard lk(mtx); + iter = DB::objectmap.find(bucket); + + if (iter != DB::objectmap.end()) { + // entry already exists + // return success or 
replace it or + // return error ? + // + // return success for now & delete the newly allocated ptr + ldpp_dout(dpp, 30)<<"Objectmap entry already exists for bucket("\ + <InitializeObjectOps(getDBname(), dpp); + + DB::objectmap.insert(pair(bucket, Ob)); + + return 0; +} + +int DB::objectmapDelete(const DoutPrefixProvider *dpp, string bucket) +{ + map::iterator iter; + + const std::lock_guard lk(mtx); + iter = DB::objectmap.find(bucket); + + if (iter == DB::objectmap.end()) { + // entry doesn't exist + // return success or return error ? + // return success for now + ldpp_dout(dpp, 20)<<"Objectmap entry for bucket("<cct = cct; + + //reset params here + params->user_table = user_table; + params->bucket_table = bucket_table; + params->quota_table = quota_table; + params->lc_entry_table = lc_entry_table; + params->lc_head_table = lc_head_table; + + ret = 0; +out: + return ret; +} + +int DB::ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params) { + int ret = -1; + shared_ptr db_op; + + db_op = getDBOp(dpp, Op, params); + + if (!db_op) { + ldpp_dout(dpp, 0)<<"No db_op found for Op("<Execute(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0)<<"In Process op Execute failed for fop(" << Op << ")" << dendl; + } else { + ldpp_dout(dpp, 20)<<"Successfully processed fop(" << Op << ")" << dendl; + } + + return ret; +} + +int DB::get_user(const DoutPrefixProvider *dpp, + const std::string& query_str, const std::string& query_str_val, + RGWUserInfo& uinfo, map *pattrs, + RGWObjVersionTracker *pobjv_tracker) { + int ret = 0; + + if (query_str.empty() || query_str_val.empty()) { + ldpp_dout(dpp, 0)<<"In GetUser - Invalid query(" << query_str <<"), query_str_val(" << query_str_val <<")" << dendl; + return -1; + } + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.query_str = query_str; + + // validate query_str with UserTable entries names + if (query_str == "username") { + params.op.user.uinfo.display_name = query_str_val; + } else if (query_str == "email") { + params.op.user.uinfo.user_email = query_str_val; + } else if (query_str == "access_key") { + RGWAccessKey k(query_str_val, ""); + map keys; + keys[query_str_val] = k; + params.op.user.uinfo.access_keys = keys; + } else if (query_str == "user_id") { + params.op.user.uinfo.user_id = uinfo.user_id; + } else { + ldpp_dout(dpp, 0)<<"In GetUser Invalid query string :" <read_version = params.op.user.user_version; + } + +out: + return ret; +} + +int DB::store_user(const DoutPrefixProvider *dpp, + RGWUserInfo& uinfo, bool exclusive, map *pattrs, + RGWObjVersionTracker *pobjv, RGWUserInfo* pold_info) +{ + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + int ret = 0; + + /* Check if the user already exists and return the old info, caller will have a use for it */ + RGWUserInfo orig_info; + RGWObjVersionTracker objv_tracker = {}; + obj_version& obj_ver = objv_tracker.read_version; + + orig_info.user_id = uinfo.user_id; + ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker); + + if (!ret && obj_ver.ver) { + /* already exists. */ + + if (pold_info) { + *pold_info = orig_info; + } + + if (pobjv && (pobjv->read_version.ver != obj_ver.ver)) { + /* Object version mismatch.. 
return ECANCELED */ + ret = -ECANCELED; + ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <read_version = obj_ver; + pobjv->write_version = obj_ver; + } + +out: + return ret; +} + +int DB::remove_user(const DoutPrefixProvider *dpp, + RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv) +{ + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + int ret = 0; + + RGWUserInfo orig_info; + RGWObjVersionTracker objv_tracker = {}; + + orig_info.user_id = uinfo.user_id; + ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker); + + if (ret) { + return ret; + } + + if (!ret && objv_tracker.read_version.ver) { + /* already exists. */ + + if (pobjv && (pobjv->read_version.ver != objv_tracker.read_version.ver)) { + /* Object version mismatch.. return ECANCELED */ + ret = -ECANCELED; + ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <& attrs, + RGWBucketInfo& info, + obj_version *pobjv, + obj_version *pep_objv, + real_time creation_time, + rgw_bucket *pmaster_bucket, + uint32_t *pmaster_num_shards, + optional_yield y, + bool exclusive) +{ + /* + * XXX: Simple creation for now. + * + * Referring to RGWRados::create_bucket(), + * Check if bucket already exists, select_bucket_placement, + * is explicit put/remove instance info needed? - should not be ideally + */ + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + int ret = 0; + + /* Check if the bucket already exists and return the old info, caller will have a use for it */ + RGWBucketInfo orig_info; + orig_info.bucket.name = bucket.name; + ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr, nullptr); + + if (!ret && !orig_info.owner.id.empty() && exclusive) { + /* already exists. Return the old info */ + + info = std::move(orig_info); + return ret; + } + + RGWObjVersionTracker& objv_tracker = info.objv_tracker; + + objv_tracker.read_version.clear(); + + if (pobjv) { + objv_tracker.write_version = *pobjv; + } else { + objv_tracker.generate_new_write_ver(cct); + } + params.op.bucket.bucket_version = objv_tracker.write_version; + objv_tracker.read_version = params.op.bucket.bucket_version; + + uint64_t bid = next_bucket_id(); + string s = getDBname() + "." + std::to_string(bid); + bucket.marker = bucket.bucket_id = s; + + info.bucket = bucket; + info.owner = owner.user_id; + info.zonegroup = zonegroup_id; + info.placement_rule = placement_rule; + info.swift_ver_location = swift_ver_location; + info.swift_versioning = (!swift_ver_location.empty()); + + info.requester_pays = false; + if (real_clock::is_zero(creation_time)) { + info.creation_time = ceph::real_clock::now(); + } else { + info.creation_time = creation_time; + } + if (pquota_info) { + info.quota = *pquota_info; + } + + params.op.bucket.info = info; + params.op.bucket.bucket_attrs = attrs; + params.op.bucket.mtime = ceph::real_time(); + params.op.user.uinfo.user_id.id = owner.user_id.id; + + ret = ProcessOp(dpp, "InsertBucket", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"create_bucket failed with err:(" <add(std::move(entry)); + } + + if (query_str == "all") { + // userID/OwnerID may have changed. Update it. 
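+    // (a query_str of "all" spans buckets across every user; the GC thread
+    // later in this file relies on that, feeding the owner of the last
+    // bucket back in as its user_marker for the next sweep)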
+ user.id = params.op.bucket.info.owner.id; + } + +out: + return ret; +} + +int DB::update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str, + RGWBucketInfo& info, + bool exclusive, + const rgw_user* powner_id, + map* pattrs, + ceph::real_time* pmtime, + RGWObjVersionTracker* pobjv) +{ + int ret = 0; + DBOpParams params = {}; + obj_version bucket_version; + RGWBucketInfo orig_info; + + /* Check if the bucket already exists and return the old info, caller will have a use for it */ + orig_info.bucket.name = info.bucket.name; + params.op.bucket.info.bucket.name = info.bucket.name; + ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr, + &bucket_version); + + if (ret) { + ldpp_dout(dpp, 0)<<"Failed to read bucket info err:(" <read_version.ver != bucket_version.ver) { + ldpp_dout(dpp, 0)<<"Read version mismatch err:(" <id; + } else { + params.op.user.uinfo.user_id.id = orig_info.owner.id; + } + + /* Update version & mtime */ + params.op.bucket.bucket_version.ver = ++(bucket_version.ver); + + if (pmtime) { + params.op.bucket.mtime = *pmtime;; + } else { + params.op.bucket.mtime = ceph::real_time(); + } + + if (query_str == "attrs") { + params.op.query_str = "attrs"; + params.op.bucket.bucket_attrs = *pattrs; + } else if (query_str == "owner") { + /* Update only owner i.e, chown. + * Update creation_time too */ + params.op.query_str = "owner"; + params.op.bucket.info.creation_time = params.op.bucket.mtime; + } else if (query_str == "info") { + params.op.query_str = "info"; + params.op.bucket.info = info; + } else { + ret = -1; + ldpp_dout(dpp, 0)<<"In UpdateBucket Invalid query_str : " << query_str << dendl; + goto out; + } + + ret = ProcessOp(dpp, "UpdateBucket", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In UpdateBucket failed err:(" <read_version = params.op.bucket.bucket_version; + pobjv->write_version = params.op.bucket.bucket_version; + } + +out: + return ret; +} + +/** + * Get ordered listing of the objects in a bucket. + * + * max_p: maximum number of results to return + * bucket: bucket to list contents of + * prefix: only return results that match this prefix + * delim: do not include results that match this string. + * Any skipped results will have the matching portion of their name + * inserted in common_prefixes with a "true" mark. + * marker: if filled in, begin the listing with this object. + * end_marker: if filled in, end the listing with this object. + * result: the objects are put in here. + * common_prefixes: if delim is filled in, any matching prefixes are + * placed here. + * is_truncated: if number of objects in the bucket is bigger than + * max, then truncated. + */ +int DB::Bucket::List::list_objects(const DoutPrefixProvider *dpp, int64_t max, + vector *result, + map *common_prefixes, bool *is_truncated) +{ + int ret = 0; + DB *store = target->get_store(); + int64_t count = 0; + std::string prev_obj; + + DBOpParams db_params = {}; + store->InitializeParams(dpp, &db_params); + + db_params.op.bucket.info = target->get_bucket_info(); + /* XXX: Handle whole marker? key -> name, instance, ns? 
*/ + db_params.op.obj.min_marker = params.marker.name; + db_params.op.obj.max_marker = params.end_marker.name; + db_params.op.obj.prefix = params.prefix + "%"; + db_params.op.list_max_count = max + 1; /* +1 for next_marker */ + + ret = store->ProcessOp(dpp, "ListBucketObjects", &db_params); + + if (ret) { + ldpp_dout(dpp, 0)<<"In ListBucketObjects failed err:(" <= max) { + *is_truncated = true; + next_marker.name = entry.key.name; + next_marker.instance = entry.key.instance; + break; + } + + if (!params.delim.empty()) { + const std::string& objname = entry.key.name; + const int delim_pos = objname.find(params.delim, params.prefix.size()); + if (delim_pos >= 0) { + /* extract key -with trailing delimiter- for CommonPrefix */ + const std::string& prefix_key = + objname.substr(0, delim_pos + params.delim.length()); + + if (common_prefixes && + common_prefixes->find(prefix_key) == common_prefixes->end()) { + next_marker = prefix_key; + (*common_prefixes)[prefix_key] = true; + count++; + } + continue; + } + } + + if (!params.end_marker.name.empty() && + params.end_marker.name.compare(entry.key.name) <= 0) { + // should not include end_marker + *is_truncated = false; + break; + } + count++; + result->push_back(std::move(entry)); + } +out: + return ret; +} + +int DB::raw_obj::InitializeParamsfromRawObj(const DoutPrefixProvider *dpp, + DBOpParams* params) { + int ret = 0; + + if (!params) + return -1; + + params->op.bucket.info.bucket.name = bucket_name; + params->op.obj.state.obj.key.name = obj_name; + params->op.obj.state.obj.key.instance = obj_instance; + params->op.obj.state.obj.key.ns = obj_ns; + params->op.obj.obj_id = obj_id; + + if (multipart_part_str != "0.0") { + params->op.obj.is_multipart = true; + } else { + params->op.obj.is_multipart = false; + } + + params->op.obj_data.multipart_part_str = multipart_part_str; + params->op.obj_data.part_num = part_num; + + return ret; +} + +int DB::Object::InitializeParamsfromObject(const DoutPrefixProvider *dpp, + DBOpParams* params) { + int ret = 0; + string bucket = bucket_info.bucket.name; + + if (!params) + return -1; + + params->op.bucket.info.bucket.name = bucket; + params->op.obj.state.obj = obj; + params->op.obj.obj_id = obj_id; + + return ret; +} + +int DB::Object::get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params) { + int ret = 0; + + if (params.op.obj.state.obj.key.name.empty()) { + /* Initialize */ + store->InitializeParams(dpp, ¶ms); + InitializeParamsfromObject(dpp, ¶ms); + } + + ret = store->ProcessOp(dpp, "GetObject", ¶ms); + + /* pick one field check if object exists */ + if (!ret && !params.op.obj.state.exists) { + ldpp_dout(dpp, 0)<<"Object(bucket:" << bucket_info.bucket.name << ", Object:"<< obj.key.name << ") doesn't exist" << dendl; + ret = -ENOENT; + } + + return ret; +} + +int DB::Object::obj_omap_set_val_by_key(const DoutPrefixProvider *dpp, + const std::string& key, bufferlist& val, + bool must_exist) { + int ret = 0; + + DBOpParams params = {}; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <& keys, + std::map* vals) +{ + int ret = 0; + DBOpParams params = {}; + std::map omap; + + if (!vals) + return -1; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <& info) +{ + int ret = 0; + 
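// same read-modify-write shape as the omap helpers above: load the
+  // object row via get_object_impl(), adjust it in memory, then persist
+  // the result through an "UpdateObject" op
+ 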
DBOpParams params = {}; + std::map omap; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <set_instance(buf); +} + +int DB::Object::obj_omap_get_all(const DoutPrefixProvider *dpp, + std::map *m) +{ + int ret = 0; + DBOpParams params = {}; + std::map omap; + + if (!m) + return -1; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" < *m, bool* pmore) +{ + int ret = 0; + DBOpParams params = {}; + std::map omap; + map::iterator iter; + uint64_t count = 0; + + if (!m) + return -1; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <first < marker) + continue; + + if ((++count) > max_count) { + *pmore = true; + break; + } + + (*m)[iter->first] = iter->second; + } + +out: + return ret; +} + +int DB::Object::set_attrs(const DoutPrefixProvider *dpp, + map& setattrs, + map* rmattrs) +{ + int ret = 0; + + DBOpParams params = {}; + rgw::sal::Attrs *attrs; + map::iterator iter; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <begin(); iter != rmattrs->end(); ++iter) { + (*attrs).erase(iter->first); + } + } + for (iter = setattrs.begin(); iter != setattrs.end(); ++iter) { + (*attrs)[iter->first] = iter->second; + } + + params.op.query_str = "attrs"; + params.op.obj.state.mtime = real_clock::now(); + + ret = store->ProcessOp(dpp, "UpdateObject", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" < *attrset; + + store->InitializeParams(dpp, ¶ms); + InitializeParamsfromObject(dpp, ¶ms); + + ret = store->ProcessOp(dpp, "GetObject", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0) <<"In GetObject failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <InitializeParams(dpp, ¶ms); + InitializeParamsfromRawObj(dpp, ¶ms); + + ret = db->ProcessOp(dpp, "GetObjectData", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In GetObjectData failed err:(" <InitializeParams(dpp, ¶ms); + InitializeParamsfromRawObj(dpp, ¶ms); + + /* XXX: Check for chunk_size ?? 
*/ + params.op.obj_data.offset = ofs; + unsigned write_len = std::min((uint64_t)bl.length() - write_ofs, len); + bl.begin(write_ofs).copy(write_len, params.op.obj_data.data); + params.op.obj_data.size = params.op.obj_data.data.length(); + params.op.obj.state.mtime = real_clock::now(); + + ret = db->ProcessOp(dpp, "PutObjectData", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In PutObjectData failed err:(" <& list_entries) { + int ret = 0; + store = get_store(); + DBOpParams db_params = {}; + + store->InitializeParams(dpp, &db_params); + InitializeParamsfromObject(dpp, &db_params); + + db_params.op.list_max_count = MAX_VERSIONED_OBJECTS; + + ret = store->ProcessOp(dpp, "ListVersionedObjects", &db_params); + + if (ret) { + ldpp_dout(dpp, 0)<<"In ListVersionedObjects failed err:(" <InitializeParams(dpp, ¶ms); + InitializeParamsfromObject(dpp, ¶ms); + params.op.obj.state.obj.key = ent.key; + + ret = get_object_impl(dpp, params); + + if (ret) { + ldpp_dout(dpp, 0) <<"get_object_impl of versioned object failed err:(" <shadow_obj to store ObjectID string */ + s->shadow_obj = params.op.obj.obj_id; + + *state = &obj_state; + **state = *s; + +out: + return ret; + +} + +int DB::Object::get_state(const DoutPrefixProvider *dpp, RGWObjState** pstate, bool follow_olh) +{ + return get_obj_state(dpp, bucket_info, obj, follow_olh, pstate); +} + +int DB::Object::Read::get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest) +{ + RGWObjState* state; + int r = source->get_state(dpp, &state, true); + if (r < 0) + return r; + if (!state->exists) + return -ENOENT; + if (!state->get_attr(name, dest)) + return -ENODATA; + + return 0; +} + +int DB::Object::Read::prepare(const DoutPrefixProvider *dpp) +{ + DB *store = source->get_store(); + CephContext *cct = store->ctx(); + + bufferlist etag; + + map::iterator iter; + + RGWObjState* astate; + + int r = source->get_state(dpp, &astate, true); + if (r < 0) + return r; + + if (!astate->exists) { + return -ENOENT; + } + + state.obj = astate->obj; + source->obj_id = astate->shadow_obj; + + if (params.target_obj) { + *params.target_obj = state.obj; + } + if (params.attrs) { + *params.attrs = astate->attrset; + if (cct->_conf->subsys.should_gather()) { + for (iter = params.attrs->begin(); iter != params.attrs->end(); ++iter) { + ldpp_dout(dpp, 20) << "Read xattr rgw_rados: " << iter->first << dendl; + } + } + } + + if (conds.if_match || conds.if_nomatch) { + r = get_attr(dpp, RGW_ATTR_ETAG, etag); + if (r < 0) + return r; + + if (conds.if_match) { + string if_match_str = rgw_string_unquote(conds.if_match); + ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-Match: " << if_match_str << dendl; + if (if_match_str.compare(0, etag.length(), etag.c_str(), etag.length()) != 0) { + return -ERR_PRECONDITION_FAILED; + } + } + + if (conds.if_nomatch) { + string if_nomatch_str = rgw_string_unquote(conds.if_nomatch); + ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-NoMatch: " << if_nomatch_str << dendl; + if (if_nomatch_str.compare(0, etag.length(), etag.c_str(), etag.length()) == 0) { + return -ERR_NOT_MODIFIED; + } + } + } + + if (params.obj_size) + *params.obj_size = astate->size; + if (params.lastmod) + *params.lastmod = astate->mtime; + + return 0; +} + +int DB::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end) +{ + if (ofs < 0) { + ofs += obj_size; + if (ofs < 0) + ofs = 0; + end = obj_size - 1; + } else if (end < 0) { + end = obj_size - 1; + } + + if (obj_size > 0) { + 
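// clamp against the actual object size: e.g. with obj_size=100, a suffix
+    // range of ofs=-5 was normalized above to ofs=95, end=99, while a start
+    // at or past EOF is rejected with -ERANGE below
+ 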
if (ofs >= (off_t)obj_size) { + return -ERANGE; + } + if (end >= (off_t)obj_size) { + end = obj_size - 1; + } + } + return 0; +} + +int DB::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp) +{ + DB *store = source->get_store(); + + uint64_t read_ofs = ofs; + uint64_t len, read_len; + + bufferlist read_bl; + uint64_t max_chunk_size = store->get_max_chunk_size(); + + RGWObjState* astate; + int r = source->get_state(dpp, &astate, true); + if (r < 0) + return r; + + if (!astate->exists) { + return -ENOENT; + } + + if (astate->size == 0) { + end = 0; + } else if (end >= (int64_t)astate->size) { + end = astate->size - 1; + } + + if (end < 0) + len = 0; + else + len = end - ofs + 1; + + + if (len > max_chunk_size) { + len = max_chunk_size; + } + + int head_data_size = astate->data.length(); + bool reading_from_head = (ofs < head_data_size); + + if (reading_from_head) { + if (astate) { // && astate->prefetch_data)? + if (!ofs && astate->data.length() >= len) { + bl = astate->data; + return bl.length(); + } + + if (ofs < astate->data.length()) { + unsigned copy_len = std::min((uint64_t)head_data_size - ofs, len); + astate->data.begin(ofs).copy(copy_len, bl); + return bl.length(); + } + } + } + + /* tail object */ + int part_num = (ofs / max_chunk_size); + /* XXX: Handle multipart_str */ + raw_obj read_obj(store, source->get_bucket_info().bucket.name, astate->obj.key.name, + astate->obj.key.instance, astate->obj.key.ns, source->obj_id, "0.0", part_num); + + read_len = len; + + ldpp_dout(dpp, 20) << "dbstore->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl; + + // read from non head object + r = read_obj.read(dpp, read_ofs, read_len, bl); + + if (r < 0) { + return r; + } + + return bl.length(); +} + +static int _get_obj_iterate_cb(const DoutPrefixProvider *dpp, + const DB::raw_obj& read_obj, off_t obj_ofs, + off_t len, bool is_head_obj, + RGWObjState* astate, void *arg) +{ + struct db_get_obj_data* d = static_cast(arg); + return d->store->get_obj_iterate_cb(dpp, read_obj, obj_ofs, len, + is_head_obj, astate, arg); +} + +int DB::get_obj_iterate_cb(const DoutPrefixProvider *dpp, + const raw_obj& read_obj, off_t obj_ofs, + off_t len, bool is_head_obj, + RGWObjState* astate, void *arg) +{ + struct db_get_obj_data* d = static_cast(arg); + bufferlist bl; + int r = 0; + + if (is_head_obj) { + bl = astate->data; + } else { + // read from non head object + raw_obj robj = read_obj; + /* read entire data. 
So pass offset as '0' & len as '-1' */ + r = robj.read(dpp, 0, -1, bl); + + if (r <= 0) { + return r; + } + } + + unsigned read_ofs = 0, read_len = 0; + while (read_ofs < bl.length()) { + unsigned chunk_len = std::min((uint64_t)bl.length() - read_ofs, (uint64_t)len); + r = d->client_cb->handle_data(bl, read_ofs, chunk_len); + if (r < 0) + return r; + read_ofs += chunk_len; + read_len += chunk_len; + ldpp_dout(dpp, 20) << "dbstore->get_obj_iterate_cb obj-ofs=" << obj_ofs << " len=" << len << " chunk_len = " << chunk_len << " read_len = " << read_len << dendl; + } + + + d->offset += read_len; + + return read_len; +} + +int DB::Object::Read::iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb) +{ + DB *store = source->get_store(); + const uint64_t chunk_size = store->get_max_chunk_size(); + + db_get_obj_data data(store, cb, ofs); + + int r = source->iterate_obj(dpp, source->get_bucket_info(), state.obj, + ofs, end, chunk_size, _get_obj_iterate_cb, &data); + if (r < 0) { + ldpp_dout(dpp, 0) << "iterate_obj() failed with " << r << dendl; + return r; + } + + return 0; +} + +int DB::Object::iterate_obj(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, const rgw_obj& obj, + off_t ofs, off_t end, uint64_t max_chunk_size, + iterate_obj_cb cb, void *arg) +{ + DB *store = get_store(); + uint64_t len; + RGWObjState* astate; + + int r = get_state(dpp, &astate, true); + if (r < 0) { + return r; + } + + if (!astate->exists) { + return -ENOENT; + } + + if (end < 0) + len = 0; + else + len = end - ofs + 1; + + /* XXX: Will it really help to store all parts info in astate like manifest in Rados? */ + int part_num = 0; + int head_data_size = astate->data.length(); + + while (ofs <= end && (uint64_t)ofs < astate->size) { + part_num = (ofs / max_chunk_size); + uint64_t read_len = std::min(len, max_chunk_size); + + /* XXX: Handle multipart_str */ + raw_obj read_obj(store, get_bucket_info().bucket.name, astate->obj.key.name, + astate->obj.key.instance, astate->obj.key.ns, obj_id, "0.0", part_num); + bool reading_from_head = (ofs < head_data_size); + + r = cb(dpp, read_obj, ofs, read_len, reading_from_head, astate, arg); + if (r <= 0) { + return r; + } + /* r refers to chunk_len (no. of bytes) handled in cb */ + len -= r; + ofs += r; + } + + return 0; +} + +int DB::Object::Write::prepare(const DoutPrefixProvider* dpp) +{ + DB *store = target->get_store(); + + int ret = -1; + + /* XXX: handle assume_noent */ + + obj_state.obj = target->obj; + + if (target->obj_id.empty()) { + if (!target->obj.key.instance.empty() && (target->obj.key.instance != "null")) { + /* versioned object. Set obj_id same as versionID/instance */ + target->obj_id = target->obj.key.instance; + } else { + // generate obj_id + char buf[33]; + gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); + target->obj_id = buf; + } + } + + ret = 0; + return ret; +} + +/* writes tail objects */ +int DB::Object::Write::write_data(const DoutPrefixProvider* dpp, + bufferlist& data, uint64_t ofs) { + DB *store = target->get_store(); + /* tail objects */ + /* XXX: Split into parts each of max_chunk_size. 
But later make tail + * object chunk size limit to sqlite blob limit */ + int part_num = 0; + + uint64_t max_chunk_size = store->get_max_chunk_size(); + + /* tail_obj ofs should be greater than max_head_size */ + if (mp_part_str == "0.0") { // ensure not multipart meta object + if (ofs < store->get_max_head_size()) { + return -1; + } + } + + uint64_t end = data.length(); + uint64_t write_ofs = 0; + /* as we are writing max_chunk_size at a time in sal_dbstore DBAtomicWriter::process(), + * maybe this while loop is not needed + */ + while (write_ofs < end) { + part_num = (ofs / max_chunk_size); + uint64_t len = std::min(end, max_chunk_size); + + /* XXX: Handle multipart_str */ + raw_obj write_obj(store, target->get_bucket_info().bucket.name, obj_state.obj.key.name, + obj_state.obj.key.instance, obj_state.obj.key.ns, target->obj_id, mp_part_str, part_num); + + + ldpp_dout(dpp, 20) << "dbstore->write obj-ofs=" << ofs << " write_len=" << len << dendl; + + // write into non head object + int r = write_obj.write(dpp, ofs, write_ofs, len, data); + if (r < 0) { + return r; + } + /* r refers to chunk_len (no. of bytes) handled in raw_obj::write */ + len -= r; + ofs += r; + write_ofs += r; + } + + return 0; +} + +/* Write metadata & head object data */ +int DB::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, + uint64_t size, uint64_t accounted_size, + map& attrs, + bool assume_noent, bool modify_tail) +{ + DB *store = target->get_store(); + + RGWObjState* state = &obj_state; + map *attrset; + DBOpParams params = {}; + int ret = 0; + string etag; + string content_type; + bufferlist acl_bl; + string storage_class; + + map::iterator iter; + + store->InitializeParams(dpp, ¶ms); + target->InitializeParamsfromObject(dpp, ¶ms); + + obj_state = params.op.obj.state; + + if (real_clock::is_zero(meta.set_mtime)) { + meta.set_mtime = real_clock::now(); + } + + attrset = &state->attrset; + if (target->bucket_info.obj_lock_enabled() && target->bucket_info.obj_lock.has_rule()) { + // && meta.flags == PUT_OBJ_CREATE) { + auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); + if (iter == attrs.end()) { + real_time lock_until_date = target->bucket_info.obj_lock.get_lock_until_date(meta.set_mtime); + string mode = target->bucket_info.obj_lock.get_mode(); + RGWObjectRetention obj_retention(mode, lock_until_date); + bufferlist bl; + obj_retention.encode(bl); + (*attrset)[RGW_ATTR_OBJECT_RETENTION] = bl; + } + } + + state->mtime = meta.set_mtime; + + if (meta.data) { + /* if we want to overwrite the data, we also want to overwrite the + xattrs, so just remove the object */ + params.op.obj.head_data = *meta.data; + } + + if (meta.rmattrs) { + for (iter = meta.rmattrs->begin(); iter != meta.rmattrs->end(); ++iter) { + const string& name = iter->first; + (*attrset).erase(name.c_str()); + } + } + + if (meta.manifest) { + storage_class = meta.manifest->get_tail_placement().placement_rule.storage_class; + + /* remove existing manifest attr */ + iter = attrs.find(RGW_ATTR_MANIFEST); + if (iter != attrs.end()) + attrs.erase(iter); + + bufferlist bl; + encode(*meta.manifest, bl); + (*attrset)[RGW_ATTR_MANIFEST] = bl; + } + + for (iter = attrs.begin(); iter != attrs.end(); ++iter) { + const string& name = iter->first; + bufferlist& bl = iter->second; + + if (!bl.length()) + continue; + + (*attrset)[name.c_str()] = bl; + + if (name.compare(RGW_ATTR_ETAG) == 0) { + etag = rgw_bl_str(bl); + params.op.obj.etag = etag; + } else if (name.compare(RGW_ATTR_CONTENT_TYPE) == 0) { + content_type = rgw_bl_str(bl); + } else if 
(name.compare(RGW_ATTR_ACL) == 0) { + acl_bl = bl; + } + } + + if (!storage_class.empty()) { + bufferlist bl; + bl.append(storage_class); + (*attrset)[RGW_ATTR_STORAGE_CLASS] = bl; + } + + params.op.obj.state = *state ; + params.op.obj.state.exists = true; + params.op.obj.state.size = size; + params.op.obj.state.accounted_size = accounted_size; + params.op.obj.owner = target->get_bucket_info().owner.id; + params.op.obj.category = meta.category; + + if (meta.mtime) { + *meta.mtime = meta.set_mtime; + } + + params.op.query_str = "meta"; + params.op.obj.obj_id = target->obj_id; + + /* Check if versioned */ + bool is_versioned = !target->obj.key.instance.empty() && (target->obj.key.instance != "null"); + params.op.obj.is_versioned = is_versioned; + + if (is_versioned && (params.op.obj.category == RGWObjCategory::Main)) { + /* versioned object */ + params.op.obj.flags |= rgw_bucket_dir_entry::FLAG_VER; + } + ret = store->ProcessOp(dpp, "PutObject", ¶ms); + if (ret) { + ldpp_dout(dpp, 0)<<"In PutObject failed err:(" <list_versioned_objects(dpp, del_params.op.obj.list_entries); + if (ret) { + ldpp_dout(dpp, 0)<<"ListVersionedObjects failed err:(" <get_store(); + + ret = store->ProcessOp(dpp, "DeleteObject", &del_params); + if (ret) { + ldpp_dout(dpp, 0) << "In DeleteObject failed err:(" <ProcessOp(dpp, "UpdateObjectData", &update_params); + + if (ret) { + ldpp_dout(dpp, 0) << "Updating tail objects mtime failed err:(" <get_store(); + bool versioning_suspended = ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == BUCKET_VERSIONS_SUSPENDED); + int ret = -1; + DBOpParams olh_params = {}; + std::string version_id; + DBOpParams next_params = del_params; + + version_id = del_params.op.obj.state.obj.key.instance; + + DBOpParams dm_params = del_params; + + // create delete marker + + store->InitializeParams(dpp, &dm_params); + target->InitializeParamsfromObject(dpp, &dm_params); + dm_params.op.obj.category = RGWObjCategory::None; + + if (versioning_suspended) { + dm_params.op.obj.state.obj.key.instance = "null"; + } else { + store->gen_rand_obj_instance_name(&dm_params.op.obj.state.obj.key); + dm_params.op.obj.obj_id = dm_params.op.obj.state.obj.key.instance; + } + + dm_params.op.obj.flags |= (rgw_bucket_dir_entry::FLAG_DELETE_MARKER); + + ret = store->ProcessOp(dpp, "PutObject", &dm_params); + + if (ret) { + ldpp_dout(dpp, 0) << "delete_olh: failed to create delete marker - err:(" <* entry) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_entry.index = oid; + params.op.lc_entry.entry.set_bucket(marker); + + params.op.query_str = "get_entry"; + ret = ProcessOp(dpp, "GetLCEntry", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <reset(e); + } + +out: + return ret; +} + +int DB::get_next_entry(const std::string& oid, const std::string& marker, + std::unique_ptr* entry) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_entry.index = oid; + params.op.lc_entry.entry.set_bucket(marker); + + params.op.query_str = "get_next_entry"; + ret = ProcessOp(dpp, "GetLCEntry", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <reset(e); + } + +out: + return ret; +} + +int DB::set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + 
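// LC entries are keyed on (index oid, bucket); "InsertLCEntry" is expected
+  // to behave as an upsert, so set_entry() covers both create and update
+ 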
params.op.lc_entry.index = oid; + params.op.lc_entry.entry = entry; + + ret = ProcessOp(dpp, "InsertLCEntry", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In InsertLCEntry failed err:(" <>& entries) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + entries.clear(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_entry.index = oid; + params.op.lc_entry.min_marker = marker; + params.op.list_max_count = max_entries; + + ret = ProcessOp(dpp, "ListLCEntries", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In ListLCEntries failed err:(" <(std::move(entry))); + } + +out: + return ret; +} + +int DB::rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_entry.index = oid; + params.op.lc_entry.entry = entry; + + ret = ProcessOp(dpp, "RemoveLCEntry", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In RemoveLCEntry failed err:(" <* head) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_head.index = oid; + + ret = ProcessOp(dpp, "GetLCHead", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In GetLCHead failed err:(" <(params.op.lc_head.head); + +out: + return ret; +} + +int DB::put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head) +{ + int ret = 0; + const DoutPrefixProvider *dpp = get_def_dpp(); + + DBOpParams params = {}; + InitializeParams(dpp, ¶ms); + + params.op.lc_head.index = oid; + params.op.lc_head.head = head; + + ret = ProcessOp(dpp, "InsertLCHead", ¶ms); + + if (ret) { + ldpp_dout(dpp, 0)<<"In InsertLCHead failed err:(" < lk(mtx); + + ldpp_dout(dpp, 2) << " DB GC started " << dendl; + int max = 100; + RGWUserBuckets buckets; + bool is_truncated = false; + + do { + std::string& marker = bucket_marker; + rgw_user user; + user.id = user_marker; + buckets.clear(); + is_truncated = false; + + int r = db->list_buckets(dpp, "all", user, marker, string(), + max, false, &buckets, &is_truncated); + + if (r < 0) { //do nothing? retry later ? + break; + } + + for (const auto& ent : buckets.get_buckets()) { + const std::string &bname = ent.first; + + r = db->delete_stale_objs(dpp, bname, gc_obj_min_wait); + + if (r < 0) { //do nothing? skip to next entry? + ldpp_dout(dpp, 2) << " delete_stale_objs failed for bucket( " << bname <<")" << dendl; + } + bucket_marker = bname; + user_marker = user.id; + + /* XXX: If using locks, unlock here and reacquire in the next iteration */ + cv.wait_for(lk, std::chrono::milliseconds(100)); + if (stop_signalled) { + goto done; + } + } + } while(is_truncated); + + bucket_marker.clear(); + cv.wait_for(lk, std::chrono::milliseconds(gc_interval*10)); + } while(! 
stop_signalled); + +done: + return nullptr; +} + +} } // namespace rgw::store + diff --git a/src/rgw/driver/dbstore/common/dbstore.h b/src/rgw/driver/dbstore/common/dbstore.h new file mode 100644 index 00000000000..12ab3f0600d --- /dev/null +++ b/src/rgw/driver/dbstore/common/dbstore.h @@ -0,0 +1,2024 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef DB_STORE_H +#define DB_STORE_H + +#include +#include +#include +#include +#include +#include +#include +// this seems safe to use, at least for now--arguably, we should +// prefer header-only fmt, in general +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include "fmt/format.h" +#include +#include "rgw_sal_store.h" +#include "rgw_common.h" +#include "rgw_bucket.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/ceph_context.h" +#include "rgw_obj_manifest.h" +#include "rgw_multi.h" + +namespace rgw { namespace store { + +class DB; + +struct DBOpUserInfo { + RGWUserInfo uinfo = {}; + obj_version user_version; + rgw::sal::Attrs user_attrs; +}; + +struct DBOpBucketInfo { + RGWBucketEnt ent; // maybe not needed. not used in create/get_bucket + RGWBucketInfo info; + RGWUser* owner = nullptr; + rgw::sal::Attrs bucket_attrs; + obj_version bucket_version; + ceph::real_time mtime; + // used for list query + std::string min_marker; + std::string max_marker; + std::list list_entries; +}; + +struct DBOpObjectInfo { + RGWAccessControlPolicy acls; + RGWObjState state = {}; + + /* Below are taken from rgw_bucket_dir_entry */ + RGWObjCategory category; + std::string etag; + std::string owner; + std::string owner_display_name; + std::string content_type; + std::string storage_class; + bool appendable; + uint64_t index_ver; + std::string tag; + uint16_t flags; + uint64_t versioned_epoch; + + /* from state.manifest (RGWObjManifest) */ + std::map objs; + uint64_t head_size{0}; + rgw_placement_rule head_placement_rule; + uint64_t max_head_size{0}; + std::string obj_id; + rgw_bucket_placement tail_placement; /* might be different than the original bucket, + as object might have been copied across pools */ + std::map rules; + std::string tail_instance; /* tail object's instance */ + + + /* Obj's omap store */ + std::map omap; + + /* Extra fields */ + bool is_multipart; + std::list mp_parts; + + bufferlist head_data; + std::string min_marker; + std::string max_marker; + std::string prefix; + std::list list_entries; + /* XXX: Maybe use std::vector instead of std::list */ + + /* for versioned objects */ + bool is_versioned; + uint64_t version_num = 0; +}; + +struct DBOpObjectDataInfo { + RGWObjState state; + uint64_t part_num; + std::string multipart_part_str; + uint64_t offset; + uint64_t size; + bufferlist data{}; +}; + +struct DBOpLCHeadInfo { + std::string index; + rgw::sal::StoreLifecycle::StoreLCHead head; +}; + +struct DBOpLCEntryInfo { + std::string index; + rgw::sal::StoreLifecycle::StoreLCEntry entry; + // used for list query + std::string min_marker; + std::list list_entries; +}; + +struct DBOpInfo { + std::string name; // Op name + /* Support only single access_key for now. So store + * it separately as primary access_key_id & secret to + * be able to query easily. 
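+   * (these map to the access_keys_id/access_keys_secret columns prepared
+   * in DBOpUserPrepareInfo below, so get_user() can match a user row on
+   * either field)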
+ * + * XXX: Swift keys and subuser not supported for now */ + DBOpUserInfo user; + std::string query_str; + DBOpBucketInfo bucket; + DBOpObjectInfo obj; + DBOpObjectDataInfo obj_data; + DBOpLCHeadInfo lc_head; + DBOpLCEntryInfo lc_entry; + uint64_t list_max_count; +}; + +struct DBOpParams { + CephContext *cct; + + /* Tables */ + std::string user_table; + std::string bucket_table; + std::string object_table; + + /* Ops*/ + DBOpInfo op; + + std::string objectdata_table; + std::string object_trigger; + std::string object_view; + std::string quota_table; + std::string lc_head_table; + std::string lc_entry_table; + std::string obj; +}; + +/* Used for prepared schemas. + * Difference with above structure is that all + * the fields are strings here to accommodate any + * style identifiers used by backend db. By default + * initialized with sqlitedb style, can be overriden + * using InitPrepareParams() + * + * These identifiers are used in prepare and bind statements + * to get the right index of each param. + */ +struct DBOpUserPrepareInfo { + static constexpr const char* user_id = ":user_id"; + static constexpr const char* tenant = ":tenant"; + static constexpr const char* ns = ":ns"; + static constexpr const char* display_name = ":display_name"; + static constexpr const char* user_email = ":user_email"; + /* Support only single access_key for now. So store + * it separately as primary access_key_id & secret to + * be able to query easily. + * + * In future, when need to support & query from multiple + * access keys, better to maintain them in a separate table. + */ + static constexpr const char* access_keys_id = ":access_keys_id"; + static constexpr const char* access_keys_secret = ":access_keys_secret"; + static constexpr const char* access_keys = ":access_keys"; + static constexpr const char* swift_keys = ":swift_keys"; + static constexpr const char* subusers = ":subusers"; + static constexpr const char* suspended = ":suspended"; + static constexpr const char* max_buckets = ":max_buckets"; + static constexpr const char* op_mask = ":op_mask"; + static constexpr const char* user_caps = ":user_caps"; + static constexpr const char* admin = ":admin"; + static constexpr const char* system = ":system"; + static constexpr const char* placement_name = ":placement_name"; + static constexpr const char* placement_storage_class = ":placement_storage_class"; + static constexpr const char* placement_tags = ":placement_tags"; + static constexpr const char* bucket_quota = ":bucket_quota"; + static constexpr const char* temp_url_keys = ":temp_url_keys"; + static constexpr const char* user_quota = ":user_quota"; + static constexpr const char* type = ":type"; + static constexpr const char* mfa_ids = ":mfa_ids"; + static constexpr const char* assumed_role_arn = ":assumed_role_arn"; + static constexpr const char* user_attrs = ":user_attrs"; + static constexpr const char* user_ver = ":user_vers"; + static constexpr const char* user_ver_tag = ":user_ver_tag"; +}; + +struct DBOpBucketPrepareInfo { + static constexpr const char* bucket_name = ":bucket_name"; + static constexpr const char* tenant = ":tenant"; + static constexpr const char* marker = ":marker"; + static constexpr const char* bucket_id = ":bucket_id"; + static constexpr const char* size = ":size"; + static constexpr const char* size_rounded = ":size_rounded"; + static constexpr const char* creation_time = ":creation_time"; + static constexpr const char* count = ":count"; + static constexpr const char* placement_name = ":placement_name"; + static 
constexpr const char* placement_storage_class = ":placement_storage_class"; + /* ownerid - maps to DBOpUserPrepareInfo */ + static constexpr const char* flags = ":flags"; + static constexpr const char* zonegroup = ":zonegroup"; + static constexpr const char* has_instance_obj = ":has_instance_obj"; + static constexpr const char* quota = ":quota"; + static constexpr const char* requester_pays = ":requester_pays"; + static constexpr const char* has_website = ":has_website"; + static constexpr const char* website_conf = ":website_conf"; + static constexpr const char* swift_versioning = ":swift_versioning"; + static constexpr const char* swift_ver_location = ":swift_ver_location"; + static constexpr const char* mdsearch_config = ":mdsearch_config"; + static constexpr const char* new_bucket_instance_id = ":new_bucket_instance_id"; + static constexpr const char* obj_lock = ":obj_lock"; + static constexpr const char* sync_policy_info_groups = ":sync_policy_info_groups"; + static constexpr const char* bucket_attrs = ":bucket_attrs"; + static constexpr const char* bucket_ver = ":bucket_vers"; + static constexpr const char* bucket_ver_tag = ":bucket_ver_tag"; + static constexpr const char* mtime = ":mtime"; + static constexpr const char* min_marker = ":min_marker"; + static constexpr const char* max_marker = ":max_marker"; +}; + +struct DBOpObjectPrepareInfo { + static constexpr const char* obj_name = ":obj_name"; + static constexpr const char* obj_instance = ":obj_instance"; + static constexpr const char* obj_ns = ":obj_ns"; + static constexpr const char* acls = ":acls"; + static constexpr const char* index_ver = ":index_ver"; + static constexpr const char* tag = ":tag"; + static constexpr const char* flags = ":flags"; + static constexpr const char* versioned_epoch = ":versioned_epoch"; + static constexpr const char* obj_category = ":obj_category"; + static constexpr const char* etag = ":etag"; + static constexpr const char* owner = ":owner"; + static constexpr const char* owner_display_name = ":owner_display_name"; + static constexpr const char* storage_class = ":storage_class"; + static constexpr const char* appendable = ":appendable"; + static constexpr const char* content_type = ":content_type"; + static constexpr const char* index_hash_source = ":index_hash_source"; + static constexpr const char* obj_size = ":obj_size"; + static constexpr const char* accounted_size = ":accounted_size"; + static constexpr const char* mtime = ":mtime"; + static constexpr const char* epoch = ":epoch"; + static constexpr const char* obj_tag = ":obj_tag"; + static constexpr const char* tail_tag = ":tail_tag"; + static constexpr const char* write_tag = ":write_tag"; + static constexpr const char* fake_tag = ":fake_tag"; + static constexpr const char* shadow_obj = ":shadow_obj"; + static constexpr const char* has_data = ":has_data"; + static constexpr const char* is_versioned = ":is_versioned"; + static constexpr const char* version_num = ":version_num"; + static constexpr const char* pg_ver = ":pg_ver"; + static constexpr const char* zone_short_id = ":zone_short_id"; + static constexpr const char* obj_version = ":obj_version"; + static constexpr const char* obj_version_tag = ":obj_version_tag"; + static constexpr const char* obj_attrs = ":obj_attrs"; + static constexpr const char* head_size = ":head_size"; + static constexpr const char* max_head_size = ":max_head_size"; + static constexpr const char* obj_id = ":obj_id"; + static constexpr const char* tail_instance = ":tail_instance"; + static constexpr const char* 
head_placement_rule_name = ":head_placement_rule_name"; + static constexpr const char* head_placement_storage_class = ":head_placement_storage_class"; + static constexpr const char* tail_placement_rule_name = ":tail_placement_rule_name"; + static constexpr const char* tail_placement_storage_class = ":tail_placement_storage_class"; + static constexpr const char* manifest_part_objs = ":manifest_part_objs"; + static constexpr const char* manifest_part_rules = ":manifest_part_rules"; + static constexpr const char* omap = ":omap"; + static constexpr const char* is_multipart = ":is_multipart"; + static constexpr const char* mp_parts = ":mp_parts"; + static constexpr const char* head_data = ":head_data"; + static constexpr const char* min_marker = ":min_marker"; + static constexpr const char* max_marker = ":max_marker"; + static constexpr const char* prefix = ":prefix"; + /* Below used to update mp_parts obj name + * from meta object to src object on completion */ + static constexpr const char* new_obj_name = ":new_obj_name"; + static constexpr const char* new_obj_instance = ":new_obj_instance"; + static constexpr const char* new_obj_ns = ":new_obj_ns"; +}; + +struct DBOpObjectDataPrepareInfo { + static constexpr const char* part_num = ":part_num"; + static constexpr const char* offset = ":offset"; + static constexpr const char* data = ":data"; + static constexpr const char* size = ":size"; + static constexpr const char* multipart_part_str = ":multipart_part_str"; +}; + +struct DBOpLCEntryPrepareInfo { + static constexpr const char* index = ":index"; + static constexpr const char* bucket_name = ":bucket_name"; + static constexpr const char* start_time = ":start_time"; + static constexpr const char* status = ":status"; + static constexpr const char* min_marker = ":min_marker"; +}; + +struct DBOpLCHeadPrepareInfo { + static constexpr const char* index = ":index"; + static constexpr const char* start_date = ":start_date"; + static constexpr const char* marker = ":marker"; +}; + +struct DBOpPrepareInfo { + DBOpUserPrepareInfo user; + std::string_view query_str; // view into DBOpInfo::query_str + DBOpBucketPrepareInfo bucket; + DBOpObjectPrepareInfo obj; + DBOpObjectDataPrepareInfo obj_data; + DBOpLCHeadPrepareInfo lc_head; + DBOpLCEntryPrepareInfo lc_entry; + static constexpr const char* list_max_count = ":list_max_count"; +}; + +struct DBOpPrepareParams { + /* Tables */ + std::string user_table; + std::string bucket_table; + std::string object_table; + + /* Ops */ + DBOpPrepareInfo op; + + + std::string objectdata_table; + std::string object_trigger; + std::string object_view; + std::string quota_table; + std::string lc_head_table; + std::string lc_entry_table; +}; + +struct DBOps { + std::shared_ptr InsertUser; + std::shared_ptr RemoveUser; + std::shared_ptr GetUser; + std::shared_ptr InsertBucket; + std::shared_ptr UpdateBucket; + std::shared_ptr RemoveBucket; + std::shared_ptr GetBucket; + std::shared_ptr ListUserBuckets; + std::shared_ptr InsertLCEntry; + std::shared_ptr RemoveLCEntry; + std::shared_ptr GetLCEntry; + std::shared_ptr ListLCEntries; + std::shared_ptr InsertLCHead; + std::shared_ptr RemoveLCHead; + std::shared_ptr GetLCHead; +}; + +class ObjectOp { + public: + ObjectOp() {}; + + virtual ~ObjectOp() {} + + std::shared_ptr PutObject; + std::shared_ptr DeleteObject; + std::shared_ptr GetObject; + std::shared_ptr UpdateObject; + std::shared_ptr ListBucketObjects; + std::shared_ptr ListVersionedObjects; + std::shared_ptr PutObjectData; + std::shared_ptr UpdateObjectData; + 
std::shared_ptr GetObjectData; + std::shared_ptr DeleteObjectData; + std::shared_ptr DeleteStaleObjectData; + + virtual int InitializeObjectOps(std::string db_name, const DoutPrefixProvider *dpp) { return 0; } +}; + +class DBOp { + private: + static constexpr std::string_view CreateUserTableQ = + /* Corresponds to rgw::sal::User + * + * For now only UserID is made Primary key. + * If multiple tenants are stored in single .db handle, should + * make both (UserID, Tenant) as Primary Key. + * + * XXX: + * - AccessKeys, SwiftKeys, Subusers (map<>) are stored as blob. + * To enable easy query, first accesskey is stored in separate fields + * AccessKeysID, AccessKeysSecret. + * In future, may be have separate table to store these keys and + * query on that table. + * - Quota stored as blob .. should be linked to quota table. + */ + "CREATE TABLE IF NOT EXISTS '{}' ( \ + UserID TEXT NOT NULL UNIQUE, \ + Tenant TEXT , \ + NS TEXT , \ + DisplayName TEXT , \ + UserEmail TEXT , \ + AccessKeysID TEXT , \ + AccessKeysSecret TEXT , \ + AccessKeys BLOB , \ + SwiftKeys BLOB , \ + SubUsers BLOB , \ + Suspended INTEGER , \ + MaxBuckets INTEGER , \ + OpMask INTEGER , \ + UserCaps BLOB , \ + Admin INTEGER , \ + System INTEGER , \ + PlacementName TEXT , \ + PlacementStorageClass TEXT , \ + PlacementTags BLOB , \ + BucketQuota BLOB , \ + TempURLKeys BLOB , \ + UserQuota BLOB , \ + TYPE INTEGER , \ + MfaIDs BLOB , \ + AssumedRoleARN TEXT , \ + UserAttrs BLOB, \ + UserVersion INTEGER, \ + UserVersionTag TEXT, \ + PRIMARY KEY (UserID) \n);"; + + static constexpr std::string_view CreateBucketTableQ = + /* Corresponds to rgw::sal::Bucket + * + * For now only BucketName is made Primary key. Since buckets should + * be unique across users in rgw, OwnerID is not made part of primary key. + * However it is still referenced as foreign key + * + * If multiple tenants are stored in single .db handle, should + * make both (BucketName, Tenant) as Primary Key. Also should + * reference (UserID, Tenant) as Foreign key. + * + * leaving below RADOS specific fields + * - rgw_data_placement_target explicit_placement (struct rgw_bucket) + * - rgw::BucketLayout layout (struct RGWBucketInfo) + * - const static uint32_t NUM_SHARDS_BLIND_BUCKET (struct RGWBucketInfo), + * should be '0' indicating no sharding. + * - cls_rgw_reshard_status reshard_status (struct RGWBucketInfo) + * + * XXX: + * - Quota stored as blob .. should be linked to quota table. + * - WebsiteConf stored as BLOB..if required, should be split + * - Storing bucket_version (struct RGWBucket), objv_tracker + * (struct RGWBucketInfo) separately. Are they same? 
+ * + */ + "CREATE TABLE IF NOT EXISTS '{}' ( \ + BucketName TEXT NOT NULL UNIQUE , \ + Tenant TEXT, \ + Marker TEXT, \ + BucketID TEXT, \ + Size INTEGER, \ + SizeRounded INTEGER,\ + CreationTime BLOB, \ + Count INTEGER, \ + PlacementName TEXT , \ + PlacementStorageClass TEXT , \ + OwnerID TEXT NOT NULL, \ + Flags INTEGER, \ + Zonegroup TEXT, \ + HasInstanceObj BOOLEAN, \ + Quota BLOB, \ + RequesterPays BOOLEAN, \ + HasWebsite BOOLEAN, \ + WebsiteConf BLOB, \ + SwiftVersioning BOOLEAN, \ + SwiftVerLocation TEXT, \ + MdsearchConfig BLOB, \ + NewBucketInstanceID TEXT,\ + ObjectLock BLOB, \ + SyncPolicyInfoGroups BLOB, \ + BucketAttrs BLOB, \ + BucketVersion INTEGER, \ + BucketVersionTag TEXT, \ + Mtime BLOB, \ + PRIMARY KEY (BucketName) \ + FOREIGN KEY (OwnerID) \ + REFERENCES '{}' (UserID) ON DELETE CASCADE ON UPDATE CASCADE \n);"; + + static constexpr std::string_view CreateObjectTableTriggerQ = + "CREATE TRIGGER IF NOT EXISTS '{}' \ + AFTER INSERT ON '{}' \ + BEGIN \ + UPDATE '{}' \ + SET VersionNum = (SELECT COALESCE(max(VersionNum), 0) from '{}' where ObjName = new.ObjName) + 1 \ + where ObjName = new.ObjName and ObjInstance = new.ObjInstance; \ + END;"; + + static constexpr std::string_view CreateObjectTableQ = + /* Corresponds to rgw::sal::Object + * + * For now only BucketName, ObjName is made Primary key. + * If multiple tenants are stored in single .db handle, should + * include Tenant too in the Primary Key. Also should + * reference (BucketID, Tenant) as Foreign key. + * + * referring to + * - rgw_bucket_dir_entry - following are added for now + * flags, + * versioned_epoch + * tag + * index_ver + * meta.category + * meta.etag + * meta.storageclass + * meta.appendable + * meta.content_type + * meta.owner + * meta.owner_display_name + * + * - RGWObjState. Below are omitted from that struct + * as they seem in-memory variables + * * is_atomic, has_atts, exists, prefetch_data, keep_tail, + * - RGWObjManifest + * + * Extra field added "IsMultipart" to flag multipart uploads, + * HeadData to store first chunk data. + */ + "CREATE TABLE IF NOT EXISTS '{}' ( \ + ObjName TEXT NOT NULL , \ + ObjInstance TEXT, \ + ObjNS TEXT, \ + BucketName TEXT NOT NULL , \ + ACLs BLOB, \ + IndexVer INTEGER, \ + Tag TEXT, \ + Flags INTEGER, \ + VersionedEpoch INTEGER, \ + ObjCategory INTEGER, \ + Etag TEXT, \ + Owner TEXT, \ + OwnerDisplayName TEXT, \ + StorageClass TEXT, \ + Appendable BOOL, \ + ContentType TEXT, \ + IndexHashSource TEXT, \ + ObjSize INTEGER, \ + AccountedSize INTEGER, \ + Mtime BLOB, \ + Epoch INTEGER, \ + ObjTag BLOB, \ + TailTag BLOB, \ + WriteTag TEXT, \ + FakeTag BOOL, \ + ShadowObj TEXT, \ + HasData BOOL, \ + IsVersioned BOOL, \ + VersionNum INTEGER, \ + PGVer INTEGER, \ + ZoneShortID INTEGER, \ + ObjVersion INTEGER, \ + ObjVersionTag TEXT, \ + ObjAttrs BLOB, \ + HeadSize INTEGER, \ + MaxHeadSize INTEGER, \ + ObjID TEXT NOT NULL, \ + TailInstance TEXT, \ + HeadPlacementRuleName TEXT, \ + HeadPlacementRuleStorageClass TEXT, \ + TailPlacementRuleName TEXT, \ + TailPlacementStorageClass TEXT, \ + ManifestPartObjs BLOB, \ + ManifestPartRules BLOB, \ + Omap BLOB, \ + IsMultipart BOOL, \ + MPPartsList BLOB, \ + HeadData BLOB, \ + PRIMARY KEY (ObjName, ObjInstance, BucketName), \ + FOREIGN KEY (BucketName) \ + REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);"; + + static constexpr std::string_view CreateObjectDataTableQ = + /* Extra field 'MultipartPartStr' added which signifies multipart + * . 
For regular object, it is '0.0'
+     *
+     * - part: a collection of stripes that make a contiguous part of an
+     *   object. A regular object will only have one part (although might have
+     *   many stripes), a multipart object might have many parts. Each part
+     *   has a fixed stripe size (ObjChunkSize), although the last stripe of a
+     *   part might be smaller than that.
+     */
+      "CREATE TABLE IF NOT EXISTS '{}' ( \
+      ObjName TEXT NOT NULL , \
+      ObjInstance TEXT, \
+      ObjNS TEXT, \
+      BucketName TEXT NOT NULL , \
+      ObjID TEXT NOT NULL , \
+      MultipartPartStr TEXT, \
+      PartNum INTEGER NOT NULL, \
+      Offset INTEGER, \
+      Size INTEGER, \
+      Mtime BLOB, \
+      Data BLOB, \
+      PRIMARY KEY (ObjName, BucketName, ObjInstance, ObjID, MultipartPartStr, PartNum), \
+      FOREIGN KEY (BucketName) \
+      REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);";
+
+    static constexpr std::string_view CreateObjectViewQ =
+      /* This query creates a temporary view with entries from the ObjectData
+       * table which have a corresponding head object (i.e., with the same
+       * ObjName, ObjInstance, ObjNS, ObjID) in the Object table.
+       *
+       * The GC thread can use this view to delete stale entries from the
+       * ObjectData table which do not exist in this view.
+       *
+       * XXX: This view is throwing a ForeignKey mismatch error, most likely
+       * because not all the keys of the objectdata table are referenced here.
+       * So this view is not used at the moment.
+       */
+      "CREATE TEMP VIEW IF NOT EXISTS '{}' AS \
+       SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING \
+       (ObjName, BucketName, ObjInstance, ObjID);";
+
+
+    static constexpr std::string_view CreateQuotaTableQ =
+      "CREATE TABLE IF NOT EXISTS '{}' ( \
+      QuotaID INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE , \
+      MaxSizeSoftThreshold INTEGER , \
+      MaxObjsSoftThreshold INTEGER , \
+      MaxSize INTEGER , \
+      MaxObjects INTEGER , \
+      Enabled Boolean , \
+      CheckOnRaw Boolean \n);";
+
+    static constexpr std::string_view CreateLCEntryTableQ =
+      "CREATE TABLE IF NOT EXISTS '{}' ( \
+      LCIndex TEXT NOT NULL , \
+      BucketName TEXT NOT NULL , \
+      StartTime INTEGER , \
+      Status INTEGER , \
+      PRIMARY KEY (LCIndex, BucketName) \n);";
+
+    static constexpr std::string_view CreateLCHeadTableQ =
+      "CREATE TABLE IF NOT EXISTS '{}' ( \
+      LCIndex TEXT NOT NULL , \
+      Marker TEXT , \
+      StartDate INTEGER , \
+      PRIMARY KEY (LCIndex) \n);";
+
+    static constexpr std::string_view DropQ = "DROP TABLE IF EXISTS '{}'";
+    static constexpr std::string_view ListAllQ = "SELECT * from '{}'";
+
+  public:
+    DBOp() {}
+    virtual ~DBOp() {}
+    std::mutex mtx; // to protect prepared stmt
+
+    static std::string CreateTableSchema(std::string_view type,
+                                         const DBOpParams *params) {
+      if (!type.compare("User"))
+        return fmt::format(CreateUserTableQ,
+            params->user_table);
+      if (!type.compare("Bucket"))
+        return fmt::format(CreateBucketTableQ,
+            params->bucket_table,
+            params->user_table);
+      if (!type.compare("Object"))
+        return fmt::format(CreateObjectTableQ,
+            params->object_table,
+            params->bucket_table);
+      if (!type.compare("ObjectTrigger"))
+        return fmt::format(CreateObjectTableTriggerQ,
+            params->object_trigger,
+            params->object_table,
+            params->object_table,
+            params->object_table);
+      if (!type.compare("ObjectData"))
+        return fmt::format(CreateObjectDataTableQ,
+            params->objectdata_table,
+            params->bucket_table);
+      if (!type.compare("ObjectView"))
+        return fmt::format(CreateObjectViewQ,
+            params->object_view,
+            params->objectdata_table,
+            params->object_table);
+      if (!type.compare("Quota"))
+        return fmt::format(CreateQuotaTableQ,
+            params->quota_table);
if (!type.compare("LCHead"))
+        return fmt::format(CreateLCHeadTableQ,
+            params->lc_head_table);
+      if (!type.compare("LCEntry"))
+        return fmt::format(CreateLCEntryTableQ,
+            params->lc_entry_table,
+            params->bucket_table);
+
+      ceph_abort_msgf("incorrect table type %.*s", (int)type.size(), type.data());
+    }
+
+    static std::string DeleteTableSchema(std::string_view table) {
+      return fmt::format(DropQ, table);
+    }
+    static std::string ListTableSchema(std::string_view table) {
+      return fmt::format(ListAllQ, table);
+    }
+
+    virtual int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+    virtual int Bind(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+    virtual int Execute(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+};
+
+class InsertUserOp : virtual public DBOp {
+  private:
+    /* For existing entries -
+     * (1) INSERT or REPLACE - it will delete the previous entry and then
+     * insert the new one. Since it deletes the previous entry, it will
+     * trigger all foreign key cascade deletes or other triggers.
+     * (2) INSERT or UPDATE - this will set NULL values for unassigned
+     * fields.
+     * more info: https://code-examples.net/en/q/377728
+     *
+     * For now using INSERT or REPLACE. If updating an existing record
+     * is required, will use another query.
+     */
+    static constexpr std::string_view Query = "INSERT OR REPLACE INTO '{}' \
+                          (UserID, Tenant, NS, DisplayName, UserEmail, \
+                           AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+                           SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+                           System, PlacementName, PlacementStorageClass, PlacementTags, \
+                           BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \
+                           UserAttrs, UserVersion, UserVersionTag) \
+                          VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \
+                                  {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});";
+
+  public:
+    virtual ~InsertUserOp() {}
+
+    static std::string Schema(DBOpPrepareParams &params) {
+      return fmt::format(Query, params.user_table,
+          params.op.user.user_id, params.op.user.tenant, params.op.user.ns,
+          params.op.user.display_name, params.op.user.user_email,
+          params.op.user.access_keys_id, params.op.user.access_keys_secret,
+          params.op.user.access_keys, params.op.user.swift_keys,
+          params.op.user.subusers, params.op.user.suspended,
+          params.op.user.max_buckets, params.op.user.op_mask,
+          params.op.user.user_caps, params.op.user.admin, params.op.user.system,
+          params.op.user.placement_name, params.op.user.placement_storage_class,
+          params.op.user.placement_tags, params.op.user.bucket_quota,
+          params.op.user.temp_url_keys, params.op.user.user_quota,
+          params.op.user.type, params.op.user.mfa_ids,
+          params.op.user.assumed_role_arn, params.op.user.user_attrs,
+          params.op.user.user_ver, params.op.user.user_ver_tag);
+    }
+
+};
+
+class RemoveUserOp: virtual public DBOp {
+  private:
+    static constexpr std::string_view Query =
+      "DELETE from '{}' where UserID = {}";
+
+  public:
+    virtual ~RemoveUserOp() {}
+
+    static std::string Schema(DBOpPrepareParams &params) {
+      return fmt::format(Query, params.user_table,
+          params.op.user.user_id);
+    }
+};
+
+class GetUserOp: virtual public DBOp {
+  private:
+    /* If the query columns below are updated, make sure to update the
+     * indexes in the list_user() callback in sqliteDB.cc */
+    static constexpr std::string_view Query = "SELECT \
+                          UserID, Tenant, NS, DisplayName, UserEmail, \
+                          AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+                          SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+                          System, PlacementName, PlacementStorageClass,
PlacementTags, \ + BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ + UserAttrs, UserVersion, UserVersionTag from '{}' where UserID = {}"; + + static constexpr std::string_view QueryByEmail = "SELECT \ + UserID, Tenant, NS, DisplayName, UserEmail, \ + AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ + SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ + System, PlacementName, PlacementStorageClass, PlacementTags, \ + BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ + UserAttrs, UserVersion, UserVersionTag from '{}' where UserEmail = {}"; + + static constexpr std::string_view QueryByAccessKeys = "SELECT \ + UserID, Tenant, NS, DisplayName, UserEmail, \ + AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ + SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ + System, PlacementName, PlacementStorageClass, PlacementTags, \ + BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ + UserAttrs, UserVersion, UserVersionTag from '{}' where AccessKeysID = {}"; + + static constexpr std::string_view QueryByUserID = "SELECT \ + UserID, Tenant, NS, DisplayName, UserEmail, \ + AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ + SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ + System, PlacementName, PlacementStorageClass, PlacementTags, \ + BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ + UserAttrs, UserVersion, UserVersionTag \ + from '{}' where UserID = {}"; + + public: + virtual ~GetUserOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + if (params.op.query_str == "email") { + return fmt::format(QueryByEmail, params.user_table, + params.op.user.user_email); + } else if (params.op.query_str == "access_key") { + return fmt::format(QueryByAccessKeys, + params.user_table, + params.op.user.access_keys_id); + } else if (params.op.query_str == "user_id") { + return fmt::format(QueryByUserID, + params.user_table, + params.op.user.user_id); + } else { + return fmt::format(Query, params.user_table, + params.op.user.user_id); + } + } +}; + +class InsertBucketOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "INSERT OR REPLACE INTO '{}' \ + (BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ + Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ + HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ + SwiftVersioning, SwiftVerLocation, \ + MdsearchConfig, NewBucketInstanceID, ObjectLock, \ + SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime) \ + VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, \ + {}, {}, {}, {}, {}, {}, {}, {}, {}, \ + {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; + + public: + virtual ~InsertBucketOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.bucket_table, + params.op.bucket.bucket_name, params.op.bucket.tenant, + params.op.bucket.marker, params.op.bucket.bucket_id, + params.op.bucket.size, params.op.bucket.size_rounded, + params.op.bucket.creation_time, params.op.bucket.count, + params.op.bucket.placement_name, params.op.bucket.placement_storage_class, + params.op.user.user_id, + params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj, + params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website, + params.op.bucket.website_conf, params.op.bucket.swift_versioning, + params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config, + 
params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock, + params.op.bucket.sync_policy_info_groups, params.op.bucket.bucket_attrs, + params.op.bucket.bucket_ver, params.op.bucket.bucket_ver_tag, + params.op.bucket.mtime); + } +}; + +class UpdateBucketOp: virtual public DBOp { + private: + // Updates Info, Mtime, Version + static constexpr std::string_view InfoQuery = + "UPDATE '{}' SET Tenant = {}, Marker = {}, BucketID = {}, CreationTime = {}, \ + Count = {}, PlacementName = {}, PlacementStorageClass = {}, OwnerID = {}, Flags = {}, \ + Zonegroup = {}, HasInstanceObj = {}, Quota = {}, RequesterPays = {}, HasWebsite = {}, \ + WebsiteConf = {}, SwiftVersioning = {}, SwiftVerLocation = {}, MdsearchConfig = {}, \ + NewBucketInstanceID = {}, ObjectLock = {}, SyncPolicyInfoGroups = {}, \ + BucketVersion = {}, Mtime = {} WHERE BucketName = {}"; + // Updates Attrs, OwnerID, Mtime, Version + static constexpr std::string_view AttrsQuery = + "UPDATE '{}' SET OwnerID = {}, BucketAttrs = {}, Mtime = {}, BucketVersion = {} \ + WHERE BucketName = {}"; + // Updates OwnerID, CreationTime, Mtime, Version + static constexpr std::string_view OwnerQuery = + "UPDATE '{}' SET OwnerID = {}, CreationTime = {}, Mtime = {}, BucketVersion = {} WHERE BucketName = {}"; + + public: + virtual ~UpdateBucketOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + if (params.op.query_str == "info") { + return fmt::format(InfoQuery, params.bucket_table, + params.op.bucket.tenant, params.op.bucket.marker, params.op.bucket.bucket_id, + params.op.bucket.creation_time, params.op.bucket.count, + params.op.bucket.placement_name, params.op.bucket.placement_storage_class, + params.op.user.user_id, + params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj, + params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website, + params.op.bucket.website_conf, params.op.bucket.swift_versioning, + params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config, + params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock, + params.op.bucket.sync_policy_info_groups, + params.op.bucket.bucket_ver, params.op.bucket.mtime, + params.op.bucket.bucket_name); + } + if (params.op.query_str == "attrs") { + return fmt::format(AttrsQuery, params.bucket_table, + params.op.user.user_id, params.op.bucket.bucket_attrs, + params.op.bucket.mtime, + params.op.bucket.bucket_ver, params.op.bucket.bucket_name); + } + if (params.op.query_str == "owner") { + return fmt::format(OwnerQuery, params.bucket_table, + params.op.user.user_id, params.op.bucket.creation_time, + params.op.bucket.mtime, + params.op.bucket.bucket_ver, params.op.bucket.bucket_name); + } + return ""; + } +}; + +class RemoveBucketOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' where BucketName = {}"; + + public: + virtual ~RemoveBucketOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.bucket_table, + params.op.bucket.bucket_name); + } +}; + +class GetBucketOp: virtual public DBOp { + private: + static constexpr std::string_view Query = "SELECT \ + BucketName, BucketTable.Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ + Count, BucketTable.PlacementName, BucketTable.PlacementStorageClass, OwnerID, Flags, Zonegroup, \ + HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ + SwiftVersioning, SwiftVerLocation, \ + MdsearchConfig, NewBucketInstanceID, ObjectLock, \ + SyncPolicyInfoGroups, 
BucketAttrs, BucketVersion, BucketVersionTag, Mtime, NS \ + from '{}' as BucketTable INNER JOIN '{}' ON OwnerID = UserID where BucketName = {}"; + + public: + virtual ~GetBucketOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + //return fmt::format(Query, params.op.bucket.bucket_name, + // params.bucket_table, params.user_table); + return fmt::format(Query, + params.bucket_table, params.user_table, + params.op.bucket.bucket_name); + } +}; + +class ListUserBucketsOp: virtual public DBOp { + private: + // once we have stats also stored, may have to update this query to join + // these two tables. + static constexpr std::string_view Query = "SELECT \ + BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ + Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ + HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ + SwiftVersioning, SwiftVerLocation, \ + MdsearchConfig, NewBucketInstanceID, ObjectLock, \ + SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \ + FROM '{}' WHERE OwnerID = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}"; + + /* BucketNames are unique across users. Hence userid/OwnerID is not used as + * marker or for ordering here in the below query + */ + static constexpr std::string_view AllQuery = "SELECT \ + BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ + Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ + HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ + SwiftVersioning, SwiftVerLocation, \ + MdsearchConfig, NewBucketInstanceID, ObjectLock, \ + SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \ + FROM '{}' WHERE BucketName > {} ORDER BY BucketName ASC LIMIT {}"; + + public: + virtual ~ListUserBucketsOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + if (params.op.query_str == "all") { + return fmt::format(AllQuery, params.bucket_table, + params.op.bucket.min_marker, + params.op.list_max_count); + } else { + return fmt::format(Query, params.bucket_table, + params.op.user.user_id, params.op.bucket.min_marker, + params.op.list_max_count); + } + } +}; + +class PutObjectOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "INSERT OR REPLACE INTO '{}' \ + (ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ + Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ + StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ + AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ + ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ + ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ + ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ + TailPlacementRuleName, TailPlacementStorageClass, \ + ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ + HeadData) \ + VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \ + {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \ + {}, {}, {}, \ + {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; + + public: + virtual ~PutObjectOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.object_table, params.op.obj.obj_name, + params.op.obj.obj_instance, params.op.obj.obj_ns, + params.op.bucket.bucket_name, params.op.obj.acls, params.op.obj.index_ver, + params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch, + 
params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner, + params.op.obj.owner_display_name, params.op.obj.storage_class, + params.op.obj.appendable, params.op.obj.content_type, + params.op.obj.index_hash_source, params.op.obj.obj_size, + params.op.obj.accounted_size, params.op.obj.mtime, + params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag, + params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj, + params.op.obj.has_data, params.op.obj.is_versioned, + params.op.obj.version_num, + params.op.obj.pg_ver, params.op.obj.zone_short_id, + params.op.obj.obj_version, params.op.obj.obj_version_tag, + params.op.obj.obj_attrs, params.op.obj.head_size, + params.op.obj.max_head_size, params.op.obj.obj_id, + params.op.obj.tail_instance, + params.op.obj.head_placement_rule_name, + params.op.obj.head_placement_storage_class, + params.op.obj.tail_placement_rule_name, + params.op.obj.tail_placement_storage_class, + params.op.obj.manifest_part_objs, + params.op.obj.manifest_part_rules, params.op.obj.omap, + params.op.obj.is_multipart, params.op.obj.mp_parts, + params.op.obj.head_data); + } +}; + +class DeleteObjectOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {}"; + + public: + virtual ~DeleteObjectOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.object_table, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance); + } +}; + +class GetObjectOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "SELECT \ + ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ + Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ + StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ + AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ + ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ + ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ + ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ + TailPlacementRuleName, TailPlacementStorageClass, \ + ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ + HeadData from '{}' \ + where BucketName = {} and ObjName = {} and ObjInstance = {}"; + + public: + virtual ~GetObjectOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.object_table, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance); + } +}; + +class ListBucketObjectsOp: virtual public DBOp { + private: + // once we have stats also stored, may have to update this query to join + // these two tables. 
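The Schema() helpers above only splice table names and the ':'-prefixed identifiers into the SQL text; actual values are bound later by the backend against the prepared statement. A self-contained sketch of that expansion, where the per-bucket table name is invented for illustration and the placeholder strings mirror the DBOp*PrepareInfo defaults:

```cpp
// Sketch of the Schema()/fmt::format pattern used by the Op classes.
// Placeholders like ":obj_name" are bind identifiers for the backend's
// prepare/bind step, not user data, so no values are string-spliced in.
#define FMT_HEADER_ONLY 1
#include <fmt/format.h>
#include <iostream>
#include <string>

int main() {
  std::string sql = fmt::format(
      "SELECT ObjName, ObjInstance from '{}' "
      "where BucketName = {} and ObjName = {}",
      "default_db_photos_object_table",  // hypothetical per-bucket table
      ":bucket_name", ":obj_name");      // sqlite-style bind identifiers
  std::cout << sql << '\n';
  // SELECT ObjName, ObjInstance from 'default_db_photos_object_table'
  // where BucketName = :bucket_name and ObjName = :obj_name
}
```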
+ static constexpr std::string_view Query = + "SELECT \ + ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ + Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ + StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ + AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ + ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ + ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ + ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ + TailPlacementRuleName, TailPlacementStorageClass, \ + ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, HeadData from '{}' \ + where BucketName = {} and ObjName >= {} and ObjName LIKE {} ORDER BY ObjName ASC, VersionNum DESC LIMIT {}"; + public: + virtual ~ListBucketObjectsOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + /* XXX: Include obj_id, delim */ + return fmt::format(Query, + params.object_table, + params.op.bucket.bucket_name, + params.op.obj.min_marker, + params.op.obj.prefix, + params.op.list_max_count); + } +}; + +#define MAX_VERSIONED_OBJECTS 20 +class ListVersionedObjectsOp: virtual public DBOp { + private: + // once we have stats also stored, may have to update this query to join + // these two tables. + static constexpr std::string_view Query = + "SELECT \ + ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ + Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ + StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ + AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ + ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ + ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ + ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ + TailPlacementRuleName, TailPlacementStorageClass, \ + ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ + HeadData from '{}' \ + where BucketName = {} and ObjName = {} ORDER BY VersionNum DESC LIMIT {}"; + public: + virtual ~ListVersionedObjectsOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + /* XXX: Include obj_id, delim */ + return fmt::format(Query, + params.object_table, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.list_max_count); + } +}; + +class UpdateObjectOp: virtual public DBOp { + private: + // Updates Omap + static constexpr std::string_view OmapQuery = + "UPDATE '{}' SET Omap = {}, Mtime = {} \ + where BucketName = {} and ObjName = {} and ObjInstance = {}"; + static constexpr std::string_view AttrsQuery = + "UPDATE '{}' SET ObjAttrs = {}, Mtime = {} \ + where BucketName = {} and ObjName = {} and ObjInstance = {}"; + static constexpr std::string_view MPQuery = + "UPDATE '{}' SET MPPartsList = {}, Mtime = {} \ + where BucketName = {} and ObjName = {} and ObjInstance = {}"; + static constexpr std::string_view MetaQuery = + "UPDATE '{}' SET \ + ObjNS = {}, ACLs = {}, IndexVer = {}, Tag = {}, Flags = {}, VersionedEpoch = {}, \ + ObjCategory = {}, Etag = {}, Owner = {}, OwnerDisplayName = {}, \ + StorageClass = {}, Appendable = {}, ContentType = {}, \ + IndexHashSource = {}, ObjSize = {}, AccountedSize = {}, Mtime = {}, \ + Epoch = {}, ObjTag = {}, TailTag = {}, WriteTag = {}, FakeTag = {}, \ + ShadowObj = {}, HasData = {}, IsVersioned = {}, VersionNum = {}, PGVer = {}, \ + ZoneShortID = {}, ObjVersion = {}, ObjVersionTag = {}, ObjAttrs = {}, \ + HeadSize = {}, MaxHeadSize = {}, ObjID = {}, TailInstance = 
{}, \ + HeadPlacementRuleName = {}, HeadPlacementRuleStorageClass = {}, \ + TailPlacementRuleName = {}, TailPlacementStorageClass = {}, \ + ManifestPartObjs = {}, ManifestPartRules = {}, Omap = {}, \ + IsMultipart = {}, MPPartsList = {}, HeadData = {} \ + WHERE ObjName = {} and ObjInstance = {} and BucketName = {}"; + + public: + virtual ~UpdateObjectOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + if (params.op.query_str == "omap") { + return fmt::format(OmapQuery, + params.object_table, params.op.obj.omap, + params.op.obj.mtime, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance); + } + if (params.op.query_str == "attrs") { + return fmt::format(AttrsQuery, + params.object_table, params.op.obj.obj_attrs, + params.op.obj.mtime, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance); + } + if (params.op.query_str == "mp") { + return fmt::format(MPQuery, + params.object_table, params.op.obj.mp_parts, + params.op.obj.mtime, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance); + } + if (params.op.query_str == "meta") { + return fmt::format(MetaQuery, + params.object_table, + params.op.obj.obj_ns, params.op.obj.acls, params.op.obj.index_ver, + params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch, + params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner, + params.op.obj.owner_display_name, params.op.obj.storage_class, + params.op.obj.appendable, params.op.obj.content_type, + params.op.obj.index_hash_source, params.op.obj.obj_size, + params.op.obj.accounted_size, params.op.obj.mtime, + params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag, + params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj, + params.op.obj.has_data, params.op.obj.is_versioned, params.op.obj.version_num, + params.op.obj.pg_ver, params.op.obj.zone_short_id, + params.op.obj.obj_version, params.op.obj.obj_version_tag, + params.op.obj.obj_attrs, params.op.obj.head_size, + params.op.obj.max_head_size, params.op.obj.obj_id, + params.op.obj.tail_instance, + params.op.obj.head_placement_rule_name, + params.op.obj.head_placement_storage_class, + params.op.obj.tail_placement_rule_name, + params.op.obj.tail_placement_storage_class, + params.op.obj.manifest_part_objs, + params.op.obj.manifest_part_rules, params.op.obj.omap, + params.op.obj.is_multipart, params.op.obj.mp_parts, + params.op.obj.head_data, + params.op.obj.obj_name, params.op.obj.obj_instance, + params.op.bucket.bucket_name); + } + return ""; + } +}; + +class PutObjectDataOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "INSERT OR REPLACE INTO '{}' \ + (ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data) \ + VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; + + public: + virtual ~PutObjectDataOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.objectdata_table, + params.op.obj.obj_name, params.op.obj.obj_instance, + params.op.obj.obj_ns, + params.op.bucket.bucket_name, + params.op.obj.obj_id, + params.op.obj_data.multipart_part_str, + params.op.obj_data.part_num, + params.op.obj_data.offset, + params.op.obj_data.size, + params.op.obj.mtime, + params.op.obj_data.data); + } +}; + +/* XXX: Recheck if this is really needed */ +class UpdateObjectDataOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "UPDATE '{}' \ + SET Mtime = {} 
WHERE ObjName = {} and ObjInstance = {} and \ + BucketName = {} and ObjID = {}"; + + public: + virtual ~UpdateObjectDataOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.objectdata_table, + params.op.obj.mtime, + params.op.obj.obj_name, params.op.obj.obj_instance, + params.op.bucket.bucket_name, + params.op.obj.obj_id); + } +}; + +class GetObjectDataOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "SELECT \ + ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data \ + from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {} ORDER BY MultipartPartStr, PartNum"; + + public: + virtual ~GetObjectDataOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.objectdata_table, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance, + params.op.obj.obj_id); + } +}; + +class DeleteObjectDataOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {}"; + + public: + virtual ~DeleteObjectDataOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.objectdata_table, + params.op.bucket.bucket_name, + params.op.obj.obj_name, + params.op.obj.obj_instance, + params.op.obj.obj_id); + } +}; + +class DeleteStaleObjectDataOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' WHERE (ObjName, ObjInstance, ObjID) NOT IN (SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING (ObjName, BucketName, ObjInstance, ObjID)) and Mtime < {}"; + + public: + virtual ~DeleteStaleObjectDataOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, + params.objectdata_table, + params.objectdata_table, + params.object_table, + params.op.obj.mtime); + } +}; + +class InsertLCEntryOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "INSERT OR REPLACE INTO '{}' \ + (LCIndex, BucketName, StartTime, Status) \ + VALUES ({}, {}, {}, {})"; + + public: + virtual ~InsertLCEntryOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_entry_table, + params.op.lc_entry.index, params.op.lc_entry.bucket_name, + params.op.lc_entry.start_time, params.op.lc_entry.status); + } +}; + +class RemoveLCEntryOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' where LCIndex = {} and BucketName = {}"; + + public: + virtual ~RemoveLCEntryOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_entry_table, + params.op.lc_entry.index, params.op.lc_entry.bucket_name); + } +}; + +class GetLCEntryOp: virtual public DBOp { + private: + static constexpr std::string_view Query = "SELECT \ + LCIndex, BucketName, StartTime, Status \ + from '{}' where LCIndex = {} and BucketName = {}"; + static constexpr std::string_view NextQuery = "SELECT \ + LCIndex, BucketName, StartTime, Status \ + from '{}' where LCIndex = {} and BucketName > {} ORDER BY BucketName ASC"; + + public: + virtual ~GetLCEntryOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + if (params.op.query_str == "get_next_entry") { + return fmt::format(NextQuery, params.lc_entry_table, + params.op.lc_entry.index, params.op.lc_entry.bucket_name); + 
} + // default + return fmt::format(Query, params.lc_entry_table, + params.op.lc_entry.index, params.op.lc_entry.bucket_name); + } +}; + +class ListLCEntriesOp: virtual public DBOp { + private: + static constexpr std::string_view Query = "SELECT \ + LCIndex, BucketName, StartTime, Status \ + FROM '{}' WHERE LCIndex = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}"; + + public: + virtual ~ListLCEntriesOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_entry_table, + params.op.lc_entry.index, params.op.lc_entry.min_marker, + params.op.list_max_count); + } +}; + +class InsertLCHeadOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "INSERT OR REPLACE INTO '{}' \ + (LCIndex, Marker, StartDate) \ + VALUES ({}, {}, {})"; + + public: + virtual ~InsertLCHeadOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_head_table, + params.op.lc_head.index, params.op.lc_head.marker, + params.op.lc_head.start_date); + } +}; + +class RemoveLCHeadOp: virtual public DBOp { + private: + static constexpr std::string_view Query = + "DELETE from '{}' where LCIndex = {}"; + + public: + virtual ~RemoveLCHeadOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_head_table, + params.op.lc_head.index); + } +}; + +class GetLCHeadOp: virtual public DBOp { + private: + static constexpr std::string_view Query = "SELECT \ + LCIndex, Marker, StartDate \ + from '{}' where LCIndex = {}"; + + public: + virtual ~GetLCHeadOp() {} + + static std::string Schema(DBOpPrepareParams ¶ms) { + return fmt::format(Query, params.lc_head_table, + params.op.lc_head.index); + } +}; + +/* taken from rgw_rados.h::RGWOLHInfo */ +struct DBOLHInfo { + rgw_obj target; + bool removed; + DBOLHInfo() : removed(false) {} + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(target, bl); + encode(removed, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(target, bl); + decode(removed, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(DBOLHInfo) + +class DB { + private: + const std::string db_name; + rgw::sal::Driver* driver; + const std::string user_table; + const std::string bucket_table; + const std::string quota_table; + const std::string lc_head_table; + const std::string lc_entry_table; + static std::map objectmap; + + protected: + void *db; + CephContext *cct; + const DoutPrefix dp; + uint64_t max_bucket_id = 0; + // XXX: default ObjStripeSize or ObjChunk size - 4M, make them configurable? + uint64_t ObjHeadSize = 1024; /* 1K - default head data size */ + uint64_t ObjChunkSize = (get_blob_limit() - 1000); /* 1000 to accommodate other fields */ + // Below mutex is to protect objectmap and other shared + // objects if any. 
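One point worth spelling out here: ObjHeadSize and ObjChunkSize drive how object data is laid out. Up to ObjHeadSize bytes ride inline in the object table's HeadData column, and the rest is split into blob-limit-sized rows in the objectdata table. A minimal sketch of that arithmetic; the sizes in the trailing comment are assumed, not defaults taken from a real backend:

```cpp
// How a write could be split given ObjHeadSize (inline head) and
// ObjChunkSize (per-row blob cap in the objectdata table).
#include <algorithm>
#include <cstdint>

struct ChunkPlan {
  uint64_t head_bytes;  // bytes kept inline with the head record
  uint64_t tail_parts;  // objectdata rows needed for the rest
};

ChunkPlan plan_write(uint64_t total, uint64_t head_max, uint64_t chunk) {
  ChunkPlan p{std::min(total, head_max), 0};
  const uint64_t tail = total - p.head_bytes;
  p.tail_parts = tail ? (tail + chunk - 1) / chunk : 0;  // ceiling division
  return p;
}

// e.g. plan_write(10'000, 1024, 4096) -> {1024, 3}: 1K inline plus
// three tail chunks of up to 4K each (assumed sizes for illustration).
```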
+ std::mutex mtx; + + public: + DB(std::string db_name, CephContext *_cct) : db_name(db_name), + user_table(db_name+"_user_table"), + bucket_table(db_name+"_bucket_table"), + quota_table(db_name+"_quota_table"), + lc_head_table(db_name+"_lc_head_table"), + lc_entry_table(db_name+"_lc_entry_table"), + cct(_cct), + dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ") + {} + /* DB() {}*/ + + DB(CephContext *_cct) : db_name("default_db"), + user_table(db_name+"_user_table"), + bucket_table(db_name+"_bucket_table"), + quota_table(db_name+"_quota_table"), + lc_head_table(db_name+"_lc_head_table"), + lc_entry_table(db_name+"_lc_entry_table"), + cct(_cct), + dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ") + {} + virtual ~DB() {} + + const std::string getDBname() { return db_name; } + const std::string getDBfile() { return db_name + ".db"; } + const std::string getUserTable() { return user_table; } + const std::string getBucketTable() { return bucket_table; } + const std::string getQuotaTable() { return quota_table; } + const std::string getLCHeadTable() { return lc_head_table; } + const std::string getLCEntryTable() { return lc_entry_table; } + const std::string getObjectTable(std::string bucket) { + return db_name+"_"+bucket+"_object_table"; } + const std::string getObjectDataTable(std::string bucket) { + return db_name+"_"+bucket+"_objectdata_table"; } + const std::string getObjectView(std::string bucket) { + return db_name+"_"+bucket+"_object_view"; } + const std::string getObjectTrigger(std::string bucket) { + return db_name+"_"+bucket+"_object_trigger"; } + + std::map getObjectMap(); + + struct DBOps dbops; // DB operations, make it private? + + void set_driver(rgw::sal::Driver* _driver) { + driver = _driver; + } + + void set_context(CephContext *_cct) { + cct = _cct; + } + + CephContext *ctx() { return cct; } + const DoutPrefixProvider *get_def_dpp() { return &dp; } + + int Initialize(std::string logfile, int loglevel); + int Destroy(const DoutPrefixProvider *dpp); + int LockInit(const DoutPrefixProvider *dpp); + int LockDestroy(const DoutPrefixProvider *dpp); + int Lock(const DoutPrefixProvider *dpp); + int Unlock(const DoutPrefixProvider *dpp); + + int InitializeParams(const DoutPrefixProvider *dpp, DBOpParams *params); + int ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params); + std::shared_ptr getDBOp(const DoutPrefixProvider *dpp, std::string_view Op, const DBOpParams *params); + int objectmapInsert(const DoutPrefixProvider *dpp, std::string bucket, class ObjectOp* ptr); + int objectmapDelete(const DoutPrefixProvider *dpp, std::string bucket); + + virtual uint64_t get_blob_limit() { return 0; }; + virtual void *openDB(const DoutPrefixProvider *dpp) { return NULL; } + virtual int closeDB(const DoutPrefixProvider *dpp) { return 0; } + virtual int createTables(const DoutPrefixProvider *dpp) { return 0; } + virtual int InitializeDBOps(const DoutPrefixProvider *dpp) { return 0; } + virtual int InitPrepareParams(const DoutPrefixProvider *dpp, + DBOpPrepareParams &p_params, + DBOpParams* params) = 0; + virtual int createLCTables(const DoutPrefixProvider *dpp) = 0; + + virtual int ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; + virtual int ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; + virtual int ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; + + int get_user(const DoutPrefixProvider *dpp, + const std::string& query_str, const std::string& query_str_val, + RGWUserInfo& uinfo, std::map 
*pattrs, + RGWObjVersionTracker *pobjv_tracker); + int store_user(const DoutPrefixProvider *dpp, + RGWUserInfo& uinfo, bool exclusive, std::map *pattrs, + RGWObjVersionTracker *pobjv_tracker, RGWUserInfo* pold_info); + int remove_user(const DoutPrefixProvider *dpp, + RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv_tracker); + int get_bucket_info(const DoutPrefixProvider *dpp, const std::string& query_str, + const std::string& query_str_val, + RGWBucketInfo& info, rgw::sal::Attrs* pattrs, ceph::real_time* pmtime, + obj_version* pbucket_version); + int create_bucket(const DoutPrefixProvider *dpp, + const RGWUserInfo& owner, rgw_bucket& bucket, + const std::string& zonegroup_id, + const rgw_placement_rule& placement_rule, + const std::string& swift_ver_location, + const RGWQuotaInfo * pquota_info, + std::map& attrs, + RGWBucketInfo& info, + obj_version *pobjv, + obj_version *pep_objv, + real_time creation_time, + rgw_bucket *pmaster_bucket, + uint32_t *pmaster_num_shards, + optional_yield y, + bool exclusive); + + int next_bucket_id() { return ++max_bucket_id; }; + + int remove_bucket(const DoutPrefixProvider *dpp, const RGWBucketInfo info); + int list_buckets(const DoutPrefixProvider *dpp, const std::string& query_str, + rgw_user& user, + const std::string& marker, + const std::string& end_marker, + uint64_t max, + bool need_stats, + RGWUserBuckets *buckets, + bool *is_truncated); + int update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str, + RGWBucketInfo& info, bool exclusive, + const rgw_user* powner_id, std::map* pattrs, + ceph::real_time* pmtime, RGWObjVersionTracker* pobjv); + + uint64_t get_max_head_size() { return ObjHeadSize; } + uint64_t get_max_chunk_size() { return ObjChunkSize; } + void gen_rand_obj_instance_name(rgw_obj_key *target_key); + + // db raw obj string is of format - + // "____" + static constexpr std::string_view raw_obj_oid = "{0}_{1}_{2}_{3}_{4}"; + + std::string to_oid(std::string_view bucket, std::string_view obj_name, + std::string_view obj_instance, std::string_view obj_id, + std::string_view mp_str, uint64_t partnum) { + return fmt::format(raw_obj_oid, bucket, obj_name, obj_instance, obj_id, mp_str, partnum); + } + int from_oid(const std::string& oid, std::string& bucket, std::string& obj_name, std::string& obj_id, + std::string& obj_instance, + std::string& mp_str, uint64_t& partnum) { + // TODO: use ceph::split() from common/split.h + // XXX: doesn't this break if obj_name has underscores in it? 
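The XXX above is easy to demonstrate; a standalone snippet (all oid values invented) showing how a plain split on '_' mis-parses an obj_name that itself contains underscores:

```cpp
#include <boost/algorithm/string.hpp>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Invented oid in to_oid()'s "{bucket}_{obj}_{instance}_{objid}_{mp}_{part}"
  // shape, but with an obj_name ("photos_2024") containing an underscore:
  std::string oid = "mybucket_photos_2024_v1_objid7_0.0_1";
  std::vector<std::string> fields;
  boost::split(fields, oid, boost::is_any_of("_"));
  // fields[1] is "photos", not "photos_2024": every later field shifts,
  // which is exactly the breakage the XXX comment flags.
  for (const auto& f : fields) std::cout << f << '\n';
}
```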
+ std::vector result; + boost::split(result, oid, boost::is_any_of("_")); + bucket = result[0]; + obj_name = result[1]; + obj_instance = result[2]; + obj_id = result[3]; + mp_str = result[4]; + partnum = stoi(result[5]); + + return 0; + } + + struct raw_obj { + DB* db; + + std::string bucket_name; + std::string obj_name; + std::string obj_instance; + std::string obj_ns; + std::string obj_id; + std::string multipart_part_str; + uint64_t part_num; + + std::string obj_table; + std::string obj_data_table; + + raw_obj(DB* _db) { + db = _db; + } + + raw_obj(DB* _db, std::string& _bname, std::string& _obj_name, std::string& _obj_instance, + std::string& _obj_ns, std::string& _obj_id, std::string _mp_part_str, int _part_num) { + db = _db; + bucket_name = _bname; + obj_name = _obj_name; + obj_instance = _obj_instance; + obj_ns = _obj_ns; + obj_id = _obj_id; + multipart_part_str = _mp_part_str; + part_num = _part_num; + + obj_table = bucket_name+".object.table"; + obj_data_table = bucket_name+".objectdata.table"; + } + + raw_obj(DB* _db, std::string& oid) { + int r; + + db = _db; + r = db->from_oid(oid, bucket_name, obj_name, obj_instance, obj_id, multipart_part_str, + part_num); + if (r < 0) { + multipart_part_str = "0.0"; + part_num = 0; + } + + obj_table = db->getObjectTable(bucket_name); + obj_data_table = db->getObjectDataTable(bucket_name); + } + + int InitializeParamsfromRawObj (const DoutPrefixProvider *dpp, DBOpParams* params); + + int read(const DoutPrefixProvider *dpp, int64_t ofs, uint64_t end, bufferlist& bl); + int write(const DoutPrefixProvider *dpp, int64_t ofs, int64_t write_ofs, uint64_t len, bufferlist& bl); + }; + + class GC : public Thread { + const DoutPrefixProvider *dpp; + DB *db; + /* Default time interval for GC + * XXX: Make below options configurable + * + * gc_interval: The time between successive gc thread runs + * gc_obj_min_wait: Min. time to wait before deleting any data post its creation. 
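The stop machinery around this comment block (stop_signalled, signal_stop(), and the entry() loop at the top of this hunk) is the standard mutex plus condition-variable shutdown handshake. Distilled to its core, it looks like the sketch below; names are simplified and the GC pass itself is stubbed out:

```cpp
#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex gc_mtx;
std::condition_variable gc_cv;
bool stop = false;  // set under gc_mtx by the stopping thread

// Worker: sleeps for gc_interval between passes, but wakes immediately
// when stop is signalled instead of waiting out the full interval.
void gc_loop(std::chrono::seconds gc_interval) {
  std::unique_lock<std::mutex> lk(gc_mtx);
  while (!stop) {
    // ... a GC pass (e.g. deleting stale object data) would run here ...
    gc_cv.wait_for(lk, gc_interval, [] { return stop; });
  }
}

void signal_stop() {
  std::lock_guard<std::mutex> lk(gc_mtx);
  stop = true;
  gc_cv.notify_one();
}
```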
+ * + */ + std::mutex mtx; + std::condition_variable cv; + bool stop_signalled = false; + uint32_t gc_interval = 24*60*60; //sec ; default: 24*60*60 + uint32_t gc_obj_min_wait = 60*60; //60*60sec default + std::string bucket_marker; + std::string user_marker; + + public: + GC(const DoutPrefixProvider *_dpp, DB* _db) : + dpp(_dpp), db(_db) {} + + void *entry() override; + + void signal_stop() { + std::lock_guard lk_guard(mtx); + stop_signalled = true; + cv.notify_one(); + } + + friend class DB; + }; + std::unique_ptr gc_worker; + + class Bucket { + friend class DB; + DB* store; + + RGWBucketInfo bucket_info; + + public: + Bucket(DB *_store, const RGWBucketInfo& _binfo) : store(_store), bucket_info(_binfo) {} + DB *get_store() { return store; } + rgw_bucket& get_bucket() { return bucket_info.bucket; } + RGWBucketInfo& get_bucket_info() { return bucket_info; } + + class List { + protected: + // absolute maximum number of objects that + // list_objects_(un)ordered can return + static constexpr int64_t bucket_list_objects_absolute_max = 25000; + + DB::Bucket *target; + rgw_obj_key next_marker; + + public: + + struct Params { + std::string prefix; + std::string delim; + rgw_obj_key marker; + rgw_obj_key end_marker; + std::string ns; + bool enforce_ns; + RGWAccessListFilter* access_list_filter; + RGWBucketListNameFilter force_check_filter; + bool list_versions; + bool allow_unordered; + + Params() : + enforce_ns(true), + access_list_filter(nullptr), + list_versions(false), + allow_unordered(false) + {} + } params; + + explicit List(DB::Bucket *_target) : target(_target) {} + + /* XXX: Handle ordered and unordered separately. + * For now returning only ordered entries */ + int list_objects(const DoutPrefixProvider *dpp, int64_t max, + std::vector *result, + std::map *common_prefixes, bool *is_truncated); + rgw_obj_key& get_next_marker() { + return next_marker; + } + }; + }; + + class Object { + friend class DB; + DB* store; + + RGWBucketInfo bucket_info; + rgw_obj obj; + + RGWObjState obj_state; + std::string obj_id; + + bool versioning_disabled; + + bool bs_initialized; + + public: + Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info), + obj(_obj), + versioning_disabled(false), + bs_initialized(false) {} + + Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj, const std::string& _obj_id) : store(_store), bucket_info(_bucket_info), obj(_obj), obj_id(_obj_id) {} + + struct Read { + DB::Object *source; + + struct GetObjState { + rgw_obj obj; + } state; + + struct ConditionParams { + const ceph::real_time *mod_ptr; + const ceph::real_time *unmod_ptr; + bool high_precision_time; + uint32_t mod_zone_id; + uint64_t mod_pg_ver; + const char *if_match; + const char *if_nomatch; + + ConditionParams() : + mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0), + if_match(NULL), if_nomatch(NULL) {} + } conds; + + struct Params { + ceph::real_time *lastmod; + uint64_t *obj_size; + std::map *attrs; + rgw_obj *target_obj; + + Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr), + target_obj(nullptr) {} + } params; + + explicit Read(DB::Object *_source) : source(_source) {} + + int prepare(const DoutPrefixProvider *dpp); + static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end); + int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp); + int iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb); + int 
+      int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest);
+    };
+
+    struct Write {
+      DB::Object *target;
+      RGWObjState obj_state;
+      std::string mp_part_str = "0.0"; // multipart num
+
+      struct MetaParams {
+        ceph::real_time *mtime;
+        std::map<std::string, bufferlist>* rmattrs;
+        const bufferlist *data;
+        RGWObjManifest *manifest;
+        const std::string *ptag;
+        std::list<rgw_obj_index_key> *remove_objs;
+        ceph::real_time set_mtime;
+        rgw_user owner;
+        RGWObjCategory category;
+        int flags;
+        const char *if_match;
+        const char *if_nomatch;
+        std::optional<uint64_t> olh_epoch;
+        ceph::real_time delete_at;
+        bool canceled;
+        const std::string *user_data;
+        rgw_zone_set *zones_trace;
+        bool modify_tail;
+        bool completeMultipart;
+        bool appendable;
+
+        MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
+                       remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
+                       if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
+                       modify_tail(false), completeMultipart(false), appendable(false) {}
+      } meta;
+
+      explicit Write(DB::Object *_target) : target(_target) {}
+
+      void set_mp_part_str(std::string _mp_part_str) { mp_part_str = _mp_part_str; }
+      int prepare(const DoutPrefixProvider* dpp);
+      int write_data(const DoutPrefixProvider* dpp,
+                     bufferlist& data, uint64_t ofs);
+      int _do_write_meta(const DoutPrefixProvider *dpp,
+                         uint64_t size, uint64_t accounted_size,
+                         std::map<std::string, bufferlist>& attrs,
+                         bool assume_noent, bool modify_tail);
+      int write_meta(const DoutPrefixProvider *dpp, uint64_t size,
+                     uint64_t accounted_size, std::map<std::string, bufferlist>& attrs);
+    };
+
+    struct Delete {
+      DB::Object *target;
+
+      struct DeleteParams {
+        rgw_user bucket_owner;
+        int versioning_status;
+        ACLOwner obj_owner; /* needed for creation of deletion marker */
+        uint64_t olh_epoch;
+        std::string marker_version_id;
+        uint32_t bilog_flags;
+        std::list<rgw_obj_index_key> *remove_objs;
+        ceph::real_time expiration_time;
+        ceph::real_time unmod_since;
+        ceph::real_time mtime; /* for setting delete marker mtime */
+        bool high_precision_time;
+        rgw_zone_set *zones_trace;
+        bool abortmp;
+        uint64_t parts_accounted_size;
+
+        DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
+      } params;
+
+      struct DeleteResult {
+        bool delete_marker;
+        std::string version_id;
+
+        DeleteResult() : delete_marker(false) {}
+      } result;
+
+      explicit Delete(DB::Object *_target) : target(_target) {}
+
+      int delete_obj(const DoutPrefixProvider *dpp);
+      int delete_obj_impl(const DoutPrefixProvider *dpp, DBOpParams& del_params);
+      int create_dm(const DoutPrefixProvider *dpp, DBOpParams& del_params);
+    };
+
+    /* XXX: the parameters may be subject to change.
+     * All we need is bucket name & obj name, instance - keys */
+    int get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params);
+    int get_obj_state(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+                      const rgw_obj& obj,
+                      bool follow_olh, RGWObjState **state);
+    int get_state(const DoutPrefixProvider *dpp, RGWObjState **pstate, bool follow_olh);
+    int list_versioned_objects(const DoutPrefixProvider *dpp,
+                               std::list<rgw_bucket_dir_entry>& list_entries);
+
+    DB *get_store() { return store; }
+    rgw_obj& get_obj() { return obj; }
+    RGWBucketInfo& get_bucket_info() { return bucket_info; }
+
+    int InitializeParamsfromObject(const DoutPrefixProvider *dpp, DBOpParams* params);
+    int set_attrs(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& setattrs,
+                  std::map<std::string, bufferlist>* rmattrs);
+    int transition(const DoutPrefixProvider *dpp,
+                   const rgw_placement_rule& rule, const real_time& mtime,
+                   uint64_t olh_epoch);
+    int obj_omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, bool must_exist);
+    int obj_omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
+                                  const std::set<std::string>& keys,
+                                  std::map<std::string, bufferlist>* vals);
+    int obj_omap_get_all(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist> *m);
+    int obj_omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count,
+                          std::map<std::string, bufferlist> *m, bool* pmore);
+    using iterate_obj_cb = int (*)(const DoutPrefixProvider*, const raw_obj&, off_t, off_t,
+                                   bool, RGWObjState*, void*);
+    int add_mp_part(const DoutPrefixProvider *dpp, RGWUploadPartInfo info);
+    int get_mp_parts_list(const DoutPrefixProvider *dpp, std::list<RGWUploadPartInfo>& info);
+
+    int iterate_obj(const DoutPrefixProvider *dpp,
+                    const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                    off_t ofs, off_t end, uint64_t max_chunk_size,
+                    iterate_obj_cb cb, void *arg);
+  };
+  int get_obj_iterate_cb(const DoutPrefixProvider *dpp,
+                         const raw_obj& read_obj, off_t obj_ofs,
+                         off_t len, bool is_head_obj,
+                         RGWObjState *astate, void *arg);
+
+  int get_entry(const std::string& oid, const std::string& marker,
+                std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry);
+  int get_next_entry(const std::string& oid, const std::string& marker,
+                     std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry);
+  int set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry);
+  int list_entries(const std::string& oid, const std::string& marker,
+                   uint32_t max_entries, std::vector<std::unique_ptr<rgw::sal::Lifecycle::LCEntry>>& entries);
+  int rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry);
+  int get_head(const std::string& oid, std::unique_ptr<rgw::sal::Lifecycle::LCHead>* head);
+  int put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head);
+  int delete_stale_objs(const DoutPrefixProvider *dpp, const std::string& bucket,
+                        uint32_t min_wait);
+  int createGC(const DoutPrefixProvider *_dpp);
+  int stopGC();
+};
+
+struct db_get_obj_data {
+  DB* store;
+  RGWGetDataCB* client_cb = nullptr;
+  uint64_t offset; // next offset to write to client
+
+  db_get_obj_data(DB* db, RGWGetDataCB* cb, uint64_t offset) :
+    store(db), client_cb(cb), offset(offset) {}
+  ~db_get_obj_data() {}
+};
+
+} } // namespace rgw::store
+
+#endif
diff --git a/src/rgw/driver/dbstore/common/dbstore_log.h b/src/rgw/driver/dbstore/common/dbstore_log.h
new file mode 100644
index 00000000000..8d981d5adc4
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/dbstore_log.h
@@ -0,0 +1,18 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef DB_STORE_LOG_H
+#define DB_STORE_LOG_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/dout.h"
+
+#undef dout_prefix
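+// Tag all dbstore output with a common prefix so it is easy to pick
+// out of the radosgw log.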
+#define dout_prefix *_dout << "rgw dbstore: " + +#endif diff --git a/src/rgw/driver/dbstore/config/sqlite.cc b/src/rgw/driver/dbstore/config/sqlite.cc new file mode 100644 index 00000000000..051dc34e921 --- /dev/null +++ b/src/rgw/driver/dbstore/config/sqlite.cc @@ -0,0 +1,2072 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include + +#include + +#include "include/buffer.h" +#include "include/encoding.h" +#include "common/dout.h" +#include "common/random_string.h" +#include "rgw_zone.h" + +#include "common/connection_pool.h" +#include "sqlite/connection.h" +#include "sqlite/error.h" +#include "sqlite/statement.h" +#include "sqlite_schema.h" +#include "sqlite.h" + +#define dout_subsys ceph_subsys_rgw_dbstore + +namespace rgw::dbstore::config { + +struct Prefix : DoutPrefixPipe { + std::string_view prefix; + Prefix(const DoutPrefixProvider& dpp, std::string_view prefix) + : DoutPrefixPipe(dpp), prefix(prefix) {} + unsigned get_subsys() const override { return dout_subsys; } + void add_prefix(std::ostream& out) const override { + out << prefix; + } +}; + +namespace { + +// parameter names for prepared statement bindings +static constexpr const char* P1 = ":1"; +static constexpr const char* P2 = ":2"; +static constexpr const char* P3 = ":3"; +static constexpr const char* P4 = ":4"; +static constexpr const char* P5 = ":5"; +static constexpr const char* P6 = ":6"; + + +void read_text_rows(const DoutPrefixProvider* dpp, + const sqlite::stmt_execution& stmt, + std::span entries, + sal::ListResult& result) +{ + result.entries = sqlite::read_text_rows(dpp, stmt, entries); + if (result.entries.size() < entries.size()) { // end of listing + result.next.clear(); + } else { + result.next = result.entries.back(); + } +} + +struct RealmRow { + RGWRealm info; + int ver; + std::string tag; +}; + +void read_realm_row(const sqlite::stmt_execution& stmt, RealmRow& row) +{ + row.info.id = sqlite::column_text(stmt, 0); + row.info.name = sqlite::column_text(stmt, 1); + row.info.current_period = sqlite::column_text(stmt, 2); + row.info.epoch = sqlite::column_int(stmt, 3); + row.ver = sqlite::column_int(stmt, 4); + row.tag = sqlite::column_text(stmt, 5); +} + +void read_period_row(const sqlite::stmt_execution& stmt, RGWPeriod& row) +{ + // just read the Data column and decode everything else from that + std::string data = sqlite::column_text(stmt, 3); + + bufferlist bl = bufferlist::static_from_string(data); + auto p = bl.cbegin(); + decode(row, p); +} + +struct ZoneGroupRow { + RGWZoneGroup info; + int ver; + std::string tag; +}; + +void read_zonegroup_row(const sqlite::stmt_execution& stmt, ZoneGroupRow& row) +{ + std::string data = sqlite::column_text(stmt, 3); + row.ver = sqlite::column_int(stmt, 4); + row.tag = sqlite::column_text(stmt, 5); + + bufferlist bl = bufferlist::static_from_string(data); + auto p = bl.cbegin(); + decode(row.info, p); +} + +struct ZoneRow { + RGWZoneParams info; + int ver; + std::string tag; +}; + +void read_zone_row(const sqlite::stmt_execution& stmt, ZoneRow& row) +{ + std::string data = sqlite::column_text(stmt, 3); + 
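+  // Only the Data column carries the ceph-encoded RGWZoneParams; the
+  // Ver and Tag columns are read out separately because they drive the
+  // optimistic-concurrency checks in the writer classes below.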
row.ver = sqlite::column_int(stmt, 4); + row.tag = sqlite::column_text(stmt, 5); + + bufferlist bl = bufferlist::static_from_string(data); + auto p = bl.cbegin(); + decode(row.info, p); +} + +std::string generate_version_tag(CephContext* cct) +{ + static constexpr auto TAG_LEN = 24; + return gen_rand_alphanumeric(cct, TAG_LEN); +} + +using SQLiteConnectionHandle = ConnectionHandle; + +using SQLiteConnectionPool = ConnectionPool< + sqlite::Connection, sqlite::ConnectionFactory>; + +} // anonymous namespace + +class SQLiteImpl : public SQLiteConnectionPool { + public: + using SQLiteConnectionPool::SQLiteConnectionPool; +}; + + +SQLiteConfigStore::SQLiteConfigStore(std::unique_ptr impl) + : impl(std::move(impl)) +{ +} + +SQLiteConfigStore::~SQLiteConfigStore() = default; + + +// Realm + +class SQLiteRealmWriter : public sal::RealmWriter { + SQLiteImpl* impl; + int ver; + std::string tag; + std::string realm_id; + std::string realm_name; + public: + SQLiteRealmWriter(SQLiteImpl* impl, int ver, std::string tag, + std::string_view realm_id, std::string_view realm_name) + : impl(impl), ver(ver), tag(std::move(tag)), + realm_id(realm_id), realm_name(realm_name) + {} + + int write(const DoutPrefixProvider* dpp, optional_yield y, + const RGWRealm& info) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:realm_write "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after a conflict or delete + } + if (realm_id != info.id || realm_name != info.name) { + return -EINVAL; // can't modify realm id or name directly + } + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_upd"]; + if (!stmt) { + const std::string sql = fmt::format(schema::realm_update5, + P1, P2, P3, P4, P5); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.current_period); + sqlite::bind_int(dpp, binding, P3, info.epoch); + sqlite::bind_int(dpp, binding, P4, ver); + sqlite::bind_text(dpp, binding, P5, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + // our version is no longer consistent, so later writes would fail too + impl = nullptr; + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm update failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent CurrentPeriod + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + ++ver; + return 0; + } + + int rename(const DoutPrefixProvider* dpp, optional_yield y, + RGWRealm& info, std::string_view new_name) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:realm_rename "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + if (realm_id != info.id || realm_name != info.name) { + return -EINVAL; // can't modify realm id or name directly + } + if (new_name.empty()) { + ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_rename"]; + if (!stmt) { + const std::string sql = fmt::format(schema::realm_rename4, + P1, P2, P3, P4); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, 
realm_id); + sqlite::bind_text(dpp, binding, P2, new_name); + sqlite::bind_int(dpp, binding, P3, ver); + sqlite::bind_text(dpp, binding, P4, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + impl = nullptr; + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm rename failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::unique_constraint) { + return -EEXIST; // Name already taken + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + info.name = std::string{new_name}; + ++ver; + return 0; + } + + int remove(const DoutPrefixProvider* dpp, optional_yield y) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:realm_remove "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_del"]; + if (!stmt) { + const std::string sql = fmt::format(schema::realm_delete3, P1, P2, P3); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + sqlite::bind_int(dpp, binding, P2, ver); + sqlite::bind_text(dpp, binding, P3, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + impl = nullptr; // prevent any further writes after delete + if (!::sqlite3_changes(conn->db.get())) { + return -ECANCELED; // VersionNumber/Tag mismatch + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; + } +}; // SQLiteRealmWriter + + +int SQLiteConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:write_default_realm_id "}; dpp = &prefix; + + if (realm_id.empty()) { + ldpp_dout(dpp, 0) << "requires a realm id" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["def_realm_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::default_realm_insert1, P1); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["def_realm_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::default_realm_upsert1, P1); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default realm insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::primary_key_constraint) { + return -EEXIST; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string& realm_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_realm_sel"]; + if (!stmt) { + static constexpr std::string_view sql = schema::default_realm_select0; + stmt = 
sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + realm_id = sqlite::column_text(reset, 0); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y) + +{ + Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_realm_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_realm_del"]; + if (!stmt) { + static constexpr std::string_view sql = schema::default_realm_delete0; + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { + return -ENOENT; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default realm delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +int SQLiteConfigStore::create_realm(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWRealm& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:create_realm "}; dpp = &prefix; + + if (info.id.empty()) { + ldpp_dout(dpp, 0) << "realm cannot have an empty id" << dendl; + return -EINVAL; + } + if (info.name.empty()) { + ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; + return -EINVAL; + } + + int ver = 1; + auto tag = generate_version_tag(dpp->get_cct()); + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["realm_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::realm_insert4, + P1, P2, P3, P4); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["realm_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::realm_upsert4, + P1, P2, P3, P4); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.name); + sqlite::bind_int(dpp, binding, P3, ver); + sqlite::bind_text(dpp, binding, P4, tag); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::primary_key_constraint) { + return -EEXIST; // ID already taken + } else if (e.code() == sqlite::errc::unique_constraint) { + return -EEXIST; // Name already taken + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + if (writer) { + *writer = std::make_unique( + impl.get(), ver, std::move(tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWRealm& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_id "}; dpp = &prefix; + + if (realm_id.empty()) { + ldpp_dout(dpp, 0) << "requires a realm id" << dendl; + return -EINVAL; + } + + RealmRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_sel_id"]; + if (!stmt) { + const 
std::string sql = fmt::format(schema::realm_select_id1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_realm_row(reset, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +static void realm_select_by_name(const DoutPrefixProvider* dpp, + sqlite::Connection& conn, + std::string_view realm_name, + RealmRow& row) +{ + auto& stmt = conn.statements["realm_sel_name"]; + if (!stmt) { + const std::string sql = fmt::format(schema::realm_select_name1, P1); + stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_name); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_realm_row(reset, row); +} + +int SQLiteConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_name, + RGWRealm& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_name "}; dpp = &prefix; + + if (realm_name.empty()) { + ldpp_dout(dpp, 0) << "requires a realm name" << dendl; + return -EINVAL; + } + + RealmRow row; + try { + auto conn = impl->get(dpp); + realm_select_by_name(dpp, *conn, realm_name, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_default_realm(const DoutPrefixProvider* dpp, + optional_yield y, + RGWRealm& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm "}; dpp = &prefix; + + RealmRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_sel_def"]; + if (!stmt) { + static constexpr std::string_view sql = schema::realm_select_default0; + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_realm_row(reset, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, 
info.name); + } + return 0; +} + +int SQLiteConfigStore::read_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_name, + std::string& realm_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_id "}; dpp = &prefix; + + if (realm_name.empty()) { + ldpp_dout(dpp, 0) << "requires a realm name" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + + RealmRow row; + realm_select_by_name(dpp, *conn, realm_name, row); + + realm_id = std::move(row.info.id); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + return 0; +} + +int SQLiteConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp, + optional_yield y, + const RGWPeriod& period) +{ + return -ENOTSUP; +} + +int SQLiteConfigStore::list_realm_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + sal::ListResult& result) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:list_realm_names "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["realm_sel_names"]; + if (!stmt) { + const std::string sql = fmt::format(schema::realm_select_names2, P1, P2); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, marker); + sqlite::bind_int(dpp, binding, P2, entries.size()); + + auto reset = sqlite::stmt_execution{stmt.get()}; + read_text_rows(dpp, reset, entries, result); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +// Period + +int SQLiteConfigStore::create_period(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWPeriod& info) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:create_period "}; dpp = &prefix; + + if (info.id.empty()) { + ldpp_dout(dpp, 0) << "period cannot have an empty id" << dendl; + return -EINVAL; + } + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["period_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::period_insert4, + P1, P2, P3, P4); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["period_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::period_upsert4, + P1, P2, P3, P4); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_int(dpp, binding, P2, info.epoch); + sqlite::bind_text(dpp, binding, P3, info.realm_id); + sqlite::bind_text(dpp, binding, P4, data); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "period insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent RealmID + } else if (e.code() == 
sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +static void period_select_epoch(const DoutPrefixProvider* dpp, + sqlite::Connection& conn, + std::string_view id, uint32_t epoch, + RGWPeriod& row) +{ + auto& stmt = conn.statements["period_sel_epoch"]; + if (!stmt) { + const std::string sql = fmt::format(schema::period_select_epoch2, P1, P2); + stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, id); + sqlite::bind_int(dpp, binding, P2, epoch); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_period_row(reset, row); +} + +static void period_select_latest(const DoutPrefixProvider* dpp, + sqlite::Connection& conn, + std::string_view id, RGWPeriod& row) +{ + auto& stmt = conn.statements["period_sel_latest"]; + if (!stmt) { + const std::string sql = fmt::format(schema::period_select_latest1, P1); + stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_period_row(reset, row); +} + +int SQLiteConfigStore::read_period(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view period_id, + std::optional epoch, + RGWPeriod& info) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_period "}; dpp = &prefix; + + if (period_id.empty()) { + ldpp_dout(dpp, 0) << "requires a period id" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + if (epoch) { + period_select_epoch(dpp, *conn, period_id, *epoch, info); + } else { + period_select_latest(dpp, *conn, period_id, info); + } + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "period decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::delete_period(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view period_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:delete_period "}; dpp = &prefix; + + if (period_id.empty()) { + ldpp_dout(dpp, 0) << "requires a period id" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["period_del"]; + if (!stmt) { + const std::string sql = fmt::format(schema::period_delete1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, period_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { + return -ENOENT; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "period delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::list_period_ids(const DoutPrefixProvider* dpp, + optional_yield y, + const std::string& marker, + std::span entries, + sal::ListResult& result) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:list_period_ids "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["period_sel_ids"]; + if (!stmt) { + const std::string sql = 
fmt::format(schema::period_select_ids2, P1, P2); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, marker); + sqlite::bind_int(dpp, binding, P2, entries.size()); + + auto reset = sqlite::stmt_execution{stmt.get()}; + read_text_rows(dpp, reset, entries, result); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +// ZoneGroup + +class SQLiteZoneGroupWriter : public sal::ZoneGroupWriter { + SQLiteImpl* impl; + int ver; + std::string tag; + std::string zonegroup_id; + std::string zonegroup_name; + public: + SQLiteZoneGroupWriter(SQLiteImpl* impl, int ver, std::string tag, + std::string_view zonegroup_id, + std::string_view zonegroup_name) + : impl(impl), ver(ver), tag(std::move(tag)), + zonegroup_id(zonegroup_id), zonegroup_name(zonegroup_name) + {} + + int write(const DoutPrefixProvider* dpp, optional_yield y, + const RGWZoneGroup& info) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_write "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + if (zonegroup_id != info.id || zonegroup_name != info.name) { + return -EINVAL; // can't modify zonegroup id or name directly + } + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_upd"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_update5, + P1, P2, P3, P4, P5); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.realm_id); + sqlite::bind_text(dpp, binding, P3, data); + sqlite::bind_int(dpp, binding, P4, ver); + sqlite::bind_text(dpp, binding, P5, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + impl = nullptr; + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup update failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent RealmID + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; + } + + int rename(const DoutPrefixProvider* dpp, optional_yield y, + RGWZoneGroup& info, std::string_view new_name) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_rename "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) { + return -EINVAL; // can't modify zonegroup id or name directly + } + if (new_name.empty()) { + ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; + return -EINVAL; + } + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_rename"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_rename4, + P1, P2, P3, P4); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, new_name); + sqlite::bind_int(dpp, 
binding, P3, ver); + sqlite::bind_text(dpp, binding, P4, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + impl = nullptr; + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup rename failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::unique_constraint) { + return -EEXIST; // Name already taken + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + info.name = std::string{new_name}; + return 0; + } + + int remove(const DoutPrefixProvider* dpp, optional_yield y) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_remove "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_del"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_delete3, + P1, P2, P3); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, zonegroup_id); + sqlite::bind_int(dpp, binding, P2, ver); + sqlite::bind_text(dpp, binding, P3, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + impl = nullptr; + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; + } +}; // SQLiteZoneGroupWriter + + +int SQLiteConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + std::string_view zonegroup_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zonegroup_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["def_zonegroup_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::default_zonegroup_insert2, + P1, P2); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["def_zonegroup_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::default_zonegroup_upsert2, + P1, P2); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + sqlite::bind_text(dpp, binding, P2, zonegroup_id); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zonegroup insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::read_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + std::string& zonegroup_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_zonegroup_sel"]; + if (!stmt) { + const std::string sql = fmt::format(schema::default_zonegroup_select1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + 
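+    // Prepared statements are compiled once per connection, cached in
+    // conn->statements under a short name ("def_zonegroup_sel" above),
+    // and only rebound with fresh parameters on each reuse.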
sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + zonegroup_id = sqlite::column_text(reset, 0); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zonegroup select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zonegroup_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_zonegroup_del"]; + if (!stmt) { + const std::string sql = fmt::format(schema::default_zonegroup_delete1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { + return -ENOENT; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zonegroup delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +int SQLiteConfigStore::create_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneGroup& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:create_zonegroup "}; dpp = &prefix; + + if (info.id.empty()) { + ldpp_dout(dpp, 0) << "zonegroup cannot have an empty id" << dendl; + return -EINVAL; + } + if (info.name.empty()) { + ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; + return -EINVAL; + } + + int ver = 1; + auto tag = generate_version_tag(dpp->get_cct()); + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["zonegroup_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::zonegroup_insert6, + P1, P2, P3, P4, P5, P6); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["zonegroup_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::zonegroup_upsert6, + P1, P2, P3, P4, P5, P6); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.name); + sqlite::bind_text(dpp, binding, P3, info.realm_id); + sqlite::bind_text(dpp, binding, P4, data); + sqlite::bind_int(dpp, binding, P5, ver); + sqlite::bind_text(dpp, binding, P6, tag); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent RealmID + } else if (e.code() == sqlite::errc::primary_key_constraint) { + return -EEXIST; // ID already taken + } else if (e.code() == sqlite::errc::unique_constraint) { + return -EEXIST; // Name already taken + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + if (writer) { + 
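+    // Hand back a writer primed with the initial version number and tag
+    // so that later write()/rename()/remove() calls can detect racing
+    // updates to the same row.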
*writer = std::make_unique( + impl.get(), ver, std::move(tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_id, + RGWZoneGroup& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_id "}; dpp = &prefix; + + if (zonegroup_id.empty()) { + ldpp_dout(dpp, 0) << "requires a zonegroup id" << dendl; + return -EINVAL; + } + + ZoneGroupRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_sel_id"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_select_id1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, zonegroup_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_zonegroup_row(reset, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_name, + RGWZoneGroup& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_name "}; dpp = &prefix; + + if (zonegroup_name.empty()) { + ldpp_dout(dpp, 0) << "requires a zonegroup name" << dendl; + return -EINVAL; + } + + ZoneGroupRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_sel_name"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_select_name1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, zonegroup_name); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_zonegroup_row(reset, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneGroup& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup "}; dpp = &prefix; + + ZoneGroupRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_sel_def"]; + if (!stmt) { + static constexpr std::string_view sql = schema::zonegroup_select_default0; + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + 
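+    // eval1() is used where the statement is expected to produce exactly
+    // one row (eval0() where it should produce none); decode that row here.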
read_zonegroup_row(reset, row); + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp, + optional_yield y, + const std::string& marker, + std::span entries, + sal::ListResult& result) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:list_zonegroup_names "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zonegroup_sel_names"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zonegroup_select_names2, P1, P2); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + auto reset = sqlite::stmt_execution{stmt.get()}; + + sqlite::bind_text(dpp, binding, P1, marker); + sqlite::bind_int(dpp, binding, P2, entries.size()); + + read_text_rows(dpp, reset, entries, result); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +// Zone + +class SQLiteZoneWriter : public sal::ZoneWriter { + SQLiteImpl* impl; + int ver; + std::string tag; + std::string zone_id; + std::string zone_name; + public: + SQLiteZoneWriter(SQLiteImpl* impl, int ver, std::string tag, + std::string_view zone_id, std::string_view zone_name) + : impl(impl), ver(ver), tag(std::move(tag)), + zone_id(zone_id), zone_name(zone_name) + {} + + int write(const DoutPrefixProvider* dpp, optional_yield y, + const RGWZoneParams& info) override + { + Prefix prefix{*dpp, "dbconfig:sqlite:zone_write "}; dpp = &prefix; + + if (!impl) { + return -EINVAL; // can't write after conflict or delete + } + if (zone_id != info.id || zone_name != info.name) { + return -EINVAL; // can't modify zone id or name directly + } + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zone_upd"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zone_update5, + P1, P2, P3, P4, P5); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.realm_id); + sqlite::bind_text(dpp, binding, P3, data); + sqlite::bind_int(dpp, binding, P4, ver); + sqlite::bind_text(dpp, binding, P5, tag); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch + impl = nullptr; + return -ECANCELED; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone update failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent RealmID + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + ++ver; + return 0; + } + + int rename(const DoutPrefixProvider* dpp, optional_yield y, + RGWZoneParams& 
info, std::string_view new_name) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zone_rename "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    if (zone_id != info.id || zone_name != info.name) {
+      return -EINVAL; // can't modify zone id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zone_rename"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zone_rename4, P1, P2, P3, P4);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, info.id);
+      sqlite::bind_text(dpp, binding, P2, new_name);
+      sqlite::bind_int(dpp, binding, P3, ver);
+      sqlite::bind_text(dpp, binding, P4, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        impl = nullptr;
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zone rename failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::unique_constraint) {
+        return -EEXIST; // Name already taken
+      } else if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    info.name = std::string{new_name};
+    ++ver;
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    Prefix prefix{*dpp, "dbconfig:sqlite:zone_remove "}; dpp = &prefix;
+
+    if (!impl) {
+      return -EINVAL; // can't write after conflict or delete
+    }
+    try {
+      auto conn = impl->get(dpp);
+      auto& stmt = conn->statements["zone_del"];
+      if (!stmt) {
+        const std::string sql = fmt::format(schema::zone_delete3, P1, P2, P3);
+        stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+      auto binding = sqlite::stmt_binding{stmt.get()};
+      sqlite::bind_text(dpp, binding, P1, zone_id);
+      sqlite::bind_int(dpp, binding, P2, ver);
+      sqlite::bind_text(dpp, binding, P3, tag);
+
+      auto reset = sqlite::stmt_execution{stmt.get()};
+      sqlite::eval0(dpp, reset);
+
+      impl = nullptr;
+      if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch
+        return -ECANCELED;
+      }
+    } catch (const sqlite::error& e) {
+      ldpp_dout(dpp, 20) << "zone delete failed: " << e.what() << dendl;
+      if (e.code() == sqlite::errc::busy) {
+        return -EBUSY;
+      }
+      return -EIO;
+    }
+    return 0;
+  }
+}; // SQLiteZoneWriter
+
+
+int SQLiteConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp,
+                                             optional_yield y, bool exclusive,
+                                             std::string_view realm_id,
+                                             std::string_view zone_id)
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zone_id "}; dpp = &prefix;
+
+  if (zone_id.empty()) {
+    ldpp_dout(dpp, 0) << "requires a zone id" << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    auto conn = impl->get(dpp);
+    sqlite::stmt_ptr* stmt = nullptr;
+    if (exclusive) {
+      stmt = &conn->statements["def_zone_ins"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zone_insert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["def_zone_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::default_zone_upsert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+    sqlite::bind_text(dpp, binding, P2,
zone_id); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zone insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + std::string& zone_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zone_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_zone_sel"]; + if (!stmt) { + const std::string sql = fmt::format(schema::default_zone_select1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + zone_id = sqlite::column_text(reset, 0); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zone select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zone_id "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["def_zone_del"]; + if (!stmt) { + const std::string sql = fmt::format(schema::default_zone_delete1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval0(dpp, reset); + + if (!::sqlite3_changes(conn->db.get())) { + return -ENOENT; + } + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "default zone delete failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +int SQLiteConfigStore::create_zone(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneParams& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:create_zone "}; dpp = &prefix; + + if (info.id.empty()) { + ldpp_dout(dpp, 0) << "zone cannot have an empty id" << dendl; + return -EINVAL; + } + if (info.name.empty()) { + ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl; + return -EINVAL; + } + + int ver = 1; + auto tag = generate_version_tag(dpp->get_cct()); + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["zone_ins"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::zone_insert6, + P1, P2, P3, P4, P5, P6); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } else { + stmt = &conn->statements["zone_ups"]; + if (!*stmt) { + const std::string sql = fmt::format(schema::zone_upsert6, + P1, P2, P3, P4, P5, P6); + *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + } + auto binding = sqlite::stmt_binding{stmt->get()}; + sqlite::bind_text(dpp, binding, P1, info.id); + sqlite::bind_text(dpp, binding, P2, info.name); + sqlite::bind_text(dpp, binding, P3, 
info.realm_id); + sqlite::bind_text(dpp, binding, P4, data); + sqlite::bind_int(dpp, binding, P5, ver); + sqlite::bind_text(dpp, binding, P6, tag); + + auto reset = sqlite::stmt_execution{stmt->get()}; + sqlite::eval0(dpp, reset); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone insert failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::foreign_key_constraint) { + return -EINVAL; // refers to nonexistent RealmID + } else if (e.code() == sqlite::errc::primary_key_constraint) { + return -EEXIST; // ID already taken + } else if (e.code() == sqlite::errc::unique_constraint) { + return -EEXIST; // Name already taken + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + if (writer) { + *writer = std::make_unique( + impl.get(), ver, std::move(tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_id, + RGWZoneParams& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_id "}; dpp = &prefix; + + if (zone_id.empty()) { + ldpp_dout(dpp, 0) << "requires a zone id" << dendl; + return -EINVAL; + } + + ZoneRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zone_sel_id"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zone_select_id1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, zone_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_zone_row(reset, row); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_name, + RGWZoneParams& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_name "}; dpp = &prefix; + + if (zone_name.empty()) { + ldpp_dout(dpp, 0) << "requires a zone name" << dendl; + return -EINVAL; + } + + ZoneRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zone_sel_name"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zone_select_name1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, zone_name); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_zone_row(reset, row); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::read_default_zone(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneParams& info, + std::unique_ptr* writer) +{ + Prefix prefix{*dpp, 
"dbconfig:sqlite:read_default_zone "}; dpp = &prefix; + + ZoneRow row; + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zone_sel_def"]; + if (!stmt) { + static constexpr std::string_view sql = schema::zone_select_default0; + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + read_zone_row(reset, row); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + + info = std::move(row.info); + if (writer) { + *writer = std::make_unique( + impl.get(), row.ver, std::move(row.tag), info.id, info.name); + } + return 0; +} + +int SQLiteConfigStore::list_zone_names(const DoutPrefixProvider* dpp, + optional_yield y, + const std::string& marker, + std::span entries, + sal::ListResult& result) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:list_zone_names "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["zone_sel_names"]; + if (!stmt) { + const std::string sql = fmt::format(schema::zone_select_names2, P1, P2); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, marker); + sqlite::bind_int(dpp, binding, P2, entries.size()); + + auto reset = sqlite::stmt_execution{stmt.get()}; + read_text_rows(dpp, reset, entries, result); + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + + +// PeriodConfig + +int SQLiteConfigStore::read_period_config(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWPeriodConfig& info) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:read_period_config "}; dpp = &prefix; + + try { + auto conn = impl->get(dpp); + auto& stmt = conn->statements["period_conf_sel"]; + if (!stmt) { + const std::string sql = fmt::format(schema::period_config_select1, P1); + stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); + } + auto binding = sqlite::stmt_binding{stmt.get()}; + sqlite::bind_text(dpp, binding, P1, realm_id); + + auto reset = sqlite::stmt_execution{stmt.get()}; + sqlite::eval1(dpp, reset); + + std::string data = sqlite::column_text(reset, 0); + bufferlist bl = bufferlist::static_from_string(data); + auto p = bl.cbegin(); + decode(info, p); + + } catch (const buffer::error& e) { + ldpp_dout(dpp, 20) << "period config decode failed: " << e.what() << dendl; + return -EIO; + } catch (const sqlite::error& e) { + ldpp_dout(dpp, 20) << "period config select failed: " << e.what() << dendl; + if (e.code() == sqlite::errc::done) { + return -ENOENT; + } else if (e.code() == sqlite::errc::busy) { + return -EBUSY; + } + return -EIO; + } + return 0; +} + +int SQLiteConfigStore::write_period_config(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + const RGWPeriodConfig& info) +{ + Prefix prefix{*dpp, "dbconfig:sqlite:write_period_config "}; dpp = &prefix; + + bufferlist bl; + encode(info, bl); + const auto data = std::string_view{bl.c_str(), bl.length()}; + + try { + auto conn = impl->get(dpp); + sqlite::stmt_ptr* stmt = nullptr; + if (exclusive) { + stmt = &conn->statements["period_conf_ins"]; + if (!*stmt) { + const std::string 
sql = fmt::format(schema::period_config_insert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    } else {
+      stmt = &conn->statements["period_conf_ups"];
+      if (!*stmt) {
+        const std::string sql = fmt::format(schema::period_config_upsert2, P1, P2);
+        *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql);
+      }
+    }
+    auto binding = sqlite::stmt_binding{stmt->get()};
+    sqlite::bind_text(dpp, binding, P1, realm_id);
+    sqlite::bind_text(dpp, binding, P2, data);
+
+    auto reset = sqlite::stmt_execution{stmt->get()};
+    sqlite::eval0(dpp, reset);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 20) << "period config encode failed: " << e.what() << dendl;
+    return -EIO;
+  } catch (const sqlite::error& e) {
+    ldpp_dout(dpp, 20) << "period config insert failed: " << e.what() << dendl;
+    if (e.code() == sqlite::errc::primary_key_constraint) {
+      return -EEXIST;
+    } else if (e.code() == sqlite::errc::busy) {
+      return -EBUSY;
+    }
+    return -EIO;
+  }
+  return 0;
+}
+
+namespace {
+
+int version_cb(void* user, int count, char** values, char** names)
+{
+  if (count != 1) {
+    return EINVAL;
+  }
+  std::string_view name = names[0];
+  if (name != "user_version") {
+    return EINVAL;
+  }
+  std::string_view value = values[0];
+  auto result = std::from_chars(value.begin(), value.end(),
+                                *reinterpret_cast<uint32_t*>(user));
+  if (result.ec != std::errc{}) {
+    return static_cast<int>(result.ec);
+  }
+  return 0;
+}
+
+void apply_schema_migrations(const DoutPrefixProvider* dpp, sqlite3* db)
+{
+  sqlite::execute(dpp, db, "PRAGMA foreign_keys = ON", nullptr, nullptr);
+
+  // initiate a transaction and read the current schema version
+  uint32_t version = 0;
+  sqlite::execute(dpp, db, "BEGIN; PRAGMA user_version", version_cb, &version);
+
+  const uint32_t initial_version = version;
+  ldpp_dout(dpp, 4) << "current schema version " << version << dendl;
+
+  // use the version as an index into schema::migrations
+  auto m = std::next(schema::migrations.begin(), version);
+
+  for (; m != schema::migrations.end(); ++m, ++version) {
+    try {
+      sqlite::execute(dpp, db, m->up, nullptr, nullptr);
+    } catch (const sqlite::error&) {
+      ldpp_dout(dpp, -1) << "ERROR: schema migration failed on v" << version
+          << ": " << m->description << dendl;
+      throw;
+    }
+  }
+
+  if (version > initial_version) {
+    // update the user_version and commit the transaction
+    const auto commit = fmt::format("PRAGMA user_version = {}; COMMIT", version);
+    sqlite::execute(dpp, db, commit.c_str(), nullptr, nullptr);
+
+    ldpp_dout(dpp, 4) << "upgraded database schema to version " << version << dendl;
+  } else {
+    // nothing to commit
+    sqlite::execute(dpp, db, "ROLLBACK", nullptr, nullptr);
+  }
+}
+
+} // anonymous namespace
+
+
+auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri)
+  -> std::unique_ptr<SQLiteConfigStore>
+{
+  Prefix prefix{*dpp, "dbconfig:sqlite:create_sqlite_store "}; dpp = &prefix;
+
+  // build the connection pool
+  int flags = SQLITE_OPEN_CREATE | SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE |
+      SQLITE_OPEN_NOMUTEX;
+  auto factory = sqlite::ConnectionFactory{uri, flags};
+
+  // sqlite does not support concurrent writers. we enforce this limitation
+  // by using a connection pool of size=1
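+  // (illustrative sketch, assuming a caller holding a ConfigStore* named
+  //  store: because every writer shares this single pooled connection, a
+  //  concurrent writer surfaces as sqlite::errc::busy, which the methods
+  //  above translate to -EBUSY, so a caller might simply retry:
+  //
+  //    int r = -EBUSY;
+  //    for (int tries = 0; r == -EBUSY && tries < 3; ++tries) {
+  //      r = store->write_default_realm_id(dpp, y, false, realm_id);
+  //    }
+  //  )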
+  static constexpr size_t max_connections = 1;
+  auto impl = std::make_unique<SQLiteImpl>(std::move(factory), max_connections);
+
+  // open a connection to apply schema migrations
+  auto conn = impl->get(dpp);
+  apply_schema_migrations(dpp, conn->db.get());
+
+  return std::make_unique<SQLiteConfigStore>(std::move(impl));
+}
+
+} // namespace rgw::dbstore::config
diff --git a/src/rgw/driver/dbstore/config/sqlite.h b/src/rgw/driver/dbstore/config/sqlite.h
new file mode 100644
index 00000000000..d79e040728c
--- /dev/null
+++ b/src/rgw/driver/dbstore/config/sqlite.h
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "rgw_sal_config.h"
+
+class DoutPrefixProvider;
+
+namespace rgw::dbstore::config {
+
+struct SQLiteImpl;
+
+class SQLiteConfigStore : public sal::ConfigStore {
+ public:
+  explicit SQLiteConfigStore(std::unique_ptr<SQLiteImpl> impl);
+  ~SQLiteConfigStore() override;
+
+  int write_default_realm_id(const DoutPrefixProvider* dpp,
+                             optional_yield y, bool exclusive,
+                             std::string_view realm_id) override;
+  int read_default_realm_id(const DoutPrefixProvider* dpp,
+                            optional_yield y,
+                            std::string& realm_id) override;
+  int delete_default_realm_id(const DoutPrefixProvider* dpp,
+                              optional_yield y) override;
+
+  int create_realm(const DoutPrefixProvider* dpp,
+                   optional_yield y, bool exclusive,
+                   const RGWRealm& info,
+                   std::unique_ptr* writer) override;
+  int read_realm_by_id(const DoutPrefixProvider* dpp,
+                       optional_yield y,
+                       std::string_view realm_id,
+                       RGWRealm& info,
+                       std::unique_ptr* writer) override;
+  int read_realm_by_name(const DoutPrefixProvider* dpp,
+                         optional_yield y,
+                         std::string_view realm_name,
+                         RGWRealm& info,
+                         std::unique_ptr* writer) override;
+  int read_default_realm(const DoutPrefixProvider* dpp,
+                         optional_yield y,
+                         RGWRealm& info,
+                         std::unique_ptr* writer) override;
+  int read_realm_id(const DoutPrefixProvider* dpp,
+                    optional_yield y, std::string_view realm_name,
+                    std::string& realm_id) override;
+  int realm_notify_new_period(const DoutPrefixProvider* dpp,
+                              optional_yield y,
+                              const RGWPeriod& period) override;
+  int list_realm_names(const DoutPrefixProvider* dpp,
+                       optional_yield y, const std::string& marker,
+                       std::span entries,
+                       sal::ListResult& result) override;
+
+  int create_period(const DoutPrefixProvider* dpp,
+                    optional_yield y, bool exclusive,
+                    const RGWPeriod& info) override;
+  int read_period(const DoutPrefixProvider* dpp,
+                  optional_yield y, std::string_view period_id,
+                  std::optional epoch, RGWPeriod& info) override;
+  int delete_period(const DoutPrefixProvider* dpp,
+                    optional_yield y,
+                    std::string_view period_id) override;
+  int list_period_ids(const DoutPrefixProvider* dpp,
+                      optional_yield y, const std::string& marker,
+                      std::span entries,
+                      sal::ListResult& result) override;
+
+  int write_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                 optional_yield y, bool exclusive,
+                                 std::string_view realm_id,
+                                 std::string_view zonegroup_id) override;
+  int read_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                optional_yield y,
+                                std::string_view realm_id,
+                                std::string& zonegroup_id) override;
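+  // (illustrative sketch of the intended read-modify-write pattern, assuming
+  //  the sal::RealmWriter interface declared in rgw_sal_config.h: each
+  //  read_*() can hand back a writer that remembers the row's VersionNumber
+  //  and VersionTag, so a later write succeeds only if nobody raced us:
+  //
+  //    RGWRealm realm;
+  //    std::unique_ptr<rgw::sal::RealmWriter> writer;
+  //    if (store.read_realm_by_name(dpp, y, "myrealm", realm, &writer) == 0) {
+  //      realm.current_period = period_id;      // mutate the in-memory copy
+  //      int r = writer->write(dpp, y, realm);  // fails on version mismatch
+  //    }
+  //  )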
int delete_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) override; + + int create_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneGroup& info, + std::unique_ptr* writer) override; + int read_zonegroup_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_id, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + int read_zonegroup_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_name, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + int read_default_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + int list_zonegroup_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + sal::ListResult& result) override; + + int write_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + std::string_view zone_id) override; + int read_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + std::string& zone_id) override; + int delete_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) override; + + int create_zone(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneParams& info, + std::unique_ptr* writer) override; + int read_zone_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_id, + RGWZoneParams& info, + std::unique_ptr* writer) override; + int read_zone_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_name, + RGWZoneParams& info, + std::unique_ptr* writer) override; + int read_default_zone(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneParams& info, + std::unique_ptr* writer) override; + int list_zone_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + sal::ListResult& result) override; + + int read_period_config(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWPeriodConfig& info) override; + int write_period_config(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + const RGWPeriodConfig& info) override; + + private: + std::unique_ptr impl; +}; // SQLiteConfigStore + + +auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri) + -> std::unique_ptr; + +} // namespace rgw::dbstore::config diff --git a/src/rgw/driver/dbstore/config/sqlite_schema.h b/src/rgw/driver/dbstore/config/sqlite_schema.h new file mode 100644 index 00000000000..c8a8fce3e72 --- /dev/null +++ b/src/rgw/driver/dbstore/config/sqlite_schema.h @@ -0,0 +1,299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include + +namespace rgw::dbstore::config::schema { + +struct Migration { + // human-readable description to help with debugging migration errors + const char* description = nullptr; + // series of sql statements to apply the schema migration + const char* up = nullptr; + // series of sql statements to undo the schema migration + const char* down = nullptr; +}; + +static constexpr std::initializer_list migrations {{ + .description = "create the initial ConfigStore tables", + .up = R"( +CREATE TABLE IF NOT EXISTS Realms ( + ID TEXT PRIMARY KEY NOT NULL, + Name TEXT UNIQUE NOT NULL, + CurrentPeriod TEXT, + Epoch INTEGER DEFAULT 0, + VersionNumber INTEGER, + VersionTag TEXT +); +CREATE TABLE IF NOT EXISTS Periods ( + ID TEXT NOT NULL, + Epoch INTEGER DEFAULT 0, + RealmID TEXT NOT NULL REFERENCES Realms (ID), + Data TEXT NOT NULL, + PRIMARY KEY (ID, Epoch) +); +CREATE TABLE IF NOT EXISTS PeriodConfigs ( + RealmID TEXT PRIMARY KEY NOT NULL REFERENCES Realms (ID), + Data TEXT NOT NULL +); +CREATE TABLE IF NOT EXISTS ZoneGroups ( + ID TEXT PRIMARY KEY NOT NULL, + Name TEXT UNIQUE NOT NULL, + RealmID TEXT NOT NULL REFERENCES Realms (ID), + Data TEXT NOT NULL, + VersionNumber INTEGER, + VersionTag TEXT +); +CREATE TABLE IF NOT EXISTS Zones ( + ID TEXT PRIMARY KEY NOT NULL, + Name TEXT UNIQUE NOT NULL, + RealmID TEXT NOT NULL REFERENCES Realms (ID), + Data TEXT NOT NULL, + VersionNumber INTEGER, + VersionTag TEXT +); +CREATE TABLE IF NOT EXISTS DefaultRealms ( + ID TEXT, + Empty TEXT PRIMARY KEY +); +CREATE TABLE IF NOT EXISTS DefaultZoneGroups ( + ID TEXT, + RealmID TEXT PRIMARY KEY REFERENCES Realms (ID) +); +CREATE TABLE IF NOT EXISTS DefaultZones ( + ID TEXT, + RealmID TEXT PRIMARY KEY REFERENCES Realms (ID) +); +)", + .down = R"( +DROP TABLE IF EXISTS Realms; +DROP TABLE IF EXISTS Periods; +DROP TABLE IF EXISTS PeriodConfigs; +DROP TABLE IF EXISTS ZoneGroups; +DROP TABLE IF EXISTS Zones; +DROP TABLE IF EXISTS DefaultRealms; +DROP TABLE IF EXISTS DefaultZoneGroups; +DROP TABLE IF EXISTS DefaultZones; +)" + } +}; + + +// DefaultRealms + +static constexpr const char* default_realm_insert1 = +"INSERT INTO DefaultRealms (ID, Empty) VALUES ({}, '')"; + +static constexpr const char* default_realm_upsert1 = +R"(INSERT INTO DefaultRealms (ID, Empty) VALUES ({0}, '') +ON CONFLICT(Empty) DO UPDATE SET ID = {0})"; + +static constexpr const char* default_realm_select0 = +"SELECT ID FROM DefaultRealms LIMIT 1"; + +static constexpr const char* default_realm_delete0 = +"DELETE FROM DefaultRealms"; + + +// Realms + +static constexpr const char* realm_update5 = +"UPDATE Realms SET CurrentPeriod = {1}, Epoch = {2}, VersionNumber = {3} + 1 \ +WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; + +static constexpr const char* realm_rename4 = +"UPDATE Realms SET Name = {1}, VersionNumber = {2} + 1 \ +WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; + +static constexpr const char* realm_delete3 = +"DELETE FROM Realms WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}"; + +static constexpr const char* realm_insert4 = +"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \ +VALUES ({}, {}, {}, {})"; + +static constexpr const char* realm_upsert4 = +"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \ +VALUES ({0}, {1}, {2}, {3}) \ +ON CONFLICT(ID) DO UPDATE SET Name = {1}, \ +VersionNumber = {2}, VersionTag = {3}"; + +static constexpr const char* realm_select_id1 = +"SELECT * FROM Realms WHERE ID = {} LIMIT 1"; + +static constexpr const char* 
realm_select_name1 = +"SELECT * FROM Realms WHERE Name = {} LIMIT 1"; + +static constexpr const char* realm_select_default0 = +"SELECT r.* FROM Realms r \ +INNER JOIN DefaultRealms d \ +ON d.ID = r.ID LIMIT 1"; + +static constexpr const char* realm_select_names2 = +"SELECT Name FROM Realms WHERE Name > {} \ +ORDER BY Name ASC LIMIT {}"; + + +// Periods + +static constexpr const char* period_insert4 = +"INSERT INTO Periods (ID, Epoch, RealmID, Data) \ +VALUES ({}, {}, {}, {})"; + +static constexpr const char* period_upsert4 = +"INSERT INTO Periods (ID, Epoch, RealmID, Data) \ +VALUES ({0}, {1}, {2}, {3}) \ +ON CONFLICT DO UPDATE SET RealmID = {2}, Data = {3}"; + +static constexpr const char* period_select_epoch2 = +"SELECT * FROM Periods WHERE ID = {} AND Epoch = {} LIMIT 1"; + +static constexpr const char* period_select_latest1 = +"SELECT * FROM Periods WHERE ID = {} ORDER BY Epoch DESC LIMIT 1"; + +static constexpr const char* period_delete1 = +"DELETE FROM Periods WHERE ID = {}"; + +static constexpr const char* period_select_ids2 = +"SELECT ID FROM Periods WHERE ID > {} ORDER BY ID ASC LIMIT {}"; + + +// DefaultZoneGroups + +static constexpr const char* default_zonegroup_insert2 = +"INSERT INTO DefaultZoneGroups (RealmID, ID) VALUES ({}, {})"; + +static constexpr const char* default_zonegroup_upsert2 = +"INSERT INTO DefaultZoneGroups (RealmID, ID) \ +VALUES ({0}, {1}) \ +ON CONFLICT(RealmID) DO UPDATE SET ID = {1}"; + +static constexpr const char* default_zonegroup_select1 = +"SELECT ID FROM DefaultZoneGroups WHERE RealmID = {}"; + +static constexpr const char* default_zonegroup_delete1 = +"DELETE FROM DefaultZoneGroups WHERE RealmID = {}"; + + +// ZoneGroups + +static constexpr const char* zonegroup_update5 = +"UPDATE ZoneGroups SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \ +WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; + +static constexpr const char* zonegroup_rename4 = +"UPDATE ZoneGroups SET Name = {1}, VersionNumber = {2} + 1 \ +WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; + +static constexpr const char* zonegroup_delete3 = +"DELETE FROM ZoneGroups WHERE ID = {} \ +AND VersionNumber = {} AND VersionTag = {}"; + +static constexpr const char* zonegroup_insert6 = +"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ +VALUES ({}, {}, {}, {}, {}, {})"; + +static constexpr const char* zonegroup_upsert6 = +"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ +VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \ +ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \ +Data = {3}, VersionNumber = {4}, VersionTag = {5}"; + +static constexpr const char* zonegroup_select_id1 = +"SELECT * FROM ZoneGroups WHERE ID = {} LIMIT 1"; + +static constexpr const char* zonegroup_select_name1 = +"SELECT * FROM ZoneGroups WHERE Name = {} LIMIT 1"; + +static constexpr const char* zonegroup_select_default0 = +"SELECT z.* FROM ZoneGroups z \ +INNER JOIN DefaultZoneGroups d \ +ON d.ID = z.ID LIMIT 1"; + +static constexpr const char* zonegroup_select_names2 = +"SELECT Name FROM ZoneGroups WHERE Name > {} \ +ORDER BY Name ASC LIMIT {}"; + + +// DefaultZones + +static constexpr const char* default_zone_insert2 = +"INSERT INTO DefaultZones (RealmID, ID) VALUES ({}, {})"; + +static constexpr const char* default_zone_upsert2 = +"INSERT INTO DefaultZones (RealmID, ID) VALUES ({0}, {1}) \ +ON CONFLICT(RealmID) DO UPDATE SET ID = {1}"; + +static constexpr const char* default_zone_select1 = +"SELECT ID FROM DefaultZones WHERE RealmID 
= {}"; + +static constexpr const char* default_zone_delete1 = +"DELETE FROM DefaultZones WHERE RealmID = {}"; + + +// Zones + +static constexpr const char* zone_update5 = +"UPDATE Zones SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \ +WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; + +static constexpr const char* zone_rename4 = +"UPDATE Zones SET Name = {1}, VersionNumber = {2} + 1 \ +WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; + +static constexpr const char* zone_delete3 = +"DELETE FROM Zones WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}"; + +static constexpr const char* zone_insert6 = +"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ +VALUES ({}, {}, {}, {}, {}, {})"; + +static constexpr const char* zone_upsert6 = +"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ +VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \ +ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \ +Data = {3}, VersionNumber = {4}, VersionTag = {5}"; + +static constexpr const char* zone_select_id1 = +"SELECT * FROM Zones WHERE ID = {} LIMIT 1"; + +static constexpr const char* zone_select_name1 = +"SELECT * FROM Zones WHERE Name = {} LIMIT 1"; + +static constexpr const char* zone_select_default0 = +"SELECT z.* FROM Zones z \ +INNER JOIN DefaultZones d \ +ON d.ID = z.ID LIMIT 1"; + +static constexpr const char* zone_select_names2 = +"SELECT Name FROM Zones WHERE Name > {} \ +ORDER BY Name ASC LIMIT {}"; + + +// PeriodConfigs + +static constexpr const char* period_config_insert2 = +"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({}, {})"; + +static constexpr const char* period_config_upsert2 = +"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({0}, {1}) \ +ON CONFLICT (RealmID) DO UPDATE SET Data = {1}"; + +static constexpr const char* period_config_select1 = +"SELECT Data FROM PeriodConfigs WHERE RealmID = {} LIMIT 1"; + +} // namespace rgw::dbstore::config::schema diff --git a/src/rgw/driver/dbstore/config/store.cc b/src/rgw/driver/dbstore/config/store.cc new file mode 100644 index 00000000000..66f7471d534 --- /dev/null +++ b/src/rgw/driver/dbstore/config/store.cc @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include + +#include "store.h" +#ifdef SQLITE_ENABLED +#include "sqlite.h" +#endif + +namespace rgw::dbstore { + +auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri) + -> std::unique_ptr +{ +#ifdef SQLITE_ENABLED + if (uri.starts_with("file:")) { + return config::create_sqlite_store(dpp, uri); + } +#endif + throw std::runtime_error(fmt::format("unrecognized URI {}", uri)); +} + +} // namespace rgw::dbstore diff --git a/src/rgw/driver/dbstore/config/store.h b/src/rgw/driver/dbstore/config/store.h new file mode 100644 index 00000000000..553d9f70934 --- /dev/null +++ b/src/rgw/driver/dbstore/config/store.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. 
+ * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include +#include "rgw_sal_config.h" + +namespace rgw::dbstore { + +// ConfigStore factory +auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri) + -> std::unique_ptr; + +} // namespace rgw::dbstore diff --git a/src/rgw/driver/dbstore/dbstore_main.cc b/src/rgw/driver/dbstore/dbstore_main.cc new file mode 100644 index 00000000000..08724d8227e --- /dev/null +++ b/src/rgw/driver/dbstore/dbstore_main.cc @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include + +#include "dbstore_mgr.h" +#include +#include + +using namespace std; +using namespace rgw::store; +using DB = rgw::store::DB; + +struct thr_args { + DB *dbs; + int thr_id; +}; + +void* process(void *arg) +{ + struct thr_args *t_args = (struct thr_args*)arg; + + DB *db = t_args->dbs; + int thr_id = t_args->thr_id; + int ret = -1; + + cout<<"Entered thread:"<get_def_dpp(); + + db->InitializeParams(dpp, ¶ms); + + params.op.user.uinfo.display_name = user1; + params.op.user.uinfo.user_id.tenant = "tenant"; + params.op.user.uinfo.user_id.id = user1; + params.op.user.uinfo.suspended = 123; + params.op.user.uinfo.max_buckets = 456; + params.op.user.uinfo.assumed_role_arn = "role"; + params.op.user.uinfo.placement_tags.push_back("tags1"); + params.op.user.uinfo.placement_tags.push_back("tags2"); + + RGWAccessKey k1("id1", "key1"); + RGWAccessKey k2("id2", "key2"); + params.op.user.uinfo.access_keys.insert(make_pair("key1", k1)); + params.op.user.uinfo.access_keys.insert(make_pair("key2", k2)); + + ret = db->ProcessOp(dpp, "InsertUser", ¶ms); + cout << "InsertUser return value: " << ret << "\n"; + + DBOpParams params2 = {}; + params.op.user.uinfo.user_id.tenant = "tenant2"; + + db->InitializeParams(dpp, ¶ms2); + params2.op.user.uinfo.display_name = user1; + ret = db->ProcessOp(dpp, "GetUser", ¶ms2); + + cout << "GetUser return value: " << ret << "\n"; + + cout << "tenant: " << params2.op.user.uinfo.user_id.tenant << "\n"; + cout << "suspended: " << (int)params2.op.user.uinfo.suspended << "\n"; + cout << "assumed_role_arn: " << params2.op.user.uinfo.assumed_role_arn << "\n"; + + list::iterator it = params2.op.user.uinfo.placement_tags.begin(); + + while (it != params2.op.user.uinfo.placement_tags.end()) { + cout << "list = " << *it << "\n"; + it++; + } + + map::iterator it2 = params2.op.user.uinfo.access_keys.begin(); + + while (it2 != params2.op.user.uinfo.access_keys.end()) { + cout << "keys = " << it2->first << "\n"; + RGWAccessKey k = it2->second; + cout << "id = " << k.id << ", keys = " << k.key << "\n"; + it2++; + } + + params.op.bucket.info.bucket.name = bucketa; + db->ProcessOp(dpp, "InsertBucket", ¶ms); + + params.op.user.uinfo.display_name = user2; + params.op.user.uinfo.user_id.id = user2; + db->ProcessOp(dpp, "InsertUser", ¶ms); + + params.op.bucket.info.bucket.name = bucketb; + db->ProcessOp(dpp, "InsertBucket", ¶ms); + + db->ProcessOp(dpp, "GetUser", ¶ms); + db->ProcessOp(dpp, "GetBucket", ¶ms); + + db->ListAllUsers(dpp, ¶ms); + db->ListAllBuckets(dpp, ¶ms); + + params.op.bucket.info.bucket.name = bucketb; + + db->ProcessOp(dpp, "RemoveBucket", ¶ms); + + params.op.user.uinfo.user_id.id = user2; + db->ProcessOp(dpp, "RemoveUser", ¶ms); + + db->ListAllUsers(dpp, ¶ms); + db->ListAllBuckets(dpp, ¶ms); + cout<<"Exiting thread:"< args; + auto cct = 
global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON, CINIT_FLAG_NO_MON_CONFIG, 1);
+  dbsm = new DBStoreManager(cct.get(), logfile, loglevel);
+  dbs = dbsm->getDB(tenant, true);
+
+  cout<<"No. of threads being created = "<destroyAllHandles();
+
+  return 0;
+}
diff --git a/src/rgw/driver/dbstore/dbstore_mgr.cc b/src/rgw/driver/dbstore/dbstore_mgr.cc
new file mode 100644
index 00000000000..6835f526bc6
--- /dev/null
+++ b/src/rgw/driver/dbstore/dbstore_mgr.cc
@@ -0,0 +1,140 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "dbstore_mgr.h"
+#include "common/dbstore_log.h"
+
+#include <filesystem>
+
+static constexpr auto dout_subsys = ceph_subsys_rgw;
+
+using namespace std;
+
+
+/* Given a tenant, find and return the DBStore handle.
+ * If not found and 'create' is set to true, create one
+ * and return it.
+ */
+DB *DBStoreManager::getDB (string tenant, bool create)
+{
+  map<string, DB*>::iterator iter;
+  DB *dbs = nullptr;
+  pair<map<string, DB*>::iterator, bool> ret;
+
+  if (tenant.empty())
+    return default_db;
+
+  if (DBStoreHandles.empty())
+    goto not_found;
+
+  iter = DBStoreHandles.find(tenant);
+
+  if (iter != DBStoreHandles.end())
+    return iter->second;
+
+not_found:
+  if (!create)
+    return nullptr;
+
+  dbs = createDB(tenant);
+
+  return dbs;
+}
+
+/* Create a DBStore instance */
+DB *DBStoreManager::createDB(std::string tenant) {
+  DB *dbs = nullptr;
+  pair<map<string, DB*>::iterator, bool> ret;
+  const auto& db_path = g_conf().get_val<std::string>("dbstore_db_dir");
+  const auto& db_name = g_conf().get_val<std::string>("dbstore_db_name_prefix") + "-" + tenant;
+
+  auto db_full_path = std::filesystem::path(db_path) / db_name;
+  ldout(cct, 0) << "DB initialization full db_path(" << db_full_path << ")" << dendl;
+
+  /* create the backing store handle (sqlite) */
+  dbs = new SQLiteDB(db_full_path.string(), cct);
+
+  if (dbs->Initialize("", -1) < 0) {
+    ldout(cct, 0) << "DB initialization failed for tenant(" << tenant << ")" << dendl;
+    delete dbs;
+    return nullptr;
+  }
+
+  ret = DBStoreHandles.insert(pair<string, DB*>(tenant, dbs));
+
+  /*
+   * It's safe to check for an already existing entry (just
+   * in case another thread raced and created the entry)
+   */
+  if (ret.second == false) {
+    /* Entry already created by another thread */
+    delete dbs;
+
+    dbs = ret.first->second;
+  }
+
+  return dbs;
+}
+
+void DBStoreManager::deleteDB(string tenant) {
+  map<string, DB*>::iterator iter;
+  DB *dbs = nullptr;
+
+  if (tenant.empty() || DBStoreHandles.empty())
+    return;
+
+  /* XXX: Check if we need to perform this operation under a lock */
+  iter = DBStoreHandles.find(tenant);
+
+  if (iter == DBStoreHandles.end())
+    return;
+
+  dbs = iter->second;
+
+  DBStoreHandles.erase(iter);
+  dbs->Destroy(dbs->get_def_dpp());
+  delete dbs;
+
+  return;
+}
+
+void DBStoreManager::deleteDB(DB *dbs) {
+  if (!dbs)
+    return;
+
+  (void)deleteDB(dbs->getDBname());
+}
+
+
+void DBStoreManager::destroyAllHandles() {
+  map<string, DB*>::iterator iter;
+  DB *dbs = nullptr;
+
+  if (DBStoreHandles.empty())
+    return;
+
+  for (iter = DBStoreHandles.begin(); iter != DBStoreHandles.end();
+       ++iter) {
+    dbs = iter->second;
+    dbs->Destroy(dbs->get_def_dpp());
+    delete dbs;
+  }
+
+  DBStoreHandles.clear();
+
+  return;
+}
+
+
diff --git a/src/rgw/driver/dbstore/dbstore_mgr.h b/src/rgw/driver/dbstore/dbstore_mgr.h
new file mode 100644
index 00000000000..77fc3aaf731
--- /dev/null
+++ b/src/rgw/driver/dbstore/dbstore_mgr.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common/ceph_context.h"
+#include "common/dbstore.h"
+#include "sqlite/sqliteDB.h"
+
+using namespace rgw::store;
+using DB = rgw::store::DB;
+
+/* XXX: Should be a dbstore config option */
+const
static std::string default_tenant = "default_ns"; + +class DBStoreManager { +private: + std::map DBStoreHandles; + DB *default_db = nullptr; + CephContext *cct; + +public: + DBStoreManager(CephContext *_cct): DBStoreHandles() { + cct = _cct; + default_db = createDB(default_tenant); + }; + DBStoreManager(CephContext *_cct, std::string logfile, int loglevel): DBStoreHandles() { + /* No ceph context. Create one with log args provided */ + cct = _cct; + cct->_log->set_log_file(logfile); + cct->_log->reopen_log_file(); + cct->_conf->subsys.set_log_level(ceph_subsys_rgw, loglevel); + default_db = createDB(default_tenant); + }; + ~DBStoreManager() { destroyAllHandles(); }; + + /* XXX: TBD based on testing + * 1) Lock to protect DBStoreHandles map. + * 2) Refcount of each DBStore to protect from + * being deleted while using it. + */ + DB* getDB () { return default_db; }; + DB* getDB (std::string tenant, bool create); + DB* createDB (std::string tenant); + void deleteDB (std::string tenant); + void deleteDB (DB* db); + void destroyAllHandles(); +}; diff --git a/src/rgw/driver/dbstore/sqlite/CMakeLists.txt b/src/rgw/driver/dbstore/sqlite/CMakeLists.txt new file mode 100644 index 00000000000..909765e3058 --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.14.0) +project(sqlite_db) + +find_package(SQLite3 REQUIRED) + +set(sqlite_db_srcs + sqliteDB.h + sqliteDB.cc) + +include_directories(${CMAKE_INCLUDE_DIR}) + +set(SQLITE_COMPILE_FLAGS "-DSQLITE_THREADSAFE=1") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SQLITE_COMPILE_FLAGS}") + +add_library(sqlite_db STATIC ${sqlite_db_srcs}) +target_link_libraries(sqlite_db sqlite3 dbstore_lib rgw_common) diff --git a/src/rgw/driver/dbstore/sqlite/connection.cc b/src/rgw/driver/dbstore/sqlite/connection.cc new file mode 100644 index 00000000000..143a3a0d5a1 --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/connection.cc @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/dout.h" +#include "connection.h" +#include "error.h" + +namespace rgw::dbstore::sqlite { + +db_ptr open_database(const char* filename, int flags) +{ + sqlite3* db = nullptr; + const int result = ::sqlite3_open_v2(filename, &db, flags, nullptr); + if (result != SQLITE_OK) { + throw std::system_error(result, sqlite::error_category()); + } + // request extended result codes + (void) ::sqlite3_extended_result_codes(db, 1); + return db_ptr{db}; +} + +} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/driver/dbstore/sqlite/connection.h b/src/rgw/driver/dbstore/sqlite/connection.h new file mode 100644 index 00000000000..f5cd77d6e26 --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/connection.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include + +#include "sqlite/statement.h" + +class DoutPrefixProvider; + +namespace rgw::dbstore::sqlite { + +// owning sqlite3 pointer +struct db_deleter { + void operator()(sqlite3* p) const { ::sqlite3_close(p); } +}; +using db_ptr = std::unique_ptr; + + +// open the database file or throw on error +db_ptr open_database(const char* filename, int flags); + + +struct Connection { + db_ptr db; + // map of statements, prepared on first use + std::map statements; + + explicit Connection(db_ptr db) : db(std::move(db)) {} +}; + +// sqlite connection factory for ConnectionPool +class ConnectionFactory { + std::string uri; + int flags; + public: + ConnectionFactory(std::string uri, int flags) + : uri(std::move(uri)), flags(flags) {} + + auto operator()(const DoutPrefixProvider* dpp) + -> std::unique_ptr + { + auto db = open_database(uri.c_str(), flags); + return std::make_unique(std::move(db)); + } +}; + +} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/driver/dbstore/sqlite/error.cc b/src/rgw/driver/dbstore/sqlite/error.cc new file mode 100644 index 00000000000..5fe9eb0ae82 --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/error.cc @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "error.h" + +namespace rgw::dbstore::sqlite { + +const std::error_category& error_category() +{ + struct category : std::error_category { + const char* name() const noexcept override { + return "dbstore:sqlite"; + } + std::string message(int ev) const override { + return ::sqlite3_errstr(ev); + } + std::error_condition default_error_condition(int code) const noexcept override { + return {code & 0xFF, category()}; + } + }; + static category instance; + return instance; +} + +} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/driver/dbstore/sqlite/error.h b/src/rgw/driver/dbstore/sqlite/error.h new file mode 100644 index 00000000000..15396d8ca2b --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/error.h @@ -0,0 +1,81 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include +#include + +namespace rgw::dbstore::sqlite { + +// error category for sqlite extended result codes: +// https://www.sqlite.org/rescode.html +const std::error_category& error_category(); + + +// sqlite exception type that carries the extended error code and message +class error : public std::runtime_error { + std::error_code ec; + public: + error(const char* errmsg, std::error_code ec) + : runtime_error(errmsg), ec(ec) {} + error(sqlite3* db, std::error_code ec) : error(::sqlite3_errmsg(db), ec) {} + error(sqlite3* db, int result) : error(db, {result, error_category()}) {} + error(sqlite3* db) : error(db, ::sqlite3_extended_errcode(db)) {} + std::error_code code() const { return ec; } +}; + + +// sqlite error conditions for primary and extended result codes +// +// 'primary' error_conditions will match 'primary' error_codes as well as any +// 'extended' error_codes whose lowest 8 bits match that primary code. for +// example, the error_condition for SQLITE_CONSTRAINT will match the error_codes +// SQLITE_CONSTRAINT and SQLITE_CONSTRAINT_* +enum class errc { + // primary result codes + ok = SQLITE_OK, + busy = SQLITE_BUSY, + constraint = SQLITE_CONSTRAINT, + row = SQLITE_ROW, + done = SQLITE_DONE, + + // extended result codes + primary_key_constraint = SQLITE_CONSTRAINT_PRIMARYKEY, + foreign_key_constraint = SQLITE_CONSTRAINT_FOREIGNKEY, + unique_constraint = SQLITE_CONSTRAINT_UNIQUE, + + // ..add conditions as needed +}; + +inline std::error_code make_error_code(errc e) +{ + return {static_cast(e), error_category()}; +} + +inline std::error_condition make_error_condition(errc e) +{ + return {static_cast(e), error_category()}; +} + +} // namespace rgw::dbstore::sqlite + +namespace std { + +// enable implicit conversions from sqlite::errc to std::error_condition +template<> struct is_error_condition_enum< + rgw::dbstore::sqlite::errc> : public true_type {}; + +} // namespace std diff --git a/src/rgw/driver/dbstore/sqlite/sqliteDB.cc b/src/rgw/driver/dbstore/sqlite/sqliteDB.cc new file mode 100644 index 00000000000..b0ced456a3c --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/sqliteDB.cc @@ -0,0 +1,3001 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "sqliteDB.h" + +using namespace std; + +#define SQL_PREPARE(dpp, params, sdb, stmt, ret, Op) \ + do { \ + string schema; \ + schema = Schema(params); \ + sqlite3_prepare_v2 (*sdb, schema.c_str(), \ + -1, &stmt , NULL); \ + if (!stmt) { \ + ldpp_dout(dpp, 0) <<"failed to prepare statement " \ + <<"for Op("<(blob), blob_len); \ + \ + decode(param, b); \ + }while(0); + +#define SQL_EXECUTE(dpp, params, stmt, cbk, args...) 
\ + do{ \ + const std::lock_guard lk(((DBOp*)(this))->mtx); \ + if (!stmt) { \ + ret = Prepare(dpp, params); \ + } \ + \ + if (!stmt) { \ + ldpp_dout(dpp, 0) <<"No prepared statement "<< dendl; \ + goto out; \ + } \ + \ + ret = Bind(dpp, params); \ + if (ret) { \ + ldpp_dout(dpp, 0) <<"Bind parameters failed for stmt(" <op, stmt, cbk); \ + \ + Reset(dpp, stmt); \ + \ + if (ret) { \ + ldpp_dout(dpp, 0) <<"Execution failed for stmt(" <user_table.empty()) { + params->user_table = getUserTable(); + } + if (params->user_table.empty()) { + params->user_table = getUserTable(); + } + if (params->bucket_table.empty()) { + params->bucket_table = getBucketTable(); + } + if (params->quota_table.empty()) { + params->quota_table = getQuotaTable(); + } + if (params->lc_entry_table.empty()) { + params->lc_entry_table = getLCEntryTable(); + } + if (params->lc_head_table.empty()) { + params->lc_head_table = getLCHeadTable(); + } + + p_params.user_table = params->user_table; + p_params.bucket_table = params->bucket_table; + p_params.quota_table = params->quota_table; + p_params.lc_entry_table = params->lc_entry_table; + p_params.lc_head_table = params->lc_head_table; + + p_params.op.query_str = params->op.query_str; + + bucket = params->op.bucket.info.bucket.name; + + if (!bucket.empty()) { + if (params->object_table.empty()) { + params->object_table = getObjectTable(bucket); + } + if (params->objectdata_table.empty()) { + params->objectdata_table = getObjectDataTable(bucket); + } + if (params->object_view.empty()) { + params->object_view = getObjectView(bucket); + } + if (params->object_trigger.empty()) { + params->object_trigger = getObjectTrigger(bucket); + } + p_params.object_table = params->object_table; + p_params.objectdata_table = params->objectdata_table; + p_params.object_view = params->object_view; + } + + return 0; +} + +static int list_callback(void *None, int argc, char **argv, char **aname) +{ + int i; + for(i=0; i < argc; i++) { + string arg = argv[i] ? 
argv[i] : "NULL"; + cout<(&this->db, this->getDBname(), cct); + dbops.RemoveUser = make_shared(&this->db, this->getDBname(), cct); + dbops.GetUser = make_shared(&this->db, this->getDBname(), cct); + dbops.InsertBucket = make_shared(&this->db, this->getDBname(), cct); + dbops.UpdateBucket = make_shared(&this->db, this->getDBname(), cct); + dbops.RemoveBucket = make_shared(&this->db, this->getDBname(), cct); + dbops.GetBucket = make_shared(&this->db, this->getDBname(), cct); + dbops.ListUserBuckets = make_shared(&this->db, this->getDBname(), cct); + dbops.InsertLCEntry = make_shared(&this->db, this->getDBname(), cct); + dbops.RemoveLCEntry = make_shared(&this->db, this->getDBname(), cct); + dbops.GetLCEntry = make_shared(&this->db, this->getDBname(), cct); + dbops.ListLCEntries = make_shared(&this->db, this->getDBname(), cct); + dbops.InsertLCHead = make_shared(&this->db, this->getDBname(), cct); + dbops.RemoveLCHead = make_shared(&this->db, this->getDBname(), cct); + dbops.GetLCHead = make_shared(&this->db, this->getDBname(), cct); + + return 0; +} + +void *SQLiteDB::openDB(const DoutPrefixProvider *dpp) +{ + string dbname; + int rc = 0; + + dbname = getDBfile(); + if (dbname.empty()) { + ldpp_dout(dpp, 0)<<"dbname is NULL" << dendl; + goto out; + } + + rc = sqlite3_open_v2(dbname.c_str(), (sqlite3**)&db, + SQLITE_OPEN_READWRITE | + SQLITE_OPEN_CREATE | + SQLITE_OPEN_FULLMUTEX, + NULL); + + if (rc) { + ldpp_dout(dpp, 0) <<"Cant open "<user_table); + + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteUserTable failed " << dendl; + + ldpp_dout(dpp, 20)<<"DeleteUserTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::DeleteBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->bucket_table); + + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeletebucketTable failed " << dendl; + + ldpp_dout(dpp, 20)<<"DeletebucketTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::DeleteObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->object_table); + + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteObjectTable failed " << dendl; + + ldpp_dout(dpp, 20)<<"DeleteObjectTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::DeleteObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->objectdata_table); + + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteObjectDataTable failed " << dendl; + + ldpp_dout(dpp, 20)<<"DeleteObjectDataTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::DeleteQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->quota_table); + + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteQuotaTable failed " << dendl; + + ldpp_dout(dpp, 20)<<"DeleteQuotaTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::DeleteLCEntryTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->lc_entry_table); + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteLCEntryTable failed " << dendl; + ldpp_dout(dpp, 20)<<"DeleteLCEntryTable suceeded " << dendl; + + return ret; +} + +int 
SQLiteDB::DeleteLCHeadTable(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = DeleteTableSchema(params->lc_head_table); + ret = exec(dpp, schema.c_str(), NULL); + if (ret) + ldpp_dout(dpp, 0)<<"DeleteLCHeadTable failed " << dendl; + ldpp_dout(dpp, 20)<<"DeleteLCHeadTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = ListTableSchema(params->user_table); + ret = exec(dpp, schema.c_str(), &list_callback); + if (ret) + ldpp_dout(dpp, 0)<<"GetUsertable failed " << dendl; + + ldpp_dout(dpp, 20)<<"GetUserTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + + schema = ListTableSchema(params->bucket_table); + + ret = exec(dpp, schema.c_str(), &list_callback); + if (ret) + ldpp_dout(dpp, 0)<<"Listbuckettable failed " << dendl; + + ldpp_dout(dpp, 20)<<"ListbucketTable suceeded " << dendl; + + return ret; +} + +int SQLiteDB::ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) +{ + int ret = -1; + string schema; + map::iterator iter; + map objectmap; + string bucket; + + objectmap = getObjectMap(); + + if (objectmap.empty()) + ldpp_dout(dpp, 20)<<"objectmap empty " << dendl; + + for (iter = objectmap.begin(); iter != objectmap.end(); ++iter) { + bucket = iter->first; + params->object_table = getObjectTable(bucket); + schema = ListTableSchema(params->object_table); + + ret = exec(dpp, schema.c_str(), &list_callback); + if (ret) + ldpp_dout(dpp, 0)<<"ListObjecttable failed " << dendl; + + ldpp_dout(dpp, 20)<<"ListObjectTable suceeded " << dendl; + } + + return ret; +} + +int SQLObjectOp::InitializeObjectOps(string db_name, const DoutPrefixProvider *dpp) +{ + PutObject = make_shared(sdb, db_name, cct); + DeleteObject = make_shared(sdb, db_name, cct); + GetObject = make_shared(sdb, db_name, cct); + UpdateObject = make_shared(sdb, db_name, cct); + ListBucketObjects = make_shared(sdb, db_name, cct); + ListVersionedObjects = make_shared(sdb, db_name, cct); + PutObjectData = make_shared(sdb, db_name, cct); + UpdateObjectData = make_shared(sdb, db_name, cct); + GetObjectData = make_shared(sdb, db_name, cct); + DeleteObjectData = make_shared(sdb, db_name, cct); + DeleteStaleObjectData = make_shared(sdb, db_name, cct); + + return 0; +} + +int SQLInsertUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLInsertUser - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertUser"); +out: + return ret; +} + +int SQLInsertUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.tenant, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.tenant.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.ns, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.ns.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.display_name, sdb); + 
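+  // (note on the two-step pattern used throughout these Bind() methods:
+  //  SQL_BIND_INDEX resolves the named placeholder stored in p_params
+  //  (e.g. p_params.op.user.display_name) to a positional index,
+  //  presumably via sqlite3_bind_parameter_index(), and the matching
+  //  SQL_BIND_TEXT/SQL_BIND_INT/SQL_ENCODE_BLOB_PARAM call then binds the
+  //  value taken from params->op.user.uinfo at that index.)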
SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.display_name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_email, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_email.c_str(), sdb); + + if (!params->op.user.uinfo.access_keys.empty()) { + string access_key; + string key; + map::const_iterator it = + params->op.user.uinfo.access_keys.begin(); + const RGWAccessKey& k = it->second; + access_key = k.id; + key = k.key; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, access_key.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys_secret, sdb); + SQL_BIND_TEXT(dpp, stmt, index, key.c_str(), sdb); + + } + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.access_keys, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.swift_keys, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.swift_keys, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.subusers, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.subusers, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.suspended, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.suspended, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.max_buckets, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.max_buckets, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.op_mask, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.op_mask, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_caps, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.caps, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.admin, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.admin, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.system, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.system, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.default_placement.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.default_placement.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_tags, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.placement_tags, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.bucket_quota, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.quota.bucket_quota, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.temp_url_keys, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.temp_url_keys, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_quota, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.quota.user_quota, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.type, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.type, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.mfa_ids, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.mfa_ids, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.assumed_role_arn, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.assumed_role_arn.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, 
p_params.op.user.user_attrs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.user_attrs, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_ver, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.user.user_version.ver, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_ver_tag, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.user_version.tag.c_str(), sdb); + +out: + return rc; +} + +int SQLInsertUser::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLRemoveUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLRemoveUser - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveUser"); +out: + return ret; +} + +int SQLRemoveUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + +out: + return rc; +} + +int SQLRemoveUser::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLGetUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLGetUser - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + if (params->op.query_str == "email") { + SQL_PREPARE(dpp, p_params, sdb, email_stmt, ret, "PrepareGetUser"); + } else if (params->op.query_str == "access_key") { + SQL_PREPARE(dpp, p_params, sdb, ak_stmt, ret, "PrepareGetUser"); + } else if (params->op.query_str == "user_id") { + SQL_PREPARE(dpp, p_params, sdb, userid_stmt, ret, "PrepareGetUser"); + } else { // by default by userid + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetUser"); + } +out: + return ret; +} + +int SQLGetUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.query_str == "email") { + SQL_BIND_INDEX(dpp, email_stmt, index, p_params.op.user.user_email, sdb); + SQL_BIND_TEXT(dpp, email_stmt, index, params->op.user.uinfo.user_email.c_str(), sdb); + } else if (params->op.query_str == "access_key") { + if (!params->op.user.uinfo.access_keys.empty()) { + string access_key; + map::const_iterator it = + params->op.user.uinfo.access_keys.begin(); + const RGWAccessKey& k = it->second; + access_key = k.id; + + SQL_BIND_INDEX(dpp, ak_stmt, index, p_params.op.user.access_keys_id, sdb); + SQL_BIND_TEXT(dpp, ak_stmt, index, access_key.c_str(), sdb); + } + } else if (params->op.query_str == "user_id") { + SQL_BIND_INDEX(dpp, userid_stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, userid_stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + } else { // by default by userid + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + } + +out: + return rc; +} + +int SQLGetUser::Execute(const DoutPrefixProvider *dpp, 
struct DBOpParams *params) +{ + int ret = -1; + + if (params->op.query_str == "email") { + SQL_EXECUTE(dpp, params, email_stmt, list_user); + } else if (params->op.query_str == "access_key") { + SQL_EXECUTE(dpp, params, ak_stmt, list_user); + } else if (params->op.query_str == "user_id") { + SQL_EXECUTE(dpp, params, userid_stmt, list_user); + } else { // by default by userid + SQL_EXECUTE(dpp, params, stmt, list_user); + } + +out: + return ret; +} + +int SQLInsertBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLInsertBucket - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertBucket"); + +out: + return ret; +} + +int SQLInsertBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + // user_id here is copied as OwnerID in the bucket table. + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.tenant, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.tenant.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.marker, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.marker.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.bucket_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.size, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.size_rounded, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.size_rounded, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.creation_time, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.creation_time, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.count, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.count, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.placement_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.flags, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.flags, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.zonegroup, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.zonegroup.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.has_instance_obj, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.has_instance_obj, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.quota, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.quota, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.requester_pays, sdb); + SQL_BIND_INT(dpp, stmt, index, 
params->op.bucket.info.requester_pays, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.has_website, sdb);
+  SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.has_website, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.website_conf, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.website_conf, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.swift_versioning, sdb);
+  SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.swift_versioning, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.swift_ver_location, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.swift_ver_location.c_str(), sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.mdsearch_config, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.mdsearch_config, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.new_bucket_instance_id, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.new_bucket_instance_id.c_str(), sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.obj_lock, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.obj_lock, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.sync_policy_info_groups, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.sync_policy, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_attrs, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.bucket_attrs, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_ver, sdb);
+  SQL_BIND_INT(dpp, stmt, index, params->op.bucket.bucket_version.ver, sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_ver_tag, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.bucket_version.tag.c_str(), sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.mtime, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.mtime, sdb);
+
+out:
+  return rc;
+}
+
+int SQLInsertBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+  class SQLObjectOp *ObPtr = NULL;
+  string bucket_name = params->op.bucket.info.bucket.name;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  ObPtr = new SQLObjectOp(sdb, ctx());
+
+  objectmapInsert(dpp, bucket_name, ObPtr);
+
+  SQL_EXECUTE(dpp, params, stmt, NULL);
+
+  /* Once the bucket is inserted, create the corresponding
+   * object (and data) tables. */
+  InitPrepareParams(dpp, p_params, params);
+
+  (void)createObjectTable(dpp, params);
+  (void)createObjectDataTable(dpp, params);
+  (void)createObjectTableTrigger(dpp, params);
+out:
+  return ret;
+}
+
+int SQLUpdateBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  if (!*sdb) {
+    ldpp_dout(dpp, 0)<<"In SQLUpdateBucket - no db" << dendl;
+    goto out;
+  }
+
+  InitPrepareParams(dpp, p_params, params);
+
+  if (params->op.query_str == "attrs") {
+    SQL_PREPARE(dpp, p_params, sdb, attrs_stmt, ret, "PrepareUpdateBucket");
+  } else if (params->op.query_str == "owner") {
+    SQL_PREPARE(dpp, p_params, sdb, owner_stmt, ret, "PrepareUpdateBucket");
+  } else if (params->op.query_str == "info") {
+    SQL_PREPARE(dpp, p_params, sdb, info_stmt, ret, "PrepareUpdateBucket");
+  } else {
+    ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" <<
+      params->op.query_str << dendl;
+    goto out;
+  }
+
+out:
+  return ret;
+}
+
+int SQLUpdateBucket::Bind(const DoutPrefixProvider *dpp, struct
DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + sqlite3_stmt** stmt = NULL; // Prepared statement + + /* All below fields for attrs */ + if (params->op.query_str == "attrs") { + stmt = &attrs_stmt; + } else if (params->op.query_str == "owner") { + stmt = &owner_stmt; + } else if (params->op.query_str == "info") { + stmt = &info_stmt; + } else { + ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" << + params->op.query_str << "" << dendl; + goto out; + } + + if (params->op.query_str == "attrs") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_attrs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.bucket_attrs, sdb); + } else if (params->op.query_str == "owner") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.creation_time, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.creation_time, sdb); + } else if (params->op.query_str == "info") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.tenant, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.tenant.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.marker, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.marker.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_id, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.bucket_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.creation_time, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.creation_time, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.count, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.ent.count, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.placement_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.flags, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.flags, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.zonegroup, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.zonegroup.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.has_instance_obj, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.has_instance_obj, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.quota, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.quota, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.requester_pays, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.requester_pays, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.has_website, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.has_website, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.website_conf, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.website_conf, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.swift_versioning, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.swift_versioning, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.swift_ver_location, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.swift_ver_location.c_str(), sdb); + + 
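+  /* Sketch (an assumption about the macros, not their literal definitions):
+   * each SQL_BIND_INDEX / SQL_BIND_* pair in these Bind() methods follows
+   * the usual SQLite3 prepared-statement pattern, roughly
+   *
+   *   index = sqlite3_bind_parameter_index(*stmt, ":swift_ver_location");
+   *   rc = sqlite3_bind_text(*stmt, index, value, -1, SQLITE_TRANSIENT);
+   *   if (rc != SQLITE_OK) goto out;
+   *
+   * i.e. the first call resolves the position of the named SQL parameter
+   * taken from p_params, and the second binds the C++ value at that
+   * position, jumping to the out label on failure.
+   */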
SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.mdsearch_config, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.mdsearch_config, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.new_bucket_instance_id, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.new_bucket_instance_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.obj_lock, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.obj_lock, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.sync_policy_info_groups, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.sync_policy, sdb); + } + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_ver, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.bucket_version.ver, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.mtime, sdb); + +out: + return rc; +} + +int SQLUpdateBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + sqlite3_stmt** stmt = NULL; // Prepared statement + + if (params->op.query_str == "attrs") { + stmt = &attrs_stmt; + } else if (params->op.query_str == "owner") { + stmt = &owner_stmt; + } else if (params->op.query_str == "info") { + stmt = &info_stmt; + } else { + ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" << + params->op.query_str << "" << dendl; + goto out; + } + + SQL_EXECUTE(dpp, params, *stmt, NULL); +out: + return ret; +} + +int SQLRemoveBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLRemoveBucket - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveBucket"); + +out: + return ret; +} + +int SQLRemoveBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + +out: + return rc; +} + +int SQLRemoveBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + objectmapDelete(dpp, params->op.bucket.info.bucket.name); + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLGetBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLGetBucket - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetBucket"); + +out: + return ret; +} + +int SQLGetBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, 
params->op.bucket.info.bucket.name.c_str(), sdb); + +out: + return rc; +} + +int SQLGetBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + class SQLObjectOp *ObPtr = NULL; + + params->op.name = "GetBucket"; + + ObPtr = new SQLObjectOp(sdb, ctx()); + + /* For the case when the server restarts, need to reinsert objectmap*/ + objectmapInsert(dpp, params->op.bucket.info.bucket.name, ObPtr); + SQL_EXECUTE(dpp, params, stmt, list_bucket); +out: + return ret; +} + +int SQLListUserBuckets::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLListUserBuckets - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + if (params->op.query_str == "all") { + SQL_PREPARE(dpp, p_params, sdb, all_stmt, ret, "PrepareListUserBuckets"); + }else { + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListUserBuckets"); + } + +out: + return ret; +} + +int SQLListUserBuckets::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + sqlite3_stmt** pstmt = NULL; // Prepared statement + + if (params->op.query_str == "all") { + pstmt = &all_stmt; + } else { + pstmt = &stmt; + } + + if (params->op.query_str != "all") { + SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.user.user_id, sdb); + SQL_BIND_TEXT(dpp, *pstmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); + } + + SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.bucket.min_marker, sdb); + SQL_BIND_TEXT(dpp, *pstmt, index, params->op.bucket.min_marker.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.list_max_count, sdb); + SQL_BIND_INT(dpp, *pstmt, index, params->op.list_max_count, sdb); + +out: + return rc; +} + +int SQLListUserBuckets::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + if (params->op.query_str == "all") { + SQL_EXECUTE(dpp, params, all_stmt, list_bucket); + } else { + SQL_EXECUTE(dpp, params, stmt, list_bucket); + } +out: + return ret; +} + +int SQLPutObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLPutObject - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PreparePutObject"); + +out: + return ret; +} + +int SQLPutObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + int VersionNum = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_ns, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.acls, sdb); + 
SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.acls, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.index_ver, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.index_ver, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tag, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.flags, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.flags, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.versioned_epoch, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.versioned_epoch, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_category, sdb); + SQL_BIND_INT(dpp, stmt, index, (uint8_t)(params->op.obj.category), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.etag, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.etag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.owner, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.owner.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.owner_display_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.owner_display_name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.storage_class, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.appendable, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.appendable, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.content_type, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.content_type.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.index_hash_source, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.index_hash_source.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_size, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.accounted_size, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.accounted_size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.epoch, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.epoch, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_tag, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.obj_tag, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_tag, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.tail_tag, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.write_tag, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.write_tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.fake_tag, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.fake_tag, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.shadow_obj, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.shadow_obj.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.has_data, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.has_data, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.is_versioned, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.is_versioned, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.version_num, sdb); + SQL_BIND_INT(dpp, stmt, index, VersionNum, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, 
p_params.op.obj.pg_ver, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.pg_ver, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.zone_short_id, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.zone_short_id, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_version, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.objv_tracker.read_version.ver, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_version_tag, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.objv_tracker.read_version.tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_attrs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.attrset, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_size, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.head_size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.max_head_size, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.max_head_size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_placement_rule_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.head_placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.head_placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_placement_rule_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_placement.placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_placement.placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.manifest_part_objs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.objs, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.manifest_part_rules, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.rules, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.omap, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.omap, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.is_multipart, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.obj.is_multipart, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mp_parts, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.mp_parts, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_data, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.head_data, sdb); + +out: + return rc; +} + +int SQLPutObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLDeleteObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLDeleteObject - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteObject"); + +out: + return ret; +} + +int SQLDeleteObject::Bind(const DoutPrefixProvider *dpp, 
struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); +out: + return rc; +} + +int SQLDeleteObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLGetObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLGetObject - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetObject"); + +out: + return ret; +} + +int SQLGetObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + +out: + return rc; +} + +int SQLGetObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, list_object); +out: + return ret; +} + +int SQLUpdateObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + struct DBOpParams copy = *params; + string bucket_name; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLUpdateObject - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + if (params->op.query_str == "omap") { + SQL_PREPARE(dpp, p_params, sdb, omap_stmt, ret, "PrepareUpdateObject"); + } else if (params->op.query_str == "attrs") { + SQL_PREPARE(dpp, p_params, sdb, attrs_stmt, ret, "PrepareUpdateObject"); + } else if (params->op.query_str == "meta") { + SQL_PREPARE(dpp, p_params, sdb, meta_stmt, ret, "PrepareUpdateObject"); + } else if (params->op.query_str == "mp") { + SQL_PREPARE(dpp, p_params, sdb, mp_stmt, ret, "PrepareUpdateObject"); + } else { + ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" << + params->op.query_str << dendl; + goto out; + } + +out: + return ret; +} + +int SQLUpdateObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + sqlite3_stmt** stmt = NULL; // Prepared statement + + /* All below fields for attrs */ + if (params->op.query_str == "omap") { + stmt = &omap_stmt; + } else 
if (params->op.query_str == "attrs") { + stmt = &attrs_stmt; + } else if (params->op.query_str == "meta") { + stmt = &meta_stmt; + } else if (params->op.query_str == "mp") { + stmt = &mp_stmt; + } else { + ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" << + params->op.query_str << dendl; + goto out; + } + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.mtime, sdb); + + if (params->op.query_str == "omap") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.omap, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.omap, sdb); + } + if (params->op.query_str == "attrs") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_attrs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.attrset, sdb); + } + if (params->op.query_str == "mp") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mp_parts, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.mp_parts, sdb); + } + if (params->op.query_str == "meta") { + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_ns, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.acls, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.acls, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.index_ver, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.index_ver, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tag, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.flags, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.flags, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.versioned_epoch, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.versioned_epoch, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_category, sdb); + SQL_BIND_INT(dpp, *stmt, index, (uint8_t)(params->op.obj.category), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.etag, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.etag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.owner, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.owner.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.owner_display_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.owner_display_name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.storage_class, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.appendable, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.appendable, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.content_type, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.content_type.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, 
p_params.op.obj.index_hash_source, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.index_hash_source.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_size, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.size, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.accounted_size, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.accounted_size, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.epoch, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.epoch, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_tag, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.obj_tag, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_tag, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.tail_tag, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.write_tag, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.write_tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.fake_tag, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.fake_tag, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.shadow_obj, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.shadow_obj.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.has_data, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.has_data, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.is_versioned, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.is_versioned, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.version_num, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.version_num, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.pg_ver, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.pg_ver, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.zone_short_id, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.zone_short_id, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_version, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.objv_tracker.read_version.ver, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_version_tag, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.objv_tracker.read_version.tag.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_attrs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.attrset, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_size, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.head_size, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.max_head_size, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.max_head_size, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.obj_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_instance, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_placement_rule_name, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.head_placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.head_placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_placement_rule_name, sdb); + 
SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_placement.placement_rule.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_placement_storage_class, sdb); + SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_placement.placement_rule.storage_class.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.manifest_part_objs, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.objs, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.manifest_part_rules, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.rules, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.omap, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.omap, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.is_multipart, sdb); + SQL_BIND_INT(dpp, *stmt, index, params->op.obj.is_multipart, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mp_parts, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.mp_parts, sdb); + + SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_data, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.head_data, sdb); + } + +out: + return rc; +} + +int SQLUpdateObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + sqlite3_stmt** stmt = NULL; // Prepared statement + + if (params->op.query_str == "omap") { + stmt = &omap_stmt; + } else if (params->op.query_str == "attrs") { + stmt = &attrs_stmt; + } else if (params->op.query_str == "meta") { + stmt = &meta_stmt; + } else if (params->op.query_str == "mp") { + stmt = &mp_stmt; + } else { + ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" << + params->op.query_str << dendl; + goto out; + } + + SQL_EXECUTE(dpp, params, *stmt, NULL); +out: + return ret; +} + +int SQLListBucketObjects::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLListBucketObjects - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListBucketObjects"); + +out: + return ret; +} + +int SQLListBucketObjects::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.min_marker, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.min_marker.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.prefix, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.prefix.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb); + +out: + return rc; +} + +int SQLListBucketObjects::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, list_object); +out: + return ret; +} + +int SQLListVersionedObjects::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLListVersionedObjects - no db" << 
dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListVersionedObjects"); + +out: + return ret; +} + +int SQLListVersionedObjects::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb); + +out: + return rc; +} + +int SQLListVersionedObjects::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, list_object); +out: + return ret; +} + +int SQLPutObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLPutObjectData - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PreparePutObjectData"); + +out: + return ret; +} + +int SQLPutObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_ns, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.part_num, sdb); + + SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.part_num, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.offset, sdb); + + SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.offset, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.data, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj_data.data, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.size, sdb); + + SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.size, sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.multipart_part_str, sdb); + + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj_data.multipart_part_str.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb); + +out: + return rc; +} + +int SQLPutObjectData::Execute(const DoutPrefixProvider *dpp, 
struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLUpdateObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLUpdateObjectData - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareUpdateObjectData"); + +out: + return ret; +} + +int SQLUpdateObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb); + +out: + return rc; +} + +int SQLUpdateObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLGetObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLGetObjectData - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetObjectData"); + +out: + return ret; +} + +int SQLGetObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb); + +out: + return rc; +} + +int SQLGetObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, get_objectdata); +out: + return ret; +} + +int SQLDeleteObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLDeleteObjectData - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, 
p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteObjectData"); + +out: + return ret; +} + +int SQLDeleteObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + if (params->op.obj.state.obj.key.instance.empty()) { + params->op.obj.state.obj.key.instance = "null"; + } + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb); + +out: + return rc; +} + +int SQLDeleteObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLDeleteStaleObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLDeleteStaleObjectData - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteStaleObjectData"); + +out: + return ret; +} + +int SQLDeleteStaleObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb); + SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb); + +out: + return rc; +} + +int SQLDeleteStaleObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLInsertLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLInsertLCEntry - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertLCEntry"); + +out: + return ret; +} + +int SQLInsertLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.status, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.lc_entry.entry.get_status(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.start_time, sdb); + SQL_BIND_INT(dpp, stmt, index, params->op.lc_entry.entry.get_start_time(), sdb); + +out: + return rc; +} + +int SQLInsertLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + 
return ret; +} + +int SQLRemoveLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLRemoveLCEntry - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveLCEntry"); + +out: + return ret; +} + +int SQLRemoveLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.bucket_name, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb); + +out: + return rc; +} + +int SQLRemoveLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + + SQL_EXECUTE(dpp, params, stmt, NULL); +out: + return ret; +} + +int SQLGetLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + sqlite3_stmt** pstmt = NULL; // Prepared statement + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLGetLCEntry - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + if (params->op.query_str == "get_next_entry") { + pstmt = &next_stmt; + } else { + pstmt = &stmt; + } + SQL_PREPARE(dpp, p_params, sdb, *pstmt, ret, "PrepareGetLCEntry"); + +out: + return ret; +} + +int SQLGetLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + sqlite3_stmt** pstmt = NULL; // Prepared statement + + if (params->op.query_str == "get_next_entry") { + pstmt = &next_stmt; + } else { + pstmt = &stmt; + } + SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.lc_entry.index, sdb); + SQL_BIND_TEXT(dpp, *pstmt, index, params->op.lc_entry.index.c_str(), sdb); + + SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.lc_entry.bucket_name, sdb); + SQL_BIND_TEXT(dpp, *pstmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb); + +out: + return rc; +} + +int SQLGetLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + sqlite3_stmt** pstmt = NULL; // Prepared statement + + if (params->op.query_str == "get_next_entry") { + pstmt = &next_stmt; + } else { + pstmt = &stmt; + } + + SQL_EXECUTE(dpp, params, *pstmt, list_lc_entry); +out: + return ret; +} + +int SQLListLCEntries::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int ret = -1; + struct DBOpPrepareParams p_params = PrepareParams; + + if (!*sdb) { + ldpp_dout(dpp, 0)<<"In SQLListLCEntries - no db" << dendl; + goto out; + } + + InitPrepareParams(dpp, p_params, params); + + SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListLCEntries"); + +out: + return ret; +} + +int SQLListLCEntries::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) +{ + int index = -1; + int rc = 0; + struct DBOpPrepareParams p_params = PrepareParams; + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb); + + SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.min_marker, sdb); + SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.min_marker.c_str(), sdb); + + 
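+  /* Sketch (the SQL text is an assumption; it lives in the prepared query,
+   * not here): min_marker together with list_max_count below implements
+   * marker-based (keyset) pagination, i.e. something of the form
+   *
+   *   SELECT ... FROM LCEntry WHERE LCIndex = :index AND
+   *     BucketName > :min_marker ORDER BY BucketName LIMIT :list_max_count;
+   *
+   * so a caller pages through the entries by passing the last bucket name
+   * it received as the next min_marker.
+   */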
SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb);
+  SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb);
+
+out:
+  return rc;
+}
+
+int SQLListLCEntries::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+
+  SQL_EXECUTE(dpp, params, stmt, list_lc_entry);
+out:
+  return ret;
+}
+
+int SQLInsertLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  if (!*sdb) {
+    ldpp_dout(dpp, 0)<<"In SQLInsertLCHead - no db" << dendl;
+    goto out;
+  }
+
+  InitPrepareParams(dpp, p_params, params);
+
+  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertLCHead");
+
+out:
+  return ret;
+}
+
+int SQLInsertLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int index = -1;
+  int rc = 0;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.marker, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.head.get_marker().c_str(), sdb);
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.start_date, sdb);
+  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, static_cast<ceph::real_time>(params->op.lc_head.head.start_date), sdb);
+
+out:
+  return rc;
+}
+
+int SQLInsertLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+
+  SQL_EXECUTE(dpp, params, stmt, NULL);
+out:
+  return ret;
+}
+
+int SQLRemoveLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  if (!*sdb) {
+    ldpp_dout(dpp, 0)<<"In SQLRemoveLCHead - no db" << dendl;
+    goto out;
+  }
+
+  InitPrepareParams(dpp, p_params, params);
+
+  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveLCHead");
+
+out:
+  return ret;
+}
+
+int SQLRemoveLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int index = -1;
+  int rc = 0;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
+
+out:
+  return rc;
+}
+
+int SQLRemoveLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+
+  SQL_EXECUTE(dpp, params, stmt, NULL);
+out:
+  return ret;
+}
+
+int SQLGetLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  if (!*sdb) {
+    ldpp_dout(dpp, 0)<<"In SQLGetLCHead - no db" << dendl;
+    goto out;
+  }
+
+  InitPrepareParams(dpp, p_params, params);
+
+  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetLCHead");
+
+out:
+  return ret;
+}
+
+int SQLGetLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int index = -1;
+  int rc = 0;
+  struct DBOpPrepareParams p_params = PrepareParams;
+
+  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
+  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
+
+out:
+  return rc;
+}
+
+int SQLGetLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
+{
+  int ret = -1;
+
+  // clear the params before fetching the entry
+  params->op.lc_head.head = {};
+  SQL_EXECUTE(dpp, params, stmt, list_lc_head);
+out:
+  return ret;
+}
diff --git a/src/rgw/driver/dbstore/sqlite/sqliteDB.h b/src/rgw/driver/dbstore/sqlite/sqliteDB.h
new file mode 100644
index 00000000000..4f651448a99
--- /dev/null
+++ b/src/rgw/driver/dbstore/sqlite/sqliteDB.h
@@ -0,0 +1,554 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef SQLITE_DB_H
+#define SQLITE_DB_H
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string>
+#include <sqlite3.h>
+#include "rgw/driver/dbstore/common/dbstore.h"
+
+using namespace rgw::store;
+
+class SQLiteDB : public DB, virtual public DBOp {
+  private:
+    sqlite3_mutex *mutex = NULL;
+
+  protected:
+    CephContext *cct;
+
+  public:
+    sqlite3_stmt *stmt = NULL;
+    DBOpPrepareParams PrepareParams;
+
+    SQLiteDB(sqlite3 *dbi, std::string db_name, CephContext *_cct) : DB(db_name, _cct), cct(_cct) {
+      db = (void*)dbi;
+    }
+    SQLiteDB(std::string db_name, CephContext *_cct) : DB(db_name, _cct), cct(_cct) {
+    }
+    ~SQLiteDB() {}
+
+    uint64_t get_blob_limit() override { return SQLITE_LIMIT_LENGTH; }
+    void *openDB(const DoutPrefixProvider *dpp) override;
+    int closeDB(const DoutPrefixProvider *dpp) override;
+    int InitializeDBOps(const DoutPrefixProvider *dpp) override;
+
+    int InitPrepareParams(const DoutPrefixProvider *dpp, DBOpPrepareParams &p_params,
+                          DBOpParams* params) override;
+
+    int exec(const DoutPrefixProvider *dpp, const char *schema,
+             int (*callback)(void*,int,char**,char**));
+    int Step(const DoutPrefixProvider *dpp, DBOpInfo &op, sqlite3_stmt *stmt,
+             int (*cbk)(const DoutPrefixProvider *dpp, DBOpInfo &op, sqlite3_stmt *stmt));
+    int Reset(const DoutPrefixProvider *dpp, sqlite3_stmt *stmt);
+    /* default value matches with sqliteDB style */
+
+    int createTables(const DoutPrefixProvider *dpp) override;
+    int createBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createUserTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createObjectView(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createObjectTableTrigger(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int createQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    void populate_object_params(const DoutPrefixProvider *dpp,
+                                struct DBOpPrepareParams& p_params,
+                                struct DBOpParams* params, bool data);
+
+    int createLCTables(const DoutPrefixProvider *dpp) override;
+
+    int DeleteBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteUserTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteLCEntryTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+    int DeleteLCHeadTable(const DoutPrefixProvider *dpp, DBOpParams *params);
+
+    int ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) override;
+    int ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) override;
+    int ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) override;
+};
+
+class SQLObjectOp : public ObjectOp {
+  private:
+    sqlite3 **sdb = NULL;
+    CephContext *cct;
+
+  public:
+    SQLObjectOp(sqlite3 **sdbi, CephContext *_cct) : sdb(sdbi), cct(_cct) {};
+    ~SQLObjectOp() {}
+
+    int InitializeObjectOps(std::string db_name, const DoutPrefixProvider *dpp);
+};
+
+class SQLInsertUser : public SQLiteDB, public
InsertUserOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLInsertUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLInsertUser() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLRemoveUser : public SQLiteDB, public RemoveUserOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLRemoveUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLRemoveUser() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetUser : public SQLiteDB, public GetUserOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + sqlite3_stmt *email_stmt = NULL; // Prepared statement to query by useremail + sqlite3_stmt *ak_stmt = NULL; // Prepared statement to query by access_key_id + sqlite3_stmt *userid_stmt = NULL; // Prepared statement to query by user_id + + public: + SQLGetUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLGetUser() { + if (stmt) + sqlite3_finalize(stmt); + if (email_stmt) + sqlite3_finalize(email_stmt); + if (ak_stmt) + sqlite3_finalize(ak_stmt); + if (userid_stmt) + sqlite3_finalize(userid_stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLInsertBucket : public SQLiteDB, public InsertBucketOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLInsertBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLInsertBucket() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLUpdateBucket : public SQLiteDB, public UpdateBucketOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *info_stmt = NULL; // Prepared statement + sqlite3_stmt *attrs_stmt = NULL; // Prepared statement + sqlite3_stmt *owner_stmt = NULL; // Prepared statement + + public: + SQLUpdateBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLUpdateBucket() { + if (info_stmt) + sqlite3_finalize(info_stmt); + if (attrs_stmt) + sqlite3_finalize(attrs_stmt); + if (owner_stmt) + sqlite3_finalize(owner_stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLRemoveBucket : public SQLiteDB, public RemoveBucketOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLRemoveBucket(void **db, 
std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLRemoveBucket() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetBucket : public SQLiteDB, public GetBucketOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLGetBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLGetBucket() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLListUserBuckets : public SQLiteDB, public ListUserBucketsOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + sqlite3_stmt *all_stmt = NULL; // Prepared statement + + public: + SQLListUserBuckets(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLListUserBuckets() { + if (stmt) + sqlite3_finalize(stmt); + if (all_stmt) + sqlite3_finalize(all_stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLPutObject : public SQLiteDB, public PutObjectOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLPutObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLPutObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLPutObject() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLDeleteObject : public SQLiteDB, public DeleteObjectOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLDeleteObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLDeleteObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLDeleteObject() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetObject : public SQLiteDB, public GetObjectOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLGetObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLGetObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLGetObject() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams 
*params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLUpdateObject : public SQLiteDB, public UpdateObjectOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *omap_stmt = NULL; // Prepared statement + sqlite3_stmt *attrs_stmt = NULL; // Prepared statement + sqlite3_stmt *meta_stmt = NULL; // Prepared statement + sqlite3_stmt *mp_stmt = NULL; // Prepared statement + + public: + SQLUpdateObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLUpdateObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLUpdateObject() { + if (omap_stmt) + sqlite3_finalize(omap_stmt); + if (attrs_stmt) + sqlite3_finalize(attrs_stmt); + if (meta_stmt) + sqlite3_finalize(meta_stmt); + } + + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLListBucketObjects : public SQLiteDB, public ListBucketObjectsOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLListBucketObjects(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLListBucketObjects(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLListBucketObjects() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLListVersionedObjects : public SQLiteDB, public ListVersionedObjectsOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLListVersionedObjects(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLListVersionedObjects(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLListVersionedObjects() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLPutObjectData : public SQLiteDB, public PutObjectDataOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLPutObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLPutObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLPutObjectData() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLUpdateObjectData : public SQLiteDB, public UpdateObjectDataOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLUpdateObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLUpdateObjectData(sqlite3 **sdbi, std::string db_name, 
CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLUpdateObjectData() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetObjectData : public SQLiteDB, public GetObjectDataOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLGetObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLGetObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLGetObjectData() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLDeleteObjectData : public SQLiteDB, public DeleteObjectDataOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLDeleteObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLDeleteObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLDeleteObjectData() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLDeleteStaleObjectData : public SQLiteDB, public DeleteStaleObjectDataOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLDeleteStaleObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + SQLDeleteStaleObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} + + ~SQLDeleteStaleObjectData() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLInsertLCEntry : public SQLiteDB, public InsertLCEntryOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLInsertLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLInsertLCEntry() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLRemoveLCEntry : public SQLiteDB, public RemoveLCEntryOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLRemoveLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLRemoveLCEntry() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const 
DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetLCEntry : public SQLiteDB, public GetLCEntryOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + sqlite3_stmt *next_stmt = NULL; // Prepared statement + + public: + SQLGetLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLGetLCEntry() { + if (stmt) + sqlite3_finalize(stmt); + if (next_stmt) + sqlite3_finalize(next_stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLListLCEntries : public SQLiteDB, public ListLCEntriesOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLListLCEntries(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLListLCEntries() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLInsertLCHead : public SQLiteDB, public InsertLCHeadOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLInsertLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLInsertLCHead() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLRemoveLCHead : public SQLiteDB, public RemoveLCHeadOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLRemoveLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLRemoveLCHead() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +class SQLGetLCHead : public SQLiteDB, public GetLCHeadOp { + private: + sqlite3 **sdb = NULL; + sqlite3_stmt *stmt = NULL; // Prepared statement + + public: + SQLGetLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} + ~SQLGetLCHead() { + if (stmt) + sqlite3_finalize(stmt); + } + int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); + int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); + int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); +}; + +#endif diff --git a/src/rgw/driver/dbstore/sqlite/statement.cc b/src/rgw/driver/dbstore/sqlite/statement.cc new file mode 100644 index 00000000000..dcf7dba9c50 --- /dev/null +++ b/src/rgw/driver/dbstore/sqlite/statement.cc @@ -0,0 +1,196 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. 
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "error.h"
+#include "statement.h"
+
+#define dout_subsys ceph_subsys_rgw_dbstore
+
+namespace rgw::dbstore::sqlite {
+
+// owning pointer to arbitrary memory allocated and returned by sqlite3
+struct sqlite_deleter {
+  template <typename T>
+  void operator()(T* p) { ::sqlite3_free(p); }
+};
+template <typename T>
+using sqlite_ptr = std::unique_ptr<T, sqlite_deleter>;
+
+
+stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
+                           sqlite3* db, std::string_view sql)
+{
+  sqlite3_stmt* stmt = nullptr;
+  int result = ::sqlite3_prepare_v2(db, sql.data(), sql.size(), &stmt, nullptr);
+  auto ec = std::error_code{result, sqlite::error_category()};
+  if (ec != sqlite::errc::ok) {
+    const char* errmsg = ::sqlite3_errmsg(db);
+    ldpp_dout(dpp, 1) << "preparation failed: " << errmsg
+        << " (" << ec << ")\nstatement: " << sql << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  return stmt_ptr{stmt};
+}
+
+static int bind_index(const DoutPrefixProvider* dpp,
+                      const stmt_binding& stmt, const char* name)
+{
+  const int index = ::sqlite3_bind_parameter_index(stmt.get(), name);
+  if (index <= 0) {
+    ldpp_dout(dpp, 1) << "binding failed on parameter name="
+        << name << dendl;
+    sqlite3* db = ::sqlite3_db_handle(stmt.get());
+    throw sqlite::error(db);
+  }
+  return index;
+}
+
+void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+               const char* name, std::string_view value)
+{
+  const int index = bind_index(dpp, stmt, name);
+
+  int result = ::sqlite3_bind_text(stmt.get(), index, value.data(),
+                                   value.size(), SQLITE_STATIC);
+  auto ec = std::error_code{result, sqlite::error_category()};
+  if (ec != sqlite::errc::ok) {
+    ldpp_dout(dpp, 1) << "binding failed on parameter name="
+        << name << " value=" << value << dendl;
+    sqlite3* db = ::sqlite3_db_handle(stmt.get());
+    throw sqlite::error(db, ec);
+  }
+}
+
+void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+              const char* name, int value)
+{
+  const int index = bind_index(dpp, stmt, name);
+
+  int result = ::sqlite3_bind_int(stmt.get(), index, value);
+  auto ec = std::error_code{result, sqlite::error_category()};
+  if (ec != sqlite::errc::ok) {
+    ldpp_dout(dpp, 1) << "binding failed on parameter name="
+        << name << " value=" << value << dendl;
+    sqlite3* db = ::sqlite3_db_handle(stmt.get());
+    throw sqlite::error(db, ec);
+  }
+}
+
+void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
+{
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+
+  const int result = ::sqlite3_step(stmt.get());
+  auto ec = std::error_code{result, sqlite::error_category()};
+  sqlite3* db = ::sqlite3_db_handle(stmt.get());
+
+  if (ec != sqlite::errc::done) {
+    const char* errmsg = ::sqlite3_errmsg(db);
+    ldpp_dout(dpp, 20) << "evaluation failed: " << errmsg
+        << " (" << ec << ")\nstatement: " << sql.get() << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql.get() << dendl;
+}
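// Editorial sketch (not part of this patch): how the helpers above compose
// for a write. The Buckets table and the :bucket/:owner parameter names are
// invented for illustration; the statement API calls are the real ones.
static void example_insert(const DoutPrefixProvider* dpp, sqlite3* db)
{
  stmt_ptr stmt = prepare_statement(dpp, db,
      "INSERT INTO Buckets (BucketName, OwnerID) VALUES (:bucket, :owner)");

  stmt_binding binding{stmt.get()};  // clears parameter bindings when it leaves scope
  bind_text(dpp, binding, ":bucket", "bucket1");
  bind_text(dpp, binding, ":owner", "user_id1");

  stmt_execution exec{stmt.get()};   // resets the statement on destruction
  eval0(dpp, exec);                  // expects SQLITE_DONE, i.e. no result rows
  // any failure above surfaces as a thrown sqlite::error carrying the std::error_code
}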
+
+void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
+{
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+
+  const int result = ::sqlite3_step(stmt.get());
+  auto ec = std::error_code{result, sqlite::error_category()};
+  if (ec != sqlite::errc::row) {
+    sqlite3* db = ::sqlite3_db_handle(stmt.get());
+    const char* errmsg = ::sqlite3_errmsg(db);
+    ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
+        << ")\nstatement: " << sql.get() << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql.get() << dendl;
+}
+
+int column_int(const stmt_execution& stmt, int column)
+{
+  return ::sqlite3_column_int(stmt.get(), column);
+}
+
+std::string column_text(const stmt_execution& stmt, int column)
+{
+  const unsigned char* text = ::sqlite3_column_text(stmt.get(), column);
+  // may be NULL
+  if (text) {
+    const std::size_t size = ::sqlite3_column_bytes(stmt.get(), column);
+    return {reinterpret_cast<const char*>(text), size};
+  } else {
+    return {};
+  }
+}
+
+auto read_text_rows(const DoutPrefixProvider* dpp,
+                    const stmt_execution& stmt,
+                    std::span<std::string> entries)
+  -> std::span<std::string>
+{
+  sqlite_ptr<char> sql;
+  if (dpp->get_cct()->_conf->subsys.should_gather<dout_subsys, 20>()) {
+    sql.reset(::sqlite3_expanded_sql(stmt.get()));
+  }
+
+  std::size_t count = 0;
+  while (count < entries.size()) {
+    const int result = ::sqlite3_step(stmt.get());
+    auto ec = std::error_code{result, sqlite::error_category()};
+    if (ec == sqlite::errc::done) {
+      break;
+    }
+    if (ec != sqlite::errc::row) {
+      sqlite3* db = ::sqlite3_db_handle(stmt.get());
+      const char* errmsg = ::sqlite3_errmsg(db);
+      ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
+          << ")\nstatement: " << sql.get() << dendl;
+      throw sqlite::error(errmsg, ec);
+    }
+    entries[count] = column_text(stmt, 0);
+    ++count;
+  }
+  ldpp_dout(dpp, 20) << "statement evaluation produced " << count
+      << " results: " << sql.get() << dendl;
+
+  return entries.first(count);
+}
+
+void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query,
+             sqlite3_callback callback, void* arg)
+{
+  char* errmsg = nullptr;
+  const int result = ::sqlite3_exec(db, query, callback, arg, &errmsg);
+  auto ec = std::error_code{result, sqlite::error_category()};
+  auto ptr = sqlite_ptr<char>{errmsg}; // free on destruction
+  if (ec != sqlite::errc::ok) {
+    ldpp_dout(dpp, 1) << "query execution failed: " << errmsg << " (" << ec
+        << ")\nquery: " << query << dendl;
+    throw sqlite::error(errmsg, ec);
+  }
+  ldpp_dout(dpp, 20) << "query execution succeeded: " << query << dendl;
+}
+
+} // namespace rgw::dbstore::sqlite
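The read side mirrors the write path: eval1() positions the statement on its first result row so the column_*() accessors can pull values, and read_text_rows() drains a cursor page by page into a caller-provided span. A minimal sketch of both, assuming it is built alongside statement.h with an open sqlite3* handle; the Buckets table and its columns are invented for illustration:

```cpp
#include <span>
#include <string>
#include <string_view>
#include <sqlite3.h>
#include "statement.h"

using namespace rgw::dbstore::sqlite;

// single-row lookup: eval1() throws sqlite::error unless a row is available
std::string lookup_owner(const DoutPrefixProvider* dpp, sqlite3* db,
                         std::string_view bucket)
{
  stmt_ptr stmt = prepare_statement(dpp, db,
      "SELECT OwnerID FROM Buckets WHERE BucketName = :bucket");
  stmt_binding binding{stmt.get()};
  bind_text(dpp, binding, ":bucket", bucket);

  stmt_execution exec{stmt.get()};
  eval1(dpp, exec);              // statement now positioned on the result row
  return column_text(exec, 0);   // column 0 as text; empty string on NULL
}

// paged listing: the returned sub-span holds only the rows actually read,
// so a result shorter than entries.size() marks the end of the listing
std::span<std::string> list_bucket_names(const DoutPrefixProvider* dpp,
                                         sqlite3* db,
                                         std::span<std::string> entries)
{
  stmt_ptr stmt = prepare_statement(dpp, db,
      "SELECT BucketName FROM Buckets ORDER BY BucketName");
  stmt_execution exec{stmt.get()};
  return read_text_rows(dpp, exec, entries);
}
```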
diff --git a/src/rgw/driver/dbstore/sqlite/statement.h b/src/rgw/driver/dbstore/sqlite/statement.h
new file mode 100644
index 00000000000..98b4acfea23
--- /dev/null
+++ b/src/rgw/driver/dbstore/sqlite/statement.h
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include <span>
+#include <string>
+#include <string_view>
+
+#include <sqlite3.h>
+
+class DoutPrefixProvider;
+
+namespace rgw::dbstore::sqlite {
+
+// owning sqlite3_stmt pointer
+struct stmt_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_finalize(p); }
+};
+using stmt_ptr = std::unique_ptr<sqlite3_stmt, stmt_deleter>;
+
+// non-owning sqlite3_stmt pointer that clears binding state on destruction
+struct stmt_binding_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_clear_bindings(p); }
+};
+using stmt_binding = std::unique_ptr<sqlite3_stmt, stmt_binding_deleter>;
+
+// non-owning sqlite3_stmt pointer that clears execution state on destruction
+struct stmt_execution_deleter {
+  void operator()(sqlite3_stmt* p) const { ::sqlite3_reset(p); }
+};
+using stmt_execution = std::unique_ptr<sqlite3_stmt, stmt_execution_deleter>;
+
+
+// prepare the sql statement or throw on error
+stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
+                           sqlite3* db, std::string_view sql);
+
+// bind an input string for the given parameter name
+void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+               const char* name, std::string_view value);
+
+// bind an input integer for the given parameter name
+void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
+              const char* name, int value);
+
+// evaluate a prepared statement, expecting no result rows
+void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
+
+// evaluate a prepared statement, expecting a single result row
+void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
+
+// return the given column as an integer
+int column_int(const stmt_execution& stmt, int column);
+
+// return the given column as text, or an empty string on NULL
+std::string column_text(const stmt_execution& stmt, int column);
+
+// read the text column from each result row into the given entries, and return
+// the sub-span of entries that contain results
+auto read_text_rows(const DoutPrefixProvider* dpp,
+                    const stmt_execution& stmt,
+                    std::span<std::string> entries)
+  -> std::span<std::string>;
+
+// execute a raw query without preparing a statement. the optional callback
+// can be used to read results
+void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query,
+             sqlite3_callback callback, void* arg);
+
+} // namespace rgw::dbstore::sqlite
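execute() is the escape hatch for schema creation and other one-shot SQL: it goes through sqlite3_exec() directly, so there is nothing to bind, and the optional callback fires once per result row. A small sketch, assuming the header above; the query and function names are invented for illustration:

```cpp
#include <cstdio>
#include <sqlite3.h>
#include "statement.h"

// sqlite3_callback signature: invoked once per row with parallel arrays of
// column values and column names; returning nonzero aborts the query
static int print_row(void*, int ncols, char** values, char** names)
{
  for (int i = 0; i < ncols; i++) {
    std::printf("%s=%s ", names[i], values[i] ? values[i] : "(null)");
  }
  std::printf("\n");
  return 0;
}

void dump_buckets(const DoutPrefixProvider* dpp, sqlite3* db)
{
  // throws sqlite::error on failure, like the prepared-statement helpers
  rgw::dbstore::sqlite::execute(dpp, db, "SELECT * FROM Buckets",
                                print_row, nullptr);
}
```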
diff --git a/src/rgw/driver/dbstore/tests/CMakeLists.txt b/src/rgw/driver/dbstore/tests/CMakeLists.txt
new file mode 100644
index 00000000000..4e60dcf5ee2
--- /dev/null
+++ b/src/rgw/driver/dbstore/tests/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.14.0)
+project(dbstore-tests)
+
+set(CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} gtest)
+
+set(dbstore_tests_srcs
+  dbstore_tests.cc)
+
+include_directories(${CMAKE_INCLUDE_DIR})
+
+add_executable(unittest_dbstore_tests ${dbstore_tests_srcs})
+target_link_libraries(unittest_dbstore_tests ${CMAKE_LINK_LIBRARIES})
+add_ceph_unittest(unittest_dbstore_tests)
+
+add_executable(unittest_dbstore_mgr_tests dbstore_mgr_tests.cc)
+target_link_libraries(unittest_dbstore_mgr_tests dbstore gtest_main)
+add_ceph_unittest(unittest_dbstore_mgr_tests)
diff --git a/src/rgw/driver/dbstore/tests/dbstore_mgr_tests.cc b/src/rgw/driver/dbstore/tests/dbstore_mgr_tests.cc
new file mode 100644
index 00000000000..02ecd9f1565
--- /dev/null
+++ b/src/rgw/driver/dbstore/tests/dbstore_mgr_tests.cc
@@ -0,0 +1,157 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_context.h"
+#include "rgw/driver/dbstore/dbstore_mgr.h"
+
+#include <filesystem>
+#include <gtest/gtest.h>
+#include <memory>
+
+using namespace rgw;
+namespace fs = std::filesystem;
+const static std::string TEST_DIR = "rgw_dbstore_tests";
+
+bool endsWith(const std::string &mainStr, const std::string &toMatch)
+{
+  if (mainStr.size() >= toMatch.size() &&
+      mainStr.compare(mainStr.size() - toMatch.size(), toMatch.size(), toMatch) == 0)
+    return true;
+  else
+    return false;
+}
+
+class TestDBStoreManager : public ::testing::Test {
+protected:
+  void SetUp() override {
+    ctx_ = std::make_shared<CephContext>(CEPH_ENTITY_TYPE_CLIENT);
+    g_ceph_context = ctx_.get();
+    fs::current_path(fs::temp_directory_path());
+    fs::create_directory(TEST_DIR);
+  }
+
+  void TearDown() override {
+    fs::current_path(fs::temp_directory_path());
+    fs::remove_all(TEST_DIR);
+  }
+
+  std::string getTestDir() const {
+    auto test_dir = fs::temp_directory_path() / TEST_DIR;
+    return test_dir.string();
+  }
+
+  fs::path getDBFullPath(const std::string & base_dir,
+                         const std::string & tenant) const {
+    auto db_path = ctx_->_conf.get_val<std::string>("dbstore_db_dir");
+    const auto& db_name = ctx_->_conf.get_val<std::string>("dbstore_db_name_prefix") + "-" + tenant + ".db";
+
+    auto db_full_path = std::filesystem::path(db_path) / db_name;
+    auto db_full_path_test = fs::path(base_dir) / db_full_path;
+    return db_full_path_test;
+  }
+
+  std::string getDBTenant(const std::string & base_dir,
+                          const std::string & tenant) const {
+    auto db_name = ctx_->_conf.get_val<std::string>("dbstore_db_name_prefix");
+    db_name += "-" + tenant;
+    auto db_full_path = fs::path(base_dir) / db_name;
+    return db_full_path.string();
+  }
+
+  std::string getDBTenant(const std::string & tenant = default_tenant) const {
+    return getDBTenant(getTestDir(), tenant);
+  }
+
+  fs::path getDBFullPath(const std::string & tenant) const {
+    return getDBFullPath(getTestDir(), tenant);
+  }
+
+  fs::path getLogFilePath(const std::string & log_file) {
+    return fs::temp_directory_path() / log_file;
+  }
+
+  std::shared_ptr<CephContext> getContext() const {
+    return ctx_;
+  }
+
+ private:
+  std::shared_ptr<CephContext> ctx_;
+};
+
+TEST_F(TestDBStoreManager, BasicInstantiateUsingDBDir) {
getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); + auto dbstore_mgr = std::make_shared(getContext().get()); + EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); +} + +TEST_F(TestDBStoreManager, DBNamePrefix) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + std::string prefix = "testprefix"; + getContext()->_conf.set_val("dbstore_db_name_prefix", prefix); + + EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); + auto dbstore_mgr = std::make_shared(getContext().get()); + EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); + + // check that the database name contains the given prefix + std::string expected_db_name = prefix + "-" + default_tenant + ".db"; + EXPECT_TRUE(endsWith(getDBFullPath(default_tenant), expected_db_name)); +} + +TEST_F(TestDBStoreManager, BasicInstantiateSecondConstructor) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); + auto dbstore_mgr = std::make_shared(getContext().get(), getLogFilePath("test.log").string(), 10); + EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); +} + +TEST_F(TestDBStoreManager, TestDBName) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + auto dbstore_mgr = std::make_shared(getContext().get()); + auto db = dbstore_mgr->getDB(default_tenant, false); + ASSERT_NE(nullptr, db); + EXPECT_EQ(getDBTenant(), db->getDBname()); +} + + +TEST_F(TestDBStoreManager, TestDBNameDefaultDB) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + auto dbstore_mgr = std::make_shared(getContext().get()); + // passing an empty tenant should return the default_db + auto db = dbstore_mgr->getDB("", false); + ASSERT_NE(nullptr, db); + EXPECT_EQ(getDBTenant(), db->getDBname()); +} + +TEST_F(TestDBStoreManager, TestDBBadTenant) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + auto dbstore_mgr = std::make_shared(getContext().get()); + auto db = dbstore_mgr->getDB("does-not-exist", false); + ASSERT_EQ(nullptr, db); +} + +TEST_F(TestDBStoreManager, TestGetNewDB) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + auto dbstore_mgr = std::make_shared(getContext().get()); + + auto new_tenant_path = "new_tenant"; + auto db = dbstore_mgr->getDB(new_tenant_path, true); + ASSERT_NE(nullptr, db); + EXPECT_EQ(getDBTenant(new_tenant_path), db->getDBname()); +} + +TEST_F(TestDBStoreManager, TestDelete) { + getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); + + auto dbstore_mgr = std::make_shared(getContext().get()); + dbstore_mgr->deleteDB(default_tenant); + auto db = dbstore_mgr->getDB(default_tenant, false); + ASSERT_EQ(nullptr, db); +} diff --git a/src/rgw/driver/dbstore/tests/dbstore_tests.cc b/src/rgw/driver/dbstore/tests/dbstore_tests.cc new file mode 100644 index 00000000000..e87002f61b5 --- /dev/null +++ b/src/rgw/driver/dbstore/tests/dbstore_tests.cc @@ -0,0 +1,1424 @@ +#include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include +#include +#include "rgw_common.h" + +using namespace std; +using DB = rgw::store::DB; + +vector args; + +namespace gtest { + class Environment* env; + + class Environment : public ::testing::Environment { + public: + Environment(): tenant("default_ns"), db(nullptr), + db_type("SQLite"), ret(-1) {} + + Environment(string tenantname, string db_typename): + tenant(tenantname), db(nullptr), + db_type(db_typename), ret(-1) {} + + virtual ~Environment() {} + + void SetUp() 
override { + cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE | CINIT_FLAG_NO_MON_CONFIG | CINIT_FLAG_NO_DAEMON_ACTIONS); + if (!db_type.compare("SQLite")) { + db = new SQLiteDB(tenant, cct.get()); + ASSERT_TRUE(db != nullptr); + ret = db->Initialize(logfile, loglevel); + ASSERT_GE(ret, 0); + } + } + + void TearDown() override { + if (!db) + return; + db->Destroy(db->get_def_dpp()); + delete db; + } + + string tenant; + DB *db; + string db_type; + int ret; + string logfile = "rgw_dbstore_tests.log"; + int loglevel = 30; + boost::intrusive_ptr cct; + }; +} + +ceph::real_time bucket_mtime = real_clock::now(); +string marker1; + +class DBGetDataCB : public RGWGetDataCB { + public: + bufferlist data_bl; + off_t data_ofs, data_len; + + int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) { + data_bl = bl; + data_ofs = bl_ofs; + data_len = bl_len; + return 0; + } +}; + +namespace { + + class DBStoreTest : public ::testing::Test { + protected: + int ret; + DB *db = nullptr; + string user1 = "user1"; + string user_id1 = "user_id1"; + string bucket1 = "bucket1"; + string object1 = "object1"; + string data = "Hello World"; + DBOpParams GlobalParams = {}; + const DoutPrefixProvider *dpp; + + DBStoreTest() {} + void SetUp() { + db = gtest::env->db; + ASSERT_TRUE(db != nullptr); + dpp = db->get_def_dpp(); + ASSERT_TRUE(dpp != nullptr); + + GlobalParams.op.user.uinfo.display_name = user1; + GlobalParams.op.user.uinfo.user_id.id = user_id1; + GlobalParams.op.bucket.info.bucket.name = bucket1; + GlobalParams.op.obj.state.obj.bucket = GlobalParams.op.bucket.info.bucket; + GlobalParams.op.obj.state.obj.key.name = object1; + GlobalParams.op.obj.state.obj.key.instance = "inst1"; + GlobalParams.op.obj.obj_id = "obj_id1"; + GlobalParams.op.obj_data.part_num = 0; + + /* As of now InitializeParams doesnt do anything + * special based on fop. Hence its okay to do + * global initialization once. 
+ */ + ret = db->InitializeParams(dpp, &GlobalParams); + ASSERT_EQ(ret, 0); + } + + void TearDown() { + } + + int write_object(const DoutPrefixProvider *dpp, DBOpParams params) { + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + DB::Object::Write write_op(&op_target); + map setattrs; + ret = write_op.prepare(dpp); + if (ret) + return ret; + + write_op.meta.mtime = &bucket_mtime; + write_op.meta.category = RGWObjCategory::Main; + write_op.meta.owner = params.op.user.uinfo.user_id; + + bufferlist b1 = params.op.obj.head_data; + write_op.meta.data = &b1; + + bufferlist b2; + encode("ACL", b2); + setattrs[RGW_ATTR_ACL] = b2; + + ret = write_op.write_meta(0, params.op.obj.state.size, b1.length()+1, setattrs); + return ret; + } + }; +} + +TEST_F(DBStoreTest, InsertUser) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.user.uinfo.user_id.tenant = "tenant"; + params.op.user.uinfo.user_email = "user1@dbstore.com"; + params.op.user.uinfo.suspended = 123; + params.op.user.uinfo.max_buckets = 456; + params.op.user.uinfo.assumed_role_arn = "role"; + params.op.user.uinfo.placement_tags.push_back("tags"); + RGWAccessKey k1("id1", "key1"); + RGWAccessKey k2("id2", "key2"); + params.op.user.uinfo.access_keys["id1"] = k1; + params.op.user.uinfo.access_keys["id2"] = k2; + params.op.user.user_version.ver = 1; + params.op.user.user_version.tag = "UserTAG"; + + ret = db->ProcessOp(dpp, "InsertUser", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, GetUser) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "GetUser", ¶ms); + ASSERT_EQ(ret, 0); + ASSERT_EQ(params.op.user.uinfo.user_id.tenant, "tenant"); + ASSERT_EQ(params.op.user.uinfo.user_email, "user1@dbstore.com"); + ASSERT_EQ(params.op.user.uinfo.user_id.id, "user_id1"); + ASSERT_EQ(params.op.user.uinfo.suspended, 123); + ASSERT_EQ(params.op.user.uinfo.max_buckets, 456); + ASSERT_EQ(params.op.user.uinfo.assumed_role_arn, "role"); + ASSERT_EQ(params.op.user.uinfo.placement_tags.back(), "tags"); + RGWAccessKey k; + map::iterator it2 = params.op.user.uinfo.access_keys.begin(); + k = it2->second; + ASSERT_EQ(k.id, "id1"); + ASSERT_EQ(k.key, "key1"); + it2++; + k = it2->second; + ASSERT_EQ(k.id, "id2"); + ASSERT_EQ(k.key, "key2"); + +} + +TEST_F(DBStoreTest, GetUserQuery) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.query_str = "email"; + params.op.user.uinfo.user_email = "user1@dbstore.com"; + + ret = db->ProcessOp(dpp, "GetUser", ¶ms); + ASSERT_EQ(ret, 0); + ASSERT_EQ(params.op.user.uinfo.user_id.tenant, "tenant"); + ASSERT_EQ(params.op.user.uinfo.user_email, "user1@dbstore.com"); + ASSERT_EQ(params.op.user.uinfo.user_id.id, "user_id1"); + ASSERT_EQ(params.op.user.uinfo.suspended, 123); + ASSERT_EQ(params.op.user.uinfo.max_buckets, 456); + ASSERT_EQ(params.op.user.uinfo.assumed_role_arn, "role"); + ASSERT_EQ(params.op.user.uinfo.placement_tags.back(), "tags"); + RGWAccessKey k; + map::iterator it2 = params.op.user.uinfo.access_keys.begin(); + k = it2->second; + ASSERT_EQ(k.id, "id1"); + ASSERT_EQ(k.key, "key1"); + it2++; + k = it2->second; + ASSERT_EQ(k.id, "id2"); + ASSERT_EQ(k.key, "key2"); + +} + +TEST_F(DBStoreTest, GetUserQueryByEmail) { + int ret = -1; + RGWUserInfo uinfo; + string email = "user1@dbstore.com"; + map attrs; + RGWObjVersionTracker objv; + + ret = db->get_user(dpp, "email", email, uinfo, &attrs, &objv); + ASSERT_EQ(ret, 0); + ASSERT_EQ(uinfo.user_id.tenant, "tenant"); + ASSERT_EQ(uinfo.user_email, 
"user1@dbstore.com"); + ASSERT_EQ(uinfo.user_id.id, "user_id1"); + ASSERT_EQ(uinfo.suspended, 123); + ASSERT_EQ(uinfo.max_buckets, 456); + ASSERT_EQ(uinfo.assumed_role_arn, "role"); + ASSERT_EQ(uinfo.placement_tags.back(), "tags"); + RGWAccessKey k; + map::iterator it2 = uinfo.access_keys.begin(); + k = it2->second; + ASSERT_EQ(k.id, "id1"); + ASSERT_EQ(k.key, "key1"); + it2++; + k = it2->second; + ASSERT_EQ(k.id, "id2"); + ASSERT_EQ(k.key, "key2"); + ASSERT_EQ(objv.read_version.ver, 1); +} + +TEST_F(DBStoreTest, GetUserQueryByAccessKey) { + int ret = -1; + RGWUserInfo uinfo; + string key = "id1"; + + ret = db->get_user(dpp, "access_key", key, uinfo, nullptr, nullptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(uinfo.user_id.tenant, "tenant"); + ASSERT_EQ(uinfo.user_email, "user1@dbstore.com"); + ASSERT_EQ(uinfo.user_id.id, "user_id1"); + ASSERT_EQ(uinfo.suspended, 123); + ASSERT_EQ(uinfo.max_buckets, 456); + ASSERT_EQ(uinfo.assumed_role_arn, "role"); + ASSERT_EQ(uinfo.placement_tags.back(), "tags"); + RGWAccessKey k; + map::iterator it2 = uinfo.access_keys.begin(); + k = it2->second; + ASSERT_EQ(k.id, "id1"); + ASSERT_EQ(k.key, "key1"); + it2++; + k = it2->second; + ASSERT_EQ(k.id, "id2"); + ASSERT_EQ(k.key, "key2"); +} + +TEST_F(DBStoreTest, StoreUser) { + struct DBOpParams params = GlobalParams; + int ret = -1; + RGWUserInfo uinfo, old_uinfo; + map attrs; + RGWObjVersionTracker objv_tracker; + + bufferlist attr1, attr2; + encode("attrs1", attr1); + attrs["attr1"] = attr1; + encode("attrs2", attr2); + attrs["attr2"] = attr2; + + uinfo.user_id.id = "user_id2"; + uinfo.user_id.tenant = "tenant"; + uinfo.user_email = "user2@dbstore.com"; + uinfo.suspended = 123; + uinfo.max_buckets = 456; + uinfo.assumed_role_arn = "role"; + uinfo.placement_tags.push_back("tags"); + RGWAccessKey k1("id1", "key1"); + RGWAccessKey k2("id2", "key2"); + uinfo.access_keys["id1"] = k1; + uinfo.access_keys["id2"] = k2; + + /* non exclusive create..should create new one */ + ret = db->store_user(dpp, uinfo, false, &attrs, &objv_tracker, &old_uinfo); + ASSERT_EQ(ret, 0); + ASSERT_EQ(old_uinfo.user_email, ""); + ASSERT_EQ(objv_tracker.read_version.ver, 1); + ASSERT_EQ(objv_tracker.read_version.tag, "UserTAG"); + + /* invalid version number */ + objv_tracker.read_version.ver = 4; + ret = db->store_user(dpp, uinfo, true, &attrs, &objv_tracker, &old_uinfo); + ASSERT_EQ(ret, -125); /* returns ECANCELED */ + ASSERT_EQ(old_uinfo.user_id.id, uinfo.user_id.id); + ASSERT_EQ(old_uinfo.user_email, uinfo.user_email); + + /* exclusive create..should not create new one */ + uinfo.user_email = "user2_new@dbstore.com"; + objv_tracker.read_version.ver = 1; + ret = db->store_user(dpp, uinfo, true, &attrs, &objv_tracker, &old_uinfo); + ASSERT_EQ(ret, 0); + ASSERT_EQ(old_uinfo.user_email, "user2@dbstore.com"); + ASSERT_EQ(objv_tracker.read_version.ver, 1); + + ret = db->store_user(dpp, uinfo, false, &attrs, &objv_tracker, &old_uinfo); + ASSERT_EQ(ret, 0); + ASSERT_EQ(old_uinfo.user_email, "user2@dbstore.com"); + ASSERT_EQ(objv_tracker.read_version.ver, 2); + ASSERT_EQ(objv_tracker.read_version.tag, "UserTAG"); +} + +TEST_F(DBStoreTest, GetUserQueryByUserID) { + int ret = -1; + RGWUserInfo uinfo; + map attrs; + RGWObjVersionTracker objv; + + uinfo.user_id.tenant = "tenant"; + uinfo.user_id.id = "user_id2"; + + ret = db->get_user(dpp, "user_id", "user_id2", uinfo, &attrs, &objv); + ASSERT_EQ(ret, 0); + ASSERT_EQ(uinfo.user_id.tenant, "tenant"); + ASSERT_EQ(uinfo.user_email, "user2_new@dbstore.com"); + ASSERT_EQ(uinfo.user_id.id, "user_id2"); + 
ASSERT_EQ(uinfo.suspended, 123); + ASSERT_EQ(uinfo.max_buckets, 456); + ASSERT_EQ(uinfo.assumed_role_arn, "role"); + ASSERT_EQ(uinfo.placement_tags.back(), "tags"); + RGWAccessKey k; + map::iterator it = uinfo.access_keys.begin(); + k = it->second; + ASSERT_EQ(k.id, "id1"); + ASSERT_EQ(k.key, "key1"); + it++; + k = it->second; + ASSERT_EQ(k.id, "id2"); + ASSERT_EQ(k.key, "key2"); + + ASSERT_EQ(objv.read_version.ver, 2); + + bufferlist k1, k2; + string attr; + map::iterator it2 = attrs.begin(); + k1 = it2->second; + decode(attr, k1); + ASSERT_EQ(attr, "attrs1"); + it2++; + k2 = it2->second; + decode(attr, k2); + ASSERT_EQ(attr, "attrs2"); +} + +TEST_F(DBStoreTest, ListAllUsers) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ListAllUsers(dpp, ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, InsertBucket) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.bucket.info.bucket.name = "bucket1"; + params.op.bucket.info.bucket.tenant = "tenant"; + params.op.bucket.info.bucket.marker = "marker1"; + + params.op.bucket.ent.size = 1024; + + params.op.bucket.info.has_instance_obj = false; + params.op.bucket.bucket_version.ver = 1; + params.op.bucket.bucket_version.tag = "read_tag"; + + params.op.bucket.mtime = bucket_mtime; + + ret = db->ProcessOp(dpp, "InsertBucket", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, UpdateBucketAttrs) { + int ret = -1; + RGWBucketInfo info; + map attrs; + RGWObjVersionTracker objv; + + bufferlist aclbl, aclbl2; + encode("attrs1", aclbl); + attrs["attr1"] = aclbl; + encode("attrs2", aclbl2); + attrs["attr2"] = aclbl2; + + info.bucket.name = "bucket1"; + + /* invalid version number */ + objv.read_version.ver = 4; + ret = db->update_bucket(dpp, "attrs", info, false, nullptr, &attrs, &bucket_mtime, &objv); + ASSERT_EQ(ret, -125); /* returns ECANCELED */ + + /* right version number */ + objv.read_version.ver = 1; + ret = db->update_bucket(dpp, "attrs", info, false, nullptr, &attrs, &bucket_mtime, &objv); + ASSERT_EQ(ret, 0); + ASSERT_EQ(objv.read_version.ver, 2); +} + +TEST_F(DBStoreTest, UpdateBucketInfo) { + struct DBOpParams params = GlobalParams; + int ret = -1; + RGWBucketInfo info; + + params.op.bucket.info.bucket.name = "bucket1"; + + ret = db->ProcessOp(dpp, "GetBucket", ¶ms); + ASSERT_EQ(ret, 0); + + info = params.op.bucket.info; + + info.bucket.marker = "marker2"; + ret = db->update_bucket(dpp, "info", info, false, nullptr, nullptr, &bucket_mtime, nullptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(info.objv_tracker.read_version.ver, 3); +} + +TEST_F(DBStoreTest, GetBucket) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.bucket.info.bucket.name = "bucket1"; + ret = db->ProcessOp(dpp, "GetBucket", ¶ms); + ASSERT_EQ(ret, 0); + ASSERT_EQ(params.op.bucket.info.bucket.name, "bucket1"); + ASSERT_EQ(params.op.bucket.info.bucket.tenant, "tenant"); + ASSERT_EQ(params.op.bucket.info.bucket.marker, "marker2"); + ASSERT_EQ(params.op.bucket.ent.size, 1024); + ASSERT_EQ(params.op.bucket.ent.bucket.name, "bucket1"); + ASSERT_EQ(params.op.bucket.ent.bucket.tenant, "tenant"); + ASSERT_EQ(params.op.bucket.info.has_instance_obj, false); + ASSERT_EQ(params.op.bucket.info.objv_tracker.read_version.ver, 3); + ASSERT_EQ(params.op.bucket.info.objv_tracker.read_version.tag, "read_tag"); + ASSERT_EQ(params.op.bucket.mtime, bucket_mtime); + ASSERT_EQ(params.op.bucket.info.owner.id, "user_id1"); + bufferlist k, k2; + string acl; + map::iterator it2 = params.op.bucket.bucket_attrs.begin(); + k = it2->second; + 
decode(acl, k); + ASSERT_EQ(acl, "attrs1"); + it2++; + k2 = it2->second; + decode(acl, k2); + ASSERT_EQ(acl, "attrs2"); +} + +TEST_F(DBStoreTest, CreateBucket) { + struct DBOpParams params = GlobalParams; + int ret = -1; + RGWBucketInfo info; + RGWUserInfo owner; + rgw_bucket bucket; + obj_version objv; + rgw_placement_rule rule; + map attrs; + + owner.user_id.id = "user_id1"; + bucket.name = "bucket1"; + bucket.tenant = "tenant"; + + objv.ver = 2; + objv.tag = "write_tag"; + + rule.name = "rule1"; + rule.storage_class = "sc1"; + + ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, + attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, + null_yield, false); + ASSERT_EQ(ret, 0); + bucket.name = "bucket2"; + ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, + attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, + null_yield, false); + ASSERT_EQ(ret, 0); + bucket.name = "bucket3"; + ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, + attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, + null_yield, false); + ASSERT_EQ(ret, 0); + bucket.name = "bucket4"; + ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, + attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, + null_yield, false); + ASSERT_EQ(ret, 0); + bucket.name = "bucket5"; + ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, + attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, + null_yield, false); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, GetBucketQueryByName) { + int ret = -1; + RGWBucketInfo binfo; + binfo.bucket.name = "bucket2"; + rgw::sal::Attrs attrs; + ceph::real_time mtime; + obj_version objv; + + ret = db->get_bucket_info(dpp, "name", "", binfo, &attrs, &mtime, &objv); + ASSERT_EQ(ret, 0); + ASSERT_EQ(binfo.bucket.name, "bucket2"); + ASSERT_EQ(binfo.bucket.tenant, "tenant"); + ASSERT_EQ(binfo.owner.id, "user_id1"); + ASSERT_EQ(binfo.objv_tracker.read_version.ver, 2); + ASSERT_EQ(binfo.objv_tracker.read_version.tag, "write_tag"); + ASSERT_EQ(binfo.zonegroup, "zid"); + ASSERT_EQ(binfo.creation_time, bucket_mtime); + ASSERT_EQ(binfo.placement_rule.name, "rule1"); + ASSERT_EQ(binfo.placement_rule.storage_class, "sc1"); + ASSERT_EQ(objv.ver, 2); + ASSERT_EQ(objv.tag, "write_tag"); + + marker1 = binfo.bucket.marker; +} + +TEST_F(DBStoreTest, ListUserBuckets) { + struct DBOpParams params = GlobalParams; + int ret = -1; + rgw_user owner; + int max = 2; + bool need_stats = true; + bool is_truncated = false; + RGWUserBuckets ulist; + + owner.id = "user_id1"; + + marker1 = ""; + do { + is_truncated = false; + ret = db->list_buckets(dpp, "", owner, marker1, "", max, need_stats, &ulist, + &is_truncated); + ASSERT_EQ(ret, 0); + + cout << "marker1 :" << marker1 << "\n"; + + cout << "is_truncated :" << is_truncated << "\n"; + + for (const auto& ent: ulist.get_buckets()) { + RGWBucketEnt e = ent.second; + cout << "###################### \n"; + cout << "ent.bucket.id : " << e.bucket.name << "\n"; + cout << "ent.bucket.marker : " << e.bucket.marker << "\n"; + cout << "ent.bucket.bucket_id : " << e.bucket.bucket_id << "\n"; + cout << "ent.size : " << e.size << "\n"; + cout << "ent.rule.name : " << e.placement_rule.name << "\n"; + + marker1 = e.bucket.name; + } + ulist.clear(); + } while(is_truncated); +} + +TEST_F(DBStoreTest, BucketChown) { + int ret = -1; + RGWBucketInfo info; + rgw_user user; + user.id = "user_id2"; + + info.bucket.name = "bucket5"; + + ret = db->update_bucket(dpp, "owner", info, false, &user, 
nullptr, &bucket_mtime, nullptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(info.objv_tracker.read_version.ver, 3); +} + +TEST_F(DBStoreTest, ListAllBuckets) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ListAllBuckets(dpp, ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, ListAllBuckets2) { + struct DBOpParams params = GlobalParams; + int ret = -1; + rgw_user owner; + int max = 2; + bool need_stats = true; + bool is_truncated = false; + RGWUserBuckets ulist; + + marker1 = ""; + do { + is_truncated = false; + ret = db->list_buckets(dpp, "all", owner, marker1, "", max, need_stats, &ulist, + &is_truncated); + ASSERT_EQ(ret, 0); + + cout << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"; + cout << "ownerID : " << owner.id << "\n"; + cout << "marker1 :" << marker1 << "\n"; + + cout << "is_truncated :" << is_truncated << "\n"; + + for (const auto& ent: ulist.get_buckets()) { + RGWBucketEnt e = ent.second; + cout << "###################### \n"; + cout << "ent.bucket.id : " << e.bucket.name << "\n"; + cout << "ent.bucket.marker : " << e.bucket.marker << "\n"; + cout << "ent.bucket.bucket_id : " << e.bucket.bucket_id << "\n"; + cout << "ent.size : " << e.size << "\n"; + cout << "ent.rule.name : " << e.placement_rule.name << "\n"; + + marker1 = e.bucket.name; + } + ulist.clear(); + } while(is_truncated); +} + +TEST_F(DBStoreTest, RemoveBucketAPI) { + int ret = -1; + RGWBucketInfo info; + + info.bucket.name = "bucket5"; + + ret = db->remove_bucket(dpp, info); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, RemoveUserAPI) { + int ret = -1; + RGWUserInfo uinfo; + RGWObjVersionTracker objv; + + uinfo.user_id.tenant = "tenant"; + uinfo.user_id.id = "user_id2"; + + /* invalid version number...should fail */ + objv.read_version.ver = 4; + ret = db->remove_user(dpp, uinfo, &objv); + ASSERT_EQ(ret, -125); + + objv.read_version.ver = 2; + ret = db->remove_user(dpp, uinfo, &objv); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, PutObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.obj.category = RGWObjCategory::Main; + params.op.obj.storage_class = "STANDARD"; + bufferlist b1; + encode("HELLO WORLD", b1); + cout<<"XXXXXXXXX Insert b1.length " << b1.length() << "\n"; + params.op.obj.head_data = b1; + params.op.obj.state.size = 12; + params.op.obj.state.is_olh = false; + ret = db->ProcessOp(dpp, "PutObject", ¶ms); + ASSERT_EQ(ret, 0); + + /* Insert another objects */ + params.op.obj.state.obj.key.name = "object2"; + params.op.obj.state.obj.key.instance = "inst2"; + ret = db->ProcessOp(dpp, "PutObject", ¶ms); + ASSERT_EQ(ret, 0); + + params.op.obj.state.obj.key.name = "object3"; + params.op.obj.state.obj.key.instance = "inst3"; + ret = db->ProcessOp(dpp, "PutObject", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, ListAllObjects) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ListAllObjects(dpp, ¶ms); + ASSERT_GE(ret, 0); +} + +TEST_F(DBStoreTest, GetObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "GetObject", ¶ms); + ASSERT_EQ(ret, 0); + ASSERT_EQ(params.op.obj.category, RGWObjCategory::Main); + ASSERT_EQ(params.op.obj.storage_class, "STANDARD"); + string data; + decode(data, params.op.obj.head_data); + ASSERT_EQ(data, "HELLO WORLD"); + ASSERT_EQ(params.op.obj.state.size, 12); + cout << "versionNum :" << params.op.obj.version_num << "\n"; +} + +TEST_F(DBStoreTest, GetObjectState) { + struct DBOpParams params = GlobalParams; + int ret = -1; + RGWObjState* s; + + 
params.op.obj.state.obj.key.name = "object2"; + params.op.obj.state.obj.key.instance = "inst2"; + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + ret = op_target.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, + false, &s); + ASSERT_EQ(ret, 0); + ASSERT_EQ(s->size, 12); + ASSERT_EQ(s->is_olh, false); + cout << "versionNum :" << params.op.obj.version_num << "\n"; + + /* Recheck with get_state API */ + ret = op_target.get_state(dpp, &s, false); + ASSERT_EQ(ret, 0); + ASSERT_EQ(s->size, 12); + ASSERT_EQ(s->is_olh, false); + cout << "versionNum :" << params.op.obj.version_num << "\n"; +} + +TEST_F(DBStoreTest, ObjAttrs) { + struct DBOpParams params = GlobalParams; + int ret = -1; + map setattrs; + map rmattrs; + map readattrs; + + bufferlist b1, b2, b3; + encode("ACL", b1); + setattrs[RGW_ATTR_ACL] = b1; + encode("LC", b2); + setattrs[RGW_ATTR_LC] = b2; + encode("ETAG", b3); + setattrs[RGW_ATTR_ETAG] = b3; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + /* Set some attrs */ + ret = op_target.set_attrs(dpp, setattrs, nullptr); + ASSERT_EQ(ret, 0); + + /* read those attrs */ + DB::Object::Read read_op(&op_target); + read_op.params.attrs = &readattrs; + ret = read_op.prepare(dpp); + ASSERT_EQ(ret, 0); + + string val; + decode(val, readattrs[RGW_ATTR_ACL]); + ASSERT_EQ(val, "ACL"); + decode(val, readattrs[RGW_ATTR_LC]); + ASSERT_EQ(val, "LC"); + decode(val, readattrs[RGW_ATTR_ETAG]); + ASSERT_EQ(val, "ETAG"); + + /* Remove some attrs */ + rmattrs[RGW_ATTR_ACL] = b1; + map empty; + ret = op_target.set_attrs(dpp, empty, &rmattrs); + ASSERT_EQ(ret, 0); + + /* read those attrs */ + ret = read_op.prepare(dpp); + ASSERT_EQ(ret, 0); + + ASSERT_EQ(readattrs.count(RGW_ATTR_ACL), 0); + decode(val, readattrs[RGW_ATTR_LC]); + ASSERT_EQ(val, "LC"); + decode(val, readattrs[RGW_ATTR_ETAG]); + ASSERT_EQ(val, "ETAG"); +} + +TEST_F(DBStoreTest, WriteObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + params.op.obj.state.obj.key.name = "object3"; + params.op.obj.state.obj.key.instance = "inst3"; + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + bufferlist b1; + encode("HELLO WORLD - Object3", b1); + params.op.obj.head_data = b1; + params.op.obj.state.size = 22; + + ret = write_object(dpp, params); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, ReadObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + map readattrs; + params.op.obj.state.obj.key.name = "object3"; + params.op.obj.state.obj.key.instance = "inst3"; + uint64_t obj_size; + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + DB::Object::Read read_op(&op_target); + read_op.params.attrs = &readattrs; + read_op.params.obj_size = &obj_size; + ret = read_op.prepare(dpp); + ASSERT_EQ(ret, 0); + + bufferlist bl; + ret = read_op.read(0, 25, bl, dpp); + cout<<"XXXXXXXXX Insert bl.length " << bl.length() << "\n"; + ASSERT_EQ(ret, 25); + + string data; + decode(data, bl); + ASSERT_EQ(data, "HELLO WORLD - Object3"); + ASSERT_EQ(obj_size, 22); +} + +TEST_F(DBStoreTest, IterateObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + map readattrs; + uint64_t obj_size; + DBGetDataCB cb; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + DB::Object::Read read_op(&op_target); + read_op.params.attrs = &readattrs; + read_op.params.obj_size = &obj_size; + ret = read_op.prepare(dpp); + ASSERT_EQ(ret, 0); + + bufferlist bl; + ret = read_op.iterate(dpp, 0, 
15, &cb); + ASSERT_EQ(ret, 0); + string data; + decode(data, cb.data_bl); + cout << "XXXXXXXXXX iterate data is " << data << ", bl_ofs = " << cb.data_ofs << ", bl_len = " << cb.data_len << "\n"; + ASSERT_EQ(data, "HELLO WORLD"); + ASSERT_EQ(cb.data_ofs, 0); + ASSERT_EQ(cb.data_len, 15); +} + +TEST_F(DBStoreTest, ListBucketObjects) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + int max = 2; + bool is_truncated = false; + rgw_obj_key marker1; + DB::Bucket target(db, params.op.bucket.info); + DB::Bucket::List list_op(&target); + + vector dir_list; + + marker1.name = ""; + do { + is_truncated = false; + list_op.params.marker = marker1; + ret = list_op.list_objects(dpp, max, &dir_list, nullptr, &is_truncated); + ASSERT_EQ(ret, 0); + + cout << "marker1 :" << marker1.name << "\n"; + + cout << "is_truncated :" << is_truncated << "\n"; + + for (const auto& ent: dir_list) { + cls_rgw_obj_key key = ent.key; + cout << "###################### \n"; + cout << "key.name : " << key.name << "\n"; + cout << "key.instance : " << key.instance << "\n"; + + marker1 = list_op.get_next_marker(); + } + dir_list.clear(); + } while(is_truncated); +} + +TEST_F(DBStoreTest, DeleteObj) { + struct DBOpParams params = GlobalParams; + int ret = -1; + RGWObjState *s; + + /* delete object2 */ + params.op.obj.state.obj.key.name = "object2"; + params.op.obj.state.obj.key.instance = "inst2"; + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + DB::Object::Delete delete_op(&op_target); + ret = delete_op.delete_obj(dpp); + ASSERT_EQ(ret, 0); + + /* Should return ENOENT */ + ret = op_target.get_state(dpp, &s, false); + ASSERT_EQ(ret, -2); +} + +TEST_F(DBStoreTest, WriteVersionedObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::string instances[] = {"inst1", "inst2", "inst3"}; + bufferlist b1; + + params.op.obj.flags |= rgw_bucket_dir_entry::FLAG_CURRENT; + params.op.obj.state.obj.key.name = "object1"; + + /* Write versioned objects */ + DB::Object op_target(db, params.op.bucket.info, params.op.obj.state.obj); + DB::Object::Write write_op(&op_target); + + /* Version1 */ + params.op.obj.state.obj.key.instance = instances[0]; + encode("HELLO WORLD", b1); + params.op.obj.head_data = b1; + params.op.obj.state.size = 12; + ret = write_object(dpp, params); + ASSERT_EQ(ret, 0); + + /* Version2 */ + params.op.obj.state.obj.key.instance = instances[1]; + b1.clear(); + encode("HELLO WORLD ABC", b1); + params.op.obj.head_data = b1; + params.op.obj.state.size = 16; + ret = write_object(dpp, params); + ASSERT_EQ(ret, 0); + + /* Version3 */ + params.op.obj.state.obj.key.instance = instances[2]; + b1.clear(); + encode("HELLO WORLD A", b1); + params.op.obj.head_data = b1; + params.op.obj.state.size = 14; + ret = write_object(dpp, params); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, ListVersionedObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::string instances[] = {"inst1", "inst2", "inst3"}; + int i = 0; + + /* list versioned objects */ + params.op.obj.state.obj.key.instance.clear(); + params.op.list_max_count = MAX_VERSIONED_OBJECTS; + ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); + ASSERT_EQ(ret, 0); + + i = 2; + for (auto ent: params.op.obj.list_entries) { + + + ASSERT_EQ(ent.key.instance, instances[i]); + i--; + } +} + +TEST_F(DBStoreTest, ReadVersionedObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::string instances[] = {"inst1", "inst2", "inst3"}; + std::string data; + + /* read object.. 
should fetch latest version */ + RGWObjState* s; + params = GlobalParams; + params.op.obj.state.obj.key.instance.clear(); + DB::Object op_target2(db, params.op.bucket.info, params.op.obj.state.obj); + ret = op_target2.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, + true, &s); + ASSERT_EQ(ret, 0); + ASSERT_EQ(s->obj.key.instance, instances[2]); + decode(data, s->data); + ASSERT_EQ(data, "HELLO WORLD A"); + ASSERT_EQ(s->size, 14); + + /* read a particular non-current version */ + params.op.obj.state.obj.key.instance = instances[1]; + DB::Object op_target3(db, params.op.bucket.info, params.op.obj.state.obj); + ret = op_target3.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, + true, &s); + ASSERT_EQ(ret, 0); + decode(data, s->data); + ASSERT_EQ(data, "HELLO WORLD ABC"); + ASSERT_EQ(s->size, 16); +} + +TEST_F(DBStoreTest, DeleteVersionedObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::string instances[] = {"inst1", "inst2", "inst3"}; + std::string data; + std::string dm_instance; + int i = 0; + + /* Delete object..should create delete marker */ + params.op.obj.state.obj.key.instance.clear(); + DB::Object op_target(db, params.op.bucket.info, params.op.obj.state.obj); + DB::Object::Delete delete_op(&op_target); + delete_op.params.versioning_status |= BUCKET_VERSIONED; + + ret = delete_op.delete_obj(dpp); + ASSERT_EQ(ret, 0); + + /* list versioned objects */ + params = GlobalParams; + params.op.obj.state.obj.key.instance.clear(); + params.op.list_max_count = MAX_VERSIONED_OBJECTS; + ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); + + i = 3; + for (auto ent: params.op.obj.list_entries) { + string is_delete_marker = (ent.flags & rgw_bucket_dir_entry::FLAG_DELETE_MARKER)? "true" : "false"; + cout << "ent.name: " << ent.key.name << ". ent.instance: " << ent.key.instance << " is_delete_marker = " << is_delete_marker << "\n"; + + if (i == 3) { + ASSERT_EQ(is_delete_marker, "true"); + dm_instance = ent.key.instance; + } else { + ASSERT_EQ(is_delete_marker, "false"); + ASSERT_EQ(ent.key.instance, instances[i]); + } + + i--; + } + + /* read object.. should return -ENOENT */ + RGWObjState* s; + params = GlobalParams; + params.op.obj.state.obj.key.instance.clear(); + DB::Object op_target2(db, params.op.bucket.info, params.op.obj.state.obj); + ret = op_target2.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, + true, &s); + ASSERT_EQ(ret, -ENOENT); + + /* Delete delete marker..should be able to read object now */ + params.op.obj.state.obj.key.instance = dm_instance; + DB::Object op_target3(db, params.op.bucket.info, params.op.obj.state.obj); + DB::Object::Delete delete_op2(&op_target3); + delete_op2.params.versioning_status |= BUCKET_VERSIONED; + + ret = delete_op2.delete_obj(dpp); + ASSERT_EQ(ret, 0); + + /* read object.. should fetch latest version */ + params = GlobalParams; + params.op.obj.state.obj.key.instance.clear(); + DB::Object op_target4(db, params.op.bucket.info, params.op.obj.state.obj); + ret = op_target4.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, + true, &s); + ASSERT_EQ(s->obj.key.instance, instances[2]); + decode(data, s->data); + ASSERT_EQ(data, "HELLO WORLD A"); + ASSERT_EQ(s->size, 14); + + /* delete latest version using version-id. 
Next version should get promoted */ + params.op.obj.state.obj.key.instance = instances[2]; + DB::Object op_target5(db, params.op.bucket.info, params.op.obj.state.obj); + DB::Object::Delete delete_op3(&op_target5); + delete_op3.params.versioning_status |= BUCKET_VERSIONED; + + ret = delete_op3.delete_obj(dpp); + ASSERT_EQ(ret, 0); + + /* list versioned objects..only two versions should be present + * with second version marked as CURRENT */ + params = GlobalParams; + params.op.obj.state.obj.key.instance.clear(); + params.op.list_max_count = MAX_VERSIONED_OBJECTS; + ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); + + i = 1; + for (auto ent: params.op.obj.list_entries) { + + if (i == 1) { + dm_instance = ent.key.instance; + } else { + ASSERT_EQ(ent.key.instance, instances[i]); + } + + i--; + } + +} + +TEST_F(DBStoreTest, ObjectOmapSetVal) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + string val = "part1_val"; + bufferlist bl; + encode(val, bl); + ret = op_target.obj_omap_set_val_by_key(dpp, "part1", bl, false); + ASSERT_EQ(ret, 0); + + val = "part2_val"; + bl.clear(); + encode(val, bl); + ret = op_target.obj_omap_set_val_by_key(dpp, "part2", bl, false); + ASSERT_EQ(ret, 0); + + val = "part3_val"; + bl.clear(); + encode(val, bl); + ret = op_target.obj_omap_set_val_by_key(dpp, "part3", bl, false); + ASSERT_EQ(ret, 0); + + val = "part4_val"; + bl.clear(); + encode(val, bl); + ret = op_target.obj_omap_set_val_by_key(dpp, "part4", bl, false); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, ObjectOmapGetValsByKeys) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::set keys; + std::map vals; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + keys.insert("part2"); + keys.insert("part4"); + + ret = op_target.obj_omap_get_vals_by_keys(dpp, "", keys, &vals); + ASSERT_EQ(ret, 0); + ASSERT_EQ(vals.size(), 2); + + string val; + decode(val, vals["part2"]); + ASSERT_EQ(val, "part2_val"); + decode(val, vals["part4"]); + ASSERT_EQ(val, "part4_val"); +} + +TEST_F(DBStoreTest, ObjectOmapGetAll) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::map vals; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + ret = op_target.obj_omap_get_all(dpp, &vals); + ASSERT_EQ(ret, 0); + ASSERT_EQ(vals.size(), 4); + + string val; + decode(val, vals["part1"]); + ASSERT_EQ(val, "part1_val"); + decode(val, vals["part2"]); + ASSERT_EQ(val, "part2_val"); + decode(val, vals["part3"]); + ASSERT_EQ(val, "part3_val"); + decode(val, vals["part4"]); + ASSERT_EQ(val, "part4_val"); +} + +TEST_F(DBStoreTest, ObjectOmapGetVals) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::set keys; + std::map vals; + bool pmore; + + DB::Object op_target(db, params.op.bucket.info, + params.op.obj.state.obj); + + ret = op_target.obj_omap_get_vals(dpp, "part3", 10, &vals, &pmore); + ASSERT_EQ(ret, 0); + ASSERT_EQ(vals.size(), 2); + + string val; + decode(val, vals["part3"]); + ASSERT_EQ(val, "part3_val"); + decode(val, vals["part4"]); + ASSERT_EQ(val, "part4_val"); +} + +TEST_F(DBStoreTest, PutObjectData) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.obj_data.part_num = 1; + params.op.obj_data.offset = 10; + params.op.obj_data.multipart_part_str = "2"; + bufferlist b1; + encode("HELLO WORLD", b1); + params.op.obj_data.data = b1; + params.op.obj_data.size = 12; + params.op.obj.state.mtime = real_clock::now(); 
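+  // The (part_num, offset, multipart_part_str) triple above identifies the
+  // chunk being written; the GetObjectData test further below reads the same
+  // chunk back and asserts on exactly these values. The particular numbers
+  // used here are arbitrary test fixtures.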
+ ret = db->ProcessOp(dpp, "PutObjectData", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, UpdateObjectData) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.obj.state.mtime = bucket_mtime; + ret = db->ProcessOp(dpp, "UpdateObjectData", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, GetObjectData) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.obj.state.obj.key.instance = "inst1"; + params.op.obj.state.obj.key.name = "object1"; + ret = db->ProcessOp(dpp, "GetObjectData", ¶ms); + ASSERT_EQ(ret, 0); + ASSERT_EQ(params.op.obj_data.part_num, 1); + ASSERT_EQ(params.op.obj_data.offset, 10); + ASSERT_EQ(params.op.obj_data.multipart_part_str, "2"); + ASSERT_EQ(params.op.obj.state.obj.key.instance, "inst1"); + ASSERT_EQ(params.op.obj.state.obj.key.name, "object1"); + ASSERT_EQ(params.op.obj.state.mtime, bucket_mtime); + string data; + decode(data, params.op.obj_data.data); + ASSERT_EQ(data, "HELLO WORLD"); +} + +TEST_F(DBStoreTest, DeleteObjectData) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "DeleteObjectData", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, DeleteObject) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "DeleteObject", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, LCTables) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->createLCTables(dpp); + ASSERT_GE(ret, 0); +} + +TEST_F(DBStoreTest, LCHead) { + struct DBOpParams params = GlobalParams; + int ret = -1; + std::string index1 = "bucket1"; + std::string index2 = "bucket2"; + time_t lc_time = ceph_clock_now(); + std::unique_ptr head; + std::string ents[] = {"entry1", "entry2", "entry3"}; + rgw::sal::StoreLifecycle::StoreLCHead head1(lc_time, 0, ents[0]); + rgw::sal::StoreLifecycle::StoreLCHead head2(lc_time, 0, ents[1]); + rgw::sal::StoreLifecycle::StoreLCHead head3(lc_time, 0, ents[2]); + + ret = db->put_head(index1, head1); + ASSERT_EQ(ret, 0); + ret = db->put_head(index2, head2); + ASSERT_EQ(ret, 0); + + ret = db->get_head(index1, &head); + ASSERT_EQ(ret, 0); + ASSERT_EQ(head->get_marker(), "entry1"); + + ret = db->get_head(index2, &head); + ASSERT_EQ(ret, 0); + ASSERT_EQ(head->get_marker(), "entry2"); + + // update index1 + ret = db->put_head(index1, head3); + ASSERT_EQ(ret, 0); + ret = db->get_head(index1, &head); + ASSERT_EQ(ret, 0); + ASSERT_EQ(head->get_marker(), "entry3"); + +} +TEST_F(DBStoreTest, LCEntry) { + struct DBOpParams params = GlobalParams; + int ret = -1; + uint64_t lc_time = ceph_clock_now(); + std::string index1 = "lcindex1"; + std::string index2 = "lcindex2"; + typedef enum {lc_uninitial = 1, lc_complete} status; + std::string ents[] = {"bucket1", "bucket2", "bucket3", "bucket4"}; + std::unique_ptr entry; + rgw::sal::StoreLifecycle::StoreLCEntry entry1(ents[0], lc_time, lc_uninitial); + rgw::sal::StoreLifecycle::StoreLCEntry entry2(ents[1], lc_time, lc_uninitial); + rgw::sal::StoreLifecycle::StoreLCEntry entry3(ents[2], lc_time, lc_uninitial); + rgw::sal::StoreLifecycle::StoreLCEntry entry4(ents[3], lc_time, lc_uninitial); + + vector> lc_entries; + + ret = db->set_entry(index1, entry1); + ASSERT_EQ(ret, 0); + ret = db->set_entry(index1, entry2); + ASSERT_EQ(ret, 0); + ret = db->set_entry(index1, entry3); + ASSERT_EQ(ret, 0); + ret = db->set_entry(index2, entry4); + ASSERT_EQ(ret, 0); + + // get entry index1, entry1 + ret = db->get_entry(index1, ents[0], &entry); + ASSERT_EQ(ret, 0); + ASSERT_EQ(entry->get_status(), 
lc_uninitial); + ASSERT_EQ(entry->get_start_time(), lc_time); + + // get next entry index1, entry2 + ret = db->get_next_entry(index1, ents[1], &entry); + ASSERT_EQ(ret, 0); + ASSERT_EQ(entry->get_bucket(), ents[2]); + ASSERT_EQ(entry->get_status(), lc_uninitial); + ASSERT_EQ(entry->get_start_time(), lc_time); + + // update entry4 to entry5 + entry4.status = lc_complete; + ret = db->set_entry(index2, entry4); + ASSERT_EQ(ret, 0); + ret = db->get_entry(index2, ents[3], &entry); + ASSERT_EQ(ret, 0); + ASSERT_EQ(entry->get_status(), lc_complete); + + // list entries + ret = db->list_entries(index1, "", 5, lc_entries); + ASSERT_EQ(ret, 0); + for (const auto& ent: lc_entries) { + cout << "###################### \n"; + cout << "lc entry.bucket : " << ent->get_bucket() << "\n"; + cout << "lc entry.status : " << ent->get_status() << "\n"; + } + + // remove index1, entry3 + ret = db->rm_entry(index1, entry3); + ASSERT_EQ(ret, 0); + + // get next entry index1, entry2.. should be null + entry.release(); + ret = db->get_next_entry(index1, ents[1], &entry); + ASSERT_EQ(ret, 0); + ASSERT_EQ(entry.get(), nullptr); +} + +TEST_F(DBStoreTest, RemoveBucket) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "RemoveBucket", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, RemoveUser) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + ret = db->ProcessOp(dpp, "RemoveUser", ¶ms); + ASSERT_EQ(ret, 0); +} + +TEST_F(DBStoreTest, InsertTestIDUser) { + struct DBOpParams params = GlobalParams; + int ret = -1; + + params.op.user.uinfo.user_id.id = "testid"; + params.op.user.uinfo.display_name = "M. Tester"; + params.op.user.uinfo.user_id.tenant = "tenant"; + params.op.user.uinfo.user_email = "tester@ceph.com"; + RGWAccessKey k1("0555b35654ad1656d804", "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="); + params.op.user.uinfo.access_keys["0555b35654ad1656d804"] = k1; + params.op.user.user_version.ver = 1; + params.op.user.user_version.tag = "UserTAG"; + + ret = db->ProcessOp(dpp, "InsertUser", ¶ms); + ASSERT_EQ(ret, 0); +} + +int main(int argc, char **argv) +{ + int ret = -1; + string c_logfile = "rgw_dbstore_tests.log"; + int c_loglevel = 20; + + // format: ./dbstore-tests logfile loglevel + if (argc == 3) { + c_logfile = argv[1]; + c_loglevel = (atoi)(argv[2]); + cout << "logfile:" << c_logfile << ", loglevel set to " << c_loglevel << "\n"; + } + + ::testing::InitGoogleTest(&argc, argv); + + gtest::env = new gtest::Environment(); + gtest::env->logfile = c_logfile; + gtest::env->loglevel = c_loglevel; + ::testing::AddGlobalTestEnvironment(gtest::env); + + ret = RUN_ALL_TESTS(); + + return ret; +} diff --git a/src/rgw/driver/immutable_config/store.cc b/src/rgw/driver/immutable_config/store.cc new file mode 100644 index 00000000000..8d3e0765faa --- /dev/null +++ b/src/rgw/driver/immutable_config/store.cc @@ -0,0 +1,422 @@ +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "rgw_zone.h" +#include "store.h" + +namespace rgw::sal { + +ImmutableConfigStore::ImmutableConfigStore(const RGWZoneGroup& zonegroup, + const RGWZoneParams& zone, + const RGWPeriodConfig& period_config) + : zonegroup(zonegroup), zone(zone), period_config(period_config) +{ +} + +// Realm +int ImmutableConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id) +{ + return -EROFS; +} + +int ImmutableConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string& realm_id) +{ + return -ENOENT; +} + +int ImmutableConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y) +{ + return -EROFS; +} + + +int ImmutableConfigStore::create_realm(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWRealm& info, + std::unique_ptr* writer) +{ + return -EROFS; +} + +int ImmutableConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWRealm& info, + std::unique_ptr* writer) +{ + return -ENOENT; +} + +int ImmutableConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_name, + RGWRealm& info, + std::unique_ptr* writer) +{ + return -ENOENT; +} + +int ImmutableConfigStore::read_default_realm(const DoutPrefixProvider* dpp, + optional_yield y, + RGWRealm& info, + std::unique_ptr* writer) +{ + return -ENOENT; +} + +int ImmutableConfigStore::read_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, std::string_view realm_name, + std::string& realm_id) +{ + return -ENOENT; +} + +int ImmutableConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp, + optional_yield y, + const RGWPeriod& period) +{ + return -ENOTSUP; +} + +int ImmutableConfigStore::list_realm_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& result) +{ + result.next.clear(); + result.entries = entries.first(0); + return 0; +} + + +// Period +int ImmutableConfigStore::create_period(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWPeriod& info) +{ + return -EROFS; +} + +int ImmutableConfigStore::read_period(const DoutPrefixProvider* dpp, + optional_yield y, std::string_view period_id, + std::optional epoch, RGWPeriod& info) +{ + return -ENOENT; +} + +int ImmutableConfigStore::delete_period(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view period_id) +{ + return -EROFS; +} + +int ImmutableConfigStore::list_period_ids(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& result) +{ + result.next.clear(); + result.entries = entries.first(0); + return 0; +} + + +// ZoneGroup + +class ImmutableZoneGroupWriter : public ZoneGroupWriter { + public: + int write(const DoutPrefixProvider* dpp, optional_yield y, + const RGWZoneGroup& info) override + { + return -EROFS; + } + int rename(const DoutPrefixProvider* dpp, optional_yield y, + RGWZoneGroup& info, std::string_view new_name) override + { + return -EROFS; + } + int remove(const DoutPrefixProvider* dpp, optional_yield y) override + { + return -EROFS; + } +}; + +int ImmutableConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + std::string_view zonegroup_id) +{ + return -EROFS; +} + +int ImmutableConfigStore::read_default_zonegroup_id(const 
DoutPrefixProvider* dpp,
+                                                    optional_yield y,
+                                                    std::string_view realm_id,
+                                                    std::string& zonegroup_id)
+{
+  if (!realm_id.empty()) {
+    return -ENOENT;
+  }
+  zonegroup_id = zonegroup.id;
+  return 0;
+}
+
+int ImmutableConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                      optional_yield y,
+                                                      std::string_view realm_id)
+{
+  return -EROFS;
+}
+
+
+int ImmutableConfigStore::create_zonegroup(const DoutPrefixProvider* dpp,
+                                           optional_yield y, bool exclusive,
+                                           const RGWZoneGroup& info,
+                                           std::unique_ptr<ZoneGroupWriter>* writer)
+{
+  return -EROFS;
+}
+
+int ImmutableConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp,
+                                               optional_yield y,
+                                               std::string_view zonegroup_id,
+                                               RGWZoneGroup& info,
+                                               std::unique_ptr<ZoneGroupWriter>* writer)
+{
+  if (zonegroup_id != zonegroup.id) {
+    return -ENOENT;
+  }
+
+  info = zonegroup;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneGroupWriter>();
+  }
+  return 0;
+}
+int ImmutableConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp,
+                                                 optional_yield y,
+                                                 std::string_view zonegroup_name,
+                                                 RGWZoneGroup& info,
+                                                 std::unique_ptr<ZoneGroupWriter>* writer)
+{
+  if (zonegroup_name != zonegroup.name) {
+    return -ENOENT;
+  }
+
+  info = zonegroup;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneGroupWriter>();
+  }
+  return 0;
+}
+
+int ImmutableConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp,
+                                                 optional_yield y,
+                                                 std::string_view realm_id,
+                                                 RGWZoneGroup& info,
+                                                 std::unique_ptr<ZoneGroupWriter>* writer)
+{
+  info = zonegroup;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneGroupWriter>();
+  }
+  return 0;
+}
+
+int ImmutableConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp,
+                                               optional_yield y, const std::string& marker,
+                                               std::span<std::string> entries,
+                                               ListResult<std::string>& result)
+{
+  if (marker < zonegroup.name) {
+    entries[0] = zonegroup.name;
+    result.next = zonegroup.name;
+    result.entries = entries.first(1);
+  } else {
+    result.next.clear();
+    result.entries = entries.first(0);
+  }
+  return 0;
+}
+
+// Zone
+
+class ImmutableZoneWriter : public ZoneWriter {
+ public:
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWZoneParams& info) override
+  {
+    return -EROFS;
+  }
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWZoneParams& info, std::string_view new_name) override
+  {
+    return -EROFS;
+  }
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    return -EROFS;
+  }
+};
+
+int ImmutableConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp,
+                                                optional_yield y, bool exclusive,
+                                                std::string_view realm_id,
+                                                std::string_view zone_id)
+{
+  return -EROFS;
+}
+
+int ImmutableConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp,
+                                               optional_yield y,
+                                               std::string_view realm_id,
+                                               std::string& zone_id)
+{
+  if (!realm_id.empty()) {
+    return -ENOENT;
+  }
+  zone_id = zone.id;
+  return 0;
+}
+
+int ImmutableConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp,
+                                                 optional_yield y,
+                                                 std::string_view realm_id)
+{
+  return -EROFS;
+}
+
+
+int ImmutableConfigStore::create_zone(const DoutPrefixProvider* dpp,
+                                      optional_yield y, bool exclusive,
+                                      const RGWZoneParams& info,
+                                      std::unique_ptr<ZoneWriter>* writer)
+{
+  return -EROFS;
+}
+
+int ImmutableConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp,
+                                          optional_yield y,
+                                          std::string_view zone_id,
+                                          RGWZoneParams& info,
+                                          std::unique_ptr<ZoneWriter>* writer)
+{
+  if (zone_id != zone.id) {
+    return -ENOENT;
+  }
+
+  info = zone;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneWriter>();
+  }
+  return 0;
+}
+
+int ImmutableConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            std::string_view zone_name,
+                                            RGWZoneParams& info,
+                                            std::unique_ptr<ZoneWriter>* writer)
+{
+  if (zone_name != zone.name) {
+    return -ENOENT;
+  }
+
+  info = zone;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneWriter>();
+  }
+  return 0;
+}
+
+int ImmutableConfigStore::read_default_zone(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            std::string_view realm_id,
+                                            RGWZoneParams& info,
+                                            std::unique_ptr<ZoneWriter>* writer)
+{
+  if (!realm_id.empty()) {
+    return -ENOENT;
+  }
+
+  info = zone;
+
+  if (writer) {
+    *writer = std::make_unique<ImmutableZoneWriter>();
+  }
+  return 0;
+}
+
+int ImmutableConfigStore::list_zone_names(const DoutPrefixProvider* dpp,
+                                          optional_yield y, const std::string& marker,
+                                          std::span<std::string> entries,
+                                          ListResult<std::string>& result)
+{
+  if (marker < zone.name) {
+    entries[0] = zone.name;
+    result.next = zone.name;
+    result.entries = entries.first(1);
+  } else {
+    result.next.clear();
+    result.entries = entries.first(0);
+  }
+  return 0;
+}
+
+
+// PeriodConfig
+int ImmutableConfigStore::read_period_config(const DoutPrefixProvider* dpp,
+                                             optional_yield y,
+                                             std::string_view realm_id,
+                                             RGWPeriodConfig& info)
+{
+  if (!realm_id.empty()) {
+    return -ENOENT;
+  }
+
+  info = period_config;
+  return 0;
+}
+
+int ImmutableConfigStore::write_period_config(const DoutPrefixProvider* dpp,
+                                              optional_yield y, bool exclusive,
+                                              std::string_view realm_id,
+                                              const RGWPeriodConfig& info)
+{
+  return -EROFS;
+}
+
+
+/// ImmutableConfigStore factory function
+auto create_immutable_config_store(const DoutPrefixProvider* dpp,
+                                   const RGWZoneGroup& zonegroup,
+                                   const RGWZoneParams& zone,
+                                   const RGWPeriodConfig& period_config)
+  -> std::unique_ptr<ConfigStore>
+{
+  return std::make_unique<ImmutableConfigStore>(zonegroup, zone, period_config);
+}
+
+} // namespace rgw::sal
diff --git a/src/rgw/driver/immutable_config/store.h b/src/rgw/driver/immutable_config/store.h
new file mode 100644
index 00000000000..9a1ac5f1443
--- /dev/null
+++ b/src/rgw/driver/immutable_config/store.h
@@ -0,0 +1,180 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "rgw_sal_config.h"
+
+namespace rgw::sal {
+
+/// A read-only ConfigStore that serves the given default zonegroup and zone.
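+///
+/// Minimal usage sketch (illustrative only; assumes `dpp`, `zonegroup`,
+/// `zone` and `period_config` are already populated by the caller):
+///
+///   auto cfg = create_immutable_config_store(dpp, zonegroup, zone,
+///                                            period_config);
+///   RGWZoneParams info;
+///   int r = cfg->read_zone_by_name(dpp, null_yield, zone.name, info, nullptr);
+///   // r == 0 here; any mutating call, e.g. cfg->create_zone(...), fails
+///   // with -EROFS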
+class ImmutableConfigStore : public ConfigStore { + public: + explicit ImmutableConfigStore(const RGWZoneGroup& zonegroup, + const RGWZoneParams& zone, + const RGWPeriodConfig& period_config); + + // Realm + virtual int write_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id) override; + virtual int read_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string& realm_id) override; + virtual int delete_default_realm_id(const DoutPrefixProvider* dpp, + optional_yield y) override; + + virtual int create_realm(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWRealm& info, + std::unique_ptr* writer) override; + virtual int read_realm_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWRealm& info, + std::unique_ptr* writer) override; + virtual int read_realm_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_name, + RGWRealm& info, + std::unique_ptr* writer) override; + virtual int read_default_realm(const DoutPrefixProvider* dpp, + optional_yield y, + RGWRealm& info, + std::unique_ptr* writer) override; + virtual int read_realm_id(const DoutPrefixProvider* dpp, + optional_yield y, std::string_view realm_name, + std::string& realm_id) override; + virtual int realm_notify_new_period(const DoutPrefixProvider* dpp, + optional_yield y, + const RGWPeriod& period) override; + virtual int list_realm_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& result) override; + + // Period + virtual int create_period(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWPeriod& info) override; + virtual int read_period(const DoutPrefixProvider* dpp, + optional_yield y, std::string_view period_id, + std::optional epoch, RGWPeriod& info) override; + virtual int delete_period(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view period_id) override; + virtual int list_period_ids(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& result) override; + + // ZoneGroup + virtual int write_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + std::string_view zonegroup_id) override; + virtual int read_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + std::string& zonegroup_id) override; + virtual int delete_default_zonegroup_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) override; + + virtual int create_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneGroup& info, + std::unique_ptr* writer) override; + virtual int read_zonegroup_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_id, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + virtual int read_zonegroup_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zonegroup_name, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + virtual int read_default_zonegroup(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneGroup& info, + std::unique_ptr* writer) override; + virtual int list_zonegroup_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& 
result) override; + + // Zone + virtual int write_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + std::string_view zone_id) override; + virtual int read_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + std::string& zone_id) override; + virtual int delete_default_zone_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id) override; + + virtual int create_zone(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + const RGWZoneParams& info, + std::unique_ptr* writer) override; + virtual int read_zone_by_id(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_id, + RGWZoneParams& info, + std::unique_ptr* writer) override; + virtual int read_zone_by_name(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view zone_name, + RGWZoneParams& info, + std::unique_ptr* writer) override; + virtual int read_default_zone(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWZoneParams& info, + std::unique_ptr* writer) override; + virtual int list_zone_names(const DoutPrefixProvider* dpp, + optional_yield y, const std::string& marker, + std::span entries, + ListResult& result) override; + + // PeriodConfig + virtual int read_period_config(const DoutPrefixProvider* dpp, + optional_yield y, + std::string_view realm_id, + RGWPeriodConfig& info) override; + virtual int write_period_config(const DoutPrefixProvider* dpp, + optional_yield y, bool exclusive, + std::string_view realm_id, + const RGWPeriodConfig& info) override; + + private: + const RGWZoneGroup zonegroup; + const RGWZoneParams zone; + const RGWPeriodConfig period_config; +}; // ImmutableConfigStore + + +/// ImmutableConfigStore factory function +auto create_immutable_config_store(const DoutPrefixProvider* dpp, + const RGWZoneGroup& zonegroup, + const RGWZoneParams& zone, + const RGWPeriodConfig& period_config) + -> std::unique_ptr; + +} // namespace rgw::sal diff --git a/src/rgw/driver/json_config/store.cc b/src/rgw/driver/json_config/store.cc new file mode 100644 index 00000000000..330aa344dc5 --- /dev/null +++ b/src/rgw/driver/json_config/store.cc @@ -0,0 +1,176 @@ +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include +#include "include/buffer.h" +#include "common/errno.h" +#include "common/ceph_json.h" +#include "rgw_zone.h" +#include "driver/immutable_config/store.h" +#include "store.h" + +namespace rgw::sal { + +namespace { + +struct DecodedConfig { + RGWZoneGroup zonegroup; + RGWZoneParams zone; + RGWPeriodConfig period_config; + + void decode_json(JSONObj *obj) + { + JSONDecoder::decode_json("zonegroup", zonegroup, obj); + JSONDecoder::decode_json("zone", zone, obj); + JSONDecoder::decode_json("period_config", period_config, obj); + } +}; + +static void parse_config(const DoutPrefixProvider* dpp, const char* filename) +{ + bufferlist bl; + std::string errmsg; + int r = bl.read_file(filename, &errmsg); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to read json config file '" << filename + << "': " << errmsg << dendl; + throw std::system_error(-r, std::system_category()); + } + + JSONParser p; + if (!p.parse(bl.c_str(), bl.length())) { + ldpp_dout(dpp, 0) << "failed to parse json config file" << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } + + DecodedConfig config; + try { + decode_json_obj(config, &p); + } catch (const JSONDecoder::err& e) { + ldpp_dout(dpp, 0) << "failed to decode JSON input: " << e.what() << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } +} + +void sanity_check_config(const DoutPrefixProvider* dpp, DecodedConfig& config) +{ + if (config.zonegroup.id.empty()) { + config.zonegroup.id = "default"; + } + if (config.zonegroup.name.empty()) { + config.zonegroup.name = "default"; + } + if (config.zonegroup.api_name.empty()) { + config.zonegroup.api_name = config.zonegroup.name; + } + + if (config.zone.id.empty()) { + config.zone.id = "default"; + } + if (config.zone.name.empty()) { + config.zone.name = "default"; + } + + // add default placement if it doesn't exist + rgw_pool pool; + RGWZonePlacementInfo placement; + placement.storage_classes.set_storage_class( + RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); + config.zone.placement_pools.emplace("default-placement", + std::move(placement)); + + std::set pools; + int r = rgw::init_zone_pool_names(dpp, null_yield, pools, config.zone); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to set default zone pool names" << dendl; + throw std::system_error(-r, std::system_category()); + } + + // verify that config.zonegroup only contains config.zone + if (config.zonegroup.zones.size() > 1) { + ldpp_dout(dpp, 0) << "zonegroup cannot contain multiple zones" << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } + + if (config.zonegroup.zones.size() == 1) { + auto z = config.zonegroup.zones.begin(); + if (z->first != config.zone.id) { + ldpp_dout(dpp, 0) << "zonegroup contains unknown zone id=" + << z->first << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } + if (z->second.id != config.zone.id) { + ldpp_dout(dpp, 0) << "zonegroup contains unknown zone id=" + << z->second.id << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } + if (z->second.name != config.zone.name) { + ldpp_dout(dpp, 0) << "zonegroup contains unknown zone name=" + << z->second.name << dendl; + throw std::system_error(make_error_code(std::errc::invalid_argument)); + } + if (config.zonegroup.master_zone != config.zone.id) { + ldpp_dout(dpp, 0) << "zonegroup contains unknown master_zone=" + << config.zonegroup.master_zone << dendl; + throw 
std::system_error(make_error_code(std::errc::invalid_argument)); + } + } else { + // add the zone to the group + const bool is_master = true; + const bool read_only = false; + std::list endpoints; + std::list sync_from; + std::list sync_from_rm; + rgw::zone_features::set enable_features; + rgw::zone_features::set disable_features; + + enable_features.insert(rgw::zone_features::supported.begin(), + rgw::zone_features::supported.end()); + + int r = rgw::add_zone_to_group(dpp, config.zonegroup, config.zone, + &is_master, &read_only, endpoints, + nullptr, nullptr, sync_from, sync_from_rm, + nullptr, std::nullopt, + enable_features, disable_features); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to add zone to zonegroup: " + << cpp_strerror(r) << dendl; + throw std::system_error(-r, std::system_category()); + } + + config.zonegroup.enabled_features = std::move(enable_features); + } + + // insert the default placement target if it doesn't exist + auto target = RGWZoneGroupPlacementTarget{.name = "default-placement"}; + config.zonegroup.placement_targets.emplace(target.name, target); + if (config.zonegroup.default_placement.name.empty()) { + config.zonegroup.default_placement.name = target.name; + } +} + +} // anonymous namespace + +auto create_json_config_store(const DoutPrefixProvider* dpp, + const std::string& filename) + -> std::unique_ptr +{ + DecodedConfig config; + parse_config(dpp, filename.c_str()); + sanity_check_config(dpp, config); + return create_immutable_config_store(dpp, config.zonegroup, config.zone, + config.period_config); +} + +} // namespace rgw::sal diff --git a/src/rgw/driver/json_config/store.h b/src/rgw/driver/json_config/store.h new file mode 100644 index 00000000000..4482f671651 --- /dev/null +++ b/src/rgw/driver/json_config/store.h @@ -0,0 +1,27 @@ +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2022 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include "driver/immutable_config/store.h" + +namespace rgw::sal { + +/// Create an immutable ConfigStore by parsing the zonegroup and zone from the +/// given json filename. +auto create_json_config_store(const DoutPrefixProvider* dpp, + const std::string& filename) + -> std::unique_ptr; + +} // namespace rgw::sal diff --git a/src/rgw/driver/rados/cls_fifo_legacy.cc b/src/rgw/driver/rados/cls_fifo_legacy.cc new file mode 100644 index 00000000000..23b39b9fa5f --- /dev/null +++ b/src/rgw/driver/rados/cls_fifo_legacy.cc @@ -0,0 +1,2484 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat + * Author: Adam C. Emerson + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include +#include +#include +#include + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include + +#include "include/rados/librados.hpp" + +#include "include/buffer.h" + +#include "common/async/yield_context.h" +#include "common/random_string.h" + +#include "cls/fifo/cls_fifo_types.h" +#include "cls/fifo/cls_fifo_ops.h" + +#include "cls_fifo_legacy.h" + +namespace rgw::cls::fifo { +namespace cb = ceph::buffer; +namespace fifo = rados::cls::fifo; + +using ceph::from_error_code; + +inline constexpr auto MAX_RACE_RETRIES = 10; + +void create_meta(lr::ObjectWriteOperation* op, + std::string_view id, + std::optional objv, + std::optional oid_prefix, + bool exclusive, + std::uint64_t max_part_size, + std::uint64_t max_entry_size) +{ + fifo::op::create_meta cm; + + cm.id = id; + cm.version = objv; + cm.oid_prefix = oid_prefix; + cm.max_part_size = max_part_size; + cm.max_entry_size = max_entry_size; + cm.exclusive = exclusive; + + cb::list in; + encode(cm, in); + op->exec(fifo::op::CLASS, fifo::op::CREATE_META, in); +} + +int get_meta(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, + std::optional objv, fifo::info* info, + std::uint32_t* part_header_size, + std::uint32_t* part_entry_overhead, + uint64_t tid, optional_yield y, + bool probe) +{ + lr::ObjectReadOperation op; + fifo::op::get_meta gm; + gm.version = objv; + cb::list in; + encode(gm, in); + cb::list bl; + + op.exec(fifo::op::CLASS, fifo::op::GET_META, in, + &bl, nullptr); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); + if (r >= 0) try { + fifo::op::get_meta_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + if (info) *info = std::move(reply.info); + if (part_header_size) *part_header_size = reply.part_header_size; + if (part_entry_overhead) + *part_entry_overhead = reply.part_entry_overhead; + } catch (const cb::error& err) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " decode failed: " << err.what() + << " tid=" << tid << dendl; + r = from_error_code(err.code()); + } else if (!(probe && (r == -ENOENT || r == -ENODATA))) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::GET_META failed r=" << r << " tid=" << tid + << dendl; + } + return r; +}; + +namespace { +void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv, + const fifo::update& update) +{ + fifo::op::update_meta um; + + um.version = objv; + um.tail_part_num = update.tail_part_num(); + um.head_part_num = update.head_part_num(); + um.min_push_part_num = update.min_push_part_num(); + um.max_push_part_num = update.max_push_part_num(); + um.journal_entries_add = std::move(update).journal_entries_add(); + um.journal_entries_rm = std::move(update).journal_entries_rm(); + + cb::list in; + encode(um, in); + op->exec(fifo::op::CLASS, fifo::op::UPDATE_META, in); +} + +void part_init(lr::ObjectWriteOperation* op, std::string_view tag, + fifo::data_params params) +{ + fifo::op::init_part ip; + + ip.tag = tag; + ip.params = params; + + cb::list in; + encode(ip, in); + op->exec(fifo::op::CLASS, fifo::op::INIT_PART, in); +} + +int push_part(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, std::string_view tag, + std::deque data_bufs, std::uint64_t tid, + optional_yield y) +{ + lr::ObjectWriteOperation op; + fifo::op::push_part pp; + + pp.tag = tag; + pp.data_bufs = data_bufs; + pp.total_len = 0; + + for (const auto& bl : data_bufs) + pp.total_len += bl.length(); + + cb::list in; + encode(pp, in); + auto retval = 0; + 
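+  // `retval` is wired to the PUSH_PART call below via exec(); passing
+  // lr::OPERATION_RETURNVEC to rgw_rados_operate() asks librados to keep
+  // per-op return values, so the caller can distinguish the class method's
+  // own result (checked as `retval` after the call) from a transport-level
+  // failure (a negative `r`).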
op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in, nullptr, &retval); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y, lr::OPERATION_RETURNVEC); + if (r < 0) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::PUSH_PART failed r=" << r + << " tid=" << tid << dendl; + return r; + } + if (retval < 0) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " error handling response retval=" << retval + << " tid=" << tid << dendl; + } + return retval; +} + +void push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag, + std::deque data_bufs, std::uint64_t tid, + lr::AioCompletion* c) +{ + lr::ObjectWriteOperation op; + fifo::op::push_part pp; + + pp.tag = tag; + pp.data_bufs = data_bufs; + pp.total_len = 0; + + for (const auto& bl : data_bufs) + pp.total_len += bl.length(); + + cb::list in; + encode(pp, in); + op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in); + auto r = ioctx.aio_operate(oid, c, &op, lr::OPERATION_RETURNVEC); + ceph_assert(r >= 0); +} + +void trim_part(lr::ObjectWriteOperation* op, + std::optional tag, + std::uint64_t ofs, bool exclusive) +{ + fifo::op::trim_part tp; + + tp.tag = tag; + tp.ofs = ofs; + tp.exclusive = exclusive; + + cb::list in; + encode(tp, in); + op->exec(fifo::op::CLASS, fifo::op::TRIM_PART, in); +} + +int list_part(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, + std::optional tag, std::uint64_t ofs, + std::uint64_t max_entries, + std::vector* entries, + bool* more, bool* full_part, std::string* ptag, + std::uint64_t tid, optional_yield y) +{ + lr::ObjectReadOperation op; + fifo::op::list_part lp; + + lp.tag = tag; + lp.ofs = ofs; + lp.max_entries = max_entries; + + cb::list in; + encode(lp, in); + cb::list bl; + op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in, &bl, nullptr); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); + if (r >= 0) try { + fifo::op::list_part_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + if (entries) *entries = std::move(reply.entries); + if (more) *more = reply.more; + if (full_part) *full_part = reply.full_part; + if (ptag) *ptag = reply.tag; + } catch (const cb::error& err) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " decode failed: " << err.what() + << " tid=" << tid << dendl; + r = from_error_code(err.code()); + } else if (r != -ENOENT) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid + << dendl; + } + return r; +} + +struct list_entry_completion : public lr::ObjectOperationCompletion { + CephContext* cct; + int* r_out; + std::vector* entries; + bool* more; + bool* full_part; + std::string* ptag; + std::uint64_t tid; + + list_entry_completion(CephContext* cct, int* r_out, std::vector* entries, + bool* more, bool* full_part, std::string* ptag, + std::uint64_t tid) + : cct(cct), r_out(r_out), entries(entries), more(more), + full_part(full_part), ptag(ptag), tid(tid) {} + virtual ~list_entry_completion() = default; + void handle_completion(int r, bufferlist& bl) override { + if (r >= 0) try { + fifo::op::list_part_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + if (entries) *entries = std::move(reply.entries); + if (more) *more = reply.more; + if (full_part) *full_part = reply.full_part; + if (ptag) *ptag = reply.tag; + } catch (const cb::error& err) { + lderr(cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " decode failed: " << err.what() + << " tid=" << tid << dendl; + r = 
from_error_code(err.code()); + } else if (r < 0) { + lderr(cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid + << dendl; + } + if (r_out) *r_out = r; + } +}; + +lr::ObjectReadOperation list_part(CephContext* cct, + std::optional tag, + std::uint64_t ofs, + std::uint64_t max_entries, + int* r_out, + std::vector* entries, + bool* more, bool* full_part, + std::string* ptag, std::uint64_t tid) +{ + lr::ObjectReadOperation op; + fifo::op::list_part lp; + + lp.tag = tag; + lp.ofs = ofs; + lp.max_entries = max_entries; + + cb::list in; + encode(lp, in); + op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in, + new list_entry_completion(cct, r_out, entries, more, full_part, + ptag, tid)); + return op; +} + +int get_part_info(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, + fifo::part_header* header, + std::uint64_t tid, optional_yield y) +{ + lr::ObjectReadOperation op; + fifo::op::get_part_info gpi; + + cb::list in; + cb::list bl; + encode(gpi, in); + op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in, &bl, nullptr); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); + if (r >= 0) try { + fifo::op::get_part_info_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + if (header) *header = std::move(reply.header); + } catch (const cb::error& err) { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " decode failed: " << err.what() + << " tid=" << tid << dendl; + r = from_error_code(err.code()); + } else { + ldpp_dout(dpp, -1) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid + << dendl; + } + return r; +} + +struct partinfo_completion : public lr::ObjectOperationCompletion { + CephContext* cct; + int* rp; + fifo::part_header* h; + std::uint64_t tid; + partinfo_completion(CephContext* cct, int* rp, fifo::part_header* h, + std::uint64_t tid) : + cct(cct), rp(rp), h(h), tid(tid) { + } + virtual ~partinfo_completion() = default; + void handle_completion(int r, bufferlist& bl) override { + if (r >= 0) try { + fifo::op::get_part_info_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + if (h) *h = std::move(reply.header); + } catch (const cb::error& err) { + r = from_error_code(err.code()); + lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " decode failed: " << err.what() + << " tid=" << tid << dendl; + } else { + lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid + << dendl; + } + if (rp) { + *rp = r; + } + } +}; + +lr::ObjectReadOperation get_part_info(CephContext* cct, + fifo::part_header* header, + std::uint64_t tid, int* r = 0) +{ + lr::ObjectReadOperation op; + fifo::op::get_part_info gpi; + + cb::list in; + cb::list bl; + encode(gpi, in); + op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in, + new partinfo_completion(cct, r, header, tid)); + return op; +} +} + +std::optional FIFO::to_marker(std::string_view s) +{ + marker m; + if (s.empty()) { + m.num = info.tail_part_num; + m.ofs = 0; + return m; + } + + auto pos = s.find(':'); + if (pos == s.npos) { + return std::nullopt; + } + + auto num = s.substr(0, pos); + auto ofs = s.substr(pos + 1); + + auto n = ceph::parse(num); + if (!n) { + return std::nullopt; + } + m.num = *n; + auto o = ceph::parse(ofs); + if (!o) { + return std::nullopt; + } + m.ofs = *o; + return m; +} + +std::string FIFO::generate_tag() const +{ + static constexpr auto HEADER_TAG_SIZE = 16; + return 
gen_rand_alphanumeric_plain(static_cast(ioctx.cct()), + HEADER_TAG_SIZE); +} + + +int FIFO::apply_update(const DoutPrefixProvider *dpp, + fifo::info* info, + const fifo::objv& objv, + const fifo::update& update, + std::uint64_t tid) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::unique_lock l(m); + if (objv != info->version) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " version mismatch, canceling: tid=" << tid << dendl; + return -ECANCELED; + } + auto err = info->apply_update(update); + if (err) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " error applying update: " << *err << " tid=" << tid << dendl; + return -ECANCELED; + } + + ++info->version.ver; + + return {}; +} + +int FIFO::_update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, + fifo::objv version, bool* pcanceled, + std::uint64_t tid, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + bool canceled = false; + update_meta(&op, info.version, update); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r >= 0 || r == -ECANCELED) { + canceled = (r == -ECANCELED); + if (!canceled) { + r = apply_update(dpp, &info, version, update, tid); + if (r < 0) canceled = true; + } + if (canceled) { + r = read_meta(dpp, tid, y); + canceled = r < 0 ? false : true; + } + } + if (pcanceled) *pcanceled = canceled; + if (canceled) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled: tid=" << tid << dendl; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " returning error: r=" << r << " tid=" << tid << dendl; + } + return r; +} + +struct Updater : public Completion { + FIFO* fifo; + fifo::update update; + fifo::objv version; + bool reread = false; + bool* pcanceled = nullptr; + std::uint64_t tid; + Updater(const DoutPrefixProvider *dpp, FIFO* fifo, lr::AioCompletion* super, + const fifo::update& update, fifo::objv version, + bool* pcanceled, std::uint64_t tid) + : Completion(dpp, super), fifo(fifo), update(update), version(version), + pcanceled(pcanceled) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + if (reread) + handle_reread(dpp, std::move(p), r); + else + handle_update(dpp, std::move(p), r); + } + + void handle_update(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " handling async update_meta: tid=" + << tid << dendl; + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " update failed: r=" << r << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + bool canceled = (r == -ECANCELED); + if (!canceled) { + int r = fifo->apply_update(dpp, &fifo->info, version, update, tid); + if (r < 0) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " update failed, marking canceled: r=" << r + << " tid=" << tid << dendl; + canceled = true; + } + } + if (canceled) { + reread = true; + fifo->read_meta(dpp, tid, call(std::move(p))); + return; + } + if (pcanceled) + *pcanceled = false; + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " completing: tid=" << tid << dendl; + complete(std::move(p), 0); + } + + void handle_reread(const DoutPrefixProvider *dpp, Ptr&& 
p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " handling async read_meta: tid=" + << tid << dendl; + if (r < 0 && pcanceled) { + *pcanceled = false; + } else if (r >= 0 && pcanceled) { + *pcanceled = true; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " failed dispatching read_meta: r=" << r << " tid=" + << tid << dendl; + } else { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " completing: tid=" << tid << dendl; + } + complete(std::move(p), r); + } +}; + +void FIFO::_update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, + fifo::objv version, bool* pcanceled, + std::uint64_t tid, lr::AioCompletion* c) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + update_meta(&op, info.version, update); + auto updater = std::make_unique(dpp, this, c, update, version, pcanceled, + tid); + auto r = ioctx.aio_operate(oid, Updater::call(std::move(updater)), &op); + assert(r >= 0); +} + +int FIFO::create_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, + optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + op.create(false); /* We don't need exclusivity, part_init ensures + we're creating from the same journal entry. */ + std::unique_lock l(m); + part_init(&op, tag, info.params); + auto oid = info.part_oid(part_num); + l.unlock(); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " part_init failed: r=" << r << " tid=" + << tid << dendl; + } + return r; +} + +int FIFO::remove_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, + optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + op.remove(); + std::unique_lock l(m); + auto oid = info.part_oid(part_num); + l.unlock(); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " remove failed: r=" << r << " tid=" + << tid << dendl; + } + return r; +} + +int FIFO::process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::vector processed; + + std::unique_lock l(m); + auto tmpjournal = info.journal; + auto new_tail = info.tail_part_num; + auto new_head = info.head_part_num; + auto new_max = info.max_push_part_num; + l.unlock(); + + int r = 0; + for (auto& [n, entry] : tmpjournal) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " processing entry: entry=" << entry << " tid=" << tid + << dendl; + switch (entry.op) { + case fifo::journal_entry::Op::create: + r = create_part(dpp, entry.part_num, entry.part_tag, tid, y); + if (entry.part_num > new_max) { + new_max = entry.part_num; + } + break; + case fifo::journal_entry::Op::set_head: + r = 0; + if (entry.part_num > new_head) { + new_head = entry.part_num; + } + break; + case fifo::journal_entry::Op::remove: + r = remove_part(dpp, entry.part_num, entry.part_tag, tid, y); + if (r == -ENOENT) r = 0; + if (entry.part_num >= new_tail) { + new_tail = entry.part_num + 1; + } + break; + default: + 
ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " unknown journaled op: entry=" << entry << " tid=" + << tid << dendl; + return -EIO; + } + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " processing entry failed: entry=" << entry + << " r=" << r << " tid=" << tid << dendl; + return -r; + } + + processed.push_back(std::move(entry)); + } + + // Postprocess + bool canceled = true; + + for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " postprocessing: i=" << i << " tid=" << tid << dendl; + + std::optional tail_part_num; + std::optional head_part_num; + std::optional max_part_num; + + std::unique_lock l(m); + auto objv = info.version; + if (new_tail > tail_part_num) tail_part_num = new_tail; + if (new_head > info.head_part_num) head_part_num = new_head; + if (new_max > info.max_push_part_num) max_part_num = new_max; + l.unlock(); + + if (processed.empty() && + !tail_part_num && + !max_part_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " nothing to update any more: i=" << i << " tid=" + << tid << dendl; + canceled = false; + break; + } + auto u = fifo::update().tail_part_num(tail_part_num) + .head_part_num(head_part_num).max_push_part_num(max_part_num) + .journal_entries_rm(processed); + r = _update_meta(dpp, u, objv, &canceled, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: update=" << u + << " r=" << r << " tid=" << tid << dendl; + break; + } + + if (canceled) { + std::vector new_processed; + std::unique_lock l(m); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " update canceled, retrying: i=" << i << " tid=" + << tid << dendl; + for (auto& e : processed) { + auto jiter = info.journal.find(e.part_num); + /* journal entry was already processed */ + if (jiter == info.journal.end() || + !(jiter->second == e)) { + continue; + } + new_processed.push_back(e); + } + processed = std::move(new_processed); + } + } + if (r == 0 && canceled) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + r = -ECANCELED; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " failed, r=: " << r << " tid=" << tid << dendl; + } + return r; +} + +int FIFO::_prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::unique_lock l(m); + std::vector jentries = { info.next_journal_entry(generate_tag()) }; + if (info.journal.find(jentries.front().part_num) != info.journal.end()) { + l.unlock(); + ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " new part journaled, but not processed: tid=" + << tid << dendl; + auto r = process_journal(dpp, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " process_journal failed: r=" << r << " tid=" << tid << dendl; + } + return r; + } + std::int64_t new_head_part_num = info.head_part_num; + auto version = info.version; + + if (is_head) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " needs new head: tid=" << tid << dendl; + auto new_head_jentry = jentries.front(); + new_head_jentry.op = fifo::journal_entry::Op::set_head; + new_head_part_num = jentries.front().part_num; + 
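+    // Both entries are queued for the same part number: the `create` entry
+    // from next_journal_entry() plus this `set_head` copy, so that
+    // process_journal() can later create the part and advance the head even
+    // if this caller is interrupted between the two metadata updates.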
jentries.push_back(std::move(new_head_jentry)); + } + l.unlock(); + + int r = 0; + bool canceled = true; + for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { + canceled = false; + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " updating metadata: i=" << i << " tid=" << tid << dendl; + auto u = fifo::update{}.journal_entries_add(jentries); + r = _update_meta(dpp, u, version, &canceled, tid, y); + if (r >= 0 && canceled) { + std::unique_lock l(m); + auto found = (info.journal.find(jentries.front().part_num) != + info.journal.end()); + if ((info.max_push_part_num >= jentries.front().part_num && + info.head_part_num >= new_head_part_num)) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced, but journaled and processed: i=" << i + << " tid=" << tid << dendl; + return 0; + } + if (found) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced, journaled but not processed: i=" << i + << " tid=" << tid << dendl; + canceled = false; + } + l.unlock(); + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: update=" << u << " r=" << r + << " tid=" << tid << dendl; + return r; + } + } + if (canceled) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + return -ECANCELED; + } + r = process_journal(dpp, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " process_journal failed: r=" << r << " tid=" << tid << dendl; + } + return r; +} + +int FIFO::_prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::unique_lock l(m); + std::int64_t new_head_num = info.head_part_num + 1; + auto max_push_part_num = info.max_push_part_num; + auto version = info.version; + l.unlock(); + + int r = 0; + if (max_push_part_num < new_head_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new part: tid=" << tid << dendl; + r = _prepare_new_part(dpp, true, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _prepare_new_part failed: r=" << r + << " tid=" << tid << dendl; + return r; + } + std::unique_lock l(m); + if (info.max_push_part_num < new_head_num) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " inconsistency, push part less than head part: " + << " tid=" << tid << dendl; + return -EIO; + } + l.unlock(); + return 0; + } + + bool canceled = true; + for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " updating head: i=" << i << " tid=" << tid << dendl; + auto u = fifo::update{}.head_part_num(new_head_num); + r = _update_meta(dpp, u, version, &canceled, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: update=" << u << " r=" << r + << " tid=" << tid << dendl; + return r; + } + std::unique_lock l(m); + auto head_part_num = info.head_part_num; + version = info.version; + l.unlock(); + if (canceled && (head_part_num >= new_head_num)) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced, but completed by the other caller: i=" << i + << " tid=" << tid << dendl; + canceled = false; + } + } + if (canceled) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled 
too many times, giving up: tid=" << tid << dendl; + return -ECANCELED; + } + return 0; +} + +struct NewPartPreparer : public Completion { + FIFO* f; + std::vector jentries; + int i = 0; + std::int64_t new_head_part_num; + bool canceled = false; + uint64_t tid; + + NewPartPreparer(const DoutPrefixProvider *dpp, FIFO* f, lr::AioCompletion* super, + std::vector jentries, + std::int64_t new_head_part_num, + std::uint64_t tid) + : Completion(dpp, super), f(f), jentries(std::move(jentries)), + new_head_part_num(new_head_part_num), tid(tid) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + + if (canceled) { + std::unique_lock l(f->m); + auto iter = f->info.journal.find(jentries.front().part_num); + auto max_push_part_num = f->info.max_push_part_num; + auto head_part_num = f->info.head_part_num; + auto version = f->info.version; + auto found = (iter != f->info.journal.end()); + l.unlock(); + if ((max_push_part_num >= jentries.front().part_num && + head_part_num >= new_head_part_num)) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced, but journaled and processed: i=" << i + << " tid=" << tid << dendl; + complete(std::move(p), 0); + return; + } + if (i >= MAX_RACE_RETRIES) { + complete(std::move(p), -ECANCELED); + return; + } + if (!found) { + ++i; + f->_update_meta(dpp, fifo::update{} + .journal_entries_add(jentries), + version, &canceled, tid, call(std::move(p))); + return; + } else { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced, journaled but not processed: i=" << i + << " tid=" << tid << dendl; + canceled = false; + } + // Fall through. We still need to process the journal. 
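
> The conditional-update-plus-retry dance above recurs throughout this file: every write to the FIFO metadata object is predicated on the cached `fifo::objv`, and losing a race surfaces as `*pcanceled == true` rather than as an error code. A minimal sketch of that shared shape, assuming the surrounding file's `_update_meta` and `MAX_RACE_RETRIES`; `already_applied_by_racer()` is a hypothetical stand-in for each caller's own "did the competing writer already do my work?" check, and the free-function form is for clarity only (the real callers are `FIFO` members):

```cpp
// Sketch only, not part of this commit.
bool already_applied_by_racer(FIFO* f, fifo::objv* version); // hypothetical helper

int update_with_retries(const DoutPrefixProvider* dpp, FIFO* f,
                        const fifo::update& u, fifo::objv version,
                        std::uint64_t tid, optional_yield y)
{
  bool canceled = true;
  for (int i = 0; canceled && i < MAX_RACE_RETRIES; ++i) {
    int r = f->_update_meta(dpp, u, version, &canceled, tid, y);
    if (r < 0) {
      return r;                       // hard failure, not a lost race
    }
    if (canceled && already_applied_by_racer(f, &version)) {
      return 0;                       // racer already applied our change
    }
  }
  return canceled ? -ECANCELED : 0;   // gave up after too many races
}
```
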
+ } + f->process_journal(dpp, tid, super()); + return; + } +}; + +void FIFO::_prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, + lr::AioCompletion* c) +{ + std::unique_lock l(m); + std::vector jentries = { info.next_journal_entry(generate_tag()) }; + if (info.journal.find(jentries.front().part_num) != info.journal.end()) { + l.unlock(); + ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " new part journaled, but not processed: tid=" + << tid << dendl; + process_journal(dpp, tid, c); + return; + } + std::int64_t new_head_part_num = info.head_part_num; + auto version = info.version; + + if (is_head) { + auto new_head_jentry = jentries.front(); + new_head_jentry.op = fifo::journal_entry::Op::set_head; + new_head_part_num = jentries.front().part_num; + jentries.push_back(std::move(new_head_jentry)); + } + l.unlock(); + + auto n = std::make_unique(dpp, this, c, jentries, + new_head_part_num, tid); + auto np = n.get(); + _update_meta(dpp, fifo::update{}.journal_entries_add(jentries), version, + &np->canceled, tid, NewPartPreparer::call(std::move(n))); +} + +struct NewHeadPreparer : public Completion { + FIFO* f; + int i = 0; + bool newpart; + std::int64_t new_head_num; + bool canceled = false; + std::uint64_t tid; + + NewHeadPreparer(const DoutPrefixProvider *dpp, FIFO* f, lr::AioCompletion* super, + bool newpart, std::int64_t new_head_num, std::uint64_t tid) + : Completion(dpp, super), f(f), newpart(newpart), new_head_num(new_head_num), + tid(tid) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (newpart) + handle_newpart(std::move(p), r); + else + handle_update(dpp, std::move(p), r); + } + + void handle_newpart(Ptr&& p, int r) { + if (r < 0) { + lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _prepare_new_part failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + std::unique_lock l(f->m); + if (f->info.max_push_part_num < new_head_num) { + l.unlock(); + lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _prepare_new_part failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), -EIO); + } else { + l.unlock(); + complete(std::move(p), 0); + } + } + + void handle_update(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + std::unique_lock l(f->m); + auto head_part_num = f->info.head_part_num; + auto version = f->info.version; + l.unlock(); + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + if (canceled) { + if (i >= MAX_RACE_RETRIES) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + complete(std::move(p), -ECANCELED); + return; + } + + // Raced, but there's still work to do! 
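
> The asynchronous variants drive the same state machines through the `Completion<T>` scaffolding declared in cls_fifo_legacy.h: a handler re-arms itself by passing `call(std::move(p))` as the `lr::AioCompletion*` of the next librados call, so `handle()` runs once per completed step until `complete()` fires the caller's completion. A schematic of that idiom; `Step` and `issue_next_op()` are invented placeholders, everything else comes from this file:

```cpp
// Schematic only, not part of this commit. One Completion subclass is one
// state machine; ownership of the state travels through the Ptr that
// call() and handle() pass back and forth.
void issue_next_op(FIFO* f, lr::AioCompletion* c); // hypothetical rados call

struct Step : Completion<Step> {
  FIFO* f;
  int tries = 0;
  Step(const DoutPrefixProvider* dpp, FIFO* f, lr::AioCompletion* super)
    : Completion(dpp, super), f(f) {}

  void handle(const DoutPrefixProvider* dpp, Ptr&& p, int r) {
    if (r < 0 || ++tries > MAX_RACE_RETRIES) {
      complete(std::move(p), r < 0 ? r : -ECANCELED); // finish caller's AioCompletion
      return;
    }
    issue_next_op(f, call(std::move(p))); // re-arm: handle() fires again on completion
  }
};
```
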
+ if (head_part_num < new_head_num) { + canceled = false; + ++i; + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " updating head: i=" << i << " tid=" << tid << dendl; + f->_update_meta(dpp, fifo::update{}.head_part_num(new_head_num), + version, &this->canceled, tid, call(std::move(p))); + return; + } + } + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " succeeded : i=" << i << " tid=" << tid << dendl; + complete(std::move(p), 0); + return; + } +}; + +void FIFO::_prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::unique_lock l(m); + int64_t new_head_num = info.head_part_num + 1; + auto max_push_part_num = info.max_push_part_num; + auto version = info.version; + l.unlock(); + + if (max_push_part_num < new_head_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new part: tid=" << tid << dendl; + auto n = std::make_unique(dpp, this, c, true, new_head_num, + tid); + _prepare_new_part(dpp, true, tid, NewHeadPreparer::call(std::move(n))); + } else { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " updating head: tid=" << tid << dendl; + auto n = std::make_unique(dpp, this, c, false, new_head_num, + tid); + auto np = n.get(); + _update_meta(dpp, fifo::update{}.head_part_num(new_head_num), version, + &np->canceled, tid, NewHeadPreparer::call(std::move(n))); + } +} + +int FIFO::push_entries(const DoutPrefixProvider *dpp, const std::deque& data_bufs, + std::uint64_t tid, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::unique_lock l(m); + auto head_part_num = info.head_part_num; + auto tag = info.head_tag; + const auto part_oid = info.part_oid(head_part_num); + l.unlock(); + + auto r = push_part(dpp, ioctx, part_oid, tag, data_bufs, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " push_part failed: r=" << r << " tid=" << tid << dendl; + } + return r; +} + +void FIFO::push_entries(const std::deque& data_bufs, + std::uint64_t tid, lr::AioCompletion* c) +{ + std::unique_lock l(m); + auto head_part_num = info.head_part_num; + auto tag = info.head_tag; + const auto part_oid = info.part_oid(head_part_num); + l.unlock(); + + push_part(ioctx, part_oid, tag, data_bufs, tid, c); +} + +int FIFO::trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs, + std::optional tag, + bool exclusive, std::uint64_t tid, + optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + std::unique_lock l(m); + const auto part_oid = info.part_oid(part_num); + l.unlock(); + rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive); + auto r = rgw_rados_operate(dpp, ioctx, part_oid, &op, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " trim_part failed: r=" << r << " tid=" << tid << dendl; + } + return 0; +} + +void FIFO::trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs, + std::optional tag, + bool exclusive, std::uint64_t tid, + lr::AioCompletion* c) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectWriteOperation op; + std::unique_lock l(m); + const auto part_oid = info.part_oid(part_num); + l.unlock(); + 
rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive); + auto r = ioctx.aio_operate(part_oid, c, &op); + ceph_assert(r >= 0); +} + +int FIFO::open(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, std::string oid, std::unique_ptr* fifo, + optional_yield y, std::optional objv, + bool probe) +{ + ldpp_dout(dpp, 20) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering" << dendl; + fifo::info info; + std::uint32_t size; + std::uint32_t over; + int r = get_meta(dpp, ioctx, std::move(oid), objv, &info, &size, &over, 0, y, + probe); + if (r < 0) { + if (!(probe && (r == -ENOENT || r == -ENODATA))) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " get_meta failed: r=" << r << dendl; + } + return r; + } + std::unique_ptr f(new FIFO(std::move(ioctx), oid)); + f->info = info; + f->part_header_size = size; + f->part_entry_overhead = over; + // If there are journal entries, process them, in case + // someone crashed mid-transaction. + if (!info.journal.empty()) { + ldpp_dout(dpp, 20) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " processing leftover journal" << dendl; + r = f->process_journal(dpp, 0, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " process_journal failed: r=" << r << dendl; + return r; + } + } + *fifo = std::move(f); + return 0; +} + +int FIFO::create(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, std::string oid, std::unique_ptr* fifo, + optional_yield y, std::optional objv, + std::optional oid_prefix, + bool exclusive, std::uint64_t max_part_size, + std::uint64_t max_entry_size) +{ + ldpp_dout(dpp, 20) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering" << dendl; + lr::ObjectWriteOperation op; + create_meta(&op, oid, objv, oid_prefix, exclusive, max_part_size, + max_entry_size); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " create_meta failed: r=" << r << dendl; + return r; + } + r = open(dpp, std::move(ioctx), std::move(oid), fifo, y, objv); + return r; +} + +int FIFO::read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + fifo::info _info; + std::uint32_t _phs; + std::uint32_t _peo; + + auto r = get_meta(dpp, ioctx, oid, std::nullopt, &_info, &_phs, &_peo, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " get_meta failed: r=" << r << " tid=" << tid << dendl; + return r; + } + std::unique_lock l(m); + // We have a newer version already! 
+ if (_info.version.same_or_later(this->info.version)) { + info = std::move(_info); + part_header_size = _phs; + part_entry_overhead = _peo; + } + return 0; +} + +int FIFO::read_meta(const DoutPrefixProvider *dpp, optional_yield y) { + std::unique_lock l(m); + auto tid = ++next_tid; + l.unlock(); + return read_meta(dpp, tid, y); +} + +struct Reader : public Completion { + FIFO* fifo; + cb::list bl; + std::uint64_t tid; + Reader(const DoutPrefixProvider *dpp, FIFO* fifo, lr::AioCompletion* super, std::uint64_t tid) + : Completion(dpp, super), fifo(fifo), tid(tid) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + if (r >= 0) try { + fifo::op::get_meta_reply reply; + auto iter = bl.cbegin(); + decode(reply, iter); + std::unique_lock l(fifo->m); + if (reply.info.version.same_or_later(fifo->info.version)) { + fifo->info = std::move(reply.info); + fifo->part_header_size = reply.part_header_size; + fifo->part_entry_overhead = reply.part_entry_overhead; + } + } catch (const cb::error& err) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " failed to decode response err=" << err.what() + << " tid=" << tid << dendl; + r = from_error_code(err.code()); + } else { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " read_meta failed r=" << r + << " tid=" << tid << dendl; + } + complete(std::move(p), r); + } +}; + +void FIFO::read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + lr::ObjectReadOperation op; + fifo::op::get_meta gm; + cb::list in; + encode(gm, in); + auto reader = std::make_unique(dpp, this, c, tid); + auto rp = reader.get(); + auto r = ioctx.aio_exec(oid, Reader::call(std::move(reader)), fifo::op::CLASS, + fifo::op::GET_META, in, &rp->bl); + assert(r >= 0); +} + +const fifo::info& FIFO::meta() const { + return info; +} + +std::pair FIFO::get_part_layout_info() const { + return {part_header_size, part_entry_overhead}; +} + +int FIFO::push(const DoutPrefixProvider *dpp, const cb::list& bl, optional_yield y) { + return push(dpp, std::vector{ bl }, y); +} + +void FIFO::push(const DoutPrefixProvider *dpp, const cb::list& bl, lr::AioCompletion* c) { + push(dpp, std::vector{ bl }, c); +} + +int FIFO::push(const DoutPrefixProvider *dpp, const std::vector& data_bufs, optional_yield y) +{ + std::unique_lock l(m); + auto tid = ++next_tid; + auto max_entry_size = info.params.max_entry_size; + auto need_new_head = info.need_new_head(); + l.unlock(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + if (data_bufs.empty()) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " empty push, returning success tid=" << tid << dendl; + return 0; + } + + // Validate sizes + for (const auto& bl : data_bufs) { + if (bl.length() > max_entry_size) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entry bigger than max_entry_size tid=" << tid << dendl; + return -E2BIG; + } + } + + int r = 0; + if (need_new_head) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new head tid=" << tid << dendl; + r = _prepare_new_head(dpp, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _prepare_new_head failed: r=" << r + << " tid=" << tid << dendl; + return r; + } + } + + 
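
> The loop that follows packs entries into per-part batches. The admission check compares an entry's raw length against the remaining budget, while the running `batch_len` charges each admitted entry its length plus `part_entry_overhead`, so the overhead quietly shrinks the effective batch. A worked example under assumed figures (4 MiB part, 32-byte overhead; the real overhead is reported by the OSD when the FIFO is opened):

```cpp
// Illustration only, not part of this commit.
// After k admitted entries of length len:  batch_len = k * (len + oh).
// Entry k+1 is admitted while  len + batch_len <= max_part_size.
constexpr std::uint64_t max_part_size = 4 * 1024 * 1024; // 4 MiB default
constexpr std::uint64_t len = 64 * 1024;                 // 64 KiB entries
constexpr std::uint64_t oh = 32;                         // assumed overhead
// len + k*(len + oh) <= max_part_size  =>  k <= 62.97..., so entries
// 0..62 are admitted: 63 per batch, not the naive 4 MiB / 64 KiB = 64.
```
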
std::deque remaining(data_bufs.begin(), data_bufs.end()); + std::deque batch; + + uint64_t batch_len = 0; + auto retries = 0; + bool canceled = true; + while ((!remaining.empty() || !batch.empty()) && + (retries <= MAX_RACE_RETRIES)) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " preparing push: remaining=" << remaining.size() + << " batch=" << batch.size() << " retries=" << retries + << " tid=" << tid << dendl; + std::unique_lock l(m); + auto max_part_size = info.params.max_part_size; + auto overhead = part_entry_overhead; + l.unlock(); + + while (!remaining.empty() && + (remaining.front().length() + batch_len <= max_part_size)) { + /* We can send entries with data_len up to max_entry_size, + however, we want to also account the overhead when + dealing with multiple entries. Previous check doesn't + account for overhead on purpose. */ + batch_len += remaining.front().length() + overhead; + batch.push_back(std::move(remaining.front())); + remaining.pop_front(); + } + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " prepared push: remaining=" << remaining.size() + << " batch=" << batch.size() << " retries=" << retries + << " batch_len=" << batch_len + << " tid=" << tid << dendl; + + auto r = push_entries(dpp, batch, tid, y); + if (r == -ERANGE) { + canceled = true; + ++retries; + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new head tid=" << tid << dendl; + r = _prepare_new_head(dpp, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " prepare_new_head failed: r=" << r + << " tid=" << tid << dendl; + return r; + } + r = 0; + continue; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " push_entries failed: r=" << r + << " tid=" << tid << dendl; + return r; + } + // Made forward progress! + canceled = false; + retries = 0; + batch_len = 0; + if (r == ssize(batch)) { + batch.clear(); + } else { + batch.erase(batch.begin(), batch.begin() + r); + for (const auto& b : batch) { + batch_len += b.length() + part_entry_overhead; + } + } + } + if (canceled) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + return -ECANCELED; + } + return 0; +} + +struct Pusher : public Completion { + FIFO* f; + std::deque remaining; + std::deque batch; + int i = 0; + std::uint64_t tid; + bool new_heading = false; + + void prep_then_push(const DoutPrefixProvider *dpp, Ptr&& p, const unsigned successes) { + std::unique_lock l(f->m); + auto max_part_size = f->info.params.max_part_size; + auto part_entry_overhead = f->part_entry_overhead; + l.unlock(); + + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " preparing push: remaining=" << remaining.size() + << " batch=" << batch.size() << " i=" << i + << " tid=" << tid << dendl; + + uint64_t batch_len = 0; + if (successes > 0) { + if (successes == batch.size()) { + batch.clear(); + } else { + batch.erase(batch.begin(), batch.begin() + successes); + for (const auto& b : batch) { + batch_len += b.length() + part_entry_overhead; + } + } + } + + if (batch.empty() && remaining.empty()) { + complete(std::move(p), 0); + return; + } + + while (!remaining.empty() && + (remaining.front().length() + batch_len <= max_part_size)) { + + /* We can send entries with data_len up to max_entry_size, + however, we want to also account the overhead when + dealing with multiple entries. Previous check doesn't + account for overhead on purpose. 
*/ + batch_len += remaining.front().length() + part_entry_overhead; + batch.push_back(std::move(remaining.front())); + remaining.pop_front(); + } + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " prepared push: remaining=" << remaining.size() + << " batch=" << batch.size() << " i=" << i + << " batch_len=" << batch_len + << " tid=" << tid << dendl; + push(std::move(p)); + } + + void push(Ptr&& p) { + f->push_entries(batch, tid, call(std::move(p))); + } + + void new_head(const DoutPrefixProvider *dpp, Ptr&& p) { + new_heading = true; + f->_prepare_new_head(dpp, tid, call(std::move(p))); + } + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (!new_heading) { + if (r == -ERANGE) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new head tid=" << tid << dendl; + new_head(dpp, std::move(p)); + return; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " push_entries failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + i = 0; // We've made forward progress, so reset the race counter! + prep_then_push(dpp, std::move(p), r); + } else { + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " prepare_new_head failed: r=" << r + << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + new_heading = false; + handle_new_head(dpp, std::move(p), r); + } + } + + void handle_new_head(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (r == -ECANCELED) { + if (p->i == MAX_RACE_RETRIES) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + complete(std::move(p), -ECANCELED); + return; + } + ++p->i; + } else if (r) { + complete(std::move(p), r); + return; + } + + if (p->batch.empty()) { + prep_then_push(dpp, std::move(p), 0); + return; + } else { + push(std::move(p)); + return; + } + } + + Pusher(const DoutPrefixProvider *dpp, FIFO* f, std::deque&& remaining, + std::uint64_t tid, lr::AioCompletion* super) + : Completion(dpp, super), f(f), remaining(std::move(remaining)), + tid(tid) {} +}; + +void FIFO::push(const DoutPrefixProvider *dpp, const std::vector& data_bufs, + lr::AioCompletion* c) +{ + std::unique_lock l(m); + auto tid = ++next_tid; + auto max_entry_size = info.params.max_entry_size; + auto need_new_head = info.need_new_head(); + l.unlock(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + auto p = std::make_unique(dpp, this, std::deque(data_bufs.begin(), data_bufs.end()), + tid, c); + // Validate sizes + for (const auto& bl : data_bufs) { + if (bl.length() > max_entry_size) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entry bigger than max_entry_size tid=" << tid << dendl; + Pusher::complete(std::move(p), -E2BIG); + return; + } + } + + if (data_bufs.empty() ) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " empty push, returning success tid=" << tid << dendl; + Pusher::complete(std::move(p), 0); + return; + } + + if (need_new_head) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " need new head tid=" << tid << dendl; + p->new_head(dpp, std::move(p)); + } else { + p->prep_then_push(dpp, std::move(p), 0); + } +} + +int FIFO::list(const DoutPrefixProvider *dpp, int max_entries, + std::optional markstr, + std::vector* presult, bool* pmore, + optional_yield y) +{ + std::unique_lock l(m); + auto tid = ++next_tid; + 
std::int64_t part_num = info.tail_part_num; + l.unlock(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::uint64_t ofs = 0; + if (markstr) { + auto marker = to_marker(*markstr); + if (!marker) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " invalid marker string: " << markstr + << " tid= "<< tid << dendl; + return -EINVAL; + } + part_num = marker->num; + ofs = marker->ofs; + } + + std::vector result; + result.reserve(max_entries); + bool more = false; + + std::vector entries; + int r = 0; + while (max_entries > 0) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " max_entries=" << max_entries << " tid=" << tid << dendl; + bool part_more = false; + bool part_full = false; + + std::unique_lock l(m); + auto part_oid = info.part_oid(part_num); + l.unlock(); + + r = list_part(dpp, ioctx, part_oid, {}, ofs, max_entries, &entries, + &part_more, &part_full, nullptr, tid, y); + if (r == -ENOENT) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " missing part, rereading metadata" + << " tid= "<< tid << dendl; + r = read_meta(dpp, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " read_meta failed: r=" << r + << " tid= "<< tid << dendl; + return r; + } + if (part_num < info.tail_part_num) { + /* raced with trim? restart */ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " raced with trim, restarting: tid=" << tid << dendl; + max_entries += result.size(); + result.clear(); + std::unique_lock l(m); + part_num = info.tail_part_num; + l.unlock(); + ofs = 0; + continue; + } + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " assuming part was not written yet, so end of data: " + << "tid=" << tid << dendl; + more = false; + r = 0; + break; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " list_entries failed: r=" << r + << " tid= "<< tid << dendl; + return r; + } + more = part_full || part_more; + for (auto& entry : entries) { + list_entry e; + e.data = std::move(entry.data); + e.marker = marker{part_num, entry.ofs}.to_string(); + e.mtime = entry.mtime; + result.push_back(std::move(e)); + --max_entries; + if (max_entries == 0) + break; + } + entries.clear(); + if (max_entries > 0 && + part_more) { + } + + if (!part_full) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " head part is not full, so we can assume we're done: " + << "tid=" << tid << dendl; + break; + } + if (!part_more) { + ++part_num; + ofs = 0; + } + } + if (presult) + *presult = std::move(result); + if (pmore) + *pmore = more; + return 0; +} + +int FIFO::trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, optional_yield y) +{ + bool overshoot = false; + auto marker = to_marker(markstr); + if (!marker) { + return -EINVAL; + } + auto part_num = marker->num; + auto ofs = marker->ofs; + std::unique_lock l(m); + auto tid = ++next_tid; + auto hn = info.head_part_num; + const auto max_part_size = info.params.max_part_size; + if (part_num > hn) { + l.unlock(); + auto r = read_meta(dpp, tid, y); + if (r < 0) { + return r; + } + l.lock(); + auto hn = info.head_part_num; + if (part_num > hn) { + overshoot = true; + part_num = hn; + ofs = max_part_size; + } + } + if (part_num < info.tail_part_num) { + return -ENODATA; + } + auto pn = info.tail_part_num; + l.unlock(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << 
dendl; + + int r = 0; + while (pn < part_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " pn=" << pn << " tid=" << tid << dendl; + std::unique_lock l(m); + l.unlock(); + r = trim_part(dpp, pn, max_part_size, std::nullopt, false, tid, y); + if (r < 0 && r == -ENOENT) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " trim_part failed: r=" << r + << " tid= "<< tid << dendl; + return r; + } + ++pn; + } + r = trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid, y); + if (r < 0 && r != -ENOENT) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " trim_part failed: r=" << r + << " tid= "<< tid << dendl; + return r; + } + + l.lock(); + auto tail_part_num = info.tail_part_num; + auto objv = info.version; + l.unlock(); + bool canceled = tail_part_num < part_num; + int retries = 0; + while ((tail_part_num < part_num) && + canceled && + (retries <= MAX_RACE_RETRIES)) { + r = _update_meta(dpp, fifo::update{}.tail_part_num(part_num), objv, &canceled, + tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " _update_meta failed: r=" << r + << " tid= "<< tid << dendl; + return r; + } + if (canceled) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled: retries=" << retries + << " tid=" << tid << dendl; + l.lock(); + tail_part_num = info.tail_part_num; + objv = info.version; + l.unlock(); + ++retries; + } + } + if (canceled) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + return -EIO; + } + return overshoot ? -ENODATA : 0; +} + +struct Trimmer : public Completion { + FIFO* fifo; + std::int64_t part_num; + std::uint64_t ofs; + std::int64_t pn; + bool exclusive; + std::uint64_t tid; + bool update = false; + bool reread = false; + bool canceled = false; + bool overshoot = false; + int retries = 0; + + Trimmer(const DoutPrefixProvider *dpp, FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn, + bool exclusive, lr::AioCompletion* super, std::uint64_t tid) + : Completion(dpp, super), fifo(fifo), part_num(part_num), ofs(ofs), pn(pn), + exclusive(exclusive), tid(tid) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + + if (reread) { + reread = false; + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " read_meta failed: r=" + << r << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + std::unique_lock l(fifo->m); + auto hn = fifo->info.head_part_num; + const auto max_part_size = fifo->info.params.max_part_size; + const auto tail_part_num = fifo->info.tail_part_num; + l.unlock(); + if (part_num > hn) { + part_num = hn; + ofs = max_part_size; + overshoot = true; + } + if (part_num < tail_part_num) { + complete(std::move(p), -ENODATA); + return; + } + pn = tail_part_num; + if (pn < part_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " pn=" << pn << " tid=" << tid << dendl; + fifo->trim_part(dpp, pn++, max_part_size, std::nullopt, + false, tid, call(std::move(p))); + } else { + update = true; + canceled = tail_part_num < part_num; + fifo->trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid, + call(std::move(p))); + } + return; + } + + if (r == -ENOENT) { + r = 0; + } + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << (update ? 
" update_meta " : " trim ") << "failed: r=" + << r << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } + + if (!update) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " handling preceding trim callback: tid=" << tid << dendl; + retries = 0; + if (pn < part_num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " pn=" << pn << " tid=" << tid << dendl; + std::unique_lock l(fifo->m); + const auto max_part_size = fifo->info.params.max_part_size; + l.unlock(); + fifo->trim_part(dpp, pn++, max_part_size, std::nullopt, + false, tid, call(std::move(p))); + return; + } + + std::unique_lock l(fifo->m); + const auto tail_part_num = fifo->info.tail_part_num; + l.unlock(); + update = true; + canceled = tail_part_num < part_num; + fifo->trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid, + call(std::move(p))); + return; + } + + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " handling update-needed callback: tid=" << tid << dendl; + std::unique_lock l(fifo->m); + auto tail_part_num = fifo->info.tail_part_num; + auto objv = fifo->info.version; + l.unlock(); + if ((tail_part_num < part_num) && + canceled) { + if (retries > MAX_RACE_RETRIES) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" << tid << dendl; + complete(std::move(p), -EIO); + return; + } + ++retries; + fifo->_update_meta(dpp, fifo::update{} + .tail_part_num(part_num), objv, &canceled, + tid, call(std::move(p))); + } else { + complete(std::move(p), overshoot ? -ENODATA : 0); + } + } +}; + +void FIFO::trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, + lr::AioCompletion* c) { + auto marker = to_marker(markstr); + auto realmark = marker.value_or(::rgw::cls::fifo::marker{}); + std::unique_lock l(m); + const auto hn = info.head_part_num; + const auto max_part_size = info.params.max_part_size; + const auto pn = info.tail_part_num; + const auto part_oid = info.part_oid(pn); + auto tid = ++next_tid; + l.unlock(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + auto trimmer = std::make_unique(dpp, this, realmark.num, realmark.ofs, + pn, exclusive, c, tid); + if (!marker) { + Trimmer::complete(std::move(trimmer), -EINVAL); + return; + } + ++trimmer->pn; + auto ofs = marker->ofs; + if (marker->num > hn) { + trimmer->reread = true; + read_meta(dpp, tid, Trimmer::call(std::move(trimmer))); + return; + } + if (pn < marker->num) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " pn=" << pn << " tid=" << tid << dendl; + ofs = max_part_size; + } else { + trimmer->update = true; + } + trim_part(dpp, pn, ofs, std::nullopt, exclusive, + tid, Trimmer::call(std::move(trimmer))); +} + +int FIFO::get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, + fifo::part_header* header, + optional_yield y) +{ + std::unique_lock l(m); + const auto part_oid = info.part_oid(part_num); + auto tid = ++next_tid; + l.unlock(); + auto r = rgw::cls::fifo::get_part_info(dpp, ioctx, part_oid, header, tid, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " get_part_info failed: r=" + << r << " tid=" << tid << dendl; + } + return r; +} + +void FIFO::get_part_info(int64_t part_num, + fifo::part_header* header, + lr::AioCompletion* c) +{ + std::unique_lock l(m); + const auto part_oid = info.part_oid(part_num); + auto tid = ++next_tid; + l.unlock(); + auto op = 
rgw::cls::fifo::get_part_info(cct, header, tid); + auto r = ioctx.aio_operate(part_oid, c, &op, nullptr); + ceph_assert(r >= 0); +} + +struct InfoGetter : Completion { + FIFO* fifo; + fifo::part_header header; + fu2::function f; + std::uint64_t tid; + bool headerread = false; + + InfoGetter(const DoutPrefixProvider *dpp, FIFO* fifo, fu2::function f, + std::uint64_t tid, lr::AioCompletion* super) + : Completion(dpp, super), fifo(fifo), f(std::move(f)), tid(tid) {} + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (!headerread) { + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " read_meta failed: r=" + << r << " tid=" << tid << dendl; + if (f) + f(r, {}); + complete(std::move(p), r); + return; + } + + auto info = fifo->meta(); + auto hpn = info.head_part_num; + if (hpn < 0) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " no head, returning empty partinfo r=" + << r << " tid=" << tid << dendl; + if (f) + f(0, {}); + complete(std::move(p), r); + return; + } + headerread = true; + auto op = rgw::cls::fifo::get_part_info(fifo->cct, &header, tid); + std::unique_lock l(fifo->m); + auto oid = fifo->info.part_oid(hpn); + l.unlock(); + r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op, + nullptr); + ceph_assert(r >= 0); + return; + } + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " get_part_info failed: r=" + << r << " tid=" << tid << dendl; + } + + if (f) + f(r, std::move(header)); + complete(std::move(p), r); + return; + } +}; + +void FIFO::get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function f, + lr::AioCompletion* c) +{ + std::unique_lock l(m); + auto tid = ++next_tid; + l.unlock(); + auto ig = std::make_unique(dpp, this, std::move(f), tid, c); + read_meta(dpp, tid, InfoGetter::call(std::move(ig))); +} + +struct JournalProcessor : public Completion { +private: + FIFO* const fifo; + + std::vector processed; + std::multimap journal; + std::multimap::iterator iter; + std::int64_t new_tail; + std::int64_t new_head; + std::int64_t new_max; + int race_retries = 0; + bool first_pp = true; + bool canceled = false; + std::uint64_t tid; + + enum { + entry_callback, + pp_callback, + } state; + + void create_part(const DoutPrefixProvider *dpp, Ptr&& p, int64_t part_num, + std::string_view tag) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + state = entry_callback; + lr::ObjectWriteOperation op; + op.create(false); /* We don't need exclusivity, part_init ensures + we're creating from the same journal entry. 
*/ + std::unique_lock l(fifo->m); + part_init(&op, tag, fifo->info.params); + auto oid = fifo->info.part_oid(part_num); + l.unlock(); + auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op); + ceph_assert(r >= 0); + return; + } + + void remove_part(const DoutPrefixProvider *dpp, Ptr&& p, int64_t part_num, + std::string_view tag) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + state = entry_callback; + lr::ObjectWriteOperation op; + op.remove(); + std::unique_lock l(fifo->m); + auto oid = fifo->info.part_oid(part_num); + l.unlock(); + auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op); + ceph_assert(r >= 0); + return; + } + + void finish_je(const DoutPrefixProvider *dpp, Ptr&& p, int r, + const fifo::journal_entry& entry) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " finishing entry: entry=" << entry + << " tid=" << tid << dendl; + + if (entry.op == fifo::journal_entry::Op::remove && r == -ENOENT) + r = 0; + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " processing entry failed: entry=" << entry + << " r=" << r << " tid=" << tid << dendl; + complete(std::move(p), r); + return; + } else { + switch (entry.op) { + case fifo::journal_entry::Op::unknown: + case fifo::journal_entry::Op::set_head: + // Can't happen. Filtered out in process. + complete(std::move(p), -EIO); + return; + + case fifo::journal_entry::Op::create: + if (entry.part_num > new_max) { + new_max = entry.part_num; + } + break; + case fifo::journal_entry::Op::remove: + if (entry.part_num >= new_tail) { + new_tail = entry.part_num + 1; + } + break; + } + processed.push_back(entry); + } + ++iter; + process(dpp, std::move(p)); + } + + void postprocess(const DoutPrefixProvider *dpp, Ptr&& p) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + if (processed.empty()) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " nothing to update any more: race_retries=" + << race_retries << " tid=" << tid << dendl; + complete(std::move(p), 0); + return; + } + pp_run(dpp, std::move(p), 0, false); + } + +public: + + JournalProcessor(const DoutPrefixProvider *dpp, FIFO* fifo, std::uint64_t tid, lr::AioCompletion* super) + : Completion(dpp, super), fifo(fifo), tid(tid) { + std::unique_lock l(fifo->m); + journal = fifo->info.journal; + iter = journal.begin(); + new_tail = fifo->info.tail_part_num; + new_head = fifo->info.head_part_num; + new_max = fifo->info.max_push_part_num; + } + + void pp_run(const DoutPrefixProvider *dpp, Ptr&& p, int r, bool canceled) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + std::optional tail_part_num; + std::optional head_part_num; + std::optional max_part_num; + + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " failed, r=: " << r << " tid=" << tid << dendl; + complete(std::move(p), r); + } + + + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " postprocessing: race_retries=" + << race_retries << " tid=" << tid << dendl; + + if (!first_pp && r == 0 && !canceled) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " nothing to update any more: race_retries=" + << race_retries << " tid=" << tid << dendl; + complete(std::move(p), 0); + return; + } + + first_pp = false; + + if 
(canceled) { + if (race_retries >= MAX_RACE_RETRIES) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " canceled too many times, giving up: tid=" + << tid << dendl; + complete(std::move(p), -ECANCELED); + return; + } + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " update canceled, retrying: race_retries=" + << race_retries << " tid=" << tid << dendl; + + ++race_retries; + + std::vector new_processed; + std::unique_lock l(fifo->m); + for (auto& e : processed) { + auto jiter = fifo->info.journal.find(e.part_num); + /* journal entry was already processed */ + if (jiter == fifo->info.journal.end() || + !(jiter->second == e)) { + continue; + } + new_processed.push_back(e); + } + processed = std::move(new_processed); + } + + std::unique_lock l(fifo->m); + auto objv = fifo->info.version; + if (new_tail > fifo->info.tail_part_num) { + tail_part_num = new_tail; + } + + if (new_head > fifo->info.head_part_num) { + head_part_num = new_head; + } + + if (new_max > fifo->info.max_push_part_num) { + max_part_num = new_max; + } + l.unlock(); + + if (processed.empty() && + !tail_part_num && + !max_part_num) { + /* nothing to update anymore */ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " nothing to update any more: race_retries=" + << race_retries << " tid=" << tid << dendl; + complete(std::move(p), 0); + return; + } + state = pp_callback; + fifo->_update_meta(dpp, fifo::update{} + .tail_part_num(tail_part_num) + .head_part_num(head_part_num) + .max_push_part_num(max_part_num) + .journal_entries_rm(processed), + objv, &this->canceled, tid, call(std::move(p))); + return; + } + + JournalProcessor(const JournalProcessor&) = delete; + JournalProcessor& operator =(const JournalProcessor&) = delete; + JournalProcessor(JournalProcessor&&) = delete; + JournalProcessor& operator =(JournalProcessor&&) = delete; + + void process(const DoutPrefixProvider *dpp, Ptr&& p) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + while (iter != journal.end()) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " processing entry: entry=" << *iter + << " tid=" << tid << dendl; + const auto entry = iter->second; + switch (entry.op) { + case fifo::journal_entry::Op::create: + create_part(dpp, std::move(p), entry.part_num, entry.part_tag); + return; + case fifo::journal_entry::Op::set_head: + if (entry.part_num > new_head) { + new_head = entry.part_num; + } + processed.push_back(entry); + ++iter; + continue; + case fifo::journal_entry::Op::remove: + remove_part(dpp, std::move(p), entry.part_num, entry.part_tag); + return; + default: + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " unknown journaled op: entry=" << entry << " tid=" + << tid << dendl; + complete(std::move(p), -EIO); + return; + } + } + postprocess(dpp, std::move(p)); + return; + } + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " entering: tid=" << tid << dendl; + switch (state) { + case entry_callback: + finish_je(dpp, std::move(p), r, iter->second); + return; + case pp_callback: + auto c = canceled; + canceled = false; + pp_run(dpp, std::move(p), r, c); + return; + } + + abort(); + } + +}; + +void FIFO::process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c) { + auto p = std::make_unique(dpp, this, tid, c); + p->process(dpp, std::move(p)); +} + +struct Lister : Completion { + FIFO* f; + 
std::vector result; + bool more = false; + std::int64_t part_num; + std::uint64_t ofs; + int max_entries; + int r_out = 0; + std::vector entries; + bool part_more = false; + bool part_full = false; + std::vector* entries_out; + bool* more_out; + std::uint64_t tid; + + bool read = false; + + void complete(Ptr&& p, int r) { + if (r >= 0) { + if (more_out) *more_out = more; + if (entries_out) *entries_out = std::move(result); + } + Completion::complete(std::move(p), r); + } + +public: + Lister(const DoutPrefixProvider *dpp, FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries, + std::vector* entries_out, bool* more_out, + std::uint64_t tid, lr::AioCompletion* super) + : Completion(dpp, super), f(f), part_num(part_num), ofs(ofs), max_entries(max_entries), + entries_out(entries_out), more_out(more_out), tid(tid) { + result.reserve(max_entries); + } + + Lister(const Lister&) = delete; + Lister& operator =(const Lister&) = delete; + Lister(Lister&&) = delete; + Lister& operator =(Lister&&) = delete; + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (read) + handle_read(std::move(p), r); + else + handle_list(dpp, std::move(p), r); + } + + void list(Ptr&& p) { + if (max_entries > 0) { + part_more = false; + part_full = false; + entries.clear(); + + std::unique_lock l(f->m); + auto part_oid = f->info.part_oid(part_num); + l.unlock(); + + read = false; + auto op = list_part(f->cct, {}, ofs, max_entries, &r_out, + &entries, &part_more, &part_full, + nullptr, tid); + f->ioctx.aio_operate(part_oid, call(std::move(p)), &op, nullptr); + } else { + complete(std::move(p), 0); + } + } + + void handle_read(Ptr&& p, int r) { + read = false; + if (r >= 0) r = r_out; + r_out = 0; + + if (r < 0) { + complete(std::move(p), r); + return; + } + + if (part_num < f->info.tail_part_num) { + /* raced with trim? 
restart */ + max_entries += result.size(); + result.clear(); + part_num = f->info.tail_part_num; + ofs = 0; + list(std::move(p)); + return; + } + /* assuming part was not written yet, so end of data */ + more = false; + complete(std::move(p), 0); + return; + } + + void handle_list(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + if (r >= 0) r = r_out; + r_out = 0; + std::unique_lock l(f->m); + auto part_oid = f->info.part_oid(part_num); + l.unlock(); + if (r == -ENOENT) { + read = true; + f->read_meta(dpp, tid, call(std::move(p))); + return; + } + if (r < 0) { + complete(std::move(p), r); + return; + } + + more = part_full || part_more; + for (auto& entry : entries) { + list_entry e; + e.data = std::move(entry.data); + e.marker = marker{part_num, entry.ofs}.to_string(); + e.mtime = entry.mtime; + result.push_back(std::move(e)); + } + max_entries -= entries.size(); + entries.clear(); + if (max_entries > 0 && part_more) { + list(std::move(p)); + return; + } + + if (!part_full) { /* head part is not full */ + complete(std::move(p), 0); + return; + } + ++part_num; + ofs = 0; + list(std::move(p)); + } +}; + +void FIFO::list(const DoutPrefixProvider *dpp, int max_entries, + std::optional markstr, + std::vector* out, + bool* more, + lr::AioCompletion* c) { + std::unique_lock l(m); + auto tid = ++next_tid; + std::int64_t part_num = info.tail_part_num; + l.unlock(); + std::uint64_t ofs = 0; + std::optional<::rgw::cls::fifo::marker> marker; + + if (markstr) { + marker = to_marker(*markstr); + if (marker) { + part_num = marker->num; + ofs = marker->ofs; + } + } + + auto ls = std::make_unique(dpp, this, part_num, ofs, max_entries, out, + more, tid, c); + if (markstr && !marker) { + auto l = ls.get(); + l->complete(std::move(ls), -EINVAL); + } else { + ls->list(std::move(ls)); + } +} +} diff --git a/src/rgw/driver/rados/cls_fifo_legacy.h b/src/rgw/driver/rados/cls_fifo_legacy.h new file mode 100644 index 00000000000..9a35e4dd251 --- /dev/null +++ b/src/rgw/driver/rados/cls_fifo_legacy.h @@ -0,0 +1,342 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat + * Author: Adam C. Emerson + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ *
+ */
+
+#ifndef CEPH_RGW_CLS_FIFO_LEGACY_H
+#define CEPH_RGW_CLS_FIFO_LEGACY_H
+
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/rados/librados.hpp"
+#include "include/buffer.h"
+#include "include/function2.hpp"
+
+#include "common/async/yield_context.h"
+
+#include "cls/fifo/cls_fifo_types.h"
+#include "cls/fifo/cls_fifo_ops.h"
+
+#include "librados/AioCompletionImpl.h"
+
+#include "rgw_tools.h"
+
+namespace rgw::cls::fifo {
+namespace cb = ceph::buffer;
+namespace fifo = rados::cls::fifo;
+namespace lr = librados;
+
+inline constexpr std::uint64_t default_max_part_size = 4 * 1024 * 1024;
+inline constexpr std::uint64_t default_max_entry_size = 32 * 1024;
+
+void create_meta(lr::ObjectWriteOperation* op, std::string_view id,
+                 std::optional<fifo::objv> objv,
+                 std::optional<std::string_view> oid_prefix,
+                 bool exclusive = false,
+                 std::uint64_t max_part_size = default_max_part_size,
+                 std::uint64_t max_entry_size = default_max_entry_size);
+int get_meta(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid,
+             std::optional<fifo::objv> objv, fifo::info* info,
+             std::uint32_t* part_header_size,
+             std::uint32_t* part_entry_overhead,
+             std::uint64_t tid, optional_yield y,
+             bool probe = false);
+struct marker {
+  std::int64_t num = 0;
+  std::uint64_t ofs = 0;
+
+  marker() = default;
+  marker(std::int64_t num, std::uint64_t ofs) : num(num), ofs(ofs) {}
+  static marker max() {
+    return { std::numeric_limits<std::int64_t>::max(),
+             std::numeric_limits<std::uint64_t>::max() };
+  }
+
+  std::string to_string() {
+    return fmt::format("{:0>20}:{:0>20}", num, ofs);
+  }
+};
+
+struct list_entry {
+  cb::list data;
+  std::string marker;
+  ceph::real_time mtime;
+};
+
+using part_info = fifo::part_header;
+
+/// This is an implementation of FIFO using librados to facilitate
+/// backports. Please see /src/neorados/cls/fifo.h for full
+/// information.
+///
+/// This library uses optional_yield. Please see
+/// /src/common/async/yield_context.h. In summary, optional_yield
+/// contains either a spawn::yield_context (in which case the current
+/// coroutine is suspended until completion) or null_yield (in which
+/// case the current thread is blocked until completion.)
+///
+/// Please see the librados documentation for information on
+/// AioCompletion and IoCtx.
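
> To make the two calling styles described above concrete, a hedged usage sketch (not part of the commit): `fifo` and `dpp` are assumed to already exist, and the `optional_yield` constructor shown is the io_context/yield_context pair from yield_context.h.

```cpp
// Blocking style: null_yield parks the calling thread until the OSD replies.
ceph::buffer::list bl;
bl.append("payload");
int r = fifo->push(dpp, bl, null_yield);

// Coroutine style: inside
//   spawn::spawn(io_context, [&](spawn::yield_context yield) { ... });
// the same call suspends only the current coroutine:
//   int r = fifo->push(dpp, bl, optional_yield{io_context, yield});
```
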
+
+class FIFO {
+  friend struct Reader;
+  friend struct Updater;
+  friend struct Trimmer;
+  friend struct InfoGetter;
+  friend struct Pusher;
+  friend struct NewPartPreparer;
+  friend struct NewHeadPreparer;
+  friend struct JournalProcessor;
+  friend struct Lister;
+
+  mutable lr::IoCtx ioctx;
+  CephContext* cct = static_cast<CephContext*>(ioctx.cct());
+  const std::string oid;
+  std::mutex m;
+  std::uint64_t next_tid = 0;
+
+  fifo::info info;
+
+  std::uint32_t part_header_size = 0xdeadbeef;
+  std::uint32_t part_entry_overhead = 0xdeadbeef;
+
+  std::optional<marker> to_marker(std::string_view s);
+
+  FIFO(lr::IoCtx&& ioc,
+       std::string oid)
+    : ioctx(std::move(ioc)), oid(oid) {}
+
+  std::string generate_tag() const;
+
+  int apply_update(const DoutPrefixProvider *dpp,
+                   fifo::info* info,
+                   const fifo::objv& objv,
+                   const fifo::update& update,
+                   std::uint64_t tid);
+  int _update_meta(const DoutPrefixProvider *dpp, const fifo::update& update,
+                   fifo::objv version, bool* pcanceled,
+                   std::uint64_t tid, optional_yield y);
+  void _update_meta(const DoutPrefixProvider *dpp, const fifo::update& update,
+                    fifo::objv version, bool* pcanceled,
+                    std::uint64_t tid, lr::AioCompletion* c);
+  int create_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid,
+                  optional_yield y);
+  int remove_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid,
+                  optional_yield y);
+  int process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y);
+  void process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c);
+  int _prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, optional_yield y);
+  void _prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, lr::AioCompletion* c);
+  int _prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y);
+  void _prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c);
+  int push_entries(const DoutPrefixProvider *dpp, const std::deque<cb::list>& data_bufs,
+                   std::uint64_t tid, optional_yield y);
+  void push_entries(const std::deque<cb::list>& data_bufs,
+                    std::uint64_t tid, lr::AioCompletion* c);
+  int trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs,
+                std::optional<std::string_view> tag, bool exclusive,
+                std::uint64_t tid, optional_yield y);
+  void trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs,
+                 std::optional<std::string_view> tag, bool exclusive,
+                 std::uint64_t tid, lr::AioCompletion* c);
+
+  /// Force refresh of metadata, yielding/blocking style
+  int read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y);
+  /// Force refresh of metadata, with a librados Completion
+  void read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c);
+
+public:
+
+  FIFO(const FIFO&) = delete;
+  FIFO& operator =(const FIFO&) = delete;
+  FIFO(FIFO&&) = delete;
+  FIFO& operator =(FIFO&&) = delete;
+
+  /// Open an existing FIFO.
+  static int open(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, //< IO Context
+                  std::string oid, //< OID for metadata object
+                  std::unique_ptr<FIFO>* fifo, //< OUT: Pointer to FIFO object
+                  optional_yield y, //< Optional yield context
+                  /// Operation will fail if FIFO is not at this version
+                  std::optional<fifo::objv> objv = std::nullopt,
+                  /// Probing for existence, don't print errors if we
+                  /// can't find it.
+                  bool probe = false);
+  /// Create a new or open an existing FIFO.
+  static int create(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, //< IO Context
+                    std::string oid, //< OID for metadata object
+                    std::unique_ptr<FIFO>* fifo, //< OUT: Pointer to FIFO object
+                    optional_yield y, //< Optional yield context
+                    /// Operation will fail if the FIFO exists and is
+                    /// not of this version.
+                    std::optional<fifo::objv> objv = std::nullopt,
+                    /// Prefix for all objects
+                    std::optional<std::string_view> oid_prefix = std::nullopt,
+                    /// Fail if the FIFO already exists
+                    bool exclusive = false,
+                    /// Maximum allowed size of parts
+                    std::uint64_t max_part_size = default_max_part_size,
+                    /// Maximum allowed size of entries
+                    std::uint64_t max_entry_size = default_max_entry_size);
+
+  /// Force refresh of metadata, yielding/blocking style
+  int read_meta(const DoutPrefixProvider *dpp, optional_yield y);
+  /// Get currently known metadata
+  const fifo::info& meta() const;
+  /// Get partition header and entry overhead size
+  std::pair<std::uint32_t, std::uint32_t> get_part_layout_info() const;
+  /// Push an entry to the FIFO
+  int push(const DoutPrefixProvider *dpp,
+           const cb::list& bl, //< Entry to push
+           optional_yield y //< Optional yield
+    );
+  /// Push an entry to the FIFO
+  void push(const DoutPrefixProvider *dpp, const cb::list& bl, //< Entry to push
+            lr::AioCompletion* c //< Async Completion
+    );
+  /// Push entries to the FIFO
+  int push(const DoutPrefixProvider *dpp,
+           const std::vector<cb::list>& data_bufs, //< Entries to push
+           optional_yield y //< Optional yield
+    );
+  /// Push entries to the FIFO
+  void push(const DoutPrefixProvider *dpp, const std::vector<cb::list>& data_bufs, //< Entries to push
+            lr::AioCompletion* c //< Async Completion
+    );
+  /// List entries
+  int list(const DoutPrefixProvider *dpp,
+           int max_entries, //< Maximum entries to list
+           /// Point after which to begin listing. Start at tail if null
+           std::optional<std::string_view> markstr,
+           std::vector<list_entry>* out, //< OUT: entries
+           /// OUT: True if more entries in FIFO beyond the last returned
+           bool* more,
+           optional_yield y //< Optional yield
+    );
+  void list(const DoutPrefixProvider *dpp,
+            int max_entries, //< Maximum entries to list
+            /// Point after which to begin listing. Start at tail if null
+            std::optional<std::string_view> markstr,
+            std::vector<list_entry>* out, //< OUT: entries
+            /// OUT: True if more entries in FIFO beyond the last returned
+            bool* more,
+            lr::AioCompletion* c //< Async Completion
+    );
+  /// Trim entries, coroutine/block style
+  int trim(const DoutPrefixProvider *dpp,
+           std::string_view markstr, //< Position to which to trim, inclusive
+           bool exclusive, //< If true, do not trim the target entry
+                           //< itself, just all those before it.
+           optional_yield y //< Optional yield
+    );
+  /// Trim entries, librados AioCompletion style
+  void trim(const DoutPrefixProvider *dpp,
+            std::string_view markstr, //< Position to which to trim, inclusive
+            bool exclusive, //< If true, do not trim the target entry
+                            //< itself, just all those before it.
+            lr::AioCompletion* c //< librados AIO Completion
+    );
+  /// Get part info
+  int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, /// Part number
+                    fifo::part_header* header, //< OUT: Information
+                    optional_yield y //< Optional yield
+    );
+  /// Get part info
+  void get_part_info(int64_t part_num, //< Part number
+                     fifo::part_header* header, //< OUT: Information
+                     lr::AioCompletion* c //< AIO Completion
+    );
+  /// A convenience method to fetch the part information for the FIFO
+  /// head, using librados::AioCompletion, since
+  /// librados::AioCompletions compose lousily.
+  void get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function< //< Function to receive info
+                       void(int r, fifo::part_header&&)>,
+                     lr::AioCompletion* c //< AIO Completion
+    );
+};
+
+template<typename T>
+struct Completion {
+private:
+  const DoutPrefixProvider *_dpp;
+  lr::AioCompletion* _cur = nullptr;
+  lr::AioCompletion* _super;
+public:
+
+  using Ptr = std::unique_ptr<T>;
+
+  lr::AioCompletion* cur() const {
+    return _cur;
+  }
+  lr::AioCompletion* super() const {
+    return _super;
+  }
+
+  Completion(const DoutPrefixProvider *dpp, lr::AioCompletion* super) : _dpp(dpp), _super(super) {
+    super->pc->get();
+  }
+
+  ~Completion() {
+    if (_super) {
+      _super->pc->put();
+    }
+    if (_cur)
+      _cur->release();
+    _super = nullptr;
+    _cur = nullptr;
+  }
+
+  // The only times that aio_operate can return an error are:
+  // 1. The completion contains a null pointer. This should just
+  //    crash, and in our case it does.
+  // 2. An attempt is made to write to a snapshot. RGW doesn't use
+  //    snapshots, so we don't care.
+  //
+  // So we will just assert that initiating an Aio operation succeeds
+  // and not worry about recovering.
+  static lr::AioCompletion* call(Ptr&& p) {
+    p->_cur = lr::Rados::aio_create_completion(static_cast<void*>(p.get()),
+                                               &cb);
+    auto c = p->_cur;
+    p.release();
+    return c;
+  }
+  static void complete(Ptr&& p, int r) {
+    auto c = p->_super;
+    p->_super = nullptr;
+    rgw_complete_aio_completion(c, r);
+  }
+
+  static void cb(lr::completion_t, void* arg) {
+    auto t = static_cast<T*>(arg);
+    auto r = t->_cur->get_return_value();
+    t->_cur->release();
+    t->_cur = nullptr;
+    t->handle(t->_dpp, Ptr(t), r);
+  }
+};
+
+}
+
+#endif // CEPH_RGW_CLS_FIFO_LEGACY_H
diff --git a/src/rgw/driver/rados/config/impl.cc b/src/rgw/driver/rados/config/impl.cc
new file mode 100644
index 00000000000..f1b2befadcd
--- /dev/null
+++ b/src/rgw/driver/rados/config/impl.cc
@@ -0,0 +1,129 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
diff --git a/src/rgw/driver/rados/config/impl.cc b/src/rgw/driver/rados/config/impl.cc
new file mode 100644
index 00000000000..f1b2befadcd
--- /dev/null
+++ b/src/rgw/driver/rados/config/impl.cc
@@ -0,0 +1,129 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "impl.h"
+
+#include "common/async/yield_context.h"
+#include "common/errno.h"
+#include "rgw_string.h"
+#include "rgw_zone.h"
+
+namespace rgw::rados {
+
+// default pool names
+constexpr std::string_view default_zone_root_pool = "rgw.root";
+constexpr std::string_view default_zonegroup_root_pool = "rgw.root";
+constexpr std::string_view default_realm_root_pool = "rgw.root";
+constexpr std::string_view default_period_root_pool = "rgw.root";
+
+static rgw_pool default_pool(std::string_view name,
+                             std::string_view default_name)
+{
+  return std::string{name_or_default(name, default_name)};
+}
+
+ConfigImpl::ConfigImpl(const ceph::common::ConfigProxy& conf)
+  : realm_pool(default_pool(conf->rgw_realm_root_pool,
+                            default_realm_root_pool)),
+    period_pool(default_pool(conf->rgw_period_root_pool,
+                             default_period_root_pool)),
+    zonegroup_pool(default_pool(conf->rgw_zonegroup_root_pool,
+                                default_zonegroup_root_pool)),
+    zone_pool(default_pool(conf->rgw_zone_root_pool,
+                           default_zone_root_pool))
+{
+}
+
+int ConfigImpl::read(const DoutPrefixProvider* dpp, optional_yield y,
+                     const rgw_pool& pool, const std::string& oid,
+                     bufferlist& bl, RGWObjVersionTracker* objv)
+{
+  librados::IoCtx ioctx;
+  int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false);
+  if (r < 0) {
+    return r;
+  }
+  librados::ObjectReadOperation op;
+  if (objv) {
+    objv->prepare_op_for_read(&op);
+  }
+  op.read(0, 0, &bl, nullptr);
+  return rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y);
+}
+
+int ConfigImpl::write(const DoutPrefixProvider* dpp, optional_yield y,
+                      const rgw_pool& pool, const std::string& oid,
+                      Create create, const bufferlist& bl,
+                      RGWObjVersionTracker* objv)
+{
+  librados::IoCtx ioctx;
+  int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  switch (create) {
+    case Create::MustNotExist: op.create(true); break;
+    case Create::MayExist: op.create(false); break;
+    case Create::MustExist: op.assert_exists(); break;
+  }
+  if (objv) {
+    objv->prepare_op_for_write(&op);
+  }
+  op.write_full(bl);
+
+  r = rgw_rados_operate(dpp, ioctx, oid, &op, y);
+  if (r >= 0 && objv) {
+    objv->apply_write();
+  }
+  return r;
+}
+
+int ConfigImpl::remove(const DoutPrefixProvider* dpp, optional_yield y,
+                       const rgw_pool& pool, const std::string& oid,
+                       RGWObjVersionTracker* objv)
+{
+  librados::IoCtx ioctx;
+  int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (objv) {
+    objv->prepare_op_for_write(&op);
+  }
+  op.remove();
+
+  r = rgw_rados_operate(dpp, ioctx, oid, &op, y);
+  if (r >= 0 && objv) {
+    objv->apply_write();
+  }
+  return r;
+}
+
+int ConfigImpl::notify(const DoutPrefixProvider* dpp, optional_yield y,
+                       const rgw_pool& pool, const std::string& oid,
+                       bufferlist& bl, uint64_t timeout_ms)
+{
+  librados::IoCtx ioctx;
+  int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false);
+  if (r < 0) {
+    return r;
+  }
+  return rgw_rados_notify(dpp, ioctx, oid, bl, timeout_ms, nullptr, y);
+}
+
+} // namespace rgw::rados
diff --git a/src/rgw/driver/rados/config/impl.h b/src/rgw/driver/rados/config/impl.h
new file mode 100644
index 00000000000..3aed451f996
--- /dev/null
+++ b/src/rgw/driver/rados/config/impl.h
@@ -0,0 +1,139 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "include/rados/librados.hpp"
+#include "common/dout.h"
+#include "rgw_basic_types.h"
+#include "rgw_tools.h"
+#include "rgw_sal_config.h"
+
+namespace rgw::rados {
+
+// write options that control object creation
+enum class Create {
+  MustNotExist, // fail with EEXIST if the object already exists
+  MayExist, // create if the object didn't exist, overwrite if it did
+  MustExist, // fail with ENOENT if the object doesn't exist
+};
+
+struct ConfigImpl {
+  librados::Rados rados;
+
+  const rgw_pool realm_pool;
+  const rgw_pool period_pool;
+  const rgw_pool zonegroup_pool;
+  const rgw_pool zone_pool;
+
+  ConfigImpl(const ceph::common::ConfigProxy& conf);
+
+  int read(const DoutPrefixProvider* dpp, optional_yield y,
+           const rgw_pool& pool, const std::string& oid,
+           bufferlist& bl, RGWObjVersionTracker* objv);
+
+  template <typename T>
+  int read(const DoutPrefixProvider* dpp, optional_yield y,
+           const rgw_pool& pool, const std::string& oid,
+           T& data, RGWObjVersionTracker* objv)
+  {
+    bufferlist bl;
+    int r = read(dpp, y, pool, oid, bl, objv);
+    if (r < 0) {
+      return r;
+    }
+    try {
+      auto p = bl.cbegin();
+      decode(data, p);
+    } catch (const buffer::error& err) {
+      ldpp_dout(dpp, 0) << "ERROR: failed to decode obj from "
+          << pool << ":" << oid << dendl;
+      return -EIO;
+    }
+    return 0;
+  }
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const rgw_pool& pool, const std::string& oid, Create create,
+            const bufferlist& bl, RGWObjVersionTracker* objv);
+
+  template <typename T>
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const rgw_pool& pool, const std::string& oid, Create create,
+            const T& data, RGWObjVersionTracker* objv)
+  {
+    bufferlist bl;
+    encode(data, bl);
+
+    return write(dpp, y, pool, oid, create, bl, objv);
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y,
+             const rgw_pool& pool, const std::string& oid,
+             RGWObjVersionTracker* objv);
+
+  int list(const DoutPrefixProvider* dpp, optional_yield y,
+           const rgw_pool& pool, const std::string& marker,
+           std::regular_invocable<std::string> auto filter,
+           std::span<std::string> entries,
+           sal::ListResult<std::string>& result)
+  {
+    librados::IoCtx ioctx;
+    int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false);
+    if (r < 0) {
+      return r;
+    }
+    librados::ObjectCursor oc;
+    if (!oc.from_str(marker)) {
+      ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl;
+      return -EINVAL;
+    }
+    std::size_t count = 0;
+    try {
+      auto iter = ioctx.nobjects_begin(oc);
+      const auto end = ioctx.nobjects_end();
+      for (; count < entries.size() && iter != end; ++iter) {
+        std::string entry = filter(iter->get_oid());
+        if (!entry.empty()) {
+          entries[count++] = std::move(entry);
+        }
+      }
+      if (iter == end) {
+        result.next.clear();
+      } else {
+        result.next = iter.get_cursor().to_str();
+      }
+    } catch (const std::exception& e) {
+      ldpp_dout(dpp, 10) << "NObjectIterator exception " << e.what() << dendl;
+      return -EIO;
+    }
+    result.entries = entries.first(count);
+    return 0;
+  }
+
+  int notify(const DoutPrefixProvider* dpp, optional_yield y,
+             const rgw_pool& pool, const std::string& oid,
+             bufferlist& bl, uint64_t timeout_ms);
+};
+
+inline std::string_view name_or_default(std::string_view name,
+                                        std::string_view default_name)
+{
+  if (!name.empty()) {
+    return name;
+  }
+  return default_name;
+}
+
+} // namespace rgw::rados
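`ConfigImpl::write` maps each `Create` mode onto a librados write op (exclusive create, unconditional create, or `assert_exists`), and an optional `RGWObjVersionTracker` turns a read/write pair into a compare-and-swap on the object's version. A minimal caller sketch; the function name and the `"example_oid"` object are assumptions for illustration, not part of this commit:

```cpp
// Hypothetical caller: create an object only if it doesn't exist yet,
// falling back to reading the winner's value when we lose the race.
int put_or_get(const DoutPrefixProvider* dpp, optional_yield y,
               rgw::rados::ConfigImpl& impl, const rgw_pool& pool,
               std::string& value)
{
  RGWObjVersionTracker objv;
  objv.generate_new_write_ver(dpp->get_cct());

  int r = impl.write(dpp, y, pool, "example_oid",
                     rgw::rados::Create::MustNotExist, value, &objv);
  if (r == -EEXIST) {
    // another writer created it first; read their value instead
    r = impl.read(dpp, y, pool, "example_oid", value, nullptr);
  }
  return r;
}
```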
diff --git a/src/rgw/driver/rados/config/period.cc b/src/rgw/driver/rados/config/period.cc
new file mode 100644
index 00000000000..bc3fa27e72c
--- /dev/null
+++ b/src/rgw/driver/rados/config/period.cc
@@ -0,0 +1,230 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "common/errno.h"
+#include "rgw_zone.h"
+#include "driver/rados/config/store.h"
+
+#include "impl.h"
+
+namespace rgw::rados {
+
+// period oids
+constexpr std::string_view period_info_oid_prefix = "periods.";
+constexpr std::string_view period_latest_epoch_info_oid = ".latest_epoch";
+constexpr std::string_view period_staging_suffix = ":staging";
+
+static std::string period_oid(std::string_view period_id, uint32_t epoch)
+{
+  // omit the epoch for the staging period
+  if (period_id.ends_with(period_staging_suffix)) {
+    return string_cat_reserve(period_info_oid_prefix, period_id);
+  }
+  return fmt::format("{}{}.{}", period_info_oid_prefix, period_id, epoch);
+}
+
+static std::string latest_epoch_oid(const ceph::common::ConfigProxy& conf,
+                                    std::string_view period_id)
+{
+  return string_cat_reserve(
+      period_info_oid_prefix, period_id,
+      name_or_default(conf->rgw_period_latest_epoch_info_oid,
+                      period_latest_epoch_info_oid));
+}
+
+static int read_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y,
+                             ConfigImpl* impl, std::string_view period_id,
+                             uint32_t& epoch, RGWObjVersionTracker* objv)
+{
+  const auto& pool = impl->period_pool;
+  const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id);
+  RGWPeriodLatestEpochInfo latest;
+  int r = impl->read(dpp, y, pool, latest_oid, latest, objv);
+  if (r >= 0) {
+    epoch = latest.epoch;
+  }
+  return r;
+}
+
+static int write_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y,
+                              ConfigImpl* impl, bool exclusive,
+                              std::string_view period_id, uint32_t epoch,
+                              RGWObjVersionTracker* objv)
+{
+  const auto& pool = impl->period_pool;
+  const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id);
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+  RGWPeriodLatestEpochInfo latest{epoch};
+  return impl->write(dpp, y, pool, latest_oid, create, latest, objv);
+}
+
+static int delete_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y,
+                               ConfigImpl* impl, std::string_view period_id,
+                               RGWObjVersionTracker* objv)
+{
+  const auto& pool = impl->period_pool;
+  const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id);
+  return impl->remove(dpp, y, pool, latest_oid, objv);
+}
+
+static int update_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y,
+                               ConfigImpl* impl, std::string_view period_id,
+                               uint32_t epoch)
+{
+  static constexpr int MAX_RETRIES = 20;
+
+  for (int i = 0; i < MAX_RETRIES; i++) {
+    uint32_t existing_epoch = 0;
+    RGWObjVersionTracker objv;
+    bool exclusive = false;
+
+    // read existing epoch
+    int r = read_latest_epoch(dpp, y, impl, period_id, existing_epoch, &objv);
+    if (r == -ENOENT) {
+      // use an exclusive create to set the epoch atomically
+      exclusive = true;
+      objv.generate_new_write_ver(dpp->get_cct());
+      ldpp_dout(dpp, 20) << "creating initial latest_epoch=" << epoch
+          << " for period=" << period_id << dendl;
+    } else if (r < 0) {
+      ldpp_dout(dpp, 0) << "ERROR: failed to read latest_epoch" << dendl;
+      return r;
+    } else if (epoch <= existing_epoch) {
+      r = -EEXIST; // fail with EEXIST if epoch is not newer
+      ldpp_dout(dpp, 10) << "found existing latest_epoch " << existing_epoch
+          << " >= given epoch " << epoch << ", returning r=" << r << dendl;
+      return r;
+    } else {
+      ldpp_dout(dpp, 20) << "updating latest_epoch from " << existing_epoch
+          << " -> " << epoch << " on period=" << period_id << dendl;
+    }
+
+    r = write_latest_epoch(dpp, y, impl, exclusive, period_id, epoch, &objv);
+    if (r == -EEXIST) {
+      continue; // exclusive create raced with another update, retry
+    } else if (r == -ECANCELED) {
+      continue; // write raced with a conflicting version, retry
+    }
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "ERROR: failed to write latest_epoch" << dendl;
+      return r;
+    }
+    return 0; // return success
+  }
+
+  return -ECANCELED; // fail after max retries
+}
+
+int RadosConfigStore::create_period(const DoutPrefixProvider* dpp,
+                                    optional_yield y, bool exclusive,
+                                    const RGWPeriod& info)
+{
+  if (info.get_id().empty()) {
+    ldpp_dout(dpp, 0) << "period cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.get_epoch() == 0) {
+    ldpp_dout(dpp, 0) << "period cannot have an empty epoch" << dendl;
+    return -EINVAL;
+  }
+  const auto& pool = impl->period_pool;
+  const auto info_oid = period_oid(info.get_id(), info.get_epoch());
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+  RGWObjVersionTracker objv;
+  objv.generate_new_write_ver(dpp->get_cct());
+  int r = impl->write(dpp, y, pool, info_oid, create, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  (void) update_latest_epoch(dpp, y, impl.get(), info.get_id(), info.get_epoch());
+  return 0;
+}
+
+int RadosConfigStore::read_period(const DoutPrefixProvider* dpp,
+                                  optional_yield y,
+                                  std::string_view period_id,
+                                  std::optional<uint32_t> epoch,
+                                  RGWPeriod& info)
+{
+  int r = 0;
+  if (!epoch) {
+    epoch = 0;
+    r = read_latest_epoch(dpp, y, impl.get(), period_id, *epoch, nullptr);
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  const auto& pool = impl->period_pool;
+  const auto info_oid = period_oid(period_id, *epoch);
+  return impl->read(dpp, y, pool, info_oid, info, nullptr);
+}
+
+int RadosConfigStore::delete_period(const DoutPrefixProvider* dpp,
+                                    optional_yield y,
+                                    std::string_view period_id)
+{
+  const auto& pool = impl->period_pool;
+
+  // read the latest_epoch
+  uint32_t latest_epoch = 0;
+  RGWObjVersionTracker latest_objv;
+  int r = read_latest_epoch(dpp, y, impl.get(), period_id,
+                            latest_epoch, &latest_objv);
+  if (r < 0 && r != -ENOENT) { // just delete epoch=0 on ENOENT
+    ldpp_dout(dpp, 0) << "failed to read latest epoch for period "
+        << period_id << ": " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  for (uint32_t epoch = 0; epoch <= latest_epoch; epoch++) {
+    const auto info_oid = period_oid(period_id, epoch);
+    r = impl->remove(dpp, y, pool, info_oid, nullptr);
+    if (r < 0 && r != -ENOENT) { // ignore ENOENT
+      ldpp_dout(dpp, 0) << "failed to delete period " << info_oid
+          << ": " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  return delete_latest_epoch(dpp, y, impl.get(), period_id, &latest_objv);
+}
+
+int RadosConfigStore::list_period_ids(const DoutPrefixProvider* dpp,
+                                      optional_yield y,
+                                      const std::string& marker,
+                                      std::span<std::string> entries,
+                                      sal::ListResult<std::string>& result)
+{
+  const auto& pool = impl->period_pool;
+  constexpr auto prefix = [] (std::string oid) -> std::string {
+    if (!oid.starts_with(period_info_oid_prefix)) {
+      return {};
+    }
+    if (!oid.ends_with(period_latest_epoch_info_oid)) {
+      return {};
+    }
+    // trim the prefix and suffix
+    const std::size_t count = oid.size() -
+        period_info_oid_prefix.size() -
+        period_latest_epoch_info_oid.size();
+    return oid.substr(period_info_oid_prefix.size(), count);
+  };
+
+  return impl->list(dpp, y, pool, marker, prefix, entries, result);
+}
+
+} // namespace rgw::rados
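`update_latest_epoch` above is a classic optimistic read-modify-write: read the current epoch with its version, conditionally write back, and retry on `-EEXIST` (lost an exclusive-create race) or `-ECANCELED` (version mismatch). The object names it operates on follow a simple scheme; a standalone re-derivation for illustration, where `"p1"` is a made-up period id and the default latest-epoch suffix is assumed:

```cpp
// Illustrative re-derivation of the period naming scheme; mirrors
// period_oid() and latest_epoch_oid() above under default settings.
#include <cassert>
#include <string>

int main() {
  const std::string prefix = "periods.";  // period_info_oid_prefix
  // period_oid("p1", 3):
  assert(prefix + "p1" + "." + "3" == "periods.p1.3");
  // staging periods omit the epoch:
  assert(prefix + "p1:staging" == "periods.p1:staging");
  // latest_epoch_oid(conf, "p1") with the ".latest_epoch" default:
  assert(prefix + "p1" + ".latest_epoch" == "periods.p1.latest_epoch");
}
```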
diff --git a/src/rgw/driver/rados/config/period_config.cc b/src/rgw/driver/rados/config/period_config.cc
new file mode 100644
index 00000000000..ec984ebdc8c
--- /dev/null
+++ b/src/rgw/driver/rados/config/period_config.cc
@@ -0,0 +1,55 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_zone.h"
+#include "driver/rados/config/store.h"
+
+#include "impl.h"
+
+namespace rgw::rados {
+
+// period config oids
+constexpr std::string_view period_config_prefix = "period_config.";
+constexpr std::string_view period_config_realm_default = "default";
+
+std::string period_config_oid(std::string_view realm_id)
+{
+  if (realm_id.empty()) {
+    realm_id = period_config_realm_default;
+  }
+  return string_cat_reserve(period_config_prefix, realm_id);
+}
+
+int RadosConfigStore::read_period_config(const DoutPrefixProvider* dpp,
+                                         optional_yield y,
+                                         std::string_view realm_id,
+                                         RGWPeriodConfig& info)
+{
+  const auto& pool = impl->period_pool;
+  const auto oid = period_config_oid(realm_id);
+  return impl->read(dpp, y, pool, oid, info, nullptr);
+}
+
+int RadosConfigStore::write_period_config(const DoutPrefixProvider* dpp,
+                                          optional_yield y, bool exclusive,
+                                          std::string_view realm_id,
+                                          const RGWPeriodConfig& info)
+{
+  const auto& pool = impl->period_pool;
+  const auto oid = period_config_oid(realm_id);
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+  return impl->write(dpp, y, pool, oid, create, info, nullptr);
+}
+
+} // namespace rgw::rados
diff --git a/src/rgw/driver/rados/config/realm.cc b/src/rgw/driver/rados/config/realm.cc
new file mode 100644
index 00000000000..331e0ffd26e
--- /dev/null
+++ b/src/rgw/driver/rados/config/realm.cc
@@ -0,0 +1,364 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "common/errno.h"
+#include "rgw_realm_watcher.h"
+#include "rgw_zone.h"
+#include "driver/rados/config/store.h"
+
+#include "impl.h"
+
+namespace rgw::rados {
+
+// realm oids
+constexpr std::string_view realm_names_oid_prefix = "realms_names.";
+constexpr std::string_view realm_info_oid_prefix = "realms.";
+constexpr std::string_view realm_control_oid_suffix = ".control";
+constexpr std::string_view default_realm_info_oid = "default.realm";
+
+static std::string realm_info_oid(std::string_view realm_id)
+{
+  return string_cat_reserve(realm_info_oid_prefix, realm_id);
+}
+static std::string realm_name_oid(std::string_view realm_id)
+{
+  return string_cat_reserve(realm_names_oid_prefix, realm_id);
+}
+static std::string realm_control_oid(std::string_view realm_id)
+{
+  return string_cat_reserve(realm_info_oid_prefix, realm_id,
+                            realm_control_oid_suffix);
+}
+static std::string default_realm_oid(const ceph::common::ConfigProxy& conf)
+{
+  return std::string{name_or_default(conf->rgw_default_realm_info_oid,
+                                     default_realm_info_oid)};
+}
+
+
+int RadosConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp,
+                                             optional_yield y, bool exclusive,
+                                             std::string_view realm_id)
+{
+  const auto& pool = impl->realm_pool;
+  const auto oid = default_realm_oid(dpp->get_cct()->_conf);
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  default_info.default_id = realm_id;
+
+  return impl->write(dpp, y, pool, oid, create, default_info, nullptr);
+}
+
+int RadosConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            std::string& realm_id)
+{
+  const auto& pool = impl->realm_pool;
+  const auto oid = default_realm_oid(dpp->get_cct()->_conf);
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  int r = impl->read(dpp, y, pool, oid, default_info, nullptr);
+  if (r >= 0) {
+    realm_id = default_info.default_id;
+  }
+  return r;
+}
+
+int RadosConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp,
+                                              optional_yield y)
+{
+  const auto& pool = impl->realm_pool;
+  const auto oid = default_realm_oid(dpp->get_cct()->_conf);
+
+  return impl->remove(dpp, y, pool, oid, nullptr);
+}
+
+
+class RadosRealmWriter : public sal::RealmWriter {
+  ConfigImpl* impl;
+  RGWObjVersionTracker objv;
+  std::string realm_id;
+  std::string realm_name;
+ public:
+  RadosRealmWriter(ConfigImpl* impl, RGWObjVersionTracker objv,
+                   std::string_view realm_id, std::string_view realm_name)
+    : impl(impl), objv(std::move(objv)),
+      realm_id(realm_id), realm_name(realm_name)
+  {
+  }
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWRealm& info) override
+  {
+    if (realm_id != info.get_id() || realm_name != info.get_name()) {
+      return -EINVAL; // can't modify realm id or name directly
+    }
+
+    const auto& pool = impl->realm_pool;
+    const auto info_oid = realm_info_oid(info.get_id());
+    return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+  }
+
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWRealm& info, std::string_view new_name) override
+  {
+    if (realm_id != info.get_id() || realm_name != info.get_name()) {
+      return -EINVAL; // can't modify realm id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    const auto& pool = impl->realm_pool;
+    const auto name = RGWNameToId{info.get_id()};
+    const auto info_oid = realm_info_oid(info.get_id());
+    const auto old_oid = realm_name_oid(info.get_name());
+    const auto new_oid = realm_name_oid(new_name);
+
+    // link the new name
+    RGWObjVersionTracker new_objv;
+    new_objv.generate_new_write_ver(dpp->get_cct());
+    int r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist,
+                        name, &new_objv);
+    if (r < 0) {
+      return r;
+    }
+
+    // write the info with updated name
+    info.set_name(std::string{new_name});
+    r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+    if (r < 0) {
+      // on failure, unlink the new name
+      (void) impl->remove(dpp, y, pool, new_oid, &new_objv);
+      return r;
+    }
+
+    // unlink the old name
+    (void) impl->remove(dpp, y, pool, old_oid, nullptr);
+
+    realm_name = new_name;
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    const auto& pool = impl->realm_pool;
+    const auto info_oid = realm_info_oid(realm_id);
+    int r = impl->remove(dpp, y, pool, info_oid, &objv);
+    if (r < 0) {
+      return r;
+    }
+    const auto name_oid = realm_name_oid(realm_name);
+    (void) impl->remove(dpp, y, pool, name_oid, nullptr);
+    const auto control_oid = realm_control_oid(realm_id);
+    (void) impl->remove(dpp, y, pool, control_oid, nullptr);
+    return 0;
+  }
+}; // RadosRealmWriter
+
+
+int RadosConfigStore::create_realm(const DoutPrefixProvider* dpp,
+                                   optional_yield y, bool exclusive,
+                                   const RGWRealm& info,
+                                   std::unique_ptr<sal::RealmWriter>* writer)
+{
+  if (info.get_id().empty()) {
+    ldpp_dout(dpp, 0) << "realm cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.get_name().empty()) {
+    ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  const auto& pool = impl->realm_pool;
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  // write the realm info
+  const auto info_oid = realm_info_oid(info.get_id());
+  RGWObjVersionTracker objv;
+  objv.generate_new_write_ver(dpp->get_cct());
+
+  int r = impl->write(dpp, y, pool, info_oid, create, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  // write the realm name
+  const auto name_oid = realm_name_oid(info.get_name());
+  const auto name = RGWNameToId{info.get_id()};
+  RGWObjVersionTracker name_objv;
+  name_objv.generate_new_write_ver(dpp->get_cct());
+
+  r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv);
+  if (r < 0) {
+    (void) impl->remove(dpp, y, pool, info_oid, &objv);
+    return r;
+  }
+
+  // create control object for watch/notify
+  const auto control_oid = realm_control_oid(info.get_id());
+  bufferlist empty_bl;
+  r = impl->write(dpp, y, pool, control_oid, Create::MayExist,
+                  empty_bl, nullptr);
+  if (r < 0) {
+    (void) impl->remove(dpp, y, pool, name_oid, &name_objv);
+    (void) impl->remove(dpp, y, pool, info_oid, &objv);
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosRealmWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp,
+                                       optional_yield y,
+                                       std::string_view realm_id,
+                                       RGWRealm& info,
+                                       std::unique_ptr<sal::RealmWriter>* writer)
+{
+  const auto& pool = impl->realm_pool;
+  const auto info_oid = realm_info_oid(realm_id);
+  RGWObjVersionTracker objv;
+  int r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosRealmWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp,
+                                         optional_yield y,
+                                         std::string_view realm_name,
+                                         RGWRealm& info,
+                                         std::unique_ptr<sal::RealmWriter>* writer)
+{
+  const auto& pool = impl->realm_pool;
+
+  // look up realm id by name
+  RGWNameToId name;
+  const auto name_oid = realm_name_oid(realm_name);
+  int r = impl->read(dpp, y, pool, name_oid, name, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = realm_info_oid(name.obj_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosRealmWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_default_realm(const DoutPrefixProvider* dpp,
+                                         optional_yield y,
+                                         RGWRealm& info,
+                                         std::unique_ptr<sal::RealmWriter>* writer)
+{
+  const auto& pool = impl->realm_pool;
+
+  // read default realm id
+  RGWDefaultSystemMetaObjInfo default_info;
+  const auto default_oid = default_realm_oid(dpp->get_cct()->_conf);
+  int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = realm_info_oid(default_info.default_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosRealmWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_realm_id(const DoutPrefixProvider* dpp,
+                                    optional_yield y,
+                                    std::string_view realm_name,
+                                    std::string& realm_id)
+{
+  const auto& pool = impl->realm_pool;
+  RGWNameToId name;
+
+  // look up realm id by name
+  const auto name_oid = realm_name_oid(realm_name);
+  int r = impl->read(dpp, y, pool, name_oid, name, nullptr);
+  if (r < 0) {
+    return r;
+  }
+  realm_id = std::move(name.obj_id);
+  return 0;
+}
+
+int RadosConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp,
+                                              optional_yield y,
+                                              const RGWPeriod& period)
+{
+  const auto& pool = impl->realm_pool;
+  const auto control_oid = realm_control_oid(period.get_realm());
+
+  bufferlist bl;
+  using ceph::encode;
+  // push the period to dependent zonegroups/zones
+  encode(RGWRealmNotify::ZonesNeedPeriod, bl);
+  encode(period, bl);
+  // reload the gateway with the new period
+  encode(RGWRealmNotify::Reload, bl);
+
+  constexpr uint64_t timeout_ms = 0;
+  return impl->notify(dpp, y, pool, control_oid, bl, timeout_ms);
+}
+
+int RadosConfigStore::list_realm_names(const DoutPrefixProvider* dpp,
+                                       optional_yield y,
+                                       const std::string& marker,
+                                       std::span<std::string> entries,
+                                       sal::ListResult<std::string>& result)
+{
+  const auto& pool = impl->realm_pool;
+  constexpr auto prefix = [] (std::string oid) -> std::string {
+    if (!oid.starts_with(realm_names_oid_prefix)) {
+      return {};
+    }
+    return oid.substr(realm_names_oid_prefix.size());
+  };
+  return impl->list(dpp, y, pool, marker, prefix, entries, result);
+}
+
+} // namespace rgw::rados
diff --git a/src/rgw/driver/rados/config/store.cc b/src/rgw/driver/rados/config/store.cc
new file mode 100644
index 00000000000..ec2b034a8e0
--- /dev/null
+++ b/src/rgw/driver/rados/config/store.cc
@@ -0,0 +1,52 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/rados/librados.hpp"
+#include "common/errno.h"
+#include "impl.h"
+#include "store.h"
+
+namespace rgw::rados {
+
+RadosConfigStore::RadosConfigStore(std::unique_ptr<ConfigImpl> impl)
+  : impl(std::move(impl))
+{
+}
+
+RadosConfigStore::~RadosConfigStore() = default;
+
+
+auto create_config_store(const DoutPrefixProvider* dpp)
+  -> std::unique_ptr<RadosConfigStore>
+{
+  auto impl = std::make_unique<ConfigImpl>(dpp->get_cct()->_conf);
+
+  // initialize a Rados client
+  int r = impl->rados.init_with_context(dpp->get_cct());
+  if (r < 0) {
+    ldpp_dout(dpp, -1) << "Rados client initialization failed with "
+        << cpp_strerror(-r) << dendl;
+    return nullptr;
+  }
+  r = impl->rados.connect();
+  if (r < 0) {
+    ldpp_dout(dpp, -1) << "Rados client connection failed with "
+        << cpp_strerror(-r) << dendl;
+    return nullptr;
+  }
+
+  return std::make_unique<RadosConfigStore>(std::move(impl));
+}
+
+} // namespace rgw::rados
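`create_config_store()` owns client bootstrap: it builds the `ConfigImpl`, initializes and connects the librados client, and only then wraps it in a `RadosConfigStore`, returning `nullptr` on any failure. A hypothetical caller, assuming `dpp` and `y` are in scope; not part of this commit:

```cpp
// Hypothetical bootstrap: obtain the store, then resolve the default realm.
int bootstrap(const DoutPrefixProvider* dpp, optional_yield y)
{
  auto store = rgw::rados::create_config_store(dpp);
  if (!store) {
    return -EIO;  // init/connect failure was already logged by the factory
  }
  std::string realm_id;
  int r = store->read_default_realm_id(dpp, y, realm_id);
  if (r == -ENOENT) {
    // no default realm configured; callers typically fall back to defaults
    return 0;
  }
  return r;
}
```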
diff --git a/src/rgw/driver/rados/config/store.h b/src/rgw/driver/rados/config/store.h
new file mode 100644
index 00000000000..1b93a803db3
--- /dev/null
+++ b/src/rgw/driver/rados/config/store.h
@@ -0,0 +1,182 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+#include <span>
+#include <string>
+#include "rgw_common.h"
+#include "rgw_sal_config.h"
+
+class DoutPrefixProvider;
+class optional_yield;
+
+namespace rgw::rados {
+
+struct ConfigImpl;
+
+class RadosConfigStore : public sal::ConfigStore {
+ public:
+  explicit RadosConfigStore(std::unique_ptr<ConfigImpl> impl);
+  virtual ~RadosConfigStore() override;
+
+  // Realm
+  virtual int write_default_realm_id(const DoutPrefixProvider* dpp,
+                                     optional_yield y, bool exclusive,
+                                     std::string_view realm_id) override;
+  virtual int read_default_realm_id(const DoutPrefixProvider* dpp,
+                                    optional_yield y,
+                                    std::string& realm_id) override;
+  virtual int delete_default_realm_id(const DoutPrefixProvider* dpp,
+                                      optional_yield y) override;
+
+  virtual int create_realm(const DoutPrefixProvider* dpp,
+                           optional_yield y, bool exclusive,
+                           const RGWRealm& info,
+                           std::unique_ptr<sal::RealmWriter>* writer) override;
+  virtual int read_realm_by_id(const DoutPrefixProvider* dpp,
+                               optional_yield y,
+                               std::string_view realm_id,
+                               RGWRealm& info,
+                               std::unique_ptr<sal::RealmWriter>* writer) override;
+  virtual int read_realm_by_name(const DoutPrefixProvider* dpp,
+                                 optional_yield y,
+                                 std::string_view realm_name,
+                                 RGWRealm& info,
+                                 std::unique_ptr<sal::RealmWriter>* writer) override;
+  virtual int read_default_realm(const DoutPrefixProvider* dpp,
+                                 optional_yield y,
+                                 RGWRealm& info,
+                                 std::unique_ptr<sal::RealmWriter>* writer) override;
+  virtual int read_realm_id(const DoutPrefixProvider* dpp,
+                            optional_yield y, std::string_view realm_name,
+                            std::string& realm_id) override;
+  virtual int realm_notify_new_period(const DoutPrefixProvider* dpp,
+                                      optional_yield y,
+                                      const RGWPeriod& period) override;
+  virtual int list_realm_names(const DoutPrefixProvider* dpp,
+                               optional_yield y, const std::string& marker,
+                               std::span<std::string> entries,
+                               sal::ListResult<std::string>& result) override;
+
+  // Period
+  virtual int create_period(const DoutPrefixProvider* dpp,
+                            optional_yield y, bool exclusive,
+                            const RGWPeriod& info) override;
+  virtual int read_period(const DoutPrefixProvider* dpp,
+                          optional_yield y, std::string_view period_id,
+                          std::optional<uint32_t> epoch, RGWPeriod& info) override;
+  virtual int delete_period(const DoutPrefixProvider* dpp,
+                            optional_yield y,
+                            std::string_view period_id) override;
+  virtual int list_period_ids(const DoutPrefixProvider* dpp,
+                              optional_yield y, const std::string& marker,
+                              std::span<std::string> entries,
+                              sal::ListResult<std::string>& result) override;
+
+  // ZoneGroup
+  virtual int write_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                         optional_yield y, bool exclusive,
+                                         std::string_view realm_id,
+                                         std::string_view zonegroup_id) override;
+  virtual int read_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                        optional_yield y,
+                                        std::string_view realm_id,
+                                        std::string& zonegroup_id) override;
+  virtual int delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                          optional_yield y,
+                                          std::string_view realm_id) override;
+
+  virtual int create_zonegroup(const DoutPrefixProvider* dpp,
+                               optional_yield y, bool exclusive,
+                               const RGWZoneGroup& info,
+                               std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  virtual int read_zonegroup_by_id(const DoutPrefixProvider* dpp,
+                                   optional_yield y,
+                                   std::string_view zonegroup_id,
+                                   RGWZoneGroup& info,
+                                   std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  virtual int read_zonegroup_by_name(const DoutPrefixProvider* dpp,
+                                     optional_yield y,
+                                     std::string_view zonegroup_name,
+                                     RGWZoneGroup& info,
+                                     std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  virtual int read_default_zonegroup(const DoutPrefixProvider* dpp,
+                                     optional_yield y,
+                                     std::string_view realm_id,
+                                     RGWZoneGroup& info,
+                                     std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
+  virtual int list_zonegroup_names(const DoutPrefixProvider* dpp,
+                                   optional_yield y, const std::string& marker,
+                                   std::span<std::string> entries,
+                                   sal::ListResult<std::string>& result) override;
+
+  // Zone
+  virtual int write_default_zone_id(const DoutPrefixProvider* dpp,
+                                    optional_yield y, bool exclusive,
+                                    std::string_view realm_id,
+                                    std::string_view zone_id) override;
+  virtual int read_default_zone_id(const DoutPrefixProvider* dpp,
+                                   optional_yield y,
+                                   std::string_view realm_id,
+                                   std::string& zone_id) override;
+  virtual int delete_default_zone_id(const DoutPrefixProvider* dpp,
+                                     optional_yield y,
+                                     std::string_view realm_id) override;
+
+  virtual int create_zone(const DoutPrefixProvider* dpp,
+                          optional_yield y, bool exclusive,
+                          const RGWZoneParams& info,
+                          std::unique_ptr<sal::ZoneWriter>* writer) override;
+  virtual int read_zone_by_id(const DoutPrefixProvider* dpp,
+                              optional_yield y,
+                              std::string_view zone_id,
+                              RGWZoneParams& info,
+                              std::unique_ptr<sal::ZoneWriter>* writer) override;
+  virtual int read_zone_by_name(const DoutPrefixProvider* dpp,
+                                optional_yield y,
+                                std::string_view zone_name,
+                                RGWZoneParams& info,
+                                std::unique_ptr<sal::ZoneWriter>* writer) override;
+  virtual int read_default_zone(const DoutPrefixProvider* dpp,
+                                optional_yield y,
+                                std::string_view realm_id,
+                                RGWZoneParams& info,
+                                std::unique_ptr<sal::ZoneWriter>* writer) override;
+  virtual int list_zone_names(const DoutPrefixProvider* dpp,
+                              optional_yield y, const std::string& marker,
+                              std::span<std::string> entries,
+                              sal::ListResult<std::string>& result) override;
+
+  // PeriodConfig
+  virtual int read_period_config(const DoutPrefixProvider* dpp,
+                                 optional_yield y,
+                                 std::string_view realm_id,
+                                 RGWPeriodConfig& info) override;
+  virtual int write_period_config(const DoutPrefixProvider* dpp,
+                                  optional_yield y, bool exclusive,
+                                  std::string_view realm_id,
+                                  const RGWPeriodConfig& info) override;
+
+ private:
+  std::unique_ptr<ConfigImpl> impl;
+}; // RadosConfigStore
+
+
+/// RadosConfigStore factory function
+auto create_config_store(const DoutPrefixProvider* dpp)
+  -> std::unique_ptr<RadosConfigStore>;
+
+} // namespace rgw::rados
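Each `read_*()` above can hand back a matching writer that captures the object version observed at read time, so later `write()`/`rename()`/`remove()` calls act as a compare-and-swap against concurrent updates. A hypothetical read-modify-write round trip; `"default"` is an illustrative zone name and the mutation is elided:

```cpp
// Hypothetical: load a zone by name and persist a change through the
// writer; a conflicting update in between surfaces as an error rather
// than silently clobbering the other writer.
int update_zone(const DoutPrefixProvider* dpp, optional_yield y,
                rgw::sal::ConfigStore& store)
{
  RGWZoneParams zone;
  std::unique_ptr<rgw::sal::ZoneWriter> writer;
  int r = store.read_zone_by_name(dpp, y, "default", zone, &writer);
  if (r < 0) {
    return r;
  }
  // ...mutate fields of `zone` other than its id/name...
  return writer->write(dpp, y, zone);
}
```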
diff --git a/src/rgw/driver/rados/config/zone.cc b/src/rgw/driver/rados/config/zone.cc
new file mode 100644
index 00000000000..e06c1606c1a
--- /dev/null
+++ b/src/rgw/driver/rados/config/zone.cc
@@ -0,0 +1,312 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "common/errno.h"
+#include "rgw_zone.h"
+#include "driver/rados/config/store.h"
+
+#include "impl.h"
+
+namespace rgw::rados {
+
+// zone oids
+constexpr std::string_view zone_info_oid_prefix = "zone_info.";
+constexpr std::string_view zone_names_oid_prefix = "zone_names.";
+
+std::string zone_info_oid(std::string_view zone_id)
+{
+  return string_cat_reserve(zone_info_oid_prefix, zone_id);
+}
+std::string zone_name_oid(std::string_view zone_id)
+{
+  return string_cat_reserve(zone_names_oid_prefix, zone_id);
+}
+std::string default_zone_oid(const ceph::common::ConfigProxy& conf,
+                             std::string_view realm_id)
+{
+  return fmt::format("{}.{}", conf->rgw_default_zone_info_oid, realm_id);
+}
+
+
+int RadosConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp,
+                                            optional_yield y,
+                                            bool exclusive,
+                                            std::string_view realm_id,
+                                            std::string_view zone_id)
+{
+  const auto& pool = impl->zone_pool;
+  const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id);
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  default_info.default_id = zone_id;
+
+  return impl->write(dpp, y, pool, default_oid, create, default_info, nullptr);
+}
+
+int RadosConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp,
+                                           optional_yield y,
+                                           std::string_view realm_id,
+                                           std::string& zone_id)
+{
+  const auto& pool = impl->zone_pool;
+  const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id);
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr);
+  if (r >= 0) {
+    zone_id = default_info.default_id;
+  }
+  return r;
+}
+
+int RadosConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp,
+                                             optional_yield y,
+                                             std::string_view realm_id)
+{
+  const auto& pool = impl->zone_pool;
+  const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id);
+
+  return impl->remove(dpp, y, pool, default_oid, nullptr);
+}
+
+
+class RadosZoneWriter : public sal::ZoneWriter {
+  ConfigImpl* impl;
+  RGWObjVersionTracker objv;
+  std::string zone_id;
+  std::string zone_name;
+ public:
+  RadosZoneWriter(ConfigImpl* impl, RGWObjVersionTracker objv,
+                  std::string_view zone_id, std::string_view zone_name)
+    : impl(impl), objv(std::move(objv)),
+      zone_id(zone_id), zone_name(zone_name)
+  {
+  }
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWZoneParams& info) override
+  {
+    if (zone_id != info.get_id() || zone_name != info.get_name()) {
+      return -EINVAL; // can't modify zone id or name directly
+    }
+
+    const auto& pool = impl->zone_pool;
+    const auto info_oid = zone_info_oid(info.get_id());
+    return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+  }
+
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWZoneParams& info, std::string_view new_name) override
+  {
+    if (zone_id != info.get_id() || zone_name != info.get_name()) {
+      return -EINVAL; // can't modify zone id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    const auto& pool = impl->zone_pool;
+    const auto name = RGWNameToId{info.get_id()};
+    const auto info_oid = zone_info_oid(info.get_id());
+    const auto old_oid = zone_name_oid(info.get_name());
+    const auto new_oid = zone_name_oid(new_name);
+
+    // link the new name
+    RGWObjVersionTracker new_objv;
+    new_objv.generate_new_write_ver(dpp->get_cct());
+    int r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist,
+                        name, &new_objv);
+    if (r < 0) {
+      return r;
+    }
+
+    // write the info with updated name
+    info.set_name(std::string{new_name});
+    r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+    if (r < 0) {
+      // on failure, unlink the new name
+      (void) impl->remove(dpp, y, pool, new_oid, &new_objv);
+      return r;
+    }
+
+    // unlink the old name
+    (void) impl->remove(dpp, y, pool, old_oid, nullptr);
+
+    zone_name = new_name;
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    const auto& pool = impl->zone_pool;
+    const auto info_oid = zone_info_oid(zone_id);
+    int r = impl->remove(dpp, y, pool, info_oid, &objv);
+    if (r < 0) {
+      return r;
+    }
+    const auto name_oid = zone_name_oid(zone_name);
+    (void) impl->remove(dpp, y, pool, name_oid, nullptr);
+    return 0;
+  }
+}; // RadosZoneWriter
+
+
+int RadosConfigStore::create_zone(const DoutPrefixProvider* dpp,
+                                  optional_yield y, bool exclusive,
+                                  const RGWZoneParams& info,
+                                  std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  if (info.get_id().empty()) {
+    ldpp_dout(dpp, 0) << "zone cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.get_name().empty()) {
+    ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  const auto& pool = impl->zone_pool;
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  // write the zone info
+  const auto info_oid = zone_info_oid(info.get_id());
+  RGWObjVersionTracker objv;
+  objv.generate_new_write_ver(dpp->get_cct());
+
+  int r = impl->write(dpp, y, pool, info_oid, create, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  // write the zone name
+  const auto name_oid = zone_name_oid(info.get_name());
+  const auto name = RGWNameToId{info.get_id()};
+  RGWObjVersionTracker name_objv;
+  name_objv.generate_new_write_ver(dpp->get_cct());
+
+  r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv);
+  if (r < 0) {
+    (void) impl->remove(dpp, y, pool, info_oid, &objv);
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp,
+                                      optional_yield y,
+                                      std::string_view zone_id,
+                                      RGWZoneParams& info,
+                                      std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  const auto& pool = impl->zone_pool;
+  const auto info_oid = zone_info_oid(zone_id);
+  RGWObjVersionTracker objv;
+
+  int r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp,
+                                        optional_yield y,
+                                        std::string_view zone_name,
+                                        RGWZoneParams& info,
+                                        std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  const auto& pool = impl->zone_pool;
+
+  // look up zone id by name
+  const auto name_oid = zone_name_oid(zone_name);
+  RGWNameToId name;
+  int r = impl->read(dpp, y, pool, name_oid, name, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = zone_info_oid(name.obj_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_default_zone(const DoutPrefixProvider* dpp,
+                                        optional_yield y,
+                                        std::string_view realm_id,
+                                        RGWZoneParams& info,
+                                        std::unique_ptr<sal::ZoneWriter>* writer)
+{
+  const auto& pool = impl->zone_pool;
+
+  // read default zone id
+  const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id);
+  RGWDefaultSystemMetaObjInfo default_info;
+  int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = zone_info_oid(default_info.default_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::list_zone_names(const DoutPrefixProvider* dpp,
+                                      optional_yield y,
+                                      const std::string& marker,
+                                      std::span<std::string> entries,
+                                      sal::ListResult<std::string>& result)
+{
+  const auto& pool = impl->zone_pool;
+  constexpr auto prefix = [] (std::string oid) -> std::string {
+    if (!oid.starts_with(zone_names_oid_prefix)) {
+      return {};
+    }
+    return oid.substr(zone_names_oid_prefix.size());
+  };
+  return impl->list(dpp, y, pool, marker, prefix, entries, result);
+}
+
+} // namespace rgw::rados
diff --git a/src/rgw/driver/rados/config/zonegroup.cc b/src/rgw/driver/rados/config/zonegroup.cc
new file mode 100644
index 00000000000..1766a68ce65
--- /dev/null
+++ b/src/rgw/driver/rados/config/zonegroup.cc
@@ -0,0 +1,315 @@
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+#include "common/errno.h"
+#include "rgw_zone.h"
+#include "driver/rados/config/store.h"
+
+#include "impl.h"
+
+namespace rgw::rados {
+
+// zonegroup oids
+constexpr std::string_view zonegroup_names_oid_prefix = "zonegroups_names.";
+constexpr std::string_view zonegroup_info_oid_prefix = "zonegroup_info.";
+constexpr std::string_view default_zonegroup_info_oid = "default.zonegroup";
+
+static std::string zonegroup_info_oid(std::string_view zonegroup_id)
+{
+  return string_cat_reserve(zonegroup_info_oid_prefix, zonegroup_id);
+}
+static std::string zonegroup_name_oid(std::string_view zonegroup_id)
+{
+  return string_cat_reserve(zonegroup_names_oid_prefix, zonegroup_id);
+}
+static std::string default_zonegroup_oid(const ceph::common::ConfigProxy& conf,
+                                         std::string_view realm_id)
+{
+  const auto prefix = name_or_default(conf->rgw_default_zonegroup_info_oid,
+                                      default_zonegroup_info_oid);
+  return fmt::format("{}.{}", prefix, realm_id);
+}
+
+
+int RadosConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                 optional_yield y,
+                                                 bool exclusive,
+                                                 std::string_view realm_id,
+                                                 std::string_view zonegroup_id)
+{
+  const auto& pool = impl->zonegroup_pool;
+  const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id);
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  default_info.default_id = zonegroup_id;
+
+  return impl->write(dpp, y, pool, oid, create, default_info, nullptr);
+}
+
+int RadosConfigStore::read_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                optional_yield y,
+                                                std::string_view realm_id,
+                                                std::string& zonegroup_id)
+{
+  const auto& pool = impl->zonegroup_pool;
+  const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id);
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  int r = impl->read(dpp, y, pool, oid, default_info, nullptr);
+  if (r >= 0) {
+    zonegroup_id = default_info.default_id;
+  }
+  return r;
+}
+
+int RadosConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
+                                                  optional_yield y,
+                                                  std::string_view realm_id)
+{
+  const auto& pool = impl->zonegroup_pool;
+  const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id);
+  return impl->remove(dpp, y, pool, oid, nullptr);
+}
+
+
+class RadosZoneGroupWriter : public sal::ZoneGroupWriter {
+  ConfigImpl* impl;
+  RGWObjVersionTracker objv;
+  std::string zonegroup_id;
+  std::string zonegroup_name;
+ public:
+  RadosZoneGroupWriter(ConfigImpl* impl, RGWObjVersionTracker objv,
+                       std::string_view zonegroup_id,
+                       std::string_view zonegroup_name)
+    : impl(impl), objv(std::move(objv)),
+      zonegroup_id(zonegroup_id), zonegroup_name(zonegroup_name)
+  {
+  }
+
+  int write(const DoutPrefixProvider* dpp, optional_yield y,
+            const RGWZoneGroup& info) override
+  {
+    if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) {
+      return -EINVAL; // can't modify zonegroup id or name directly
+    }
+
+    const auto& pool = impl->zonegroup_pool;
+    const auto info_oid = zonegroup_info_oid(info.get_id());
+    return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+  }
+
+  int rename(const DoutPrefixProvider* dpp, optional_yield y,
+             RGWZoneGroup& info, std::string_view new_name) override
+  {
+    if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) {
+      return -EINVAL; // can't modify zonegroup id or name directly
+    }
+    if (new_name.empty()) {
+      ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl;
+      return -EINVAL;
+    }
+
+    const auto& pool = impl->zonegroup_pool;
+    const auto name = RGWNameToId{info.get_id()};
+    const auto info_oid = zonegroup_info_oid(info.get_id());
+    const auto old_oid = zonegroup_name_oid(info.get_name());
+    const auto new_oid = zonegroup_name_oid(new_name);
+
+    // link the new name
+    RGWObjVersionTracker new_objv;
+    new_objv.generate_new_write_ver(dpp->get_cct());
+    int r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist,
+                        name, &new_objv);
+    if (r < 0) {
+      return r;
+    }
+
+    // write the info with updated name
+    info.set_name(std::string{new_name});
+    r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv);
+    if (r < 0) {
+      // on failure, unlink the new name
+      (void) impl->remove(dpp, y, pool, new_oid, &new_objv);
+      return r;
+    }
+
+    // unlink the old name
+    (void) impl->remove(dpp, y, pool, old_oid, nullptr);
+
+    zonegroup_name = new_name;
+    return 0;
+  }
+
+  int remove(const DoutPrefixProvider* dpp, optional_yield y) override
+  {
+    const auto& pool = impl->zonegroup_pool;
+    const auto info_oid = zonegroup_info_oid(zonegroup_id);
+    int r = impl->remove(dpp, y, pool, info_oid, &objv);
+    if (r < 0) {
+      return r;
+    }
+    const auto name_oid = zonegroup_name_oid(zonegroup_name);
+    (void) impl->remove(dpp, y, pool, name_oid, nullptr);
+    return 0;
+  }
+}; // RadosZoneGroupWriter
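`rename()` above orders its writes so a crash never leaves the name index pointing at nothing: the new name link is created first (exclusively, so clashes fail cleanly), then the versioned info object is rewritten, and only then is the old link removed; a failed info write rolls the new link back. A sketch of the same create-then-rename flow from the caller's side, where the ids, names, and helper are illustrative assumptions:

```cpp
// Hypothetical: create a zonegroup exclusively, then rename it through
// the returned writer, which still holds the creation-time version.
int make_and_rename(const DoutPrefixProvider* dpp, optional_yield y,
                    rgw::sal::ConfigStore& store, RGWZoneGroup& zg)
{
  std::unique_ptr<rgw::sal::ZoneGroupWriter> writer;
  int r = store.create_zonegroup(dpp, y, true /* exclusive */, zg, &writer);
  if (r < 0) {
    return r;  // e.g. -EEXIST if the id or name is already taken
  }
  return writer->rename(dpp, y, zg, "us-east");  // illustrative new name
}
```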
+
+
+int RadosConfigStore::create_zonegroup(const DoutPrefixProvider* dpp,
+                                       optional_yield y, bool exclusive,
+                                       const RGWZoneGroup& info,
+                                       std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  if (info.get_id().empty()) {
+    ldpp_dout(dpp, 0) << "zonegroup cannot have an empty id" << dendl;
+    return -EINVAL;
+  }
+  if (info.get_name().empty()) {
+    ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl;
+    return -EINVAL;
+  }
+
+  const auto& pool = impl->zonegroup_pool;
+  const auto create = exclusive ? Create::MustNotExist : Create::MayExist;
+
+  // write the zonegroup info
+  const auto info_oid = zonegroup_info_oid(info.get_id());
+  RGWObjVersionTracker objv;
+  objv.generate_new_write_ver(dpp->get_cct());
+
+  int r = impl->write(dpp, y, pool, info_oid, create, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  // write the zonegroup name
+  const auto name_oid = zonegroup_name_oid(info.get_name());
+  const auto name = RGWNameToId{info.get_id()};
+  RGWObjVersionTracker name_objv;
+  name_objv.generate_new_write_ver(dpp->get_cct());
+
+  r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv);
+  if (r < 0) {
+    (void) impl->remove(dpp, y, pool, info_oid, &objv);
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneGroupWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp,
+                                           optional_yield y,
+                                           std::string_view zonegroup_id,
+                                           RGWZoneGroup& info,
+                                           std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  const auto& pool = impl->zonegroup_pool;
+  const auto info_oid = zonegroup_info_oid(zonegroup_id);
+  RGWObjVersionTracker objv;
+
+  int r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneGroupWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp,
+                                             optional_yield y,
+                                             std::string_view zonegroup_name,
+                                             RGWZoneGroup& info,
+                                             std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  const auto& pool = impl->zonegroup_pool;
+
+  // look up zonegroup id by name
+  RGWNameToId name;
+  const auto name_oid = zonegroup_name_oid(zonegroup_name);
+  int r = impl->read(dpp, y, pool, name_oid, name, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = zonegroup_info_oid(name.obj_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneGroupWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp,
+                                             optional_yield y,
+                                             std::string_view realm_id,
+                                             RGWZoneGroup& info,
+                                             std::unique_ptr<sal::ZoneGroupWriter>* writer)
+{
+  const auto& pool = impl->zonegroup_pool;
+
+  // read default zonegroup id
+  RGWDefaultSystemMetaObjInfo default_info;
+  const auto default_oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id);
+  int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  const auto info_oid = zonegroup_info_oid(default_info.default_id);
+  RGWObjVersionTracker objv;
+  r = impl->read(dpp, y, pool, info_oid, info, &objv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (writer) {
+    *writer = std::make_unique<RadosZoneGroupWriter>(
+        impl.get(), std::move(objv), info.get_id(), info.get_name());
+  }
+  return 0;
+}
+
+int RadosConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp,
+                                           optional_yield y,
+                                           const std::string& marker,
+                                           std::span<std::string> entries,
+                                           sal::ListResult<std::string>& result)
+{
+  const auto& pool = impl->zonegroup_pool;
+  constexpr auto prefix = [] (std::string oid) -> std::string {
+    if (!oid.starts_with(zonegroup_names_oid_prefix)) {
+      return {};
+    }
+    return oid.substr(zonegroup_names_oid_prefix.size());
+  };
+  return impl->list(dpp, y, pool, marker, prefix, entries, result);
+}
+
+} // namespace rgw::rados
diff --git a/src/rgw/driver/rados/rgw_bucket.cc b/src/rgw/driver/rados/rgw_bucket.cc
new file mode 100644
index 00000000000..7f600fe457e
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_bucket.cc
@@ -0,0 +1,2971 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_acl_s3.h"
+#include "rgw_tag_s3.h"
+
+#include "rgw_bucket.h"
+#include "rgw_op.h"
+#include "rgw_bucket_sync.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_bucket.h"
+#include "services/svc_user.h"
+
+#include "rgw_reshard.h"
+
+// stolen from src/cls/version/cls_version.cc
+#define VERSION_ATTR "ceph.objclass.version"
+
+#include "cls/user/cls_user_types.h"
+
+#include "rgw_sal_rados.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// seconds for timeout during RGWBucket::check_object_index
+constexpr uint64_t BUCKET_TAG_QUICK_TIMEOUT = 30;
+
+using namespace std;
+
+// default number of entries to list with each bucket listing call
+// (use marker to bridge between calls)
+static constexpr size_t listing_max_entries = 1000;
+
+/*
+ * The tenant_name is always returned on purpose. May be empty, of course.
+ */
+static void parse_bucket(const string& bucket,
+                         string *tenant_name,
+                         string *bucket_name,
+                         string *bucket_instance = nullptr /* optional */)
+{
+  /*
+   * expected format: [tenant/]bucket:bucket_instance
+   */
+  int pos = bucket.find('/');
+  if (pos >= 0) {
+    *tenant_name = bucket.substr(0, pos);
+  } else {
+    tenant_name->clear();
+  }
+  string bn = bucket.substr(pos + 1);
+  pos = bn.find(':');
+  if (pos < 0) {
+    *bucket_name = std::move(bn);
+    return;
+  }
+  *bucket_name = bn.substr(0, pos);
+  if (bucket_instance) {
+    *bucket_instance = bn.substr(pos + 1);
+  }
+
+  /*
+   * deal with the possible tenant:bucket:bucket_instance case
+   */
+  if (tenant_name->empty()) {
+    pos = bucket_instance->find(':');
+    if (pos >= 0) {
+      *tenant_name = *bucket_name;
+      *bucket_name = bucket_instance->substr(0, pos);
+      *bucket_instance = bucket_instance->substr(pos + 1);
+    }
+  }
+}
+
+static void dump_multipart_index_results(list<rgw_obj_index_key>& objs_to_unlink,
+                                         Formatter *f)
+{
+  for (const auto& o : objs_to_unlink) {
+    f->dump_string("object", o.name);
+  }
+}
+
+void check_bad_user_bucket_mapping(rgw::sal::Driver* driver, rgw::sal::User* user,
+                                   bool fix,
+                                   optional_yield y,
+                                   const DoutPrefixProvider *dpp)
+{
+  rgw::sal::BucketList user_buckets;
+  string marker;
+
+  CephContext *cct = driver->ctx();
+
+  size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk;
+
+  do {
+    int ret = user->list_buckets(dpp, marker, string(), max_entries, false, user_buckets, y);
+    if (ret < 0) {
+      ldout(driver->ctx(), 0) << "failed to read user buckets: "
+          << cpp_strerror(-ret) << dendl;
+      return;
+    }
+
+    map<string, std::unique_ptr<rgw::sal::Bucket>>& buckets = user_buckets.get_buckets();
+    for (auto i = buckets.begin();
+         i != buckets.end();
+         ++i) {
+      marker = i->first;
+
+      auto& bucket = i->second;
+
+      std::unique_ptr<rgw::sal::Bucket> actual_bucket;
+      int r = driver->get_bucket(dpp, user, user->get_tenant(), bucket->get_name(), &actual_bucket, null_yield);
+      if (r < 0) {
+        ldout(driver->ctx(), 0) << "could not get bucket info for bucket=" << bucket << dendl;
+        continue;
+      }
+      if (actual_bucket->get_name().compare(bucket->get_name()) != 0 ||
+          actual_bucket->get_tenant().compare(bucket->get_tenant()) != 0 ||
+          actual_bucket->get_marker().compare(bucket->get_marker()) != 0 ||
+          actual_bucket->get_bucket_id().compare(bucket->get_bucket_id()) != 0) {
+        cout << "bucket info mismatch: expected " << actual_bucket << " got " << bucket << std::endl;
+        if (fix) {
+          cout << "fixing" << std::endl;
+          r = actual_bucket->chown(dpp, user, nullptr, null_yield);
+          if (r < 0) {
+            cerr << "failed to fix bucket: " << cpp_strerror(-r) << std::endl;
+          }
+        }
+      }
+    }
+  } while (user_buckets.is_truncated());
+}
+
+// returns true if entry is in the empty namespace. note: function
+// type conforms to type RGWBucketListNameFilter
+bool rgw_bucket_object_check_filter(const std::string& oid)
+{
+  const static std::string empty_ns;
+  rgw_obj_key key; // thrown away but needed for parsing
+  return rgw_obj_key::oid_to_key_in_ns(oid, &key, empty_ns);
+}
+
+int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, rgw_obj_key& key)
+{
+  if (key.instance.empty()) {
+    key.instance = "null";
+  }
+
+  std::unique_ptr<rgw::sal::Object> object = bucket->get_object(key);
+
+  return object->delete_object(dpp, null_yield);
+}
+
+static void set_err_msg(std::string *sink, std::string msg)
+{
+  if (sink && !msg.empty())
+    *sink = msg;
+}
+
+int RGWBucket::init(rgw::sal::Driver* _driver, RGWBucketAdminOpState& op_state,
+                    optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg)
+{
+  if (!_driver) {
+    set_err_msg(err_msg, "no storage!");
+    return -EINVAL;
+  }
+
+  driver = _driver;
+
+  std::string bucket_name = op_state.get_bucket_name();
+
+  if (bucket_name.empty() && op_state.get_user_id().empty())
+    return -EINVAL;
+
+  user = driver->get_user(op_state.get_user_id());
+  std::string tenant = user->get_tenant();
+
+  // split possible tenant/name
+  auto pos = bucket_name.find('/');
+  if (pos != string::npos) {
+    tenant = bucket_name.substr(0, pos);
+    bucket_name = bucket_name.substr(pos + 1);
+  }
+
+  int r = driver->get_bucket(dpp, user.get(), tenant, bucket_name, &bucket, y);
+  if (r < 0) {
+    set_err_msg(err_msg, "failed to fetch bucket info for bucket=" + bucket_name);
+    return r;
+  }
+
+  op_state.set_bucket(bucket->clone());
+
+  if (!rgw::sal::User::empty(user.get())) {
+    r = user->load_user(dpp, y);
+    if (r < 0) {
+      set_err_msg(err_msg, "failed to fetch user info");
+      return r;
+    }
+  }
+
+  op_state.display_name = user->get_display_name();
+
+  clear_failure();
+  return 0;
+}
+
+bool rgw_find_bucket_by_id(const DoutPrefixProvider *dpp, CephContext *cct, rgw::sal::Driver* driver,
+                           const string& marker, const string& bucket_id, rgw_bucket* bucket_out)
+{
+  void *handle = NULL;
+  bool truncated = false;
+  string s;
+
+  int ret = driver->meta_list_keys_init(dpp, "bucket.instance", marker, &handle);
+  if (ret < 0) {
+    cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
+    driver->meta_list_keys_complete(handle);
+    return -ret;
+  }
+  do {
+    list<string> keys;
+    ret = driver->meta_list_keys_next(dpp, handle, 1000, keys, &truncated);
+    if (ret < 0) {
+      cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
+      driver->meta_list_keys_complete(handle);
+      return -ret;
+    }
+    for (list<string>::iterator iter = keys.begin(); iter != keys.end(); ++iter) {
+      s = *iter;
+      ret = rgw_bucket_parse_bucket_key(cct, s, bucket_out, nullptr);
+      if (ret < 0) {
+        continue;
+      }
+      if (bucket_id == bucket_out->bucket_id) {
+        driver->meta_list_keys_complete(handle);
+        return true;
+      }
+    }
+  } while (truncated);
+  driver->meta_list_keys_complete(handle);
+  return false;
+}
+
+int RGWBucket::chown(RGWBucketAdminOpState& op_state, const string& marker,
+                     optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg)
+{
+  int ret = bucket->chown(dpp, user.get(), user.get(), y, &marker);
+  if (ret < 0) {
+    set_err_msg(err_msg, "Failed to change object ownership: " + cpp_strerror(-ret));
+  }
+
+  return ret;
+}
+
+int RGWBucket::set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg)
+{
+  bucket = op_state.get_bucket()->clone();
+
+  bucket->get_info().quota = op_state.quota;
+  int r = bucket->put_info(dpp, false, real_time());
+  if (r < 0) {
+    set_err_msg(err_msg, "ERROR: failed writing bucket instance info: " + cpp_strerror(-r));
+    return r;
+  }
+  return r;
+}
+
+int RGWBucket::remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, std::string *err_msg)
+{
+  std::string object_name = op_state.get_object_name();
+
+  rgw_obj_key key(object_name);
+
+  bucket = op_state.get_bucket()->clone();
+
+  int ret = rgw_remove_object(dpp, driver, bucket.get(), key);
+  if (ret < 0) {
+    set_err_msg(err_msg, "unable to remove object: " + cpp_strerror(-ret));
+    return ret;
+  }
+
+  return 0;
+}
+
+static void dump_bucket_index(const vector<rgw_bucket_dir_entry>& objs, Formatter *f)
+{
+  for (auto iter = objs.begin(); iter != objs.end(); ++iter) {
+    f->dump_string("object", iter->key.name);
+  }
+}
+
+static void dump_bucket_usage(map<RGWObjCategory, RGWStorageStats>& stats, Formatter *formatter)
+{
+  map<RGWObjCategory, RGWStorageStats>::iterator iter;
+
+  formatter->open_object_section("usage");
+  for (iter = stats.begin(); iter != stats.end(); ++iter) {
+    RGWStorageStats& s = iter->second;
+    formatter->open_object_section(to_string(iter->first));
+    s.dump(formatter);
+    formatter->close_section();
+  }
+  formatter->close_section();
+}
+
+static void dump_index_check(map<RGWObjCategory, RGWStorageStats> existing_stats,
+                             map<RGWObjCategory, RGWStorageStats> calculated_stats,
+                             Formatter *formatter)
+{
+  formatter->open_object_section("check_result");
+  formatter->open_object_section("existing_header");
+  dump_bucket_usage(existing_stats, formatter);
+  formatter->close_section();
+  formatter->open_object_section("calculated_header");
+  dump_bucket_usage(calculated_stats, formatter);
+  formatter->close_section();
+  formatter->close_section();
+}
+
+int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state,
+                                         RGWFormatterFlusher& flusher,
+                                         const DoutPrefixProvider *dpp,
+                                         std::string *err_msg)
+{
+  const bool fix_index = op_state.will_fix_index();
+
+  bucket = op_state.get_bucket()->clone();
+
+  rgw::sal::Bucket::ListParams params;
+  params.list_versions = true;
+  params.ns = RGW_OBJ_NS_MULTIPART;
+
+  std::map<std::string, bool> meta_objs;
+  std::map<rgw_obj_index_key, std::string> all_objs;
+  bool is_truncated;
+  do {
+    rgw::sal::Bucket::ListResults results;
+    int r = bucket->list(dpp, params, listing_max_entries, results, null_yield);
+    if (r < 0) {
+      set_err_msg(err_msg, "failed to list objects in bucket=" + bucket->get_name() +
+                  " err=" + cpp_strerror(-r));
+
+      return r;
+    }
+    is_truncated = results.is_truncated;
+
+    for (const auto& o : results.objs) {
+      rgw_obj_index_key key = o.key;
+      rgw_obj obj(bucket->get_key(), key);
+      std::string oid = obj.get_oid();
+
+      int pos = oid.find_last_of('.');
+      if (pos < 0) {
+        /* obj has no suffix */
+        all_objs[key] = oid;
+      } else {
+        /* obj has suffix */
+        std::string name = oid.substr(0, pos);
+        std::string suffix = oid.substr(pos + 1);
+
+        if (suffix.compare("meta") == 0) {
+          meta_objs[name] = true;
+        } else {
+          all_objs[key] = name;
+        }
+      }
+    }
+  } while
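// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): check_bad_index_multipart()
// above classifies each multipart-namespace oid by its suffix; "<name>.meta"
// marks a live upload and every other entry must map back to one. The
// split-and-classify step in isolation (MultipartBuckets is a hypothetical
// container, not an RGW type):
#include <map>
#include <string>

struct MultipartBuckets {
  std::map<std::string, bool> meta_objs;        // "<name>" -> saw .meta
  std::map<std::string, std::string> all_objs;  // oid -> owning upload name
};

inline void classify_oid(const std::string& oid, MultipartBuckets& out) {
  auto pos = oid.find_last_of('.');
  if (pos == std::string::npos) {
    out.all_objs[oid] = oid;              // no suffix: orphan candidate
    return;
  }
  std::string name = oid.substr(0, pos);
  std::string suffix = oid.substr(pos + 1);
  if (suffix == "meta") {
    out.meta_objs[name] = true;           // upload is still alive
  } else {
    out.all_objs[oid] = name;             // part entry: must match a .meta
  }
}
// Any all_objs entry whose value is missing from meta_objs is an invalid
// multipart index entry, which the admin op can then unlink.
// ---------------------------------------------------------------------------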
(is_truncated); + + std::list objs_to_unlink; + Formatter *f = flusher.get_formatter(); + + f->open_array_section("invalid_multipart_entries"); + + for (const auto& o : all_objs) { + const std::string& name = o.second; + if (meta_objs.find(name) == meta_objs.end()) { + objs_to_unlink.push_back(o.first); + } + + if (objs_to_unlink.size() > listing_max_entries) { + if (fix_index) { + // note: under rados this removes directly from rados index objects + int r = bucket->remove_objs_from_index(dpp, objs_to_unlink); + if (r < 0) { + set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + + cpp_strerror(-r)); + return r; + } + } + + dump_mulipart_index_results(objs_to_unlink, f); + flusher.flush(); + objs_to_unlink.clear(); + } + } + + if (fix_index) { + // note: under rados this removes directly from rados index objects + int r = bucket->remove_objs_from_index(dpp, objs_to_unlink); + if (r < 0) { + set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + + cpp_strerror(-r)); + + return r; + } + } + + dump_mulipart_index_results(objs_to_unlink, f); + f->close_section(); + flusher.flush(); + + return 0; +} + +int RGWBucket::check_object_index(const DoutPrefixProvider *dpp, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y, + std::string *err_msg) +{ + + bool fix_index = op_state.will_fix_index(); + + if (!fix_index) { + set_err_msg(err_msg, "check-objects flag requires fix index enabled"); + return -EINVAL; + } + + // use a quicker/shorter tag timeout during this process + bucket->set_tag_timeout(dpp, BUCKET_TAG_QUICK_TIMEOUT); + + rgw::sal::Bucket::ListResults results; + results.is_truncated = true; + + Formatter *formatter = flusher.get_formatter(); + formatter->open_object_section("objects"); + + while (results.is_truncated) { + rgw::sal::Bucket::ListParams params; + params.marker = results.next_marker; + params.force_check_filter = rgw_bucket_object_check_filter; + + int r = bucket->list(dpp, params, listing_max_entries, results, y); + + if (r == -ENOENT) { + break; + } else if (r < 0) { + set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r)); + } + + dump_bucket_index(results.objs, formatter); + flusher.flush(); + } + + formatter->close_section(); + + // restore normal tag timeout for bucket + bucket->set_tag_timeout(dpp, 0); + + return 0; +} + + +int RGWBucket::check_index(const DoutPrefixProvider *dpp, + RGWBucketAdminOpState& op_state, + map& existing_stats, + map& calculated_stats, + std::string *err_msg) +{ + bool fix_index = op_state.will_fix_index(); + + int r = bucket->check_index(dpp, existing_stats, calculated_stats); + if (r < 0) { + set_err_msg(err_msg, "failed to check index error=" + cpp_strerror(-r)); + return r; + } + + if (fix_index) { + r = bucket->rebuild_index(dpp); + if (r < 0) { + set_err_msg(err_msg, "failed to rebuild index err=" + cpp_strerror(-r)); + return r; + } + } + + return 0; +} + +int RGWBucket::sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg) +{ + if (!driver->is_meta_master()) { + set_err_msg(err_msg, "ERROR: failed to update bucket sync: only allowed on meta master zone"); + return -EINVAL; + } + bool sync = op_state.will_sync_bucket(); + if (sync) { + bucket->get_info().flags &= ~BUCKET_DATASYNC_DISABLED; + } else { + bucket->get_info().flags |= BUCKET_DATASYNC_DISABLED; + } + + // when writing this metadata, RGWSI_BucketIndex_RADOS::handle_overwrite() + // will write the corresponding datalog and bilog entries + int r = 
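// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the fix path above never lets
// objs_to_unlink grow past listing_max_entries; it removes a full batch from
// the index, dumps and flushes it, then clears. The batching pattern on its
// own (Batcher and flush_fn are hypothetical, standing in for
// remove_objs_from_index() plus the formatter flush):
#include <functional>
#include <list>

template <typename T>
struct Batcher {
  size_t max;
  std::function<int(const std::list<T>&)> flush_fn;
  std::list<T> pending;

  int add(T item) {
    pending.push_back(std::move(item));
    return pending.size() > max ? flush() : 0;
  }
  int flush() {                 // also called once after the input ends
    if (pending.empty()) return 0;
    int r = flush_fn(pending);  // e.g. unlink the batch from the index
    pending.clear();
    return r;
  }
};
// ---------------------------------------------------------------------------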
bucket->put_info(dpp, false, real_time()); + if (r < 0) { + set_err_msg(err_msg, "ERROR: failed writing bucket instance info:" + cpp_strerror(-r)); + return r; + } + + return 0; +} + + +int RGWBucket::policy_bl_to_stream(bufferlist& bl, ostream& o) +{ + RGWAccessControlPolicy_S3 policy(g_ceph_context); + int ret = decode_bl(bl, policy); + if (ret < 0) { + ldout(driver->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl; + } + policy.to_xml(o); + return 0; +} + +int rgw_object_get_attr(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, rgw::sal::Object* obj, + const char* attr_name, bufferlist& out_bl, optional_yield y) +{ + std::unique_ptr rop = obj->get_read_op(); + + return rop->get_attr(dpp, attr_name, out_bl, y); +} + +int RGWBucket::get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp) +{ + int ret; + std::string object_name = op_state.get_object_name(); + + bucket = op_state.get_bucket()->clone(); + + if (!object_name.empty()) { + bufferlist bl; + std::unique_ptr obj = bucket->get_object(rgw_obj_key(object_name)); + + ret = rgw_object_get_attr(dpp, driver, obj.get(), RGW_ATTR_ACL, bl, y); + if (ret < 0){ + return ret; + } + + ret = decode_bl(bl, policy); + if (ret < 0) { + ldout(driver->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl; + } + return ret; + } + + map::iterator aiter = bucket->get_attrs().find(RGW_ATTR_ACL); + if (aiter == bucket->get_attrs().end()) { + return -ENOENT; + } + + ret = decode_bl(aiter->second, policy); + if (ret < 0) { + ldout(driver->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl; + } + + return ret; +} + + +int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWAccessControlPolicy& policy, const DoutPrefixProvider *dpp) +{ + RGWBucket bucket; + + int ret = bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) + return ret; + + ret = bucket.get_policy(op_state, policy, null_yield, dpp); + if (ret < 0) + return ret; + + return 0; +} + +/* Wrappers to facilitate RESTful interface */ + + +int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp) +{ + RGWAccessControlPolicy policy(driver->ctx()); + + int ret = get_policy(driver, op_state, policy, dpp); + if (ret < 0) + return ret; + + Formatter *formatter = flusher.get_formatter(); + + flusher.start(0); + + formatter->open_object_section("policy"); + policy.dump(formatter); + formatter->close_section(); + + flusher.flush(); + + return 0; +} + +int RGWBucketAdminOp::dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + ostream& os, const DoutPrefixProvider *dpp) +{ + RGWAccessControlPolicy_S3 policy(driver->ctx()); + + int ret = get_policy(driver, op_state, policy, dpp); + if (ret < 0) + return ret; + + policy.to_xml(os); + + return 0; +} + +int RGWBucketAdminOp::unlink(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp) +{ + RGWBucket bucket; + + int ret = bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) + return ret; + + return static_cast(driver)->ctl()->bucket->unlink_bucket(op_state.get_user_id(), op_state.get_bucket()->get_info().bucket, null_yield, dpp, true); +} + +int RGWBucketAdminOp::link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, string *err) +{ + if (!op_state.is_user_op()) { + set_err_msg(err, "empty user id"); + 
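// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): several paths here (get_policy
// above, the ACL handling in link() just below) decode a struct out of a
// bufferlist and must treat a malformed buffer as -EIO rather than crash.
// A generic shape of that guard; Buffer and Decodable are hypothetical
// stand-ins for the ceph::decode machinery and buffer::error:
#include <cerrno>
#include <stdexcept>
#include <string>
#include <vector>

struct Buffer { std::vector<unsigned char> data; };

struct Decodable {
  // pretend decode(): throws on truncated/garbled input, like buffer::error
  void decode(const Buffer& b) {
    if (b.data.size() < 4) throw std::runtime_error("short buffer");
  }
};

inline int decode_guarded(Decodable& obj, const Buffer& bl, std::string* err) {
  try {
    obj.decode(bl);
    return 0;
  } catch (const std::exception& e) {
    if (err) *err = std::string("couldn't decode policy: ") + e.what();
    return -EIO;                  // same error link() reports on a bad ACL
  }
}
// ---------------------------------------------------------------------------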
return -EINVAL; + } + + RGWBucket bucket; + int ret = bucket.init(driver, op_state, null_yield, dpp, err); + if (ret < 0) + return ret; + + string bucket_id = op_state.get_bucket_id(); + std::string display_name = op_state.get_user_display_name(); + std::unique_ptr loc_bucket; + std::unique_ptr old_bucket; + + loc_bucket = op_state.get_bucket()->clone(); + + if (!bucket_id.empty() && bucket_id != loc_bucket->get_bucket_id()) { + set_err_msg(err, + "specified bucket id does not match " + loc_bucket->get_bucket_id()); + return -EINVAL; + } + + old_bucket = loc_bucket->clone(); + + loc_bucket->get_key().tenant = op_state.get_user_id().tenant; + + if (!op_state.new_bucket_name.empty()) { + auto pos = op_state.new_bucket_name.find('/'); + if (pos != string::npos) { + loc_bucket->get_key().tenant = op_state.new_bucket_name.substr(0, pos); + loc_bucket->get_key().name = op_state.new_bucket_name.substr(pos + 1); + } else { + loc_bucket->get_key().name = op_state.new_bucket_name; + } + } + + RGWObjVersionTracker objv_tracker; + RGWObjVersionTracker old_version = loc_bucket->get_info().objv_tracker; + + map::iterator aiter = loc_bucket->get_attrs().find(RGW_ATTR_ACL); + if (aiter == loc_bucket->get_attrs().end()) { + // should never happen; only pre-argonaut buckets lacked this. + ldpp_dout(dpp, 0) << "WARNING: can't bucket link because no acl on bucket=" << old_bucket << dendl; + set_err_msg(err, + "While crossing the Anavros you have displeased the goddess Hera." + " You must sacrifice your ancient bucket " + loc_bucket->get_bucket_id()); + return -EINVAL; + } + bufferlist& aclbl = aiter->second; + RGWAccessControlPolicy policy; + ACLOwner owner; + try { + auto iter = aclbl.cbegin(); + decode(policy, iter); + owner = policy.get_owner(); + } catch (buffer::error& e) { + set_err_msg(err, "couldn't decode policy"); + return -EIO; + } + + int r = static_cast(driver)->ctl()->bucket->unlink_bucket(owner.get_id(), old_bucket->get_info().bucket, null_yield, dpp, false); + if (r < 0) { + set_err_msg(err, "could not unlink policy from user " + owner.get_id().to_str()); + return r; + } + + // now update the user for the bucket... 
+ if (display_name.empty()) { + ldpp_dout(dpp, 0) << "WARNING: user " << op_state.get_user_id() << " has no display name set" << dendl; + } + + RGWAccessControlPolicy policy_instance; + policy_instance.create_default(op_state.get_user_id(), display_name); + owner = policy_instance.get_owner(); + + aclbl.clear(); + policy_instance.encode(aclbl); + + bool exclusive = false; + loc_bucket->get_info().owner = op_state.get_user_id(); + if (*loc_bucket != *old_bucket) { + loc_bucket->get_info().bucket = loc_bucket->get_key(); + loc_bucket->get_info().objv_tracker.version_for_read()->ver = 0; + exclusive = true; + } + + r = loc_bucket->put_info(dpp, exclusive, ceph::real_time()); + if (r < 0) { + set_err_msg(err, "ERROR: failed writing bucket instance info: " + cpp_strerror(-r)); + return r; + } + + /* link to user */ + RGWBucketEntryPoint ep; + ep.bucket = loc_bucket->get_info().bucket; + ep.owner = op_state.get_user_id(); + ep.creation_time = loc_bucket->get_info().creation_time; + ep.linked = true; + rgw::sal::Attrs ep_attrs; + rgw_ep_info ep_data{ep, ep_attrs}; + + r = static_cast(driver)->ctl()->bucket->link_bucket(op_state.get_user_id(), loc_bucket->get_info().bucket, loc_bucket->get_info().creation_time, null_yield, dpp, true, &ep_data); + if (r < 0) { + set_err_msg(err, "failed to relink bucket"); + return r; + } + + if (*loc_bucket != *old_bucket) { + // like RGWRados::delete_bucket -- excepting no bucket_index work. + r = static_cast(driver)->ctl()->bucket->remove_bucket_entrypoint_info( + old_bucket->get_key(), null_yield, dpp, + RGWBucketCtl::Bucket::RemoveParams() + .set_objv_tracker(&ep_data.ep_objv)); + if (r < 0) { + set_err_msg(err, "failed to unlink old bucket " + old_bucket->get_tenant() + "/" + old_bucket->get_name()); + return r; + } + r = static_cast(driver)->ctl()->bucket->remove_bucket_instance_info( + old_bucket->get_key(), old_bucket->get_info(), + null_yield, dpp, + RGWBucketCtl::BucketInstance::RemoveParams() + .set_objv_tracker(&ep_data.ep_objv)); + if (r < 0) { + set_err_msg(err, "failed to unlink old bucket " + old_bucket->get_tenant() + "/" + old_bucket->get_name()); + return r; + } + } + + return 0; +} + +int RGWBucketAdminOp::chown(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const string& marker, const DoutPrefixProvider *dpp, string *err) +{ + RGWBucket bucket; + + int ret = bucket.init(driver, op_state, null_yield, dpp, err); + if (ret < 0) + return ret; + + return bucket.chown(op_state, marker, null_yield, dpp, err); + +} + +int RGWBucketAdminOp::check_index(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y, const DoutPrefixProvider *dpp) +{ + int ret; + map existing_stats; + map calculated_stats; + + + RGWBucket bucket; + + ret = bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) + return ret; + + Formatter *formatter = flusher.get_formatter(); + flusher.start(0); + + ret = bucket.check_bad_index_multipart(op_state, flusher, dpp); + if (ret < 0) + return ret; + + ret = bucket.check_object_index(dpp, op_state, flusher, y); + if (ret < 0) + return ret; + + ret = bucket.check_index(dpp, op_state, existing_stats, calculated_stats); + if (ret < 0) + return ret; + + dump_index_check(existing_stats, calculated_stats, formatter); + flusher.flush(); + + return 0; +} + +int RGWBucketAdminOp::remove_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + optional_yield y, const DoutPrefixProvider *dpp, + bool bypass_gc, bool keep_index_consistent) +{ + std::unique_ptr bucket; 
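// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): link() above rewrites the bucket
// instance with exclusive=true only when the bucket identity changed, so a
// racing writer cannot resurrect the old entry. A compact model of that
// exclusive-put semantic; Store is hypothetical, not the RGW put_info() API:
#include <cerrno>
#include <map>
#include <string>

struct Store {
  std::map<std::string, std::string> kv;
  // exclusive=true refuses to overwrite an existing key
  int put(const std::string& k, const std::string& v, bool exclusive) {
    if (exclusive && kv.count(k)) return -EEXIST;
    kv[k] = v;
    return 0;
  }
};
// A non-exclusive put (exclusive=false), as used when only the owner
// changes, simply overwrites in place.
// ---------------------------------------------------------------------------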
+ std::unique_ptr user = driver->get_user(op_state.get_user_id()); + + int ret = driver->get_bucket(dpp, user.get(), user->get_tenant(), op_state.get_bucket_name(), + &bucket, y); + if (ret < 0) + return ret; + + if (bypass_gc) + ret = bucket->remove_bucket_bypass_gc(op_state.get_max_aio(), keep_index_consistent, y, dpp); + else + ret = bucket->remove_bucket(dpp, op_state.will_delete_children(), + false, nullptr, y); + + return ret; +} + +int RGWBucketAdminOp::remove_object(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp) +{ + RGWBucket bucket; + + int ret = bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) + return ret; + + return bucket.remove_object(dpp, op_state); +} + +int RGWBucketAdminOp::sync_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, string *err_msg) +{ + RGWBucket bucket; + int ret = bucket.init(driver, op_state, null_yield, dpp, err_msg); + if (ret < 0) + { + return ret; + } + return bucket.sync(op_state, dpp, err_msg); +} + +static int bucket_stats(rgw::sal::Driver* driver, + const std::string& tenant_name, + const std::string& bucket_name, + Formatter *formatter, + const DoutPrefixProvider *dpp) +{ + std::unique_ptr bucket; + map stats; + + real_time mtime; + int ret = driver->get_bucket(dpp, nullptr, tenant_name, bucket_name, &bucket, null_yield); + if (ret < 0) { + return ret; + } + + const auto& index = bucket->get_info().get_current_index(); + if (is_layout_indexless(index)) { + cerr << "error, indexless buckets do not maintain stats; bucket=" << + bucket->get_name() << std::endl; + return -EINVAL; + } + + std::string bucket_ver, master_ver; + std::string max_marker; + ret = bucket->read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, &max_marker); + if (ret < 0) { + cerr << "error getting bucket stats bucket=" << bucket->get_name() << " ret=" << ret << std::endl; + return ret; + } + + utime_t ut(mtime); + utime_t ctime_ut(bucket->get_creation_time()); + + formatter->open_object_section("stats"); + formatter->dump_string("bucket", bucket->get_name()); + formatter->dump_int("num_shards", + bucket->get_info().layout.current_index.layout.normal.num_shards); + formatter->dump_string("tenant", bucket->get_tenant()); + formatter->dump_string("zonegroup", bucket->get_info().zonegroup); + formatter->dump_string("placement_rule", bucket->get_info().placement_rule.to_str()); + ::encode_json("explicit_placement", bucket->get_key().explicit_placement, formatter); + formatter->dump_string("id", bucket->get_bucket_id()); + formatter->dump_string("marker", bucket->get_marker()); + formatter->dump_stream("index_type") << bucket->get_info().layout.current_index.layout.type; + ::encode_json("owner", bucket->get_info().owner, formatter); + formatter->dump_string("ver", bucket_ver); + formatter->dump_string("master_ver", master_ver); + ut.gmtime(formatter->dump_stream("mtime")); + ctime_ut.gmtime(formatter->dump_stream("creation_time")); + formatter->dump_string("max_marker", max_marker); + dump_bucket_usage(stats, formatter); + encode_json("bucket_quota", bucket->get_info().quota, formatter); + + // bucket tags + auto iter = bucket->get_attrs().find(RGW_ATTR_TAGS); + if (iter != bucket->get_attrs().end()) { + RGWObjTagSet_S3 tagset; + bufferlist::const_iterator piter{&iter->second}; + try { + tagset.decode(piter); + tagset.dump(formatter); + } catch (buffer::error& err) { + cerr << "ERROR: caught buffer:error, couldn't decode TagSet" << std::endl; + } + } + + // TODO: 
bucket CORS + // TODO: bucket LC + formatter->close_section(); + + return 0; +} + +int RGWBucketAdminOp::limit_check(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + const std::list& user_ids, + RGWFormatterFlusher& flusher, optional_yield y, + const DoutPrefixProvider *dpp, + bool warnings_only) +{ + int ret = 0; + const size_t max_entries = + driver->ctx()->_conf->rgw_list_buckets_max_chunk; + + const size_t safe_max_objs_per_shard = + driver->ctx()->_conf->rgw_safe_max_objects_per_shard; + + uint16_t shard_warn_pct = + driver->ctx()->_conf->rgw_shard_warning_threshold; + if (shard_warn_pct > 100) + shard_warn_pct = 90; + + Formatter *formatter = flusher.get_formatter(); + flusher.start(0); + + formatter->open_array_section("users"); + + for (const auto& user_id : user_ids) { + + formatter->open_object_section("user"); + formatter->dump_string("user_id", user_id); + formatter->open_array_section("buckets"); + + string marker; + rgw::sal::BucketList buckets; + do { + std::unique_ptr user = driver->get_user(rgw_user(user_id)); + + ret = user->list_buckets(dpp, marker, string(), max_entries, false, buckets, y); + + if (ret < 0) + return ret; + + map>& m_buckets = buckets.get_buckets(); + + for (const auto& iter : m_buckets) { + auto& bucket = iter.second; + uint64_t num_objects = 0; + + marker = bucket->get_name(); /* Casey's location for marker update, + * as we may now not reach the end of + * the loop body */ + + ret = bucket->load_bucket(dpp, null_yield); + if (ret < 0) + continue; + + const auto& index = bucket->get_info().get_current_index(); + if (is_layout_indexless(index)) { + continue; // indexless buckets don't have stats + } + + /* need stats for num_entries */ + string bucket_ver, master_ver; + std::map stats; + ret = bucket->read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, nullptr); + + if (ret < 0) + continue; + + for (const auto& s : stats) { + num_objects += s.second.num_objects; + } + + const uint32_t num_shards = rgw::num_shards(index.layout.normal); + uint64_t objs_per_shard = + (num_shards) ? 
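// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the ternary being computed here
// guards the per-shard average against num_shards == 0, and limit_check then
// grades the result against rgw_safe_max_objects_per_shard and the warning
// threshold. The arithmetic on its own:
#include <cstdint>
#include <string>

inline std::string shard_fill_status(uint64_t num_objects, uint32_t num_shards,
                                     uint64_t safe_max_per_shard,
                                     uint16_t warn_pct /* e.g. 90 */) {
  const uint64_t per_shard = num_shards ? num_objects / num_shards
                                        : num_objects;
  const uint64_t fill_pct = per_shard * 100 / safe_max_per_shard;
  if (fill_pct > 100)       return "OVER " + std::to_string(fill_pct) + "%";
  if (fill_pct >= warn_pct) return "WARN " + std::to_string(fill_pct) + "%";
  return "OK";
}
// e.g. 2'000'000 objects over 16 shards with safe_max 100'000 gives
// 125'000 per shard, i.e. 125%, so "OVER 125%".
// ---------------------------------------------------------------------------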
num_objects/num_shards : num_objects; + { + bool warn; + stringstream ss; + uint64_t fill_pct = objs_per_shard * 100 / safe_max_objs_per_shard; + if (fill_pct > 100) { + ss << "OVER " << fill_pct << "%"; + warn = true; + } else if (fill_pct >= shard_warn_pct) { + ss << "WARN " << fill_pct << "%"; + warn = true; + } else { + ss << "OK"; + warn = false; + } + + if (warn || !warnings_only) { + formatter->open_object_section("bucket"); + formatter->dump_string("bucket", bucket->get_name()); + formatter->dump_string("tenant", bucket->get_tenant()); + formatter->dump_int("num_objects", num_objects); + formatter->dump_int("num_shards", num_shards); + formatter->dump_int("objects_per_shard", objs_per_shard); + formatter->dump_string("fill_status", ss.str()); + formatter->close_section(); + } + } + } + formatter->flush(cout); + } while (buckets.is_truncated()); /* foreach: bucket */ + + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + + } /* foreach: user_id */ + + formatter->close_section(); + formatter->flush(cout); + + return ret; +} /* RGWBucketAdminOp::limit_check */ + +int RGWBucketAdminOp::info(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWBucket bucket; + int ret = 0; + const std::string& bucket_name = op_state.get_bucket_name(); + if (!bucket_name.empty()) { + ret = bucket.init(driver, op_state, null_yield, dpp); + if (-ENOENT == ret) + return -ERR_NO_SUCH_BUCKET; + else if (ret < 0) + return ret; + } + + Formatter *formatter = flusher.get_formatter(); + flusher.start(0); + + CephContext *cct = driver->ctx(); + + const size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk; + + const bool show_stats = op_state.will_fetch_stats(); + const rgw_user& user_id = op_state.get_user_id(); + if (op_state.is_user_op()) { + formatter->open_array_section("buckets"); + + rgw::sal::BucketList buckets; + std::unique_ptr user = driver->get_user(op_state.get_user_id()); + std::string marker; + const std::string empty_end_marker; + constexpr bool no_need_stats = false; // set need_stats to false + + do { + ret = user->list_buckets(dpp, marker, empty_end_marker, max_entries, + no_need_stats, buckets, y); + if (ret < 0) { + return ret; + } + + const std::string* marker_cursor = nullptr; + map>& m = buckets.get_buckets(); + + for (const auto& i : m) { + const std::string& obj_name = i.first; + if (!bucket_name.empty() && bucket_name != obj_name) { + continue; + } + + if (show_stats) { + bucket_stats(driver, user_id.tenant, obj_name, formatter, dpp); + } else { + formatter->dump_string("bucket", obj_name); + } + + marker_cursor = &obj_name; + } // for loop + if (marker_cursor) { + marker = *marker_cursor; + } + + flusher.flush(); + } while (buckets.is_truncated()); + + formatter->close_section(); + } else if (!bucket_name.empty()) { + ret = bucket_stats(driver, user_id.tenant, bucket_name, formatter, dpp); + if (ret < 0) { + return ret; + } + } else { + void *handle = nullptr; + bool truncated = true; + + formatter->open_array_section("buckets"); + ret = driver->meta_list_keys_init(dpp, "bucket", string(), &handle); + while (ret == 0 && truncated) { + std::list buckets; + constexpr int max_keys = 1000; + ret = driver->meta_list_keys_next(dpp, handle, max_keys, buckets, + &truncated); + for (auto& bucket_name : buckets) { + if (show_stats) { + bucket_stats(driver, user_id.tenant, bucket_name, formatter, dpp); + } else { + formatter->dump_string("bucket", 
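// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): both limit_check() above and the
// per-user branch here page through list_buckets() by feeding the last name
// seen back in as the next marker. The loop shape, with BucketPage and
// list_buckets() as hypothetical stand-ins for rgw::sal::BucketList:
#include <string>
#include <vector>

struct BucketPage {
  std::vector<std::string> names;
  bool truncated = false;
};

// pretend list_buckets(): returns up to max sorted names strictly after marker
inline BucketPage list_buckets(const std::vector<std::string>& all_sorted,
                               const std::string& marker, size_t max) {
  BucketPage page;
  for (const auto& n : all_sorted) {
    if (n <= marker) continue;
    if (page.names.size() == max) { page.truncated = true; break; }
    page.names.push_back(n);
  }
  return page;
}

inline size_t count_buckets(const std::vector<std::string>& all_sorted) {
  std::string marker;                    // empty marker = start of listing
  size_t total = 0;
  BucketPage page;
  do {
    page = list_buckets(all_sorted, marker, 1000);
    for (const auto& n : page.names) {
      total++;
      marker = n;                        // advance the cursor, as above
    }
  } while (page.truncated);
  return total;
}
// ---------------------------------------------------------------------------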
bucket_name); + } + } + } + driver->meta_list_keys_complete(handle); + + formatter->close_section(); + } + + flusher.flush(); + + return 0; +} + +int RGWBucketAdminOp::set_quota(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp) +{ + RGWBucket bucket; + + int ret = bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) + return ret; + return bucket.set_quota(op_state, dpp); +} + +inline auto split_tenant(const std::string& bucket_name){ + auto p = bucket_name.find('/'); + if(p != std::string::npos) { + return std::make_pair(bucket_name.substr(0,p), bucket_name.substr(p+1)); + } + return std::make_pair(std::string(), bucket_name); +} + +using bucket_instance_ls = std::vector; +void get_stale_instances(rgw::sal::Driver* driver, const std::string& bucket_name, + const vector& lst, + bucket_instance_ls& stale_instances, + const DoutPrefixProvider *dpp) +{ + + bucket_instance_ls other_instances; +// first iterate over the entries, and pick up the done buckets; these +// are guaranteed to be stale + for (const auto& bucket_instance : lst){ + RGWBucketInfo binfo; + std::unique_ptr bucket; + rgw_bucket rbucket; + rgw_bucket_parse_bucket_key(driver->ctx(), bucket_instance, &rbucket, nullptr); + int r = driver->get_bucket(dpp, nullptr, rbucket, &bucket, null_yield); + if (r < 0){ + // this can only happen if someone deletes us right when we're processing + ldpp_dout(dpp, -1) << "Bucket instance is invalid: " << bucket_instance + << cpp_strerror(-r) << dendl; + continue; + } + binfo = bucket->get_info(); + if (binfo.reshard_status == cls_rgw_reshard_status::DONE) + stale_instances.emplace_back(std::move(binfo)); + else { + other_instances.emplace_back(std::move(binfo)); + } + } + + // Read the cur bucket info, if the bucket doesn't exist we can simply return + // all the instances + auto [tenant, bname] = split_tenant(bucket_name); + RGWBucketInfo cur_bucket_info; + std::unique_ptr cur_bucket; + int r = driver->get_bucket(dpp, nullptr, tenant, bname, &cur_bucket, null_yield); + if (r < 0) { + if (r == -ENOENT) { + // bucket doesn't exist, everything is stale then + stale_instances.insert(std::end(stale_instances), + std::make_move_iterator(other_instances.begin()), + std::make_move_iterator(other_instances.end())); + } else { + // all bets are off if we can't read the bucket, just return the sureshot stale instances + ldpp_dout(dpp, -1) << "error: reading bucket info for bucket: " + << bname << cpp_strerror(-r) << dendl; + } + return; + } + + // Don't process further in this round if bucket is resharding + cur_bucket_info = cur_bucket->get_info(); + if (cur_bucket_info.reshard_status == cls_rgw_reshard_status::IN_PROGRESS) + return; + + other_instances.erase(std::remove_if(other_instances.begin(), other_instances.end(), + [&cur_bucket_info](const RGWBucketInfo& b){ + return (b.bucket.bucket_id == cur_bucket_info.bucket.bucket_id || + b.bucket.bucket_id == cur_bucket_info.new_bucket_instance_id); + }), + other_instances.end()); + + // check if there are still instances left + if (other_instances.empty()) { + return; + } + + // Now we have a bucket with instances where the reshard status is none, this + // usually happens when the reshard process couldn't complete, lockdown the + // bucket and walk through these instances to make sure no one else interferes + // with these + { + RGWBucketReshardLock reshard_lock(static_cast(driver), cur_bucket->get_info(), true); + r = reshard_lock.lock(dpp); + if (r < 0) { + // most likely bucket is under reshard, return 
the sureshot stale instances + ldpp_dout(dpp, 5) << __func__ + << "failed to take reshard lock; reshard underway likey" << dendl; + return; + } + auto sg = make_scope_guard([&reshard_lock](){ reshard_lock.unlock();} ); + // this should be fast enough that we may not need to renew locks and check + // exit status?, should we read the values of the instances again? + stale_instances.insert(std::end(stale_instances), + std::make_move_iterator(other_instances.begin()), + std::make_move_iterator(other_instances.end())); + } + + return; +} + +static int process_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp, + std::function process_f) +{ + std::string marker; + void *handle; + Formatter *formatter = flusher.get_formatter(); + static constexpr auto default_max_keys = 1000; + + int ret = driver->meta_list_keys_init(dpp, "bucket.instance", marker, &handle); + if (ret < 0) { + cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl; + return ret; + } + + bool truncated; + + formatter->open_array_section("keys"); + auto g = make_scope_guard([&driver, &handle, &formatter]() { + driver->meta_list_keys_complete(handle); + formatter->close_section(); // keys + formatter->flush(cout); + }); + + do { + list keys; + + ret = driver->meta_list_keys_next(dpp, handle, default_max_keys, keys, &truncated); + if (ret < 0 && ret != -ENOENT) { + cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl; + return ret; + } if (ret != -ENOENT) { + // partition the list of buckets by buckets as the listing is un sorted, + // since it would minimize the reads to bucket_info + std::unordered_map> bucket_instance_map; + for (auto &key: keys) { + auto pos = key.find(':'); + if(pos != std::string::npos) + bucket_instance_map[key.substr(0,pos)].emplace_back(std::move(key)); + } + for (const auto& kv: bucket_instance_map) { + bucket_instance_ls stale_lst; + get_stale_instances(driver, kv.first, kv.second, stale_lst, dpp); + process_f(stale_lst, formatter, driver); + } + } + } while (truncated); + + return 0; +} + +int RGWBucketAdminOp::list_stale_instances(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp) +{ + auto process_f = [](const bucket_instance_ls& lst, + Formatter *formatter, + rgw::sal::Driver*){ + for (const auto& binfo: lst) + formatter->dump_string("key", binfo.bucket.get_key()); + }; + return process_stale_instances(driver, op_state, flusher, dpp, process_f); +} + + +int RGWBucketAdminOp::clear_stale_instances(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp) +{ + auto process_f = [dpp](const bucket_instance_ls& lst, + Formatter *formatter, + rgw::sal::Driver* driver){ + for (const auto &binfo: lst) { + std::unique_ptr bucket; + driver->get_bucket(nullptr, binfo, &bucket); + int ret = bucket->purge_instance(dpp); + if (ret == 0){ + auto md_key = "bucket.instance:" + binfo.bucket.get_key(); + ret = driver->meta_remove(dpp, md_key, null_yield); + } + formatter->open_object_section("delete_status"); + formatter->dump_string("bucket_instance", binfo.bucket.get_key()); + formatter->dump_int("status", -ret); + formatter->close_section(); + } + }; + + return process_stale_instances(driver, op_state, flusher, dpp, process_f); +} + +static int fix_single_bucket_lc(rgw::sal::Driver* driver, + const std::string& tenant_name, + const std::string& bucket_name, + 
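// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): process_stale_instances() above
// receives "bucket:instance" keys in no particular order, so it groups them
// by the name before ':' to read each bucket's info only once. That
// partition step by itself:
#include <string>
#include <unordered_map>
#include <vector>

using InstanceMap = std::unordered_map<std::string, std::vector<std::string>>;

inline InstanceMap partition_by_bucket(std::vector<std::string> keys) {
  InstanceMap out;
  for (auto& key : keys) {
    auto pos = key.find(':');
    if (pos != std::string::npos) {
      // "mybucket:zone.123.4" groups under "mybucket"
      std::string bucket = key.substr(0, pos);
      out[bucket].emplace_back(std::move(key));
    }
  }
  return out;
}
// ---------------------------------------------------------------------------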
const DoutPrefixProvider *dpp) +{ + std::unique_ptr bucket; + int ret = driver->get_bucket(dpp, nullptr, tenant_name, bucket_name, &bucket, null_yield); + if (ret < 0) { + // TODO: Should we handle the case where the bucket could've been removed between + // listing and fetching? + return ret; + } + + return rgw::lc::fix_lc_shard_entry(dpp, driver, driver->get_rgwlc()->get_lc(), bucket.get()); +} + +static void format_lc_status(Formatter* formatter, + const std::string& tenant_name, + const std::string& bucket_name, + int status) +{ + formatter->open_object_section("bucket_entry"); + std::string entry = tenant_name.empty() ? bucket_name : tenant_name + "/" + bucket_name; + formatter->dump_string("bucket", entry); + formatter->dump_int("status", status); + formatter->close_section(); // bucket_entry +} + +static void process_single_lc_entry(rgw::sal::Driver* driver, + Formatter *formatter, + const std::string& tenant_name, + const std::string& bucket_name, + const DoutPrefixProvider *dpp) +{ + int ret = fix_single_bucket_lc(driver, tenant_name, bucket_name, dpp); + format_lc_status(formatter, tenant_name, bucket_name, -ret); +} + +int RGWBucketAdminOp::fix_lc_shards(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp) +{ + std::string marker; + void *handle; + Formatter *formatter = flusher.get_formatter(); + static constexpr auto default_max_keys = 1000; + + bool truncated; + if (const std::string& bucket_name = op_state.get_bucket_name(); + ! bucket_name.empty()) { + const rgw_user user_id = op_state.get_user_id(); + process_single_lc_entry(driver, formatter, user_id.tenant, bucket_name, dpp); + formatter->flush(cout); + } else { + int ret = driver->meta_list_keys_init(dpp, "bucket", marker, &handle); + if (ret < 0) { + std::cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl; + return ret; + } + + { + formatter->open_array_section("lc_fix_status"); + auto sg = make_scope_guard([&driver, &handle, &formatter](){ + driver->meta_list_keys_complete(handle); + formatter->close_section(); // lc_fix_status + formatter->flush(cout); + }); + do { + list keys; + ret = driver->meta_list_keys_next(dpp, handle, default_max_keys, keys, &truncated); + if (ret < 0 && ret != -ENOENT) { + std::cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl; + return ret; + } if (ret != -ENOENT) { + for (const auto &key:keys) { + auto [tenant_name, bucket_name] = split_tenant(key); + process_single_lc_entry(driver, formatter, tenant_name, bucket_name, dpp); + } + } + formatter->flush(cout); // regularly flush every 1k entries + } while (truncated); + } + + } + return 0; + +} + +static bool has_object_expired(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + rgw::sal::Bucket* bucket, + const rgw_obj_key& key, utime_t& delete_at) +{ + std::unique_ptr obj = bucket->get_object(key); + bufferlist delete_at_bl; + + int ret = rgw_object_get_attr(dpp, driver, obj.get(), RGW_ATTR_DELETE_AT, delete_at_bl, null_yield); + if (ret < 0) { + return false; // no delete at attr, proceed + } + + ret = decode_bl(delete_at_bl, delete_at); + if (ret < 0) { + return false; // failed to parse + } + + if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { + return true; + } + + return false; +} + +static int fix_bucket_obj_expiry(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + rgw::sal::Bucket* bucket, + RGWFormatterFlusher& flusher, bool dry_run) +{ + if (bucket->get_key().bucket_id == 
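// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the comparison being built here
// relies on the convention that a bucket which has never been resharded
// keeps marker == bucket_id, while each reshard installs a fresh bucket_id
// and leaves the original marker behind. In isolation:
#include <string>

struct BucketKeyIds {
  std::string marker;     // set once at bucket creation
  std::string bucket_id;  // replaced by each reshard
};

inline bool was_resharded(const BucketKeyIds& k) {
  return k.marker != k.bucket_id;
}
// fix_bucket_obj_expiry() below only proceeds when was_resharded() would be
// true; otherwise it returns early with "not a resharded bucket, move along".
// ---------------------------------------------------------------------------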
bucket->get_key().marker) { + ldpp_dout(dpp, -1) << "Not a resharded bucket skipping" << dendl; + return 0; // not a resharded bucket, move along + } + + Formatter *formatter = flusher.get_formatter(); + formatter->open_array_section("expired_deletion_status"); + auto sg = make_scope_guard([&formatter] { + formatter->close_section(); + formatter->flush(std::cout); + }); + + rgw::sal::Bucket::ListParams params; + rgw::sal::Bucket::ListResults results; + + params.list_versions = bucket->versioned(); + params.allow_unordered = true; + + do { + int ret = bucket->list(dpp, params, listing_max_entries, results, null_yield); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR failed to list objects in the bucket" << dendl; + return ret; + } + for (const auto& obj : results.objs) { + rgw_obj_key key(obj.key); + utime_t delete_at; + if (has_object_expired(dpp, driver, bucket, key, delete_at)) { + formatter->open_object_section("object_status"); + formatter->dump_string("object", key.name); + formatter->dump_stream("delete_at") << delete_at; + + if (!dry_run) { + ret = rgw_remove_object(dpp, driver, bucket, key); + formatter->dump_int("status", ret); + } + + formatter->close_section(); // object_status + } + } + formatter->flush(cout); // regularly flush every 1k entries + } while (results.is_truncated); + + return 0; +} + +int RGWBucketAdminOp::fix_obj_expiry(rgw::sal::Driver* driver, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp, bool dry_run) +{ + RGWBucket admin_bucket; + int ret = admin_bucket.init(driver, op_state, null_yield, dpp); + if (ret < 0) { + ldpp_dout(dpp, -1) << "failed to initialize bucket" << dendl; + return ret; + } + std::unique_ptr bucket; + ret = driver->get_bucket(nullptr, admin_bucket.get_bucket_info(), &bucket); + if (ret < 0) { + return ret; + } + + return fix_bucket_obj_expiry(dpp, driver, bucket.get(), flusher, dry_run); +} + +void RGWBucketCompleteInfo::dump(Formatter *f) const { + encode_json("bucket_info", info, f); + encode_json("attrs", attrs, f); +} + +void RGWBucketCompleteInfo::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("bucket_info", info, obj); + JSONDecoder::decode_json("attrs", attrs, obj); +} + +class RGWBucketMetadataHandler : public RGWBucketMetadataHandlerBase { +public: + struct Svc { + RGWSI_Bucket *bucket{nullptr}; + } svc; + + struct Ctl { + RGWBucketCtl *bucket{nullptr}; + } ctl; + + RGWBucketMetadataHandler() {} + + void init(RGWSI_Bucket *bucket_svc, + RGWBucketCtl *bucket_ctl) override { + base_init(bucket_svc->ctx(), + bucket_svc->get_ep_be_handler().get()); + svc.bucket = bucket_svc; + ctl.bucket = bucket_ctl; + } + + string get_type() override { return "bucket"; } + + RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override { + RGWBucketEntryPoint be; + + try { + decode_json_obj(be, jo); + } catch (JSONDecoder::err& e) { + return nullptr; + } + + return new RGWBucketEntryMetadataObject(be, objv, mtime); + } + + int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override { + RGWObjVersionTracker ot; + RGWBucketEntryPoint be; + + real_time mtime; + map attrs; + + RGWSI_Bucket_EP_Ctx ctx(op->ctx()); + + int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &ot, &mtime, &attrs, y, dpp); + if (ret < 0) + return ret; + + RGWBucketEntryMetadataObject *mdo = new RGWBucketEntryMetadataObject(be, ot.read_version, mtime, std::move(attrs)); + + *obj 
= mdo; + + return 0; + } + + int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) override; + + int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) override { + RGWBucketEntryPoint be; + + real_time orig_mtime; + + RGWSI_Bucket_EP_Ctx ctx(op->ctx()); + + int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &objv_tracker, &orig_mtime, nullptr, y, dpp); + if (ret < 0) + return ret; + + /* + * We're unlinking the bucket but we don't want to update the entrypoint here - we're removing + * it immediately and don't want to invalidate our cached objv_version or the bucket obj removal + * will incorrectly fail. + */ + ret = ctl.bucket->unlink_bucket(be.owner, be.bucket, y, dpp, false); + if (ret < 0) { + ldpp_dout(dpp, -1) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl; + } + + ret = svc.bucket->remove_bucket_entrypoint_info(ctx, entry, &objv_tracker, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, -1) << "could not delete bucket=" << entry << dendl; + } + /* idempotent */ + return 0; + } + + int call(std::function f) { + return call(nullopt, f); + } + + int call(std::optional bectx_params, + std::function f) { + return be_handler->call(bectx_params, [&](RGWSI_MetaBackend_Handler::Op *op) { + RGWSI_Bucket_EP_Ctx ctx(op->ctx()); + return f(ctx); + }); + } +}; + +class RGWMetadataHandlerPut_Bucket : public RGWMetadataHandlerPut_SObj +{ + RGWBucketMetadataHandler *bhandler; + RGWBucketEntryMetadataObject *obj; +public: + RGWMetadataHandlerPut_Bucket(RGWBucketMetadataHandler *_handler, + RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker, + optional_yield y, + RGWMDLogSyncType type, bool from_remote_zone) : RGWMetadataHandlerPut_SObj(_handler, op, entry, obj, objv_tracker, y, type, from_remote_zone), + bhandler(_handler) { + obj = static_cast(_obj); + } + ~RGWMetadataHandlerPut_Bucket() {} + + void encode_obj(bufferlist *bl) override { + obj->get_ep().encode(*bl); + } + + int put_checked(const DoutPrefixProvider *dpp) override; + int put_post(const DoutPrefixProvider *dpp) override; +}; + +int RGWBucketMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) +{ + RGWMetadataHandlerPut_Bucket put_op(this, op, entry, obj, objv_tracker, y, type, from_remote_zone); + return do_put_operate(&put_op, dpp); +} + +int RGWMetadataHandlerPut_Bucket::put_checked(const DoutPrefixProvider *dpp) +{ + RGWBucketEntryMetadataObject *orig_obj = static_cast(old_obj); + + if (orig_obj) { + obj->set_pattrs(&orig_obj->get_attrs()); + } + + auto& be = obj->get_ep(); + auto mtime = obj->get_mtime(); + auto pattrs = obj->get_pattrs(); + + RGWSI_Bucket_EP_Ctx ctx(op->ctx()); + + return bhandler->svc.bucket->store_bucket_entrypoint_info(ctx, entry, + be, + false, + mtime, + pattrs, + &objv_tracker, + y, + dpp); +} + +int RGWMetadataHandlerPut_Bucket::put_post(const DoutPrefixProvider *dpp) +{ + auto& be = obj->get_ep(); + + int ret; + + /* link bucket */ + if (be.linked) { + ret = bhandler->ctl.bucket->link_bucket(be.owner, be.bucket, be.creation_time, y, dpp, false); + } else { + ret = 
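// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the metadata handlers here use a
// template-method shape: do_put() builds a put op, and do_put_operate()
// (defined elsewhere) drives the validate/persist/side-effect hooks in
// order. A skeletal model of that sequence, with hypothetical names:
struct PutOp {
  virtual ~PutOp() = default;
  virtual int put_check()   { return 0; }  // validate/massage incoming obj
  virtual int put_checked() = 0;           // persist the object
  virtual int put_post()    { return 0; }  // side effects, e.g. (un)link
};

inline int run_put(PutOp* op) {
  if (int r = op->put_check(); r < 0)   return r;
  if (int r = op->put_checked(); r < 0) return r;
  return op->put_post();     // may report an applied status to the caller
}
// ---------------------------------------------------------------------------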
bhandler->ctl.bucket->unlink_bucket(be.owner, be.bucket, y, dpp, false); + } + + return ret; +} + +static void get_md5_digest(const RGWBucketEntryPoint *be, string& md5_digest) { + + char md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + bufferlist bl; + + Formatter *f = new JSONFormatter(false); + be->dump(f); + f->flush(bl); + + MD5 hash; + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + hash.Update((const unsigned char *)bl.c_str(), bl.length()); + hash.Final(m); + + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, md5); + + delete f; + + md5_digest = md5; +} + +#define ARCHIVE_META_ATTR RGW_ATTR_PREFIX "zone.archive.info" + +struct archive_meta_info { + rgw_bucket orig_bucket; + + bool from_attrs(CephContext *cct, map& attrs) { + auto iter = attrs.find(ARCHIVE_META_ATTR); + if (iter == attrs.end()) { + return false; + } + + auto bliter = iter->second.cbegin(); + try { + decode(bliter); + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: failed to decode archive meta info" << dendl; + return false; + } + + return true; + } + + void store_in_attrs(map& attrs) const { + encode(attrs[ARCHIVE_META_ATTR]); + } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(orig_bucket, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(orig_bucket, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(archive_meta_info) + +class RGWArchiveBucketMetadataHandler : public RGWBucketMetadataHandler { +public: + RGWArchiveBucketMetadataHandler() {} + + int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) override { + auto cct = svc.bucket->ctx(); + + RGWSI_Bucket_EP_Ctx ctx(op->ctx()); + + ldpp_dout(dpp, 5) << "SKIP: bucket removal is not allowed on archive zone: bucket:" << entry << " ... proceeding to rename" << dendl; + + string tenant_name, bucket_name; + parse_bucket(entry, &tenant_name, &bucket_name); + rgw_bucket entry_bucket; + entry_bucket.tenant = tenant_name; + entry_bucket.name = bucket_name; + + real_time mtime; + + /* read original entrypoint */ + + RGWBucketEntryPoint be; + map attrs; + int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &objv_tracker, &mtime, &attrs, y, dpp); + if (ret < 0) { + return ret; + } + + string bi_meta_name = RGWSI_Bucket::get_bi_meta_key(be.bucket); + + /* read original bucket instance info */ + + map attrs_m; + ceph::real_time orig_mtime; + RGWBucketInfo old_bi; + + ret = ctl.bucket->read_bucket_instance_info(be.bucket, &old_bi, y, dpp, RGWBucketCtl::BucketInstance::GetParams() + .set_mtime(&orig_mtime) + .set_attrs(&attrs_m)); + if (ret < 0) { + return ret; + } + + archive_meta_info ami; + + if (!ami.from_attrs(svc.bucket->ctx(), attrs_m)) { + ami.orig_bucket = old_bi.bucket; + ami.store_in_attrs(attrs_m); + } + + /* generate a new bucket instance. 
We could have avoided this if we could just point a new + * bucket entry point to the old bucket instance, however, due to limitation in the way + * we index buckets under the user, bucket entrypoint and bucket instance of the same + * bucket need to have the same name, so we need to copy the old bucket instance into + * to a new entry with the new name + */ + + string new_bucket_name; + + RGWBucketInfo new_bi = old_bi; + RGWBucketEntryPoint new_be = be; + + string md5_digest; + + get_md5_digest(&new_be, md5_digest); + new_bucket_name = ami.orig_bucket.name + "-deleted-" + md5_digest; + + new_bi.bucket.name = new_bucket_name; + new_bi.objv_tracker.clear(); + + new_be.bucket.name = new_bucket_name; + + ret = ctl.bucket->store_bucket_instance_info(be.bucket, new_bi, y, dpp, RGWBucketCtl::BucketInstance::PutParams() + .set_exclusive(false) + .set_mtime(orig_mtime) + .set_attrs(&attrs_m) + .set_orig_info(&old_bi)); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to put new bucket instance info for bucket=" << new_bi.bucket << " ret=" << ret << dendl; + return ret; + } + + /* store a new entrypoint */ + + RGWObjVersionTracker ot; + ot.generate_new_write_ver(cct); + + ret = svc.bucket->store_bucket_entrypoint_info(ctx, RGWSI_Bucket::get_entrypoint_meta_key(new_be.bucket), + new_be, true, mtime, &attrs, nullptr, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to put new bucket entrypoint for bucket=" << new_be.bucket << " ret=" << ret << dendl; + return ret; + } + + /* link new bucket */ + + ret = ctl.bucket->link_bucket(new_be.owner, new_be.bucket, new_be.creation_time, y, dpp, false); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to link new bucket for bucket=" << new_be.bucket << " ret=" << ret << dendl; + return ret; + } + + /* clean up old stuff */ + + ret = ctl.bucket->unlink_bucket(be.owner, entry_bucket, y, dpp, false); + if (ret < 0) { + ldpp_dout(dpp, -1) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl; + } + + // if (ret == -ECANCELED) it means that there was a race here, and someone + // wrote to the bucket entrypoint just before we removed it. The question is + // whether it was a newly created bucket entrypoint ... in which case we + // should ignore the error and move forward, or whether it is a higher version + // of the same bucket instance ... 
in which we should retry + ret = svc.bucket->remove_bucket_entrypoint_info(ctx, + RGWSI_Bucket::get_entrypoint_meta_key(be.bucket), + &objv_tracker, + y, + dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to put new bucket entrypoint for bucket=" << new_be.bucket << " ret=" << ret << dendl; + return ret; + } + + ret = ctl.bucket->remove_bucket_instance_info(be.bucket, old_bi, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, -1) << "could not delete bucket=" << entry << dendl; + } + + + /* idempotent */ + + return 0; + } + + int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) override { + if (entry.find("-deleted-") != string::npos) { + RGWObjVersionTracker ot; + RGWMetadataObject *robj; + int ret = do_get(op, entry, &robj, y, dpp); + if (ret != -ENOENT) { + if (ret < 0) { + return ret; + } + ot.read_version = robj->get_version(); + delete robj; + + ret = do_remove(op, entry, ot, y, dpp); + if (ret < 0) { + return ret; + } + } + } + + return RGWBucketMetadataHandler::do_put(op, entry, obj, + objv_tracker, y, dpp, type, from_remote_zone); + } + +}; + +class RGWBucketInstanceMetadataHandler : public RGWBucketInstanceMetadataHandlerBase { + int read_bucket_instance_entry(RGWSI_Bucket_BI_Ctx& ctx, + const string& entry, + RGWBucketCompleteInfo *bi, + ceph::real_time *pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) { + return svc.bucket->read_bucket_instance_info(ctx, + entry, + &bi->info, + pmtime, &bi->attrs, + y, + dpp); + } + +public: + struct Svc { + RGWSI_Zone *zone{nullptr}; + RGWSI_Bucket *bucket{nullptr}; + RGWSI_BucketIndex *bi{nullptr}; + } svc; + + rgw::sal::Driver* driver; + + RGWBucketInstanceMetadataHandler(rgw::sal::Driver* driver) + : driver(driver) {} + + void init(RGWSI_Zone *zone_svc, + RGWSI_Bucket *bucket_svc, + RGWSI_BucketIndex *bi_svc) override { + base_init(bucket_svc->ctx(), + bucket_svc->get_bi_be_handler().get()); + svc.zone = zone_svc; + svc.bucket = bucket_svc; + svc.bi = bi_svc; + } + + string get_type() override { return "bucket.instance"; } + + RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override { + RGWBucketCompleteInfo bci; + + try { + decode_json_obj(bci, jo); + } catch (JSONDecoder::err& e) { + return nullptr; + } + + return new RGWBucketInstanceMetadataObject(bci, objv, mtime); + } + + int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override { + RGWBucketCompleteInfo bci; + real_time mtime; + + RGWSI_Bucket_BI_Ctx ctx(op->ctx()); + + int ret = svc.bucket->read_bucket_instance_info(ctx, entry, &bci.info, &mtime, &bci.attrs, y, dpp); + if (ret < 0) + return ret; + + RGWBucketInstanceMetadataObject *mdo = new RGWBucketInstanceMetadataObject(bci, bci.info.objv_tracker.read_version, mtime); + + *obj = mdo; + + return 0; + } + + int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp, + RGWMDLogSyncType sync_type, bool from_remote_zone) override; + + int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) override { + RGWBucketCompleteInfo bci; + + RGWSI_Bucket_BI_Ctx ctx(op->ctx()); + + int ret = 
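// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): instead of deleting, the archive
// handler above parks a bucket under a deterministic new name derived from
// an MD5 of its entrypoint, so repeated deletes of the same bucket converge
// on the same tombstone name. The naming step alone (the digest is supplied
// by the caller; get_md5_digest() above computes the real one):
#include <string>

inline std::string archived_bucket_name(const std::string& orig_name,
                                        const std::string& md5_digest) {
  return orig_name + "-deleted-" + md5_digest;
}
// e.g. archived_bucket_name("photos", "9e107d9d...") yields
// "photos-deleted-9e107d9d...", which do_put() later recognizes by the
// "-deleted-" infix.
// ---------------------------------------------------------------------------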
read_bucket_instance_entry(ctx, entry, &bci, nullptr, y, dpp); + if (ret < 0 && ret != -ENOENT) + return ret; + + return svc.bucket->remove_bucket_instance_info(ctx, entry, bci.info, &bci.info.objv_tracker, y, dpp); + } + + int call(std::function f) { + return call(nullopt, f); + } + + int call(std::optional bectx_params, + std::function f) { + return be_handler->call(bectx_params, [&](RGWSI_MetaBackend_Handler::Op *op) { + RGWSI_Bucket_BI_Ctx ctx(op->ctx()); + return f(ctx); + }); + } +}; + +class RGWMetadataHandlerPut_BucketInstance : public RGWMetadataHandlerPut_SObj +{ + CephContext *cct; + RGWBucketInstanceMetadataHandler *bihandler; + RGWBucketInstanceMetadataObject *obj; +public: + RGWMetadataHandlerPut_BucketInstance(CephContext *_cct, + RGWBucketInstanceMetadataHandler *_handler, + RGWSI_MetaBackend_Handler::Op *_op, string& entry, + RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker, + optional_yield y, + RGWMDLogSyncType type, bool from_remote_zone) : RGWMetadataHandlerPut_SObj(_handler, _op, entry, obj, objv_tracker, y, type, from_remote_zone), + cct(_cct), bihandler(_handler) { + obj = static_cast(_obj); + + auto& bci = obj->get_bci(); + obj->set_pattrs(&bci.attrs); + } + + void encode_obj(bufferlist *bl) override { + obj->get_bucket_info().encode(*bl); + } + + int put_check(const DoutPrefixProvider *dpp) override; + int put_checked(const DoutPrefixProvider *dpp) override; + int put_post(const DoutPrefixProvider *dpp) override; +}; + +int RGWBucketInstanceMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op, + string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) +{ + RGWMetadataHandlerPut_BucketInstance put_op(svc.bucket->ctx(), this, op, entry, obj, + objv_tracker, y, type, from_remote_zone); + return do_put_operate(&put_op, dpp); +} + +void init_default_bucket_layout(CephContext *cct, rgw::BucketLayout& layout, + const RGWZone& zone, + std::optional shards, + std::optional type) { + layout.current_index.gen = 0; + layout.current_index.layout.normal.hash_type = rgw::BucketHashType::Mod; + + layout.current_index.layout.type = + type.value_or(rgw::BucketIndexType::Normal); + + if (shards) { + layout.current_index.layout.normal.num_shards = *shards; + } else if (cct->_conf->rgw_override_bucket_index_max_shards > 0) { + layout.current_index.layout.normal.num_shards = + cct->_conf->rgw_override_bucket_index_max_shards; + } else { + layout.current_index.layout.normal.num_shards = + zone.bucket_index_max_shards; + } + + if (layout.current_index.layout.type == rgw::BucketIndexType::Normal) { + layout.logs.push_back(log_layout_from_index(0, layout.current_index)); + } +} + +int RGWMetadataHandlerPut_BucketInstance::put_check(const DoutPrefixProvider *dpp) +{ + int ret; + + RGWBucketCompleteInfo& bci = obj->get_bci(); + + RGWBucketInstanceMetadataObject *orig_obj = static_cast(old_obj); + + RGWBucketCompleteInfo *old_bci = (orig_obj ? 
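// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): init_default_bucket_layout()
// above resolves the shard count with a fixed precedence: an explicit
// request, then the rgw_override_bucket_index_max_shards conf value, then
// the zone default. The same decision as a pure function:
#include <cstdint>
#include <optional>

inline uint32_t resolve_num_shards(std::optional<uint32_t> requested,
                                   uint32_t conf_override,   // 0 = unset
                                   uint32_t zone_default) {
  if (requested)         return *requested;
  if (conf_override > 0) return conf_override;
  return zone_default;
}
// resolve_num_shards({}, 0, 11) == 11; resolve_num_shards(31, 0, 11) == 31.
// ---------------------------------------------------------------------------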
&orig_obj->get_bci() : nullptr); + + const bool exists = (!!orig_obj); + + if (from_remote_zone) { + // don't sync bucket layout changes + if (!exists) { + // replace peer's layout with default-constructed, then apply our defaults + bci.info.layout = rgw::BucketLayout{}; + init_default_bucket_layout(cct, bci.info.layout, + bihandler->svc.zone->get_zone(), + std::nullopt, std::nullopt); + } else { + bci.info.layout = old_bci->info.layout; + } + } + + if (!exists || old_bci->info.bucket.bucket_id != bci.info.bucket.bucket_id) { + /* a new bucket, we need to select a new bucket placement for it */ + string tenant_name; + string bucket_name; + string bucket_instance; + parse_bucket(entry, &tenant_name, &bucket_name, &bucket_instance); + + RGWZonePlacementInfo rule_info; + bci.info.bucket.name = bucket_name; + bci.info.bucket.bucket_id = bucket_instance; + bci.info.bucket.tenant = tenant_name; + // if the sync module never writes data, don't require the zone to specify all placement targets + if (bihandler->svc.zone->sync_module_supports_writes()) { + ret = bihandler->svc.zone->select_bucket_location_by_rule(dpp, bci.info.placement_rule, &rule_info, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: select_bucket_placement() returned " << ret << dendl; + return ret; + } + } + bci.info.layout.current_index.layout.type = rule_info.index_type; + } else { + /* existing bucket, keep its placement */ + bci.info.bucket.explicit_placement = old_bci->info.bucket.explicit_placement; + bci.info.placement_rule = old_bci->info.placement_rule; + } + + /* record the read version (if any), store the new version */ + bci.info.objv_tracker.read_version = objv_tracker.read_version; + bci.info.objv_tracker.write_version = objv_tracker.write_version; + + return 0; +} + +int RGWMetadataHandlerPut_BucketInstance::put_checked(const DoutPrefixProvider *dpp) +{ + RGWBucketInstanceMetadataObject *orig_obj = static_cast(old_obj); + + RGWBucketInfo *orig_info = (orig_obj ? &orig_obj->get_bucket_info() : nullptr); + + auto& info = obj->get_bucket_info(); + auto mtime = obj->get_mtime(); + auto pattrs = obj->get_pattrs(); + + RGWSI_Bucket_BI_Ctx ctx(op->ctx()); + + return bihandler->svc.bucket->store_bucket_instance_info(ctx, + entry, + info, + orig_info, + false, + mtime, + pattrs, + y, + dpp); +} + +int RGWMetadataHandlerPut_BucketInstance::put_post(const DoutPrefixProvider *dpp) +{ + RGWBucketCompleteInfo& bci = obj->get_bci(); + + objv_tracker = bci.info.objv_tracker; + + int ret = bihandler->svc.bi->init_index(dpp, bci.info, bci.info.layout.current_index); + if (ret < 0) { + return ret; + } + + /* update lifecyle policy */ + { + std::unique_ptr bucket; + ret = bihandler->driver->get_bucket(nullptr, bci.info, &bucket); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to get_bucket(...) 
for " + << bci.info.bucket.name + << dendl; + return ret; + } + + auto lc = bihandler->driver->get_rgwlc(); + + auto lc_it = bci.attrs.find(RGW_ATTR_LC); + if (lc_it != bci.attrs.end()) { + ldpp_dout(dpp, 20) << "set lc config for " << bci.info.bucket.name << dendl; + ret = lc->set_bucket_config(bucket.get(), bci.attrs, nullptr); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to set lc config for " + << bci.info.bucket.name + << dendl; + return ret; + } + + } else { + ldpp_dout(dpp, 20) << "remove lc config for " << bci.info.bucket.name << dendl; + ret = lc->remove_bucket_config(bucket.get(), bci.attrs, false /* cannot merge attrs */); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to remove lc config for " + << bci.info.bucket.name + << dendl; + return ret; + } + } + } /* update lc */ + + return STATUS_APPLIED; +} + +class RGWArchiveBucketInstanceMetadataHandler : public RGWBucketInstanceMetadataHandler { +public: + RGWArchiveBucketInstanceMetadataHandler(rgw::sal::Driver* driver) + : RGWBucketInstanceMetadataHandler(driver) {} + + // N.B. replication of lifecycle policy relies on logic in RGWBucketInstanceMetadataHandler::do_put(...), override with caution + + int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override { + ldpp_dout(dpp, 0) << "SKIP: bucket instance removal is not allowed on archive zone: bucket.instance:" << entry << dendl; + return 0; + } +}; + +RGWBucketCtl::RGWBucketCtl(RGWSI_Zone *zone_svc, + RGWSI_Bucket *bucket_svc, + RGWSI_Bucket_Sync *bucket_sync_svc, + RGWSI_BucketIndex *bi_svc, + RGWSI_User* user_svc) + : cct(zone_svc->ctx()) +{ + svc.zone = zone_svc; + svc.bucket = bucket_svc; + svc.bucket_sync = bucket_sync_svc; + svc.bi = bi_svc; + svc.user = user_svc; +} + +void RGWBucketCtl::init(RGWUserCtl *user_ctl, + RGWBucketMetadataHandler *_bm_handler, + RGWBucketInstanceMetadataHandler *_bmi_handler, + RGWDataChangesLog *datalog, + const DoutPrefixProvider *dpp) +{ + ctl.user = user_ctl; + + bm_handler = _bm_handler; + bmi_handler = _bmi_handler; + + bucket_be_handler = bm_handler->get_be_handler(); + bi_be_handler = bmi_handler->get_be_handler(); + + datalog->set_bucket_filter( + [this](const rgw_bucket& bucket, optional_yield y, const DoutPrefixProvider *dpp) { + return bucket_exports_data(bucket, y, dpp); + }); +} + +int RGWBucketCtl::call(std::function f) { + return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ep_ctx) { + return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& bi_ctx) { + RGWSI_Bucket_X_Ctx ctx{ep_ctx, bi_ctx}; + return f(ctx); + }); + }); +} + +int RGWBucketCtl::read_bucket_entrypoint_info(const rgw_bucket& bucket, + RGWBucketEntryPoint *info, + optional_yield y, const DoutPrefixProvider *dpp, + const Bucket::GetParams& params) +{ + return bm_handler->call(params.bectx_params, [&](RGWSI_Bucket_EP_Ctx& ctx) { + return svc.bucket->read_bucket_entrypoint_info(ctx, + RGWSI_Bucket::get_entrypoint_meta_key(bucket), + info, + params.objv_tracker, + params.mtime, + params.attrs, + y, + dpp, + params.cache_info, + params.refresh_version); + }); +} + +int RGWBucketCtl::store_bucket_entrypoint_info(const rgw_bucket& bucket, + RGWBucketEntryPoint& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const Bucket::PutParams& params) +{ + return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { + return svc.bucket->store_bucket_entrypoint_info(ctx, + RGWSI_Bucket::get_entrypoint_meta_key(bucket), + info, + params.exclusive, + 
params.mtime, + params.attrs, + params.objv_tracker, + y, + dpp); + }); +} + +int RGWBucketCtl::remove_bucket_entrypoint_info(const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp, + const Bucket::RemoveParams& params) +{ + return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { + return svc.bucket->remove_bucket_entrypoint_info(ctx, + RGWSI_Bucket::get_entrypoint_meta_key(bucket), + params.objv_tracker, + y, + dpp); + }); +} + +int RGWBucketCtl::read_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo *info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::GetParams& params) +{ + int ret = bmi_handler->call(params.bectx_params, [&](RGWSI_Bucket_BI_Ctx& ctx) { + return svc.bucket->read_bucket_instance_info(ctx, + RGWSI_Bucket::get_bi_meta_key(bucket), + info, + params.mtime, + params.attrs, + y, + dpp, + params.cache_info, + params.refresh_version); + }); + + if (ret < 0) { + return ret; + } + + if (params.objv_tracker) { + *params.objv_tracker = info->objv_tracker; + } + + return 0; +} + +int RGWBucketCtl::read_bucket_info(const rgw_bucket& bucket, + RGWBucketInfo *info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::GetParams& params, + RGWObjVersionTracker *ep_objv_tracker) +{ + const rgw_bucket *b = &bucket; + + std::optional ep; + + if (b->bucket_id.empty()) { + ep.emplace(); + + int r = read_bucket_entrypoint_info(*b, &(*ep), y, dpp, RGWBucketCtl::Bucket::GetParams() + .set_bectx_params(params.bectx_params) + .set_objv_tracker(ep_objv_tracker)); + if (r < 0) { + return r; + } + + b = &ep->bucket; + } + + int ret = bmi_handler->call(params.bectx_params, [&](RGWSI_Bucket_BI_Ctx& ctx) { + return svc.bucket->read_bucket_instance_info(ctx, + RGWSI_Bucket::get_bi_meta_key(*b), + info, + params.mtime, + params.attrs, + y, dpp, + params.cache_info, + params.refresh_version); + }); + + if (ret < 0) { + return ret; + } + + if (params.objv_tracker) { + *params.objv_tracker = info->objv_tracker; + } + + return 0; +} + +int RGWBucketCtl::do_store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::PutParams& params) +{ + if (params.objv_tracker) { + info.objv_tracker = *params.objv_tracker; + } + + return svc.bucket->store_bucket_instance_info(ctx, + RGWSI_Bucket::get_bi_meta_key(bucket), + info, + params.orig_info, + params.exclusive, + params.mtime, + params.attrs, + y, + dpp); +} + +int RGWBucketCtl::store_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::PutParams& params) +{ + return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& ctx) { + return do_store_bucket_instance_info(ctx, bucket, info, y, dpp, params); + }); +} + +int RGWBucketCtl::remove_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::RemoveParams& params) +{ + if (params.objv_tracker) { + info.objv_tracker = *params.objv_tracker; + } + + return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& ctx) { + return svc.bucket->remove_bucket_instance_info(ctx, + RGWSI_Bucket::get_bi_meta_key(bucket), + info, + &info.objv_tracker, + y, + dpp); + }); +} + +int RGWBucketCtl::do_store_linked_bucket_info(RGWSI_Bucket_X_Ctx& ctx, + RGWBucketInfo& info, + RGWBucketInfo *orig_info, + bool exclusive, real_time mtime, + obj_version *pep_objv, + map 
*pattrs, + bool create_entry_point, + optional_yield y, const DoutPrefixProvider *dpp) +{ + bool create_head = !info.has_instance_obj || create_entry_point; + + int ret = svc.bucket->store_bucket_instance_info(ctx.bi, + RGWSI_Bucket::get_bi_meta_key(info.bucket), + info, + orig_info, + exclusive, + mtime, pattrs, + y, dpp); + if (ret < 0) { + return ret; + } + + if (!create_head) + return 0; /* done! */ + + RGWBucketEntryPoint entry_point; + entry_point.bucket = info.bucket; + entry_point.owner = info.owner; + entry_point.creation_time = info.creation_time; + entry_point.linked = true; + RGWObjVersionTracker ot; + if (pep_objv && !pep_objv->tag.empty()) { + ot.write_version = *pep_objv; + } else { + ot.generate_new_write_ver(cct); + if (pep_objv) { + *pep_objv = ot.write_version; + } + } + ret = svc.bucket->store_bucket_entrypoint_info(ctx.ep, + RGWSI_Bucket::get_entrypoint_meta_key(info.bucket), + entry_point, + exclusive, + mtime, + pattrs, + &ot, + y, + dpp); + if (ret < 0) + return ret; + + return 0; +} +int RGWBucketCtl::convert_old_bucket_info(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWBucketEntryPoint entry_point; + real_time ep_mtime; + RGWObjVersionTracker ot; + map attrs; + RGWBucketInfo info; + auto cct = svc.bucket->ctx(); + + ldpp_dout(dpp, 10) << "RGWRados::convert_old_bucket_info(): bucket=" << bucket << dendl; + + int ret = svc.bucket->read_bucket_entrypoint_info(ctx.ep, + RGWSI_Bucket::get_entrypoint_meta_key(bucket), + &entry_point, &ot, &ep_mtime, &attrs, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: get_bucket_entrypoint_info() returned " << ret << " bucket=" << bucket << dendl; + return ret; + } + + if (!entry_point.has_bucket_info) { + /* already converted! 
*/ + return 0; + } + + info = entry_point.old_bucket_info; + + ot.generate_new_write_ver(cct); + + ret = do_store_linked_bucket_info(ctx, info, nullptr, false, ep_mtime, &ot.write_version, &attrs, true, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to put_linked_bucket_info(): " << ret << dendl; + return ret; + } + + return 0; +} + +int RGWBucketCtl::set_bucket_instance_attrs(RGWBucketInfo& bucket_info, + map& attrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + return call([&](RGWSI_Bucket_X_Ctx& ctx) { + rgw_bucket& bucket = bucket_info.bucket; + + if (!bucket_info.has_instance_obj) { + /* an old bucket object, need to convert it */ + int ret = convert_old_bucket_info(ctx, bucket, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed converting old bucket info: " << ret << dendl; + return ret; + } + } + + return do_store_bucket_instance_info(ctx.bi, + bucket, + bucket_info, + y, + dpp, + BucketInstance::PutParams().set_attrs(&attrs) + .set_objv_tracker(objv_tracker) + .set_orig_info(&bucket_info)); + }); +} + + +int RGWBucketCtl::link_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y, + const DoutPrefixProvider *dpp, + bool update_entrypoint, + rgw_ep_info *pinfo) +{ + return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { + return do_link_bucket(ctx, user_id, bucket, creation_time, + update_entrypoint, pinfo, y, dpp); + }); +} + +int RGWBucketCtl::do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, + const rgw_user& user_id, + const rgw_bucket& bucket, + ceph::real_time creation_time, + bool update_entrypoint, + rgw_ep_info *pinfo, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + int ret; + + RGWBucketEntryPoint ep; + RGWObjVersionTracker ot; + RGWObjVersionTracker& rot = (pinfo) ? 
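
// [Editor's illustration -- not part of this commit.] The cleanup-on-error
// idiom that do_link_bucket() below follows, reduced to stubs: record the
// bucket under the user first, then write the entrypoint; on failure, roll
// back the user-directory entry so the two stay consistent. The three
// functions are hypothetical stand-ins for the RGW service calls.
int add_to_user_dir()      { return 0; } // stands in for svc.user->add_bucket()
int write_entrypoint()     { return 0; } // stands in for store_bucket_entrypoint_info()
int remove_from_user_dir() { return 0; } // stands in for do_unlink_bucket()

int link_bucket_sketch(bool update_entrypoint) {
  int ret = add_to_user_dir();
  if (ret < 0) {
    remove_from_user_dir();    // mirrors the done_err path: best-effort unlink
    return ret;
  }
  if (!update_entrypoint) {
    return 0;                  // nothing else to keep consistent
  }
  int r = write_entrypoint();
  if (r < 0) {
    remove_from_user_dir();    // roll back the directory entry on failure
    return r;
  }
  return 0;
}
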
pinfo->ep_objv : ot; + map attrs, *pattrs = nullptr; + string meta_key; + + if (update_entrypoint) { + meta_key = RGWSI_Bucket::get_entrypoint_meta_key(bucket); + if (pinfo) { + ep = pinfo->ep; + pattrs = &pinfo->attrs; + } else { + ret = svc.bucket->read_bucket_entrypoint_info(ctx, + meta_key, + &ep, &rot, + nullptr, &attrs, + y, dpp); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: read_bucket_entrypoint_info() returned: " + << cpp_strerror(-ret) << dendl; + } + pattrs = &attrs; + } + } + + ret = svc.user->add_bucket(dpp, user_id, bucket, creation_time, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: error adding bucket to user directory:" + << " user=" << user_id + << " bucket=" << bucket + << " err=" << cpp_strerror(-ret) + << dendl; + goto done_err; + } + + if (!update_entrypoint) + return 0; + + ep.linked = true; + ep.owner = user_id; + ep.bucket = bucket; + ret = svc.bucket->store_bucket_entrypoint_info( + ctx, meta_key, ep, false, real_time(), pattrs, &rot, y, dpp); + if (ret < 0) + goto done_err; + + return 0; + +done_err: + int r = do_unlink_bucket(ctx, user_id, bucket, true, y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed unlinking bucket on error cleanup: " + << cpp_strerror(-r) << dendl; + } + return ret; +} + +int RGWBucketCtl::unlink_bucket(const rgw_user& user_id, const rgw_bucket& bucket, optional_yield y, const DoutPrefixProvider *dpp, bool update_entrypoint) +{ + return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { + return do_unlink_bucket(ctx, user_id, bucket, update_entrypoint, y, dpp); + }); +} + +int RGWBucketCtl::do_unlink_bucket(RGWSI_Bucket_EP_Ctx& ctx, + const rgw_user& user_id, + const rgw_bucket& bucket, + bool update_entrypoint, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + int ret = svc.user->remove_bucket(dpp, user_id, bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: error removing bucket from directory: " + << cpp_strerror(-ret)<< dendl; + } + + if (!update_entrypoint) + return 0; + + RGWBucketEntryPoint ep; + RGWObjVersionTracker ot; + map attrs; + string meta_key = RGWSI_Bucket::get_entrypoint_meta_key(bucket); + ret = svc.bucket->read_bucket_entrypoint_info(ctx, meta_key, &ep, &ot, nullptr, &attrs, y, dpp); + if (ret == -ENOENT) + return 0; + if (ret < 0) + return ret; + + if (!ep.linked) + return 0; + + if (ep.owner != user_id) { + ldpp_dout(dpp, 0) << "bucket entry point user mismatch, can't unlink bucket: " << ep.owner << " != " << user_id << dendl; + return -EINVAL; + } + + ep.linked = false; + return svc.bucket->store_bucket_entrypoint_info(ctx, meta_key, ep, false, real_time(), &attrs, &ot, y, dpp); +} + +// TODO: remove RGWRados dependency for bucket listing +int RGWBucketCtl::chown(rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, + const rgw_user& user_id, const std::string& display_name, + const std::string& marker, optional_yield y, const DoutPrefixProvider *dpp) +{ + map common_prefixes; + + rgw::sal::Bucket::ListParams params; + rgw::sal::Bucket::ListResults results; + + params.list_versions = true; + params.allow_unordered = true; + params.marker = marker; + + int count = 0; + int max_entries = 1000; + + //Loop through objects and update object acls to point to bucket owner + + do { + RGWObjectCtx obj_ctx(driver); + results.objs.clear(); + int ret = bucket->list(dpp, params, max_entries, results, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: list objects failed: " << cpp_strerror(-ret) << dendl; + return ret; + } + + params.marker = results.next_marker; + count += 
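
// [Editor's illustration -- not part of this commit.] The marker-driven
// pagination that the chown() listing loop here relies on: each page resumes
// from the marker returned by the previous one until the listing is no
// longer truncated. Page and list_page() are hypothetical stand-ins.
#include <string>
#include <vector>

struct Page {
  std::vector<std::string> keys;
  std::string next_marker;
  bool truncated = false;
};

Page list_page(const std::string& marker, int max_entries) {
  (void)marker; (void)max_entries;
  return {};  // stub: one empty, final page
}

int process_all_objects() {
  std::string marker;  // empty marker: start from the beginning
  int count = 0;
  Page page;
  do {
    page = list_page(marker, 1000);
    count += static_cast<int>(page.keys.size());
    marker = page.next_marker;  // resume point for the next iteration
  } while (page.truncated);
  return count;
}
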
results.objs.size(); + + for (const auto& obj : results.objs) { + std::unique_ptr r_obj = bucket->get_object(obj.key); + + ret = r_obj->get_obj_attrs(y, dpp); + if (ret < 0){ + ldpp_dout(dpp, 0) << "ERROR: failed to read object " << obj.key.name << cpp_strerror(-ret) << dendl; + continue; + } + const auto& aiter = r_obj->get_attrs().find(RGW_ATTR_ACL); + if (aiter == r_obj->get_attrs().end()) { + ldpp_dout(dpp, 0) << "ERROR: no acls found for object " << obj.key.name << " .Continuing with next object." << dendl; + continue; + } else { + bufferlist& bl = aiter->second; + RGWAccessControlPolicy policy(driver->ctx()); + ACLOwner owner; + try { + decode(policy, bl); + owner = policy.get_owner(); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: decode policy failed" << err.what() + << dendl; + return -EIO; + } + + //Get the ACL from the policy + RGWAccessControlList& acl = policy.get_acl(); + + //Remove grant that is set to old owner + acl.remove_canon_user_grant(owner.get_id()); + + //Create a grant and add grant + ACLGrant grant; + grant.set_canon(user_id, display_name, RGW_PERM_FULL_CONTROL); + acl.add_grant(&grant); + + //Update the ACL owner to the new user + owner.set_id(user_id); + owner.set_name(display_name); + policy.set_owner(owner); + + bl.clear(); + encode(policy, bl); + + r_obj->set_atomic(); + map attrs; + attrs[RGW_ATTR_ACL] = bl; + ret = r_obj->set_obj_attrs(dpp, &attrs, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: modify attr failed " << cpp_strerror(-ret) << dendl; + return ret; + } + } + } + cerr << count << " objects processed in " << bucket + << ". Next marker " << params.marker.name << std::endl; + } while(results.is_truncated); + return 0; +} + +int RGWBucketCtl::read_bucket_stats(const rgw_bucket& bucket, + RGWBucketEnt *result, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + return call([&](RGWSI_Bucket_X_Ctx& ctx) { + return svc.bucket->read_bucket_stats(ctx, bucket, result, y, dpp); + }); +} + +int RGWBucketCtl::read_buckets_stats(map& m, + optional_yield y, const DoutPrefixProvider *dpp) +{ + return call([&](RGWSI_Bucket_X_Ctx& ctx) { + return svc.bucket->read_buckets_stats(ctx, m, y, dpp); + }); +} + +int RGWBucketCtl::sync_user_stats(const DoutPrefixProvider *dpp, + const rgw_user& user_id, + const RGWBucketInfo& bucket_info, + optional_yield y, + RGWBucketEnt* pent) +{ + RGWBucketEnt ent; + if (!pent) { + pent = &ent; + } + int r = svc.bi->read_stats(dpp, bucket_info, pent, null_yield); + if (r < 0) { + ldpp_dout(dpp, 20) << __func__ << "(): failed to read bucket stats (r=" << r << ")" << dendl; + return r; + } + + return svc.user->flush_bucket_stats(dpp, user_id, *pent, y); +} + +int RGWBucketCtl::get_sync_policy_handler(std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef *phandler, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + int r = call([&](RGWSI_Bucket_X_Ctx& ctx) { + return svc.bucket_sync->get_policy_handler(ctx, zone, bucket, phandler, y, dpp); + }); + if (r < 0) { + ldpp_dout(dpp, 20) << __func__ << "(): failed to get policy handler for bucket=" << bucket << " (r=" << r << ")" << dendl; + return r; + } + return 0; +} + +int RGWBucketCtl::bucket_exports_data(const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + + RGWBucketSyncPolicyHandlerRef handler; + + int r = get_sync_policy_handler(std::nullopt, bucket, &handler, y, dpp); + if (r < 0) { + return r; + } + + return handler->bucket_exports_data(); +} + +int RGWBucketCtl::bucket_imports_data(const 
rgw_bucket& bucket, + optional_yield y, const DoutPrefixProvider *dpp) +{ + + RGWBucketSyncPolicyHandlerRef handler; + + int r = get_sync_policy_handler(std::nullopt, bucket, &handler, y, dpp); + if (r < 0) { + return r; + } + + return handler->bucket_imports_data(); +} + +RGWBucketMetadataHandlerBase* RGWBucketMetaHandlerAllocator::alloc() +{ + return new RGWBucketMetadataHandler(); +} + +RGWBucketInstanceMetadataHandlerBase* RGWBucketInstanceMetaHandlerAllocator::alloc(rgw::sal::Driver* driver) +{ + return new RGWBucketInstanceMetadataHandler(driver); +} + +RGWBucketMetadataHandlerBase* RGWArchiveBucketMetaHandlerAllocator::alloc() +{ + return new RGWArchiveBucketMetadataHandler(); +} + +RGWBucketInstanceMetadataHandlerBase* RGWArchiveBucketInstanceMetaHandlerAllocator::alloc(rgw::sal::Driver* driver) +{ + return new RGWArchiveBucketInstanceMetadataHandler(driver); +} + + +void RGWBucketEntryPoint::generate_test_instances(list& o) +{ + RGWBucketEntryPoint *bp = new RGWBucketEntryPoint(); + init_bucket(&bp->bucket, "tenant", "bucket", "pool", ".index.pool", "marker", "10"); + bp->owner = "owner"; + bp->creation_time = ceph::real_clock::from_ceph_timespec({ceph_le32(2), ceph_le32(3)}); + + o.push_back(bp); + o.push_back(new RGWBucketEntryPoint); +} + +void RGWBucketEntryPoint::dump(Formatter *f) const +{ + encode_json("bucket", bucket, f); + encode_json("owner", owner, f); + utime_t ut(creation_time); + encode_json("creation_time", ut, f); + encode_json("linked", linked, f); + encode_json("has_bucket_info", has_bucket_info, f); + if (has_bucket_info) { + encode_json("old_bucket_info", old_bucket_info, f); + } +} + +void RGWBucketEntryPoint::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("bucket", bucket, obj); + JSONDecoder::decode_json("owner", owner, obj); + utime_t ut; + JSONDecoder::decode_json("creation_time", ut, obj); + creation_time = ut.to_real_time(); + JSONDecoder::decode_json("linked", linked, obj); + JSONDecoder::decode_json("has_bucket_info", has_bucket_info, obj); + if (has_bucket_info) { + JSONDecoder::decode_json("old_bucket_info", old_bucket_info, obj); + } +} + diff --git a/src/rgw/driver/rados/rgw_bucket.h b/src/rgw/driver/rados/rgw_bucket.h new file mode 100644 index 00000000000..636a1f2f6c0 --- /dev/null +++ b/src/rgw/driver/rados/rgw_bucket.h @@ -0,0 +1,765 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include +#include +#include + +#include +#include + +#include "include/types.h" +#include "rgw_common.h" +#include "rgw_tools.h" +#include "rgw_metadata.h" + +#include "rgw_string.h" +#include "rgw_sal.h" + +#include "common/Formatter.h" +#include "common/lru_map.h" +#include "common/ceph_time.h" + +#include "rgw_formats.h" + +#include "services/svc_bucket_types.h" +#include "services/svc_bucket_sync.h" + +// define as static when RGWBucket implementation completes +extern void rgw_get_buckets_obj(const rgw_user& user_id, std::string& buckets_obj_id); + +class RGWSI_Meta; +class RGWBucketMetadataHandler; +class RGWBucketInstanceMetadataHandler; +class RGWUserCtl; +class RGWBucketCtl; +class RGWZone; +struct RGWZoneParams; + +extern void init_bucket(rgw_bucket *b, const char *t, const char *n, const char *dp, const char *ip, const char *m, const char *id); +extern int rgw_bucket_parse_bucket_key(CephContext *cct, const std::string& key, + rgw_bucket* bucket, int *shard_id); + +extern std::string rgw_make_bucket_entry_name(const std::string& tenant_name, + const std::string& 
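
// [Editor's illustration -- not part of this commit.] The general
// "tenant/name:instance" shape of the bucket keys that
// rgw_bucket_parse_bucket_key() above handles, sketched with std::string
// only. This toy parser ignores shard ids and error handling; the exact
// grammar accepted by the real function may differ.
#include <string>

struct BucketKeySketch { std::string tenant, name, instance; };

BucketKeySketch parse_bucket_key_sketch(const std::string& key) {
  BucketKeySketch b;
  std::string rest = key;
  if (auto slash = rest.find('/'); slash != std::string::npos) {
    b.tenant = rest.substr(0, slash);  // optional "tenant/" prefix
    rest = rest.substr(slash + 1);
  }
  if (auto colon = rest.find(':'); colon != std::string::npos) {
    b.name = rest.substr(0, colon);    // optional ":instance" suffix
    b.instance = rest.substr(colon + 1);
  } else {
    b.name = rest;
  }
  return b;
}
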
bucket_name);
+
+extern void rgw_parse_url_bucket(const std::string& bucket,
+                                 const std::string& auth_tenant,
+                                 std::string &tenant_name, std::string &bucket_name);
+
+// this is used as a filter to RGWRados::cls_bucket_list_ordered; it
+// conforms to the type RGWBucketListNameFilter
+extern bool rgw_bucket_object_check_filter(const std::string& oid);
+
+void init_default_bucket_layout(CephContext *cct, rgw::BucketLayout& layout,
+                                const RGWZone& zone,
+                                std::optional<uint32_t> shards,
+                                std::optional<rgw::BucketIndexType> type);
+
+struct RGWBucketCompleteInfo {
+  RGWBucketInfo info;
+  std::map<std::string, bufferlist> attrs;
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+
+class RGWBucketEntryMetadataObject : public RGWMetadataObject {
+  RGWBucketEntryPoint ep;
+  std::map<std::string, bufferlist> attrs;
+public:
+  RGWBucketEntryMetadataObject(RGWBucketEntryPoint& _ep, const obj_version& v, real_time m) : ep(_ep) {
+    objv = v;
+    mtime = m;
+    set_pattrs(&attrs);
+  }
+  RGWBucketEntryMetadataObject(RGWBucketEntryPoint& _ep, const obj_version& v, real_time m, std::map<std::string, bufferlist>&& _attrs) :
+    ep(_ep), attrs(std::move(_attrs)) {
+    objv = v;
+    mtime = m;
+    set_pattrs(&attrs);
+  }
+
+  void dump(Formatter *f) const override {
+    ep.dump(f);
+  }
+
+  RGWBucketEntryPoint& get_ep() {
+    return ep;
+  }
+
+  std::map<std::string, bufferlist>& get_attrs() {
+    return attrs;
+  }
+};
+
+class RGWBucketInstanceMetadataObject : public RGWMetadataObject {
+  RGWBucketCompleteInfo info;
+public:
+  RGWBucketInstanceMetadataObject() {}
+  RGWBucketInstanceMetadataObject(RGWBucketCompleteInfo& i, const obj_version& v, real_time m) : info(i) {
+    objv = v;
+    mtime = m;
+  }
+
+  void dump(Formatter *f) const override {
+    info.dump(f);
+  }
+
+  void decode_json(JSONObj *obj) {
+    info.decode_json(obj);
+  }
+
+  RGWBucketCompleteInfo& get_bci() {
+    return info;
+  }
+  RGWBucketInfo& get_bucket_info() {
+    return info.info;
+  }
+};
+
+/**
+ * store a list of the user's buckets, with associated functions.
+ */
+class RGWUserBuckets {
+  std::map<std::string, RGWBucketEnt> buckets;
+
+public:
+  RGWUserBuckets() = default;
+  RGWUserBuckets(RGWUserBuckets&&) = default;
+
+  RGWUserBuckets& operator=(const RGWUserBuckets&) = default;
+
+  void encode(bufferlist& bl) const {
+    using ceph::encode;
+    encode(buckets, bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    using ceph::decode;
+    decode(buckets, bl);
+  }
+  /**
+   * Check if the user owns a bucket by the given name.
+   */
+  bool owns(std::string& name) {
+    std::map<std::string, RGWBucketEnt>::iterator iter;
+    iter = buckets.find(name);
+    return (iter != buckets.end());
+  }
+
+  /**
+   * Add a (created) bucket to the user's bucket list.
+   */
+  void add(const RGWBucketEnt& bucket) {
+    buckets[bucket.bucket.name] = bucket;
+  }
+
+  /**
+   * Remove a bucket from the user's list by name.
+   */
+  void remove(const std::string& name) {
+    std::map<std::string, RGWBucketEnt>::iterator iter;
+    iter = buckets.find(name);
+    if (iter != buckets.end()) {
+      buckets.erase(iter);
+    }
+  }
+
+  /**
+   * Get the user's buckets as a map.
+ */ + std::map& get_buckets() { return buckets; } + + /** + * Cleanup data structure + */ + void clear() { buckets.clear(); } + + size_t count() { return buckets.size(); } +}; +WRITE_CLASS_ENCODER(RGWUserBuckets) + +class RGWBucketMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE { +public: + virtual ~RGWBucketMetadataHandlerBase() {} + virtual void init(RGWSI_Bucket *bucket_svc, + RGWBucketCtl *bucket_ctl) = 0; + +}; + +class RGWBucketInstanceMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE { +public: + virtual ~RGWBucketInstanceMetadataHandlerBase() {} + virtual void init(RGWSI_Zone *zone_svc, + RGWSI_Bucket *bucket_svc, + RGWSI_BucketIndex *bi_svc) = 0; +}; + +class RGWBucketMetaHandlerAllocator { +public: + static RGWBucketMetadataHandlerBase *alloc(); +}; + +class RGWBucketInstanceMetaHandlerAllocator { +public: + static RGWBucketInstanceMetadataHandlerBase *alloc(rgw::sal::Driver* driver); +}; + +class RGWArchiveBucketMetaHandlerAllocator { +public: + static RGWBucketMetadataHandlerBase *alloc(); +}; + +class RGWArchiveBucketInstanceMetaHandlerAllocator { +public: + static RGWBucketInstanceMetadataHandlerBase *alloc(rgw::sal::Driver* driver); +}; + +extern int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, rgw_obj_key& key); + +extern int rgw_object_get_attr(rgw::sal::Driver* driver, rgw::sal::Object* obj, + const char* attr_name, bufferlist& out_bl, + optional_yield y); + +extern void check_bad_user_bucket_mapping(rgw::sal::Driver* driver, rgw::sal::User* user, bool fix, optional_yield y, const DoutPrefixProvider *dpp); + +struct RGWBucketAdminOpState { + rgw_user uid; + std::string display_name; + std::string bucket_name; + std::string bucket_id; + std::string object_name; + std::string new_bucket_name; + + bool list_buckets; + bool stat_buckets; + bool check_objects; + bool fix_index; + bool delete_child_objects; + bool bucket_stored; + bool sync_bucket; + int max_aio = 0; + + std::unique_ptr bucket; + + RGWQuotaInfo quota; + RGWRateLimitInfo ratelimit_info; + + void set_fetch_stats(bool value) { stat_buckets = value; } + void set_check_objects(bool value) { check_objects = value; } + void set_fix_index(bool value) { fix_index = value; } + void set_delete_children(bool value) { delete_child_objects = value; } + + void set_max_aio(int value) { max_aio = value; } + + void set_user_id(const rgw_user& user_id) { + if (!user_id.empty()) + uid = user_id; + } + void set_tenant(const std::string& tenant_str) { + uid.tenant = tenant_str; + } + void set_bucket_name(const std::string& bucket_str) { + bucket_name = bucket_str; + } + void set_object(std::string& object_str) { + object_name = object_str; + } + void set_new_bucket_name(std::string& new_bucket_str) { + new_bucket_name = new_bucket_str; + } + void set_quota(RGWQuotaInfo& value) { + quota = value; + } + void set_bucket_ratelimit(RGWRateLimitInfo& value) { + ratelimit_info = value; + } + + + void set_sync_bucket(bool value) { sync_bucket = value; } + + rgw_user& get_user_id() { return uid; } + std::string& get_user_display_name() { return display_name; } + std::string& get_bucket_name() { return bucket_name; } + std::string& get_object_name() { return object_name; } + std::string& get_tenant() { return uid.tenant; } + + rgw::sal::Bucket* get_bucket() { return bucket.get(); } + void set_bucket(std::unique_ptr _bucket) { + bucket = std::move(_bucket); + bucket_stored = true; + } + + void set_bucket_id(const std::string& bi) { + bucket_id = bi; + } + const 
std::string& get_bucket_id() { return bucket_id; } + + bool will_fetch_stats() { return stat_buckets; } + bool will_fix_index() { return fix_index; } + bool will_delete_children() { return delete_child_objects; } + bool will_check_objects() { return check_objects; } + bool is_user_op() { return !uid.empty(); } + bool is_system_op() { return uid.empty(); } + bool has_bucket_stored() { return bucket_stored; } + int get_max_aio() { return max_aio; } + bool will_sync_bucket() { return sync_bucket; } + + RGWBucketAdminOpState() : list_buckets(false), stat_buckets(false), check_objects(false), + fix_index(false), delete_child_objects(false), + bucket_stored(false), sync_bucket(true) {} +}; + + +/* + * A simple wrapper class for administrative bucket operations + */ +class RGWBucket { + RGWUserBuckets buckets; + rgw::sal::Driver* driver; + RGWAccessHandle handle; + + std::unique_ptr bucket; + std::unique_ptr user; + + bool failure; + + RGWObjVersionTracker ep_objv; // entrypoint object version + +public: + RGWBucket() : driver(NULL), handle(NULL), failure(false) {} + int init(rgw::sal::Driver* storage, RGWBucketAdminOpState& op_state, optional_yield y, + const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + int check_bad_index_multipart(RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + int check_object_index(const DoutPrefixProvider *dpp, + RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y, + std::string *err_msg = NULL); + + int check_index(const DoutPrefixProvider *dpp, + RGWBucketAdminOpState& op_state, + std::map& existing_stats, + std::map& calculated_stats, + std::string *err_msg = NULL); + + int chown(RGWBucketAdminOpState& op_state, const std::string& marker, + optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + int set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + int remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, std::string *err_msg = NULL); + int policy_bl_to_stream(bufferlist& bl, std::ostream& o); + int get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp); + int sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + void clear_failure() { failure = false; } + + const RGWBucketInfo& get_bucket_info() const { return bucket->get_info(); } +}; + +class RGWBucketAdminOp { +public: + static int get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); + static int get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWAccessControlPolicy& policy, const DoutPrefixProvider *dpp); + static int dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + std::ostream& os, const DoutPrefixProvider *dpp); + + static int unlink(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); + static int link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + static int chown(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const std::string& marker, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + static int check_index(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& 
flusher, optional_yield y, const DoutPrefixProvider *dpp); + + static int remove_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, optional_yield y, + const DoutPrefixProvider *dpp, bool bypass_gc = false, bool keep_index_consistent = true); + static int remove_object(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); + static int info(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y, const DoutPrefixProvider *dpp); + static int limit_check(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + const std::list& user_ids, + RGWFormatterFlusher& flusher, optional_yield y, + const DoutPrefixProvider *dpp, + bool warnings_only = false); + static int set_quota(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); + + static int list_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); + + static int clear_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); + static int fix_lc_shards(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); + static int fix_obj_expiry(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, + RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp, bool dry_run = false); + + static int sync_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); +}; + +struct rgw_ep_info { + RGWBucketEntryPoint &ep; + std::map& attrs; + RGWObjVersionTracker ep_objv; + rgw_ep_info(RGWBucketEntryPoint &ep, std::map& attrs) + : ep(ep), attrs(attrs) {} +}; + +class RGWBucketCtl { + CephContext *cct; + + struct Svc { + RGWSI_Zone *zone{nullptr}; + RGWSI_Bucket *bucket{nullptr}; + RGWSI_Bucket_Sync *bucket_sync{nullptr}; + RGWSI_BucketIndex *bi{nullptr}; + RGWSI_User* user = nullptr; + } svc; + + struct Ctl { + RGWUserCtl *user{nullptr}; + } ctl; + + RGWBucketMetadataHandler *bm_handler; + RGWBucketInstanceMetadataHandler *bmi_handler; + + RGWSI_Bucket_BE_Handler bucket_be_handler; /* bucket backend handler */ + RGWSI_BucketInstance_BE_Handler bi_be_handler; /* bucket instance backend handler */ + + int call(std::function f); + +public: + RGWBucketCtl(RGWSI_Zone *zone_svc, + RGWSI_Bucket *bucket_svc, + RGWSI_Bucket_Sync *bucket_sync_svc, + RGWSI_BucketIndex *bi_svc, + RGWSI_User* user_svc); + + void init(RGWUserCtl *user_ctl, + RGWBucketMetadataHandler *_bm_handler, + RGWBucketInstanceMetadataHandler *_bmi_handler, + RGWDataChangesLog *datalog, + const DoutPrefixProvider *dpp); + + struct Bucket { + struct GetParams { + RGWObjVersionTracker *objv_tracker{nullptr}; + real_time *mtime{nullptr}; + std::map *attrs{nullptr}; + rgw_cache_entry_info *cache_info{nullptr}; + boost::optional refresh_version; + std::optional bectx_params; + + GetParams() {} + + GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + + GetParams& set_mtime(ceph::real_time *_mtime) { + mtime = _mtime; + return *this; + } + + GetParams& set_attrs(std::map *_attrs) { + attrs = _attrs; + return *this; + } + + GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { + cache_info = _cache_info; + return *this; + } + + GetParams& set_refresh_version(const obj_version& _refresh_version) { 
+ refresh_version = _refresh_version; + return *this; + } + + GetParams& set_bectx_params(std::optional _bectx_params) { + bectx_params = _bectx_params; + return *this; + } + }; + + struct PutParams { + RGWObjVersionTracker *objv_tracker{nullptr}; + ceph::real_time mtime; + bool exclusive{false}; + std::map *attrs{nullptr}; + + PutParams() {} + + PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + + PutParams& set_mtime(const ceph::real_time& _mtime) { + mtime = _mtime; + return *this; + } + + PutParams& set_exclusive(bool _exclusive) { + exclusive = _exclusive; + return *this; + } + + PutParams& set_attrs(std::map *_attrs) { + attrs = _attrs; + return *this; + } + }; + + struct RemoveParams { + RGWObjVersionTracker *objv_tracker{nullptr}; + + RemoveParams() {} + + RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + }; + }; + + struct BucketInstance { + struct GetParams { + real_time *mtime{nullptr}; + std::map *attrs{nullptr}; + rgw_cache_entry_info *cache_info{nullptr}; + boost::optional refresh_version; + RGWObjVersionTracker *objv_tracker{nullptr}; + std::optional bectx_params; + + GetParams() {} + + GetParams& set_mtime(ceph::real_time *_mtime) { + mtime = _mtime; + return *this; + } + + GetParams& set_attrs(std::map *_attrs) { + attrs = _attrs; + return *this; + } + + GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { + cache_info = _cache_info; + return *this; + } + + GetParams& set_refresh_version(const obj_version& _refresh_version) { + refresh_version = _refresh_version; + return *this; + } + + GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + + GetParams& set_bectx_params(std::optional _bectx_params) { + bectx_params = _bectx_params; + return *this; + } + }; + + struct PutParams { + std::optional orig_info; /* nullopt: orig_info was not fetched, + nullptr: orig_info was not found (new bucket instance */ + ceph::real_time mtime; + bool exclusive{false}; + std::map *attrs{nullptr}; + RGWObjVersionTracker *objv_tracker{nullptr}; + + PutParams() {} + + PutParams& set_orig_info(RGWBucketInfo *pinfo) { + orig_info = pinfo; + return *this; + } + + PutParams& set_mtime(const ceph::real_time& _mtime) { + mtime = _mtime; + return *this; + } + + PutParams& set_exclusive(bool _exclusive) { + exclusive = _exclusive; + return *this; + } + + PutParams& set_attrs(std::map *_attrs) { + attrs = _attrs; + return *this; + } + + PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + }; + + struct RemoveParams { + RGWObjVersionTracker *objv_tracker{nullptr}; + + RemoveParams() {} + + RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + }; + }; + + /* bucket entrypoint */ + int read_bucket_entrypoint_info(const rgw_bucket& bucket, + RGWBucketEntryPoint *info, + optional_yield y, + const DoutPrefixProvider *dpp, + const Bucket::GetParams& params = {}); + int store_bucket_entrypoint_info(const rgw_bucket& bucket, + RGWBucketEntryPoint& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const Bucket::PutParams& params = {}); + int remove_bucket_entrypoint_info(const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp, + const Bucket::RemoveParams& params = {}); + + /* bucket instance */ + int 
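
// [Editor's illustration -- not part of this commit.] The named-parameter
// idiom that the GetParams/PutParams/RemoveParams structs above implement:
// every setter returns *this, so a call site chains only the options it
// cares about and the defaults cover the rest. Stand-in type, not the real
// RGW one.
#include <ctime>

struct GetOptsSketch {
  std::time_t* mtime = nullptr;
  bool want_attrs = false;

  GetOptsSketch& set_mtime(std::time_t* m) { mtime = m; return *this; }
  GetOptsSketch& set_want_attrs(bool w)    { want_attrs = w; return *this; }
};

int read_info_sketch(const GetOptsSketch& opts = {}) {
  return opts.want_attrs ? 1 : 0;  // stub consumer
}

// Typical call site: chain exactly the options needed.
// std::time_t mt; read_info_sketch(GetOptsSketch().set_mtime(&mt));
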
read_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo *info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::GetParams& params = {}); + int store_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::PutParams& params = {}); + int remove_bucket_instance_info(const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::RemoveParams& params = {}); + + /* + * bucket_id may or may not be provided + * + * ep_objv_tracker might not be populated even if provided. Will only be set if entrypoint is read + * (that is: if bucket_id is empty). + */ + int read_bucket_info(const rgw_bucket& bucket, + RGWBucketInfo *info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::GetParams& params = {}, + RGWObjVersionTracker *ep_objv_tracker = nullptr); + + + int set_bucket_instance_attrs(RGWBucketInfo& bucket_info, + std::map& attrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + /* user/bucket */ + int link_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y, + const DoutPrefixProvider *dpp, + bool update_entrypoint = true, + rgw_ep_info *pinfo = nullptr); + + int unlink_bucket(const rgw_user& user_id, + const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp, + bool update_entrypoint = true); + + int chown(rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, + const rgw_user& user_id, const std::string& display_name, + const std::string& marker, optional_yield y, const DoutPrefixProvider *dpp); + + int read_buckets_stats(std::map& m, + optional_yield y, + const DoutPrefixProvider *dpp); + + int read_bucket_stats(const rgw_bucket& bucket, + RGWBucketEnt *result, + optional_yield y, + const DoutPrefixProvider *dpp); + + /* quota related */ + int sync_user_stats(const DoutPrefixProvider *dpp, + const rgw_user& user_id, const RGWBucketInfo& bucket_info, + optional_yield y, + RGWBucketEnt* pent); + + /* bucket sync */ + int get_sync_policy_handler(std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef *phandler, + optional_yield y, + const DoutPrefixProvider *dpp); + int bucket_exports_data(const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp); + int bucket_imports_data(const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp); + +private: + int convert_old_bucket_info(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + optional_yield y, + const DoutPrefixProvider *dpp); + + int do_store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketInfo& info, + optional_yield y, + const DoutPrefixProvider *dpp, + const BucketInstance::PutParams& params); + + int do_store_linked_bucket_info(RGWSI_Bucket_X_Ctx& ctx, + RGWBucketInfo& info, + RGWBucketInfo *orig_info, + bool exclusive, real_time mtime, + obj_version *pep_objv, + std::map *pattrs, + bool create_entry_point, + optional_yield, + const DoutPrefixProvider *dpp); + + int do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, + const rgw_user& user, + const rgw_bucket& bucket, + ceph::real_time creation_time, + bool update_entrypoint, + rgw_ep_info *pinfo, + optional_yield y, + const DoutPrefixProvider *dpp); + + int do_unlink_bucket(RGWSI_Bucket_EP_Ctx& ctx, + const rgw_user& user_id, + const rgw_bucket& bucket, + bool 
update_entrypoint, + optional_yield y, + const DoutPrefixProvider *dpp); + +}; + +bool rgw_find_bucket_by_id(const DoutPrefixProvider *dpp, CephContext *cct, rgw::sal::Driver* driver, const std::string& marker, + const std::string& bucket_id, rgw_bucket* bucket_out); diff --git a/src/rgw/driver/rados/rgw_bucket_sync.cc b/src/rgw/driver/rados/rgw_bucket_sync.cc new file mode 100644 index 00000000000..5fd81c53b1e --- /dev/null +++ b/src/rgw/driver/rados/rgw_bucket_sync.cc @@ -0,0 +1,941 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_common.h" +#include "rgw_bucket_sync.h" +#include "rgw_data_sync.h" +#include "rgw_zone.h" + +#include "services/svc_zone.h" +#include "services/svc_bucket_sync.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +ostream& operator<<(ostream& os, const rgw_sync_bucket_entity& e) { + os << "{b=" << rgw_sync_bucket_entities::bucket_key(e.bucket) << ",z=" << e.zone.value_or(rgw_zone_id()) << ",az=" << (int)e.all_zones << "}"; + return os; +} + +ostream& operator<<(ostream& os, const rgw_sync_bucket_pipe& pipe) { + os << "{s=" << pipe.source << ",d=" << pipe.dest << "}"; + return os; +} + +ostream& operator<<(ostream& os, const rgw_sync_bucket_entities& e) { + os << "{b=" << rgw_sync_bucket_entities::bucket_key(e.bucket) << ",z=" << e.zones.value_or(std::set()) << "}"; + return os; +} + +ostream& operator<<(ostream& os, const rgw_sync_bucket_pipes& pipe) { + os << "{id=" << pipe.id << ",s=" << pipe.source << ",d=" << pipe.dest << "}"; + return os; +} + +static std::vector filter_relevant_pipes(const std::vector& pipes, + const rgw_zone_id& source_zone, + const rgw_zone_id& dest_zone) +{ + std::vector relevant_pipes; + for (auto& p : pipes) { + if (p.source.match_zone(source_zone) && + p.dest.match_zone(dest_zone)) { + for (auto pipe : p.expand()) { + pipe.source.apply_zone(source_zone); + pipe.dest.apply_zone(dest_zone); + relevant_pipes.push_back(pipe); + } + } + } + + return relevant_pipes; +} + +static bool is_wildcard_bucket(const rgw_bucket& bucket) +{ + return bucket.name.empty(); +} + +void rgw_sync_group_pipe_map::dump(ceph::Formatter *f) const +{ + encode_json("zone", zone.id, f); + encode_json("buckets", rgw_sync_bucket_entities::bucket_key(bucket), f); + encode_json("sources", sources, f); + encode_json("dests", dests, f); +} + + +template +void rgw_sync_group_pipe_map::try_add_to_pipe_map(const rgw_zone_id& source_zone, + const rgw_zone_id& dest_zone, + const std::vector& pipes, + zb_pipe_map_t *pipe_map, + CB1 filter_cb, + CB2 call_filter_cb) +{ + if (!filter_cb(source_zone, nullopt, dest_zone, nullopt)) { + return; + } + auto relevant_pipes = filter_relevant_pipes(pipes, source_zone, dest_zone); + + for (auto& pipe : relevant_pipes) { + rgw_sync_bucket_entity zb; + if (!call_filter_cb(pipe, &zb)) { + continue; + } + pipe_map->insert(make_pair(zb, pipe)); + } +} + +template +void rgw_sync_group_pipe_map::try_add_source(const rgw_zone_id& source_zone, + const rgw_zone_id& dest_zone, + const std::vector& pipes, + CB filter_cb) +{ + return try_add_to_pipe_map(source_zone, dest_zone, pipes, + &sources, + filter_cb, + [&](const rgw_sync_bucket_pipe& pipe, rgw_sync_bucket_entity *zb) { + *zb = rgw_sync_bucket_entity{source_zone, pipe.source.get_bucket()}; + return filter_cb(source_zone, zb->bucket, dest_zone, pipe.dest.get_bucket()); + }); +} + +template +void rgw_sync_group_pipe_map::try_add_dest(const rgw_zone_id& source_zone, + const rgw_zone_id& dest_zone, + 
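
// [Editor's illustration -- not part of this commit.] The match-expand-pin
// shape of filter_relevant_pipes() above, with toy types: keep pipes whose
// endpoints match the requested zones, then pin any wildcard endpoints to
// the concrete zone ids before returning them.
#include <string>
#include <vector>

struct PipeSketch {
  std::string src_zone;  // "*" means "any zone"
  std::string dst_zone;

  bool match_src(const std::string& z) const { return src_zone == "*" || src_zone == z; }
  bool match_dst(const std::string& z) const { return dst_zone == "*" || dst_zone == z; }
};

std::vector<PipeSketch> relevant_pipes(const std::vector<PipeSketch>& pipes,
                                       const std::string& src,
                                       const std::string& dst) {
  std::vector<PipeSketch> out;
  for (auto p : pipes) {    // iterate by value: we pin zones on the copy
    if (p.match_src(src) && p.match_dst(dst)) {
      p.src_zone = src;     // apply_zone(): wildcard becomes concrete
      p.dst_zone = dst;
      out.push_back(p);
    }
  }
  return out;
}
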
const std::vector<rgw_sync_bucket_pipes>& pipes,
+                                           CB filter_cb)
+{
+  return try_add_to_pipe_map(source_zone, dest_zone, pipes,
+                             &dests,
+                             filter_cb,
+                             [&](const rgw_sync_bucket_pipe& pipe, rgw_sync_bucket_entity *zb) {
+                               *zb = rgw_sync_bucket_entity{dest_zone, pipe.dest.get_bucket()};
+                               return filter_cb(source_zone, pipe.source.get_bucket(), dest_zone, zb->bucket);
+                             });
+}
+
+using zb_pipe_map_t = rgw_sync_group_pipe_map::zb_pipe_map_t;
+
+pair<zb_pipe_map_t::const_iterator, zb_pipe_map_t::const_iterator> rgw_sync_group_pipe_map::find_pipes(const zb_pipe_map_t& m,
+                                                                                                       const rgw_zone_id& zone,
+                                                                                                       std::optional<rgw_bucket> b) const
+{
+  if (!b) {
+    return m.equal_range(rgw_sync_bucket_entity{zone, rgw_bucket()});
+  }
+
+  auto zb = rgw_sync_bucket_entity{zone, *b};
+
+  auto range = m.equal_range(zb);
+  if (range.first == range.second &&
+      !is_wildcard_bucket(*b)) {
+    /* couldn't find the specific bucket, try to find by wildcard */
+    zb.bucket = rgw_bucket();
+    range = m.equal_range(zb);
+  }
+
+  return range;
+}
+
+
+template <typename CB>
+void rgw_sync_group_pipe_map::init(const DoutPrefixProvider *dpp,
+                                   CephContext *cct,
+                                   const rgw_zone_id& _zone,
+                                   std::optional<rgw_bucket> _bucket,
+                                   const rgw_sync_policy_group& group,
+                                   rgw_sync_data_flow_group *_default_flow,
+                                   std::set<rgw_zone_id> *_pall_zones,
+                                   CB filter_cb) {
+  zone = _zone;
+  bucket = _bucket;
+  default_flow = _default_flow;
+  pall_zones = _pall_zones;
+
+  rgw_sync_bucket_entity zb(zone, bucket);
+
+  status = group.status;
+
+  std::vector<rgw_sync_bucket_pipes> zone_pipes;
+
+  string bucket_key = (bucket ? bucket->get_key() : "*");
+
+  /* only look at pipes that touch the specific zone and bucket */
+  for (auto& pipe : group.pipes) {
+    if (pipe.contains_zone_bucket(zone, bucket)) {
+      ldpp_dout(dpp, 20) << __func__ << "(): pipe_map (zone=" << zone << " bucket=" << bucket_key << "): adding potential pipe: " << pipe << dendl;
+      zone_pipes.push_back(pipe);
+    }
+  }
+
+  const rgw_sync_data_flow_group *pflow;
+
+  if (!group.data_flow.empty()) {
+    pflow = &group.data_flow;
+  } else {
+    if (!default_flow) {
+      return;
+    }
+    pflow = default_flow;
+  }
+
+  auto& flow = *pflow;
+
+  pall_zones->insert(zone);
+
+  /* symmetrical */
+  for (auto& symmetrical_group : flow.symmetrical) {
+    if (symmetrical_group.zones.find(zone) != symmetrical_group.zones.end()) {
+      for (auto& z : symmetrical_group.zones) {
+        if (z != zone) {
+          pall_zones->insert(z);
+          try_add_source(z, zone, zone_pipes, filter_cb);
+          try_add_dest(zone, z, zone_pipes, filter_cb);
+        }
+      }
+    }
+  }
+
+  /* directional */
+  for (auto& rule : flow.directional) {
+    if (rule.source_zone == zone) {
+      pall_zones->insert(rule.dest_zone);
+      try_add_dest(zone, rule.dest_zone, zone_pipes, filter_cb);
+    } else if (rule.dest_zone == zone) {
+      pall_zones->insert(rule.source_zone);
+      try_add_source(rule.source_zone, zone, zone_pipes, filter_cb);
+    }
+  }
+}
+
+/*
+ * find all relevant pipes in our zone that match {dest_bucket} <- {source_zone, source_bucket}
+ */
+vector<rgw_sync_bucket_pipe> rgw_sync_group_pipe_map::find_source_pipes(const rgw_zone_id& source_zone,
+                                                                        std::optional<rgw_bucket> source_bucket,
+                                                                        std::optional<rgw_bucket> dest_bucket) const {
+  vector<rgw_sync_bucket_pipe> result;
+
+  auto range = find_pipes(sources, source_zone, source_bucket);
+
+  for (auto iter = range.first; iter != range.second; ++iter) {
+    auto pipe = iter->second;
+    if (pipe.dest.match_bucket(dest_bucket)) {
+      result.push_back(pipe);
+    }
+  }
+  return result;
+}
+
+/*
+ * find all relevant pipes in other zones that pull from a specific
+ * source bucket in our zone {source_bucket} -> {dest_zone, dest_bucket}
+ */
+vector<rgw_sync_bucket_pipe> rgw_sync_group_pipe_map::find_dest_pipes(std::optional<rgw_bucket> source_bucket,
+                                                                      const rgw_zone_id& dest_zone,
+                                                                      std::optional<rgw_bucket>
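
// [Editor's illustration -- not part of this commit.] The wildcard fallback
// in find_pipes() above, reduced to a std::multimap keyed by bucket name,
// where the empty string plays the wildcard role.
#include <map>
#include <string>
#include <utility>

using PipeMapSketch = std::multimap<std::string, int>;
using RangeSketch = std::pair<PipeMapSketch::const_iterator,
                              PipeMapSketch::const_iterator>;

RangeSketch find_range(const PipeMapSketch& m, const std::string& bucket) {
  auto range = m.equal_range(bucket);
  if (range.first == range.second && !bucket.empty()) {
    range = m.equal_range(std::string());  // no exact match: retry as wildcard
  }
  return range;
}
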
dest_bucket) const { + vector result; + + auto range = find_pipes(dests, dest_zone, dest_bucket); + + for (auto iter = range.first; iter != range.second; ++iter) { + auto pipe = iter->second; + if (pipe.source.match_bucket(source_bucket)) { + result.push_back(pipe); + } + } + + return result; +} + +/* + * find all relevant pipes from {source_zone, source_bucket} -> {dest_zone, dest_bucket} + */ +vector rgw_sync_group_pipe_map::find_pipes(const rgw_zone_id& source_zone, + std::optional source_bucket, + const rgw_zone_id& dest_zone, + std::optional dest_bucket) const { + if (dest_zone == zone) { + return find_source_pipes(source_zone, source_bucket, dest_bucket); + } + + if (source_zone == zone) { + return find_dest_pipes(source_bucket, dest_zone, dest_bucket); + } + + return vector(); +} + +void RGWBucketSyncFlowManager::pipe_rules::insert(const rgw_sync_bucket_pipe& pipe) +{ + pipes.push_back(pipe); + + auto ppipe = &pipes.back(); + auto prefix = ppipe->params.source.filter.prefix.value_or(string()); + + prefix_refs.insert(make_pair(prefix, ppipe)); + + for (auto& t : ppipe->params.source.filter.tags) { + string tag = t.key + "=" + t.value; + auto titer = tag_refs.find(tag); + if (titer != tag_refs.end() && + ppipe->params.priority > titer->second->params.priority) { + titer->second = ppipe; + } else { + tag_refs[tag] = ppipe; + } + } +} + +bool RGWBucketSyncFlowManager::pipe_rules::find_basic_info_without_tags(const rgw_obj_key& key, + std::optional *user, + std::optional *acl_translation_owner, + std::optional *storage_class, + rgw_sync_pipe_params::Mode *mode, + bool *need_more_info) const +{ + std::optional owner; + + *need_more_info = false; + + if (prefix_refs.empty()) { + return false; + } + + auto end = prefix_refs.upper_bound(key.name); + auto iter = end; + if (iter != prefix_refs.begin()) { + --iter; + } + if (iter == prefix_refs.end()) { + return false; + } + + if (iter != prefix_refs.begin()) { + iter = prefix_refs.find(iter->first); /* prefix_refs is multimap, find first element + holding that key */ + } + + std::vector iters; + + std::optional priority; + + for (; iter != end; ++iter) { + auto& prefix = iter->first; + if (!boost::starts_with(key.name, prefix)) { + continue; + } + + auto& rule_params = iter->second->params; + auto& filter = rule_params.source.filter; + + if (rule_params.priority > priority) { + priority = rule_params.priority; + + if (!filter.has_tags()) { + iters.clear(); + } + iters.push_back(iter); + + *need_more_info = filter.has_tags(); /* if highest priority filter has tags, then + we can't be sure if it would be used. 
+ We need to first read the info from the source object */ + } + } + + if (iters.empty()) { + return false; + } + + std::optional _user; + std::optional _acl_translation; + std::optional _storage_class; + rgw_sync_pipe_params::Mode _mode{rgw_sync_pipe_params::Mode::MODE_SYSTEM}; + + // make sure all params are the same by saving the first one + // encountered and comparing all subsequent to it + bool first_iter = true; + for (auto& iter : iters) { + const rgw_sync_pipe_params& rule_params = iter->second->params; + if (first_iter) { + _user = rule_params.user; + _acl_translation = rule_params.dest.acl_translation; + _storage_class = rule_params.dest.storage_class; + _mode = rule_params.mode; + first_iter = false; + } else { + // note: three of these == operators are comparing std::optional + // against std::optional; as one would expect they are equal a) + // if both do not contain values or b) if both do and those + // contained values are the same + const bool conflict = + !(_user == rule_params.user && + _acl_translation == rule_params.dest.acl_translation && + _storage_class == rule_params.dest.storage_class && + _mode == rule_params.mode); + if (conflict) { + *need_more_info = true; + return false; + } + } + } + + *user = _user; + if (_acl_translation) { + *acl_translation_owner = _acl_translation->owner; + } + *storage_class = _storage_class; + *mode = _mode; + + return true; +} + +bool RGWBucketSyncFlowManager::pipe_rules::find_obj_params(const rgw_obj_key& key, + const RGWObjTags::tag_map_t& tags, + rgw_sync_pipe_params *params) const +{ + if (prefix_refs.empty()) { + return false; + } + + auto iter = prefix_refs.upper_bound(key.name); + if (iter != prefix_refs.begin()) { + --iter; + } + if (iter == prefix_refs.end()) { + return false; + } + + auto end = prefix_refs.upper_bound(key.name); + auto max = end; + + std::optional priority; + + for (; iter != end; ++iter) { + /* NOTE: this is not the most efficient way to do it, + * a trie data structure would be better + */ + auto& prefix = iter->first; + if (!boost::starts_with(key.name, prefix)) { + continue; + } + + auto& rule_params = iter->second->params; + auto& filter = rule_params.source.filter; + + if (!filter.check_tags(tags)) { + continue; + } + + if (rule_params.priority > priority) { + priority = rule_params.priority; + max = iter; + } + } + + if (max == end) { + return false; + } + + *params = max->second->params; + return true; +} + +/* + * return either the current prefix for s, or the next one if s is not within a prefix + */ + +RGWBucketSyncFlowManager::pipe_rules::prefix_map_t::const_iterator RGWBucketSyncFlowManager::pipe_rules::prefix_search(const std::string& s) const +{ + if (prefix_refs.empty()) { + return prefix_refs.end(); + } + auto next = prefix_refs.upper_bound(s); + auto iter = next; + if (iter != prefix_refs.begin()) { + --iter; + } + if (!boost::starts_with(s, iter->first)) { + return next; + } + + return iter; +} + +void RGWBucketSyncFlowManager::pipe_set::insert(const rgw_sync_bucket_pipe& pipe) { + pipe_map.insert(make_pair(pipe.id, pipe)); + + auto& rules_ref = rules[endpoints_pair(pipe)]; + + if (!rules_ref) { + rules_ref = make_shared(); + } + + rules_ref->insert(pipe); + + pipe_handler h(rules_ref, pipe); + + handlers.insert(h); +} + +void RGWBucketSyncFlowManager::pipe_set::dump(ceph::Formatter *f) const +{ + encode_json("pipes", pipe_map, f); +} + +bool RGWBucketSyncFlowManager::allowed_data_flow(const rgw_zone_id& source_zone, + std::optional source_bucket, + const rgw_zone_id& dest_zone, + 
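
// [Editor's illustration -- not part of this commit.] The precedence encoded
// in allowed_data_flow() below: any matching FORBIDDEN group vetoes the flow
// immediately, ENABLED both permits and activates it, ALLOWED merely permits
// it, and unknown statuses are ignored. The enum is a stand-in mirroring
// rgw_sync_policy_group::Status.
#include <vector>

enum class StatusSketch { FORBIDDEN, ENABLED, ALLOWED, UNKNOWN };

bool allowed_sketch(const std::vector<StatusSketch>& matching_groups,
                    bool check_activated) {
  bool found = false;
  bool found_activated = false;
  for (auto s : matching_groups) {
    switch (s) {
    case StatusSketch::FORBIDDEN:
      return false;                 // hard veto, regardless of order
    case StatusSketch::ENABLED:
      found = found_activated = true;
      break;
    case StatusSketch::ALLOWED:
      found = true;
      break;
    default:
      break;                        // unknown -- ignore
    }
  }
  if (check_activated && found_activated) {
    return true;
  }
  return found;
}
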
std::optional dest_bucket, + bool check_activated) const +{ + bool found = false; + bool found_activated = false; + + for (auto m : flow_groups) { + auto& fm = m.second; + auto pipes = fm.find_pipes(source_zone, source_bucket, + dest_zone, dest_bucket); + + bool is_found = !pipes.empty(); + + if (is_found) { + switch (fm.status) { + case rgw_sync_policy_group::Status::FORBIDDEN: + return false; + case rgw_sync_policy_group::Status::ENABLED: + found = true; + found_activated = true; + break; + case rgw_sync_policy_group::Status::ALLOWED: + found = true; + break; + default: + break; /* unknown -- ignore */ + } + } + } + + if (check_activated && found_activated) { + return true; + } + + return found; +} + +void RGWBucketSyncFlowManager::init(const DoutPrefixProvider *dpp, const rgw_sync_policy_info& sync_policy) { + std::optional default_flow; + if (parent) { + default_flow.emplace(); + default_flow->init_default(parent->all_zones); + } + + for (auto& item : sync_policy.groups) { + auto& group = item.second; + auto& flow_group_map = flow_groups[group.id]; + + flow_group_map.init(dpp, cct, zone_id, bucket, group, + (default_flow ? &(*default_flow) : nullptr), + &all_zones, + [&](const rgw_zone_id& source_zone, + std::optional source_bucket, + const rgw_zone_id& dest_zone, + std::optional dest_bucket) { + if (!parent) { + return true; + } + return parent->allowed_data_flow(source_zone, + source_bucket, + dest_zone, + dest_bucket, + false); /* just check that it's not disabled */ + }); + } +} + +void RGWBucketSyncFlowManager::reflect(const DoutPrefixProvider *dpp, + std::optional effective_bucket, + RGWBucketSyncFlowManager::pipe_set *source_pipes, + RGWBucketSyncFlowManager::pipe_set *dest_pipes, + bool only_enabled) const + +{ + string effective_bucket_key; + if (effective_bucket) { + effective_bucket_key = effective_bucket->get_key(); + } + if (parent) { + parent->reflect(dpp, effective_bucket, source_pipes, dest_pipes, only_enabled); + } + + for (auto& item : flow_groups) { + auto& flow_group_map = item.second; + + /* only return enabled groups */ + if (flow_group_map.status != rgw_sync_policy_group::Status::ENABLED && + (only_enabled || flow_group_map.status != rgw_sync_policy_group::Status::ALLOWED)) { + continue; + } + + for (auto& entry : flow_group_map.sources) { + rgw_sync_bucket_pipe pipe = entry.second; + if (!pipe.dest.match_bucket(effective_bucket)) { + continue; + } + + pipe.source.apply_bucket(effective_bucket); + pipe.dest.apply_bucket(effective_bucket); + + ldpp_dout(dpp, 20) << __func__ << "(): flow manager (bucket=" << effective_bucket_key << "): adding source pipe: " << pipe << dendl; + source_pipes->insert(pipe); + } + + for (auto& entry : flow_group_map.dests) { + rgw_sync_bucket_pipe pipe = entry.second; + + if (!pipe.source.match_bucket(effective_bucket)) { + continue; + } + + pipe.source.apply_bucket(effective_bucket); + pipe.dest.apply_bucket(effective_bucket); + + ldpp_dout(dpp, 20) << __func__ << "(): flow manager (bucket=" << effective_bucket_key << "): adding dest pipe: " << pipe << dendl; + dest_pipes->insert(pipe); + } + } +} + + +RGWBucketSyncFlowManager::RGWBucketSyncFlowManager(CephContext *_cct, + const rgw_zone_id& _zone_id, + std::optional _bucket, + const RGWBucketSyncFlowManager *_parent) : cct(_cct), + zone_id(_zone_id), + bucket(_bucket), + parent(_parent) {} + + +void RGWSyncPolicyCompat::convert_old_sync_config(RGWSI_Zone *zone_svc, + RGWSI_SyncModules *sync_modules_svc, + rgw_sync_policy_info *ppolicy) +{ + bool found = false; + + 
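
// [Editor's illustration -- not part of this commit.] What the loop below
// derives from a legacy configuration: one directional flow rule per ordered
// zone pair (z2 -> z1) where z1 lists z2 among the zones it syncs from.
// Stand-in types; the real code keys off RGWZone::syncs_from().
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using ZoneIdSketch = std::string;

std::vector<std::pair<ZoneIdSketch, ZoneIdSketch>>  // {source, dest} pairs
legacy_flow_rules(const std::map<ZoneIdSketch, std::set<ZoneIdSketch>>& syncs_from) {
  std::vector<std::pair<ZoneIdSketch, ZoneIdSketch>> rules;
  for (const auto& [dest, sources] : syncs_from) {
    for (const auto& src : sources) {
      if (src != dest) {
        rules.emplace_back(src, dest);  // data flows src -> dest
      }
    }
  }
  return rules;
}
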
rgw_sync_policy_info policy; + + auto& group = policy.groups["default"]; + auto& zonegroup = zone_svc->get_zonegroup(); + + for (const auto& ziter1 : zonegroup.zones) { + auto& id1 = ziter1.first; + const RGWZone& z1 = ziter1.second; + + for (const auto& ziter2 : zonegroup.zones) { + auto& id2 = ziter2.first; + const RGWZone& z2 = ziter2.second; + + if (id1 == id2) { + continue; + } + + if (z1.syncs_from(z2.name)) { + found = true; + rgw_sync_directional_rule *rule; + group.data_flow.find_or_create_directional(id2, + id1, + &rule); + } + } + } + + if (!found) { /* nothing syncs */ + return; + } + + rgw_sync_bucket_pipes pipes; + pipes.id = "all"; + pipes.source.all_zones = true; + pipes.dest.all_zones = true; + + group.pipes.emplace_back(std::move(pipes)); + + + group.status = rgw_sync_policy_group::Status::ENABLED; + + *ppolicy = std::move(policy); +} + +RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(RGWSI_Zone *_zone_svc, + RGWSI_SyncModules *sync_modules_svc, + RGWSI_Bucket_Sync *_bucket_sync_svc, + std::optional effective_zone) : zone_svc(_zone_svc) , + bucket_sync_svc(_bucket_sync_svc) { + zone_id = effective_zone.value_or(zone_svc->zone_id()); + flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), + zone_id, + nullopt, + nullptr)); + sync_policy = zone_svc->get_zonegroup().sync_policy; + + if (sync_policy.empty()) { + RGWSyncPolicyCompat::convert_old_sync_config(zone_svc, sync_modules_svc, &sync_policy); + legacy_config = true; + } +} + +RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, + const RGWBucketInfo& _bucket_info, + map&& _bucket_attrs) : parent(_parent), + bucket_info(_bucket_info), + bucket_attrs(std::move(_bucket_attrs)) { + if (_bucket_info.sync_policy) { + sync_policy = *_bucket_info.sync_policy; + + for (auto& entry : sync_policy.groups) { + for (auto& pipe : entry.second.pipes) { + if (pipe.params.mode == rgw_sync_pipe_params::MODE_USER && + pipe.params.user.empty()) { + pipe.params.user = _bucket_info.owner; + } + } + } + } + legacy_config = parent->legacy_config; + bucket = _bucket_info.bucket; + zone_svc = parent->zone_svc; + bucket_sync_svc = parent->bucket_sync_svc; + flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), + parent->zone_id, + _bucket_info.bucket, + parent->flow_mgr.get())); +} + +RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, + const rgw_bucket& _bucket, + std::optional _sync_policy) : parent(_parent) { + if (_sync_policy) { + sync_policy = *_sync_policy; + } + legacy_config = parent->legacy_config; + bucket = _bucket; + zone_svc = parent->zone_svc; + bucket_sync_svc = parent->bucket_sync_svc; + flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), + parent->zone_id, + _bucket, + parent->flow_mgr.get())); +} + +RGWBucketSyncPolicyHandler *RGWBucketSyncPolicyHandler::alloc_child(const RGWBucketInfo& bucket_info, + map&& bucket_attrs) const +{ + return new RGWBucketSyncPolicyHandler(this, bucket_info, std::move(bucket_attrs)); +} + +RGWBucketSyncPolicyHandler *RGWBucketSyncPolicyHandler::alloc_child(const rgw_bucket& bucket, + std::optional sync_policy) const +{ + return new RGWBucketSyncPolicyHandler(this, bucket, sync_policy); +} + +int RGWBucketSyncPolicyHandler::init(const DoutPrefixProvider *dpp, optional_yield y) +{ + int r = bucket_sync_svc->get_bucket_sync_hints(dpp, bucket.value_or(rgw_bucket()), + &source_hints, + &target_hints, + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize bucket sync 
policy handler: get_bucket_sync_hints() on bucket=" + << bucket << " returned r=" << r << dendl; + return r; + } + + flow_mgr->init(dpp, sync_policy); + + reflect(dpp, &source_pipes, + &target_pipes, + &sources, + &targets, + &source_zones, + &target_zones, + true); + + return 0; +} + +void RGWBucketSyncPolicyHandler::reflect(const DoutPrefixProvider *dpp, RGWBucketSyncFlowManager::pipe_set *psource_pipes, + RGWBucketSyncFlowManager::pipe_set *ptarget_pipes, + map *psources, + map *ptargets, + std::set *psource_zones, + std::set *ptarget_zones, + bool only_enabled) const +{ + RGWBucketSyncFlowManager::pipe_set _source_pipes; + RGWBucketSyncFlowManager::pipe_set _target_pipes; + map _sources; + map _targets; + std::set _source_zones; + std::set _target_zones; + + flow_mgr->reflect(dpp, bucket, &_source_pipes, &_target_pipes, only_enabled); + + for (auto& entry : _source_pipes.pipe_map) { + auto& pipe = entry.second; + if (!pipe.source.zone) { + continue; + } + _source_zones.insert(*pipe.source.zone); + _sources[*pipe.source.zone].insert(pipe); + } + + for (auto& entry : _target_pipes.pipe_map) { + auto& pipe = entry.second; + if (!pipe.dest.zone) { + continue; + } + _target_zones.insert(*pipe.dest.zone); + _targets[*pipe.dest.zone].insert(pipe); + } + + if (psource_pipes) { + *psource_pipes = std::move(_source_pipes); + } + if (ptarget_pipes) { + *ptarget_pipes = std::move(_target_pipes); + } + if (psources) { + *psources = std::move(_sources); + } + if (ptargets) { + *ptargets = std::move(_targets); + } + if (psource_zones) { + *psource_zones = std::move(_source_zones); + } + if (ptarget_zones) { + *ptarget_zones = std::move(_target_zones); + } +} + +multimap RGWBucketSyncPolicyHandler::get_all_sources() const +{ + multimap m; + + for (auto& source_entry : sources) { + auto& zone_id = source_entry.first; + + auto& pipes = source_entry.second.pipe_map; + + for (auto& entry : pipes) { + auto& pipe = entry.second; + m.insert(make_pair(zone_id, pipe)); + } + } + + for (auto& pipe : resolved_sources) { + if (!pipe.source.zone) { + continue; + } + + m.insert(make_pair(*pipe.source.zone, pipe)); + } + + return m; +} + +multimap RGWBucketSyncPolicyHandler::get_all_dests() const +{ + multimap m; + + for (auto& dest_entry : targets) { + auto& zone_id = dest_entry.first; + + auto& pipes = dest_entry.second.pipe_map; + + for (auto& entry : pipes) { + auto& pipe = entry.second; + m.insert(make_pair(zone_id, pipe)); + } + } + + for (auto& pipe : resolved_dests) { + if (!pipe.dest.zone) { + continue; + } + + m.insert(make_pair(*pipe.dest.zone, pipe)); + } + + return m; +} + +multimap RGWBucketSyncPolicyHandler::get_all_dests_in_zone(const rgw_zone_id& zone_id) const +{ + multimap m; + + auto iter = targets.find(zone_id); + if (iter != targets.end()) { + auto& pipes = iter->second.pipe_map; + + for (auto& entry : pipes) { + auto& pipe = entry.second; + m.insert(make_pair(zone_id, pipe)); + } + } + + for (auto& pipe : resolved_dests) { + if (!pipe.dest.zone || + *pipe.dest.zone != zone_id) { + continue; + } + + m.insert(make_pair(*pipe.dest.zone, pipe)); + } + + return m; +} + +void RGWBucketSyncPolicyHandler::get_pipes(std::set *_sources, std::set *_targets, + std::optional filter_peer) { /* return raw pipes */ + for (auto& entry : source_pipes.pipe_map) { + auto& source_pipe = entry.second; + if (!filter_peer || + source_pipe.source.match(*filter_peer)) { + _sources->insert(source_pipe); + } + } + + for (auto& entry : target_pipes.pipe_map) { + auto& target_pipe = entry.second; + if (!filter_peer || + 
        target_pipe.dest.match(*filter_peer)) {
+      _targets->insert(target_pipe);
+    }
+  }
+}
+
+bool RGWBucketSyncPolicyHandler::bucket_exports_data() const
+{
+  if (!bucket) {
+    return false;
+  }
+
+  if (bucket_is_sync_source()) {
+    return true;
+  }
+
+  return (zone_svc->need_to_log_data() &&
+          bucket_info->datasync_flag_enabled());
+}
+
+bool RGWBucketSyncPolicyHandler::bucket_imports_data() const
+{
+  return bucket_is_sync_target();
+}
+
diff --git a/src/rgw/driver/rados/rgw_bucket_sync.h b/src/rgw/driver/rados/rgw_bucket_sync.h
new file mode 100644
index 00000000000..76143773e8d
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_bucket_sync.h
@@ -0,0 +1,412 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "rgw_common.h"
+#include "rgw_sync_policy.h"
+
+class RGWSI_Zone;
+class RGWSI_SyncModules;
+class RGWSI_Bucket_Sync;
+
+struct rgw_sync_group_pipe_map;
+struct rgw_sync_bucket_pipes;
+struct rgw_sync_policy_info;
+
+struct rgw_sync_group_pipe_map {
+  rgw_zone_id zone;
+  std::optional<rgw_bucket> bucket;
+
+  rgw_sync_policy_group::Status status{rgw_sync_policy_group::Status::FORBIDDEN};
+
+  using zb_pipe_map_t = std::multimap<rgw_sync_bucket_entity, rgw_sync_bucket_pipe>;
+
+  zb_pipe_map_t sources; /* all the pipes where zone is pulling from */
+  zb_pipe_map_t dests; /* all the pipes that pull from zone */
+
+  std::set<rgw_zone_id> *pall_zones{nullptr};
+  rgw_sync_data_flow_group *default_flow{nullptr}; /* flow to use if policy doesn't define it,
+                                                      used in the case of bucket sync policy, not at the
+                                                      zonegroup level */
+
+  void dump(ceph::Formatter *f) const;
+
+  template <typename CB1, typename CB2>
+  void try_add_to_pipe_map(const rgw_zone_id& source_zone,
+                           const rgw_zone_id& dest_zone,
+                           const std::vector<rgw_sync_bucket_pipes>& pipes,
+                           zb_pipe_map_t *pipe_map,
+                           CB1 filter_cb,
+                           CB2 call_filter_cb);
+
+  template <typename CB>
+  void try_add_source(const rgw_zone_id& source_zone,
+                      const rgw_zone_id& dest_zone,
+                      const std::vector<rgw_sync_bucket_pipes>& pipes,
+                      CB filter_cb);
+
+  template <typename CB>
+  void try_add_dest(const rgw_zone_id& source_zone,
+                    const rgw_zone_id& dest_zone,
+                    const std::vector<rgw_sync_bucket_pipes>& pipes,
+                    CB filter_cb);
+
+  std::pair<zb_pipe_map_t::const_iterator, zb_pipe_map_t::const_iterator> find_pipes(const zb_pipe_map_t& m,
+                                                                                     const rgw_zone_id& zone,
+                                                                                     std::optional<rgw_bucket> b) const;
+
+  template <typename CB>
+  void init(const DoutPrefixProvider *dpp, CephContext *cct,
+            const rgw_zone_id& _zone,
+            std::optional<rgw_bucket> _bucket,
+            const rgw_sync_policy_group& group,
+            rgw_sync_data_flow_group *_default_flow,
+            std::set<rgw_zone_id> *_pall_zones,
+            CB filter_cb);
+
+  /*
+   * find all relevant pipes in our zone that match {dest_bucket} <- {source_zone, source_bucket}
+   */
+  std::vector<rgw_sync_bucket_pipe> find_source_pipes(const rgw_zone_id& source_zone,
+                                                      std::optional<rgw_bucket> source_bucket,
+                                                      std::optional<rgw_bucket> dest_bucket) const;
+
+  /*
+   * find all relevant pipes in other zones that pull from a specific
+   * source bucket in our zone {source_bucket} -> {dest_zone, dest_bucket}
+   */
+  std::vector<rgw_sync_bucket_pipe> find_dest_pipes(std::optional<rgw_bucket> source_bucket,
+                                                    const rgw_zone_id& dest_zone,
+                                                    std::optional<rgw_bucket> dest_bucket) const;
+
+  /*
+   * find all relevant pipes from {source_zone, source_bucket} -> {dest_zone, dest_bucket}
+   */
+  std::vector<rgw_sync_bucket_pipe> find_pipes(const rgw_zone_id& source_zone,
+                                               std::optional<rgw_bucket> source_bucket,
+                                               const rgw_zone_id& dest_zone,
+                                               std::optional<rgw_bucket> dest_bucket) const;
+};
+
+class RGWSyncPolicyCompat {
+public:
+  static void convert_old_sync_config(RGWSI_Zone *zone_svc,
+                                      RGWSI_SyncModules *sync_modules_svc,
+                                      rgw_sync_policy_info *ppolicy);
+};
+
+class RGWBucketSyncFlowManager {
+  friend class RGWBucketSyncPolicyHandler;
+public:
+  struct endpoints_pair {
+    rgw_sync_bucket_entity source;
+    rgw_sync_bucket_entity dest;
+
+    endpoints_pair() {}
+    endpoints_pair(const rgw_sync_bucket_pipe& pipe) {
+      source = pipe.source;
+      dest = pipe.dest;
+    }
+
+    bool operator<(const endpoints_pair& e) const {
+      if (source < e.source) {
+        return true;
+      }
+      if (e.source < source) {
+        return false;
+      }
+      return (dest < e.dest);
+    }
+  };
+
+  /*
+   * pipe_rules: deal with a set of pipes that have common endpoints_pair
+   */
+  class pipe_rules {
+    std::list<rgw_sync_bucket_pipe> pipes;
+
+  public:
+    using prefix_map_t = std::multimap<std::string, rgw_sync_bucket_pipe *>;
+
+    std::map<std::string, rgw_sync_bucket_pipe *> tag_refs;
+    prefix_map_t prefix_refs;
+
+    void insert(const rgw_sync_bucket_pipe& pipe);
+
+    bool find_basic_info_without_tags(const rgw_obj_key& key,
+                                      std::optional<rgw_user> *user,
+                                      std::optional<rgw_user> *acl_translation,
+                                      std::optional<std::string> *storage_class,
+                                      rgw_sync_pipe_params::Mode *mode,
+                                      bool *need_more_info) const;
+    bool find_obj_params(const rgw_obj_key& key,
+                         const RGWObjTags::tag_map_t& tags,
+                         rgw_sync_pipe_params *params) const;
+
+    void scan_prefixes(std::vector<std::string> *prefixes) const;
+
+    prefix_map_t::const_iterator prefix_begin() const {
+      return prefix_refs.begin();
+    }
+    prefix_map_t::const_iterator prefix_search(const std::string& s) const;
+    prefix_map_t::const_iterator prefix_end() const {
+      return prefix_refs.end();
+    }
+  };
+
+  using pipe_rules_ref = std::shared_ptr<pipe_rules>;
+
+  /*
+   * pipe_handler: extends endpoints_rule to point at the corresponding rules handler
+   */
+  struct pipe_handler : public endpoints_pair {
+    pipe_rules_ref rules;
+
+    pipe_handler() {}
+    pipe_handler(pipe_rules_ref& _rules,
+                 const rgw_sync_bucket_pipe& _pipe) : endpoints_pair(_pipe),
+                                                      rules(_rules) {}
+    bool specific() const {
+      return source.specific() && dest.specific();
+    }
+
+    bool find_basic_info_without_tags(const rgw_obj_key& key,
+                                      std::optional<rgw_user> *user,
+                                      std::optional<rgw_user> *acl_translation,
+                                      std::optional<std::string> *storage_class,
+                                      rgw_sync_pipe_params::Mode *mode,
+                                      bool *need_more_info) const {
+      if (!rules) {
+        return false;
+      }
+      return rules->find_basic_info_without_tags(key, user, acl_translation, storage_class, mode, need_more_info);
+    }
+
+    bool find_obj_params(const rgw_obj_key& key,
+                         const RGWObjTags::tag_map_t& tags,
+                         rgw_sync_pipe_params *params) const {
+      if (!rules) {
+        return false;
+      }
+      return rules->find_obj_params(key, tags, params);
+    }
+  };
+
+  struct pipe_set {
+    std::map<endpoints_pair, pipe_rules_ref> rules;
+    std::multimap<std::string, rgw_sync_bucket_pipe> pipe_map;
+
+    std::set<pipe_handler> handlers;
+
+    using iterator = std::set<pipe_handler>::iterator;
+
+    void clear() {
+      rules.clear();
+      pipe_map.clear();
+      handlers.clear();
+    }
+
+    void insert(const rgw_sync_bucket_pipe& pipe);
+
+    iterator begin() const {
+      return handlers.begin();
+    }
+
+    iterator end() const {
+      return handlers.end();
+    }
+
+    void dump(ceph::Formatter *f) const;
+  };
+
+private:
+
+  CephContext *cct;
+
+  rgw_zone_id zone_id;
+  std::optional<rgw_bucket> bucket;
+
+  const RGWBucketSyncFlowManager *parent{nullptr};
+
+  std::map<std::string, rgw_sync_group_pipe_map> flow_groups;
+
+  std::set<rgw_zone_id> all_zones;
+
+  bool allowed_data_flow(const rgw_zone_id& source_zone,
+                         std::optional<rgw_bucket> source_bucket,
+                         const rgw_zone_id& dest_zone,
+                         std::optional<rgw_bucket> dest_bucket,
+                         bool check_activated) const;
+
+  /*
+   * find all the matching flows in a flow map for a specific bucket
+   */
+  void update_flow_maps(const rgw_sync_bucket_pipes& pipe);
+
+  void init(const
DoutPrefixProvider *dpp, const rgw_sync_policy_info& sync_policy); + +public: + + RGWBucketSyncFlowManager(CephContext *_cct, + const rgw_zone_id& _zone_id, + std::optional _bucket, + const RGWBucketSyncFlowManager *_parent); + + void reflect(const DoutPrefixProvider *dpp, std::optional effective_bucket, + pipe_set *flow_by_source, + pipe_set *flow_by_dest, + bool only_enabled) const; + +}; + +static inline std::ostream& operator<<(std::ostream& os, const RGWBucketSyncFlowManager::endpoints_pair& e) { + os << e.dest << " -> " << e.source; + return os; +} + +class RGWBucketSyncPolicyHandler { + bool legacy_config{false}; + const RGWBucketSyncPolicyHandler *parent{nullptr}; + RGWSI_Zone *zone_svc; + RGWSI_Bucket_Sync *bucket_sync_svc; + rgw_zone_id zone_id; + std::optional bucket_info; + std::optional > bucket_attrs; + std::optional bucket; + std::unique_ptr flow_mgr; + rgw_sync_policy_info sync_policy; + + RGWBucketSyncFlowManager::pipe_set source_pipes; + RGWBucketSyncFlowManager::pipe_set target_pipes; + + std::map sources; /* source pipes by source zone id */ + std::map targets; /* target pipes by target zone id */ + + std::set source_zones; + std::set target_zones; + + std::set source_hints; + std::set target_hints; + std::set resolved_sources; + std::set resolved_dests; + + + bool bucket_is_sync_source() const { + return !targets.empty() || !resolved_dests.empty(); + } + + bool bucket_is_sync_target() const { + return !sources.empty() || !resolved_sources.empty(); + } + + RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, + const RGWBucketInfo& _bucket_info, + std::map&& _bucket_attrs); + + RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, + const rgw_bucket& _bucket, + std::optional _sync_policy); +public: + RGWBucketSyncPolicyHandler(RGWSI_Zone *_zone_svc, + RGWSI_SyncModules *sync_modules_svc, + RGWSI_Bucket_Sync *bucket_sync_svc, + std::optional effective_zone = std::nullopt); + + RGWBucketSyncPolicyHandler *alloc_child(const RGWBucketInfo& bucket_info, + std::map&& bucket_attrs) const; + RGWBucketSyncPolicyHandler *alloc_child(const rgw_bucket& bucket, + std::optional sync_policy) const; + + int init(const DoutPrefixProvider *dpp, optional_yield y); + + void reflect(const DoutPrefixProvider *dpp, RGWBucketSyncFlowManager::pipe_set *psource_pipes, + RGWBucketSyncFlowManager::pipe_set *ptarget_pipes, + std::map *psources, + std::map *ptargets, + std::set *psource_zones, + std::set *ptarget_zones, + bool only_enabled) const; + + void set_resolved_hints(std::set&& _resolved_sources, + std::set&& _resolved_dests) { + resolved_sources = std::move(_resolved_sources); + resolved_dests = std::move(_resolved_dests); + } + + const std::set& get_resolved_source_hints() { + return resolved_sources; + } + + const std::set& get_resolved_dest_hints() { + return resolved_dests; + } + + const std::set& get_source_zones() const { + return source_zones; + } + + const std::set& get_target_zones() const { + return target_zones; + } + + const std::map& get_sources() { + return sources; + } + + std::multimap get_all_sources() const; + std::multimap get_all_dests() const; + std::multimap get_all_dests_in_zone(const rgw_zone_id& zone_id) const; + + const std::map& get_targets() { + return targets; + } + + const std::optional& get_bucket_info() const { + return bucket_info; + } + + const std::optional >& get_bucket_attrs() const { + return bucket_attrs; + } + + void get_pipes(RGWBucketSyncFlowManager::pipe_set **_sources, RGWBucketSyncFlowManager::pipe_set **_targets) 
{ /* return raw pipes (with zone name) */ + *_sources = &source_pipes; + *_targets = &target_pipes; + } + void get_pipes(std::set *sources, std::set *targets, + std::optional filter_peer); + + const std::set& get_source_hints() const { + return source_hints; + } + + const std::set& get_target_hints() const { + return target_hints; + } + + bool bucket_exports_data() const; + bool bucket_imports_data() const; + + const rgw_sync_policy_info& get_sync_policy() const { + return sync_policy; + } + + bool is_legacy_config() const { + return legacy_config; + } +}; + diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc new file mode 100644 index 00000000000..05079723792 --- /dev/null +++ b/src/rgw/driver/rados/rgw_cr_rados.cc @@ -0,0 +1,1138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "include/compat.h" +#include "rgw_sal.h" +#include "rgw_zone.h" +#include "rgw_coroutine.h" +#include "rgw_cr_rados.h" +#include "rgw_sync_counters.h" +#include "rgw_bucket.h" +#include "rgw_datalog_notify.h" +#include "rgw_cr_rest.h" +#include "rgw_rest_conn.h" +#include "rgw_rados.h" + +#include "services/svc_zone.h" +#include "services/svc_zone_utils.h" +#include "services/svc_sys_obj.h" +#include "services/svc_cls.h" + +#include "cls/lock/cls_lock_client.h" +#include "cls/rgw/cls_rgw_client.h" + +#include +#include + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +bool RGWAsyncRadosProcessor::RGWWQ::_enqueue(RGWAsyncRadosRequest *req) { + if (processor->is_going_down()) { + return false; + } + req->get(); + processor->m_req_queue.push_back(req); + dout(20) << "enqueued request req=" << hex << req << dec << dendl; + _dump_queue(); + return true; +} + +bool RGWAsyncRadosProcessor::RGWWQ::_empty() { + return processor->m_req_queue.empty(); +} + +RGWAsyncRadosRequest *RGWAsyncRadosProcessor::RGWWQ::_dequeue() { + if (processor->m_req_queue.empty()) + return NULL; + RGWAsyncRadosRequest *req = processor->m_req_queue.front(); + processor->m_req_queue.pop_front(); + dout(20) << "dequeued request req=" << hex << req << dec << dendl; + _dump_queue(); + return req; +} + +void RGWAsyncRadosProcessor::RGWWQ::_process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) { + processor->handle_request(this, req); + processor->req_throttle.put(1); +} + +void RGWAsyncRadosProcessor::RGWWQ::_dump_queue() { + if (!g_conf()->subsys.should_gather()) { + return; + } + deque::iterator iter; + if (processor->m_req_queue.empty()) { + dout(20) << "RGWWQ: empty" << dendl; + return; + } + dout(20) << "RGWWQ:" << dendl; + for (iter = processor->m_req_queue.begin(); iter != processor->m_req_queue.end(); ++iter) { + dout(20) << "req: " << hex << *iter << dec << dendl; + } +} + +RGWAsyncRadosProcessor::RGWAsyncRadosProcessor(CephContext *_cct, int num_threads) + : cct(_cct), m_tp(cct, "RGWAsyncRadosProcessor::m_tp", "rados_async", num_threads), + req_throttle(_cct, "rgw_async_rados_ops", num_threads * 2), + req_wq(this, + ceph::make_timespan(g_conf()->rgw_op_thread_timeout), + ceph::make_timespan(g_conf()->rgw_op_thread_suicide_timeout), + &m_tp) { +} + +void RGWAsyncRadosProcessor::start() { + m_tp.start(); +} + +void RGWAsyncRadosProcessor::stop() { + going_down = true; + m_tp.drain(&req_wq); + m_tp.stop(); + for (auto iter = m_req_queue.begin(); iter != m_req_queue.end(); ++iter) { + (*iter)->put(); + } +} + +void RGWAsyncRadosProcessor::handle_request(const 
DoutPrefixProvider *dpp, RGWAsyncRadosRequest *req) { + req->send_request(dpp); + req->put(); +} + +void RGWAsyncRadosProcessor::queue(RGWAsyncRadosRequest *req) { + req_throttle.get(1); + req_wq.queue(req); +} + +int RGWAsyncGetSystemObj::_send_request(const DoutPrefixProvider *dpp) +{ + map *pattrs = want_attrs ? &attrs : nullptr; + + auto sysobj = svc_sysobj->get_obj(obj); + return sysobj.rop() + .set_objv_tracker(&objv_tracker) + .set_attrs(pattrs) + .set_raw_attrs(raw_attrs) + .read(dpp, &bl, null_yield); +} + +RGWAsyncGetSystemObj::RGWAsyncGetSystemObj(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + bool want_attrs, bool raw_attrs) + : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc_sysobj(_svc), + obj(_obj), want_attrs(want_attrs), raw_attrs(raw_attrs) +{ + if (_objv_tracker) { + objv_tracker = *_objv_tracker; + } +} + +int RGWSimpleRadosReadAttrsCR::send_request(const DoutPrefixProvider *dpp) +{ + req = new RGWAsyncGetSystemObj(dpp, this, stack->create_completion_notifier(), + svc, objv_tracker, obj, true, raw_attrs); + async_rados->queue(req); + return 0; +} + +int RGWSimpleRadosReadAttrsCR::request_complete() +{ + if (pattrs) { + *pattrs = std::move(req->attrs); + } + if (objv_tracker) { + *objv_tracker = req->objv_tracker; + } + return req->get_ret_status(); +} + +int RGWAsyncPutSystemObj::_send_request(const DoutPrefixProvider *dpp) +{ + auto sysobj = svc->get_obj(obj); + return sysobj.wop() + .set_objv_tracker(&objv_tracker) + .set_exclusive(exclusive) + .write_data(dpp, bl, null_yield); +} + +RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(const DoutPrefixProvider *_dpp, + RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, + RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + bool _exclusive, bufferlist _bl) + : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc(_svc), + obj(_obj), exclusive(_exclusive), bl(std::move(_bl)) +{ + if (_objv_tracker) { + objv_tracker = *_objv_tracker; + } +} + +int RGWAsyncPutSystemObjAttrs::_send_request(const DoutPrefixProvider *dpp) +{ + auto sysobj = svc->get_obj(obj); + return sysobj.wop() + .set_objv_tracker(&objv_tracker) + .set_exclusive(exclusive) + .set_attrs(attrs) + .write_attrs(dpp, null_yield); +} + +RGWAsyncPutSystemObjAttrs::RGWAsyncPutSystemObjAttrs(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, + RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + map _attrs, bool exclusive) + : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc(_svc), + obj(_obj), attrs(std::move(_attrs)), exclusive(exclusive) +{ + if (_objv_tracker) { + objv_tracker = *_objv_tracker; + } +} + + +RGWOmapAppend::RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, + uint64_t _window_size) + : RGWConsumerCR(_store->ctx()), async_rados(_async_rados), + store(_store), obj(_obj), going_down(false), num_pending_entries(0), window_size(_window_size), total_entries(0) +{ +} + +int RGWAsyncLockSystemObj::_send_request(const DoutPrefixProvider *dpp) +{ + rgw_rados_ref ref; + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + rados::cls::lock::Lock l(lock_name); + utime_t duration(duration_secs, 0); + l.set_duration(duration); + l.set_cookie(cookie); + 
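+  // may_renew lets the same locker re-issue lock_exclusive() on a lock it
+  // already holds, refreshing the expiration instead of failing; this is
+  // what allows RGWContinuousLeaseCR (further down) to renew by simply
+  // re-running RGWSimpleRadosLockCR every interval.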
l.set_may_renew(true); + + return l.lock_exclusive(&ref.pool.ioctx(), ref.obj.oid); +} + +RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + const string& _name, const string& _cookie, uint32_t _duration_secs) : RGWAsyncRadosRequest(caller, cn), store(_store), + obj(_obj), + lock_name(_name), + cookie(_cookie), + duration_secs(_duration_secs) +{ +} + +int RGWAsyncUnlockSystemObj::_send_request(const DoutPrefixProvider *dpp) +{ + rgw_rados_ref ref; + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + rados::cls::lock::Lock l(lock_name); + + l.set_cookie(cookie); + + return l.unlock(&ref.pool.ioctx(), ref.obj.oid); +} + +RGWAsyncUnlockSystemObj::RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + const string& _name, const string& _cookie) : RGWAsyncRadosRequest(caller, cn), store(_store), + obj(_obj), + lock_name(_name), cookie(_cookie) +{ +} + +RGWRadosSetOmapKeysCR::RGWRadosSetOmapKeysCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + map& _entries) : RGWSimpleCoroutine(_store->ctx()), + store(_store), + entries(_entries), + obj(_obj), cn(NULL) +{ + stringstream& s = set_description(); + s << "set omap keys dest=" << obj << " keys=[" << s.str() << "]"; + for (auto i = entries.begin(); i != entries.end(); ++i) { + if (i != entries.begin()) { + s << ", "; + } + s << i->first; + } + s << "]"; +} + +int RGWRadosSetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) +{ + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + set_status() << "sending request"; + + librados::ObjectWriteOperation op; + op.omap_set(entries); + + cn = stack->create_completion_notifier(); + return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); +} + +int RGWRadosSetOmapKeysCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWRadosGetOmapKeysCR::RGWRadosGetOmapKeysCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const string& _marker, + int _max_entries, + ResultPtr _result) + : RGWSimpleCoroutine(_store->ctx()), store(_store), obj(_obj), + marker(_marker), max_entries(_max_entries), + result(std::move(_result)) +{ + ceph_assert(result); // must be allocated + set_description() << "get omap keys dest=" << obj << " marker=" << marker; +} + +int RGWRadosGetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &result->ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + set_status() << "send request"; + + librados::ObjectReadOperation op; + op.omap_get_keys2(marker, max_entries, &result->entries, &result->more, nullptr); + + cn = stack->create_completion_notifier(result); + return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); +} + +int RGWRadosGetOmapKeysCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + 
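+  // on success the aio op has already filled result->entries and
+  // result->more via omap_get_keys2() in send_request(); r is 0 or a
+  // negative errno from the completion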
return r; +} + +RGWRadosGetOmapValsCR::RGWRadosGetOmapValsCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const string& _marker, + int _max_entries, + ResultPtr _result) + : RGWSimpleCoroutine(_store->ctx()), store(_store), obj(_obj), + marker(_marker), max_entries(_max_entries), + result(std::move(_result)) +{ + ceph_assert(result); // must be allocated + set_description() << "get omap keys dest=" << obj << " marker=" << marker; +} + +int RGWRadosGetOmapValsCR::send_request(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &result->ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + set_status() << "send request"; + + librados::ObjectReadOperation op; + op.omap_get_vals2(marker, max_entries, &result->entries, &result->more, nullptr); + + cn = stack->create_completion_notifier(result); + return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); +} + +int RGWRadosGetOmapValsCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWRadosRemoveOmapKeysCR::RGWRadosRemoveOmapKeysCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const set& _keys) : RGWSimpleCoroutine(_store->ctx()), + store(_store), + keys(_keys), + obj(_obj), cn(NULL) +{ + set_description() << "remove omap keys dest=" << obj << " keys=" << keys; +} + +int RGWRadosRemoveOmapKeysCR::send_request(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + set_status() << "send request"; + + librados::ObjectWriteOperation op; + op.omap_rm_keys(keys); + + cn = stack->create_completion_notifier(); + return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); +} + +int RGWRadosRemoveOmapKeysCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWRadosRemoveCR::RGWRadosRemoveCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, + RGWObjVersionTracker* objv_tracker) + : RGWSimpleCoroutine(store->ctx()), + store(store), obj(obj), objv_tracker(objv_tracker) +{ + set_description() << "remove dest=" << obj; +} + +int RGWRadosRemoveCR::send_request(const DoutPrefixProvider *dpp) +{ + auto rados = store->getRados()->get_rados_handle(); + int r = rados->ioctx_create(obj.pool.name.c_str(), ioctx); + if (r < 0) { + lderr(cct) << "ERROR: failed to open pool (" << obj.pool.name << ") ret=" << r << dendl; + return r; + } + ioctx.locator_set_key(obj.loc); + + set_status() << "send request"; + + librados::ObjectWriteOperation op; + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + op.remove(); + + cn = stack->create_completion_notifier(); + return ioctx.aio_operate(obj.oid, cn->completion(), &op); +} + +int RGWRadosRemoveCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + librados::IoCtx&& ioctx, + std::string_view oid, + RGWObjVersionTracker* objv_tracker) + : RGWSimpleCoroutine(store->ctx()), ioctx(std::move(ioctx)), + oid(std::string(oid)), objv_tracker(objv_tracker) +{ + set_description() << "remove dest=" << oid; +} + 
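+// The next two constructors adapt an RGWSI_RADOS::Obj handle, copying or
+// moving its underlying IoCtx and oid, so that a caller already holding a
+// wrapped object handle does not have to unpack it, e.g. (hypothetical
+// call site, from inside a coroutine):
+//
+//   call(new RGWRadosRemoveOidCR(store, std::move(sysobj_handle), nullptr));
+//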
+RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + RGWSI_RADOS::Obj& obj, + RGWObjVersionTracker* objv_tracker) + : RGWSimpleCoroutine(store->ctx()), + ioctx(librados::IoCtx(obj.get_ref().pool.ioctx())), + oid(obj.get_ref().obj.oid), + objv_tracker(objv_tracker) +{ + set_description() << "remove dest=" << oid; +} + +RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + RGWSI_RADOS::Obj&& obj, + RGWObjVersionTracker* objv_tracker) + : RGWSimpleCoroutine(store->ctx()), + ioctx(std::move(obj.get_ref().pool.ioctx())), + oid(std::move(obj.get_ref().obj.oid)), + objv_tracker(objv_tracker) +{ + set_description() << "remove dest=" << oid; +} + +int RGWRadosRemoveOidCR::send_request(const DoutPrefixProvider *dpp) +{ + librados::ObjectWriteOperation op; + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + op.remove(); + + cn = stack->create_completion_notifier(); + return ioctx.aio_operate(oid, cn->completion(), &op); +} + +int RGWRadosRemoveOidCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const string& _lock_name, + const string& _cookie, + uint32_t _duration) : RGWSimpleCoroutine(_store->ctx()), + async_rados(_async_rados), + store(_store), + lock_name(_lock_name), + cookie(_cookie), + duration(_duration), + obj(_obj), + req(NULL) +{ + set_description() << "rados lock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie << " duration=" << duration; +} + +void RGWSimpleRadosLockCR::request_cleanup() +{ + if (req) { + req->finish(); + req = NULL; + } +} + +int RGWSimpleRadosLockCR::send_request(const DoutPrefixProvider *dpp) +{ + set_status() << "sending request"; + req = new RGWAsyncLockSystemObj(this, stack->create_completion_notifier(), + store, NULL, obj, lock_name, cookie, duration); + async_rados->queue(req); + return 0; +} + +int RGWSimpleRadosLockCR::request_complete() +{ + set_status() << "request complete; ret=" << req->get_ret_status(); + return req->get_ret_status(); +} + +RGWSimpleRadosUnlockCR::RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const string& _lock_name, + const string& _cookie) : RGWSimpleCoroutine(_store->ctx()), + async_rados(_async_rados), + store(_store), + lock_name(_lock_name), + cookie(_cookie), + obj(_obj), + req(NULL) +{ + set_description() << "rados unlock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie; +} + +void RGWSimpleRadosUnlockCR::request_cleanup() +{ + if (req) { + req->finish(); + req = NULL; + } +} + +int RGWSimpleRadosUnlockCR::send_request(const DoutPrefixProvider *dpp) +{ + set_status() << "sending request"; + + req = new RGWAsyncUnlockSystemObj(this, stack->create_completion_notifier(), + store, NULL, obj, lock_name, cookie); + async_rados->queue(req); + return 0; +} + +int RGWSimpleRadosUnlockCR::request_complete() +{ + set_status() << "request complete; ret=" << req->get_ret_status(); + return req->get_ret_status(); +} + +int RGWOmapAppend::operate(const DoutPrefixProvider *dpp) { + reenter(this) { + for (;;) { + if (!has_product() && going_down) { + set_status() << "going down"; + break; + } + set_status() << "waiting for product"; + yield wait_for_product(); + yield { + string entry; + while (consume(&entry)) { + set_status() << "adding entry: " << entry; + 
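+        // stage each consumed key with an empty omap value; the batch is
+        // flushed below once window_size entries accumulate (or when the
+        // coroutine is shutting down)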
entries[entry] = bufferlist(); + if (entries.size() >= window_size) { + break; + } + } + if (entries.size() >= window_size || going_down) { + set_status() << "flushing to omap"; + call(new RGWRadosSetOmapKeysCR(store, obj, entries)); + entries.clear(); + } + } + if (get_ret_status() < 0) { + ldout(cct, 0) << "ERROR: failed to store entries in omap" << dendl; + return set_state(RGWCoroutine_Error); + } + } + /* done with coroutine */ + return set_state(RGWCoroutine_Done); + } + return 0; +} + +void RGWOmapAppend::flush_pending() { + receive(pending_entries); + num_pending_entries = 0; +} + +bool RGWOmapAppend::append(const string& s) { + if (is_done()) { + return false; + } + ++total_entries; + pending_entries.push_back(s); + if (++num_pending_entries >= (int)window_size) { + flush_pending(); + } + return true; +} + +bool RGWOmapAppend::finish() { + going_down = true; + flush_pending(); + set_sleeping(false); + return (!is_done()); +} + +int RGWAsyncGetBucketInstanceInfo::_send_request(const DoutPrefixProvider *dpp) +{ + int r; + if (!bucket.bucket_id.empty()) { + r = store->getRados()->get_bucket_instance_info(bucket, bucket_info, nullptr, &attrs, null_yield, dpp); + } else { + r = store->ctl()->bucket->read_bucket_info(bucket, &bucket_info, null_yield, dpp, + RGWBucketCtl::BucketInstance::GetParams().set_attrs(&attrs)); + } + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to get bucket instance info for " + << bucket << dendl; + return r; + } + + return 0; +} + +int RGWAsyncPutBucketInstanceInfo::_send_request(const DoutPrefixProvider *dpp) +{ + auto r = store->getRados()->put_bucket_instance_info(bucket_info, exclusive, + mtime, attrs, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to put bucket instance info for " + << bucket_info.bucket << dendl; + return r; + } + + return 0; +} + +RGWRadosBILogTrimCR::RGWRadosBILogTrimCR( + const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + const RGWBucketInfo& bucket_info, + int shard_id, + const rgw::bucket_index_layout_generation& generation, + const std::string& start_marker, + const std::string& end_marker) + : RGWSimpleCoroutine(store->ctx()), bucket_info(bucket_info), + shard_id(shard_id), generation(generation), bs(store->getRados()), + start_marker(BucketIndexShardsManager::get_shard_marker(start_marker)), + end_marker(BucketIndexShardsManager::get_shard_marker(end_marker)) +{ +} + +int RGWRadosBILogTrimCR::send_request(const DoutPrefixProvider *dpp) +{ + int r = bs.init(dpp, bucket_info, generation, shard_id); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: bucket shard init failed ret=" << r << dendl; + return r; + } + + bufferlist in; + cls_rgw_bi_log_trim_op call; + call.start_marker = std::move(start_marker); + call.end_marker = std::move(end_marker); + encode(call, in); + + librados::ObjectWriteOperation op; + op.exec(RGW_CLASS, RGW_BI_LOG_TRIM, in); + + cn = stack->create_completion_notifier(); + return bs.bucket_obj.aio_operate(cn->completion(), &op); +} + +int RGWRadosBILogTrimCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + set_status() << "request complete; ret=" << r; + return r; +} + +int RGWAsyncFetchRemoteObj::_send_request(const DoutPrefixProvider *dpp) +{ + RGWObjectCtx obj_ctx(store); + + char buf[16]; + snprintf(buf, sizeof(buf), ".%lld", (long long)store->getRados()->instance_id()); + rgw::sal::Attrs attrs; + + rgw::sal::RadosBucket bucket(store, src_bucket); + rgw::sal::RadosObject src_obj(store, key, &bucket); + rgw::sal::RadosBucket dest_bucket(store, 
dest_bucket_info); + rgw::sal::RadosObject dest_obj(store, dest_key.value_or(key), &dest_bucket); + + std::string etag; + + std::optional bytes_transferred; + int r = store->getRados()->fetch_remote_obj(obj_ctx, + user_id.value_or(rgw_user()), + NULL, /* req_info */ + source_zone, + &dest_obj, + &src_obj, + &dest_bucket, /* dest */ + nullptr, /* source */ + dest_placement_rule, + nullptr, /* real_time* src_mtime, */ + NULL, /* real_time* mtime, */ + NULL, /* const real_time* mod_ptr, */ + NULL, /* const real_time* unmod_ptr, */ + false, /* high precision time */ + NULL, /* const char *if_match, */ + NULL, /* const char *if_nomatch, */ + RGWRados::ATTRSMOD_NONE, + copy_if_newer, + attrs, + RGWObjCategory::Main, + versioned_epoch, + real_time(), /* delete_at */ + NULL, /* string *ptag, */ + &etag, /* string *petag, */ + NULL, /* void (*progress_cb)(off_t, void *), */ + NULL, /* void *progress_data*); */ + dpp, + filter.get(), + &zones_trace, + &bytes_transferred); + + if (r < 0) { + ldpp_dout(dpp, 0) << "store->fetch_remote_obj() returned r=" << r << dendl; + if (counters) { + counters->inc(sync_counters::l_fetch_err, 1); + } + } else { + // r >= 0 + if (bytes_transferred) { + // send notification that object was succesfully synced + std::string user_id = "rgw sync"; + std::string req_id = "0"; + + RGWObjTags obj_tags; + auto iter = attrs.find(RGW_ATTR_TAGS); + if (iter != attrs.end()) { + try { + auto it = iter->second.cbegin(); + obj_tags.decode(it); + } catch (buffer::error &err) { + ldpp_dout(dpp, 1) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl; + } + } + + // NOTE: we create a mutable copy of bucket.get_tenant as the get_notification function expects a std::string&, not const + std::string tenant(dest_bucket.get_tenant()); + + std::unique_ptr notify + = store->get_notification(dpp, &dest_obj, nullptr, rgw::notify::ObjectSyncedCreate, + &dest_bucket, user_id, + tenant, + req_id, null_yield); + + auto notify_res = static_cast(notify.get())->get_reservation(); + int ret = rgw::notify::publish_reserve(dpp, rgw::notify::ObjectSyncedCreate, notify_res, &obj_tags); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: reserving notification failed, with error: " << ret << dendl; + // no need to return, the sync already happened + } else { + ret = rgw::notify::publish_commit(&dest_obj, dest_obj.get_obj_size(), ceph::real_clock::now(), etag, dest_obj.get_instance(), rgw::notify::ObjectSyncedCreate, notify_res, dpp); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; + } + } + } + + if (counters) { + if (bytes_transferred) { + counters->inc(sync_counters::l_fetch, *bytes_transferred); + } else { + counters->inc(sync_counters::l_fetch_not_modified); + } + } + } + return r; +} + +int RGWAsyncStatRemoteObj::_send_request(const DoutPrefixProvider *dpp) +{ + RGWObjectCtx obj_ctx(store); + + string user_id; + char buf[16]; + snprintf(buf, sizeof(buf), ".%lld", (long long)store->getRados()->instance_id()); + + rgw::sal::RadosBucket bucket(store, src_bucket); + rgw::sal::RadosObject src_obj(store, key, &bucket); + + int r = store->getRados()->stat_remote_obj(dpp, + obj_ctx, + rgw_user(user_id), + nullptr, /* req_info */ + source_zone, + &src_obj, + nullptr, /* source */ + pmtime, /* real_time* src_mtime, */ + psize, /* uint64_t * */ + nullptr, /* const real_time* mod_ptr, */ + nullptr, /* const real_time* unmod_ptr, */ + true, /* high precision time */ + nullptr, /* const char *if_match, */ + nullptr, /* const 
char *if_nomatch, */ + pattrs, + pheaders, + nullptr, + nullptr, /* string *ptag, */ + petag); /* string *petag, */ + + if (r < 0) { + ldpp_dout(dpp, 0) << "store->stat_remote_obj() returned r=" << r << dendl; + } + return r; +} + + +int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) +{ + ldpp_dout(dpp, 0) << __func__ << "(): deleting obj=" << obj << dendl; + + obj->set_atomic(); + + RGWObjState *state; + + int ret = obj->get_obj_state(dpp, &state, null_yield); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << "(): get_obj_state() obj=" << obj << " returned ret=" << ret << dendl; + return ret; + } + + /* has there been any racing object write? */ + if (del_if_older && (state->mtime > timestamp)) { + ldpp_dout(dpp, 20) << __func__ << "(): skipping object removal obj=" << obj << " (obj mtime=" << state->mtime << ", request timestamp=" << timestamp << ")" << dendl; + return 0; + } + + RGWAccessControlPolicy policy; + + /* decode policy */ + map::iterator iter = state->attrset.find(RGW_ATTR_ACL); + if (iter != state->attrset.end()) { + auto bliter = iter->second.cbegin(); + try { + policy.decode(bliter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl; + return -EIO; + } + } + + std::unique_ptr del_op = obj->get_delete_op(); + + del_op->params.bucket_owner = bucket->get_info().owner; + del_op->params.obj_owner = policy.get_owner(); + if (del_if_older) { + del_op->params.unmod_since = timestamp; + } + if (versioned) { + del_op->params.versioning_status = BUCKET_VERSIONED; + } + del_op->params.olh_epoch = versioned_epoch; + del_op->params.marker_version_id = marker_version_id; + del_op->params.obj_owner.set_id(rgw_user(owner)); + del_op->params.obj_owner.set_name(owner_display_name); + del_op->params.mtime = timestamp; + del_op->params.high_precision_time = true; + del_op->params.zones_trace = &zones_trace; + + ret = del_op->delete_obj(dpp, null_yield); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << "(): delete_obj() obj=" << obj << " returned ret=" << ret << dendl; + } + return ret; +} + +int RGWContinuousLeaseCR::operate(const DoutPrefixProvider *dpp) +{ + if (aborted) { + caller->set_sleeping(false); + return set_cr_done(); + } + reenter(this) { + last_renew_try_time = ceph::coarse_mono_clock::now(); + while (!going_down) { + yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval)); + current_time = ceph::coarse_mono_clock::now(); + if (current_time - last_renew_try_time > interval_tolerance) { + // renewal should happen between 50%-90% of interval + ldout(store->ctx(), 1) << *this << ": WARNING: did not renew lock " << obj << ":" << lock_name << ": within 90\% of interval. 
" << + (current_time - last_renew_try_time) << " > " << interval_tolerance << dendl; + } + last_renew_try_time = current_time; + + caller->set_sleeping(false); /* will only be relevant when we return, that's why we can do it early */ + if (retcode < 0) { + set_locked(false); + ldout(store->ctx(), 20) << *this << ": couldn't lock " << obj << ":" << lock_name << ": retcode=" << retcode << dendl; + return set_state(RGWCoroutine_Error, retcode); + } + ldout(store->ctx(), 20) << *this << ": successfully locked " << obj << ":" << lock_name << dendl; + set_locked(true); + yield wait(utime_t(interval / 2, 0)); + } + set_locked(false); /* moot at this point anyway */ + yield call(new RGWSimpleRadosUnlockCR(async_rados, store, obj, lock_name, cookie)); + return set_state(RGWCoroutine_Done); + } + return 0; +} + +RGWRadosTimelogAddCR::RGWRadosTimelogAddCR(const DoutPrefixProvider *_dpp, rgw::sal::RadosStore* _store, const string& _oid, + const cls_log_entry& entry) : RGWSimpleCoroutine(_store->ctx()), + dpp(_dpp), + store(_store), + oid(_oid), cn(NULL) +{ + stringstream& s = set_description(); + s << "timelog add entry oid=" << oid << "entry={id=" << entry.id << ", section=" << entry.section << ", name=" << entry.name << "}"; + entries.push_back(entry); +} + +int RGWRadosTimelogAddCR::send_request(const DoutPrefixProvider *dpp) +{ + set_status() << "sending request"; + + cn = stack->create_completion_notifier(); + return store->svc()->cls->timelog.add(dpp, oid, entries, cn->completion(), true, null_yield); +} + +int RGWRadosTimelogAddCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + +RGWRadosTimelogTrimCR::RGWRadosTimelogTrimCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + const std::string& oid, + const real_time& start_time, + const real_time& end_time, + const std::string& from_marker, + const std::string& to_marker) + : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), oid(oid), + start_time(start_time), end_time(end_time), + from_marker(from_marker), to_marker(to_marker) +{ + set_description() << "timelog trim oid=" << oid + << " start_time=" << start_time << " end_time=" << end_time + << " from_marker=" << from_marker << " to_marker=" << to_marker; +} + +int RGWRadosTimelogTrimCR::send_request(const DoutPrefixProvider *dpp) +{ + set_status() << "sending request"; + + cn = stack->create_completion_notifier(); + return store->svc()->cls->timelog.trim(dpp, oid, start_time, end_time, from_marker, + to_marker, cn->completion(), + null_yield); +} + +int RGWRadosTimelogTrimCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + + +RGWSyncLogTrimCR::RGWSyncLogTrimCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, const std::string& oid, + const std::string& to_marker, + std::string *last_trim_marker) + : RGWRadosTimelogTrimCR(dpp, store, oid, real_time{}, real_time{}, + std::string{}, to_marker), + cct(store->ctx()), last_trim_marker(last_trim_marker) +{ +} + +int RGWSyncLogTrimCR::request_complete() +{ + int r = RGWRadosTimelogTrimCR::request_complete(); + if (r != -ENODATA) { + return r; + } + // nothing left to trim, update last_trim_marker + if (*last_trim_marker < to_marker && to_marker != max_marker) { + *last_trim_marker = to_marker; + } + return 0; +} + + +int RGWAsyncStatObj::_send_request(const DoutPrefixProvider *dpp) +{ + rgw_raw_obj raw_obj; + 
store->getRados()->obj_to_raw(bucket_info.placement_rule, obj, &raw_obj); + return store->getRados()->raw_obj_stat(dpp, raw_obj, psize, pmtime, pepoch, + nullptr, nullptr, objv_tracker, null_yield); +} + +RGWStatObjCR::RGWStatObjCR(const DoutPrefixProvider *dpp, + RGWAsyncRadosProcessor *async_rados, rgw::sal::RadosStore* store, + const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize, + real_time* pmtime, uint64_t *pepoch, + RGWObjVersionTracker *objv_tracker) + : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), async_rados(async_rados), + bucket_info(_bucket_info), obj(obj), psize(psize), pmtime(pmtime), pepoch(pepoch), + objv_tracker(objv_tracker) +{ +} + +void RGWStatObjCR::request_cleanup() +{ + if (req) { + req->finish(); + req = NULL; + } +} + +int RGWStatObjCR::send_request(const DoutPrefixProvider *dpp) +{ + req = new RGWAsyncStatObj(dpp, this, stack->create_completion_notifier(), + store, bucket_info, obj, psize, pmtime, pepoch, objv_tracker); + async_rados->queue(req); + return 0; +} + +int RGWStatObjCR::request_complete() +{ + return req->get_ret_status(); +} + +RGWRadosNotifyCR::RGWRadosNotifyCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, + bufferlist& request, uint64_t timeout_ms, + bufferlist *response) + : RGWSimpleCoroutine(store->ctx()), store(store), obj(obj), + request(request), timeout_ms(timeout_ms), response(response) +{ + set_description() << "notify dest=" << obj; +} + +int RGWRadosNotifyCR::send_request(const DoutPrefixProvider *dpp) +{ + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + + set_status() << "sending request"; + + cn = stack->create_completion_notifier(); + return ref.pool.ioctx().aio_notify(ref.obj.oid, cn->completion(), request, + timeout_ms, response); +} + +int RGWRadosNotifyCR::request_complete() +{ + int r = cn->completion()->get_return_value(); + + set_status() << "request complete; ret=" << r; + + return r; +} + + +int RGWDataPostNotifyCR::operate(const DoutPrefixProvider* dpp) +{ + reenter(this) { + using PostNotify2 = RGWPostRESTResourceCR>, int>; + yield { + rgw_http_param_pair pairs[] = { { "type", "data" }, + { "notify2", NULL }, + { "source-zone", source_zone }, + { NULL, NULL } }; + call(new PostNotify2(store->ctx(), conn, &http_manager, "/admin/log", pairs, shards, nullptr)); + } + if (retcode == -ERR_METHOD_NOT_ALLOWED) { + using PostNotify1 = RGWPostRESTResourceCR; + yield { + rgw_http_param_pair pairs[] = { { "type", "data" }, + { "notify", NULL }, + { "source-zone", source_zone }, + { NULL, NULL } }; + auto encoder = rgw_data_notify_v1_encoder{shards}; + call(new PostNotify1(store->ctx(), conn, &http_manager, "/admin/log", pairs, encoder, nullptr)); + } + } + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; +} diff --git a/src/rgw/driver/rados/rgw_cr_rados.h b/src/rgw/driver/rados/rgw_cr_rados.h new file mode 100644 index 00000000000..03c5303ebf7 --- /dev/null +++ b/src/rgw/driver/rados/rgw_cr_rados.h @@ -0,0 +1,1595 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_CR_RADOS_H +#define CEPH_RGW_CR_RADOS_H + +#include +#include "include/ceph_assert.h" +#include "rgw_coroutine.h" +#include "rgw_sal.h" +#include "rgw_sal_rados.h" +#include "common/WorkQueue.h" +#include "common/Throttle.h" + +#include +#include "common/ceph_time.h" + +#include 
"services/svc_sys_obj.h" +#include "services/svc_bucket.h" + +struct rgw_http_param_pair; +class RGWRESTConn; + +class RGWAsyncRadosRequest : public RefCountedObject { + RGWCoroutine *caller; + RGWAioCompletionNotifier *notifier; + + int retcode; + + ceph::mutex lock = ceph::make_mutex("RGWAsyncRadosRequest::lock"); + +protected: + virtual int _send_request(const DoutPrefixProvider *dpp) = 0; +public: + RGWAsyncRadosRequest(RGWCoroutine *_caller, RGWAioCompletionNotifier *_cn) + : caller(_caller), notifier(_cn), retcode(0) { + } + ~RGWAsyncRadosRequest() override { + if (notifier) { + notifier->put(); + } + } + + void send_request(const DoutPrefixProvider *dpp) { + get(); + retcode = _send_request(dpp); + { + std::lock_guard l{lock}; + if (notifier) { + notifier->cb(); // drops its own ref + notifier = nullptr; + } + } + put(); + } + + int get_ret_status() { return retcode; } + + void finish() { + { + std::lock_guard l{lock}; + if (notifier) { + // we won't call notifier->cb() to drop its ref, so drop it here + notifier->put(); + notifier = nullptr; + } + } + put(); + } +}; + + +class RGWAsyncRadosProcessor { + std::deque m_req_queue; + std::atomic going_down = { false }; +protected: + CephContext *cct; + ThreadPool m_tp; + Throttle req_throttle; + + struct RGWWQ : public DoutPrefixProvider, public ThreadPool::WorkQueue { + RGWAsyncRadosProcessor *processor; + RGWWQ(RGWAsyncRadosProcessor *p, + ceph::timespan timeout, ceph::timespan suicide_timeout, + ThreadPool *tp) + : ThreadPool::WorkQueue("RGWWQ", timeout, suicide_timeout, tp), processor(p) {} + + bool _enqueue(RGWAsyncRadosRequest *req) override; + void _dequeue(RGWAsyncRadosRequest *req) override { + ceph_abort(); + } + bool _empty() override; + RGWAsyncRadosRequest *_dequeue() override; + using ThreadPool::WorkQueue::_process; + void _process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) override; + void _dump_queue(); + void _clear() override { + ceph_assert(processor->m_req_queue.empty()); + } + + CephContext *get_cct() const { return processor->cct; } + unsigned get_subsys() const { return ceph_subsys_rgw; } + std::ostream& gen_prefix(std::ostream& out) const { return out << "rgw async rados processor: ";} + + } req_wq; + +public: + RGWAsyncRadosProcessor(CephContext *_cct, int num_threads); + ~RGWAsyncRadosProcessor() {} + void start(); + void stop(); + void handle_request(const DoutPrefixProvider *dpp, RGWAsyncRadosRequest *req); + void queue(RGWAsyncRadosRequest *req); + + bool is_going_down() { + return going_down; + } + +}; + +template +class RGWSimpleWriteOnlyAsyncCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + + P params; + const DoutPrefixProvider *dpp; + + class Request : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + P params; + const DoutPrefixProvider *dpp; + protected: + int _send_request(const DoutPrefixProvider *dpp) override; + public: + Request(RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, + rgw::sal::RadosStore* store, + const P& _params, + const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), + store(store), + params(_params), + dpp(dpp) {} + } *req{nullptr}; + + public: + RGWSimpleWriteOnlyAsyncCR(RGWAsyncRadosProcessor *_async_rados, + rgw::sal::RadosStore* _store, + const P& _params, + const DoutPrefixProvider *_dpp) : RGWSimpleCoroutine(_store->ctx()), + async_rados(_async_rados), + store(_store), + params(_params), + dpp(_dpp) {} + + ~RGWSimpleWriteOnlyAsyncCR() override { + request_cleanup(); + } + 
void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new Request(this, + stack->create_completion_notifier(), + store, + params, + dpp); + + async_rados->queue(req); + return 0; + } + int request_complete() override { + return req->get_ret_status(); + } +}; + + +template +class RGWSimpleAsyncCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + + P params; + std::shared_ptr result; + const DoutPrefixProvider *dpp; + + class Request : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + P params; + std::shared_ptr result; + const DoutPrefixProvider *dpp; + protected: + int _send_request(const DoutPrefixProvider *dpp) override; + public: + Request(const DoutPrefixProvider *dpp, + RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, + rgw::sal::RadosStore* _store, + const P& _params, + std::shared_ptr& _result, + const DoutPrefixProvider *_dpp) : RGWAsyncRadosRequest(caller, cn), + store(_store), + params(_params), + result(_result), + dpp(_dpp) {} + } *req{nullptr}; + + public: + RGWSimpleAsyncCR(RGWAsyncRadosProcessor *_async_rados, + rgw::sal::RadosStore* _store, + const P& _params, + std::shared_ptr& _result, + const DoutPrefixProvider *_dpp) : RGWSimpleCoroutine(_store->ctx()), + async_rados(_async_rados), + store(_store), + params(_params), + result(_result), + dpp(_dpp) {} + + ~RGWSimpleAsyncCR() override { + request_cleanup(); + } + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new Request(dpp, + this, + stack->create_completion_notifier(), + store, + params, + result, + dpp); + + async_rados->queue(req); + return 0; + } + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWGenericAsyncCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + + +public: + class Action { + public: + virtual ~Action() {} + virtual int operate() = 0; + }; + +private: + std::shared_ptr action; + + class Request : public RGWAsyncRadosRequest { + std::shared_ptr action; + protected: + int _send_request(const DoutPrefixProvider *dpp) override { + if (!action) { + return 0; + } + return action->operate(); + } + public: + Request(const DoutPrefixProvider *dpp, + RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, + std::shared_ptr& _action) : RGWAsyncRadosRequest(caller, cn), + action(_action) {} + } *req{nullptr}; + + public: + RGWGenericAsyncCR(CephContext *_cct, + RGWAsyncRadosProcessor *_async_rados, + std::shared_ptr& _action) : RGWSimpleCoroutine(_cct), + async_rados(_async_rados), + action(_action) {} + template + RGWGenericAsyncCR(CephContext *_cct, + RGWAsyncRadosProcessor *_async_rados, + std::shared_ptr& _action) : RGWSimpleCoroutine(_cct), + async_rados(_async_rados), + action(std::static_pointer_cast(_action)) {} + + ~RGWGenericAsyncCR() override { + request_cleanup(); + } + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new Request(dpp, this, + stack->create_completion_notifier(), + action); + + async_rados->queue(req); + return 0; + } + int request_complete() override { + return req->get_ret_status(); + } +}; + + +class RGWAsyncGetSystemObj : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + RGWSI_SysObj* svc_sysobj; + 
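+  // the members below are the request inputs; the public bl/attrs/
+  // objv_tracker members further down receive the results of
+  // _send_request() for the calling coroutine to harvest in
+  // request_complete()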
rgw_raw_obj obj; + const bool want_attrs; + const bool raw_attrs; +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncGetSystemObj(const DoutPrefixProvider *dpp, + RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + bool want_attrs, bool raw_attrs); + + bufferlist bl; + std::map attrs; + RGWObjVersionTracker objv_tracker; +}; + +class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + RGWSI_SysObj *svc; + rgw_raw_obj obj; + bool exclusive; + bufferlist bl; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncPutSystemObj(const DoutPrefixProvider *dpp, RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + bool _exclusive, bufferlist _bl); + + RGWObjVersionTracker objv_tracker; +}; + +class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + RGWSI_SysObj *svc; + rgw_raw_obj obj; + std::map attrs; + bool exclusive; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncPutSystemObjAttrs(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + std::map _attrs, bool exclusive); + + RGWObjVersionTracker objv_tracker; +}; + +class RGWAsyncLockSystemObj : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + rgw_raw_obj obj; + std::string lock_name; + std::string cookie; + uint32_t duration_secs; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + const std::string& _name, const std::string& _cookie, uint32_t _duration_secs); +}; + +class RGWAsyncUnlockSystemObj : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + rgw_raw_obj obj; + std::string lock_name; + std::string cookie; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, + const std::string& _name, const std::string& _cookie); +}; + +template +class RGWSimpleRadosReadCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + RGWAsyncRadosProcessor *async_rados; + RGWSI_SysObj *svc; + + rgw_raw_obj obj; + T *result; + /// on ENOENT, call handle_data() with an empty object instead of failing + const bool empty_on_enoent; + RGWObjVersionTracker *objv_tracker; + RGWAsyncGetSystemObj *req{nullptr}; + +public: + RGWSimpleRadosReadCR(const DoutPrefixProvider *_dpp, + RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, + const rgw_raw_obj& _obj, + T *_result, bool empty_on_enoent = true, + RGWObjVersionTracker *objv_tracker = nullptr) + : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), svc(_svc), + obj(_obj), result(_result), + empty_on_enoent(empty_on_enoent), objv_tracker(objv_tracker) {} + ~RGWSimpleRadosReadCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override; + int 
request_complete() override; + + virtual int handle_data(T& data) { + return 0; + } +}; + +template +int RGWSimpleRadosReadCR::send_request(const DoutPrefixProvider *dpp) +{ + req = new RGWAsyncGetSystemObj(dpp, this, stack->create_completion_notifier(), svc, + objv_tracker, obj, false, false); + async_rados->queue(req); + return 0; +} + +template +int RGWSimpleRadosReadCR::request_complete() +{ + int ret = req->get_ret_status(); + retcode = ret; + if (ret == -ENOENT && empty_on_enoent) { + *result = T(); + } else { + if (ret < 0) { + return ret; + } + if (objv_tracker) { // copy the updated version + *objv_tracker = req->objv_tracker; + } + try { + auto iter = req->bl.cbegin(); + if (iter.end()) { + // allow successful reads with empty buffers. ReadSyncStatus coroutines + // depend on this to be able to read without locking, because the + // cls lock from InitSyncStatus will create an empty object if it didn't + // exist + *result = T(); + } else { + decode(*result, iter); + } + } catch (buffer::error& err) { + return -EIO; + } + } + + return handle_data(*result); +} + +class RGWSimpleRadosReadAttrsCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + RGWAsyncRadosProcessor *async_rados; + RGWSI_SysObj *svc; + + rgw_raw_obj obj; + std::map *pattrs; + bool raw_attrs; + RGWObjVersionTracker* objv_tracker; + RGWAsyncGetSystemObj *req = nullptr; + +public: + RGWSimpleRadosReadAttrsCR(const DoutPrefixProvider *_dpp, RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, + const rgw_raw_obj& _obj, std::map *_pattrs, + bool _raw_attrs, RGWObjVersionTracker* objv_tracker = nullptr) + : RGWSimpleCoroutine(_svc->ctx()), + dpp(_dpp), + async_rados(_async_rados), svc(_svc), + obj(_obj), + pattrs(_pattrs), + raw_attrs(_raw_attrs), + objv_tracker(objv_tracker) + {} + ~RGWSimpleRadosReadAttrsCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +template +class RGWSimpleRadosWriteCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + RGWAsyncRadosProcessor *async_rados; + RGWSI_SysObj *svc; + bufferlist bl; + rgw_raw_obj obj; + RGWObjVersionTracker *objv_tracker; + bool exclusive; + RGWAsyncPutSystemObj *req{nullptr}; + +public: + RGWSimpleRadosWriteCR(const DoutPrefixProvider *_dpp, + RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, + const rgw_raw_obj& _obj, const T& _data, + RGWObjVersionTracker *objv_tracker = nullptr, + bool exclusive = false) + : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), + svc(_svc), obj(_obj), objv_tracker(objv_tracker), exclusive(exclusive) { + encode(_data, bl); + } + + ~RGWSimpleRadosWriteCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncPutSystemObj(dpp, this, stack->create_completion_notifier(), + svc, objv_tracker, obj, exclusive, std::move(bl)); + async_rados->queue(req); + return 0; + } + + int request_complete() override { + if (objv_tracker) { // copy the updated version + *objv_tracker = req->objv_tracker; + } + return req->get_ret_status(); + } +}; + +class RGWSimpleRadosWriteAttrsCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + RGWAsyncRadosProcessor *async_rados; + RGWSI_SysObj *svc; + RGWObjVersionTracker *objv_tracker; + + rgw_raw_obj obj; + 
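+  // xattrs to write; 'exclusive' is forwarded to the underlying sysobj write op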
std::map attrs; + bool exclusive; + RGWAsyncPutSystemObjAttrs *req = nullptr; + +public: + RGWSimpleRadosWriteAttrsCR(const DoutPrefixProvider *_dpp, + RGWAsyncRadosProcessor *_async_rados, + RGWSI_SysObj *_svc, const rgw_raw_obj& _obj, + std::map _attrs, + RGWObjVersionTracker *objv_tracker = nullptr, + bool exclusive = false) + : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), + svc(_svc), objv_tracker(objv_tracker), obj(_obj), + attrs(std::move(_attrs)), exclusive(exclusive) { + } + ~RGWSimpleRadosWriteAttrsCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncPutSystemObjAttrs(dpp, this, stack->create_completion_notifier(), + svc, objv_tracker, obj, std::move(attrs), + exclusive); + async_rados->queue(req); + return 0; + } + + int request_complete() override { + if (objv_tracker) { // copy the updated version + *objv_tracker = req->objv_tracker; + } + return req->get_ret_status(); + } +}; + +class RGWRadosSetOmapKeysCR : public RGWSimpleCoroutine { + rgw::sal::RadosStore* store; + std::map entries; + + rgw_rados_ref ref; + + rgw_raw_obj obj; + + boost::intrusive_ptr cn; + +public: + RGWRadosSetOmapKeysCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + std::map& _entries); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWRadosGetOmapKeysCR : public RGWSimpleCoroutine { + public: + struct Result { + rgw_rados_ref ref; + std::set entries; + bool more = false; + }; + using ResultPtr = std::shared_ptr; + + RGWRadosGetOmapKeysCR(rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, + const std::string& _marker, int _max_entries, + ResultPtr result); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; + + private: + rgw::sal::RadosStore* store; + rgw_raw_obj obj; + std::string marker; + int max_entries; + ResultPtr result; + boost::intrusive_ptr cn; +}; + +class RGWRadosGetOmapValsCR : public RGWSimpleCoroutine { + public: + struct Result { + rgw_rados_ref ref; + std::map entries; + bool more = false; + }; + using ResultPtr = std::shared_ptr; + + RGWRadosGetOmapValsCR(rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, + const std::string& _marker, int _max_entries, + ResultPtr result); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; + + private: + rgw::sal::RadosStore* store; + rgw_raw_obj obj; + std::string marker; + int max_entries; + ResultPtr result; + boost::intrusive_ptr cn; +}; + +class RGWRadosRemoveOmapKeysCR : public RGWSimpleCoroutine { + rgw::sal::RadosStore* store; + + rgw_rados_ref ref; + + std::set keys; + + rgw_raw_obj obj; + + boost::intrusive_ptr cn; + +public: + RGWRadosRemoveOmapKeysCR(rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const std::set& _keys); + + int send_request(const DoutPrefixProvider *dpp) override; + + int request_complete() override; +}; + +class RGWRadosRemoveCR : public RGWSimpleCoroutine { + rgw::sal::RadosStore* store; + librados::IoCtx ioctx; + const rgw_raw_obj obj; + RGWObjVersionTracker* objv_tracker; + boost::intrusive_ptr cn; + +public: + RGWRadosRemoveCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, + RGWObjVersionTracker* objv_tracker = nullptr); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWRadosRemoveOidCR 
: public RGWSimpleCoroutine { + librados::IoCtx ioctx; + const std::string oid; + RGWObjVersionTracker* objv_tracker; + boost::intrusive_ptr cn; + +public: + RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + librados::IoCtx&& ioctx, std::string_view oid, + RGWObjVersionTracker* objv_tracker = nullptr); + + RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + RGWSI_RADOS::Obj& obj, + RGWObjVersionTracker* objv_tracker = nullptr); + + RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, + RGWSI_RADOS::Obj&& obj, + RGWObjVersionTracker* objv_tracker = nullptr); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWSimpleRadosLockCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + std::string lock_name; + std::string cookie; + uint32_t duration; + + rgw_raw_obj obj; + + RGWAsyncLockSystemObj *req; + +public: + RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const std::string& _lock_name, + const std::string& _cookie, + uint32_t _duration); + ~RGWSimpleRadosLockCR() override { + request_cleanup(); + } + void request_cleanup() override; + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; + + static std::string gen_random_cookie(CephContext* cct) { +#define COOKIE_LEN 16 + char buf[COOKIE_LEN + 1]; + gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); + return buf; + } +}; + +class RGWSimpleRadosUnlockCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + std::string lock_name; + std::string cookie; + + rgw_raw_obj obj; + + RGWAsyncUnlockSystemObj *req; + +public: + RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const std::string& _lock_name, + const std::string& _cookie); + ~RGWSimpleRadosUnlockCR() override { + request_cleanup(); + } + void request_cleanup() override; + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +#define OMAP_APPEND_MAX_ENTRIES_DEFAULT 100 + +class RGWOmapAppend : public RGWConsumerCR { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + + rgw_raw_obj obj; + + bool going_down; + + int num_pending_entries; + std::list pending_entries; + + std::map entries; + + uint64_t window_size; + uint64_t total_entries; +public: + RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + uint64_t _window_size = OMAP_APPEND_MAX_ENTRIES_DEFAULT); + int operate(const DoutPrefixProvider *dpp) override; + void flush_pending(); + bool append(const std::string& s); + bool finish(); + + uint64_t get_total_entries() { + return total_entries; + } + + const rgw_raw_obj& get_obj() { + return obj; + } +}; + +class RGWShardedOmapCRManager { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + RGWCoroutine *op; + + int num_shards; + + std::vector shards; +public: + RGWShardedOmapCRManager(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, RGWCoroutine *_op, int _num_shards, const rgw_pool& pool, const std::string& oid_prefix) + : async_rados(_async_rados), + store(_store), op(_op), num_shards(_num_shards) { + shards.reserve(num_shards); + for (int i = 0; i < num_shards; ++i) { + char buf[oid_prefix.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%d", oid_prefix.c_str(), i); + RGWOmapAppend *shard = new 
RGWOmapAppend(async_rados, store, rgw_raw_obj(pool, buf)); + shard->get(); + shards.push_back(shard); + op->spawn(shard, false); + } + } + + ~RGWShardedOmapCRManager() { + for (auto shard : shards) { + shard->put(); + } + } + + bool append(const std::string& entry, int shard_id) { + return shards[shard_id]->append(entry); + } + bool finish() { + bool success = true; + for (auto& append_op : shards) { + success &= (append_op->finish() && (!append_op->is_error())); + } + return success; + } + + uint64_t get_total_entries(int shard_id) { + return shards[shard_id]->get_total_entries(); + } +}; + +class RGWAsyncGetBucketInstanceInfo : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + rgw_bucket bucket; + const DoutPrefixProvider *dpp; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncGetBucketInstanceInfo(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, + rgw::sal::RadosStore* _store, const rgw_bucket& bucket, + const DoutPrefixProvider *dpp) + : RGWAsyncRadosRequest(caller, cn), store(_store), bucket(bucket), dpp(dpp) {} + + RGWBucketInfo bucket_info; + std::map attrs; +}; + +class RGWAsyncPutBucketInstanceInfo : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + RGWBucketInfo& bucket_info; + bool exclusive; + real_time mtime; + std::map* attrs; + const DoutPrefixProvider *dpp; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncPutBucketInstanceInfo(RGWCoroutine* caller, + RGWAioCompletionNotifier* cn, + rgw::sal::RadosStore* store, + RGWBucketInfo& bucket_info, + bool exclusive, + real_time mtime, + std::map* attrs, + const DoutPrefixProvider* dpp) + : RGWAsyncRadosRequest(caller, cn), store(store), bucket_info(bucket_info), + exclusive(exclusive), mtime(mtime), attrs(attrs), dpp(dpp) {} +}; + +class RGWGetBucketInstanceInfoCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + rgw_bucket bucket; + RGWBucketInfo *bucket_info; + std::map *pattrs; + const DoutPrefixProvider *dpp; + + RGWAsyncGetBucketInstanceInfo *req{nullptr}; + +public: + // rgw_bucket constructor + RGWGetBucketInstanceInfoCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_bucket& _bucket, RGWBucketInfo *_bucket_info, + std::map *_pattrs, const DoutPrefixProvider *dpp) + : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store), + bucket(_bucket), bucket_info(_bucket_info), pattrs(_pattrs), dpp(dpp) {} + ~RGWGetBucketInstanceInfoCR() override { + request_cleanup(); + } + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncGetBucketInstanceInfo(this, stack->create_completion_notifier(), store, bucket, dpp); + async_rados->queue(req); + return 0; + } + int request_complete() override { + if (bucket_info) { + *bucket_info = std::move(req->bucket_info); + } + if (pattrs) { + *pattrs = std::move(req->attrs); + } + return req->get_ret_status(); + } +}; + +class RGWPutBucketInstanceInfoCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + RGWBucketInfo& bucket_info; + bool exclusive; + real_time mtime; + std::map* attrs; + const DoutPrefixProvider *dpp; + + RGWAsyncPutBucketInstanceInfo* req = nullptr; + +public: + // rgw_bucket constructor + RGWPutBucketInstanceInfoCR(RGWAsyncRadosProcessor *async_rados, + rgw::sal::RadosStore* store, + RGWBucketInfo& 
bucket_info, + bool exclusive, + real_time mtime, + std::map* attrs, + const DoutPrefixProvider *dpp) + : RGWSimpleCoroutine(store->ctx()), async_rados(async_rados), store(store), + bucket_info(bucket_info), exclusive(exclusive), + mtime(mtime), attrs(attrs), dpp(dpp) {} + ~RGWPutBucketInstanceInfoCR() override { + request_cleanup(); + } + void request_cleanup() override { + if (req) { + req->finish(); + req = nullptr; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncPutBucketInstanceInfo(this, + stack->create_completion_notifier(), + store, bucket_info, exclusive, + mtime, attrs, dpp); + async_rados->queue(req); + return 0; + } + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWRadosBILogTrimCR : public RGWSimpleCoroutine { + const RGWBucketInfo& bucket_info; + int shard_id; + const rgw::bucket_index_layout_generation generation; + RGWRados::BucketShard bs; + std::string start_marker; + std::string end_marker; + boost::intrusive_ptr cn; + public: + RGWRadosBILogTrimCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info, + int shard_id, + const rgw::bucket_index_layout_generation& generation, + const std::string& start_marker, + const std::string& end_marker); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWAsyncFetchRemoteObj : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + std::optional user_id; + + rgw_bucket src_bucket; + std::optional dest_placement_rule; + RGWBucketInfo dest_bucket_info; + + rgw_obj_key key; + std::optional dest_key; + std::optional versioned_epoch; + + real_time src_mtime; + + bool copy_if_newer; + std::shared_ptr filter; + rgw_zone_set zones_trace; + PerfCounters* counters; + const DoutPrefixProvider *dpp; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncFetchRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + std::optional& _user_id, + const rgw_bucket& _src_bucket, + std::optional _dest_placement_rule, + const RGWBucketInfo& _dest_bucket_info, + const rgw_obj_key& _key, + const std::optional& _dest_key, + std::optional _versioned_epoch, + bool _if_newer, + std::shared_ptr _filter, + rgw_zone_set *_zones_trace, + PerfCounters* counters, const DoutPrefixProvider *dpp) + : RGWAsyncRadosRequest(caller, cn), store(_store), + source_zone(_source_zone), + user_id(_user_id), + src_bucket(_src_bucket), + dest_placement_rule(_dest_placement_rule), + dest_bucket_info(_dest_bucket_info), + key(_key), + dest_key(_dest_key), + versioned_epoch(_versioned_epoch), + copy_if_newer(_if_newer), + filter(_filter), + counters(counters), + dpp(dpp) + { + if (_zones_trace) { + zones_trace = *_zones_trace; + } + } +}; + +class RGWFetchRemoteObjCR : public RGWSimpleCoroutine { + CephContext *cct; + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + std::optional user_id; + + rgw_bucket src_bucket; + std::optional dest_placement_rule; + RGWBucketInfo dest_bucket_info; + + rgw_obj_key key; + std::optional dest_key; + std::optional versioned_epoch; + + real_time src_mtime; + + bool copy_if_newer; + + std::shared_ptr filter; + + RGWAsyncFetchRemoteObj *req; + rgw_zone_set *zones_trace; + PerfCounters* counters; + const DoutPrefixProvider *dpp; + +public: + RGWFetchRemoteObjCR(RGWAsyncRadosProcessor 
*_async_rados, rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + std::optional _user_id, + const rgw_bucket& _src_bucket, + std::optional _dest_placement_rule, + const RGWBucketInfo& _dest_bucket_info, + const rgw_obj_key& _key, + const std::optional& _dest_key, + std::optional _versioned_epoch, + bool _if_newer, + std::shared_ptr _filter, + rgw_zone_set *_zones_trace, + PerfCounters* counters, const DoutPrefixProvider *dpp) + : RGWSimpleCoroutine(_store->ctx()), cct(_store->ctx()), + async_rados(_async_rados), store(_store), + source_zone(_source_zone), + user_id(_user_id), + src_bucket(_src_bucket), + dest_placement_rule(_dest_placement_rule), + dest_bucket_info(_dest_bucket_info), + key(_key), + dest_key(_dest_key), + versioned_epoch(_versioned_epoch), + copy_if_newer(_if_newer), + filter(_filter), + req(NULL), + zones_trace(_zones_trace), counters(counters), dpp(dpp) {} + + + ~RGWFetchRemoteObjCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncFetchRemoteObj(this, stack->create_completion_notifier(), store, + source_zone, user_id, src_bucket, dest_placement_rule, dest_bucket_info, + key, dest_key, versioned_epoch, copy_if_newer, filter, + zones_trace, counters, dpp); + async_rados->queue(req); + return 0; + } + + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWAsyncStatRemoteObj : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + rgw_bucket src_bucket; + rgw_obj_key key; + + ceph::real_time *pmtime; + uint64_t *psize; + std::string *petag; + std::map *pattrs; + std::map *pheaders; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncStatRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + rgw_bucket& _src_bucket, + const rgw_obj_key& _key, + ceph::real_time *_pmtime, + uint64_t *_psize, + std::string *_petag, + std::map *_pattrs, + std::map *_pheaders) : RGWAsyncRadosRequest(caller, cn), store(_store), + source_zone(_source_zone), + src_bucket(_src_bucket), + key(_key), + pmtime(_pmtime), + psize(_psize), + petag(_petag), + pattrs(_pattrs), + pheaders(_pheaders) {} +}; + +class RGWStatRemoteObjCR : public RGWSimpleCoroutine { + CephContext *cct; + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + rgw_bucket src_bucket; + rgw_obj_key key; + + ceph::real_time *pmtime; + uint64_t *psize; + std::string *petag; + std::map *pattrs; + std::map *pheaders; + + RGWAsyncStatRemoteObj *req; + +public: + RGWStatRemoteObjCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + rgw_bucket& _src_bucket, + const rgw_obj_key& _key, + ceph::real_time *_pmtime, + uint64_t *_psize, + std::string *_petag, + std::map *_pattrs, + std::map *_pheaders) : RGWSimpleCoroutine(_store->ctx()), cct(_store->ctx()), + async_rados(_async_rados), store(_store), + source_zone(_source_zone), + src_bucket(_src_bucket), + key(_key), + pmtime(_pmtime), + psize(_psize), + petag(_petag), + pattrs(_pattrs), + pheaders(_pheaders), + req(NULL) {} + + + ~RGWStatRemoteObjCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + 
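+    // queue the blocking remote stat on the async-rados thread pool; the
+    // coroutine is resumed through the completion notifier, and
+    // request_complete() below just surfaces the request's return status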
req = new RGWAsyncStatRemoteObj(this, stack->create_completion_notifier(), store, source_zone, + src_bucket, key, pmtime, psize, petag, pattrs, pheaders); + async_rados->queue(req); + return 0; + } + + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWAsyncRemoveObj : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + std::unique_ptr bucket; + std::unique_ptr obj; + + std::string owner; + std::string owner_display_name; + bool versioned; + uint64_t versioned_epoch; + std::string marker_version_id; + + bool del_if_older; + ceph::real_time timestamp; + rgw_zone_set zones_trace; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncRemoveObj(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, + rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + RGWBucketInfo& _bucket_info, + const rgw_obj_key& _key, + const std::string& _owner, + const std::string& _owner_display_name, + bool _versioned, + uint64_t _versioned_epoch, + bool _delete_marker, + bool _if_older, + real_time& _timestamp, + rgw_zone_set* _zones_trace) : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), store(_store), + source_zone(_source_zone), + owner(_owner), + owner_display_name(_owner_display_name), + versioned(_versioned), + versioned_epoch(_versioned_epoch), + del_if_older(_if_older), + timestamp(_timestamp) { + if (_delete_marker) { + marker_version_id = _key.instance; + } + + if (_zones_trace) { + zones_trace = *_zones_trace; + } + store->get_bucket(nullptr, _bucket_info, &bucket); + obj = bucket->get_object(_key); + } +}; + +class RGWRemoveObjCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + CephContext *cct; + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + rgw_zone_id source_zone; + + RGWBucketInfo bucket_info; + + rgw_obj_key key; + bool versioned; + uint64_t versioned_epoch; + bool delete_marker; + std::string owner; + std::string owner_display_name; + + bool del_if_older; + real_time timestamp; + + RGWAsyncRemoveObj *req; + + rgw_zone_set *zones_trace; + +public: + RGWRemoveObjCR(const DoutPrefixProvider *_dpp, RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_zone_id& _source_zone, + RGWBucketInfo& _bucket_info, + const rgw_obj_key& _key, + bool _versioned, + uint64_t _versioned_epoch, + std::string *_owner, + std::string *_owner_display_name, + bool _delete_marker, + real_time *_timestamp, + rgw_zone_set *_zones_trace) : RGWSimpleCoroutine(_store->ctx()), dpp(_dpp), cct(_store->ctx()), + async_rados(_async_rados), store(_store), + source_zone(_source_zone), + bucket_info(_bucket_info), + key(_key), + versioned(_versioned), + versioned_epoch(_versioned_epoch), + delete_marker(_delete_marker), req(NULL), zones_trace(_zones_trace) { + del_if_older = (_timestamp != NULL); + if (_timestamp) { + timestamp = *_timestamp; + } + + if (_owner) { + owner = *_owner; + } + + if (_owner_display_name) { + owner_display_name = *_owner_display_name; + } + } + ~RGWRemoveObjCR() override { + request_cleanup(); + } + + void request_cleanup() override { + if (req) { + req->finish(); + req = NULL; + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncRemoveObj(dpp, this, stack->create_completion_notifier(), store, source_zone, bucket_info, + key, owner, owner_display_name, versioned, versioned_epoch, + delete_marker, del_if_older, timestamp, 
zones_trace); + async_rados->queue(req); + return 0; + } + + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWContinuousLeaseCR : public RGWCoroutine { + RGWAsyncRadosProcessor *async_rados; + rgw::sal::RadosStore* store; + + const rgw_raw_obj obj; + + const std::string lock_name; + const std::string cookie; + + int interval; + bool going_down{ false }; + bool locked{false}; + + const ceph::timespan interval_tolerance; + const ceph::timespan ts_interval; + + RGWCoroutine *caller; + + bool aborted{false}; + + ceph::coarse_mono_time last_renew_try_time; + ceph::coarse_mono_time current_time; + +public: + RGWContinuousLeaseCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, + const rgw_raw_obj& _obj, + const std::string& _lock_name, int _interval, RGWCoroutine *_caller) + : RGWCoroutine(_store->ctx()), async_rados(_async_rados), store(_store), + obj(_obj), lock_name(_lock_name), + cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), + interval(_interval), interval_tolerance(ceph::make_timespan(9*interval/10)), ts_interval(ceph::make_timespan(interval)), + caller(_caller) + {} + + virtual ~RGWContinuousLeaseCR() override; + + int operate(const DoutPrefixProvider *dpp) override; + + bool is_locked() const { + if (ceph::coarse_mono_clock::now() - last_renew_try_time > ts_interval) { + return false; + } + return locked; + } + + void set_locked(bool status) { + locked = status; + } + + void go_down() { + going_down = true; + wakeup(); + } + + void abort() { + aborted = true; + } +}; + +class RGWRadosTimelogAddCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + std::list entries; + + std::string oid; + + boost::intrusive_ptr cn; + +public: + RGWRadosTimelogAddCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* _store, const std::string& _oid, + const cls_log_entry& entry); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWRadosTimelogTrimCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + boost::intrusive_ptr cn; + protected: + std::string oid; + real_time start_time; + real_time end_time; + std::string from_marker; + std::string to_marker; + + public: + RGWRadosTimelogTrimCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, const std::string& oid, + const real_time& start_time, const real_time& end_time, + const std::string& from_marker, + const std::string& to_marker); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +// wrapper to update last_trim_marker on success +class RGWSyncLogTrimCR : public RGWRadosTimelogTrimCR { + CephContext *cct; + std::string *last_trim_marker; + public: + static constexpr const char* max_marker = "99999999"; + + RGWSyncLogTrimCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, const std::string& oid, + const std::string& to_marker, std::string *last_trim_marker); + int request_complete() override; +}; + +class RGWAsyncStatObj : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWBucketInfo bucket_info; + rgw_obj obj; + uint64_t *psize; + real_time *pmtime; + uint64_t *pepoch; + RGWObjVersionTracker *objv_tracker; +protected: + int _send_request(const DoutPrefixProvider *dpp) override; +public: + RGWAsyncStatObj(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* store, + const 
RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr, + real_time *pmtime = nullptr, uint64_t *pepoch = nullptr, + RGWObjVersionTracker *objv_tracker = nullptr) + : RGWAsyncRadosRequest(caller, cn), dpp(dpp), store(store), obj(obj), psize(psize), + pmtime(pmtime), pepoch(pepoch), objv_tracker(objv_tracker) {} +}; + +class RGWStatObjCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWAsyncRadosProcessor *async_rados; + RGWBucketInfo bucket_info; + rgw_obj obj; + uint64_t *psize; + real_time *pmtime; + uint64_t *pepoch; + RGWObjVersionTracker *objv_tracker; + RGWAsyncStatObj *req = nullptr; + public: + RGWStatObjCR(const DoutPrefixProvider *dpp, RGWAsyncRadosProcessor *async_rados, rgw::sal::RadosStore* store, + const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr, + real_time* pmtime = nullptr, uint64_t *pepoch = nullptr, + RGWObjVersionTracker *objv_tracker = nullptr); + ~RGWStatObjCR() override { + request_cleanup(); + } + void request_cleanup() override; + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +/// coroutine wrapper for IoCtx::aio_notify() +class RGWRadosNotifyCR : public RGWSimpleCoroutine { + rgw::sal::RadosStore* const store; + const rgw_raw_obj obj; + bufferlist request; + const uint64_t timeout_ms; + bufferlist *response; + rgw_rados_ref ref; + boost::intrusive_ptr cn; + +public: + RGWRadosNotifyCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, + bufferlist& request, uint64_t timeout_ms, + bufferlist *response); + + int send_request(const DoutPrefixProvider *dpp) override; + int request_complete() override; +}; + +class RGWDataPostNotifyCR : public RGWCoroutine { + RGWRados *store; + RGWHTTPManager& http_manager; + bc::flat_map >& shards; + const char *source_zone; + RGWRESTConn *conn; + +public: + RGWDataPostNotifyCR(RGWRados *_store, RGWHTTPManager& _http_manager, bc::flat_map >& _shards, const char *_zone, RGWRESTConn *_conn) + : RGWCoroutine(_store->ctx()), store(_store), http_manager(_http_manager), + shards(_shards), source_zone(_zone), conn(_conn) {} + + int operate(const DoutPrefixProvider* dpp) override; +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_cr_tools.cc b/src/rgw/driver/rados/rgw_cr_tools.cc new file mode 100644 index 00000000000..94665a35aaa --- /dev/null +++ b/src/rgw/driver/rados/rgw_cr_tools.cc @@ -0,0 +1,292 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" + +#include "rgw_cr_tools.h" +#include "rgw_bucket.h" +#include "rgw_user.h" +#include "rgw_op.h" +#include "rgw_acl_s3.h" +#include "rgw_zone.h" + +#include "services/svc_zone.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +template<> +int RGWUserCreateCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + CephContext *cct = store->ctx(); + + const int32_t default_max_buckets = + cct->_conf.get_val("rgw_user_max_buckets"); + + RGWUserAdminOpState op_state(store); + + auto& user = params.user; + + op_state.set_user_id(user); + op_state.set_display_name(params.display_name); + op_state.set_user_email(params.email); + op_state.set_caps(params.caps); + op_state.set_access_key(params.access_key); + op_state.set_secret_key(params.secret_key); + + if (!params.key_type.empty()) { + int32_t key_type = KEY_TYPE_S3; + if (params.key_type == "swift") { + key_type = KEY_TYPE_SWIFT; + } + + 
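+    // anything other than "swift" keeps the S3 key type selected above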
op_state.set_key_type(key_type); + } + + op_state.set_max_buckets(params.max_buckets.value_or(default_max_buckets)); + op_state.set_suspension(params.suspended); + op_state.set_system(params.system); + op_state.set_exclusive(params.exclusive); + + if (params.generate_key) { + op_state.set_generate_key(); + } + + + if (params.apply_quota) { + RGWQuota quota; + + if (cct->_conf->rgw_bucket_default_quota_max_objects >= 0) { + quota.bucket_quota.max_objects = cct->_conf->rgw_bucket_default_quota_max_objects; + quota.bucket_quota.enabled = true; + } + + if (cct->_conf->rgw_bucket_default_quota_max_size >= 0) { + quota.bucket_quota.max_size = cct->_conf->rgw_bucket_default_quota_max_size; + quota.bucket_quota.enabled = true; + } + + if (cct->_conf->rgw_user_default_quota_max_objects >= 0) { + quota.user_quota.max_objects = cct->_conf->rgw_user_default_quota_max_objects; + quota.user_quota.enabled = true; + } + + if (cct->_conf->rgw_user_default_quota_max_size >= 0) { + quota.user_quota.max_size = cct->_conf->rgw_user_default_quota_max_size; + quota.user_quota.enabled = true; + } + + if (quota.bucket_quota.enabled) { + op_state.set_bucket_quota(quota.bucket_quota); + } + + if (quota.user_quota.enabled) { + op_state.set_user_quota(quota.user_quota); + } + } + + RGWNullFlusher flusher; + return RGWUserAdminOp_User::create(dpp, store, op_state, flusher, null_yield); +} + +template<> +int RGWGetUserInfoCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + return store->ctl()->user->get_info_by_uid(dpp, params.user, result.get(), null_yield); +} + +template<> +int RGWGetBucketInfoCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + return store->get_bucket(dpp, nullptr, params.tenant, params.bucket_name, &result->bucket, null_yield); +} + +template<> +int RGWBucketCreateLocalCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + CephContext *cct = store->ctx(); + auto& zone_svc = store->svc()->zone; + + const auto& user_info = params.user_info.get(); + const auto& user = user_info->user_id; + const auto& bucket_name = params.bucket_name; + auto& placement_rule = params.placement_rule; + + if (!placement_rule.empty() && + !zone_svc->get_zone_params().valid_placement(placement_rule)) { + ldpp_dout(dpp, 0) << "placement target (" << placement_rule << ")" + << " doesn't exist in the placement targets of zonegroup" + << " (" << zone_svc->get_zonegroup().api_name << ")" << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + + /* we need to make sure we read bucket info, it's not read before for this + * specific request */ + RGWBucketInfo bucket_info; + map bucket_attrs; + + int ret = store->getRados()->get_bucket_info(store->svc(), user.tenant, bucket_name, + bucket_info, nullptr, null_yield, dpp, &bucket_attrs); + if (ret < 0 && ret != -ENOENT) + return ret; + bool bucket_exists = (ret != -ENOENT); + + RGWAccessControlPolicy old_policy(cct); + ACLOwner bucket_owner; + bucket_owner.set_id(user); + bucket_owner.set_name(user_info->display_name); + if (bucket_exists) { + ret = rgw_op_get_bucket_policy_from_attr(dpp, cct, store, bucket_info, + bucket_attrs, &old_policy, null_yield); + if (ret >= 0) { + if (old_policy.get_owner().get_id().compare(user) != 0) { + return -EEXIST; + } + } + } + + RGWBucketInfo master_info; + rgw_bucket *pmaster_bucket = nullptr; + uint32_t *pmaster_num_shards = nullptr; + real_time creation_time; + + string zonegroup_id = zone_svc->get_zonegroup().get_id(); + + if (bucket_exists) { + rgw_placement_rule selected_placement_rule; + rgw_bucket bucket; + 
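+    // the bucket already exists: re-run placement selection and require that
+    // it match the stored placement rule; a mismatch is reported as -EEXIST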
bucket.tenant = user.tenant; + bucket.name = bucket_name; + ret = zone_svc->select_bucket_placement(dpp, *user_info, zonegroup_id, + placement_rule, + &selected_placement_rule, nullptr, null_yield); + if (selected_placement_rule != bucket_info.placement_rule) { + ldpp_dout(dpp, 0) << "bucket already exists on a different placement rule: " + << " selected_rule= " << selected_placement_rule + << " existing_rule= " << bucket_info.placement_rule << dendl; + return -EEXIST; + } + } + + /* Encode special metadata first as we're using std::map::emplace under + * the hood. This method will add the new items only if the map doesn't + * contain such keys yet. */ + RGWAccessControlPolicy_S3 policy(cct); + policy.create_canned(bucket_owner, bucket_owner, string()); /* default private policy */ + bufferlist aclbl; + policy.encode(aclbl); + map attrs; + attrs.emplace(std::move(RGW_ATTR_ACL), std::move(aclbl)); + + RGWQuotaInfo quota_info; + const RGWQuotaInfo * pquota_info = nullptr; + + rgw_bucket bucket; + bucket.tenant = user.tenant; + bucket.name = bucket_name; + + RGWBucketInfo info; + obj_version ep_objv; + + ret = store->getRados()->create_bucket(*user_info, bucket, zonegroup_id, + placement_rule, bucket_info.swift_ver_location, + pquota_info, attrs, + info, nullptr, &ep_objv, creation_time, + pmaster_bucket, pmaster_num_shards, null_yield, dpp, true); + + + if (ret && ret != -EEXIST) + return ret; + + bool existed = (ret == -EEXIST); + + if (existed) { + if (info.owner != user) { + ldpp_dout(dpp, 20) << "NOTICE: bucket already exists under a different user (bucket=" << bucket << " user=" << user << " bucket_owner=" << info.owner << dendl; + return -EEXIST; + } + bucket = info.bucket; + } + + ret = store->ctl()->bucket->link_bucket(user, bucket, info.creation_time, null_yield, dpp, false); + if (ret && !existed && ret != -EEXIST) { + /* if it exists (or previously existed), don't remove it! */ + int r = store->ctl()->bucket->unlink_bucket(user, bucket, null_yield, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "WARNING: failed to unlink bucket: ret=" << r << dendl; + } + } else if (ret == -EEXIST || (ret == 0 && existed)) { + ret = -ERR_BUCKET_EXISTS; + } + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: bucket creation (bucket=" << bucket << ") return ret=" << ret << dendl; + } + + return ret; +} + +template<> +int RGWObjectSimplePutCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + RGWDataAccess::ObjectRef obj; + + CephContext *cct = store->ctx(); + + int ret = params.bucket->get_object(params.key, &obj); + if (ret < 0) { + lderr(cct) << "ERROR: failed to get object: " << cpp_strerror(-ret) << dendl; + return -ret; + } + + if (params.user_data) { + obj->set_user_data(*params.user_data); + } + + ret = obj->put(params.data, params.attrs, dpp, null_yield); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: put object returned error: " << cpp_strerror(-ret) << dendl; + } + + return 0; +} + +template<> +int RGWBucketLifecycleConfigCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + CephContext *cct = store->ctx(); + + RGWLC *lc = store->getRados()->get_lc(); + if (!lc) { + lderr(cct) << "ERROR: lifecycle object is not initialized!" 
<< dendl; + return -EIO; + } + + int ret = lc->set_bucket_config(params.bucket, + params.bucket_attrs, + &params.config); + if (ret < 0) { + lderr(cct) << "ERROR: failed to set lifecycle on bucket: " << cpp_strerror(-ret) << dendl; + return -ret; + } + + return 0; +} + +template<> +int RGWBucketGetSyncPolicyHandlerCR::Request::_send_request(const DoutPrefixProvider *dpp) +{ + int r = store->ctl()->bucket->get_sync_policy_handler(params.zone, + params.bucket, + &result->policy_handler, + null_yield, + dpp); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: " << __func__ << "(): get_sync_policy_handler() returned " << r << dendl; + return r; + } + + return 0; +} diff --git a/src/rgw/driver/rados/rgw_cr_tools.h b/src/rgw/driver/rados/rgw_cr_tools.h new file mode 100644 index 00000000000..ebdbfeb51b7 --- /dev/null +++ b/src/rgw/driver/rados/rgw_cr_tools.h @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_CR_TOOLS_H +#define CEPH_RGW_CR_TOOLS_H + +#include "rgw_cr_rados.h" +#include "rgw_tools.h" +#include "rgw_lc.h" + +#include "services/svc_bucket_sync.h" + +struct rgw_user_create_params { + rgw_user user; + std::string display_name; + std::string email; + std::string access_key; + std::string secret_key; + std::string key_type; /* "swift" or "s3" */ + std::string caps; + + bool generate_key{true}; + bool suspended{false}; + std::optional<int32_t> max_buckets; + bool system{false}; + bool exclusive{false}; + bool apply_quota{true}; +}; + +using RGWUserCreateCR = RGWSimpleWriteOnlyAsyncCR<rgw_user_create_params>; + +struct rgw_get_user_info_params { + rgw_user user; +}; + +using RGWGetUserInfoCR = RGWSimpleAsyncCR<rgw_get_user_info_params, RGWUserInfo>; + +struct rgw_get_bucket_info_params { + std::string tenant; + std::string bucket_name; +}; + +struct rgw_get_bucket_info_result { + std::unique_ptr<rgw::sal::Bucket> bucket; +}; + +using RGWGetBucketInfoCR = RGWSimpleAsyncCR<rgw_get_bucket_info_params, rgw_get_bucket_info_result>; + +struct rgw_bucket_create_local_params { + std::shared_ptr<RGWUserInfo> user_info; + std::string bucket_name; + rgw_placement_rule placement_rule; +}; + +using RGWBucketCreateLocalCR = RGWSimpleWriteOnlyAsyncCR<rgw_bucket_create_local_params>; + +struct rgw_object_simple_put_params { + RGWDataAccess::BucketRef bucket; + rgw_obj_key key; + bufferlist data; + std::map<std::string, bufferlist> attrs; + std::optional<std::string> user_data; +}; + +using RGWObjectSimplePutCR = RGWSimpleWriteOnlyAsyncCR<rgw_object_simple_put_params>; + + +struct rgw_bucket_lifecycle_config_params { + rgw::sal::Bucket* bucket; + rgw::sal::Attrs bucket_attrs; + RGWLifecycleConfiguration config; +}; + +using RGWBucketLifecycleConfigCR = RGWSimpleWriteOnlyAsyncCR<rgw_bucket_lifecycle_config_params>; + +struct rgw_bucket_get_sync_policy_params { + std::optional<rgw_zone_id> zone; + std::optional<rgw_bucket> bucket; +}; + +struct rgw_bucket_get_sync_policy_result { + RGWBucketSyncPolicyHandlerRef policy_handler; +}; + +using RGWBucketGetSyncPolicyHandlerCR = RGWSimpleAsyncCR<rgw_bucket_get_sync_policy_params, rgw_bucket_get_sync_policy_result>; + +#endif diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.cc b/src/rgw/driver/rados/rgw_d3n_datacache.cc new file mode 100644 index 00000000000..ed375e2ac94 --- /dev/null +++ b/src/rgw/driver/rados/rgw_d3n_datacache.cc @@ -0,0 +1,369 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_d3n_datacache.h" +#include "rgw_rest_client.h" +#include "rgw_auth_s3.h" +#include "rgw_op.h" +#include "rgw_common.h" +#include "rgw_auth_s3.h" +#include "rgw_op.h" +#include "rgw_crypt_sanitize.h" +#if defined(__linux__) +#include <features.h> +#endif + +#if __has_include(<filesystem>) +#include <filesystem> +namespace efs = std::filesystem; +#else +#include <experimental/filesystem> +namespace efs = std::experimental::filesystem; +#endif + +#define 
dout_subsys ceph_subsys_rgw + +using namespace std; + +int D3nCacheAioWriteRequest::d3n_prepare_libaio_write_op(bufferlist& bl, unsigned int len, string oid, string cache_location) +{ + std::string location = cache_location + oid; + int r = 0; + + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): Write To Cache, location=" << location << dendl; + cb = new struct aiocb; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + memset(cb, 0, sizeof(struct aiocb)); + r = fd = ::open(location.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode); + if (fd < 0) { + ldout(cct, 0) << "ERROR: D3nCacheAioWriteRequest::create_io: open file failed, errno=" << errno << ", location='" << location.c_str() << "'" << dendl; + goto done; + } + if (g_conf()->rgw_d3n_l1_fadvise != POSIX_FADV_NORMAL) + posix_fadvise(fd, 0, 0, g_conf()->rgw_d3n_l1_fadvise); + cb->aio_fildes = fd; + + data = malloc(len); + if (!data) { + ldout(cct, 0) << "ERROR: D3nCacheAioWriteRequest::create_io: memory allocation failed" << dendl; + goto close_file; + } + cb->aio_buf = data; + memcpy((void*)data, bl.c_str(), len); + cb->aio_nbytes = len; + goto done; + +close_file: + ::close(fd); +done: + return r; +} + +D3nDataCache::D3nDataCache() + : cct(nullptr), io_type(_io_type::ASYNC_IO), free_data_cache_size(0), outstanding_write_size(0) +{ + lsubdout(g_ceph_context, rgw_datacache, 5) << "D3nDataCache: " << __func__ << "()" << dendl; +} + +void D3nDataCache::init(CephContext *_cct) { + cct = _cct; + free_data_cache_size = cct->_conf->rgw_d3n_l1_datacache_size; + head = nullptr; + tail = nullptr; + cache_location = cct->_conf->rgw_d3n_l1_datacache_persistent_path; + if(cache_location.back() != '/') { + cache_location += "/"; + } + try { + if (efs::exists(cache_location)) { + // d3n: evict the cache storage directory + if (g_conf()->rgw_d3n_l1_evict_cache_on_start) { + lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: init: evicting the persistent storage directory on start" << dendl; + for (auto& p : efs::directory_iterator(cache_location)) { + efs::remove_all(p.path()); + } + } + } else { + // create the cache storage directory + lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: init: creating the persistent storage directory on start" << dendl; + efs::create_directories(cache_location); + } + } catch (const efs::filesystem_error& e) { + lderr(g_ceph_context) << "D3nDataCache: init: ERROR initializing the cache storage directory '" << cache_location << + "' : " << e.what() << dendl; + } + + auto conf_eviction_policy = cct->_conf.get_val("rgw_d3n_l1_eviction_policy"); + ceph_assert(conf_eviction_policy == "lru" || conf_eviction_policy == "random"); + if (conf_eviction_policy == "lru") + eviction_policy = _eviction_policy::LRU; + if (conf_eviction_policy == "random") + eviction_policy = _eviction_policy::RANDOM; + +#if defined(HAVE_LIBAIO) && defined(__GLIBC__) + // libaio setup + struct aioinit ainit{0}; + ainit.aio_threads = cct->_conf.get_val("rgw_d3n_libaio_aio_threads"); + ainit.aio_num = cct->_conf.get_val("rgw_d3n_libaio_aio_num"); + ainit.aio_idle_time = 120; + aio_init(&ainit); +#endif +} + +int D3nDataCache::d3n_io_write(bufferlist& bl, unsigned int len, std::string oid) +{ + D3nChunkDataInfo* chunk_info = new D3nChunkDataInfo; + std::string location = cache_location + oid; + + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; + FILE *cache_file = nullptr; + int r = 0; + size_t nbytes = 0; + + cache_file = fopen(location.c_str(), 
"w+"); + if (cache_file == nullptr) { + ldout(cct, 0) << "ERROR: D3nDataCache::fopen file has return error, errno=" << errno << dendl; + return -errno; + } + + nbytes = fwrite(bl.c_str(), 1, len, cache_file); + if (nbytes != len) { + ldout(cct, 0) << "ERROR: D3nDataCache::io_write: fwrite has returned error: nbytes!=len, nbytes=" << nbytes << ", len=" << len << dendl; + return -EIO; + } + + r = fclose(cache_file); + if (r != 0) { + ldout(cct, 0) << "ERROR: D3nDataCache::fclsoe file has return error, errno=" << errno << dendl; + return -errno; + } + + { // update cahce_map entries for new chunk in cache + const std::lock_guard l(d3n_cache_lock); + chunk_info->oid = oid; + chunk_info->set_ctx(cct); + chunk_info->size = len; + d3n_cache_map.insert(pair(oid, chunk_info)); + } + + return r; +} + +void d3n_libaio_write_cb(sigval sigval) +{ + lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl; + D3nCacheAioWriteRequest* c = static_cast(sigval.sival_ptr); + c->priv_data->d3n_libaio_write_completion_cb(c); +} + + +void D3nDataCache::d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c) +{ + D3nChunkDataInfo* chunk_info{nullptr}; + + ldout(cct, 5) << "D3nDataCache: " << __func__ << "(): oid=" << c->oid << dendl; + + { // update cache_map entries for new chunk in cache + const std::lock_guard l(d3n_cache_lock); + d3n_outstanding_write_list.erase(c->oid); + chunk_info = new D3nChunkDataInfo; + chunk_info->oid = c->oid; + chunk_info->set_ctx(cct); + chunk_info->size = c->cb->aio_nbytes; + d3n_cache_map.insert(pair(c->oid, chunk_info)); + } + + { // update free size + const std::lock_guard l(d3n_eviction_lock); + free_data_cache_size -= c->cb->aio_nbytes; + outstanding_write_size -= c->cb->aio_nbytes; + lru_insert_head(chunk_info); + } + delete c; + c = nullptr; +} + +int D3nDataCache::d3n_libaio_create_write_request(bufferlist& bl, unsigned int len, std::string oid) +{ + lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "(): Write To Cache, oid=" << oid << ", len=" << len << dendl; + struct D3nCacheAioWriteRequest* wr = new struct D3nCacheAioWriteRequest(cct); + int r=0; + if ((r = wr->d3n_prepare_libaio_write_op(bl, len, oid, cache_location)) < 0) { + ldout(cct, 0) << "ERROR: D3nDataCache: " << __func__ << "() prepare libaio write op r=" << r << dendl; + goto done; + } + wr->cb->aio_sigevent.sigev_notify = SIGEV_THREAD; + wr->cb->aio_sigevent.sigev_notify_function = d3n_libaio_write_cb; + wr->cb->aio_sigevent.sigev_notify_attributes = nullptr; + wr->cb->aio_sigevent.sigev_value.sival_ptr = (void*)wr; + wr->oid = oid; + wr->priv_data = this; + + if ((r = ::aio_write(wr->cb)) != 0) { + ldout(cct, 0) << "ERROR: D3nDataCache: " << __func__ << "() aio_write r=" << r << dendl; + goto error; + } + return 0; + +error: + delete wr; +done: + return r; +} + +void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) +{ + size_t sr = 0; + uint64_t freed_size = 0, _free_data_cache_size = 0, _outstanding_write_size = 0; + + ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", len=" << len << dendl; + { + const std::lock_guard l(d3n_cache_lock); + std::unordered_map::iterator iter = d3n_cache_map.find(oid); + if (iter != d3n_cache_map.end()) { + ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): data already cached, no rewrite" << dendl; + return; + } + auto it = d3n_outstanding_write_list.find(oid); + if (it != d3n_outstanding_write_list.end()) { + ldout(cct, 10) << "D3nDataCache: NOTE: data put in 
cache already issued, no rewrite" << dendl; + return; + } + d3n_outstanding_write_list.insert(oid); + } + { + const std::lock_guard l(d3n_eviction_lock); + _free_data_cache_size = free_data_cache_size; + _outstanding_write_size = outstanding_write_size; + } + ldout(cct, 20) << "D3nDataCache: Before eviction _free_data_cache_size:" << _free_data_cache_size << ", _outstanding_write_size:" << _outstanding_write_size << ", freed_size:" << freed_size << dendl; + while (len > (_free_data_cache_size - _outstanding_write_size + freed_size)) { + ldout(cct, 20) << "D3nDataCache: enter eviction" << dendl; + if (eviction_policy == _eviction_policy::LRU) { + sr = lru_eviction(); + } else if (eviction_policy == _eviction_policy::RANDOM) { + sr = random_eviction(); + } else { + ldout(cct, 0) << "D3nDataCache: Warning: unknown cache eviction policy, defaulting to lru eviction" << dendl; + sr = lru_eviction(); + } + if (sr == 0) { + ldout(cct, 2) << "D3nDataCache: Warning: eviction was not able to free disk space, not writing to cache" << dendl; + d3n_outstanding_write_list.erase(oid); + return; + } + ldout(cct, 20) << "D3nDataCache: completed eviction of " << sr << " bytes" << dendl; + freed_size += sr; + } + int r = 0; + r = d3n_libaio_create_write_request(bl, len, oid); + if (r < 0) { + const std::lock_guard l(d3n_cache_lock); + d3n_outstanding_write_list.erase(oid); + ldout(cct, 1) << "D3nDataCache: create_aio_write_request failed, r=" << r << dendl; + return; + } + + const std::lock_guard l(d3n_eviction_lock); + free_data_cache_size += freed_size; + outstanding_write_size += len; +} + +bool D3nDataCache::get(const string& oid, const off_t len) +{ + const std::lock_guard l(d3n_cache_lock); + bool exist = false; + string location = cache_location + oid; + + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; + std::unordered_map<string, D3nChunkDataInfo*>::iterator iter = d3n_cache_map.find(oid); + if (!(iter == d3n_cache_map.end())) { + // verify that the backing file still exists in the cache and holds the expected data before marking the chunk present + struct D3nChunkDataInfo* chdo = iter->second; + struct stat st; + int r = stat(location.c_str(), &st); + if (r != -1 && st.st_size == len) { // file exists and contains the required data range length + exist = true; + /*LRU*/ + /*get D3nChunkDataInfo*/ + const std::lock_guard l(d3n_eviction_lock); + lru_remove(chdo); + lru_insert_head(chdo); + } else { + d3n_cache_map.erase(oid); + const std::lock_guard l(d3n_eviction_lock); + lru_remove(chdo); + delete chdo; + exist = false; + } + } + return exist; +} + +size_t D3nDataCache::random_eviction() +{ + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "()" << dendl; + int n_entries = 0; + int random_index = 0; + size_t freed_size = 0; + D3nChunkDataInfo* del_entry; + string del_oid, location; + { + const std::lock_guard l(d3n_cache_lock); + n_entries = d3n_cache_map.size(); + if (n_entries <= 0) { + return 0; // nothing to evict + } + srand (time(NULL)); + random_index = ceph::util::generate_random_number(0, n_entries-1); + std::unordered_map<string, D3nChunkDataInfo*>::iterator iter = d3n_cache_map.begin(); + std::advance(iter, random_index); + del_oid = iter->first; + del_entry = iter->second; + ldout(cct, 20) << "D3nDataCache: random_eviction: index:" << random_index << ", free size: " << del_entry->size << dendl; + freed_size = del_entry->size; + delete del_entry; + del_entry = nullptr; + d3n_cache_map.erase(del_oid); // oid + } + + location = cache_location + del_oid; + ::remove(location.c_str()); + return freed_size; +} + +size_t D3nDataCache::lru_eviction() +{ + lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "()" << dendl; + int n_entries = 0; + size_t freed_size = 0; + D3nChunkDataInfo* del_entry; + string del_oid, location; + + { + const std::lock_guard l(d3n_eviction_lock); + del_entry = tail; + if (del_entry == nullptr) { + ldout(cct, 2) << "D3nDataCache: lru_eviction: del_entry=null_ptr" << dendl; + return 0; + } + lru_remove(del_entry); + } + + { + const std::lock_guard l(d3n_cache_lock); + n_entries = d3n_cache_map.size(); + if (n_entries <= 0) { + ldout(cct, 2) << "D3nDataCache: lru_eviction: cache_map.size<=0" << dendl; + return 0; // nothing to evict + } + del_oid = del_entry->oid; + ldout(cct, 20) << "D3nDataCache: lru_eviction: oid to remove: " << del_oid << dendl; + d3n_cache_map.erase(del_oid); // oid + } + freed_size = del_entry->size; + delete del_entry; + location = cache_location + del_oid; + ::remove(location.c_str()); + return freed_size; +} diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.h b/src/rgw/driver/rados/rgw_d3n_datacache.h new file mode 100644 index 00000000000..5d3537f3b14 --- /dev/null +++ b/src/rgw/driver/rados/rgw_d3n_datacache.h @@ -0,0 +1,261 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGWD3NDATACACHE_H +#define CEPH_RGWD3NDATACACHE_H + +#include "rgw_rados.h" +#include <curl/curl.h> + +#include "rgw_common.h" + +#include <unistd.h> +#include <signal.h> +#include "include/Context.h" +#include "include/lru.h" +#include "rgw_d3n_cacherequest.h" + + +/*D3nDataCache*/ +struct D3nDataCache; + + +struct D3nChunkDataInfo : public LRUObject { + CephContext *cct; + uint64_t size; + time_t access_time; + std::string address; + std::string oid; + bool complete; + struct D3nChunkDataInfo* lru_prev; + struct D3nChunkDataInfo* lru_next; + + D3nChunkDataInfo(): size(0) {} + + void set_ctx(CephContext *_cct) { + cct = _cct; + } + + void dump(Formatter *f) const; + static void generate_test_instances(std::list<D3nChunkDataInfo*>& o); +}; + 
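+// Write path overview: D3nDataCache::put() reserves the chunk in
+// d3n_outstanding_write_list, d3n_libaio_create_write_request() fills in a
+// D3nCacheAioWriteRequest and hands it to ::aio_write(), and the completion
+// callback d3n_libaio_write_completion_cb() moves the chunk into the cache
+// map and LRU. A minimal usage sketch (hypothetical caller, not part of this
+// patch):
+//
+//   D3nDataCache cache;
+//   cache.init(cct);                        // cct: a valid CephContext*
+//   bufferlist bl;                          // chunk payload
+//   std::string oid = "<chunk object id>";
+//   cache.put(bl, bl.length(), oid);        // async write-through to disk
+//   bool cached = cache.get(oid, bl.length());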
diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.h b/src/rgw/driver/rados/rgw_d3n_datacache.h
new file mode 100644
index 00000000000..5d3537f3b14
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_d3n_datacache.h
@@ -0,0 +1,261 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGWD3NDATACACHE_H
+#define CEPH_RGWD3NDATACACHE_H
+
+#include "rgw_rados.h"
+#include <curl/curl.h>
+
+#include "rgw_common.h"
+
+#include <unistd.h>
+#include <signal.h>
+#include "include/Context.h"
+#include "include/lru.h"
+#include "rgw_d3n_cacherequest.h"
+
+
+/*D3nDataCache*/
+struct D3nDataCache;
+
+
+struct D3nChunkDataInfo : public LRUObject {
+  CephContext *cct;
+  uint64_t size;
+  time_t access_time;
+  std::string address;
+  std::string oid;
+  bool complete;
+  struct D3nChunkDataInfo* lru_prev;
+  struct D3nChunkDataInfo* lru_next;
+
+  D3nChunkDataInfo(): size(0) {}
+
+  void set_ctx(CephContext *_cct) {
+    cct = _cct;
+  }
+
+  void dump(Formatter *f) const;
+  static void generate_test_instances(std::list<D3nChunkDataInfo*>& o);
+};
+
+struct D3nCacheAioWriteRequest {
+  std::string oid;
+  void *data;
+  int fd;
+  struct aiocb *cb;
+  D3nDataCache *priv_data;
+  CephContext *cct;
+
+  D3nCacheAioWriteRequest(CephContext *_cct) : cct(_cct) {}
+  int d3n_prepare_libaio_write_op(bufferlist& bl, unsigned int len, std::string oid, std::string cache_location);
+
+  ~D3nCacheAioWriteRequest() {
+    ::close(fd);
+    cb->aio_buf = nullptr;
+    free(data);
+    data = nullptr;
+    delete(cb);
+  }
+};
+
+struct D3nDataCache {
+
+private:
+  std::unordered_map<std::string, D3nChunkDataInfo*> d3n_cache_map;
+  std::set<std::string> d3n_outstanding_write_list;
+  std::mutex d3n_cache_lock;
+  std::mutex d3n_eviction_lock;
+
+  CephContext *cct;
+  enum class _io_type {
+    SYNC_IO = 1,
+    ASYNC_IO = 2,
+    SEND_FILE = 3
+  } io_type;
+  enum class _eviction_policy {
+    LRU=0, RANDOM=1
+  } eviction_policy;
+
+  struct sigaction action;
+  uint64_t free_data_cache_size = 0;
+  uint64_t outstanding_write_size = 0;
+  struct D3nChunkDataInfo* head;
+  struct D3nChunkDataInfo* tail;
+
+private:
+  void add_io();
+
+public:
+  D3nDataCache();
+  ~D3nDataCache() {
+    while (lru_eviction() > 0);
+  }
+
+  std::string cache_location;
+
+  bool get(const std::string& oid, const off_t len);
+  void put(bufferlist& bl, unsigned int len, std::string& obj_key);
+  int d3n_io_write(bufferlist& bl, unsigned int len, std::string oid);
+  int d3n_libaio_create_write_request(bufferlist& bl, unsigned int len, std::string oid);
+  void d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c);
+  size_t random_eviction();
+  size_t lru_eviction();
+
+  void init(CephContext *_cct);
+
+  void lru_insert_head(struct D3nChunkDataInfo* o) {
+    lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl;
+    o->lru_next = head;
+    o->lru_prev = nullptr;
+    if (head) {
+      head->lru_prev = o;
+    } else {
+      tail = o;
+    }
+    head = o;
+  }
+
+  void lru_insert_tail(struct D3nChunkDataInfo* o) {
+    lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl;
+    o->lru_next = nullptr;
+    o->lru_prev = tail;
+    if (tail) {
+      tail->lru_next = o;
+    } else {
+      head = o;
+    }
+    tail = o;
+  }
+
+  void lru_remove(struct D3nChunkDataInfo* o) {
+    lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl;
+    if (o->lru_next)
+      o->lru_next->lru_prev = o->lru_prev;
+    else
+      tail = o->lru_prev;
+    if (o->lru_prev)
+      o->lru_prev->lru_next = o->lru_next;
+    else
+      head = o->lru_next;
+    o->lru_next = o->lru_prev = nullptr;
+  }
+};
+
+
+template <class T>
+class D3nRGWDataCache : public T {
+
+public:
+  D3nRGWDataCache() {}
+
+  int init_rados() override {
+    int ret;
+    ret = T::init_rados();
+    if (ret < 0)
+      return ret;
+
+    return 0;
+  }
+
+  int get_obj_iterate_cb(const DoutPrefixProvider *dpp, const rgw_raw_obj& read_obj, off_t obj_ofs,
+                         off_t read_ofs, off_t len, bool is_head_obj,
+                         RGWObjState *astate, void *arg) override;
+};
+
+template <class T>
+int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const rgw_raw_obj& read_obj, off_t obj_ofs,
+                                           off_t read_ofs, off_t len, bool is_head_obj,
+                                           RGWObjState *astate, void *arg) {
+  lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache::" << __func__ << "(): is head object : " << is_head_obj << dendl;
+  librados::ObjectReadOperation op;
+  struct get_obj_data* d = static_cast<struct get_obj_data*>(arg);
+  std::string oid, key;
+
+  if (is_head_obj) {
+    // only when reading from the head object do we need to do the atomic test
+    int r = T::append_atomic_test(dpp, astate, op);
+    if (r < 0)
+      return r;
+
+    if (astate &&
+        obj_ofs < astate->data.length()) {
+      unsigned
chunk_len = std::min((uint64_t)astate->data.length() - obj_ofs, (uint64_t)len); + + r = d->client_cb->handle_data(astate->data, obj_ofs, chunk_len); + if (r < 0) + return r; + + len -= chunk_len; + d->offset += chunk_len; + read_ofs += chunk_len; + obj_ofs += chunk_len; + if (!len) + return 0; + } + + auto obj = d->rgwrados->svc.rados->obj(read_obj); + r = obj.open(dpp); + if (r < 0) { + lsubdout(g_ceph_context, rgw, 4) << "failed to open rados context for " << read_obj << dendl; + return r; + } + + ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl; + op.read(read_ofs, len, nullptr, nullptr); + + const uint64_t cost = len; + const uint64_t id = obj_ofs; // use logical object offset for sorting replies + + auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id); + return d->flush(std::move(completed)); + } else { + ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << ", is_head_obj=" << is_head_obj << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl; + int r; + + op.read(read_ofs, len, nullptr, nullptr); + + const uint64_t cost = len; + const uint64_t id = obj_ofs; // use logical object offset for sorting replies + oid = read_obj.oid; + + auto obj = d->rgwrados->svc.rados->obj(read_obj); + r = obj.open(dpp); + if (r < 0) { + lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: Error: failed to open rados context for " << read_obj << ", r=" << r << dendl; + return r; + } + + const bool is_compressed = (astate->attrset.find(RGW_ATTR_COMPRESSION) != astate->attrset.end()); + const bool is_encrypted = (astate->attrset.find(RGW_ATTR_CRYPT_MODE) != astate->attrset.end()); + if (read_ofs != 0 || astate->size != astate->accounted_size || is_compressed || is_encrypted) { + d->d3n_bypass_cache_write = true; + lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: " << __func__ << "(): Note - bypassing datacache: oid=" << read_obj.oid << ", read_ofs!=0 = " << read_ofs << ", size=" << astate->size << " != accounted_size=" << astate->accounted_size << ", is_compressed=" << is_compressed << ", is_encrypted=" << is_encrypted << dendl; + auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id); + r = d->flush(std::move(completed)); + return r; + } + + if (d->rgwrados->d3n_data_cache->get(oid, len)) { + // Read From Cache + ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): READ FROM CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl; + auto completed = d->aio->get(obj, rgw::Aio::d3n_cache_op(dpp, d->yield, read_ofs, len, d->rgwrados->d3n_data_cache->cache_location), cost, id); + r = d->flush(std::move(completed)); + if (r < 0) { + lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: " << __func__ << "(): Error: failed to drain/flush, r= " << r << dendl; + } + return r; + } else { + // Write To Cache + ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): WRITE TO CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << " len=" << len << dendl; + auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id); + return d->flush(std::move(completed)); + } + } + lsubdout(g_ceph_context, rgw, 1) << "D3nDataCache: " << __func__ << "(): Warning: Check head object cache handling flow, oid=" << read_obj.oid << dendl; + + return 0; +} + +#endif diff 
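The bypass test in get_obj_iterate_cb() above is the cache's admission rule for reads: only whole, untransformed RADOS objects go through the data cache. A small sketch of that predicate, with `ObjReadInfo` standing in for the relevant RGWObjState fields:

```cpp
// Sketch of the D3N cache-admission gate applied in get_obj_iterate_cb().
#include <cstdint>
#include <iostream>

struct ObjReadInfo {
  uint64_t read_ofs;        // offset of this read within the RADOS object
  uint64_t size;            // object size
  uint64_t accounted_size;  // logical size (differs when transformed)
  bool compressed;
  bool encrypted;
};

// Mirrors the bypass condition above: partial reads and transformed objects
// (compressed, encrypted, or size != accounted_size) skip the cache.
bool d3n_cacheable(const ObjReadInfo& o) {
  return o.read_ofs == 0 &&
         o.size == o.accounted_size &&
         !o.compressed && !o.encrypted;
}

int main() {
  std::cout << d3n_cacheable({0, 4096, 4096, false, false}) << '\n';   // 1
  std::cout << d3n_cacheable({512, 4096, 4096, false, false}) << '\n'; // 0: partial read
  std::cout << d3n_cacheable({0, 2048, 4096, true, false}) << '\n';    // 0: compressed
}
```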
--git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc new file mode 100644 index 00000000000..47573b765da --- /dev/null +++ b/src/rgw/driver/rados/rgw_data_sync.cc @@ -0,0 +1,6460 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/ceph_json.h" +#include "common/RefCountedObj.h" +#include "common/WorkQueue.h" +#include "common/Throttle.h" +#include "common/errno.h" + +#include "rgw_common.h" +#include "rgw_zone.h" +#include "rgw_sync.h" +#include "rgw_data_sync.h" +#include "rgw_rest_conn.h" +#include "rgw_cr_rados.h" +#include "rgw_cr_rest.h" +#include "rgw_cr_tools.h" +#include "rgw_http_client.h" +#include "rgw_bucket.h" +#include "rgw_bucket_sync.h" +#include "rgw_bucket_sync_cache.h" +#include "rgw_datalog.h" +#include "rgw_metadata.h" +#include "rgw_sync_counters.h" +#include "rgw_sync_error_repo.h" +#include "rgw_sync_module.h" +#include "rgw_sal.h" + +#include "cls/lock/cls_lock_client.h" +#include "cls/rgw/cls_rgw_client.h" + +#include "services/svc_zone.h" +#include "services/svc_sync_modules.h" +#include "rgw_bucket.h" + +#include "include/common_fwd.h" +#include "include/random.h" + +#include +#include + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "data sync: ") + +using namespace std; + +static const string datalog_sync_status_oid_prefix = "datalog.sync-status"; +static const string datalog_sync_status_shard_prefix = "datalog.sync-status.shard"; +static const string datalog_sync_full_sync_index_prefix = "data.full-sync.index"; +static const string bucket_full_status_oid_prefix = "bucket.full-sync-status"; +static const string bucket_status_oid_prefix = "bucket.sync-status"; +static const string object_status_oid_prefix = "bucket.sync-status"; + +void rgw_datalog_info::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("num_objects", num_shards, obj); +} + +void rgw_datalog_entry::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("key", key, obj); + utime_t ut; + JSONDecoder::decode_json("timestamp", ut, obj); + timestamp = ut.to_real_time(); +} + +void rgw_datalog_shard_data::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("truncated", truncated, obj); + JSONDecoder::decode_json("entries", entries, obj); +}; + +// print a bucket shard with [gen] +std::string to_string(const rgw_bucket_shard& bs, std::optional gen) +{ + constexpr auto digits10 = std::numeric_limits::digits10; + constexpr auto reserve = 2 + digits10; // [value] + auto str = bs.get_key('/', ':', ':', reserve); + str.append(1, '['); + str.append(std::to_string(gen.value_or(0))); + str.append(1, ']'); + return str; +} + +class RGWReadDataSyncStatusMarkersCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + RGWDataSyncCtx *sc; + RGWDataSyncEnv *env; + const int num_shards; + int shard_id{0};; + + map& markers; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read data sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + RGWReadDataSyncStatusMarkersCR(RGWDataSyncCtx *sc, int num_shards, + map& markers) + : RGWShardCollectCR(sc->cct, MAX_CONCURRENT_SHARDS), + sc(sc), env(sc->env), num_shards(num_shards), markers(markers) + {} + bool spawn_next() override; +}; + +bool RGWReadDataSyncStatusMarkersCR::spawn_next() +{ + if (shard_id >= 
num_shards) { + return false; + } + using CR = RGWSimpleRadosReadCR; + spawn(new CR(env->dpp, env->async_rados, env->svc->sysobj, + rgw_raw_obj(env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id)), + &markers[shard_id]), + false); + shard_id++; + return true; +} + +class RGWReadDataSyncRecoveringShardsCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + RGWDataSyncCtx *sc; + RGWDataSyncEnv *env; + + uint64_t max_entries; + int num_shards; + int shard_id{0}; + + string marker; + std::vector& omapkeys; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list recovering data sync: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + RGWReadDataSyncRecoveringShardsCR(RGWDataSyncCtx *sc, uint64_t _max_entries, int _num_shards, + std::vector& omapkeys) + : RGWShardCollectCR(sc->cct, MAX_CONCURRENT_SHARDS), sc(sc), env(sc->env), + max_entries(_max_entries), num_shards(_num_shards), omapkeys(omapkeys) + {} + bool spawn_next() override; +}; + +bool RGWReadDataSyncRecoveringShardsCR::spawn_next() +{ + if (shard_id >= num_shards) + return false; + + string error_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id) + ".retry"; + auto& shard_keys = omapkeys[shard_id]; + shard_keys = std::make_shared(); + spawn(new RGWRadosGetOmapKeysCR(env->driver, rgw_raw_obj(env->svc->zone->get_zone_params().log_pool, error_oid), + marker, max_entries, shard_keys), false); + + ++shard_id; + return true; +} + +class RGWReadDataSyncStatusCoroutine : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_data_sync_status *sync_status; + +public: + RGWReadDataSyncStatusCoroutine(RGWDataSyncCtx *_sc, + rgw_data_sync_status *_status) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(sc->env), sync_status(_status) + {} + int operate(const DoutPrefixProvider *dpp) override; +}; + +int RGWReadDataSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // read sync info + using ReadInfoCR = RGWSimpleRadosReadCR; + yield { + bool empty_on_enoent = false; // fail on ENOENT + call(new ReadInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sc->source_zone)), + &sync_status->sync_info, empty_on_enoent)); + } + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to read sync status info with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + // read shard markers + using ReadMarkersCR = RGWReadDataSyncStatusMarkersCR; + yield call(new ReadMarkersCR(sc, sync_status->sync_info.num_shards, + sync_status->sync_markers)); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to read sync status markers with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; +} + +class RGWReadRemoteDataLogShardInfoCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + RGWRESTReadResource *http_op; + + int shard_id; + RGWDataChangesLogInfo *shard_info; + +public: + RGWReadRemoteDataLogShardInfoCR(RGWDataSyncCtx *_sc, + int _shard_id, RGWDataChangesLogInfo *_shard_info) : RGWCoroutine(_sc->cct), + sc(_sc), + sync_env(_sc->env), + http_op(NULL), + shard_id(_shard_id), + shard_info(_shard_info) { + } + + ~RGWReadRemoteDataLogShardInfoCR() override { + if (http_op) { + 
http_op->put(); + } + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield { + char buf[16]; + snprintf(buf, sizeof(buf), "%d", shard_id); + rgw_http_param_pair pairs[] = { { "type" , "data" }, + { "id", buf }, + { "info" , NULL }, + { NULL, NULL } }; + + string p = "/admin/log/"; + + http_op = new RGWRESTReadResource(sc->conn, p, pairs, NULL, sync_env->http_manager); + + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + return set_cr_error(ret); + } + + return io_block(0); + } + yield { + int ret = http_op->wait(shard_info, null_yield); + if (ret < 0) { + return set_cr_error(ret); + } + return set_cr_done(); + } + } + return 0; + } +}; + +struct read_remote_data_log_response { + string marker; + bool truncated; + vector entries; + + read_remote_data_log_response() : truncated(false) {} + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("truncated", truncated, obj); + JSONDecoder::decode_json("entries", entries, obj); + }; +}; + +class RGWReadRemoteDataLogShardCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + RGWRESTReadResource *http_op = nullptr; + + int shard_id; + const std::string& marker; + string *pnext_marker; + vector *entries; + bool *truncated; + + read_remote_data_log_response response; + std::optional timer; + +public: + RGWReadRemoteDataLogShardCR(RGWDataSyncCtx *_sc, int _shard_id, + const std::string& marker, string *pnext_marker, + vector *_entries, + bool *_truncated) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + shard_id(_shard_id), marker(marker), pnext_marker(pnext_marker), + entries(_entries), truncated(_truncated) { + } + ~RGWReadRemoteDataLogShardCR() override { + if (http_op) { + http_op->put(); + } + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield { + char buf[16]; + snprintf(buf, sizeof(buf), "%d", shard_id); + rgw_http_param_pair pairs[] = { { "type" , "data" }, + { "id", buf }, + { "marker", marker.c_str() }, + { "extra-info", "true" }, + { NULL, NULL } }; + + string p = "/admin/log/"; + + http_op = new RGWRESTReadResource(sc->conn, p, pairs, NULL, sync_env->http_manager); + + init_new_io(http_op); + + if (sync_env->counters) { + timer.emplace(sync_env->counters, sync_counters::l_poll); + } + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + if (sync_env->counters) { + sync_env->counters->inc(sync_counters::l_poll_err); + } + return set_cr_error(ret); + } + + return io_block(0); + } + yield { + timer.reset(); + int ret = http_op->wait(&response, null_yield); + if (ret < 0) { + if (sync_env->counters && ret != -ENOENT) { + sync_env->counters->inc(sync_counters::l_poll_err); + } + return set_cr_error(ret); + } + entries->clear(); + entries->swap(response.entries); + *pnext_marker = response.marker; + *truncated = response.truncated; + return set_cr_done(); + } + } + return 0; + } +}; + +class RGWReadRemoteDataLogInfoCR : public RGWShardCollectCR { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + int num_shards; + map *datalog_info; + + int shard_id; +#define READ_DATALOG_MAX_CONCURRENT 10 + + int handle_result(int 
r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch remote datalog info: " + << cpp_strerror(r) << dendl; + } + return r; + } +public: + RGWReadRemoteDataLogInfoCR(RGWDataSyncCtx *_sc, + int _num_shards, + map *_datalog_info) : RGWShardCollectCR(_sc->cct, READ_DATALOG_MAX_CONCURRENT), + sc(_sc), sync_env(_sc->env), num_shards(_num_shards), + datalog_info(_datalog_info), shard_id(0) {} + bool spawn_next() override; +}; + +bool RGWReadRemoteDataLogInfoCR::spawn_next() { + if (shard_id >= num_shards) { + return false; + } + spawn(new RGWReadRemoteDataLogShardInfoCR(sc, shard_id, &(*datalog_info)[shard_id]), false); + shard_id++; + return true; +} + +class RGWListRemoteDataLogShardCR : public RGWSimpleCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + RGWRESTReadResource *http_op; + + int shard_id; + string marker; + uint32_t max_entries; + rgw_datalog_shard_data *result; + +public: + RGWListRemoteDataLogShardCR(RGWDataSyncCtx *sc, int _shard_id, + const string& _marker, uint32_t _max_entries, + rgw_datalog_shard_data *_result) + : RGWSimpleCoroutine(sc->cct), sc(sc), sync_env(sc->env), http_op(NULL), + shard_id(_shard_id), marker(_marker), max_entries(_max_entries), result(_result) {} + + int send_request(const DoutPrefixProvider *dpp) override { + RGWRESTConn *conn = sc->conn; + + char buf[32]; + snprintf(buf, sizeof(buf), "%d", shard_id); + + char max_entries_buf[32]; + snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", (int)max_entries); + + const char *marker_key = (marker.empty() ? "" : "marker"); + + rgw_http_param_pair pairs[] = { { "type", "data" }, + { "id", buf }, + { "max-entries", max_entries_buf }, + { marker_key, marker.c_str() }, + { NULL, NULL } }; + + string p = "/admin/log/"; + + http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + http_op->put(); + return ret; + } + + return 0; + } + + int request_complete() override { + int ret = http_op->wait(result, null_yield); + http_op->put(); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to list remote datalog shard, ret=" << ret << dendl; + return ret; + } + return 0; + } +}; + +class RGWListRemoteDataLogCR : public RGWShardCollectCR { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + map shards; + int max_entries_per_shard; + map *result; + + map::iterator iter; +#define READ_DATALOG_MAX_CONCURRENT 10 + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list remote datalog: " + << cpp_strerror(r) << dendl; + } + return r; + } +public: + RGWListRemoteDataLogCR(RGWDataSyncCtx *_sc, + map& _shards, + int _max_entries_per_shard, + map *_result) : RGWShardCollectCR(_sc->cct, READ_DATALOG_MAX_CONCURRENT), + sc(_sc), sync_env(_sc->env), max_entries_per_shard(_max_entries_per_shard), + result(_result) { + shards.swap(_shards); + iter = shards.begin(); + } + bool spawn_next() override; +}; + +bool RGWListRemoteDataLogCR::spawn_next() { + if (iter == shards.end()) { + return false; + } + + spawn(new RGWListRemoteDataLogShardCR(sc, iter->first, iter->second, max_entries_per_shard, &(*result)[iter->first]), false); + ++iter; + 
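RGWReadDataSyncStatusMarkersCR, RGWReadRemoteDataLogInfoCR and RGWListRemoteDataLogCR above all follow the same RGWShardCollectCR shape: spawn_next() hands out one child operation per shard, and the base class keeps at most a fixed window of them in flight. A rough synchronous analogue of that bounded fan-out, with illustrative names; the real version is coroutine-based and its handle_result() tolerates -ENOENT per shard:

```cpp
// Bounded fan-out over shards, a sketch of the RGWShardCollectCR pattern.
#include <functional>
#include <future>
#include <iostream>
#include <vector>

// Runs work(shard) for shards [0, num_shards) with at most `window` in flight.
std::vector<int> collect_shards(int num_shards, int window,
                                std::function<int(int)> work) {
  std::vector<int> results(num_shards);
  std::vector<std::future<int>> inflight;
  int next = 0;
  while (next < num_shards || !inflight.empty()) {
    // like spawn_next(): launch children until the window is full
    while (next < num_shards && (int)inflight.size() < window) {
      inflight.push_back(std::async(std::launch::async, work, next));
      ++next;
    }
    // harvest the oldest child (the real handle_result() would treat
    // -ENOENT as a non-fatal result here)
    int done = next - (int)inflight.size();
    results[done] = inflight.front().get();
    inflight.erase(inflight.begin());
  }
  return results;
}

int main() {
  auto r = collect_shards(8, 3, [](int shard) { return shard % 4 == 0 ? -2 : 0; });
  for (int v : r) std::cout << v << ' ';  // -2 0 0 0 -2 0 0 0
  std::cout << '\n';
}
```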
return true; +} + +class RGWInitDataSyncStatusCoroutine : public RGWCoroutine { + static constexpr uint32_t lock_duration = 30; + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw::sal::RadosStore* driver; // RGWDataSyncEnv also has a pointer to driver + const rgw_pool& pool; + const uint32_t num_shards; + + string sync_status_oid; + + string lock_name; + string cookie; + rgw_data_sync_status *status; + map shards_info; + + RGWSyncTraceNodeRef tn; +public: + RGWInitDataSyncStatusCoroutine(RGWDataSyncCtx *_sc, uint32_t num_shards, + uint64_t instance_id, + RGWSyncTraceNodeRef& _tn_parent, + rgw_data_sync_status *status) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), driver(sync_env->driver), + pool(sync_env->svc->zone->get_zone_params().log_pool), + num_shards(num_shards), status(status), + tn(sync_env->sync_tracer->add_node(_tn_parent, "init_data_sync_status")) { + lock_name = "sync_lock"; + + status->sync_info.instance_id = instance_id; + +#define COOKIE_LEN 16 + char buf[COOKIE_LEN + 1]; + + gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); + cookie = buf; + + sync_status_oid = RGWDataSyncStatusManager::sync_status_oid(sc->source_zone); + + } + + int operate(const DoutPrefixProvider *dpp) override { + int ret; + reenter(this) { + using LockCR = RGWSimpleRadosLockCR; + yield call(new LockCR(sync_env->async_rados, driver, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie, lock_duration)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid)); + return set_cr_error(retcode); + } + using WriteInfoCR = RGWSimpleRadosWriteCR; + yield call(new WriteInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj{pool, sync_status_oid}, + status->sync_info)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode)); + return set_cr_error(retcode); + } + + /* take lock again, we just recreated the object */ + yield call(new LockCR(sync_env->async_rados, driver, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie, lock_duration)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid)); + return set_cr_error(retcode); + } + + tn->log(10, "took lease"); + + /* fetch current position in logs */ + yield { + RGWRESTConn *conn = sync_env->svc->zone->get_zone_conn(sc->source_zone); + if (!conn) { + tn->log(0, SSTR("ERROR: connection to zone " << sc->source_zone << " does not exist!")); + return set_cr_error(-EIO); + } + for (uint32_t i = 0; i < num_shards; i++) { + spawn(new RGWReadRemoteDataLogShardInfoCR(sc, i, &shards_info[i]), true); + } + } + while (collect(&ret, NULL)) { + if (ret < 0) { + tn->log(0, SSTR("ERROR: failed to read remote data log shards")); + return set_state(RGWCoroutine_Error); + } + yield; + } + yield { + for (uint32_t i = 0; i < num_shards; i++) { + RGWDataChangesLogInfo& info = shards_info[i]; + auto& marker = status->sync_markers[i]; + marker.next_step_marker = info.marker; + marker.timestamp = info.last_update; + const auto& oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, i); + using WriteMarkerCR = RGWSimpleRadosWriteCR; + spawn(new WriteMarkerCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj{pool, oid}, marker), true); + } + } + while (collect(&ret, NULL)) { + if (ret < 0) { + tn->log(0, SSTR("ERROR: failed to write data sync status markers")); + return set_state(RGWCoroutine_Error); + } + yield; + } + + status->sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps; + yield 
call(new WriteInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj{pool, sync_status_oid}, + status->sync_info)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode)); + return set_cr_error(retcode); + } + yield call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, driver, + rgw_raw_obj{pool, sync_status_oid}, + lock_name, cookie)); + return set_cr_done(); + } + return 0; + } +}; + +RGWRemoteDataLog::RGWRemoteDataLog(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* driver, + RGWAsyncRadosProcessor *async_rados) + : RGWCoroutinesManager(driver->ctx(), driver->getRados()->get_cr_registry()), + dpp(dpp), driver(driver), + cct(driver->ctx()), cr_registry(driver->getRados()->get_cr_registry()), + async_rados(async_rados), + http_manager(driver->ctx(), completion_mgr), + data_sync_cr(NULL), + initialized(false) +{ +} + +int RGWRemoteDataLog::read_log_info(const DoutPrefixProvider *dpp, rgw_datalog_info *log_info) +{ + rgw_http_param_pair pairs[] = { { "type", "data" }, + { NULL, NULL } }; + + int ret = sc.conn->get_json_resource(dpp, "/admin/log", pairs, null_yield, *log_info); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch datalog info" << dendl; + return ret; + } + + ldpp_dout(dpp, 20) << "remote datalog, num_shards=" << log_info->num_shards << dendl; + + return 0; +} + +int RGWRemoteDataLog::read_source_log_shards_info(const DoutPrefixProvider *dpp, map *shards_info) +{ + rgw_datalog_info log_info; + int ret = read_log_info(dpp, &log_info); + if (ret < 0) { + return ret; + } + + return run(dpp, new RGWReadRemoteDataLogInfoCR(&sc, log_info.num_shards, shards_info)); +} + +int RGWRemoteDataLog::read_source_log_shards_next(const DoutPrefixProvider *dpp, map shard_markers, map *result) +{ + return run(dpp, new RGWListRemoteDataLogCR(&sc, shard_markers, 1, result)); +} + +int RGWRemoteDataLog::init(const rgw_zone_id& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger, + RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& _sync_module, + PerfCounters* counters) +{ + sync_env.init(dpp, cct, driver, driver->svc(), async_rados, &http_manager, _error_logger, + _sync_tracer, _sync_module, counters); + sc.init(&sync_env, _conn, _source_zone); + + if (initialized) { + return 0; + } + + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + + tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "data"); + + initialized = true; + + return 0; +} + +void RGWRemoteDataLog::finish() +{ + stop(); +} + +int RGWRemoteDataLog::read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status) +{ + // cannot run concurrently with run_sync(), so run in a separate manager + RGWCoroutinesManager crs(cct, cr_registry); + RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + RGWDataSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + + RGWDataSyncCtx sc_local = sc; + sc_local.env = &sync_env_local; + + ret = crs.run(dpp, new RGWReadDataSyncStatusCoroutine(&sc_local, sync_status)); + http_manager.stop(); + return ret; +} + +int RGWRemoteDataLog::read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, set& recovering_shards) +{ + // cannot run concurrently with run_sync(), so run in a 
separate manager + RGWCoroutinesManager crs(cct, cr_registry); + RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + RGWDataSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + + RGWDataSyncCtx sc_local = sc; + sc_local.env = &sync_env_local; + + std::vector omapkeys; + omapkeys.resize(num_shards); + uint64_t max_entries{1}; + + ret = crs.run(dpp, new RGWReadDataSyncRecoveringShardsCR(&sc_local, max_entries, num_shards, omapkeys)); + http_manager.stop(); + + if (ret == 0) { + for (int i = 0; i < num_shards; i++) { + if (omapkeys[i]->entries.size() != 0) { + recovering_shards.insert(i); + } + } + } + + return ret; +} + +int RGWRemoteDataLog::init_sync_status(const DoutPrefixProvider *dpp, int num_shards) +{ + rgw_data_sync_status sync_status; + sync_status.sync_info.num_shards = num_shards; + + RGWCoroutinesManager crs(cct, cr_registry); + RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + RGWDataSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + auto instance_id = ceph::util::generate_random_number(); + RGWDataSyncCtx sc_local = sc; + sc_local.env = &sync_env_local; + ret = crs.run(dpp, new RGWInitDataSyncStatusCoroutine(&sc_local, num_shards, instance_id, tn, &sync_status)); + http_manager.stop(); + return ret; +} + +static string full_data_sync_index_shard_oid(const rgw_zone_id& source_zone, int shard_id) +{ + char buf[datalog_sync_full_sync_index_prefix.size() + 1 + source_zone.id.size() + 1 + 16]; + snprintf(buf, sizeof(buf), "%s.%s.%d", datalog_sync_full_sync_index_prefix.c_str(), source_zone.id.c_str(), shard_id); + return string(buf); +} + +struct read_metadata_list { + string marker; + bool truncated; + list keys; + int count; + + read_metadata_list() : truncated(false), count(0) {} + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("truncated", truncated, obj); + JSONDecoder::decode_json("keys", keys, obj); + JSONDecoder::decode_json("count", count, obj); + } +}; + +struct bucket_instance_meta_info { + string key; + obj_version ver; + utime_t mtime; + RGWBucketInstanceMetadataObject data; + + bucket_instance_meta_info() {} + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("key", key, obj); + JSONDecoder::decode_json("ver", ver, obj); + JSONDecoder::decode_json("mtime", mtime, obj); + JSONDecoder::decode_json("data", data, obj); + } +}; + +class RGWReadRemoteBucketIndexLogInfoCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + const string instance_key; + + rgw_bucket_index_marker_info *info; + +public: + RGWReadRemoteBucketIndexLogInfoCR(RGWDataSyncCtx *_sc, + const rgw_bucket& bucket, + rgw_bucket_index_marker_info *_info) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + instance_key(bucket.get_key()), info(_info) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield { + rgw_http_param_pair pairs[] = { { "type" , "bucket-index" }, + { "bucket-instance", instance_key.c_str() }, + { "info" , NULL }, + { NULL, NULL } }; + + string p = "/admin/log/"; + call(new RGWReadRESTResourceCR(sync_env->cct, sc->conn, sync_env->http_manager, p, pairs, info)); + } + if (retcode 
< 0) { + return set_cr_error(retcode); + } + + return set_cr_done(); + } + return 0; + } +}; + + +class RGWListBucketIndexesCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env = sc->env; + + rgw::sal::RadosStore* driver = sync_env->driver; + + rgw_data_sync_status *sync_status; + + int req_ret = 0; + int ret = 0; + + list::iterator iter; + + unique_ptr entries_index; + string oid_prefix = + datalog_sync_full_sync_index_prefix + "." + sc->source_zone.id; + + string path = "/admin/metadata/bucket.instance"; + bucket_instance_meta_info meta_info; + string key; + + bool failed = false; + bool truncated = false; + read_metadata_list result; + +public: + RGWListBucketIndexesCR(RGWDataSyncCtx* sc, + rgw_data_sync_status* sync_status) + : RGWCoroutine(sc->cct), sc(sc), sync_status(sync_status) {} + ~RGWListBucketIndexesCR() override { } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + entries_index = std::make_unique( + sync_env->async_rados, driver, this, + cct->_conf->rgw_data_log_num_shards, + sync_env->svc->zone->get_zone_params().log_pool, + oid_prefix); + yield; // yield so OmapAppendCRs can start + + do { + yield { + string entrypoint = "/admin/metadata/bucket.instance"s; + + rgw_http_param_pair pairs[] = {{"max-entries", "1000"}, + {"marker", result.marker.c_str()}, + {NULL, NULL}}; + + call(new RGWReadRESTResourceCR( + sync_env->cct, sc->conn, sync_env->http_manager, + entrypoint, pairs, &result)); + } + if (retcode < 0) { + ldpp_dout(dpp, 0) + << "ERROR: failed to fetch metadata for section bucket.instance" + << dendl; + return set_cr_error(retcode); + } + + for (iter = result.keys.begin(); iter != result.keys.end(); ++iter) { + ldpp_dout(dpp, 20) << "list metadata: section=bucket.instance key=" + << *iter << dendl; + key = *iter; + + yield { + rgw_http_param_pair pairs[] = {{"key", key.c_str()}, + {NULL, NULL}}; + + call(new RGWReadRESTResourceCR( + sync_env->cct, sc->conn, sync_env->http_manager, path, pairs, + &meta_info)); + } + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch metadata for key: " + << key << dendl; + return set_cr_error(retcode); + } + // Now that bucket full sync is bucket-wide instead of + // per-shard, we only need to register a single shard of + // each bucket to guarantee that sync will see everything + // that happened before data full sync starts. This also + // means we don't have to care about the bucket's current + // shard count. 
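The comment above captures the invariant: full sync registers one obligation per bucket, keyed "instance:0", on whichever datalog shard the bucket hashes to. A toy sketch of that placement follows, where `log_shard_for()` is an illustrative stand-in for `svc->datalog_rados->get_log_shard_id()` and 128 is assumed as the `rgw_data_log_num_shards` default:

```cpp
// Sketch: place one full-sync entry per bucket instance on its datalog shard.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

constexpr int num_datalog_shards = 128;  // assumed rgw_data_log_num_shards

int log_shard_for(const std::string& bucket_key) {
  // stand-in for the real shard choice, which hashes the bucket name/tenant
  return std::hash<std::string>{}(bucket_key) % num_datalog_shards;
}

int main() {
  std::vector<std::string> instances = {"tenant/bucket1:id1", "bucket2:id2"};
  std::map<int, std::vector<std::string>> full_sync_index;
  for (const auto& key : instances) {
    // one obligation per bucket, always tagged with shard 0 ("key:0")
    full_sync_index[log_shard_for(key)].push_back(key + ":0");
  }
  for (const auto& [shard, entries] : full_sync_index)
    for (const auto& e : entries)
      std::cout << "shard " << shard << " <- " << e << '\n';
}
```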
+          yield entries_index->append(
+            fmt::format("{}:{}", key, 0),
+            sync_env->svc->datalog_rados->get_log_shard_id(
+              meta_info.data.get_bucket_info().bucket, 0));
+        }
+        truncated = result.truncated;
+      } while (truncated);
+
+      yield {
+        if (!entries_index->finish()) {
+          failed = true;
+        }
+      }
+      if (!failed) {
+        for (auto iter = sync_status->sync_markers.begin();
+             iter != sync_status->sync_markers.end();
+             ++iter) {
+          int shard_id = (int)iter->first;
+          rgw_data_sync_marker& marker = iter->second;
+          marker.total_entries = entries_index->get_total_entries(shard_id);
+          spawn(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(
+                  dpp, sync_env->async_rados, sync_env->svc->sysobj,
+                  rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool,
+                              RGWDataSyncStatusManager::shard_obj_name(
+                                sc->source_zone, shard_id)),
+                  marker),
+                true);
+        }
+      } else {
+        yield call(sync_env->error_logger->log_error_cr(
+                     dpp, sc->conn->get_remote_id(), "data.init", "",
+                     EIO, string("failed to build bucket instances map")));
+      }
+      while (collect(&ret, NULL)) {
+        if (ret < 0) {
+          yield call(sync_env->error_logger->log_error_cr(
+                       dpp, sc->conn->get_remote_id(), "data.init", "",
+                       -ret, string("failed to store sync status: ") +
+                       cpp_strerror(-ret)));
+          req_ret = ret;
+        }
+        yield;
+      }
+      drain_all();
+      if (req_ret < 0) {
+        yield return set_cr_error(req_ret);
+      }
+      yield return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+#define DATA_SYNC_UPDATE_MARKER_WINDOW 1
+
+class RGWDataSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<std::string> {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  string marker_oid;
+  rgw_data_sync_marker sync_marker;
+  RGWSyncTraceNodeRef tn;
+
+public:
+  RGWDataSyncShardMarkerTrack(RGWDataSyncCtx *_sc,
+                              const string& _marker_oid,
+                              const rgw_data_sync_marker& _marker,
+                              RGWSyncTraceNodeRef& _tn) : RGWSyncShardMarkerTrack(DATA_SYNC_UPDATE_MARKER_WINDOW),
+                                                          sc(_sc), sync_env(_sc->env),
+                                                          marker_oid(_marker_oid),
+                                                          sync_marker(_marker),
+                                                          tn(_tn) {}
+
+  RGWCoroutine* store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+    sync_marker.marker = new_marker;
+    sync_marker.pos = index_pos;
+    sync_marker.timestamp = timestamp;
+
+    tn->log(20, SSTR("updating marker marker_oid=" << marker_oid << " marker=" << new_marker));
+
+    return new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj,
+                                                           rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, marker_oid),
+                                                           sync_marker);
+  }
+
+  RGWOrderCallCR *allocate_order_control_cr() override {
+    return new RGWLastCallerWinsCR(sync_env->cct);
+  }
+};
+
+// ostream wrappers to print buckets without copying strings
+struct bucket_str {
+  const rgw_bucket& b;
+  explicit bucket_str(const rgw_bucket& b) : b(b) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_str& rhs) {
+  auto& b = rhs.b;
+  if (!b.tenant.empty()) {
+    out << b.tenant << '/';
+  }
+  out << b.name;
+  if (!b.bucket_id.empty()) {
+    out << ':' << b.bucket_id;
+  }
+  return out;
+}
+
+struct bucket_str_noinstance {
+  const rgw_bucket& b;
+  explicit bucket_str_noinstance(const rgw_bucket& b) : b(b) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_str_noinstance& rhs) {
+  auto& b = rhs.b;
+  if (!b.tenant.empty()) {
+    out << b.tenant << '/';
+  }
+  out << b.name;
+  return out;
+}
+
+struct bucket_shard_str {
+  const rgw_bucket_shard& bs;
+  explicit bucket_shard_str(const rgw_bucket_shard& bs) : bs(bs) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_shard_str& rhs) {
+  auto& bs = rhs.bs;
+  out << bucket_str{bs.bucket};
+ if (bs.shard_id >= 0) { + out << ':' << bs.shard_id; + } + return out; +} + +struct all_bucket_info { + RGWBucketInfo bucket_info; + map attrs; +}; + +struct rgw_sync_pipe_info_entity +{ +private: + RGWBucketInfo bucket_info; + map bucket_attrs; + bool _has_bucket_info{false}; + +public: + rgw_zone_id zone; + + rgw_sync_pipe_info_entity() {} + rgw_sync_pipe_info_entity(const rgw_sync_bucket_entity& e, + std::optional& binfo) { + if (e.zone) { + zone = *e.zone; + } + if (!e.bucket) { + return; + } + if (!binfo || + binfo->bucket_info.bucket != *e.bucket) { + bucket_info.bucket = *e.bucket; + } else { + set_bucket_info(*binfo); + } + } + + void update_empty_bucket_info(const std::map& buckets_info) { + if (_has_bucket_info) { + return; + } + if (bucket_info.bucket.name.empty()) { + return; + } + + auto iter = buckets_info.find(bucket_info.bucket); + if (iter == buckets_info.end()) { + return; + } + + set_bucket_info(iter->second); + } + + bool has_bucket_info() const { + return _has_bucket_info; + } + + void set_bucket_info(const all_bucket_info& all_info) { + bucket_info = all_info.bucket_info; + bucket_attrs = all_info.attrs; + _has_bucket_info = true; + } + + const RGWBucketInfo& get_bucket_info() const { + return bucket_info; + } + + const rgw_bucket& get_bucket() const { + return bucket_info.bucket; + } + + bool operator<(const rgw_sync_pipe_info_entity& e) const { + if (zone < e.zone) { + return false; + } + if (zone > e.zone) { + return true; + } + return (bucket_info.bucket < e.bucket_info.bucket); + } +}; + +std::ostream& operator<<(std::ostream& out, const rgw_sync_pipe_info_entity& e) { + auto& bucket = e.get_bucket_info().bucket; + + out << e.zone << ":" << bucket.get_key(); + return out; +} + +struct rgw_sync_pipe_handler_info { + RGWBucketSyncFlowManager::pipe_handler handler; + rgw_sync_pipe_info_entity source; + rgw_sync_pipe_info_entity target; + + rgw_sync_pipe_handler_info() {} + rgw_sync_pipe_handler_info(const RGWBucketSyncFlowManager::pipe_handler& _handler, + std::optional source_bucket_info, + std::optional target_bucket_info) : handler(_handler), + source(handler.source, source_bucket_info), + target(handler.dest, target_bucket_info) { + } + + bool operator<(const rgw_sync_pipe_handler_info& p) const { + if (source < p.source) { + return true; + } + if (p.source < source) { + return false; + } + return (target < p.target); + } + + void update_empty_bucket_info(const std::map& buckets_info) { + source.update_empty_bucket_info(buckets_info); + target.update_empty_bucket_info(buckets_info); + } +}; + +std::ostream& operator<<(std::ostream& out, const rgw_sync_pipe_handler_info& p) { + out << p.source << ">" << p.target; + return out; +} + +struct rgw_sync_pipe_info_set { + std::set handlers; + + using iterator = std::set::iterator; + + void clear() { + handlers.clear(); + } + + void insert(const RGWBucketSyncFlowManager::pipe_handler& handler, + std::optional& source_bucket_info, + std::optional& target_bucket_info) { + rgw_sync_pipe_handler_info p(handler, source_bucket_info, target_bucket_info); + handlers.insert(p); + } + + iterator begin() { + return handlers.begin(); + } + + iterator end() { + return handlers.end(); + } + + size_t size() const { + return handlers.size(); + } + + bool empty() const { + return handlers.empty(); + } + + void update_empty_bucket_info(const std::map& buckets_info) { + if (buckets_info.empty()) { + return; + } + + std::set p; + + for (auto pipe : handlers) { + pipe.update_empty_bucket_info(buckets_info); + p.insert(pipe); + } + + 
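bucket_str and bucket_shard_str above, together with to_string(bs, gen) earlier in this file, all print the same `tenant/name:instance:shard[gen]` shape. A compact sketch of those formatting rules with illustrative types:

```cpp
// Sketch of the bucket-shard log formatting used by the sync trace output.
#include <cstdint>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>

struct BucketShard {
  std::string tenant, name, bucket_id;
  int shard_id = -1;
};

std::string to_string(const BucketShard& bs, std::optional<uint64_t> gen) {
  std::ostringstream out;
  if (!bs.tenant.empty()) out << bs.tenant << '/';
  out << bs.name;
  if (!bs.bucket_id.empty()) out << ':' << bs.bucket_id;
  if (bs.shard_id >= 0) out << ':' << bs.shard_id;
  out << '[' << gen.value_or(0) << ']';   // generation defaults to 0
  return out.str();
}

int main() {
  std::cout << to_string({"acme", "logs", "abc123", 5}, 2) << '\n';
  // acme/logs:abc123:5[2]
  std::cout << to_string({"", "photos", "", -1}, std::nullopt) << '\n';
  // photos[0]
}
```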
handlers = std::move(p); + } +}; + +class RGWRunBucketSourcesSyncCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + boost::intrusive_ptr lease_cr; + + rgw_sync_pipe_info_set pipes; + rgw_sync_pipe_info_set::iterator siter; + + rgw_bucket_sync_pair_info sync_pair; + + RGWSyncTraceNodeRef tn; + ceph::real_time* progress; + std::vector shard_progress; + std::vector::iterator cur_shard_progress; + + RGWRESTConn *conn{nullptr}; + rgw_zone_id last_zone; + + std::optional gen; + rgw_bucket_index_marker_info marker_info; + BucketIndexShardsManager marker_mgr; + +public: + RGWRunBucketSourcesSyncCR(RGWDataSyncCtx *_sc, + boost::intrusive_ptr lease_cr, + const rgw_bucket_shard& source_bs, + const RGWSyncTraceNodeRef& _tn_parent, + std::optional gen, + ceph::real_time* progress); + + int operate(const DoutPrefixProvider *dpp) override; +}; + +class RGWDataSyncSingleEntryCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw::bucket_sync::Handle state; // cached bucket-shard state + rgw_data_sync_obligation obligation; // input obligation + std::optional complete; // obligation to complete + uint32_t obligation_counter = 0; + RGWDataSyncShardMarkerTrack *marker_tracker; + rgw_raw_obj error_repo; + boost::intrusive_ptr lease_cr; + RGWSyncTraceNodeRef tn; + + ceph::real_time progress; + int sync_status = 0; +public: + RGWDataSyncSingleEntryCR(RGWDataSyncCtx *_sc, rgw::bucket_sync::Handle state, + rgw_data_sync_obligation _obligation, + RGWDataSyncShardMarkerTrack *_marker_tracker, + const rgw_raw_obj& error_repo, + boost::intrusive_ptr lease_cr, + const RGWSyncTraceNodeRef& _tn_parent) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + state(std::move(state)), obligation(std::move(_obligation)), + marker_tracker(_marker_tracker), error_repo(error_repo), + lease_cr(std::move(lease_cr)) { + set_description() << "data sync single entry (source_zone=" << sc->source_zone << ") " << obligation; + tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", to_string(obligation.bs, obligation.gen)); + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + if (state->obligation) { + // this is already syncing in another DataSyncSingleEntryCR + if (state->obligation->timestamp < obligation.timestamp) { + // cancel existing obligation and overwrite it + tn->log(10, SSTR("canceling existing obligation " << *state->obligation)); + complete = std::move(*state->obligation); + *state->obligation = std::move(obligation); + state->counter++; + } else { + // cancel new obligation + tn->log(10, SSTR("canceling new obligation " << obligation)); + complete = std::move(obligation); + } + } else { + // start syncing a new obligation + state->obligation = obligation; + obligation_counter = state->counter; + state->counter++; + + // loop until the latest obligation is satisfied, because other callers + // may update the obligation while we're syncing + while ((state->obligation->timestamp == ceph::real_time() || + state->progress_timestamp < state->obligation->timestamp) && + obligation_counter != state->counter) { + obligation_counter = state->counter; + progress = ceph::real_time{}; + + ldout(cct, 4) << "starting sync on " << bucket_shard_str{state->key.first} + << ' ' << *state->obligation << " progress timestamp " << state->progress_timestamp + << " progress " << progress << dendl; + yield call(new RGWRunBucketSourcesSyncCR(sc, lease_cr, + state->key.first, tn, + state->obligation->gen, + &progress)); + if (retcode < 0) { + break; + } + 
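RGWDataSyncSingleEntryCR above coalesces duplicate sync work per bucket shard: a newer obligation replaces a pending one, the displaced obligation is cancelled and completed immediately, and the counter forces the runner to loop until the newest obligation is satisfied. A simplified sketch of just that bookkeeping, with illustrative types (timestamps are ints here rather than ceph::real_time, and the "run the sync" side is omitted):

```cpp
// Sketch of per-shard obligation coalescing, illustrative types only.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <utility>

struct Obligation {
  std::string marker;
  int timestamp = 0;   // stand-in for ceph::real_time
};

struct ShardState {
  std::optional<Obligation> obligation;  // pending work, if any
  uint32_t counter = 0;                  // bumped on every replacement
};

// Returns the obligation that was cancelled (completed immediately), if any.
std::optional<Obligation> submit(ShardState& s, Obligation incoming) {
  if (s.obligation) {
    if (s.obligation->timestamp < incoming.timestamp) {
      auto cancelled = std::move(*s.obligation);  // overwrite older pending
      *s.obligation = std::move(incoming);
      s.counter++;                                // runner will loop again
      return cancelled;
    }
    return incoming;                              // newer one already pending
  }
  s.obligation = std::move(incoming);             // nothing pending: take it
  s.counter++;
  return std::nullopt;
}

int main() {
  ShardState s;
  submit(s, {"m1", 10});
  auto cancelled = submit(s, {"m2", 20});  // m1 cancelled, m2 pending
  std::cout << cancelled->marker << " -> " << s.obligation->marker << '\n';
}
```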
+        state->progress_timestamp = std::max(progress, state->progress_timestamp);
+      }
+      // any new obligations will process themselves
+      complete = std::move(*state->obligation);
+      state->obligation.reset();
+
+      tn->log(10, SSTR("sync finished on " << bucket_shard_str{state->key.first}
+                       << " progress=" << progress << ' ' << complete << " r=" << retcode));
+    }
+    sync_status = retcode;
+
+    if (sync_status == -ENOENT) {
+      // this was added when 'tenant/' was added to datalog entries, because
+      // preexisting tenant buckets could never sync and would stay in the
+      // error_repo forever
+      tn->log(0, SSTR("WARNING: skipping data log entry for missing bucket " << complete->bs));
+      sync_status = 0;
+    }
+
+    if (sync_status < 0) {
+      // write actual sync failures for 'radosgw-admin sync error list'
+      if (sync_status != -EBUSY && sync_status != -EAGAIN) {
+        yield call(sync_env->error_logger->log_error_cr(dpp, sc->conn->get_remote_id(), "data",
+                                                        to_string(complete->bs, complete->gen),
+                                                        -sync_status, string("failed to sync bucket instance: ") + cpp_strerror(-sync_status)));
+        if (retcode < 0) {
+          tn->log(0, SSTR("ERROR: failed to log sync failure: retcode=" << retcode));
+        }
+      }
+      if (complete->timestamp != ceph::real_time{}) {
+        tn->log(10, SSTR("writing " << *complete << " to error repo for retry"));
+        yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
+                                             rgw::error_repo::encode_key(complete->bs, complete->gen),
+                                             complete->timestamp));
+        if (retcode < 0) {
+          tn->log(0, SSTR("ERROR: failed to log sync failure in error repo: retcode=" << retcode));
+        }
+      }
+    } else if (complete->retry) {
+      yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo,
+                                            rgw::error_repo::encode_key(complete->bs, complete->gen),
+                                            complete->timestamp));
+      if (retcode < 0) {
+        tn->log(0, SSTR("ERROR: failed to remove omap key from error repo ("
+                        << error_repo << " retcode=" << retcode << ")"));
+      }
+    }
+    /* FIXME: what to do in case of error */
+    if (marker_tracker && !complete->marker.empty()) {
+      /* update marker */
+      yield call(marker_tracker->finish(complete->marker));
+    }
+    if (sync_status == 0) {
+      sync_status = retcode;
+    }
+    if (sync_status < 0) {
+      return set_cr_error(sync_status);
+    }
+    return set_cr_done();
+  }
+  return 0;
+  }
+};
+
+rgw_raw_obj datalog_oid_for_error_repo(RGWDataSyncCtx *sc, rgw::sal::RadosStore* driver,
+                                       rgw_pool& pool, rgw_bucket_shard& bs) {
+  int datalog_shard = driver->svc()->datalog_rados->choose_oid(bs);
+  string oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, datalog_shard);
+  return rgw_raw_obj(pool, oid + ".retry");
+}
+
+class RGWDataIncrementalSyncFullObligationCR: public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  rgw_bucket_shard source_bs;
+  rgw_raw_obj error_repo;
+  std::string error_marker;
+  ceph::real_time timestamp;
+  RGWSyncTraceNodeRef tn;
+  rgw_bucket_index_marker_info remote_info;
+  rgw_pool pool;
+  uint32_t sid;
+  rgw_bucket_shard bs;
+  std::vector<store_gen_shards>::const_iterator each;
+
+public:
+  RGWDataIncrementalSyncFullObligationCR(RGWDataSyncCtx *_sc, rgw_bucket_shard& _source_bs,
+                                         const rgw_raw_obj& error_repo, const std::string& _error_marker,
+                                         ceph::real_time& _timestamp, RGWSyncTraceNodeRef& _tn)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), source_bs(_source_bs),
+      error_repo(error_repo), error_marker(_error_marker), timestamp(_timestamp),
+      tn(sync_env->sync_tracer->add_node(_tn, "error_repo", SSTR(bucket_shard_str(source_bs))))
+  {}
+
+  int operate(const DoutPrefixProvider *dpp)
override { + reenter(this) { + yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, source_bs.bucket, &remote_info)); + if (retcode < 0) { + return set_cr_error(retcode); + } + + each = remote_info.generations.cbegin(); + for (; each != remote_info.generations.cend(); each++) { + for (sid = 0; sid < each->num_shards; sid++) { + bs.bucket = source_bs.bucket; + bs.shard_id = sid; + error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs); + tn->log(10, SSTR("writing shard_id " << sid << " of gen " << each->gen << " to error repo for retry")); + yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + rgw::error_repo::encode_key(bs, each->gen), + timestamp), cct->_conf->rgw_data_sync_spawn_window, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + retcode = ret; + } + return 0; + }); + } + } + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, SSTR("writing to error repo returned error: " << ret)); + } + return ret; + }); + + // once everything succeeds, remove the full sync obligation from the error repo + yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo, + error_marker, timestamp)); + return set_cr_done(); + } + return 0; + } +}; + +RGWCoroutine* data_sync_single_entry(RGWDataSyncCtx *sc, const rgw_bucket_shard& src, + std::optional gen, + const std::string marker, + ceph::real_time timestamp, + boost::intrusive_ptr lease_cr, + boost::intrusive_ptr bucket_shard_cache, + RGWDataSyncShardMarkerTrack* marker_tracker, + rgw_raw_obj error_repo, + RGWSyncTraceNodeRef& tn, + bool retry) { + auto state = bucket_shard_cache->get(src, gen); + auto obligation = rgw_data_sync_obligation{src, gen, marker, timestamp, retry}; + return new RGWDataSyncSingleEntryCR(sc, std::move(state), std::move(obligation), + &*marker_tracker, error_repo, + lease_cr.get(), tn); +} + +static ceph::real_time timestamp_for_bucket_shard(rgw::sal::RadosStore* driver, + const rgw_data_sync_status& sync_status, + const rgw_bucket_shard& bs) { + int datalog_shard = driver->svc()->datalog_rados->choose_oid(bs); + auto status = sync_status.sync_markers.find(datalog_shard); + if (status == sync_status.sync_markers.end()) { + return ceph::real_clock::zero(); + } + return status->second.timestamp; +} + +class RGWDataFullSyncSingleEntryCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_pool pool; + rgw_bucket_shard source_bs; + const std::string key; + rgw_data_sync_status sync_status; + rgw_raw_obj error_repo; + ceph::real_time timestamp; + boost::intrusive_ptr lease_cr; + boost::intrusive_ptr bucket_shard_cache; + RGWDataSyncShardMarkerTrack* marker_tracker; + RGWSyncTraceNodeRef tn; + rgw_bucket_index_marker_info remote_info; + uint32_t sid; + std::vector::iterator each; + uint64_t i{0}; + RGWCoroutine* shard_cr = nullptr; + bool first_shard = true; + bool error_inject; + +public: + RGWDataFullSyncSingleEntryCR(RGWDataSyncCtx *_sc, const rgw_pool& _pool, const rgw_bucket_shard& _source_bs, + const std::string& _key, const rgw_data_sync_status& sync_status, const rgw_raw_obj& _error_repo, + ceph::real_time _timestamp, boost::intrusive_ptr _lease_cr, + boost::intrusive_ptr _bucket_shard_cache, + RGWDataSyncShardMarkerTrack* _marker_tracker, + RGWSyncTraceNodeRef& _tn) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), pool(_pool), source_bs(_source_bs), key(_key), + error_repo(_error_repo), timestamp(_timestamp), lease_cr(std::move(_lease_cr)), + 
bucket_shard_cache(_bucket_shard_cache), marker_tracker(_marker_tracker), tn(_tn) { + error_inject = (sync_env->cct->_conf->rgw_sync_data_full_inject_err_probability > 0); + } + + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + if (error_inject && + rand() % 10000 < cct->_conf->rgw_sync_data_full_inject_err_probability * 10000.0) { + tn->log(0, SSTR("injecting read bilog info error on key=" << key)); + retcode = -ENOENT; + } else { + tn->log(0, SSTR("read bilog info key=" << key)); + yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, source_bs.bucket, &remote_info)); + } + + if (retcode < 0) { + tn->log(10, SSTR("full sync: failed to read remote bucket info. Writing " + << source_bs.shard_id << " to error repo for retry")); + yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + rgw::error_repo::encode_key(source_bs, std::nullopt), + timestamp)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to log " << source_bs.shard_id << " in error repo: retcode=" << retcode)); + } + yield call(marker_tracker->finish(key)); + return set_cr_error(retcode); + } + + //wait to sync the first shard of the oldest generation and then sync all other shards. + //if any of the operations fail at any time, write them into error repo for later retry. + + each = remote_info.generations.begin(); + for (; each != remote_info.generations.end(); each++) { + for (sid = 0; sid < each->num_shards; sid++) { + source_bs.shard_id = sid; + // use the error repo and sync status timestamp from the datalog shard corresponding to source_bs + error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs); + timestamp = timestamp_for_bucket_shard(sync_env->driver, sync_status, source_bs); + if (retcode < 0) { + tn->log(10, SSTR("Write " << source_bs.shard_id << " to error repo for retry")); + yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + rgw::error_repo::encode_key(source_bs, each->gen), + timestamp), cct->_conf->rgw_data_sync_spawn_window, std::nullopt); + } else { + shard_cr = data_sync_single_entry(sc, source_bs, each->gen, key, timestamp, + lease_cr, bucket_shard_cache, nullptr, error_repo, tn, false); + tn->log(10, SSTR("full sync: syncing shard_id " << sid << " of gen " << each->gen)); + if (first_shard) { + yield call(shard_cr); + first_shard = false; + } else { + yield_spawn_window(shard_cr, cct->_conf->rgw_data_sync_spawn_window, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + retcode = ret; + } + return retcode; + }); + } + } + } + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + retcode = ret; + } + return retcode; + }); + } + + yield call(marker_tracker->finish(key)); + + return set_cr_done(); + } + return 0; + } +}; + +class RGWDataBaseSyncShardCR : public RGWCoroutine { +protected: + RGWDataSyncCtx *const sc; + const rgw_pool& pool; + const uint32_t shard_id; + rgw_data_sync_marker& sync_marker; + RGWSyncTraceNodeRef tn; + const string& status_oid; + const rgw_raw_obj& error_repo; + boost::intrusive_ptr lease_cr; + const rgw_data_sync_status& sync_status; + boost::intrusive_ptr bucket_shard_cache; + + std::optional marker_tracker; + RGWRadosGetOmapValsCR::ResultPtr omapvals; + rgw_bucket_shard source_bs; + + int parse_bucket_key(const std::string& key, rgw_bucket_shard& bs) const { + return rgw_bucket_parse_bucket_key(sc->env->cct, key, + &bs.bucket, &bs.shard_id); + } + + RGWDataBaseSyncShardCR( + RGWDataSyncCtx *const _sc, const rgw_pool& pool, const uint32_t 
shard_id, + rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, + const string& status_oid, const rgw_raw_obj& error_repo, + boost::intrusive_ptr lease_cr, + const rgw_data_sync_status& sync_status, + const boost::intrusive_ptr& bucket_shard_cache) + : RGWCoroutine(_sc->cct), sc(_sc), pool(pool), shard_id(shard_id), + sync_marker(sync_marker), tn(tn), status_oid(status_oid), + error_repo(error_repo), lease_cr(std::move(lease_cr)), + sync_status(sync_status), bucket_shard_cache(bucket_shard_cache) {} +}; + +class RGWDataFullSyncShardCR : public RGWDataBaseSyncShardCR { + static constexpr auto OMAP_GET_MAX_ENTRIES = 100; + + string oid; + uint64_t total_entries = 0; + ceph::real_time entry_timestamp; + std::map entries; + std::map::iterator iter; + string error_marker; + +public: + + RGWDataFullSyncShardCR( + RGWDataSyncCtx *const sc, const rgw_pool& pool, const uint32_t shard_id, + rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, + const string& status_oid, const rgw_raw_obj& error_repo, + boost::intrusive_ptr lease_cr, + const rgw_data_sync_status& sync_status, + const boost::intrusive_ptr& bucket_shard_cache) + : RGWDataBaseSyncShardCR(sc, pool, shard_id, sync_marker, tn, + status_oid, error_repo, std::move(lease_cr), + sync_status, bucket_shard_cache) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + tn->log(10, "start full sync"); + oid = full_data_sync_index_shard_oid(sc->source_zone, shard_id); + marker_tracker.emplace(sc, status_oid, sync_marker, tn); + total_entries = sync_marker.pos; + entry_timestamp = sync_marker.timestamp; // time when full sync started + do { + if (!lease_cr->is_locked()) { + drain_all(); + tn->log(1, "lease is lost, abort"); + return set_cr_error(-ECANCELED); + } + omapvals = std::make_shared(); + yield call(new RGWRadosGetOmapValsCR(sc->env->driver, + rgw_raw_obj(pool, oid), + sync_marker.marker, + OMAP_GET_MAX_ENTRIES, omapvals)); + if (retcode < 0) { + drain_all(); + return set_cr_error(retcode); + } + entries = std::move(omapvals->entries); + if (entries.size() > 0) { + tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ + } + tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync")); + iter = entries.begin(); + for (; iter != entries.end(); ++iter) { + retcode = parse_bucket_key(iter->first, source_bs); + if (retcode < 0) { + tn->log(1, SSTR("failed to parse bucket shard: " << iter->first)); + marker_tracker->try_update_high_marker(iter->first, 0, + entry_timestamp); + continue; + } + tn->log(20, SSTR("full sync: " << iter->first)); + total_entries++; + if (!marker_tracker->start(iter->first, total_entries, + entry_timestamp)) { + tn->log(0, SSTR("ERROR: cannot start syncing " << iter->first + << ". 
Duplicate entry?")); + } else { + tn->log(10, SSTR("timestamp for " << iter->first << " is :" << entry_timestamp)); + yield_spawn_window(new RGWDataFullSyncSingleEntryCR( + sc, pool, source_bs, iter->first, sync_status, + error_repo, entry_timestamp, lease_cr, + bucket_shard_cache, &*marker_tracker, tn), + cct->_conf->rgw_data_sync_spawn_window, + std::nullopt); + } + sync_marker.marker = iter->first; + } + } while (omapvals->more); + omapvals.reset(); + + drain_all(); + + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + + /* update marker to reflect we're done with full sync */ + sync_marker.state = rgw_data_sync_marker::IncrementalSync; + sync_marker.marker = sync_marker.next_step_marker; + sync_marker.next_step_marker.clear(); + yield call(new RGWSimpleRadosWriteCR( + sc->env->dpp,sc->env->async_rados, sc->env->svc->sysobj, + rgw_raw_obj(pool, status_oid), sync_marker)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to set sync marker: retcode=" << retcode)); + return set_cr_error(retcode); + } + + // clean up full sync index, ignoring errors + yield call(new RGWRadosRemoveCR(sc->env->driver, {pool, oid})); + + // transition to incremental sync + return set_cr_done(); + } + return 0; + } +}; + +class RGWDataIncSyncShardCR : public RGWDataBaseSyncShardCR { + static constexpr int max_error_entries = 10; + static constexpr uint32_t retry_backoff_secs = 60; + + ceph::mutex& inc_lock; + bc::flat_set& modified_shards; + + bc::flat_set current_modified; + decltype(current_modified)::iterator modified_iter; + + ceph::coarse_real_time error_retry_time; + string error_marker; + std::map error_entries; + decltype(error_entries)::iterator iter; + ceph::real_time entry_timestamp; + std::optional gen; + + string next_marker; + vector log_entries; + decltype(log_entries)::iterator log_iter; + bool truncated = false; + + utime_t get_idle_interval() const { + ceph::timespan interval = std::chrono::seconds(cct->_conf->rgw_data_sync_poll_interval); + if (!ceph::coarse_real_clock::is_zero(error_retry_time)) { + auto now = ceph::coarse_real_clock::now(); + if (error_retry_time > now) { + auto d = error_retry_time - now; + if (interval > d) { + interval = d; + } + } + } + // convert timespan -> time_point -> utime_t + return utime_t(ceph::coarse_real_clock::zero() + interval); + } + + +public: + + RGWDataIncSyncShardCR( + RGWDataSyncCtx *const sc, const rgw_pool& pool, const uint32_t shard_id, + rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, + const string& status_oid, const rgw_raw_obj& error_repo, + boost::intrusive_ptr lease_cr, + const rgw_data_sync_status& sync_status, + const boost::intrusive_ptr& bucket_shard_cache, + ceph::mutex& inc_lock, + bc::flat_set& modified_shards) + : RGWDataBaseSyncShardCR(sc, pool, shard_id, sync_marker, tn, + status_oid, error_repo, std::move(lease_cr), + sync_status, bucket_shard_cache), + inc_lock(inc_lock), modified_shards(modified_shards) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + tn->log(10, "start incremental sync"); + marker_tracker.emplace(sc, status_oid, sync_marker, tn); + do { + if (!lease_cr->is_locked()) { + drain_all(); + tn->log(1, "lease is lost, abort"); + return set_cr_error(-ECANCELED); + } + { + current_modified.clear(); + std::unique_lock il(inc_lock); + current_modified.swap(modified_shards); + il.unlock(); + } + + if (current_modified.size() > 0) { + tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ + } + /* process out of band updates */ + for (modified_iter = 
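
The full-sync loop above pages through the shard's omap index with a resume marker (`sync_marker.marker`), processing `OMAP_GET_MAX_ENTRIES` keys per round trip. A synchronous sketch of the same pagination against plain librados, assuming an already-initialized `IoCtx` and with error handling trimmed:

```cpp
#include <rados/librados.hpp>
#include <map>
#include <string>

void scan_omap(librados::IoCtx& ioctx, const std::string& oid)
{
  std::string marker;   // resume point, like sync_marker.marker above
  bool more = true;
  while (more) {
    std::map<std::string, librados::bufferlist> vals;
    int rval = 0;
    librados::ObjectReadOperation op;
    op.omap_get_vals2(marker, 100 /* page size */, &vals, &more, &rval);
    if (ioctx.operate(oid, &op, nullptr) < 0 || rval < 0) {
      return;           // a real caller would surface the error
    }
    for (const auto& kv : vals) {
      marker = kv.first;  // advance the resume marker past this entry
      // ... hand kv off to a per-entry sync operation here ...
    }
    if (vals.empty()) {
      break;            // defensive: avoid spinning on an empty page
    }
  }
}
```
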
current_modified.begin(); + modified_iter != current_modified.end(); + ++modified_iter) { + retcode = parse_bucket_key(modified_iter->key, source_bs); + if (retcode < 0) { + tn->log(1, SSTR("failed to parse bucket shard: " + << modified_iter->key)); + continue; + } + tn->log(20, SSTR("received async update notification: " + << modified_iter->key)); + spawn(data_sync_single_entry(sc, source_bs, modified_iter->gen, {}, + ceph::real_time{}, lease_cr, + bucket_shard_cache, &*marker_tracker, + error_repo, tn, false), false); + } + + if (error_retry_time <= ceph::coarse_real_clock::now()) { + /* process bucket shards that previously failed */ + omapvals = std::make_shared(); + yield call(new RGWRadosGetOmapValsCR(sc->env->driver, error_repo, + error_marker, max_error_entries, + omapvals)); + error_entries = std::move(omapvals->entries); + tn->log(20, SSTR("read error repo, got " << error_entries.size() + << " entries")); + iter = error_entries.begin(); + for (; iter != error_entries.end(); ++iter) { + error_marker = iter->first; + entry_timestamp = rgw::error_repo::decode_value(iter->second); + retcode = rgw::error_repo::decode_key(iter->first, source_bs, gen); + if (retcode == -EINVAL) { + // backward compatibility for string keys that don't encode a gen + retcode = parse_bucket_key(error_marker, source_bs); + } + if (retcode < 0) { + tn->log(1, SSTR("failed to parse bucket shard: " << error_marker)); + spawn(rgw::error_repo::remove_cr(sc->env->driver->svc()->rados, + error_repo, error_marker, + entry_timestamp), + false); + continue; + } + tn->log(10, SSTR("gen is " << gen)); + if (!gen) { + // write all full sync obligations for the bucket to error repo + spawn(new RGWDataIncrementalSyncFullObligationCR(sc, source_bs, + error_repo, error_marker, entry_timestamp, tn), false); + } else { + tn->log(20, SSTR("handle error entry key=" + << to_string(source_bs, gen) + << " timestamp=" << entry_timestamp)); + spawn(data_sync_single_entry(sc, source_bs, gen, "", + entry_timestamp, lease_cr, + bucket_shard_cache, &*marker_tracker, + error_repo, tn, true), false); + } + } + if (!omapvals->more) { + error_retry_time = ceph::coarse_real_clock::now() + + make_timespan(retry_backoff_secs); + error_marker.clear(); + } + } + omapvals.reset(); + + tn->log(20, SSTR("shard_id=" << shard_id << " sync_marker=" + << sync_marker.marker)); + yield call(new RGWReadRemoteDataLogShardCR(sc, shard_id, + sync_marker.marker, + &next_marker, &log_entries, + &truncated)); + if (retcode < 0 && retcode != -ENOENT) { + tn->log(0, SSTR("ERROR: failed to read remote data log info: ret=" + << retcode)); + drain_all(); + return set_cr_error(retcode); + } + + if (log_entries.size() > 0) { + tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ + } + + for (log_iter = log_entries.begin(); + log_iter != log_entries.end(); + ++log_iter) { + tn->log(20, SSTR("shard_id=" << shard_id << " log_entry: " + << log_iter->log_id << ":" << log_iter->log_timestamp + << ":" << log_iter->entry.key)); + retcode = parse_bucket_key(log_iter->entry.key, source_bs); + if (retcode < 0) { + tn->log(1, SSTR("failed to parse bucket shard: " + << log_iter->entry.key)); + marker_tracker->try_update_high_marker(log_iter->log_id, 0, + log_iter->log_timestamp); + continue; + } + if (!marker_tracker->start(log_iter->log_id, 0, + log_iter->log_timestamp)) { + tn->log(0, SSTR("ERROR: cannot start syncing " << log_iter->log_id + << ". 
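
The exact on-disk key format is not visible in this diff; the helper below is a hypothetical stand-in for `rgw::error_repo::decode_key` that illustrates the backward-compatibility contract used above: newer entries carry a generation suffix, entries written before generations existed do not, and `-EINVAL` routes the caller to the legacy `parse_bucket_key()` path.

```cpp
#include <cerrno>
#include <cstdint>
#include <optional>
#include <string>

// Hypothetical decoder: "<bucket-shard-key>:<gen>" for new entries,
// a bare bucket-shard key for pre-generation entries.
int decode_error_key(const std::string& key,
                     std::string* bucket_shard,
                     std::optional<uint64_t>* gen)
{
  auto pos = key.rfind(':');
  if (pos == std::string::npos) {
    return -EINVAL;  // old-style key: fall back to parse_bucket_key()
  }
  try {
    *gen = std::stoull(key.substr(pos + 1));
  } catch (const std::exception&) {
    return -EINVAL;  // suffix was not a generation number
  }
  *bucket_shard = key.substr(0, pos);
  return 0;
}
```
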
Duplicate entry?")); + } else { + tn->log(1, SSTR("incremental sync on " << log_iter->entry.key + << "shard: " << shard_id << "on gen " + << log_iter->entry.gen)); + yield_spawn_window( + data_sync_single_entry(sc, source_bs,log_iter->entry.gen, + log_iter->log_id, log_iter->log_timestamp, + lease_cr,bucket_shard_cache, + &*marker_tracker, error_repo, tn, false), + cct->_conf->rgw_data_sync_spawn_window, std::nullopt); + } + } + + tn->log(20, SSTR("shard_id=" << shard_id << + " sync_marker="<< sync_marker.marker + << " next_marker=" << next_marker + << " truncated=" << truncated)); + if (!next_marker.empty()) { + sync_marker.marker = next_marker; + } else if (!log_entries.empty()) { + sync_marker.marker = log_entries.back().log_id; + } + if (!truncated) { + // we reached the end, wait a while before checking for more + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + yield wait(get_idle_interval()); + } + } while (true); + } + return 0; + } +}; + +class RGWDataSyncShardCR : public RGWCoroutine { + RGWDataSyncCtx *const sc; + const rgw_pool pool; + const uint32_t shard_id; + rgw_data_sync_marker& sync_marker; + rgw_data_sync_status sync_status; + const RGWSyncTraceNodeRef tn; + bool *reset_backoff; + + ceph::mutex inc_lock = ceph::make_mutex("RGWDataSyncShardCR::inc_lock"); + ceph::condition_variable inc_cond; + + RGWDataSyncEnv *const sync_env{ sc->env }; + + const string status_oid{ RGWDataSyncStatusManager::shard_obj_name( + sc->source_zone, shard_id) }; + const rgw_raw_obj error_repo{ pool, status_oid + ".retry" }; + + // target number of entries to cache before recycling idle ones + static constexpr size_t target_cache_size = 256; + boost::intrusive_ptr bucket_shard_cache { + rgw::bucket_sync::Cache::create(target_cache_size) }; + + boost::intrusive_ptr lease_cr; + boost::intrusive_ptr lease_stack; + + bc::flat_set modified_shards; + +public: + RGWDataSyncShardCR(RGWDataSyncCtx* const _sc, const rgw_pool& pool, + const uint32_t shard_id, rgw_data_sync_marker& marker, + const rgw_data_sync_status& sync_status, + RGWSyncTraceNodeRef& tn, bool *reset_backoff) + : RGWCoroutine(_sc->cct), sc(_sc), pool(pool), shard_id(shard_id), + sync_marker(marker), sync_status(sync_status), tn(tn), + reset_backoff(reset_backoff) { + set_description() << "data sync shard source_zone=" << sc->source_zone + << " shard_id=" << shard_id; + } + + ~RGWDataSyncShardCR() override { + if (lease_cr) { + lease_cr->abort(); + } + } + + void append_modified_shards(bc::flat_set& entries) { + std::lock_guard l{inc_lock}; + modified_shards.insert(entries.begin(), entries.end()); + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield init_lease_cr(); + while (!lease_cr->is_locked()) { + if (lease_cr->is_done()) { + tn->log(5, "failed to take lease"); + set_status("lease lock failed, early abort"); + drain_all(); + return set_cr_error(lease_cr->get_ret_status()); + } + set_sleeping(true); + yield; + } + *reset_backoff = true; + tn->log(10, "took lease"); + + while (true) { + if (sync_marker.state == rgw_data_sync_marker::FullSync) { + yield call(new RGWDataFullSyncShardCR(sc, pool, shard_id, + sync_marker, tn, + status_oid, error_repo, + lease_cr, sync_status, + bucket_shard_cache)); + if (retcode < 0) { + if (retcode != -EBUSY) { + tn->log(10, SSTR("full sync failed (retcode=" << retcode << ")")); + } + lease_cr->go_down(); + drain_all(); + return set_cr_error(retcode); + } + } else if (sync_marker.state == rgw_data_sync_marker::IncrementalSync) { + yield call(new RGWDataIncSyncShardCR(sc, pool, 
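
`get_idle_interval()` above clamps the configured poll interval by the error-repo retry deadline, so a pending retry is never overslept. The same computation restated in plain std::chrono; an unset (zero) retry time compares `<= now` and leaves the interval untouched, matching the `is_zero` guard in the original:

```cpp
#include <algorithm>
#include <chrono>

using Clock = std::chrono::steady_clock;

Clock::duration idle_interval(Clock::duration poll_interval,
                              Clock::time_point error_retry_time)
{
  const auto now = Clock::now();
  if (error_retry_time > now) {  // an error retry is pending in the future
    poll_interval = std::min(poll_interval, error_retry_time - now);
  }
  return poll_interval;          // sleep this long before polling again
}
```
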
shard_id, + sync_marker, tn, + status_oid, error_repo, + lease_cr, sync_status, + bucket_shard_cache, + inc_lock, modified_shards)); + if (retcode < 0) { + if (retcode != -EBUSY) { + tn->log(10, SSTR("incremental sync failed (retcode=" << retcode + << ")")); + } + lease_cr->go_down(); + drain_all(); + return set_cr_error(retcode); + } + } else { + lease_cr->go_down(); + drain_all(); + return set_cr_error(-EIO); + } + } + } + return 0; + } + + void init_lease_cr() { + set_status("acquiring sync lock"); + uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; + string lock_name = "sync_lock"; + if (lease_cr) { + lease_cr->abort(); + } + auto driver = sync_env->driver; + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, driver, + rgw_raw_obj(pool, status_oid), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); + } +}; + +class RGWDataSyncShardControlCR : public RGWBackoffControlCR { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + rgw_pool pool; + + uint32_t shard_id; + rgw_data_sync_marker sync_marker; + rgw_data_sync_status sync_status; + + RGWSyncTraceNodeRef tn; +public: + RGWDataSyncShardControlCR(RGWDataSyncCtx *_sc, const rgw_pool& _pool, + uint32_t _shard_id, rgw_data_sync_marker& _marker, const rgw_data_sync_status& sync_status, + RGWSyncTraceNodeRef& _tn_parent) : RGWBackoffControlCR(_sc->cct, false), + sc(_sc), sync_env(_sc->env), + pool(_pool), + shard_id(_shard_id), + sync_marker(_marker) { + tn = sync_env->sync_tracer->add_node(_tn_parent, "shard", std::to_string(shard_id)); + } + + RGWCoroutine *alloc_cr() override { + return new RGWDataSyncShardCR(sc, pool, shard_id, sync_marker, sync_status, tn, backoff_ptr()); + } + + RGWCoroutine *alloc_finisher_cr() override { + return new RGWSimpleRadosReadCR(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id)), + &sync_marker); + } + + void append_modified_shards(bc::flat_set& keys) { + std::lock_guard l{cr_lock()}; + + RGWDataSyncShardCR *cr = static_cast(get_cr()); + if (!cr) { + return; + } + + cr->append_modified_shards(keys); + } +}; + +class RGWDataSyncCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + uint32_t num_shards; + + rgw_data_sync_status sync_status; + + ceph::mutex shard_crs_lock = + ceph::make_mutex("RGWDataSyncCR::shard_crs_lock"); + map shard_crs; + + bool *reset_backoff; + + RGWSyncTraceNodeRef tn; + + RGWDataSyncModule *data_sync_module{nullptr}; +public: + RGWDataSyncCR(RGWDataSyncCtx *_sc, uint32_t _num_shards, RGWSyncTraceNodeRef& _tn, bool *_reset_backoff) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + num_shards(_num_shards), + reset_backoff(_reset_backoff), tn(_tn) { + + } + + ~RGWDataSyncCR() override { + for (auto iter : shard_crs) { + iter.second->put(); + } + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + + /* read sync status */ + yield call(new RGWReadDataSyncStatusCoroutine(sc, &sync_status)); + + data_sync_module = sync_env->sync_module->get_data_handler(); + + if (retcode < 0 && retcode != -ENOENT) { + tn->log(0, SSTR("ERROR: failed to fetch sync status, retcode=" << retcode)); + return set_cr_error(retcode); + } + + /* state: init status */ + if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateInit) { + tn->log(20, SSTR("init")); + sync_status.sync_info.num_shards = num_shards; + uint64_t 
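
The control loop above dispatches on the persisted marker state; full sync rewrites the marker to `IncrementalSync` before returning, so the next iteration naturally picks up incremental sync under the same lease. Reduced to its skeleton, with stand-in functions in place of the two shard coroutines:

```cpp
#include <cerrno>

enum class ShardState { FullSync, IncrementalSync };

// stand-ins for RGWDataFullSyncShardCR / RGWDataIncSyncShardCR
int do_full_sync(ShardState& s) { s = ShardState::IncrementalSync; return 0; }
int do_incremental_sync() { return -EBUSY; }  // e.g. the lease was lost

int run_shard(ShardState& state)
{
  while (true) {
    int r = (state == ShardState::FullSync) ? do_full_sync(state)
                                            : do_incremental_sync();
    if (r < 0) {
      return r;  // caller releases the lease, drains children, backs off
    }
  }
}
```
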
instance_id; + instance_id = ceph::util::generate_random_number(); + yield call(new RGWInitDataSyncStatusCoroutine(sc, num_shards, instance_id, tn, &sync_status)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to init sync, retcode=" << retcode)); + return set_cr_error(retcode); + } + // sets state = StateBuildingFullSyncMaps + + *reset_backoff = true; + } + + data_sync_module->init(sc, sync_status.sync_info.instance_id); + + if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateBuildingFullSyncMaps) { + tn->log(10, SSTR("building full sync maps")); + /* call sync module init here */ + sync_status.sync_info.num_shards = num_shards; + yield call(data_sync_module->init_sync(dpp, sc)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: sync module init_sync() failed, retcode=" << retcode)); + return set_cr_error(retcode); + } + /* state: building full sync maps */ + yield call(new RGWListBucketIndexesCR(sc, &sync_status)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to build full sync maps, retcode=" << retcode)); + return set_cr_error(retcode); + } + sync_status.sync_info.state = rgw_data_sync_info::StateSync; + + /* update new state */ + yield call(set_sync_info_cr()); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to write sync status, retcode=" << retcode)); + return set_cr_error(retcode); + } + + *reset_backoff = true; + } + + yield call(data_sync_module->start_sync(dpp, sc)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to start sync, retcode=" << retcode)); + return set_cr_error(retcode); + } + + yield { + if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateSync) { + tn->log(10, SSTR("spawning " << num_shards << " shards sync")); + for (map::iterator iter = sync_status.sync_markers.begin(); + iter != sync_status.sync_markers.end(); ++iter) { + RGWDataSyncShardControlCR *cr = new RGWDataSyncShardControlCR(sc, sync_env->svc->zone->get_zone_params().log_pool, + iter->first, iter->second, sync_status, tn); + cr->get(); + shard_crs_lock.lock(); + shard_crs[iter->first] = cr; + shard_crs_lock.unlock(); + spawn(cr, true); + } + } + } + + return set_cr_done(); + } + return 0; + } + + RGWCoroutine *set_sync_info_cr() { + return new RGWSimpleRadosWriteCR(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sc->source_zone)), + sync_status.sync_info); + } + + void wakeup(int shard_id, bc::flat_set& entries) { + std::lock_guard l{shard_crs_lock}; + map::iterator iter = shard_crs.find(shard_id); + if (iter == shard_crs.end()) { + return; + } + iter->second->append_modified_shards(entries); + iter->second->wakeup(); + } +}; + +class RGWDefaultDataSyncModule : public RGWDataSyncModule { +public: + RGWDefaultDataSyncModule() {} + + RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override; + RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; + RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, 
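
The top-level progression driven above persists each state transition before moving on, so a restarted gateway resumes exactly where it left off. A skeleton of that three-state machine, with states mirroring `rgw_data_sync_info` and a stand-in for the status write:

```cpp
#include <cstdint>

enum class SyncState : uint16_t { Init, BuildingFullSyncMaps, Sync };

int persist(SyncState) { return 0; }  // stand-in for writing sync status

int run_data_sync(SyncState state)
{
  if (state == SyncState::Init) {
    // initialize per-shard status/markers, then advance
    state = SyncState::BuildingFullSyncMaps;
    if (int r = persist(state); r < 0) return r;
  }
  if (state == SyncState::BuildingFullSyncMaps) {
    // list bucket indexes into the full-sync shards, then advance
    state = SyncState::Sync;
    if (int r = persist(state); r < 0) return r;
  }
  // state == Sync: spawn one shard-control coroutine per datalog shard
  return 0;
}
```
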
rgw_zone_set *zones_trace) override; +}; + +class RGWDefaultSyncModuleInstance : public RGWSyncModuleInstance { + RGWDefaultDataSyncModule data_handler; +public: + RGWDefaultSyncModuleInstance() {} + RGWDataSyncModule *get_data_handler() override { + return &data_handler; + } + bool supports_user_writes() override { + return true; + } +}; + +int RGWDefaultSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) +{ + instance->reset(new RGWDefaultSyncModuleInstance()); + return 0; +} + +class RGWUserPermHandler { + friend struct Init; + friend class Bucket; + + RGWDataSyncEnv *sync_env; + rgw_user uid; + + struct _info { + RGWUserInfo user_info; + rgw::IAM::Environment env; + std::unique_ptr identity; + RGWAccessControlPolicy user_acl; + }; + + std::shared_ptr<_info> info; + + struct Init; + + std::shared_ptr init_action; + + struct Init : public RGWGenericAsyncCR::Action { + RGWDataSyncEnv *sync_env; + + rgw_user uid; + std::shared_ptr info; + + int ret{0}; + + Init(RGWUserPermHandler *handler) : sync_env(handler->sync_env), + uid(handler->uid), + info(handler->info) {} + int operate() override { + auto user_ctl = sync_env->driver->getRados()->ctl.user; + + ret = user_ctl->get_info_by_uid(sync_env->dpp, uid, &info->user_info, null_yield); + if (ret < 0) { + return ret; + } + + info->identity = rgw::auth::transform_old_authinfo(sync_env->cct, + uid, + RGW_PERM_FULL_CONTROL, + false, /* system_request? */ + TYPE_RGW); + + map uattrs; + + ret = user_ctl->get_attrs_by_uid(sync_env->dpp, uid, &uattrs, null_yield); + if (ret == 0) { + ret = RGWUserPermHandler::policy_from_attrs(sync_env->cct, uattrs, &info->user_acl); + } + if (ret == -ENOENT) { + info->user_acl.create_default(uid, info->user_info.display_name); + } + + return 0; + } + }; + +public: + RGWUserPermHandler(RGWDataSyncEnv *_sync_env, + const rgw_user& _uid) : sync_env(_sync_env), + uid(_uid) {} + + RGWCoroutine *init_cr() { + info = make_shared<_info>(); + init_action = make_shared(this); + + return new RGWGenericAsyncCR(sync_env->cct, + sync_env->async_rados, + init_action); + } + + class Bucket { + RGWDataSyncEnv *sync_env; + std::shared_ptr<_info> info; + RGWAccessControlPolicy bucket_acl; + std::optional ps; + public: + Bucket() {} + + int init(RGWUserPermHandler *handler, + const RGWBucketInfo& bucket_info, + const map& bucket_attrs); + + bool verify_bucket_permission(int perm); + bool verify_object_permission(const map& obj_attrs, + int perm); + }; + + static int policy_from_attrs(CephContext *cct, + const map& attrs, + RGWAccessControlPolicy *acl) { + acl->set_ctx(cct); + + auto aiter = attrs.find(RGW_ATTR_ACL); + if (aiter == attrs.end()) { + return -ENOENT; + } + auto iter = aiter->second.begin(); + try { + acl->decode(iter); + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: " << __func__ << "(): could not decode policy, caught buffer::error" << dendl; + return -EIO; + } + + return 0; + } + + int init_bucket(const RGWBucketInfo& bucket_info, + const map& bucket_attrs, + Bucket *bs) { + return bs->init(this, bucket_info, bucket_attrs); + } +}; + +int RGWUserPermHandler::Bucket::init(RGWUserPermHandler *handler, + const RGWBucketInfo& bucket_info, + const map& bucket_attrs) +{ + sync_env = handler->sync_env; + info = handler->info; + + int r = RGWUserPermHandler::policy_from_attrs(sync_env->cct, bucket_attrs, &bucket_acl); + if (r < 0) { + return r; + } + + ps.emplace(sync_env->cct, + info->env, + info->identity.get(), + 
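
RGWUserPermHandler front-loads everything needed to answer permission questions (user info, identity, user ACL) off the I/O path. Its intended call pattern, condensed from its use in RGWObjFetchCR further down and shown as linear pseudo-usage since the real calls are spread across coroutine yields:

```cpp
// Pseudo-usage of RGWUserPermHandler (not standalone code):
//
//   RGWUserPermHandler perms(sync_env, *param_user);
//   yield call(perms.init_cr());          // load user info, identity, ACL
//
//   RGWUserPermHandler::Bucket dest_perms;
//   perms.init_bucket(dest_bucket_info, dest_bucket_attrs, &dest_perms);
//   if (!dest_perms.verify_bucket_permission(RGW_PERM_WRITE)) {
//     return -EPERM;                      // user may not write to the dest
//   }
```
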
bucket_info, + info->identity->get_perm_mask(), + false, /* defer to bucket acls */ + nullptr, /* referer */ + false); /* request_payer */ + + return 0; +} + +bool RGWUserPermHandler::Bucket::verify_bucket_permission(int perm) +{ + return verify_bucket_permission_no_policy(sync_env->dpp, + &(*ps), + &info->user_acl, + &bucket_acl, + perm); +} + +bool RGWUserPermHandler::Bucket::verify_object_permission(const map& obj_attrs, + int perm) +{ + RGWAccessControlPolicy obj_acl; + + int r = policy_from_attrs(sync_env->cct, obj_attrs, &obj_acl); + if (r < 0) { + return r; + } + + return verify_bucket_permission_no_policy(sync_env->dpp, + &(*ps), + &bucket_acl, + &obj_acl, + perm); +} + +class RGWFetchObjFilter_Sync : public RGWFetchObjFilter_Default { + rgw_bucket_sync_pipe sync_pipe; + + std::shared_ptr bucket_perms; + std::optional verify_dest_params; + + std::optional mtime; + std::optional etag; + std::optional obj_size; + + std::unique_ptr identity; + + std::shared_ptr need_retry; + +public: + RGWFetchObjFilter_Sync(rgw_bucket_sync_pipe& _sync_pipe, + std::shared_ptr& _bucket_perms, + std::optional&& _verify_dest_params, + std::shared_ptr& _need_retry) : sync_pipe(_sync_pipe), + bucket_perms(_bucket_perms), + verify_dest_params(std::move(_verify_dest_params)), + need_retry(_need_retry) { + *need_retry = false; + } + + int filter(CephContext *cct, + const rgw_obj_key& source_key, + const RGWBucketInfo& dest_bucket_info, + std::optional dest_placement_rule, + const map& obj_attrs, + std::optional *poverride_owner, + const rgw_placement_rule **prule) override; +}; + +int RGWFetchObjFilter_Sync::filter(CephContext *cct, + const rgw_obj_key& source_key, + const RGWBucketInfo& dest_bucket_info, + std::optional dest_placement_rule, + const map& obj_attrs, + std::optional *poverride_owner, + const rgw_placement_rule **prule) +{ + int abort_err = -ERR_PRECONDITION_FAILED; + + rgw_sync_pipe_params params; + + RGWObjTags obj_tags; + + auto iter = obj_attrs.find(RGW_ATTR_TAGS); + if (iter != obj_attrs.end()) { + try { + auto it = iter->second.cbegin(); + obj_tags.decode(it); + } catch (buffer::error &err) { + ldout(cct, 0) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl; + } + } + + if (!sync_pipe.info.handler.find_obj_params(source_key, + obj_tags.get_tags(), + ¶ms)) { + return abort_err; + } + + if (verify_dest_params && + !(*verify_dest_params == params.dest)) { + /* raced! 
original dest params were different, will need to retry */ + ldout(cct, 0) << "WARNING: " << __func__ << ": pipe dest params are different than original params, must have raced with object rewrite, retrying" << dendl; + *need_retry = true; + return -ECANCELED; + } + + std::optional > new_attrs; + + if (params.dest.acl_translation) { + rgw_user& acl_translation_owner = params.dest.acl_translation->owner; + if (!acl_translation_owner.empty()) { + if (params.mode == rgw_sync_pipe_params::MODE_USER && + acl_translation_owner != dest_bucket_info.owner) { + ldout(cct, 0) << "ERROR: " << __func__ << ": acl translation was requested, but user (" << acl_translation_owner + << ") is not dest bucket owner (" << dest_bucket_info.owner << ")" << dendl; + return -EPERM; + } + *poverride_owner = acl_translation_owner; + } + } + if (params.mode == rgw_sync_pipe_params::MODE_USER) { + if (!bucket_perms->verify_object_permission(obj_attrs, RGW_PERM_READ)) { + ldout(cct, 0) << "ERROR: " << __func__ << ": permission check failed: user not allowed to fetch object" << dendl; + return -EPERM; + } + } + + if (!dest_placement_rule && + params.dest.storage_class) { + dest_rule.storage_class = *params.dest.storage_class; + dest_rule.inherit_from(dest_bucket_info.placement_rule); + dest_placement_rule = dest_rule; + *prule = &dest_rule; + } + + return RGWFetchObjFilter_Default::filter(cct, + source_key, + dest_bucket_info, + dest_placement_rule, + obj_attrs, + poverride_owner, + prule); +} + +class RGWObjFetchCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_bucket_sync_pipe& sync_pipe; + rgw_obj_key& key; + std::optional dest_key; + std::optional versioned_epoch; + rgw_zone_set *zones_trace; + + bool need_more_info{false}; + bool check_change{false}; + + ceph::real_time src_mtime; + uint64_t src_size; + string src_etag; + map src_attrs; + map src_headers; + + std::optional param_user; + rgw_sync_pipe_params::Mode param_mode; + + std::optional user_perms; + std::shared_ptr source_bucket_perms; + RGWUserPermHandler::Bucket dest_bucket_perms; + + std::optional dest_params; + + int try_num{0}; + std::shared_ptr need_retry; +public: + RGWObjFetchCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, + rgw_obj_key& _key, + std::optional _dest_key, + std::optional _versioned_epoch, + rgw_zone_set *_zones_trace) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + sync_pipe(_sync_pipe), + key(_key), + dest_key(_dest_key), + versioned_epoch(_versioned_epoch), + zones_trace(_zones_trace) { + } + + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + +#define MAX_RACE_RETRIES_OBJ_FETCH 10 + for (try_num = 0; try_num < MAX_RACE_RETRIES_OBJ_FETCH; ++try_num) { + + { + std::optional param_acl_translation; + std::optional param_storage_class; + + if (!sync_pipe.info.handler.find_basic_info_without_tags(key, + ¶m_user, + ¶m_acl_translation, + ¶m_storage_class, + ¶m_mode, + &need_more_info)) { + if (!need_more_info) { + return set_cr_error(-ERR_PRECONDITION_FAILED); + } + } + } + + if (need_more_info) { + ldout(cct, 20) << "Could not determine exact policy rule for obj=" << key << ", will read source object attributes" << dendl; + /* + * we need to fetch info about source object, so that we can determine + * the correct policy configuration. 
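
The storage-class override at the end of `filter()` synthesizes a placement rule (`dest_rule`, a member of the filter class) from the destination bucket's rule with only the storage class swapped. A simplified model of that step, assuming `inherit_from` copies any fields left empty from the parent rule:

```cpp
#include <string>

struct PlacementRule {        // simplified stand-in for rgw_placement_rule
  std::string name;
  std::string storage_class;
  void inherit_from(const PlacementRule& parent) {
    if (name.empty()) name = parent.name;
    if (storage_class.empty()) storage_class = parent.storage_class;
  }
};

PlacementRule make_dest_rule(const PlacementRule& bucket_rule,
                             const std::string& pipe_storage_class)
{
  PlacementRule rule;
  rule.storage_class = pipe_storage_class;  // keep the pipe's storage class
  rule.inherit_from(bucket_rule);           // fill the rest from the bucket
  return rule;
}
```
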
This can happen if there are multiple + * policy rules, and some depend on the object tagging */ + yield call(new RGWStatRemoteObjCR(sync_env->async_rados, + sync_env->driver, + sc->source_zone, + sync_pipe.info.source_bs.bucket, + key, + &src_mtime, + &src_size, + &src_etag, + &src_attrs, + &src_headers)); + if (retcode < 0) { + return set_cr_error(retcode); + } + + RGWObjTags obj_tags; + + auto iter = src_attrs.find(RGW_ATTR_TAGS); + if (iter != src_attrs.end()) { + try { + auto it = iter->second.cbegin(); + obj_tags.decode(it); + } catch (buffer::error &err) { + ldout(cct, 0) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl; + } + } + + rgw_sync_pipe_params params; + if (!sync_pipe.info.handler.find_obj_params(key, + obj_tags.get_tags(), + ¶ms)) { + return set_cr_error(-ERR_PRECONDITION_FAILED); + } + + param_user = params.user; + param_mode = params.mode; + + dest_params = params.dest; + } + + if (param_mode == rgw_sync_pipe_params::MODE_USER) { + if (!param_user) { + ldout(cct, 20) << "ERROR: " << __func__ << ": user level sync but user param not set" << dendl; + return set_cr_error(-EPERM); + } + user_perms.emplace(sync_env, *param_user); + + yield call(user_perms->init_cr()); + if (retcode < 0) { + ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init user perms manager for uid=" << *param_user << dendl; + return set_cr_error(retcode); + } + + /* verify that user is allowed to write at the target bucket */ + int r = user_perms->init_bucket(sync_pipe.dest_bucket_info, + sync_pipe.dest_bucket_attrs, + &dest_bucket_perms); + if (r < 0) { + ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init bucket perms manager for uid=" << *param_user << " bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << dendl; + return set_cr_error(retcode); + } + + if (!dest_bucket_perms.verify_bucket_permission(RGW_PERM_WRITE)) { + ldout(cct, 0) << "ERROR: " << __func__ << ": permission check failed: user not allowed to write into bucket (bucket=" << sync_pipe.info.dest_bucket.get_key() << ")" << dendl; + return -EPERM; + } + + /* init source bucket permission structure */ + source_bucket_perms = make_shared(); + r = user_perms->init_bucket(sync_pipe.source_bucket_info, + sync_pipe.source_bucket_attrs, + source_bucket_perms.get()); + if (r < 0) { + ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init bucket perms manager for uid=" << *param_user << " bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << dendl; + return set_cr_error(retcode); + } + } + + yield { + if (!need_retry) { + need_retry = make_shared(); + } + auto filter = make_shared(sync_pipe, + source_bucket_perms, + std::move(dest_params), + need_retry); + + call(new RGWFetchRemoteObjCR(sync_env->async_rados, sync_env->driver, sc->source_zone, + nullopt, + sync_pipe.info.source_bs.bucket, + std::nullopt, sync_pipe.dest_bucket_info, + key, dest_key, versioned_epoch, + true, + std::static_pointer_cast(filter), + zones_trace, sync_env->counters, dpp)); + } + if (retcode < 0) { + if (*need_retry) { + continue; + } + return set_cr_error(retcode); + } + + return set_cr_done(); + } + + ldout(cct, 0) << "ERROR: " << __func__ << ": Too many retries trying to fetch object, possibly a bug: bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << " key=" << key << dendl; + + return set_cr_error(-EIO); + } + return 0; + } +}; + +RGWCoroutine *RGWDefaultDataSyncModule::sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, 
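
RGWObjFetchCR wraps its whole resolve-then-fetch sequence in a bounded retry loop: the filter raises `need_retry` when the pipe's destination parameters changed between the policy lookup and the fetch, and the coroutine recomputes and tries again. The loop in miniature, with a stand-in for the fetch:

```cpp
#include <cerrno>

constexpr int MAX_RACE_RETRIES = 10;

// stand-in: performs the policy lookup + fetch, setting raced when the
// pipe's dest params changed underneath (the filter's need_retry flag)
int fetch_once(bool& raced) { raced = false; return 0; }

int fetch_object()
{
  for (int try_num = 0; try_num < MAX_RACE_RETRIES; ++try_num) {
    bool raced = false;
    int r = fetch_once(raced);
    if (r < 0 && raced) {
      continue;   // parameters moved under us: recompute and retry
    }
    return r;     // done, or a non-retryable error
  }
  return -EIO;    // too many races in a row: likely a bug, give up
}
```
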
std::optional versioned_epoch, rgw_zone_set *zones_trace) +{ + return new RGWObjFetchCR(sc, sync_pipe, key, std::nullopt, versioned_epoch, zones_trace); +} + +RGWCoroutine *RGWDefaultDataSyncModule::remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, + real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) +{ + auto sync_env = sc->env; + return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone, + sync_pipe.dest_bucket_info, key, versioned, versioned_epoch, + NULL, NULL, false, &mtime, zones_trace); +} + +RGWCoroutine *RGWDefaultDataSyncModule::create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) +{ + auto sync_env = sc->env; + return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone, + sync_pipe.dest_bucket_info, key, versioned, versioned_epoch, + &owner.id, &owner.display_name, true, &mtime, zones_trace); +} + +class RGWArchiveDataSyncModule : public RGWDefaultDataSyncModule { +public: + RGWArchiveDataSyncModule() {} + + RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override; + RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; + RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; +}; + +class RGWArchiveSyncModuleInstance : public RGWDefaultSyncModuleInstance { + RGWArchiveDataSyncModule data_handler; +public: + RGWArchiveSyncModuleInstance() {} + RGWDataSyncModule *get_data_handler() override { + return &data_handler; + } + RGWMetadataHandler *alloc_bucket_meta_handler() override { + return RGWArchiveBucketMetaHandlerAllocator::alloc(); + } + RGWBucketInstanceMetadataHandlerBase *alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver) override { + return RGWArchiveBucketInstanceMetaHandlerAllocator::alloc(driver); + } +}; + +int RGWArchiveSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) +{ + instance->reset(new RGWArchiveSyncModuleInstance()); + return 0; +} + +RGWCoroutine *RGWArchiveDataSyncModule::sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) +{ + auto sync_env = sc->env; + ldout(sc->cct, 5) << "SYNC_ARCHIVE: sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl; + if (!sync_pipe.dest_bucket_info.versioned() || + (sync_pipe.dest_bucket_info.flags & BUCKET_VERSIONS_SUSPENDED)) { + ldout(sc->cct, 0) << "SYNC_ARCHIVE: sync_object: enabling object versioning for archive bucket" << dendl; + sync_pipe.dest_bucket_info.flags = (sync_pipe.dest_bucket_info.flags & ~BUCKET_VERSIONS_SUSPENDED) | BUCKET_VERSIONED; + 
int op_ret = sync_env->driver->getRados()->put_bucket_instance_info(sync_pipe.dest_bucket_info, false, real_time(), NULL, sync_env->dpp); + if (op_ret < 0) { + ldpp_dout(sync_env->dpp, 0) << "SYNC_ARCHIVE: sync_object: error versioning archive bucket" << dendl; + return NULL; + } + } + + std::optional dest_key; + + if (versioned_epoch.value_or(0) == 0) { /* force version if not set */ + versioned_epoch = 0; + dest_key = key; + if (key.instance.empty()) { + sync_env->driver->getRados()->gen_rand_obj_instance_name(&(*dest_key)); + } + } + + return new RGWObjFetchCR(sc, sync_pipe, key, dest_key, versioned_epoch, zones_trace); +} + +RGWCoroutine *RGWArchiveDataSyncModule::remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, + real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) +{ + ldout(sc->cct, 0) << "SYNC_ARCHIVE: remove_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch << dendl; + return NULL; +} + +RGWCoroutine *RGWArchiveDataSyncModule::create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) +{ + ldout(sc->cct, 0) << "SYNC_ARCHIVE: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime + << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; + auto sync_env = sc->env; + return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone, + sync_pipe.dest_bucket_info, key, versioned, versioned_epoch, + &owner.id, &owner.display_name, true, &mtime, zones_trace); +} + +class RGWDataSyncControlCR : public RGWBackoffControlCR +{ + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + uint32_t num_shards; + + RGWSyncTraceNodeRef tn; + + static constexpr bool exit_on_error = false; // retry on all errors +public: + RGWDataSyncControlCR(RGWDataSyncCtx *_sc, uint32_t _num_shards, + RGWSyncTraceNodeRef& _tn_parent) : RGWBackoffControlCR(_sc->cct, exit_on_error), + sc(_sc), sync_env(_sc->env), num_shards(_num_shards) { + tn = sync_env->sync_tracer->add_node(_tn_parent, "sync"); + } + + RGWCoroutine *alloc_cr() override { + return new RGWDataSyncCR(sc, num_shards, tn, backoff_ptr()); + } + + void wakeup(int shard_id, bc::flat_set& entries) { + ceph::mutex& m = cr_lock(); + + m.lock(); + RGWDataSyncCR *cr = static_cast(get_cr()); + if (!cr) { + m.unlock(); + return; + } + + cr->get(); + m.unlock(); + + if (cr) { + cr->wakeup(shard_id, entries); + } + + cr->put(); + } +}; + +void RGWRemoteDataLog::wakeup(int shard_id, bc::flat_set& entries) { + std::shared_lock rl{lock}; + if (!data_sync_cr) { + return; + } + data_sync_cr->wakeup(shard_id, entries); +} + +int RGWRemoteDataLog::run_sync(const DoutPrefixProvider *dpp, int num_shards) +{ + lock.lock(); + data_sync_cr = new RGWDataSyncControlCR(&sc, num_shards, tn); + data_sync_cr->get(); // run() will drop a ref, so take another + lock.unlock(); + + int r = run(dpp, data_sync_cr); + + lock.lock(); + data_sync_cr->put(); + data_sync_cr = NULL; + lock.unlock(); + + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to run sync" << dendl; + return r; + } + return 0; +} + +CephContext *RGWDataSyncStatusManager::get_cct() const +{ + return driver->ctx(); +} + +int RGWDataSyncStatusManager::init(const DoutPrefixProvider *dpp) +{ + 
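
The archive module's `sync_object()` never overwrites in place: versioning is forced on the destination bucket, and an unversioned source object gets a synthesized version id so every overwrite lands as a new version. That key/epoch handling in isolation, with a stub for `gen_rand_obj_instance_name()`:

```cpp
#include <cstdint>
#include <optional>
#include <string>

struct ObjKey {
  std::string name;
  std::string instance;  // version id
};

// stand-in for gen_rand_obj_instance_name()
void gen_rand_instance(ObjKey* key) { key->instance = "random-instance"; }

ObjKey archive_dest_key(const ObjKey& key,
                        std::optional<uint64_t>& versioned_epoch)
{
  ObjKey dest = key;
  if (versioned_epoch.value_or(0) == 0) {  // source object is unversioned
    versioned_epoch = 0;                   // pin a concrete epoch
    if (dest.instance.empty()) {
      gen_rand_instance(&dest);            // give the copy its own version id
    }
  }
  return dest;
}
```
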
RGWZone *zone_def; + + if (!(zone_def = driver->svc()->zone->find_zone(source_zone))) { + ldpp_dout(this, 0) << "ERROR: failed to find zone config info for zone=" << source_zone << dendl; + return -EIO; + } + + if (!driver->svc()->sync_modules->get_manager()->supports_data_export(zone_def->tier_type)) { + return -ENOTSUP; + } + + const RGWZoneParams& zone_params = driver->svc()->zone->get_zone_params(); + + if (sync_module == nullptr) { + sync_module = driver->get_sync_module(); + } + + conn = driver->svc()->zone->get_zone_conn(source_zone); + if (!conn) { + ldpp_dout(this, 0) << "connection object to zone " << source_zone << " does not exist" << dendl; + return -EINVAL; + } + + error_logger = new RGWSyncErrorLogger(driver, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS); + + int r = source_log.init(source_zone, conn, error_logger, driver->getRados()->get_sync_tracer(), + sync_module, counters); + if (r < 0) { + ldpp_dout(this, 0) << "ERROR: failed to init remote log, r=" << r << dendl; + finalize(); + return r; + } + + rgw_datalog_info datalog_info; + r = source_log.read_log_info(dpp, &datalog_info); + if (r < 0) { + ldpp_dout(this, 5) << "ERROR: master.read_log_info() returned r=" << r << dendl; + finalize(); + return r; + } + + num_shards = datalog_info.num_shards; + + for (int i = 0; i < num_shards; i++) { + shard_objs[i] = rgw_raw_obj(zone_params.log_pool, shard_obj_name(source_zone, i)); + } + + return 0; +} + +void RGWDataSyncStatusManager::finalize() +{ + delete error_logger; + error_logger = nullptr; +} + +unsigned RGWDataSyncStatusManager::get_subsys() const +{ + return dout_subsys; +} + +std::ostream& RGWDataSyncStatusManager::gen_prefix(std::ostream& out) const +{ + auto zone = std::string_view{source_zone.id}; + return out << "data sync zone:" << zone.substr(0, 8) << ' '; +} + +string RGWDataSyncStatusManager::sync_status_oid(const rgw_zone_id& source_zone) +{ + char buf[datalog_sync_status_oid_prefix.size() + source_zone.id.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%s", datalog_sync_status_oid_prefix.c_str(), source_zone.id.c_str()); + + return string(buf); +} + +string RGWDataSyncStatusManager::shard_obj_name(const rgw_zone_id& source_zone, int shard_id) +{ + char buf[datalog_sync_status_shard_prefix.size() + source_zone.id.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%s.%d", datalog_sync_status_shard_prefix.c_str(), source_zone.id.c_str(), shard_id); + + return string(buf); +} + +class RGWInitBucketShardSyncStatusCoroutine : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + const rgw_bucket_sync_pair_info& sync_pair; + const string sync_status_oid; + + rgw_bucket_shard_sync_info& status; + RGWObjVersionTracker& objv_tracker; + const BucketIndexShardsManager& marker_mgr; + bool exclusive; +public: + RGWInitBucketShardSyncStatusCoroutine(RGWDataSyncCtx *_sc, + const rgw_bucket_sync_pair_info& _sync_pair, + rgw_bucket_shard_sync_info& _status, + uint64_t gen, + const BucketIndexShardsManager& _marker_mgr, + RGWObjVersionTracker& objv_tracker, + bool exclusive) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + sync_pair(_sync_pair), + sync_status_oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, _sync_pair, gen)), + status(_status), objv_tracker(objv_tracker), marker_mgr(_marker_mgr), exclusive(exclusive) + {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield { + rgw_raw_obj obj(sync_env->svc->zone->get_zone_params().log_pool, sync_status_oid); + + // whether or not to do full 
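
The status-object names built above follow a fixed scheme: one object per source zone for overall status, one per datalog shard for shard status, with the `.retry` suffix seen earlier hanging an error repo off each shard object. An equivalent of the `snprintf` in `shard_obj_name()`, assuming an illustrative prefix value (the real constant is defined elsewhere in this file):

```cpp
#include <string>

// Assumed prefix for illustration only.
const std::string shard_prefix = "datalog.sync-status.shard";

// "<prefix>.<zone-id>.<shard-id>", e.g. "datalog.sync-status.shard.8f3a.7"
std::string shard_obj_name(const std::string& zone_id, int shard_id)
{
  return shard_prefix + "." + zone_id + "." + std::to_string(shard_id);
}
```
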
sync, incremental sync will follow anyway + if (sync_env->sync_module->should_full_sync()) { + const auto max_marker = marker_mgr.get(sync_pair.source_bs.shard_id, ""); + status.inc_marker.position = max_marker; + } + status.inc_marker.timestamp = ceph::real_clock::now(); + status.state = rgw_bucket_shard_sync_info::StateIncrementalSync; + + map attrs; + status.encode_all_attrs(attrs); + call(new RGWSimpleRadosWriteAttrsCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + obj, attrs, &objv_tracker, exclusive)); + } + + if (retcode < 0) { + ldout(cct, 20) << "ERROR: init marker position failed. error: " << retcode << dendl; + return set_cr_error(retcode); + } + ldout(cct, 20) << "init marker position: " << status.inc_marker.position << + ". written to shard status object: " << sync_status_oid << dendl; + return set_cr_done(); + } + return 0; + } +}; + +#define BUCKET_SYNC_ATTR_PREFIX RGW_ATTR_PREFIX "bucket-sync." + +template +static bool decode_attr(CephContext *cct, map& attrs, const string& attr_name, T *val) +{ + map::iterator iter = attrs.find(attr_name); + if (iter == attrs.end()) { + *val = T(); + return false; + } + + auto biter = iter->second.cbegin(); + try { + decode(*val, biter); + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: failed to decode attribute: " << attr_name << dendl; + return false; + } + return true; +} + +void rgw_bucket_shard_sync_info::decode_from_attrs(CephContext *cct, map& attrs) +{ + if (!decode_attr(cct, attrs, BUCKET_SYNC_ATTR_PREFIX "state", &state)) { + decode_attr(cct, attrs, "state", &state); + } + if (!decode_attr(cct, attrs, BUCKET_SYNC_ATTR_PREFIX "inc_marker", &inc_marker)) { + decode_attr(cct, attrs, "inc_marker", &inc_marker); + } +} + +void rgw_bucket_shard_sync_info::encode_all_attrs(map& attrs) +{ + encode_state_attr(attrs); + inc_marker.encode_attr(attrs); +} + +void rgw_bucket_shard_sync_info::encode_state_attr(map& attrs) +{ + using ceph::encode; + encode(state, attrs[BUCKET_SYNC_ATTR_PREFIX "state"]); +} + +void rgw_bucket_shard_full_sync_marker::encode_attr(map& attrs) +{ + using ceph::encode; + encode(*this, attrs[BUCKET_SYNC_ATTR_PREFIX "full_marker"]); +} + +void rgw_bucket_shard_inc_sync_marker::encode_attr(map& attrs) +{ + using ceph::encode; + encode(*this, attrs[BUCKET_SYNC_ATTR_PREFIX "inc_marker"]); +} + +class RGWReadBucketPipeSyncStatusCoroutine : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + string oid; + rgw_bucket_shard_sync_info *status; + RGWObjVersionTracker* objv_tracker; + map attrs; +public: + RGWReadBucketPipeSyncStatusCoroutine(RGWDataSyncCtx *_sc, + const rgw_bucket_sync_pair_info& sync_pair, + rgw_bucket_shard_sync_info *_status, + RGWObjVersionTracker* objv_tracker, + uint64_t gen) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, gen)), + status(_status), objv_tracker(objv_tracker) + {} + int operate(const DoutPrefixProvider *dpp) override; +}; + +int RGWReadBucketPipeSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + yield call(new RGWSimpleRadosReadAttrsCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, oid), + &attrs, true, objv_tracker)); + if (retcode == -ENOENT) { + *status = rgw_bucket_shard_sync_info(); + return set_cr_done(); + } + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to call fetch bucket shard info oid=" << oid << " ret=" << retcode << dendl; + return 
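
The per-shard status round-trip stores each field in its own xattr under `BUCKET_SYNC_ATTR_PREFIX`, and `decode_attr()` tolerates both a missing attribute (reset to default) and a decode failure. A simplified model with `std::string` standing in for `ceph::bufferlist`, including the legacy un-prefixed fallback used by `decode_from_attrs()`:

```cpp
#include <map>
#include <string>

const std::string PREFIX = "user.rgw.bucket-sync.";  // illustrative value

// Mirrors decode_attr(): a missing (or undecodable) attribute resets the
// field to its default instead of failing the whole status load.
bool decode_attr(const std::map<std::string, std::string>& attrs,
                 const std::string& attr_name, std::string* val)
{
  auto it = attrs.find(attr_name);
  if (it == attrs.end()) {
    *val = {};          // absent: default-construct and report "not found"
    return false;
  }
  *val = it->second;    // the real code decodes a bufferlist here
  return true;
}

int main()
{
  std::map<std::string, std::string> attrs = {{PREFIX + "state", "incremental"}};
  std::string state;
  if (!decode_attr(attrs, PREFIX + "state", &state)) {
    decode_attr(attrs, "state", &state);  // legacy un-prefixed fallback
  }
}
```
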
set_cr_error(retcode); + } + status->decode_from_attrs(sync_env->cct, attrs); + return set_cr_done(); + } + return 0; +} + +// wrap ReadSyncStatus and set a flag if it's not in incremental +class CheckBucketShardStatusIsIncremental : public RGWReadBucketPipeSyncStatusCoroutine { + bool* result; + rgw_bucket_shard_sync_info status; + public: + CheckBucketShardStatusIsIncremental(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, + bool* result) + : RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &status, nullptr, 0 /*no gen in compat mode*/), + result(result) + {} + + int operate(const DoutPrefixProvider *dpp) override { + int r = RGWReadBucketPipeSyncStatusCoroutine::operate(dpp); + if (state == RGWCoroutine_Done && + status.state != rgw_bucket_shard_sync_info::StateIncrementalSync) { + *result = false; + } + return r; + } +}; + +class CheckAllBucketShardStatusIsIncremental : public RGWShardCollectCR { + // start with 1 shard, and only spawn more if we detect an existing shard. + // this makes the backward compatilibility check far less expensive in the + // general case where no shards exist + static constexpr int initial_concurrent_shards = 1; + static constexpr int max_concurrent_shards = 16; + + RGWDataSyncCtx* sc; + rgw_bucket_sync_pair_info sync_pair; + const int num_shards; + bool* result; + int shard = 0; + public: + CheckAllBucketShardStatusIsIncremental(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, + int num_shards, bool* result) + : RGWShardCollectCR(sc->cct, initial_concurrent_shards), + sc(sc), sync_pair(sync_pair), num_shards(num_shards), result(result) + {} + + bool spawn_next() override { + // stop spawning if we saw any errors or non-incremental shards + if (shard >= num_shards || status < 0 || !*result) { + return false; + } + sync_pair.source_bs.shard_id = shard++; + spawn(new CheckBucketShardStatusIsIncremental(sc, sync_pair, result), false); + return true; + } + + private: + int handle_result(int r) override { + if (r < 0) { + ldout(cct, 4) << "failed to read bucket shard status: " + << cpp_strerror(r) << dendl; + } else if (shard == 0) { + // enable concurrency once the first shard succeeds + max_concurrent = max_concurrent_shards; + } + return r; + } +}; + +// wrap InitBucketShardSyncStatus with local storage for 'status' and 'objv' +// and a loop to retry on racing writes +class InitBucketShardStatusCR : public RGWCoroutine { + RGWDataSyncCtx* sc; + rgw_bucket_sync_pair_info pair; + rgw_bucket_shard_sync_info status; + RGWObjVersionTracker objv; + const uint64_t gen; + const BucketIndexShardsManager& marker_mgr; + + public: + InitBucketShardStatusCR(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& pair, + uint64_t gen, + const BucketIndexShardsManager& marker_mgr) + : RGWCoroutine(sc->cct), sc(sc), pair(pair), gen(gen), marker_mgr(marker_mgr) + {} + int operate(const DoutPrefixProvider *dpp) { + reenter(this) { + // non exclusive create with empty status + objv.generate_new_write_ver(cct); + yield call(new RGWInitBucketShardSyncStatusCoroutine(sc, pair, status, gen, marker_mgr, objv, false)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +class InitBucketShardStatusCollectCR : public RGWShardCollectCR { + static constexpr int max_concurrent_shards = 16; + RGWDataSyncCtx* sc; + rgw_bucket_sync_pair_info sync_pair; + const uint64_t gen; + const BucketIndexShardsManager& marker_mgr; + + const int num_shards; + int shard = 0; + + int handle_result(int r) override { 
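
The compatibility scan above deliberately starts with a single in-flight shard and only widens once that probe finds something, so the common case where no per-shard status objects exist costs one read rather than sixteen. A synchronous sketch of that widening window:

```cpp
#include <algorithm>
#include <functional>

// check is a stand-in for the per-shard status-read coroutine.
void collect_shards(int num_shards, const std::function<int(int)>& check)
{
  int window = 1;                          // initial_concurrent_shards
  int shard = 0;
  while (shard < num_shards) {
    const int batch_end = std::min(shard + window, num_shards);
    for (; shard < batch_end; ++shard) {   // the "in flight" set, serialized
      if (check(shard) < 0) {
        return;                            // stop spawning on any error
      }
    }
    window = 16;                           // max_concurrent_shards afterwards
  }
}
```
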
+ if (r < 0) { + ldout(cct, 4) << "failed to init bucket shard status: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + InitBucketShardStatusCollectCR(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, + uint64_t gen, + const BucketIndexShardsManager& marker_mgr, + int num_shards) + : RGWShardCollectCR(sc->cct, max_concurrent_shards), + sc(sc), sync_pair(sync_pair), gen(gen), marker_mgr(marker_mgr), num_shards(num_shards) + {} + + bool spawn_next() override { + if (shard >= num_shards || status < 0) { // stop spawning on any errors + return false; + } + sync_pair.source_bs.shard_id = shard++; + spawn(new InitBucketShardStatusCR(sc, sync_pair, gen, marker_mgr), false); + return true; + } +}; + +class RemoveBucketShardStatusCR : public RGWCoroutine { + RGWDataSyncCtx* const sc; + RGWDataSyncEnv* const sync_env; + + rgw_bucket_sync_pair_info sync_pair; + rgw_raw_obj obj; + RGWObjVersionTracker objv; + +public: + RemoveBucketShardStatusCR(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, uint64_t gen) + : RGWCoroutine(sc->cct), sc(sc), sync_env(sc->env), + sync_pair(sync_pair), + obj(sync_env->svc->zone->get_zone_params().log_pool, + RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, gen)) + {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield call(new RGWRadosRemoveCR(sync_env->driver, obj, &objv)); + if (retcode < 0 && retcode != -ENOENT) { + ldout(cct, 20) << "ERROR: failed to remove bucket shard status for: " << sync_pair << + ". with error: " << retcode << dendl; + return set_cr_error(retcode); + } + ldout(cct, 20) << "removed bucket shard status object: " << obj.oid << dendl; + return set_cr_done(); + } + return 0; + } +}; + +class RemoveBucketShardStatusCollectCR : public RGWShardCollectCR { + static constexpr int max_concurrent_shards = 16; + RGWDataSyncCtx* const sc; + RGWDataSyncEnv* const sync_env; + rgw_bucket_sync_pair_info sync_pair; + const uint64_t gen; + + const int num_shards; + int shard = 0; + + int handle_result(int r) override { + if (r < 0) { + ldout(cct, 4) << "failed to remove bucket shard status object: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + RemoveBucketShardStatusCollectCR(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, + uint64_t gen, + int num_shards) + : RGWShardCollectCR(sc->cct, max_concurrent_shards), + sc(sc), sync_env(sc->env), sync_pair(sync_pair), gen(gen), num_shards(num_shards) + {} + + bool spawn_next() override { + if (shard >= num_shards) { + return false; + } + sync_pair.source_bs.shard_id = shard++; + spawn(new RemoveBucketShardStatusCR(sc, sync_pair, gen), false); + return true; + } +}; + +class InitBucketFullSyncStatusCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + const rgw_bucket_sync_pair_info& sync_pair; + const rgw_raw_obj& status_obj; + rgw_bucket_sync_status& status; + RGWObjVersionTracker& objv; + const RGWBucketInfo& source_info; + const bool check_compat; + + const rgw_bucket_index_marker_info& info; + BucketIndexShardsManager marker_mgr; + + bool all_incremental = true; + bool no_zero = false; + +public: + InitBucketFullSyncStatusCR(RGWDataSyncCtx* sc, + const rgw_bucket_sync_pair_info& sync_pair, + const rgw_raw_obj& status_obj, + rgw_bucket_sync_status& status, + RGWObjVersionTracker& objv, + const RGWBucketInfo& source_info, + bool check_compat, + const rgw_bucket_index_marker_info& info) + : RGWCoroutine(sc->cct), sc(sc), sync_env(sc->env), + 
sync_pair(sync_pair), status_obj(status_obj), + status(status), objv(objv), source_info(source_info), + check_compat(check_compat), info(info) + {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + retcode = marker_mgr.from_string(info.max_marker, -1); + if (retcode < 0) { + lderr(cct) << "failed to parse bilog shard markers: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + status.state = BucketSyncState::Init; + + if (info.oldest_gen == 0) { + if (check_compat) { + // use shard count from our log gen=0 + // try to convert existing per-shard incremental status for backward compatibility + if (source_info.layout.logs.empty() || + source_info.layout.logs.front().gen > 0) { + ldpp_dout(dpp, 20) << "no generation zero when checking compatibility" << dendl; + no_zero = true; + } else if (auto& log = source_info.layout.logs.front(); + log.layout.type != rgw::BucketLogType::InIndex) { + ldpp_dout(dpp, 20) << "unrecognized log layout type when checking compatibility " << log.layout.type << dendl; + no_zero = true; + } + if (!no_zero) { + yield { + const int num_shards0 = + source_info.layout.logs.front().layout.in_index.layout.num_shards; + call(new CheckAllBucketShardStatusIsIncremental(sc, sync_pair, + num_shards0, + &all_incremental)); + } + if (retcode < 0) { + return set_cr_error(retcode); + } + if (all_incremental) { + // we can use existing status and resume incremental sync + status.state = BucketSyncState::Incremental; + } + } else { + all_incremental = false; + } + } + } + + if (status.state != BucketSyncState::Incremental) { + // initialize all shard sync status. this will populate the log marker + // positions where incremental sync will resume after full sync + yield { + const int num_shards = marker_mgr.get().size(); + call(new InitBucketShardStatusCollectCR(sc, sync_pair, info.latest_gen, marker_mgr, num_shards)); + } + if (retcode < 0) { + ldout(cct, 20) << "failed to init bucket shard status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + if (sync_env->sync_module->should_full_sync()) { + status.state = BucketSyncState::Full; + } else { + status.state = BucketSyncState::Incremental; + } + } + + status.shards_done_with_gen.resize(marker_mgr.get().size()); + status.incremental_gen = info.latest_gen; + + ldout(cct, 20) << "writing bucket sync status during init. state=" << status.state << ". 
marker=" << status.full.position.to_str() << dendl; + + // write bucket sync status + using CR = RGWSimpleRadosWriteCR; + yield call(new CR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + status_obj, status, &objv, false)); + if (retcode < 0) { + ldout(cct, 20) << "failed to write bucket shard status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +#define OMAP_READ_MAX_ENTRIES 10 +class RGWReadRecoveringBucketShardsCoroutine : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw::sal::RadosStore* driver; + + const int shard_id; + int max_entries; + + set& recovering_buckets; + string marker; + string error_oid; + + RGWRadosGetOmapKeysCR::ResultPtr omapkeys; + set error_entries; + int max_omap_entries; + int count; + +public: + RGWReadRecoveringBucketShardsCoroutine(RGWDataSyncCtx *_sc, const int _shard_id, + set& _recovering_buckets, const int _max_entries) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + driver(sync_env->driver), shard_id(_shard_id), max_entries(_max_entries), + recovering_buckets(_recovering_buckets), max_omap_entries(OMAP_READ_MAX_ENTRIES) + { + error_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id) + ".retry"; + } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int RGWReadRecoveringBucketShardsCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this){ + //read recovering bucket shards + count = 0; + do { + omapkeys = std::make_shared(); + yield call(new RGWRadosGetOmapKeysCR(driver, rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, error_oid), + marker, max_omap_entries, omapkeys)); + + if (retcode == -ENOENT) { + break; + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "failed to read recovering bucket shards with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + error_entries = std::move(omapkeys->entries); + if (error_entries.empty()) { + break; + } + + count += error_entries.size(); + marker = *error_entries.rbegin(); + recovering_buckets.insert(std::make_move_iterator(error_entries.begin()), + std::make_move_iterator(error_entries.end())); + } while (omapkeys->more && count < max_entries); + + return set_cr_done(); + } + + return 0; +} + +class RGWReadPendingBucketShardsCoroutine : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw::sal::RadosStore* driver; + + const int shard_id; + int max_entries; + + set& pending_buckets; + string marker; + string status_oid; + + rgw_data_sync_marker* sync_marker; + int count; + + std::string next_marker; + vector log_entries; + bool truncated; + +public: + RGWReadPendingBucketShardsCoroutine(RGWDataSyncCtx *_sc, const int _shard_id, + set& _pending_buckets, + rgw_data_sync_marker* _sync_marker, const int _max_entries) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + driver(sync_env->driver), shard_id(_shard_id), max_entries(_max_entries), + pending_buckets(_pending_buckets), sync_marker(_sync_marker) + { + status_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id); + } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int RGWReadPendingBucketShardsCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this){ + //read sync status marker + using CR = RGWSimpleRadosReadCR; + yield call(new CR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, status_oid), + sync_marker)); + if 
+    if (retcode < 0) {
+      ldpp_dout(dpp, 0) << "failed to read sync status marker with "
+          << cpp_strerror(retcode) << dendl;
+      return set_cr_error(retcode);
+    }
+
+    // read pending bucket shards
+    marker = sync_marker->marker;
+    count = 0;
+    do {
+      yield call(new RGWReadRemoteDataLogShardCR(sc, shard_id, marker,
+                                                 &next_marker, &log_entries, &truncated));
+
+      if (retcode == -ENOENT) {
+        break;
+      }
+
+      if (retcode < 0) {
+        ldpp_dout(dpp, 0) << "failed to read remote data log info with "
+            << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+
+      if (log_entries.empty()) {
+        break;
+      }
+
+      count += log_entries.size();
+      for (const auto& entry : log_entries) {
+        pending_buckets.insert(entry.entry.key);
+      }
+    } while (truncated && count < max_entries);
+
+    return set_cr_done();
+  }
+
+  return 0;
+}
+
+int RGWRemoteDataLog::read_shard_status(const DoutPrefixProvider *dpp, int shard_id, set<string>& pending_buckets, set<string>& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries)
+{
+  // cannot run concurrently with run_sync(), so run in a separate manager
+  RGWCoroutinesManager crs(driver->ctx(), driver->getRados()->get_cr_registry());
+  RGWHTTPManager http_manager(driver->ctx(), crs.get_completion_mgr());
+  int ret = http_manager.start();
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+    return ret;
+  }
+  RGWDataSyncEnv sync_env_local = sync_env;
+  sync_env_local.http_manager = &http_manager;
+  RGWDataSyncCtx sc_local = sc;
+  sc_local.env = &sync_env_local;
+  list<RGWCoroutinesStack*> stacks;
+  RGWCoroutinesStack* recovering_stack = new RGWCoroutinesStack(driver->ctx(), &crs);
+  recovering_stack->call(new RGWReadRecoveringBucketShardsCoroutine(&sc_local, shard_id, recovering_buckets, max_entries));
+  stacks.push_back(recovering_stack);
+  RGWCoroutinesStack* pending_stack = new RGWCoroutinesStack(driver->ctx(), &crs);
+  pending_stack->call(new RGWReadPendingBucketShardsCoroutine(&sc_local, shard_id, pending_buckets, sync_marker, max_entries));
+  stacks.push_back(pending_stack);
+  ret = crs.run(dpp, stacks);
+  http_manager.stop();
+  return ret;
+}
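+
+// read_shard_status() (above) spawns the recovering- and pending-shard
+// listings as two coroutine stacks and runs them to completion on a
+// dedicated RGWCoroutinesManager, since they must not share the manager
+// used by run_sync().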
+
+CephContext *RGWBucketPipeSyncStatusManager::get_cct() const
+{
+  return driver->ctx();
+}
+
+void rgw_bucket_entry_owner::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("ID", id, obj);
+  JSONDecoder::decode_json("DisplayName", display_name, obj);
+}
+
+struct bucket_list_entry {
+  bool delete_marker;
+  rgw_obj_key key;
+  bool is_latest;
+  real_time mtime;
+  string etag;
+  uint64_t size;
+  string storage_class;
+  rgw_bucket_entry_owner owner;
+  uint64_t versioned_epoch;
+  string rgw_tag;
+
+  bucket_list_entry() : delete_marker(false), is_latest(false), size(0), versioned_epoch(0) {}
+
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("IsDeleteMarker", delete_marker, obj);
+    JSONDecoder::decode_json("Key", key.name, obj);
+    JSONDecoder::decode_json("VersionId", key.instance, obj);
+    JSONDecoder::decode_json("IsLatest", is_latest, obj);
+    string mtime_str;
+    JSONDecoder::decode_json("RgwxMtime", mtime_str, obj);
+
+    struct tm t;
+    uint32_t nsec;
+    if (parse_iso8601(mtime_str.c_str(), &t, &nsec)) {
+      ceph_timespec ts;
+      ts.tv_sec = (uint64_t)internal_timegm(&t);
+      ts.tv_nsec = nsec;
+      mtime = real_clock::from_ceph_timespec(ts);
+    }
+    JSONDecoder::decode_json("ETag", etag, obj);
+    JSONDecoder::decode_json("Size", size, obj);
+    JSONDecoder::decode_json("StorageClass", storage_class, obj);
+    JSONDecoder::decode_json("Owner", owner, obj);
+    JSONDecoder::decode_json("VersionedEpoch", versioned_epoch, obj);
+    JSONDecoder::decode_json("RgwxTag", rgw_tag, obj);
+    if (key.instance == "null" && !versioned_epoch) {
+      key.instance.clear();
+    }
+  }
+
+  RGWModifyOp get_modify_op() const {
+    if (delete_marker) {
+      return CLS_RGW_OP_LINK_OLH_DM;
+    } else if (!key.instance.empty() && key.instance != "null") {
+      return CLS_RGW_OP_LINK_OLH;
+    } else {
+      return CLS_RGW_OP_ADD;
+    }
+  }
+};
+
+struct bucket_list_result {
+  string name;
+  string prefix;
+  string key_marker;
+  string version_id_marker;
+  int max_keys;
+  bool is_truncated;
+  list<bucket_list_entry> entries;
+
+  bucket_list_result() : max_keys(0), is_truncated(false) {}
+
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("Name", name, obj);
+    JSONDecoder::decode_json("Prefix", prefix, obj);
+    JSONDecoder::decode_json("KeyMarker", key_marker, obj);
+    JSONDecoder::decode_json("VersionIdMarker", version_id_marker, obj);
+    JSONDecoder::decode_json("MaxKeys", max_keys, obj);
+    JSONDecoder::decode_json("IsTruncated", is_truncated, obj);
+    JSONDecoder::decode_json("Entries", entries, obj);
+  }
+};
+
+class RGWListRemoteBucketCR: public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  const rgw_bucket_shard& bs;
+  rgw_obj_key marker_position;
+
+  bucket_list_result *result;
+
+public:
+  RGWListRemoteBucketCR(RGWDataSyncCtx *_sc, const rgw_bucket_shard& bs,
+                        rgw_obj_key& _marker_position, bucket_list_result *_result)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), bs(bs),
+      marker_position(_marker_position), result(_result) {}
+
+  int operate(const DoutPrefixProvider *dpp) override {
+    reenter(this) {
+      yield {
+        rgw_http_param_pair pairs[] = { { "versions" , NULL },
+                                        { "format" , "json" },
+                                        { "objs-container" , "true" },
+                                        { "key-marker" , marker_position.name.c_str() },
+                                        { "version-id-marker" , marker_position.instance.c_str() },
+                                        { NULL, NULL } };
+        string p = string("/") + bs.bucket.get_key(':', 0);
+        call(new RGWReadRESTResourceCR<bucket_list_result>(sync_env->cct, sc->conn, sync_env->http_manager, p, pairs, result));
+      }
+      if (retcode < 0) {
+        return set_cr_error(retcode);
+      }
+      return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+struct next_bilog_result {
+  uint64_t generation = 0;
+  int num_shards = 0;
+
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("generation", generation, obj);
+    JSONDecoder::decode_json("num_shards", num_shards, obj);
+  }
+};
+
+struct bilog_list_result {
+  list<rgw_bi_log_entry> entries;
+  bool truncated{false};
+  std::optional<next_bilog_result> next_log;
+
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("entries", entries, obj);
+    JSONDecoder::decode_json("truncated", truncated, obj);
+    JSONDecoder::decode_json("next_log", next_log, obj);
+  }
+};
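+
+// The structures above mirror the JSON returned by the remote RGW:
+// bucket_list_result for a versioned object listing, and
+// bilog_list_result for a bucket index log listing, where next_log
+// advertises the following log generation once the current one is
+// exhausted.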
"format" , "json" }, + { "marker" , marker.c_str() }, + { "type", "bucket-index" }, + { "generation", gen_str.c_str() }, + { "format-ver", "2"}, + { NULL, NULL } }; + + call(new RGWReadRESTResourceCR(sync_env->cct, sc->conn, sync_env->http_manager, + "/admin/log", pairs, result)); + } + timer.reset(); + if (retcode < 0) { + if (sync_env->counters) { + sync_env->counters->inc(sync_counters::l_poll_err); + } + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +#define BUCKET_SYNC_UPDATE_MARKER_WINDOW 10 + +class RGWBucketFullSyncMarkerTrack : public RGWSyncShardMarkerTrack { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + const rgw_raw_obj& status_obj; + rgw_bucket_sync_status& sync_status; + RGWSyncTraceNodeRef tn; + RGWObjVersionTracker& objv_tracker; + +public: + RGWBucketFullSyncMarkerTrack(RGWDataSyncCtx *_sc, + const rgw_raw_obj& status_obj, + rgw_bucket_sync_status& sync_status, + RGWSyncTraceNodeRef tn, + RGWObjVersionTracker& objv_tracker) + : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW), + sc(_sc), sync_env(_sc->env), status_obj(status_obj), + sync_status(sync_status), tn(std::move(tn)), objv_tracker(objv_tracker) + {} + + + RGWCoroutine *store_marker(const rgw_obj_key& new_marker, uint64_t index_pos, const real_time& timestamp) override { + sync_status.full.position = new_marker; + sync_status.full.count = index_pos; + + tn->log(20, SSTR("updating marker oid=" << status_obj.oid << " marker=" << new_marker)); + return new RGWSimpleRadosWriteCR( + sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, + status_obj, sync_status, &objv_tracker); + } + + RGWOrderCallCR *allocate_order_control_cr() override { + return new RGWLastCallerWinsCR(sync_env->cct); + } +}; + +// write the incremental sync status and update 'stable_timestamp' on success +class RGWWriteBucketShardIncSyncStatus : public RGWCoroutine { + RGWDataSyncEnv *sync_env; + rgw_raw_obj obj; + rgw_bucket_shard_inc_sync_marker sync_marker; + ceph::real_time* stable_timestamp; + RGWObjVersionTracker& objv_tracker; + std::map attrs; + public: + RGWWriteBucketShardIncSyncStatus(RGWDataSyncEnv *sync_env, + const rgw_raw_obj& obj, + const rgw_bucket_shard_inc_sync_marker& sync_marker, + ceph::real_time* stable_timestamp, + RGWObjVersionTracker& objv_tracker) + : RGWCoroutine(sync_env->cct), sync_env(sync_env), obj(obj), + sync_marker(sync_marker), stable_timestamp(stable_timestamp), + objv_tracker(objv_tracker) + {} + int operate(const DoutPrefixProvider *dpp) { + reenter(this) { + sync_marker.encode_attr(attrs); + + yield call(new RGWSimpleRadosWriteAttrsCR(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, + obj, attrs, &objv_tracker)); + if (retcode < 0) { + return set_cr_error(retcode); + } + if (stable_timestamp) { + *stable_timestamp = sync_marker.timestamp; + } + return set_cr_done(); + } + return 0; + } +}; + +class RGWBucketIncSyncShardMarkerTrack : public RGWSyncShardMarkerTrack { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + rgw_raw_obj obj; + rgw_bucket_shard_inc_sync_marker sync_marker; + + map key_to_marker; + + struct operation { + rgw_obj_key key; + bool is_olh; + }; + map marker_to_op; + std::set pending_olh; // object names with pending olh operations + + RGWSyncTraceNodeRef tn; + RGWObjVersionTracker& objv_tracker; + ceph::real_time* stable_timestamp; + + void handle_finish(const string& marker) override { + auto iter = marker_to_op.find(marker); + if (iter == marker_to_op.end()) { + return; + } + auto& op = iter->second; + 
+
+class RGWBucketIncSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<string, rgw_obj_key> {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+
+  rgw_raw_obj obj;
+  rgw_bucket_shard_inc_sync_marker sync_marker;
+
+  map<rgw_obj_key, string> key_to_marker;
+
+  struct operation {
+    rgw_obj_key key;
+    bool is_olh;
+  };
+  map<string, operation> marker_to_op;
+  std::set<std::string> pending_olh; // object names with pending olh operations
+
+  RGWSyncTraceNodeRef tn;
+  RGWObjVersionTracker& objv_tracker;
+  ceph::real_time* stable_timestamp;
+
+  void handle_finish(const string& marker) override {
+    auto iter = marker_to_op.find(marker);
+    if (iter == marker_to_op.end()) {
+      return;
+    }
+    auto& op = iter->second;
+    key_to_marker.erase(op.key);
+    reset_need_retry(op.key);
+    if (op.is_olh) {
+      pending_olh.erase(op.key.name);
+    }
+    marker_to_op.erase(iter);
+  }
+
+public:
+  RGWBucketIncSyncShardMarkerTrack(RGWDataSyncCtx *_sc,
+                                   const string& _marker_oid,
+                                   const rgw_bucket_shard_inc_sync_marker& _marker,
+                                   RGWSyncTraceNodeRef tn,
+                                   RGWObjVersionTracker& objv_tracker,
+                                   ceph::real_time* stable_timestamp)
+    : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW),
+      sc(_sc), sync_env(_sc->env),
+      obj(sync_env->svc->zone->get_zone_params().log_pool, _marker_oid),
+      sync_marker(_marker), tn(std::move(tn)), objv_tracker(objv_tracker),
+      stable_timestamp(stable_timestamp)
+  {}
+
+  const rgw_raw_obj& get_obj() const { return obj; }
+
+  RGWCoroutine* store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+    sync_marker.position = new_marker;
+    sync_marker.timestamp = timestamp;
+
+    tn->log(20, SSTR("updating marker marker_oid=" << obj.oid << " marker=" << new_marker << " timestamp=" << timestamp));
+    return new RGWWriteBucketShardIncSyncStatus(sync_env, obj, sync_marker,
+                                                stable_timestamp, objv_tracker);
+  }
+
+  /*
+   * create index from key -> <op, marker>, and from marker -> key
+   * this is useful so that we can ensure that we only have one
+   * entry for any key that is used. This is needed when doing
+   * incremental sync of data, and we don't want to run multiple
+   * concurrent sync operations for the same bucket shard
+   * Also, we should make sure that we don't run concurrent operations on the same key with
+   * different ops.
+   */
+  bool index_key_to_marker(const rgw_obj_key& key, const string& marker, bool is_olh) {
+    auto result = key_to_marker.emplace(key, marker);
+    if (!result.second) { // exists
+      set_need_retry(key);
+      return false;
+    }
+    marker_to_op[marker] = operation{key, is_olh};
+    if (is_olh) {
+      // prevent other olh ops from starting on this object name
+      pending_olh.insert(key.name);
+    }
+    return true;
+  }
+
+  bool can_do_op(const rgw_obj_key& key, bool is_olh) {
+    // serialize olh ops on the same object name
+    if (is_olh && pending_olh.count(key.name)) {
+      tn->log(20, SSTR("sync of " << key << " waiting for pending olh op"));
+      return false;
+    }
+    return (key_to_marker.find(key) == key_to_marker.end());
+  }
+
+  RGWOrderCallCR *allocate_order_control_cr() override {
+    return new RGWLastCallerWinsCR(sync_env->cct);
+  }
+};
+
+static bool ignore_sync_error(int err) {
+  switch (err) {
+    case -ENOENT:
+    case -EPERM:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
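+
+// RGWBucketSyncSingleEntryCR (below) applies one bilog entry to the
+// local zone: it dispatches on the op type (object write, removal, or
+// delete-marker creation), logs failures to the error repo unless
+// ignore_sync_error() deems them final, and completes its marker
+// through the tracker above.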
+
+template <class T, class K>
+class RGWBucketSyncSingleEntryCR : public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+
+  rgw_bucket_sync_pipe& sync_pipe;
+  rgw_bucket_shard& bs;
+
+  rgw_obj_key key;
+  bool versioned;
+  std::optional<uint64_t> versioned_epoch;
+  rgw_bucket_entry_owner owner;
+  real_time timestamp;
+  RGWModifyOp op;
+  RGWPendingState op_state;
+
+  T entry_marker;
+  RGWSyncShardMarkerTrack<T, K> *marker_tracker;
+
+  int sync_status;
+
+  stringstream error_ss;
+
+  bool error_injection;
+
+  RGWDataSyncModule *data_sync_module;
+
+  rgw_zone_set zones_trace;
+
+  RGWSyncTraceNodeRef tn;
+  std::string zone_name;
+
+public:
+  RGWBucketSyncSingleEntryCR(RGWDataSyncCtx *_sc,
+                             rgw_bucket_sync_pipe& _sync_pipe,
+                             const rgw_obj_key& _key, bool _versioned,
+                             std::optional<uint64_t> _versioned_epoch,
+                             real_time& _timestamp,
+                             const rgw_bucket_entry_owner& _owner,
+                             RGWModifyOp _op, RGWPendingState _op_state,
+                             const T& _entry_marker, RGWSyncShardMarkerTrack<T, K> *_marker_tracker, rgw_zone_set& _zones_trace,
+                             RGWSyncTraceNodeRef& _tn_parent)
+    : RGWCoroutine(_sc->cct),
+      sc(_sc), sync_env(_sc->env),
+      sync_pipe(_sync_pipe), bs(_sync_pipe.info.source_bs),
+      key(_key), versioned(_versioned), versioned_epoch(_versioned_epoch),
+      owner(_owner),
+      timestamp(_timestamp), op(_op),
+      op_state(_op_state),
+      entry_marker(_entry_marker),
+      marker_tracker(_marker_tracker),
+      sync_status(0) {
+    stringstream ss;
+    ss << bucket_shard_str{bs} << "/" << key << "[" << versioned_epoch.value_or(0) << "]";
+    set_description() << "bucket sync single entry (source_zone=" << sc->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state;
+    set_status("init");
+
+    tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", SSTR(key));
+
+    tn->log(20, SSTR("bucket sync single entry (source_zone=" << sc->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state));
+    error_injection = (sync_env->cct->_conf->rgw_sync_data_inject_err_probability > 0);
+
+    data_sync_module = sync_env->sync_module->get_data_handler();
+
+    zones_trace = _zones_trace;
+    zones_trace.insert(sync_env->svc->zone->get_zone().id, _sync_pipe.info.dest_bucket.get_key());
+
+    if (sc->env->ostr) {
+      RGWZone* z;
+      if ((z = sc->env->driver->svc()->zone->find_zone(sc->source_zone))) {
+        zone_name = z->name;
+      }
+    }
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override {
+    reenter(this) {
+      /* skip entries that are not complete */
+      if (op_state != CLS_RGW_STATE_COMPLETE) {
+        goto done;
+      }
+      tn->set_flag(RGW_SNS_FLAG_ACTIVE);
+      do {
+        yield {
+          marker_tracker->reset_need_retry(key);
+          if (key.name.empty()) {
+            /* shouldn't happen */
+            set_status("skipping empty entry");
+            tn->log(0, "entry with empty obj name, skipping");
+            goto done;
+          }
+          if (error_injection &&
+              rand() % 10000 < cct->_conf->rgw_sync_data_inject_err_probability * 10000.0) {
+            tn->log(0, SSTR(": injecting data sync error on key=" << key.name));
+            retcode = -EIO;
+          } else if (op == CLS_RGW_OP_ADD ||
+                     op == CLS_RGW_OP_LINK_OLH) {
+            set_status("syncing obj");
+            tn->log(5, SSTR("bucket sync: sync obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]"));
+            if (versioned_epoch) {
+              pretty_print(sc->env, "Syncing object s3://{}/{} version {} in sync from zone {}\n",
+                           bs.bucket.name, key, *versioned_epoch, zone_name);
+            } else {
+              pretty_print(sc->env, "Syncing object s3://{}/{} in sync from zone {}\n",
+                           bs.bucket.name, key, zone_name);
+            }
+            call(data_sync_module->sync_object(dpp, sc, sync_pipe, key, versioned_epoch, &zones_trace));
+          } else if (op == CLS_RGW_OP_DEL || op == CLS_RGW_OP_UNLINK_INSTANCE) {
+            set_status("removing obj");
+            if (versioned_epoch) {
+              pretty_print(sc->env, "Deleting object s3://{}/{} version {} in sync from zone {}\n",
+                           bs.bucket.name, key, *versioned_epoch, zone_name);
+            } else {
+              pretty_print(sc->env, "Deleting object s3://{}/{} in sync from zone {}\n",
+                           bs.bucket.name, key, zone_name);
+            }
+            if (op == CLS_RGW_OP_UNLINK_INSTANCE) {
+              versioned = true;
+            }
+            tn->log(10, SSTR("removing obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]"));
+            call(data_sync_module->remove_object(dpp, sc, sync_pipe, key, timestamp, versioned, versioned_epoch.value_or(0), &zones_trace));
+            // our copy of the object is more recent, continue as if it succeeded
+          } else if (op == CLS_RGW_OP_LINK_OLH_DM) {
+            set_status("creating delete marker");
SSTR("creating delete marker: obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]")); + call(data_sync_module->create_delete_marker(dpp, sc, sync_pipe, key, timestamp, owner, versioned, versioned_epoch.value_or(0), &zones_trace)); + } + tn->set_resource_name(SSTR(bucket_str_noinstance(bs.bucket) << "/" << key)); + } + if (retcode == -ERR_PRECONDITION_FAILED) { + pretty_print(sc->env, "Skipping object s3://{}/{} in sync from zone {}\n", + bs.bucket.name, key, zone_name); + set_status("Skipping object sync: precondition failed (object contains newer change or policy doesn't allow sync)"); + tn->log(0, "Skipping object sync: precondition failed (object contains newer change or policy doesn't allow sync)"); + retcode = 0; + } + } while (marker_tracker->need_retry(key)); + { + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + if (retcode >= 0) { + tn->log(10, "success"); + } else { + tn->log(10, SSTR("failed, retcode=" << retcode << " (" << cpp_strerror(-retcode) << ")")); + } + } + + if (retcode < 0 && retcode != -ENOENT) { + set_status() << "failed to sync obj; retcode=" << retcode; + tn->log(0, SSTR("ERROR: failed to sync object: " + << bucket_shard_str{bs} << "/" << key.name)); + if (!ignore_sync_error(retcode)) { + error_ss << bucket_shard_str{bs} << "/" << key.name; + sync_status = retcode; + } + } + if (!error_ss.str().empty()) { + yield call(sync_env->error_logger->log_error_cr(dpp, sc->conn->get_remote_id(), "data", error_ss.str(), -retcode, string("failed to sync object") + cpp_strerror(-sync_status))); + } +done: + if (sync_status == 0) { + /* update marker */ + set_status() << "calling marker_tracker->finish(" << entry_marker << ")"; + yield call(marker_tracker->finish(entry_marker)); + sync_status = retcode; + } + if (sync_status < 0) { + return set_cr_error(sync_status); + } + return set_cr_done(); + } + return 0; + } +}; + +class RGWBucketFullSyncCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_bucket_sync_pipe& sync_pipe; + rgw_bucket_sync_status& sync_status; + rgw_bucket_shard& bs; + boost::intrusive_ptr lease_cr; + bucket_list_result list_result; + list::iterator entries_iter; + rgw_obj_key list_marker; + bucket_list_entry *entry{nullptr}; + + int total_entries{0}; + + int sync_result{0}; + + const rgw_raw_obj& status_obj; + RGWObjVersionTracker& objv; + + rgw_zone_set zones_trace; + + RGWSyncTraceNodeRef tn; + RGWBucketFullSyncMarkerTrack marker_tracker; + + struct _prefix_handler { + RGWBucketSyncFlowManager::pipe_rules_ref rules; + RGWBucketSyncFlowManager::pipe_rules::prefix_map_t::const_iterator iter; + std::optional cur_prefix; + + void set_rules(RGWBucketSyncFlowManager::pipe_rules_ref& _rules) { + rules = _rules; + } + + bool revalidate_marker(rgw_obj_key *marker) { + if (cur_prefix && + boost::starts_with(marker->name, *cur_prefix)) { + return true; + } + if (!rules) { + return false; + } + iter = rules->prefix_search(marker->name); + if (iter == rules->prefix_end()) { + return false; + } + cur_prefix = iter->first; + marker->name = *cur_prefix; + marker->instance.clear(); + return true; + } + + bool check_key_handled(const rgw_obj_key& key) { + if (!rules) { + return false; + } + if (cur_prefix && + boost::starts_with(key.name, *cur_prefix)) { + return true; + } + iter = rules->prefix_search(key.name); + if (iter == rules->prefix_end()) { + return false; + } + cur_prefix = iter->first; + return boost::starts_with(key.name, iter->first); + } + } prefix_handler; + +public: + 
+  RGWBucketFullSyncCR(RGWDataSyncCtx *_sc,
+                      rgw_bucket_sync_pipe& _sync_pipe,
+                      const rgw_raw_obj& status_obj,
+                      boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr,
+                      rgw_bucket_sync_status& sync_status,
+                      RGWSyncTraceNodeRef tn_parent,
+                      RGWObjVersionTracker& objv_tracker)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
+      sync_pipe(_sync_pipe), sync_status(sync_status),
+      bs(_sync_pipe.info.source_bs),
+      lease_cr(std::move(lease_cr)), status_obj(status_obj), objv(objv_tracker),
+      tn(sync_env->sync_tracer->add_node(tn_parent, "full_sync",
+                                         SSTR(bucket_shard_str{bs}))),
+      marker_tracker(sc, status_obj, sync_status, tn, objv_tracker)
+  {
+    zones_trace.insert(sc->source_zone.id, sync_pipe.info.dest_bucket.get_key());
+    prefix_handler.set_rules(sync_pipe.get_rules());
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override;
+};
+
+int RGWBucketFullSyncCR::operate(const DoutPrefixProvider *dpp)
+{
+  reenter(this) {
+    list_marker = sync_status.full.position;
+
+    total_entries = sync_status.full.count;
+    do {
+      if (lease_cr && !lease_cr->is_locked()) {
+        drain_all();
+        tn->log(1, "no lease or lease is lost, abort");
+        return set_cr_error(-ECANCELED);
+      }
+      set_status("listing remote bucket");
+      tn->log(20, "listing bucket for full sync");
+
+      if (!prefix_handler.revalidate_marker(&list_marker)) {
+        set_status() << "finished iterating over all available prefixes: last marker=" << list_marker;
+        tn->log(20, SSTR("finished iterating over all available prefixes: last marker=" << list_marker));
+        break;
+      }
+
+      yield call(new RGWListRemoteBucketCR(sc, bs, list_marker, &list_result));
+      if (retcode < 0 && retcode != -ENOENT) {
+        set_status("failed bucket listing, going down");
+        drain_all();
+        return set_cr_error(retcode);
+      }
+      if (list_result.entries.size() > 0) {
+        tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+      }
+      entries_iter = list_result.entries.begin();
+      for (; entries_iter != list_result.entries.end(); ++entries_iter) {
+        if (lease_cr && !lease_cr->is_locked()) {
+          drain_all();
+          tn->log(1, "no lease or lease is lost, abort");
+          return set_cr_error(-ECANCELED);
+        }
+        tn->log(20, SSTR("[full sync] syncing object: "
+            << bucket_shard_str{bs} << "/" << entries_iter->key));
+        entry = &(*entries_iter);
+        list_marker = entries_iter->key;
+        if (!prefix_handler.check_key_handled(entries_iter->key)) {
+          set_status() << "skipping entry due to policy rules: " << entries_iter->key;
+          tn->log(20, SSTR("skipping entry due to policy rules: " << entries_iter->key));
+          continue;
+        }
+        total_entries++;
+        if (!marker_tracker.start(entry->key, total_entries, real_time())) {
+          tn->log(0, SSTR("ERROR: cannot start syncing " << entry->key << ". Duplicate entry?"));
Duplicate entry?")); + } else { + using SyncCR = RGWBucketSyncSingleEntryCR; + yield spawn(new SyncCR(sc, sync_pipe, entry->key, + false, /* versioned, only matters for object removal */ + entry->versioned_epoch, entry->mtime, + entry->owner, entry->get_modify_op(), CLS_RGW_STATE_COMPLETE, + entry->key, &marker_tracker, zones_trace, tn), + false); + } + drain_with_cb(cct->_conf->rgw_bucket_sync_spawn_window, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_result = ret; + } + return 0; + }); + } + } while (list_result.is_truncated && sync_result == 0); + set_status("done iterating over all objects"); + + /* wait for all operations to complete */ + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_result = ret; + } + return 0; + }); + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + if (lease_cr && !lease_cr->is_locked()) { + tn->log(1, "no lease or lease is lost, abort"); + return set_cr_error(-ECANCELED); + } + yield call(marker_tracker.flush()); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: marker_tracker.flush() returned retcode=" << retcode)); + return set_cr_error(retcode); + } + /* update sync state to incremental */ + if (sync_result == 0) { + sync_status.state = BucketSyncState::Incremental; + tn->log(5, SSTR("set bucket state=" << sync_status.state)); + yield call(new RGWSimpleRadosWriteCR( + dpp, sync_env->async_rados, sync_env->svc->sysobj, + status_obj, sync_status, &objv)); + tn->log(5, SSTR("bucket status objv=" << objv)); + } else { + tn->log(10, SSTR("backing out with sync_status=" << sync_result)); + } + if (retcode < 0 && sync_result == 0) { /* actually tried to set incremental state and failed */ + tn->log(0, SSTR("ERROR: failed to set sync state on bucket " + << bucket_shard_str{bs} << " retcode=" << retcode)); + return set_cr_error(retcode); + } + if (sync_result < 0) { + return set_cr_error(sync_result); + } + return set_cr_done(); + } + return 0; +} + +static bool has_olh_epoch(RGWModifyOp op) { + return op == CLS_RGW_OP_LINK_OLH || op == CLS_RGW_OP_UNLINK_INSTANCE; +} + +class RGWBucketShardIsDoneCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_bucket_sync_status bucket_status; + const rgw_raw_obj& bucket_status_obj; + const int shard_id; + RGWObjVersionTracker objv_tracker; + const next_bilog_result& next_log; + const uint64_t generation; + +public: + RGWBucketShardIsDoneCR(RGWDataSyncCtx *_sc, const rgw_raw_obj& _bucket_status_obj, + int _shard_id, const next_bilog_result& _next_log, const uint64_t _gen) + : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + bucket_status_obj(_bucket_status_obj), + shard_id(_shard_id), next_log(_next_log), generation(_gen) {} + + int operate(const DoutPrefixProvider* dpp) override + { + reenter(this) { + do { + // read bucket sync status + objv_tracker.clear(); + using ReadCR = RGWSimpleRadosReadCR; + yield call(new ReadCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + bucket_status_obj, &bucket_status, false, &objv_tracker)); + if (retcode < 0) { + ldpp_dout(dpp, 20) << "failed to read bucket shard status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + if (bucket_status.state != BucketSyncState::Incremental) { + // exit with success to avoid stale shard being + // retried in error repo if we lost a race + ldpp_dout(dpp, 20) << "RGWBucketShardIsDoneCR found sync state = " << bucket_status.state << dendl; + return set_cr_done(); + } + + 
+
+class RGWBucketShardIsDoneCR : public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  rgw_bucket_sync_status bucket_status;
+  const rgw_raw_obj& bucket_status_obj;
+  const int shard_id;
+  RGWObjVersionTracker objv_tracker;
+  const next_bilog_result& next_log;
+  const uint64_t generation;
+
+public:
+  RGWBucketShardIsDoneCR(RGWDataSyncCtx *_sc, const rgw_raw_obj& _bucket_status_obj,
+                         int _shard_id, const next_bilog_result& _next_log, const uint64_t _gen)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
+      bucket_status_obj(_bucket_status_obj),
+      shard_id(_shard_id), next_log(_next_log), generation(_gen) {}
+
+  int operate(const DoutPrefixProvider* dpp) override
+  {
+    reenter(this) {
+      do {
+        // read bucket sync status
+        objv_tracker.clear();
+        using ReadCR = RGWSimpleRadosReadCR<rgw_bucket_sync_status>;
+        yield call(new ReadCR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
+                              bucket_status_obj, &bucket_status, false, &objv_tracker));
+        if (retcode < 0) {
+          ldpp_dout(dpp, 20) << "failed to read bucket shard status: "
+              << cpp_strerror(retcode) << dendl;
+          return set_cr_error(retcode);
+        }
+
+        if (bucket_status.state != BucketSyncState::Incremental) {
+          // exit with success to avoid stale shard being
+          // retried in error repo if we lost a race
+          ldpp_dout(dpp, 20) << "RGWBucketShardIsDoneCR found sync state = " << bucket_status.state << dendl;
+          return set_cr_done();
+        }
+
+        if (bucket_status.incremental_gen != generation) {
+          // exit with success to avoid stale shard being
+          // retried in error repo if we lost a race
+          ldpp_dout(dpp, 20) << "RGWBucketShardIsDoneCR expected gen: " << generation
+              << ", got: " << bucket_status.incremental_gen << dendl;
+          return set_cr_done();
+        }
+
+        yield {
+          // update bucket_status after a shard is done with current gen
+          auto& done = bucket_status.shards_done_with_gen;
+          done[shard_id] = true;
+
+          // increment gen if all shards are already done with current gen
+          if (std::all_of(done.begin(), done.end(),
+                          [] (const bool done) { return done; })) {
+            bucket_status.incremental_gen = next_log.generation;
+            done.clear();
+            done.resize(next_log.num_shards, false);
+          }
+          ldpp_dout(dpp, 20) << "bucket status incremental gen is " << bucket_status.incremental_gen << dendl;
+          using WriteCR = RGWSimpleRadosWriteCR<rgw_bucket_sync_status>;
+          call(new WriteCR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
+                           bucket_status_obj, bucket_status, &objv_tracker, false));
+        }
+        if (retcode < 0 && retcode != -ECANCELED) {
+          ldpp_dout(dpp, 20) << "failed to write bucket sync status: " << cpp_strerror(retcode) << dendl;
+          return set_cr_error(retcode);
+        } else if (retcode >= 0) {
+          return set_cr_done();
+        }
+      } while (retcode == -ECANCELED);
+    }
+    return 0;
+  }
+};
+
+class RGWBucketShardIncrementalSyncCR : public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  rgw_bucket_sync_pipe& sync_pipe;
+  RGWBucketSyncFlowManager::pipe_rules_ref rules;
+  rgw_bucket_shard& bs;
+  const rgw_raw_obj& bucket_status_obj;
+  boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr;
+  bilog_list_result extended_result;
+  list<rgw_bi_log_entry> list_result;
+  int next_num_shards;
+  uint64_t next_gen;
+  bool truncated;
+
+  list<rgw_bi_log_entry>::iterator entries_iter, entries_end;
+  map<pair<string, string>, pair<real_time, RGWModifyOp> > squash_map;
+  rgw_bucket_shard_sync_info& sync_info;
+  uint64_t generation;
+  rgw_obj_key key;
+  rgw_bi_log_entry *entry{nullptr};
+  bool updated_status{false};
+  rgw_zone_id zone_id;
+  string target_location_key;
+
+  string cur_id;
+
+  int sync_status{0};
+  bool syncstopped{false};
+
+  RGWSyncTraceNodeRef tn;
+  RGWBucketIncSyncShardMarkerTrack marker_tracker;
+
+public:
+  RGWBucketShardIncrementalSyncCR(RGWDataSyncCtx *_sc,
+                                  rgw_bucket_sync_pipe& _sync_pipe,
+                                  const std::string& shard_status_oid,
+                                  const rgw_raw_obj& _bucket_status_obj,
+                                  boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr,
+                                  rgw_bucket_shard_sync_info& sync_info,
+                                  uint64_t generation,
+                                  RGWSyncTraceNodeRef& _tn_parent,
+                                  RGWObjVersionTracker& objv_tracker,
+                                  ceph::real_time* stable_timestamp)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
+      sync_pipe(_sync_pipe), bs(_sync_pipe.info.source_bs),
+      bucket_status_obj(_bucket_status_obj), lease_cr(std::move(lease_cr)),
+      sync_info(sync_info), generation(generation), zone_id(sync_env->svc->zone->get_zone().id),
+      tn(sync_env->sync_tracer->add_node(_tn_parent, "inc_sync",
+                                         SSTR(bucket_shard_str{bs}))),
+      marker_tracker(sc, shard_status_oid, sync_info.inc_marker, tn,
+                     objv_tracker, stable_timestamp)
+  {
+    set_description() << "bucket shard incremental sync bucket="
+        << bucket_shard_str{bs};
+    set_status("init");
+    rules = sync_pipe.get_rules();
+    target_location_key = sync_pipe.info.dest_bucket.get_key();
+  }
+
+  bool check_key_handled(const rgw_obj_key& key) {
+    if (!rules) {
+      return false;
+    }
+    auto iter = rules->prefix_search(key.name);
+    if (iter == rules->prefix_end()) {
+      return false;
+    }
+    return boost::starts_with(key.name, iter->first);
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override;
+};
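+
+// Incremental sync walks the bilog in order. The squash_map built from
+// each listing keeps only the newest (timestamp, op) per object, so
+// older entries for the same key are skipped; olh entries are never
+// squashed away since their olh_epoch must still be applied.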
+
+int RGWBucketShardIncrementalSyncCR::operate(const DoutPrefixProvider *dpp)
+{
+  int ret;
+  reenter(this) {
+    do {
+      if (lease_cr && !lease_cr->is_locked()) {
+        drain_all();
+        tn->log(1, "no lease or lease is lost, abort");
+        return set_cr_error(-ECANCELED);
+      }
+      tn->log(20, SSTR("listing bilog for incremental sync; position=" << sync_info.inc_marker.position));
+      set_status() << "listing bilog; position=" << sync_info.inc_marker.position;
+      yield call(new RGWListBucketIndexLogCR(sc, bs, sync_info.inc_marker.position, generation, &extended_result));
+      if (retcode < 0 && retcode != -ENOENT) {
+        /* wait for all operations to complete */
+        drain_all();
+        return set_cr_error(retcode);
+      }
+      list_result = std::move(extended_result.entries);
+      truncated = extended_result.truncated;
+      if (extended_result.next_log) {
+        next_gen = extended_result.next_log->generation;
+        next_num_shards = extended_result.next_log->num_shards;
+      }
+
+      squash_map.clear();
+      entries_iter = list_result.begin();
+      entries_end = list_result.end();
+      for (; entries_iter != entries_end; ++entries_iter) {
+        auto e = *entries_iter;
+        if (e.op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP) {
+          ldpp_dout(dpp, 20) << "syncstop at: " << e.timestamp << ". marker: " << e.id << dendl;
+          syncstopped = true;
+          entries_end = std::next(entries_iter); // stop after this entry
+          break;
+        }
+        if (e.op == RGWModifyOp::CLS_RGW_OP_RESYNC) {
+          ldpp_dout(dpp, 20) << "syncstart at: " << e.timestamp << ". marker: " << e.id << dendl;
+          continue;
+        }
+        if (e.op == CLS_RGW_OP_CANCEL) {
+          continue;
+        }
+        if (e.state != CLS_RGW_STATE_COMPLETE) {
+          continue;
+        }
+        if (e.zones_trace.exists(zone_id.id, target_location_key)) {
+          continue;
+        }
+        auto& squash_entry = squash_map[make_pair(e.object, e.instance)];
+        // don't squash over olh entries - we need to apply their olh_epoch
+        if (has_olh_epoch(squash_entry.second) && !has_olh_epoch(e.op)) {
+          continue;
+        }
+        if (squash_entry.first <= e.timestamp) {
+          squash_entry = make_pair<>(e.timestamp, e.op);
+        }
+      }
+
+      entries_iter = list_result.begin();
+      for (; entries_iter != entries_end; ++entries_iter) {
+        if (lease_cr && !lease_cr->is_locked()) {
+          drain_all();
+          tn->log(1, "no lease or lease is lost, abort");
+          return set_cr_error(-ECANCELED);
+        }
+        entry = &(*entries_iter);
+        {
+          ssize_t p = entry->id.find('#'); /* entries might have explicit shard info in them, e.g., 6#00000000004.94.3 */
+          if (p < 0) {
+            cur_id = entry->id;
+          } else {
+            cur_id = entry->id.substr(p + 1);
+          }
+        }
+        sync_info.inc_marker.position = cur_id;
+
+        if (entry->op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP || entry->op == RGWModifyOp::CLS_RGW_OP_RESYNC) {
+          ldpp_dout(dpp, 20) << "detected syncstop or resync on " << entries_iter->timestamp << ", skipping entry" << dendl;
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+
+        if (!key.set(rgw_obj_index_key{entry->object, entry->instance})) {
+          set_status() << "parse_raw_oid() on " << entry->object << " returned false, skipping entry";
+          tn->log(20, SSTR("parse_raw_oid() on " << entry->object << " returned false, skipping entry"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+
+        tn->log(20, SSTR("parsed entry: id=" << cur_id << " iter->object=" << entry->object << " iter->instance=" << entry->instance << " name=" << key.name << " instance=" << key.instance << " ns=" << key.ns));
+
+        if (!key.ns.empty()) {
+          set_status() << "skipping entry in namespace: " << entry->object;
+          tn->log(20, SSTR("skipping entry in namespace: " << entry->object));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+
+        if (!check_key_handled(key)) {
+          set_status() << "skipping entry due to policy rules: " << entry->object;
+          tn->log(20, SSTR("skipping entry due to policy rules: " << entry->object));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+
+        set_status() << "got entry.id=" << cur_id << " key=" << key << " op=" << (int)entry->op;
+        if (entry->op == CLS_RGW_OP_CANCEL) {
+          set_status() << "canceled operation, skipping";
+          tn->log(20, SSTR("skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": canceled operation"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+        if (entry->state != CLS_RGW_STATE_COMPLETE) {
+          set_status() << "non-complete operation, skipping";
+          tn->log(20, SSTR("skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": non-complete operation"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+        if (entry->zones_trace.exists(zone_id.id, target_location_key)) {
+          set_status() << "redundant operation, skipping";
+          tn->log(20, SSTR("skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": redundant operation"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+        if (make_pair<>(entry->timestamp, entry->op) != squash_map[make_pair(entry->object, entry->instance)]) {
+          set_status() << "squashed operation, skipping";
+          tn->log(20, SSTR("skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": squashed operation"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+        tn->set_flag(RGW_SNS_FLAG_ACTIVE);
+        tn->log(20, SSTR("syncing object: "
+            << bucket_shard_str{bs} << "/" << key));
+        updated_status = false;
+        while (!marker_tracker.can_do_op(key, has_olh_epoch(entry->op))) {
+          if (!updated_status) {
+            set_status() << "can't do op, conflicting inflight operation";
+            updated_status = true;
+          }
+          tn->log(5, SSTR("can't do op on key=" << key << " need to wait for conflicting operation to complete"));
+          yield wait_for_child();
+          bool again = true;
+          while (again) {
+            again = collect(&ret, nullptr);
+            if (ret < 0) {
+              tn->log(0, SSTR("ERROR: a child operation returned error (ret=" << ret << ")"));
+              sync_status = ret;
+              /* we have reported this error */
+            }
+          }
+          if (sync_status != 0)
+            break;
+        }
+        if (sync_status != 0) {
+          /* get error, stop */
+          break;
+        }
+        if (!marker_tracker.index_key_to_marker(key, cur_id, has_olh_epoch(entry->op))) {
+          set_status() << "can't do op, sync already in progress for object";
+          tn->log(20, SSTR("skipping sync of entry: " << cur_id << ":" << key << " sync already in progress for object"));
+          marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+          continue;
+        }
+        // yield {
+        set_status() << "start object sync";
+        if (!marker_tracker.start(cur_id, 0, entry->timestamp)) {
+          tn->log(0, SSTR("ERROR: cannot start syncing " << cur_id << ". Duplicate entry?"));
Duplicate entry?")); + } else { + std::optional versioned_epoch; + rgw_bucket_entry_owner owner(entry->owner, entry->owner_display_name); + if (entry->ver.pool < 0) { + versioned_epoch = entry->ver.epoch; + } + tn->log(20, SSTR("entry->timestamp=" << entry->timestamp)); + using SyncCR = RGWBucketSyncSingleEntryCR; + spawn(new SyncCR(sc, sync_pipe, key, + entry->is_versioned(), versioned_epoch, + entry->timestamp, owner, entry->op, entry->state, + cur_id, &marker_tracker, entry->zones_trace, tn), + false); + } + // } + drain_with_cb(cct->_conf->rgw_bucket_sync_spawn_window, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; + } + return 0; + }); + } + + } while (!list_result.empty() && sync_status == 0 && !syncstopped); + + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; + } + return 0; + }); + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + + if (syncstopped) { + // transition to StateStopped in RGWSyncBucketShardCR. if sync is + // still disabled, we'll delete the sync status object. otherwise we'll + // restart full sync to catch any changes that happened while sync was + // disabled + sync_info.state = rgw_bucket_shard_sync_info::StateStopped; + return set_cr_done(); + } + + yield call(marker_tracker.flush()); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: marker_tracker.flush() returned retcode=" << retcode)); + return set_cr_error(retcode); + } + if (sync_status < 0) { + tn->log(10, SSTR("backing out with sync_status=" << sync_status)); + return set_cr_error(sync_status); + } + + if (!truncated && extended_result.next_log) { + yield call(new RGWBucketShardIsDoneCR(sc, bucket_status_obj, bs.shard_id, *extended_result.next_log, generation)); + if (retcode < 0) { + ldout(cct, 20) << "failed to update bucket sync status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + yield { + // delete the shard status object + auto status_obj = sync_env->svc->rados->obj(marker_tracker.get_obj()); + retcode = status_obj.open(dpp); + if (retcode < 0) { + return set_cr_error(retcode); + } + call(new RGWRadosRemoveOidCR(sync_env->driver, std::move(status_obj))); + if (retcode < 0) { + ldpp_dout(dpp, 20) << "failed to remove shard status object: " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + } + } + + return set_cr_done(); + } + return 0; +} + +class RGWGetBucketPeersCR : public RGWCoroutine { + RGWDataSyncEnv *sync_env; + + std::optional target_bucket; + std::optional source_zone; + std::optional source_bucket; + + rgw_sync_pipe_info_set *pipes; + map buckets_info; + map::iterator siiter; + std::optional target_bucket_info; + std::optional source_bucket_info; + + rgw_sync_pipe_info_set::iterator siter; + + std::shared_ptr source_policy; + std::shared_ptr target_policy; + + RGWSyncTraceNodeRef tn; + + using pipe_const_iter = map::const_iterator; + + static pair get_pipe_iters(const map& m, std::optional zone) { + if (!zone) { + return { m.begin(), m.end() }; + } + + auto b = m.find(*zone); + if (b == m.end()) { + return { b, b }; + } + return { b, std::next(b) }; + } + + void filter_sources(std::optional source_zone, + std::optional source_bucket, + const map& all_sources, + rgw_sync_pipe_info_set *result) { + ldpp_dout(sync_env->dpp, 20) << __func__ << ": source_zone=" << source_zone.value_or(rgw_zone_id("*")).id + << " source_bucket=" << source_bucket.value_or(rgw_bucket()) + << " all_sources.size()=" 
+
+class RGWGetBucketPeersCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+
+  std::optional<rgw_bucket> target_bucket;
+  std::optional<rgw_zone_id> source_zone;
+  std::optional<rgw_bucket> source_bucket;
+
+  rgw_sync_pipe_info_set *pipes;
+  map<rgw_bucket, all_bucket_info> buckets_info;
+  map<rgw_bucket, all_bucket_info>::iterator siiter;
+  std::optional<all_bucket_info> target_bucket_info;
+  std::optional<all_bucket_info> source_bucket_info;
+
+  rgw_sync_pipe_info_set::iterator siter;
+
+  std::shared_ptr<rgw_bucket_get_sync_policy_result> source_policy;
+  std::shared_ptr<rgw_bucket_get_sync_policy_result> target_policy;
+
+  RGWSyncTraceNodeRef tn;
+
+  using pipe_const_iter = map<rgw_zone_id, RGWBucketSyncFlowManager::pipe_set>::const_iterator;
+
+  static pair<pipe_const_iter, pipe_const_iter> get_pipe_iters(const map<rgw_zone_id, RGWBucketSyncFlowManager::pipe_set>& m, std::optional<rgw_zone_id> zone) {
+    if (!zone) {
+      return { m.begin(), m.end() };
+    }
+
+    auto b = m.find(*zone);
+    if (b == m.end()) {
+      return { b, b };
+    }
+    return { b, std::next(b) };
+  }
+
+  void filter_sources(std::optional<rgw_zone_id> source_zone,
+                      std::optional<rgw_bucket> source_bucket,
+                      const map<rgw_zone_id, RGWBucketSyncFlowManager::pipe_set>& all_sources,
+                      rgw_sync_pipe_info_set *result) {
+    ldpp_dout(sync_env->dpp, 20) << __func__ << ": source_zone=" << source_zone.value_or(rgw_zone_id("*")).id
+        << " source_bucket=" << source_bucket.value_or(rgw_bucket())
+        << " all_sources.size()=" << all_sources.size() << dendl;
+    auto iters = get_pipe_iters(all_sources, source_zone);
+    for (auto i = iters.first; i != iters.second; ++i) {
+      for (auto& handler : i->second) {
+        if (!handler.specific()) {
+          ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": skipping" << dendl;
+          continue;
+        }
+        if (source_bucket &&
+            !source_bucket->match(*handler.source.bucket)) {
+          continue;
+        }
+        ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": adding" << dendl;
+        result->insert(handler, source_bucket_info, target_bucket_info);
+      }
+    }
+  }
+
+  void filter_targets(std::optional<rgw_zone_id> target_zone,
+                      std::optional<rgw_bucket> target_bucket,
+                      const map<rgw_zone_id, RGWBucketSyncFlowManager::pipe_set>& all_targets,
+                      rgw_sync_pipe_info_set *result) {
+    ldpp_dout(sync_env->dpp, 20) << __func__ << ": target_zone=" << target_zone.value_or(rgw_zone_id("*")).id
+        << " target_bucket=" << target_bucket.value_or(rgw_bucket())
+        << " all_targets.size()=" << all_targets.size() << dendl;
+    auto iters = get_pipe_iters(all_targets, target_zone);
+    for (auto i = iters.first; i != iters.second; ++i) {
+      for (auto& handler : i->second) {
+        if (target_bucket &&
+            handler.dest.bucket &&
+            !target_bucket->match(*handler.dest.bucket)) {
+          ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": skipping" << dendl;
+          continue;
+        }
+        ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": adding" << dendl;
+        result->insert(handler, source_bucket_info, target_bucket_info);
+      }
+    }
+  }
+
+  void update_from_target_bucket_policy();
+  void update_from_source_bucket_policy();
+
+  struct GetHintTargets : public RGWGenericAsyncCR::Action {
+    RGWDataSyncEnv *sync_env;
+    rgw_bucket source_bucket;
+    std::set<rgw_bucket> targets;
+
+    GetHintTargets(RGWDataSyncEnv *_sync_env,
+                   const rgw_bucket& _source_bucket)
+      : sync_env(_sync_env),
+        source_bucket(_source_bucket) {}
+    int operate() override {
+      int r = sync_env->svc->bucket_sync->get_bucket_sync_hints(sync_env->dpp,
+                                                                source_bucket,
+                                                                nullptr,
+                                                                &targets,
+                                                                null_yield);
+      if (r < 0) {
+        ldpp_dout(sync_env->dpp, 0) << "ERROR: " << __func__ << "(): failed to fetch bucket sync hints for bucket=" << source_bucket << dendl;
+        return r;
+      }
+
+      return 0;
+    }
+  };
+
+  std::shared_ptr<GetHintTargets> get_hint_targets_action;
+  std::set<rgw_bucket>::iterator hiter;
+
+public:
+  RGWGetBucketPeersCR(RGWDataSyncEnv *_sync_env,
+                      std::optional<rgw_bucket> _target_bucket,
+                      std::optional<rgw_zone_id> _source_zone,
+                      std::optional<rgw_bucket> _source_bucket,
+                      rgw_sync_pipe_info_set *_pipes,
+                      const RGWSyncTraceNodeRef& _tn_parent)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      target_bucket(_target_bucket),
+      source_zone(_source_zone),
+      source_bucket(_source_bucket),
+      pipes(_pipes),
+      tn(sync_env->sync_tracer->add_node(_tn_parent, "get_bucket_peers",
+                                         SSTR( "target=" << target_bucket.value_or(rgw_bucket())
+                                               << ":source=" << source_bucket.value_or(rgw_bucket())
+                                               << ":source_zone=" << source_zone.value_or(rgw_zone_id("*")).id))) {
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override;
+};
+
+std::ostream& operator<<(std::ostream& out, std::optional<rgw_bucket_shard>& bs) {
+  if (!bs) {
+    out << "*";
+  } else {
+    out << *bs;
+  }
+  return out;
+}
+
+static RGWCoroutine* sync_bucket_shard_cr(RGWDataSyncCtx* sc,
+                                          boost::intrusive_ptr<const RGWContinuousLeaseCR> lease,
+                                          const rgw_bucket_sync_pair_info& sync_pair,
+                                          std::optional<uint64_t> gen,
+                                          const RGWSyncTraceNodeRef& tn,
+                                          ceph::real_time* progress);
+
+RGWRunBucketSourcesSyncCR::RGWRunBucketSourcesSyncCR(RGWDataSyncCtx *_sc,
+                                                     boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr,
+                                                     const rgw_bucket_shard& source_bs,
+                                                     const RGWSyncTraceNodeRef& _tn_parent,
+                                                     std::optional<uint64_t> gen,
+                                                     ceph::real_time* progress)
+  : RGWCoroutine(_sc->env->cct), sc(_sc), sync_env(_sc->env),
+    lease_cr(std::move(lease_cr)),
+    tn(sync_env->sync_tracer->add_node(
+         _tn_parent, "bucket_sync_sources",
+         SSTR( "source=" << source_bs << ":source_zone=" << sc->source_zone))),
+    progress(progress),
+    gen(gen)
+{
+  sync_pair.source_bs = source_bs;
+}
+
+int RGWRunBucketSourcesSyncCR::operate(const DoutPrefixProvider *dpp)
+{
+  reenter(this) {
+    yield call(new RGWGetBucketPeersCR(sync_env, std::nullopt, sc->source_zone,
+                                       sync_pair.source_bs.bucket, &pipes, tn));
+    if (retcode < 0 && retcode != -ENOENT) {
+      tn->log(0, SSTR("ERROR: failed to read sync status for bucket. error: " << retcode));
+      return set_cr_error(retcode);
+    }
+
+    ldpp_dout(dpp, 20) << __func__ << "(): requested source_bs=" << sync_pair.source_bs << dendl;
+
+    if (pipes.empty()) {
+      ldpp_dout(dpp, 20) << __func__ << "(): no relevant sync pipes found" << dendl;
+      return set_cr_done();
+    }
+
+    shard_progress.resize(pipes.size());
+    cur_shard_progress = shard_progress.begin();
+
+    for (siter = pipes.begin(); siter != pipes.end(); ++siter, ++cur_shard_progress) {
+      ldpp_dout(dpp, 20) << __func__ << "(): sync pipe=" << *siter << dendl;
+
+      sync_pair.dest_bucket = siter->target.get_bucket();
+      sync_pair.handler = siter->handler;
+
+      ldpp_dout(dpp, 20) << __func__ << "(): sync_pair=" << sync_pair << dendl;
+
+      yield_spawn_window(sync_bucket_shard_cr(sc, lease_cr, sync_pair,
+                                              gen, tn, &*cur_shard_progress),
+                         cct->_conf->rgw_bucket_sync_spawn_window,
+                         [&](uint64_t stack_id, int ret) {
+                           if (ret < 0) {
+                             tn->log(10, SSTR("ERROR: a sync operation returned error: " << ret));
+                           }
+                           return ret;
+                         });
+    }
+    drain_all_cb([&](uint64_t stack_id, int ret) {
+      if (ret < 0) {
+        tn->log(10, SSTR("a sync operation returned error: " << ret));
+      }
+      return ret;
+    });
+    if (progress) {
+      *progress = *std::min_element(shard_progress.begin(), shard_progress.end());
+    }
+    return set_cr_done();
+  }
+
+  return 0;
+}
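+
+// A bucket source fans out into one shard-sync coroutine per matching
+// pipe; the caller-visible progress is the minimum over all pipes, i.e.
+// the oldest timestamp any pipe has reached.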
SSTR("ERROR: failed to fetch bucket instance info for " << bucket_str{bucket})); + return set_cr_error(retcode); + } + + yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->driver, bucket, pbucket_info, pattrs, dpp)); + } + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{bucket})); + return set_cr_error(retcode); + } + + return set_cr_done(); + } + + return 0; +} + +void RGWGetBucketPeersCR::update_from_target_bucket_policy() +{ + if (!target_policy || + !target_policy->policy_handler || + !pipes) { + return; + } + + auto handler = target_policy->policy_handler.get(); + + filter_sources(source_zone, + source_bucket, + handler->get_sources(), + pipes); + + for (siter = pipes->begin(); siter != pipes->end(); ++siter) { + if (!siter->source.has_bucket_info()) { + buckets_info.emplace(siter->source.get_bucket(), all_bucket_info()); + } + if (!siter->target.has_bucket_info()) { + buckets_info.emplace(siter->target.get_bucket(), all_bucket_info()); + } + } +} + +void RGWGetBucketPeersCR::update_from_source_bucket_policy() +{ + if (!source_policy || + !source_policy->policy_handler || + !pipes) { + return; + } + + auto handler = source_policy->policy_handler.get(); + + filter_targets(sync_env->svc->zone->get_zone().id, + target_bucket, + handler->get_targets(), + pipes); + + for (siter = pipes->begin(); siter != pipes->end(); ++siter) { + if (!siter->source.has_bucket_info()) { + buckets_info.emplace(siter->source.get_bucket(), all_bucket_info()); + } + if (!siter->target.has_bucket_info()) { + buckets_info.emplace(siter->target.get_bucket(), all_bucket_info()); + } + } +} + + +class RGWSyncGetBucketSyncPolicyHandlerCR : public RGWCoroutine { + RGWDataSyncEnv *sync_env; + rgw_bucket bucket; + rgw_bucket_get_sync_policy_params get_policy_params; + + std::shared_ptr policy; + + RGWSyncTraceNodeRef tn; + + int i; + +public: + RGWSyncGetBucketSyncPolicyHandlerCR(RGWDataSyncEnv *_sync_env, + std::optional zone, + const rgw_bucket& _bucket, + std::shared_ptr& _policy, + const RGWSyncTraceNodeRef& _tn_parent) + : RGWCoroutine(_sync_env->cct), + sync_env(_sync_env), + bucket(_bucket), + policy(_policy), + tn(sync_env->sync_tracer->add_node(_tn_parent, "get_sync_policy_handler", + SSTR(bucket))) { + get_policy_params.zone = zone; + get_policy_params.bucket = bucket; + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + for (i = 0; i < 2; ++i) { + yield call(new RGWBucketGetSyncPolicyHandlerCR(sync_env->async_rados, + sync_env->driver, + get_policy_params, + policy, + dpp)); + if (retcode < 0 && + retcode != -ENOENT) { + return set_cr_error(retcode); + } + + if (retcode == 0) { + return set_cr_done(); + } + + /* bucket instance was not found, + * try to get bucket instance info, can trigger + * metadata sync of bucket instance + */ + yield call(new RGWSyncGetBucketInfoCR(sync_env, + bucket, + nullptr, + nullptr, + tn)); + if (retcode < 0) { + return set_cr_error(retcode); + } + } + } + + return 0; + } +}; + + +int RGWGetBucketPeersCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + if (pipes) { + pipes->clear(); + } + if (target_bucket) { + target_policy = make_shared(); + yield call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env, + nullopt, + *target_bucket, + target_policy, + tn)); + if (retcode < 0 && + retcode != -ENOENT) { + return set_cr_error(retcode); + } + + update_from_target_bucket_policy(); + } + + if (source_bucket && source_zone) { + source_policy = make_shared(); + yield 
+      yield call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env,
+                                                         source_zone,
+                                                         *source_bucket,
+                                                         source_policy,
+                                                         tn));
+      if (retcode < 0 &&
+          retcode != -ENOENT) {
+        return set_cr_error(retcode);
+      }
+
+      if (source_policy->policy_handler) {
+        auto& opt_bucket_info = source_policy->policy_handler->get_bucket_info();
+        auto& opt_attrs = source_policy->policy_handler->get_bucket_attrs();
+        if (opt_bucket_info && opt_attrs) {
+          source_bucket_info.emplace();
+          source_bucket_info->bucket_info = *opt_bucket_info;
+          source_bucket_info->attrs = *opt_attrs;
+        }
+      }
+
+      if (!target_bucket) {
+        get_hint_targets_action = make_shared<GetHintTargets>(sync_env, *source_bucket);
+
+        yield call(new RGWGenericAsyncCR(cct, sync_env->async_rados,
+                                         get_hint_targets_action));
+        if (retcode < 0) {
+          return set_cr_error(retcode);
+        }
+
+        /* hints might have incomplete bucket ids,
+         * in which case we need to figure out the current
+         * bucket_id
+         */
+        for (hiter = get_hint_targets_action->targets.begin();
+             hiter != get_hint_targets_action->targets.end();
+             ++hiter) {
+          ldpp_dout(dpp, 20) << "Got sync hint for bucket=" << *source_bucket << ": " << hiter->get_key() << dendl;
+
+          target_policy = make_shared<rgw_bucket_get_sync_policy_result>();
+          yield call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env,
+                                                             nullopt,
+                                                             *hiter,
+                                                             target_policy,
+                                                             tn));
+          if (retcode < 0 &&
+              retcode != -ENOENT) {
+            return set_cr_error(retcode);
+          }
+          update_from_target_bucket_policy();
+        }
+      }
+    }
+
+    update_from_source_bucket_policy();
+
+    for (siiter = buckets_info.begin(); siiter != buckets_info.end(); ++siiter) {
+      if (siiter->second.bucket_info.bucket.name.empty()) {
+        yield call(new RGWSyncGetBucketInfoCR(sync_env, siiter->first,
+                                              &siiter->second.bucket_info,
+                                              &siiter->second.attrs,
+                                              tn));
+      }
+    }
+
+    if (pipes) {
+      pipes->update_empty_bucket_info(buckets_info);
+    }
+
+    return set_cr_done();
+  }
+
+  return 0;
+}
+
+class RGWSyncBucketShardCR : public RGWCoroutine {
+  RGWDataSyncCtx *sc;
+  RGWDataSyncEnv *sync_env;
+  boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr;
+  rgw_bucket_sync_pair_info sync_pair;
+  rgw_bucket_sync_pipe& sync_pipe;
+  bool& bucket_stopped;
+  uint64_t generation;
+  ceph::real_time* progress;
+
+  const std::string shard_status_oid;
+  const rgw_raw_obj bucket_status_obj;
+  rgw_bucket_shard_sync_info sync_status;
+  RGWObjVersionTracker objv_tracker;
+
+  RGWSyncTraceNodeRef tn;
+
+public:
+  RGWSyncBucketShardCR(RGWDataSyncCtx *_sc,
+                       boost::intrusive_ptr<const RGWContinuousLeaseCR> lease_cr,
+                       const rgw_bucket_sync_pair_info& _sync_pair,
+                       rgw_bucket_sync_pipe& sync_pipe,
+                       bool& bucket_stopped,
+                       uint64_t generation,
+                       const RGWSyncTraceNodeRef& tn,
+                       ceph::real_time* progress)
+    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
+      lease_cr(std::move(lease_cr)), sync_pair(_sync_pair),
+      sync_pipe(sync_pipe), bucket_stopped(bucket_stopped), generation(generation), progress(progress),
+      shard_status_oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, generation)),
+      bucket_status_obj(sc->env->svc->zone->get_zone_params().log_pool,
+                        RGWBucketPipeSyncStatusManager::full_status_oid(sc->source_zone,
+                                                                        sync_pair.source_bs.bucket,
+                                                                        sync_pair.dest_bucket)),
+      tn(tn) {
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override;
+};
+
+int RGWSyncBucketShardCR::operate(const DoutPrefixProvider *dpp)
+{
+  reenter(this) {
+    yield call(new RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &sync_status, &objv_tracker, generation));
+    if (retcode < 0 && retcode != -ENOENT) {
+      tn->log(0, SSTR("ERROR: failed to read sync status for bucket. error: " << retcode));
error: " << retcode)); + return set_cr_error(retcode); + } + + tn->log(20, SSTR("sync status for source bucket shard: " << sync_status.state)); + sync_status.state = rgw_bucket_shard_sync_info::StateIncrementalSync; + if (progress) { + *progress = sync_status.inc_marker.timestamp; + } + + yield call(new RGWBucketShardIncrementalSyncCR(sc, sync_pipe, + shard_status_oid, bucket_status_obj, lease_cr, + sync_status, generation, tn, + objv_tracker, progress)); + if (retcode < 0) { + tn->log(5, SSTR("incremental sync on bucket failed, retcode=" << retcode)); + return set_cr_error(retcode); + } + + if (sync_status.state == rgw_bucket_shard_sync_info::StateStopped) { + tn->log(20, SSTR("syncstopped indication for source bucket shard")); + bucket_stopped = true; + } + + return set_cr_done(); + } + + return 0; +} + +class RGWSyncBucketCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *env; + boost::intrusive_ptr data_lease_cr; + boost::intrusive_ptr bucket_lease_cr; + rgw_bucket_sync_pair_info sync_pair; + rgw_bucket_sync_pipe sync_pipe; + std::optional gen; + ceph::real_time* progress; + + const std::string lock_name = "bucket sync"; + const uint32_t lock_duration; + const rgw_raw_obj status_obj; + rgw_bucket_sync_status bucket_status; + bool bucket_stopped = false; + RGWObjVersionTracker objv; + bool init_check_compat = false; + rgw_bucket_index_marker_info info; + + RGWSyncTraceNodeRef tn; + +public: + RGWSyncBucketCR(RGWDataSyncCtx *_sc, + boost::intrusive_ptr lease_cr, + const rgw_bucket_sync_pair_info& _sync_pair, + std::optional gen, + const RGWSyncTraceNodeRef& _tn_parent, + ceph::real_time* progress) + : RGWCoroutine(_sc->cct), sc(_sc), env(_sc->env), + data_lease_cr(std::move(lease_cr)), sync_pair(_sync_pair), + gen(gen), progress(progress), + lock_duration(cct->_conf->rgw_sync_lease_period), + status_obj(env->svc->zone->get_zone_params().log_pool, + RGWBucketPipeSyncStatusManager::full_status_oid(sc->source_zone, + sync_pair.source_bs.bucket, + sync_pair.dest_bucket)), + tn(env->sync_tracer->add_node(_tn_parent, "bucket", + SSTR(bucket_str{_sync_pair.dest_bucket} << "<-" << bucket_shard_str{_sync_pair.source_bs} ))) { + } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +static RGWCoroutine* sync_bucket_shard_cr(RGWDataSyncCtx* sc, + boost::intrusive_ptr lease, + const rgw_bucket_sync_pair_info& sync_pair, + std::optional gen, + const RGWSyncTraceNodeRef& tn, + ceph::real_time* progress) +{ + return new RGWSyncBucketCR(sc, std::move(lease), sync_pair, + gen, tn, progress); +} + +#define RELEASE_LOCK(cr) \ + if (cr) {cr->go_down(); drain_all(); cr.reset();} + +int RGWSyncBucketCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // read source/destination bucket info + yield call(new RGWSyncGetBucketInfoCR(env, sync_pair.source_bs.bucket, &sync_pipe.source_bucket_info, + &sync_pipe.source_bucket_attrs, tn)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{sync_pair.source_bs.bucket})); + return set_cr_error(retcode); + } + + yield call(new RGWSyncGetBucketInfoCR(env, sync_pair.dest_bucket, &sync_pipe.dest_bucket_info, + &sync_pipe.dest_bucket_attrs, tn)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{sync_pair.source_bs.bucket})); + return set_cr_error(retcode); + } + + sync_pipe.info = sync_pair; + + // read bucket sync status + using ReadCR = RGWSimpleRadosReadCR; + using WriteCR = RGWSimpleRadosWriteCR; + + yield call(new 
ReadCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, &bucket_status, false, &objv)); + if (retcode == -ENOENT) { + // use exclusive create to set state=Init + objv.generate_new_write_ver(cct); + yield call(new WriteCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, bucket_status, &objv, true)); + tn->log(20, "bucket status object does not exist, create a new one"); + if (retcode == -EEXIST) { + // raced with another create, read its status + tn->log(20, "raced with another create, read its status"); + yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, &bucket_status, false, &objv)); + } + } + if (retcode < 0) { + tn->log(20, SSTR("ERROR: failed to read bucket status object. error: " << retcode)); + return set_cr_error(retcode); + } + + do { + tn->log(20, SSTR("sync status for source bucket: " << bucket_status.state << + ". lease is: " << (bucket_lease_cr ? "taken" : "not taken") << ". stop indications is: " << bucket_stopped)); + + if (bucket_status.state != BucketSyncState::Incremental || + bucket_stopped) { + // if state is Init or Stopped, we query the remote RGW for ther state + yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, sync_pair.source_bs.bucket, &info)); + if (retcode < 0) { + return set_cr_error(retcode); + } + if (info.syncstopped) { + // remote indicates stopped state + tn->log(20, "remote bilog indicates that sync was stopped"); + if (!bucket_lease_cr) { + bucket_lease_cr.reset(new RGWContinuousLeaseCR(env->async_rados, env->driver, status_obj, + lock_name, lock_duration, this)); + yield spawn(bucket_lease_cr.get(), false); + while (!bucket_lease_cr->is_locked()) { + if (bucket_lease_cr->is_done()) { + tn->log(5, "failed to take lease"); + set_status("lease lock failed, early abort"); + drain_all(); + return set_cr_error(bucket_lease_cr->get_ret_status()); + } + tn->log(5, "waiting on bucket lease"); + yield set_sleeping(true); + } + } + + // if state was incremental, remove all per-shard status objects + if (bucket_status.state == BucketSyncState::Incremental) { + yield { + const auto num_shards = bucket_status.shards_done_with_gen.size(); + const auto gen = bucket_status.incremental_gen; + call(new RemoveBucketShardStatusCollectCR(sc, sync_pair, gen, num_shards)); + } + } + + // check if local state is "stopped" + yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, &bucket_status, false, &objv)); + if (retcode < 0) { + tn->log(20, SSTR("ERROR: failed to read status before writing 'stopped'. error: " << retcode)); + RELEASE_LOCK(bucket_lease_cr); + return set_cr_error(retcode); + } + if (bucket_status.state != BucketSyncState::Stopped) { + // make sure that state is changed to stopped localy + bucket_status.state = BucketSyncState::Stopped; + yield call(new WriteCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, bucket_status, &objv, false)); + if (retcode < 0) { + tn->log(20, SSTR("ERROR: failed to write 'stopped' status. error: " << retcode)); + RELEASE_LOCK(bucket_lease_cr); + return set_cr_error(retcode); + } + } + RELEASE_LOCK(bucket_lease_cr); + return set_cr_done(); + } + if (bucket_stopped) { + tn->log(20, SSTR("ERROR: switched from 'stop' to 'start' sync. 
while state is: " << bucket_status.state)); + bucket_stopped = false; + bucket_status.state = BucketSyncState::Init; + } + } + + if (bucket_status.state != BucketSyncState::Incremental) { + // if the state wasn't Incremental, take a bucket-wide lease to prevent + // different shards from duplicating the init and full sync + if (!bucket_lease_cr) { + bucket_lease_cr.reset(new RGWContinuousLeaseCR(env->async_rados, env->driver, status_obj, + lock_name, lock_duration, this)); + yield spawn(bucket_lease_cr.get(), false); + while (!bucket_lease_cr->is_locked()) { + if (bucket_lease_cr->is_done()) { + tn->log(5, "failed to take lease"); + set_status("lease lock failed, early abort"); + drain_all(); + return set_cr_error(bucket_lease_cr->get_ret_status()); + } + tn->log(5, "waiting on bucket lease"); + yield set_sleeping(true); + } + } + + // reread the status after acquiring the lock + yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, + status_obj, &bucket_status, false, &objv)); + if (retcode < 0) { + RELEASE_LOCK(bucket_lease_cr); + tn->log(20, SSTR("ERROR: reading the status after acquiring the lock failed. error: " << retcode)); + return set_cr_error(retcode); + } + tn->log(20, SSTR("status after acquiring the lock is: " << bucket_status.state)); + + yield call(new InitBucketFullSyncStatusCR(sc, sync_pair, status_obj, + bucket_status, objv, + sync_pipe.source_bucket_info, + init_check_compat, info)); + + if (retcode < 0) { + tn->log(20, SSTR("ERROR: init full sync failed. error: " << retcode)); + RELEASE_LOCK(bucket_lease_cr); + return set_cr_error(retcode); + } + } + + assert(bucket_status.state == BucketSyncState::Incremental || + bucket_status.state == BucketSyncState::Full); + + if (bucket_status.state == BucketSyncState::Full) { + assert(bucket_lease_cr); + yield call(new RGWBucketFullSyncCR(sc, sync_pipe, status_obj, + bucket_lease_cr, bucket_status, + tn, objv)); + if (retcode < 0) { + tn->log(20, SSTR("ERROR: full sync failed. error: " << retcode)); + RELEASE_LOCK(bucket_lease_cr); + return set_cr_error(retcode); + } + } + + if (bucket_status.state == BucketSyncState::Incremental) { + // lease not required for incremental sync + RELEASE_LOCK(bucket_lease_cr); + + // if a specific gen was requested, compare that to the sync status + if (gen) { + const auto current_gen = bucket_status.incremental_gen; + if (*gen > current_gen) { + retcode = -EAGAIN; + tn->log(10, SSTR("ERROR: requested sync of future generation " + << *gen << " > " << current_gen + << ", returning " << retcode << " for later retry")); + return set_cr_error(retcode); + } else if (*gen < current_gen) { + tn->log(10, SSTR("WARNING: requested sync of past generation " + << *gen << " < " << current_gen + << ", returning success")); + return set_cr_done(); + } + } + + assert(sync_pair.source_bs.shard_id >= 0); + if (static_cast(sync_pair.source_bs.shard_id) >= bucket_status.shards_done_with_gen.size()) { + tn->log(1, SSTR("bucket shard " << sync_pair.source_bs << " index out of bounds")); + return set_cr_done(); // return success so we don't retry + } + if (bucket_status.shards_done_with_gen[sync_pair.source_bs.shard_id]) { + tn->log(10, SSTR("bucket shard " << sync_pair.source_bs << " of gen " << + gen << " already synced.")); + return set_cr_done(); + } + + yield call(new RGWSyncBucketShardCR(sc, data_lease_cr, sync_pair, + sync_pipe, bucket_stopped, + bucket_status.incremental_gen, tn, progress)); + if (retcode < 0) { + tn->log(20, SSTR("ERROR: incremental sync failed. 
error: " << retcode)); + return set_cr_error(retcode); + } + } + // loop back to previous states unless incremental sync returns normally + } while (bucket_status.state != BucketSyncState::Incremental || bucket_stopped); + + return set_cr_done(); + } + + return 0; +} + +int RGWBucketPipeSyncStatusManager::do_init(const DoutPrefixProvider *dpp, + std::ostream* ostr) +{ + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(this, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + + sync_module.reset(new RGWDefaultSyncModuleInstance()); + auto async_rados = driver->svc()->rados->get_async_processor(); + + sync_env.init(this, driver->ctx(), driver, + driver->svc(), async_rados, &http_manager, + error_logger.get(), driver->getRados()->get_sync_tracer(), + sync_module, nullptr); + + sync_env.ostr = ostr; + + rgw_sync_pipe_info_set pipes; + + ret = cr_mgr.run(dpp, new RGWGetBucketPeersCR(&sync_env, + dest_bucket, + source_zone, + source_bucket, + &pipes, + sync_env.sync_tracer->root_node)); + if (ret < 0) { + ldpp_dout(this, 0) << "failed to get bucket source peers info: (ret=" << ret << "): " << cpp_strerror(-ret) << dendl; + return ret; + } + + if (pipes.empty()) { + ldpp_dout(this, 0) << "No peers. This is not a valid multisite configuration." << dendl; + return -EINVAL; + } + + for (auto& pipe : pipes) { + auto& szone = pipe.source.zone; + + auto conn = driver->svc()->zone->get_zone_conn(szone); + if (!conn) { + ldpp_dout(this, 0) << "connection object to zone " << szone << " does not exist" << dendl; + return -EINVAL; + } + + RGWZone* z; + if (!(z = driver->svc()->zone->find_zone(szone))) { + ldpp_dout(this, 0) << "zone " << szone << " does not exist" << dendl; + return -EINVAL; + } + sources.emplace_back(&sync_env, szone, conn, + pipe.source.get_bucket_info(), + pipe.target.get_bucket(), + pipe.handler, z->name); + } + + return 0; +} + +int RGWBucketPipeSyncStatusManager::remote_info(const DoutPrefixProvider *dpp, + source& s, + uint64_t* oldest_gen, + uint64_t* latest_gen, + uint64_t* num_shards) +{ + rgw_bucket_index_marker_info remote_info; + BucketIndexShardsManager remote_markers; + auto r = rgw_read_remote_bilog_info(dpp, s.sc.conn, s.info.bucket, + remote_info, remote_markers, + null_yield); + + if (r < 0) { + ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " rgw_read_remote_bilog_info: r=" + << r << dendl; + return r; + } + if (oldest_gen) + *oldest_gen = remote_info.oldest_gen; + + if (latest_gen) + *latest_gen = remote_info.latest_gen; + + if (num_shards) + *num_shards = remote_markers.get().size(); + + return 0; +} + +tl::expected, int> +RGWBucketPipeSyncStatusManager::construct( + const DoutPrefixProvider* dpp, + rgw::sal::RadosStore* driver, + std::optional source_zone, + std::optional source_bucket, + const rgw_bucket& dest_bucket, + std::ostream* ostr) +{ + std::unique_ptr self{ + new RGWBucketPipeSyncStatusManager(driver, source_zone, source_bucket, + dest_bucket)}; + auto r = self->do_init(dpp, ostr); + if (r < 0) { + return tl::unexpected(r); + } + return self; +} + +int RGWBucketPipeSyncStatusManager::init_sync_status( + const DoutPrefixProvider *dpp) +{ + // Just running one at a time saves us from buildup/teardown and in + // practice we only do one zone at a time. 
+  for (auto& source : sources) {
+    list<RGWCoroutinesStack*> stacks;
+    RGWCoroutinesStack *stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr);
+    pretty_print(source.sc.env, "Initializing sync state of bucket {} with zone {}.\n",
+                 source.info.bucket.name, source.zone_name);
+    stack->call(new RGWSimpleRadosWriteCR<rgw_bucket_sync_status>(
+                  dpp, source.sc.env->async_rados, source.sc.env->svc->sysobj,
+                  {sync_env.svc->zone->get_zone_params().log_pool,
+                   full_status_oid(source.sc.source_zone,
+                                   source.info.bucket,
+                                   source.dest)},
+                  rgw_bucket_sync_status{}));
+    stacks.push_back(stack);
+    auto r = cr_mgr.run(dpp, stacks);
+    if (r < 0) {
+      pretty_print(source.sc.env,
+                   "Initialization of sync state for bucket {} with zone {} "
+                   "failed with error {}\n",
+                   source.info.bucket.name, source.zone_name, cpp_strerror(r));
+    }
+  }
+  return 0;
+}
+
+tl::expected<std::map<int, rgw_bucket_shard_sync_info>, int>
+RGWBucketPipeSyncStatusManager::read_sync_status(
+  const DoutPrefixProvider *dpp)
+{
+  std::map<int, rgw_bucket_shard_sync_info> sync_status;
+  list<RGWCoroutinesStack *> stacks;
+
+  auto sz = sources.begin();
+
+  if (source_zone) {
+    sz = std::find_if(sources.begin(), sources.end(),
+                      [this](const source& s) {
+                        return s.sc.source_zone == *source_zone;
+                      }
+      );
+    if (sz == sources.end()) {
+      ldpp_dout(this, 0) << "ERROR: failed to find source zone: "
+                         << *source_zone << dendl;
+      return tl::unexpected(-ENOENT);
+    }
+  } else {
+    ldpp_dout(this, 5) << "No source zone specified, using the first source zone: "
+                       << sz->sc.source_zone << dendl;
+  }
+
+  uint64_t num_shards, latest_gen;
+  auto ret = remote_info(dpp, *sz, nullptr, &latest_gen, &num_shards);
+  if (ret < 0) {
+    ldpp_dout(this, 5) << "Unable to get remote info: "
+                       << ret << dendl;
+    return tl::unexpected(ret);
+  }
+  auto stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr);
+  std::vector<rgw_bucket_sync_pair_info> pairs(num_shards);
+  for (auto shard = 0u; shard < num_shards; ++shard) {
+    auto& pair = pairs[shard];
+    pair.source_bs.bucket = sz->info.bucket;
+    pair.dest_bucket = sz->dest;
+    pair.source_bs.shard_id = shard;
+    stack->call(new RGWReadBucketPipeSyncStatusCoroutine(
+                  &sz->sc, pair, &sync_status[shard],
+                  nullptr, latest_gen));
+  }
+
+  stacks.push_back(stack);
+
+  ret = cr_mgr.run(dpp, stacks);
+  if (ret < 0) {
+    ldpp_dout(this, 0) << "ERROR: failed to read sync status for "
+                       << bucket_str{dest_bucket} << dendl;
+    return tl::unexpected(ret);
+  }
+
+  return sync_status;
+}
+
+namespace rgw::bucket_sync_run {
+// Retry-loop over calls to sync_bucket_shard_cr
+class ShardCR : public RGWCoroutine {
+  static constexpr auto allowed_retries = 10u;
+
+  RGWDataSyncCtx& sc;
+  const rgw_bucket_sync_pair_info& pair;
+  const uint64_t gen;
+  unsigned retries = 0;
+
+  ceph::real_time prev_progress;
+  ceph::real_time progress;
+
+public:
+
+  ShardCR(RGWDataSyncCtx& sc, const rgw_bucket_sync_pair_info& pair,
+          const uint64_t gen)
+    : RGWCoroutine(sc.cct), sc(sc), pair(pair), gen(gen) {}
+
+  int operate(const DoutPrefixProvider *dpp) override {
+    reenter(this) {
+      // Since all errors (except ECANCELED) are considered retryable,
+      // retry other errors so long as we're making progress. 
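
The loop that follows implements this policy: a fixed retry budget that is refilled whenever the shard's sync marker advances. As a standalone illustration, a minimal sketch of the same budget-with-reset idea (`do_sync_step()` and its behavior are invented for the example):

```cpp
#include <cerrno>
#include <cstdio>

constexpr unsigned allowed_retries = 10;

// Pretend sync step: advances *progress each call and succeeds on the third.
int do_sync_step(int* progress) {
  ++*progress;
  return (*progress >= 3) ? 0 : -EIO;
}

int run_with_retries() {
  int progress = 0;
  int prev_progress = 0;
  int ret = -EDOM;  // sentinel "not yet run" value, as in ShardCR
  for (unsigned retries = 0; retries < allowed_retries && ret != 0; ++retries) {
    ret = do_sync_step(&progress);
    if (ret == -ECANCELED) {
      return ret;            // fatal: never retried
    } else if (ret < 0) {
      if (progress != prev_progress) {
        retries = 0;         // made progress: refill the retry budget
      }
      prev_progress = progress;
    }
  }
  return ret;  // 0 on success, last error if the budget ran out
}

int main() {
  std::printf("result=%d\n", run_with_retries());
}
```
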
+ for (retries = 0u, retcode = -EDOM; + (retries < allowed_retries) && (retcode != 0); + ++retries) { + ldpp_dout(dpp, 5) << "ShardCR: syncing bucket shard on: " + << "zone=" << sc.source_zone + << ", bucket=" << pair.source_bs.bucket.name + << ", shard=" << pair.source_bs.shard_id + << ", gen=" << gen + << dendl; + yield call(sync_bucket_shard_cr(&sc, nullptr, pair, gen, + sc.env->sync_tracer->root_node, + &progress)); + + if (retcode == -ECANCELED) { + ldpp_dout(dpp, -1) << "ERROR: Got -ECANCELED for " + << pair.source_bs << dendl; + drain_all(); + return set_cr_error(retcode); + } else if (retcode < 0) { + ldpp_dout(dpp, 5) << "WARNING: Got error, retcode=" << retcode << " for " + << pair.source_bs << "on retry " + << retries + 1 << " of " << allowed_retries + << " allowed" << dendl; + // Reset the retry counter if we made any progress + if (progress != prev_progress) { + retries = 0; + } + prev_progress = progress; + } + } + if (retcode < 0) { + ldpp_dout(dpp, -1) << "ERROR: Exhausted retries for " + << pair.source_bs << " retcode=" + << retcode << dendl; + drain_all(); + return set_cr_error(retcode); + } + + drain_all(); + return set_cr_done(); + } + return 0; + } +}; + +// Loop over calls to ShardCR with limited concurrency +class GenCR : public RGWShardCollectCR { + static constexpr auto MAX_CONCURRENT_SHARDS = 64; + + RGWDataSyncCtx& sc; + const uint64_t gen; + + std::vector pairs; + decltype(pairs)::const_iterator iter; + +public: + GenCR(RGWDataSyncCtx& sc, const rgw_bucket& source, const rgw_bucket& dest, + const uint64_t gen, const uint64_t shards, + const RGWBucketSyncFlowManager::pipe_handler& handler) + : RGWShardCollectCR(sc.cct, MAX_CONCURRENT_SHARDS), + sc(sc), gen(gen) { + pairs.resize(shards); + for (auto shard = 0u; shard < shards; ++shard) { + auto& pair = pairs[shard]; + pair.handler = handler; + pair.source_bs.bucket = source; + pair.dest_bucket = dest; + pair.source_bs.shard_id = shard; + } + iter = pairs.cbegin(); + assert(pairs.size() == shards); + } + + virtual bool spawn_next() override { + if (iter == pairs.cend()) { + return false; + } + spawn(new ShardCR(sc, *iter, gen), false); + ++iter; + return true; + } + + int handle_result(int r) override { + if (r < 0) { + ldpp_dout(sc.env->dpp, 4) << "ERROR: Error syncing shard: " + << cpp_strerror(r) << dendl; + } + return r; + } +}; + +// Read sync status, loop over calls to GenCR +class SourceCR : public RGWCoroutine { + RGWDataSyncCtx& sc; + const RGWBucketInfo& info; + const rgw_bucket& dest; + const RGWBucketSyncFlowManager::pipe_handler& handler; + const rgw_raw_obj status_obj{ + sc.env->svc->zone->get_zone_params().log_pool, + RGWBucketPipeSyncStatusManager::full_status_oid(sc.source_zone, info.bucket, + dest)}; + + BucketSyncState state = BucketSyncState::Incremental; + uint64_t gen = 0; + uint64_t num_shards = 0; + rgw_bucket_sync_status status; + std::string zone_name; + +public: + + SourceCR(RGWDataSyncCtx& sc, const RGWBucketInfo& info, + const rgw_bucket& dest, + const RGWBucketSyncFlowManager::pipe_handler& handler, + const std::string& zone_name) + : RGWCoroutine(sc.cct), sc(sc), info(info), dest(dest), handler(handler), + zone_name(zone_name) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + // Get the source's status. In incremental sync, this gives us + // the generation and shard count that is next needed to be run. 
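
`GenCR` above relies on `RGWShardCollectCR` to keep at most `MAX_CONCURRENT_SHARDS` shard coroutines in flight, handing out the next pair from `spawn_next()` as earlier ones complete. A self-contained analogue of that bounded fan-out using `std::async` in place of coroutine stacks (`process_shard()` is an invented stand-in for the per-shard work):

```cpp
#include <deque>
#include <future>
#include <iostream>

int process_shard(int shard) {        // fake per-shard work
  return (shard % 7 == 0) ? -5 : 0;   // pretend every 7th shard fails (-EIO on Linux)
}

int sync_all_shards(int num_shards, unsigned max_concurrent) {
  std::deque<std::future<int>> window;
  int first_error = 0;
  for (int shard = 0; shard < num_shards; ++shard) {
    if (window.size() >= max_concurrent) {  // window full: reap the oldest job
      if (int r = window.front().get(); r < 0 && first_error == 0)
        first_error = r;
      window.pop_front();
    }
    window.push_back(std::async(std::launch::async, process_shard, shard));
  }
  while (!window.empty()) {                 // drain the remaining jobs
    if (int r = window.front().get(); r < 0 && first_error == 0)
      first_error = r;
    window.pop_front();
  }
  return first_error;  // like handle_result(): remember the first failure
}

int main() {
  std::cout << "rc=" << sync_all_shards(32, 8) << '\n';
}
```
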
+ yield call(new RGWSimpleRadosReadCR( + dpp, sc.env->async_rados, sc.env->svc->sysobj, + status_obj, &status)); + if (retcode < 0) { + ldpp_dout(dpp, -1) << "ERROR: Unable to fetch status for zone=" + << sc.source_zone << " retcode=" + << retcode << dendl; + drain_all(); + return set_cr_error(retcode); + } + + if (status.state == BucketSyncState::Stopped) { + // Nothing to do. + pretty_print(sc.env, "Sync of bucket {} from source zone {} is in state Stopped. " + "Nothing to do.\n", dest.name, zone_name); + ldpp_dout(dpp, 5) << "SourceCR: Bucket is in state Stopped, returning." + << dendl; + drain_all(); + return set_cr_done(); + } + + do { + state = status.state; + gen = status.incremental_gen; + num_shards = status.shards_done_with_gen.size(); + + ldpp_dout(dpp, 5) << "SourceCR: " + << "state=" << state + << ", gen=" << gen + << ", num_shards=" << num_shards + << dendl; + + // Special case to handle full sync. Since full sync no longer + // uses shards and has no generations, we sync shard zero, + // though use the current generation so a following + // incremental sync can carry on. + if (state != BucketSyncState::Incremental) { + pretty_print(sc.env, "Beginning full sync of bucket {} from source zone {}.\n", + dest.name, zone_name); + ldpp_dout(dpp, 5) << "SourceCR: Calling GenCR with " + << "gen=" << gen + << ", num_shards=" << 1 + << dendl; + yield call(new GenCR(sc, info.bucket, dest, gen, 1, handler)); + } else { + pretty_print(sc.env, "Beginning incremental sync of bucket {}, generation {} from source zone {}.\n", + dest.name, gen, zone_name); + ldpp_dout(dpp, 5) << "SourceCR: Calling GenCR with " + << "gen=" << gen + << ", num_shards=" << num_shards + << dendl; + yield call(new GenCR(sc, info.bucket, dest, gen, num_shards, + handler)); + } + if (retcode < 0) { + ldpp_dout(dpp, -1) << "ERROR: Giving up syncing from " + << sc.source_zone << " retcode=" + << retcode << dendl; + drain_all(); + return set_cr_error(retcode); + } + + pretty_print(sc.env, "Completed.\n"); + + yield call(new RGWSimpleRadosReadCR( + dpp, sc.env->async_rados, sc.env->svc->sysobj, + status_obj, &status)); + if (retcode < 0) { + ldpp_dout(dpp, -1) << "ERROR: Unable to fetch status for zone=" + << sc.source_zone << " retcode=" + << retcode << dendl; + drain_all(); + return set_cr_error(retcode); + } + // Repeat until we have done an incremental run and the + // generation remains unchanged. 
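
The comment above states the termination condition of `SourceCR`'s outer loop: run a pass, re-read the status, and go around again until an incremental pass completes with the generation unchanged. A minimal self-contained sketch of that control flow (the `Status` struct, `read_status()`, and `run_gen()` are invented stand-ins):

```cpp
#include <cstdint>
#include <iostream>

enum class State { Init, Full, Incremental, Stopped };

struct Status {
  State state = State::Init;
  uint64_t incremental_gen = 0;
};

// Fake status store: the full pass promotes the bucket to incremental,
// and the log generation advances once underneath us.
Status read_status(int pass) {
  if (pass == 0) return {State::Init, 0};
  return {State::Incremental, 1};
}

void run_gen(State s, uint64_t gen) {
  std::cout << (s == State::Incremental ? "incremental" : "full")
            << " pass, gen=" << gen << '\n';
}

int main() {
  int pass = 0;
  Status status = read_status(pass++);
  State state;
  uint64_t gen;
  do {
    state = status.state;            // snapshot what we are about to run
    gen = status.incremental_gen;
    run_gen(state, gen);
    status = read_status(pass++);    // reread after the pass completes
  } while (state != State::Incremental || gen != status.incremental_gen);
}
```
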
+ ldpp_dout(dpp, 5) << "SourceCR: " + << "state=" << state + << ", gen=" << gen + << ", num_shards=" << num_shards + << ", status.state=" << status.state + << ", status.incremental_gen=" << status.incremental_gen + << ", status.shards_done_with_gen.size()=" << status.shards_done_with_gen.size() + << dendl; + } while (state != BucketSyncState::Incremental || + gen != status.incremental_gen); + drain_all(); + return set_cr_done(); + } + return 0; + } +}; +} // namespace rgw::bucket_sync_run + +int RGWBucketPipeSyncStatusManager::run(const DoutPrefixProvider *dpp) +{ + list stacks; + for (auto& source : sources) { + auto stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr); + stack->call(new rgw::bucket_sync_run::SourceCR( + source.sc, source.info, source.dest, source.handler, + source.zone_name)); + stacks.push_back(stack); + } + auto ret = cr_mgr.run(dpp, stacks); + if (ret < 0) { + ldpp_dout(this, 0) << "ERROR: Sync unsuccessful on bucket " + << bucket_str{dest_bucket} << dendl; + } + return ret; +} + +unsigned RGWBucketPipeSyncStatusManager::get_subsys() const +{ + return dout_subsys; +} + +std::ostream& RGWBucketPipeSyncStatusManager::gen_prefix(std::ostream& out) const +{ + auto zone = std::string_view{source_zone.value_or(rgw_zone_id("*")).id}; + return out << "bucket sync zone:" << zone.substr(0, 8) + << " bucket:" << dest_bucket << ' '; +} + +string RGWBucketPipeSyncStatusManager::full_status_oid(const rgw_zone_id& source_zone, + const rgw_bucket& source_bucket, + const rgw_bucket& dest_bucket) +{ + if (source_bucket == dest_bucket) { + return bucket_full_status_oid_prefix + "." + source_zone.id + ":" + + dest_bucket.get_key(); + } else { + return bucket_full_status_oid_prefix + "." + source_zone.id + ":" + + dest_bucket.get_key() + ":" + source_bucket.get_key(); + } +} + +inline std::string generation_token(uint64_t gen) { + return (gen == 0) ? "" : (":" + std::to_string(gen)); +} + +string RGWBucketPipeSyncStatusManager::inc_status_oid(const rgw_zone_id& source_zone, + const rgw_bucket_sync_pair_info& sync_pair, + uint64_t gen) +{ + if (sync_pair.source_bs.bucket == sync_pair.dest_bucket) { + return bucket_status_oid_prefix + "." + source_zone.id + ":" + sync_pair.source_bs.get_key() + + generation_token(gen); + } else { + return bucket_status_oid_prefix + "." + source_zone.id + ":" + sync_pair.dest_bucket.get_key() + ":" + sync_pair.source_bs.get_key() + + generation_token(gen); + } +} + +string RGWBucketPipeSyncStatusManager::obj_status_oid(const rgw_bucket_sync_pipe& sync_pipe, + const rgw_zone_id& source_zone, + const rgw::sal::Object* obj) +{ + string prefix = object_status_oid_prefix + "." 
+ source_zone.id + ":" + obj->get_bucket()->get_key().get_key(); + if (sync_pipe.source_bucket_info.bucket != + sync_pipe.dest_bucket_info.bucket) { + prefix += string("/") + sync_pipe.dest_bucket_info.bucket.get_key(); + } + return prefix + ":" + obj->get_name() + ":" + obj->get_instance(); +} + +int rgw_read_remote_bilog_info(const DoutPrefixProvider *dpp, + RGWRESTConn* conn, + const rgw_bucket& bucket, + rgw_bucket_index_marker_info& info, + BucketIndexShardsManager& markers, + optional_yield y) +{ + const auto instance_key = bucket.get_key(); + const rgw_http_param_pair params[] = { + { "type" , "bucket-index" }, + { "bucket-instance", instance_key.c_str() }, + { "info" , nullptr }, + { nullptr, nullptr } + }; + int r = conn->get_json_resource(dpp, "/admin/log/", params, y, info); + if (r < 0) { + ldpp_dout(dpp, -1) << "failed to fetch remote log markers: " << cpp_strerror(r) << dendl; + return r; + } + // parse shard markers + r = markers.from_string(info.max_marker, -1); + if (r < 0) { + ldpp_dout(dpp, -1) << "failed to decode remote log markers" << dendl; + return r; + } + return 0; +} + +class RGWCollectBucketSyncStatusCR : public RGWShardCollectCR { + static constexpr int max_concurrent_shards = 16; + rgw::sal::RadosStore* const driver; + RGWDataSyncCtx *const sc; + RGWDataSyncEnv *const env; + const uint64_t gen; + + rgw_bucket_sync_pair_info sync_pair; + using Vector = std::vector; + Vector::iterator i, end; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read bucket shard sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + RGWCollectBucketSyncStatusCR(rgw::sal::RadosStore* driver, RGWDataSyncCtx *sc, + const rgw_bucket_sync_pair_info& sync_pair, + uint64_t gen, + Vector *status) + : RGWShardCollectCR(sc->cct, max_concurrent_shards), + driver(driver), sc(sc), env(sc->env), gen(gen), sync_pair(sync_pair), + i(status->begin()), end(status->end()) + {} + + bool spawn_next() override { + if (i == end) { + return false; + } + spawn(new RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &*i, nullptr, gen), false); + ++i; + ++sync_pair.source_bs.shard_id; + return true; + } +}; + +int rgw_read_bucket_full_sync_status(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore *driver, + const rgw_sync_bucket_pipe& pipe, + rgw_bucket_sync_status *status, + optional_yield y) +{ + auto get_oid = RGWBucketPipeSyncStatusManager::full_status_oid; + const rgw_raw_obj obj{driver->svc()->zone->get_zone_params().log_pool, + get_oid(*pipe.source.zone, *pipe.source.bucket, *pipe.dest.bucket)}; + + auto svc = driver->svc()->sysobj; + auto sysobj = svc->get_obj(obj); + bufferlist bl; + int ret = sysobj.rop().read(dpp, &bl, y); + if (ret < 0) + return ret; + + try { + auto iter = bl.cbegin(); + using ceph::decode; + rgw_bucket_sync_status result; + decode(result, iter); + *status = result; + return 0; + } catch (const buffer::error& err) { + lderr(svc->ctx()) << "error decoding " << obj << ": " << err.what() << dendl; + return -EIO; + } +} + +int rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore *driver, + const rgw_sync_bucket_pipe& pipe, + uint64_t gen, + std::vector *status) +{ + if (!pipe.source.zone || + !pipe.source.bucket || + !pipe.dest.zone || + !pipe.dest.bucket) { + return -EINVAL; + } + + rgw_bucket_sync_pair_info sync_pair; + sync_pair.source_bs.bucket = *pipe.source.bucket; + sync_pair.source_bs.shard_id = 0; + 
sync_pair.dest_bucket = *pipe.dest.bucket; + + RGWDataSyncEnv env; + RGWSyncModuleInstanceRef module; // null sync module + env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->rados->get_async_processor(), + nullptr, nullptr, nullptr, module, nullptr); + + RGWDataSyncCtx sc; + sc.init(&env, nullptr, *pipe.source.zone); + + RGWCoroutinesManager crs(driver->ctx(), driver->getRados()->get_cr_registry()); + return crs.run(dpp, new RGWCollectBucketSyncStatusCR(driver, &sc, + sync_pair, + gen, + status)); +} + +void rgw_data_sync_info::generate_test_instances(list& o) +{ + auto info = new rgw_data_sync_info; + info->state = rgw_data_sync_info::StateBuildingFullSyncMaps; + info->num_shards = 8; + o.push_back(info); + o.push_back(new rgw_data_sync_info); +} + +void rgw_data_sync_marker::generate_test_instances(list& o) +{ + auto marker = new rgw_data_sync_marker; + marker->state = rgw_data_sync_marker::IncrementalSync; + marker->marker = "01234"; + marker->pos = 5; + o.push_back(marker); + o.push_back(new rgw_data_sync_marker); +} + +void rgw_data_sync_status::generate_test_instances(list& o) +{ + o.push_back(new rgw_data_sync_status); +} + +void rgw_bucket_shard_full_sync_marker::dump(Formatter *f) const +{ + encode_json("position", position, f); + encode_json("count", count, f); +} + +void rgw_bucket_shard_inc_sync_marker::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("position", position, obj); + JSONDecoder::decode_json("timestamp", timestamp, obj); +} + +void rgw_bucket_shard_inc_sync_marker::dump(Formatter *f) const +{ + encode_json("position", position, f); + encode_json("timestamp", timestamp, f); +} + +void rgw_bucket_shard_sync_info::decode_json(JSONObj *obj) +{ + std::string s; + JSONDecoder::decode_json("status", s, obj); + if (s == "full-sync") { + state = StateFullSync; + } else if (s == "incremental-sync") { + state = StateIncrementalSync; + } else if (s == "stopped") { + state = StateStopped; + } else { + state = StateInit; + } + JSONDecoder::decode_json("inc_marker", inc_marker, obj); +} + +void rgw_bucket_shard_full_sync_marker::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("position", position, obj); + JSONDecoder::decode_json("count", count, obj); +} + +void rgw_bucket_shard_sync_info::dump(Formatter *f) const +{ + const char *s{nullptr}; + switch ((SyncState)state) { + case StateInit: + s = "init"; + break; + case StateFullSync: + s = "full-sync"; + break; + case StateIncrementalSync: + s = "incremental-sync"; + break; + case StateStopped: + s = "stopped"; + break; + default: + s = "unknown"; + break; + } + encode_json("status", s, f); + encode_json("inc_marker", inc_marker, f); +} + +void rgw_bucket_full_sync_status::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("position", position, obj); + JSONDecoder::decode_json("count", count, obj); +} + +void rgw_bucket_full_sync_status::dump(Formatter *f) const +{ + encode_json("position", position, f); + encode_json("count", count, f); +} + +void encode_json(const char *name, BucketSyncState state, Formatter *f) +{ + switch (state) { + case BucketSyncState::Init: + encode_json(name, "init", f); + break; + case BucketSyncState::Full: + encode_json(name, "full-sync", f); + break; + case BucketSyncState::Incremental: + encode_json(name, "incremental-sync", f); + break; + case BucketSyncState::Stopped: + encode_json(name, "stopped", f); + break; + default: + encode_json(name, "unknown", f); + break; + } +} + +void decode_json_obj(BucketSyncState& state, JSONObj *obj) +{ + std::string s; + 
decode_json_obj(s, obj); + if (s == "full-sync") { + state = BucketSyncState::Full; + } else if (s == "incremental-sync") { + state = BucketSyncState::Incremental; + } else if (s == "stopped") { + state = BucketSyncState::Stopped; + } else { + state = BucketSyncState::Init; + } +} + +void rgw_bucket_sync_status::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("state", state, obj); + JSONDecoder::decode_json("full", full, obj); + JSONDecoder::decode_json("incremental_gen", incremental_gen, obj); +} + +void rgw_bucket_sync_status::dump(Formatter *f) const +{ + encode_json("state", state, f); + encode_json("full", full, f); + encode_json("incremental_gen", incremental_gen, f); +} + + +void bilog_status_v2::dump(Formatter *f) const +{ + encode_json("sync_status", sync_status, f); + encode_json("inc_status", inc_status, f); +} + +void bilog_status_v2::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("sync_status", sync_status, obj); + JSONDecoder::decode_json("inc_status", inc_status, obj); +} diff --git a/src/rgw/driver/rados/rgw_data_sync.h b/src/rgw/driver/rados/rgw_data_sync.h new file mode 100644 index 00000000000..6cc714dbaf8 --- /dev/null +++ b/src/rgw/driver/rados/rgw_data_sync.h @@ -0,0 +1,823 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_DATA_SYNC_H +#define CEPH_RGW_DATA_SYNC_H + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include +#include + +#include "include/encoding.h" + +#include "common/ceph_json.h" +#include "common/likely.h" + +#include "rgw_coroutine.h" +#include "rgw_http_client.h" +#include "rgw_sal_rados.h" + +#include "rgw_datalog.h" +#include "rgw_sync.h" +#include "rgw_sync_module.h" +#include "rgw_sync_trace.h" +#include "rgw_sync_policy.h" + +#include "rgw_bucket_sync.h" + +// represents an obligation to sync an entry up a given time +struct rgw_data_sync_obligation { + rgw_bucket_shard bs; + std::optional gen; + std::string marker; + ceph::real_time timestamp; + bool retry = false; +}; + +inline std::ostream& operator<<(std::ostream& out, const rgw_data_sync_obligation& o) { + out << "key=" << o.bs; + if (o.gen) { + out << '[' << *o.gen << ']'; + } + if (!o.marker.empty()) { + out << " marker=" << o.marker; + } + if (o.timestamp != ceph::real_time{}) { + out << " timestamp=" << o.timestamp; + } + if (o.retry) { + out << " retry"; + } + return out; +} + +class JSONObj; +struct rgw_sync_bucket_pipe; + +struct rgw_bucket_sync_pair_info { + RGWBucketSyncFlowManager::pipe_handler handler; /* responsible for sync filters */ + rgw_bucket_shard source_bs; + rgw_bucket dest_bucket; +}; + +inline std::ostream& operator<<(std::ostream& out, const rgw_bucket_sync_pair_info& p) { + if (p.source_bs.bucket == p.dest_bucket) { + return out << p.source_bs; + } + return out << p.source_bs << "->" << p.dest_bucket; +} + +struct rgw_bucket_sync_pipe { + rgw_bucket_sync_pair_info info; + RGWBucketInfo source_bucket_info; + std::map source_bucket_attrs; + RGWBucketInfo dest_bucket_info; + std::map dest_bucket_attrs; + + RGWBucketSyncFlowManager::pipe_rules_ref& get_rules() { + return info.handler.rules; + } +}; + +inline std::ostream& operator<<(std::ostream& out, const rgw_bucket_sync_pipe& p) { + return out << p.info; +} + +struct rgw_datalog_info { + uint32_t num_shards; + + rgw_datalog_info() : num_shards(0) {} + + void decode_json(JSONObj *obj); +}; + +struct rgw_data_sync_info { + enum SyncState { + StateInit = 0, + StateBuildingFullSyncMaps = 1, + StateSync = 2, + }; + + 
uint16_t state; + uint32_t num_shards; + + uint64_t instance_id{0}; + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(state, bl); + encode(num_shards, bl); + encode(instance_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(state, bl); + decode(num_shards, bl); + if (struct_v >= 2) { + decode(instance_id, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const { + std::string s; + switch ((SyncState)state) { + case StateInit: + s = "init"; + break; + case StateBuildingFullSyncMaps: + s = "building-full-sync-maps"; + break; + case StateSync: + s = "sync"; + break; + default: + s = "unknown"; + break; + } + encode_json("status", s, f); + encode_json("num_shards", num_shards, f); + encode_json("instance_id", instance_id, f); + } + void decode_json(JSONObj *obj) { + std::string s; + JSONDecoder::decode_json("status", s, obj); + if (s == "building-full-sync-maps") { + state = StateBuildingFullSyncMaps; + } else if (s == "sync") { + state = StateSync; + } else { + state = StateInit; + } + JSONDecoder::decode_json("num_shards", num_shards, obj); + JSONDecoder::decode_json("instance_id", instance_id, obj); + } + static void generate_test_instances(std::list& o); + + rgw_data_sync_info() : state((int)StateInit), num_shards(0) {} +}; +WRITE_CLASS_ENCODER(rgw_data_sync_info) + +struct rgw_data_sync_marker { + enum SyncState { + FullSync = 0, + IncrementalSync = 1, + }; + uint16_t state; + std::string marker; + std::string next_step_marker; + uint64_t total_entries; + uint64_t pos; + real_time timestamp; + + rgw_data_sync_marker() : state(FullSync), total_entries(0), pos(0) {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(state, bl); + encode(marker, bl); + encode(next_step_marker, bl); + encode(total_entries, bl); + encode(pos, bl); + encode(timestamp, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(state, bl); + decode(marker, bl); + decode(next_step_marker, bl); + decode(total_entries, bl); + decode(pos, bl); + decode(timestamp, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const { + const char *s{nullptr}; + switch ((SyncState)state) { + case FullSync: + s = "full-sync"; + break; + case IncrementalSync: + s = "incremental-sync"; + break; + default: + s = "unknown"; + break; + } + encode_json("status", s, f); + encode_json("marker", marker, f); + encode_json("next_step_marker", next_step_marker, f); + encode_json("total_entries", total_entries, f); + encode_json("pos", pos, f); + encode_json("timestamp", utime_t(timestamp), f); + } + void decode_json(JSONObj *obj) { + std::string s; + JSONDecoder::decode_json("status", s, obj); + if (s == "full-sync") { + state = FullSync; + } else if (s == "incremental-sync") { + state = IncrementalSync; + } + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("next_step_marker", next_step_marker, obj); + JSONDecoder::decode_json("total_entries", total_entries, obj); + JSONDecoder::decode_json("pos", pos, obj); + utime_t t; + JSONDecoder::decode_json("timestamp", t, obj); + timestamp = t.to_real_time(); + } + static void generate_test_instances(std::list& o); +}; +WRITE_CLASS_ENCODER(rgw_data_sync_marker) + +struct rgw_data_sync_status { + rgw_data_sync_info sync_info; + std::map sync_markers; + + rgw_data_sync_status() {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(sync_info, bl); + /* sync markers are 
encoded separately */ + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(sync_info, bl); + /* sync markers are decoded separately */ + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const { + encode_json("info", sync_info, f); + encode_json("markers", sync_markers, f); + } + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("info", sync_info, obj); + JSONDecoder::decode_json("markers", sync_markers, obj); + } + static void generate_test_instances(std::list& o); +}; +WRITE_CLASS_ENCODER(rgw_data_sync_status) + +struct rgw_datalog_entry { + std::string key; + ceph::real_time timestamp; + + void decode_json(JSONObj *obj); +}; + +struct rgw_datalog_shard_data { + std::string marker; + bool truncated; + std::vector entries; + + void decode_json(JSONObj *obj); +}; + +class RGWAsyncRadosProcessor; +class RGWDataSyncControlCR; + +struct rgw_bucket_entry_owner { + std::string id; + std::string display_name; + + rgw_bucket_entry_owner() {} + rgw_bucket_entry_owner(const std::string& _id, const std::string& _display_name) : id(_id), display_name(_display_name) {} + + void decode_json(JSONObj *obj); +}; + +class RGWSyncErrorLogger; +class RGWRESTConn; +class RGWServices; + +struct RGWDataSyncEnv { + const DoutPrefixProvider *dpp{nullptr}; + CephContext *cct{nullptr}; + rgw::sal::RadosStore* driver{nullptr}; + RGWServices *svc{nullptr}; + RGWAsyncRadosProcessor *async_rados{nullptr}; + RGWHTTPManager *http_manager{nullptr}; + RGWSyncErrorLogger *error_logger{nullptr}; + RGWSyncTraceManager *sync_tracer{nullptr}; + RGWSyncModuleInstanceRef sync_module{nullptr}; + PerfCounters* counters{nullptr}; + + RGWDataSyncEnv() {} + + void init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _driver, RGWServices *_svc, + RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, + RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer, + RGWSyncModuleInstanceRef& _sync_module, + PerfCounters* _counters) { + dpp = _dpp; + cct = _cct; + driver = _driver; + svc = _svc; + async_rados = _async_rados; + http_manager = _http_manager; + error_logger = _error_logger; + sync_tracer = _sync_tracer; + sync_module = _sync_module; + counters = _counters; + } + + std::string shard_obj_name(int shard_id); + std::string status_oid(); + + std::ostream* ostr{nullptr}; // For pretty printing progress +}; + +// pretty ostream output for `radosgw-admin bucket sync run` +template +void pretty_print(const RGWDataSyncEnv* env, T&& ...t) { + if (unlikely(!!env->ostr)) { + fmt::print(*env->ostr, std::forward(t)...); + env->ostr->flush(); + } +} + +struct RGWDataSyncCtx { + RGWDataSyncEnv *env{nullptr}; + CephContext *cct{nullptr}; + + RGWRESTConn *conn{nullptr}; + rgw_zone_id source_zone; + + RGWDataSyncCtx() = default; + + RGWDataSyncCtx(RGWDataSyncEnv* env, + RGWRESTConn* conn, + const rgw_zone_id& source_zone) + : env(env), cct(env->cct), conn(conn), source_zone(source_zone) {} + + void init(RGWDataSyncEnv *_env, + RGWRESTConn *_conn, + const rgw_zone_id& _source_zone) { + cct = _env->cct; + env = _env; + conn = _conn; + source_zone = _source_zone; + } +}; + +class RGWRados; + +class RGWRemoteDataLog : public RGWCoroutinesManager { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* driver; + CephContext *cct; + RGWCoroutinesManagerRegistry *cr_registry; + RGWAsyncRadosProcessor *async_rados; + RGWHTTPManager http_manager; + + RGWDataSyncEnv sync_env; + RGWDataSyncCtx sc; + + ceph::shared_mutex lock = 
ceph::make_shared_mutex("RGWRemoteDataLog::lock"); + RGWDataSyncControlCR *data_sync_cr; + + RGWSyncTraceNodeRef tn; + + bool initialized; + +public: + RGWRemoteDataLog(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* _store, + RGWAsyncRadosProcessor *async_rados); + int init(const rgw_zone_id& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger, + RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& module, + PerfCounters* _counters); + void finish(); + + int read_log_info(const DoutPrefixProvider *dpp, rgw_datalog_info *log_info); + int read_source_log_shards_info(const DoutPrefixProvider *dpp, std::map *shards_info); + int read_source_log_shards_next(const DoutPrefixProvider *dpp, std::map shard_markers, std::map *result); + int read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status); + int read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, std::set& recovering_shards); + int read_shard_status(const DoutPrefixProvider *dpp, int shard_id, std::set& lagging_buckets,std::set& recovering_buckets, rgw_data_sync_marker* sync_marker, const int max_entries); + int init_sync_status(const DoutPrefixProvider *dpp, int num_shards); + int run_sync(const DoutPrefixProvider *dpp, int num_shards); + + void wakeup(int shard_id, bc::flat_set& entries); +}; + +class RGWDataSyncStatusManager : public DoutPrefixProvider { + rgw::sal::RadosStore* driver; + + rgw_zone_id source_zone; + RGWRESTConn *conn; + RGWSyncErrorLogger *error_logger; + RGWSyncModuleInstanceRef sync_module; + PerfCounters* counters; + + RGWRemoteDataLog source_log; + + std::string source_status_oid; + std::string source_shard_status_oid_prefix; + + std::map shard_objs; + + int num_shards; + +public: + RGWDataSyncStatusManager(rgw::sal::RadosStore* _driver, RGWAsyncRadosProcessor *async_rados, + const rgw_zone_id& _source_zone, PerfCounters* counters) + : driver(_driver), source_zone(_source_zone), conn(NULL), error_logger(NULL), + sync_module(nullptr), counters(counters), + source_log(this, driver, async_rados), num_shards(0) {} + RGWDataSyncStatusManager(rgw::sal::RadosStore* _driver, RGWAsyncRadosProcessor *async_rados, + const rgw_zone_id& _source_zone, PerfCounters* counters, + const RGWSyncModuleInstanceRef& _sync_module) + : driver(_driver), source_zone(_source_zone), conn(NULL), error_logger(NULL), + sync_module(_sync_module), counters(counters), + source_log(this, driver, async_rados), num_shards(0) {} + ~RGWDataSyncStatusManager() { + finalize(); + } + int init(const DoutPrefixProvider *dpp); + void finalize(); + + static std::string shard_obj_name(const rgw_zone_id& source_zone, int shard_id); + static std::string sync_status_oid(const rgw_zone_id& source_zone); + + int read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status) { + return source_log.read_sync_status(dpp, sync_status); + } + + int read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, std::set& recovering_shards) { + return source_log.read_recovering_shards(dpp, num_shards, recovering_shards); + } + + int read_shard_status(const DoutPrefixProvider *dpp, int shard_id, std::set& lagging_buckets, std::set& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries) { + return source_log.read_shard_status(dpp, shard_id, lagging_buckets, recovering_buckets,sync_marker, max_entries); + } + int init_sync_status(const DoutPrefixProvider *dpp) { return source_log.init_sync_status(dpp, num_shards); } + + int read_log_info(const 
DoutPrefixProvider *dpp, rgw_datalog_info *log_info) { + return source_log.read_log_info(dpp, log_info); + } + int read_source_log_shards_info(const DoutPrefixProvider *dpp, std::map *shards_info) { + return source_log.read_source_log_shards_info(dpp, shards_info); + } + int read_source_log_shards_next(const DoutPrefixProvider *dpp, std::map shard_markers, std::map *result) { + return source_log.read_source_log_shards_next(dpp, shard_markers, result); + } + + int run(const DoutPrefixProvider *dpp) { return source_log.run_sync(dpp, num_shards); } + + void wakeup(int shard_id, bc::flat_set& entries) { return source_log.wakeup(shard_id, entries); } + + void stop() { + source_log.finish(); + } + + // implements DoutPrefixProvider + CephContext *get_cct() const override; + unsigned get_subsys() const override; + std::ostream& gen_prefix(std::ostream& out) const override; +}; + +class RGWBucketPipeSyncStatusManager; +class RGWBucketSyncCR; + +struct rgw_bucket_shard_full_sync_marker { + rgw_obj_key position; + uint64_t count; + + rgw_bucket_shard_full_sync_marker() : count(0) {} + + void encode_attr(std::map& attrs); + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(position, bl); + encode(count, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(position, bl); + decode(count, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(rgw_bucket_shard_full_sync_marker) + +struct rgw_bucket_shard_inc_sync_marker { + std::string position; + ceph::real_time timestamp; + + void encode_attr(std::map& attrs); + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(position, bl); + encode(timestamp, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(position, bl); + if (struct_v >= 2) { + decode(timestamp, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(rgw_bucket_shard_inc_sync_marker) + +struct rgw_bucket_shard_sync_info { + enum SyncState { + StateInit = 0, + StateFullSync = 1, + StateIncrementalSync = 2, + StateStopped = 3, + }; + + uint16_t state; + rgw_bucket_shard_inc_sync_marker inc_marker; + + void decode_from_attrs(CephContext *cct, std::map& attrs); + void encode_all_attrs(std::map& attrs); + void encode_state_attr(std::map& attrs); + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(state, bl); + encode(inc_marker, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(state, bl); + if (struct_v <= 1) { + rgw_bucket_shard_full_sync_marker full_marker; + decode(full_marker, bl); + } + decode(inc_marker, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + + rgw_bucket_shard_sync_info() : state((int)StateInit) {} + +}; +WRITE_CLASS_ENCODER(rgw_bucket_shard_sync_info) + +struct rgw_bucket_full_sync_status { + rgw_obj_key position; + uint64_t count = 0; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(position, bl); + encode(count, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(position, bl); + decode(count, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(rgw_bucket_full_sync_status) + +enum class 
BucketSyncState : uint8_t { + Init = 0, + Full, + Incremental, + Stopped, +}; +inline std::ostream& operator<<(std::ostream& out, const BucketSyncState& s) { + switch (s) { + case BucketSyncState::Init: out << "init"; break; + case BucketSyncState::Full: out << "full"; break; + case BucketSyncState::Incremental: out << "incremental"; break; + case BucketSyncState::Stopped: out << "stopped"; break; + } + return out; +} + +void encode_json(const char *name, BucketSyncState state, Formatter *f); +void decode_json_obj(BucketSyncState& state, JSONObj *obj); + +struct rgw_bucket_sync_status { + BucketSyncState state = BucketSyncState::Init; + rgw_bucket_full_sync_status full; + uint64_t incremental_gen = 0; + std::vector shards_done_with_gen; + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(state, bl); + encode(full, bl); + encode(incremental_gen, bl); + encode(shards_done_with_gen, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(state, bl); + decode(full, bl); + if (struct_v > 1) { + decode(incremental_gen, bl); + decode(shards_done_with_gen, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(rgw_bucket_sync_status) + +struct bilog_status_v2 { + rgw_bucket_sync_status sync_status; + std::vector inc_status; + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; + +struct store_gen_shards { + uint64_t gen = 0; + uint32_t num_shards = 0; + + void dump(Formatter *f) const { + encode_json("gen", gen, f); + encode_json("num_shards", num_shards, f); + } + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("gen", gen, obj); + JSONDecoder::decode_json("num_shards", num_shards, obj); + } +}; + +struct rgw_bucket_index_marker_info { + std::string bucket_ver; + std::string master_ver; + std::string max_marker; + bool syncstopped{false}; + uint64_t oldest_gen = 0; + uint64_t latest_gen = 0; + std::vector generations; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("bucket_ver", bucket_ver, obj); + JSONDecoder::decode_json("master_ver", master_ver, obj); + JSONDecoder::decode_json("max_marker", max_marker, obj); + JSONDecoder::decode_json("syncstopped", syncstopped, obj); + JSONDecoder::decode_json("oldest_gen", oldest_gen, obj); + JSONDecoder::decode_json("latest_gen", latest_gen, obj); + JSONDecoder::decode_json("generations", generations, obj); + } +}; + + +class BucketIndexShardsManager; + +int rgw_read_remote_bilog_info(const DoutPrefixProvider *dpp, + RGWRESTConn* conn, + const rgw_bucket& bucket, + rgw_bucket_index_marker_info& info, + BucketIndexShardsManager& markers, + optional_yield y); + +class RGWBucketPipeSyncStatusManager : public DoutPrefixProvider { + rgw::sal::RadosStore* driver; + + RGWDataSyncEnv sync_env; + + RGWCoroutinesManager cr_mgr{driver->ctx(), + driver->getRados()->get_cr_registry()}; + + RGWHTTPManager http_manager{driver->ctx(), cr_mgr.get_completion_mgr()}; + + std::optional source_zone; + std::optional source_bucket; + + std::unique_ptr error_logger = + std::make_unique(driver, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, + ERROR_LOGGER_SHARDS); + RGWSyncModuleInstanceRef sync_module; + + rgw_bucket dest_bucket; + + struct source { + RGWDataSyncCtx sc; + RGWBucketInfo info; + rgw_bucket dest; + RGWBucketSyncFlowManager::pipe_handler handler; + std::string zone_name; + + source(RGWDataSyncEnv* env, const rgw_zone_id& zone, RGWRESTConn* conn, + const RGWBucketInfo& info, const 
rgw_bucket& dest, + const RGWBucketSyncFlowManager::pipe_handler& handler, + const std::string& zone_name) + : sc(env, conn, zone), info(info), dest(dest), handler(handler), + zone_name(zone_name) {} + }; + std::vector sources; + + int do_init(const DoutPrefixProvider *dpp, std::ostream* ostr); + RGWBucketPipeSyncStatusManager(rgw::sal::RadosStore* driver, + std::optional source_zone, + std::optional source_bucket, + const rgw_bucket& dest_bucket) + : driver(driver), source_zone(source_zone), source_bucket(source_bucket), + dest_bucket(dest_bucket) {} + + int remote_info(const DoutPrefixProvider *dpp, source& s, + uint64_t* oldest_gen, uint64_t* latest_gen, + uint64_t* num_shards); +public: + static tl::expected, int> + construct(const DoutPrefixProvider* dpp, rgw::sal::RadosStore* driver, + std::optional source_zone, + std::optional source_bucket, + const rgw_bucket& dest_bucket, std::ostream *ostream); + ~RGWBucketPipeSyncStatusManager() = default; + + + static std::string full_status_oid(const rgw_zone_id& source_zone, + const rgw_bucket& source_bucket, + const rgw_bucket& dest_bucket); + static std::string inc_status_oid(const rgw_zone_id& source_zone, + const rgw_bucket_sync_pair_info& bs, + uint64_t gen); + // specific source obj sync status, can be used by sync modules + static std::string obj_status_oid(const rgw_bucket_sync_pipe& sync_pipe, + const rgw_zone_id& source_zone, const rgw::sal::Object* obj); /* specific source obj sync status, + can be used by sync modules */ + + // implements DoutPrefixProvider + CephContext *get_cct() const override; + unsigned get_subsys() const override; + std::ostream& gen_prefix(std::ostream& out) const override; + + int init_sync_status(const DoutPrefixProvider *dpp); + tl::expected, int> read_sync_status( + const DoutPrefixProvider *dpp); + int run(const DoutPrefixProvider *dpp); +}; + +/// read the full sync status with respect to a source bucket +int rgw_read_bucket_full_sync_status(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore *driver, + const rgw_sync_bucket_pipe& pipe, + rgw_bucket_sync_status *status, + optional_yield y); + +/// read the incremental sync status of all bucket shards from the given source zone +int rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore *driver, + const rgw_sync_bucket_pipe& pipe, + uint64_t gen, + std::vector *status); + +class RGWDefaultSyncModule : public RGWSyncModule { +public: + RGWDefaultSyncModule() {} + bool supports_writes() override { return true; } + bool supports_data_export() override { return true; } + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; +}; + +class RGWArchiveSyncModule : public RGWDefaultSyncModule { +public: + RGWArchiveSyncModule() {} + bool supports_writes() override { return true; } + bool supports_data_export() override { return false; } + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_datalog.cc b/src/rgw/driver/rados/rgw_datalog.cc new file mode 100644 index 00000000000..3eeb820e2eb --- /dev/null +++ b/src/rgw/driver/rados/rgw_datalog.cc @@ -0,0 +1,1065 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include + +#include "common/debug.h" +#include "common/containers.h" +#include "common/errno.h" +#include "common/error_code.h" 
+ +#include "common/async/blocked_completion.h" +#include "common/async/librados_completion.h" + +#include "cls/fifo/cls_fifo_types.h" +#include "cls/log/cls_log_client.h" + +#include "cls_fifo_legacy.h" +#include "rgw_bucket_layout.h" +#include "rgw_datalog.h" +#include "rgw_log_backing.h" +#include "rgw_tools.h" + +#define dout_context g_ceph_context +static constexpr auto dout_subsys = ceph_subsys_rgw; + +namespace bs = boost::system; +namespace lr = librados; + +using ceph::containers::tiny_vector; + +void rgw_data_change::dump(ceph::Formatter *f) const +{ + std::string type; + switch (entity_type) { + case ENTITY_TYPE_BUCKET: + type = "bucket"; + break; + default: + type = "unknown"; + } + encode_json("entity_type", type, f); + encode_json("key", key, f); + utime_t ut(timestamp); + encode_json("timestamp", ut, f); + encode_json("gen", gen, f); +} + +void rgw_data_change::decode_json(JSONObj *obj) { + std::string s; + JSONDecoder::decode_json("entity_type", s, obj); + if (s == "bucket") { + entity_type = ENTITY_TYPE_BUCKET; + } else { + entity_type = ENTITY_TYPE_UNKNOWN; + } + JSONDecoder::decode_json("key", key, obj); + utime_t ut; + JSONDecoder::decode_json("timestamp", ut, obj); + timestamp = ut.to_real_time(); + JSONDecoder::decode_json("gen", gen, obj); +} + +void rgw_data_change_log_entry::dump(Formatter *f) const +{ + encode_json("log_id", log_id, f); + utime_t ut(log_timestamp); + encode_json("log_timestamp", ut, f); + encode_json("entry", entry, f); +} + +void rgw_data_change_log_entry::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("log_id", log_id, obj); + utime_t ut; + JSONDecoder::decode_json("log_timestamp", ut, obj); + log_timestamp = ut.to_real_time(); + JSONDecoder::decode_json("entry", entry, obj); +} + +void rgw_data_notify_entry::dump(Formatter *f) const +{ + encode_json("key", key, f); + encode_json("gen", gen, f); +} + +void rgw_data_notify_entry::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("key", key, obj); + JSONDecoder::decode_json("gen", gen, obj); +} + +class RGWDataChangesOmap final : public RGWDataChangesBE { + using centries = std::list; + std::vector oids; + +public: + RGWDataChangesOmap(lr::IoCtx& ioctx, + RGWDataChangesLog& datalog, + uint64_t gen_id, + int num_shards) + : RGWDataChangesBE(ioctx, datalog, gen_id) { + oids.reserve(num_shards); + for (auto i = 0; i < num_shards; ++i) { + oids.push_back(get_oid(i)); + } + } + ~RGWDataChangesOmap() override = default; + + void prepare(ceph::real_time ut, const std::string& key, + ceph::buffer::list&& entry, entries& out) override { + if (!std::holds_alternative(out)) { + ceph_assert(std::visit([](const auto& v) { return std::empty(v); }, out)); + out = centries(); + } + + cls_log_entry e; + cls_log_add_prepare_entry(e, utime_t(ut), {}, key, entry); + std::get(out).push_back(std::move(e)); + } + int push(const DoutPrefixProvider *dpp, int index, entries&& items) override { + lr::ObjectWriteOperation op; + cls_log_add(op, std::get(items), true); + auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to push to " << oids[index] << cpp_strerror(-r) + << dendl; + } + return r; + } + int push(const DoutPrefixProvider *dpp, int index, ceph::real_time now, + const std::string& key, + ceph::buffer::list&& bl) override { + lr::ObjectWriteOperation op; + cls_log_add(op, utime_t(now), {}, key, bl); + auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << 
__PRETTY_FUNCTION__ + << ": failed to push to " << oids[index] + << cpp_strerror(-r) << dendl; + } + return r; + } + int list(const DoutPrefixProvider *dpp, int index, int max_entries, + std::vector& entries, + std::optional marker, + std::string* out_marker, bool* truncated) override { + std::list log_entries; + lr::ObjectReadOperation op; + cls_log_list(op, {}, {}, std::string(marker.value_or("")), + max_entries, log_entries, out_marker, truncated); + auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, nullptr, null_yield); + if (r == -ENOENT) { + *truncated = false; + return 0; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to list " << oids[index] + << cpp_strerror(-r) << dendl; + return r; + } + for (auto iter = log_entries.begin(); iter != log_entries.end(); ++iter) { + rgw_data_change_log_entry log_entry; + log_entry.log_id = iter->id; + auto rt = iter->timestamp.to_real_time(); + log_entry.log_timestamp = rt; + auto liter = iter->data.cbegin(); + try { + decode(log_entry.entry, liter); + } catch (ceph::buffer::error& err) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to decode data changes log entry: " + << err.what() << dendl; + return -EIO; + } + entries.push_back(log_entry); + } + return 0; + } + int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) override { + cls_log_header header; + lr::ObjectReadOperation op; + cls_log_info(op, &header); + auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, nullptr, null_yield); + if (r == -ENOENT) r = 0; + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to get info from " << oids[index] + << cpp_strerror(-r) << dendl; + } else { + info->marker = header.max_marker; + info->last_update = header.max_time.to_real_time(); + } + return r; + } + int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) override { + lr::ObjectWriteOperation op; + cls_log_trim(op, {}, {}, {}, std::string(marker)); + auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); + if (r == -ENOENT) r = -ENODATA; + if (r < 0 && r != -ENODATA) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to get info from " << oids[index] + << cpp_strerror(-r) << dendl; + } + return r; + } + int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, + lr::AioCompletion* c) override { + lr::ObjectWriteOperation op; + cls_log_trim(op, {}, {}, {}, std::string(marker)); + auto r = ioctx.aio_operate(oids[index], c, &op, 0); + if (r == -ENOENT) r = -ENODATA; + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to get info from " << oids[index] + << cpp_strerror(-r) << dendl; + } + return r; + } + std::string_view max_marker() const override { + return "99999999"; + } + int is_empty(const DoutPrefixProvider *dpp) override { + for (auto shard = 0u; shard < oids.size(); ++shard) { + std::list log_entries; + lr::ObjectReadOperation op; + std::string out_marker; + bool truncated; + cls_log_list(op, {}, {}, {}, 1, log_entries, &out_marker, &truncated); + auto r = rgw_rados_operate(dpp, ioctx, oids[shard], &op, nullptr, null_yield); + if (r == -ENOENT) { + continue; + } + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to list " << oids[shard] + << cpp_strerror(-r) << dendl; + return r; + } + if (!log_entries.empty()) { + return 0; + } + } + return 1; + } +}; + +class RGWDataChangesFIFO final : public RGWDataChangesBE { + using centries = std::vector; + tiny_vector fifos; + +public: + 
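+  // FIFO-backed shard log: one rgw::cls::fifo::FIFO per shard. Unlike the
+  // omap backend above, whose markers are cls_log entry ids, list/trim
+  // markers here are part-number/offset cursors. A rough sketch of how one
+  // is produced (the same helper get_info() uses below):
+  //
+  //   auto m = rgw::cls::fifo::marker{part_num, ofs}.to_string();
+  //   // ...later handed back to list()/trim() as a std::string_view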
RGWDataChangesFIFO(lr::IoCtx& ioctx, + RGWDataChangesLog& datalog, + uint64_t gen_id, int shards) + : RGWDataChangesBE(ioctx, datalog, gen_id), + fifos(shards, [&ioctx, this](std::size_t i, auto emplacer) { + emplacer.emplace(ioctx, get_oid(i)); + }) {} + ~RGWDataChangesFIFO() override = default; + void prepare(ceph::real_time, const std::string&, + ceph::buffer::list&& entry, entries& out) override { + if (!std::holds_alternative(out)) { + ceph_assert(std::visit([](auto& v) { return std::empty(v); }, out)); + out = centries(); + } + std::get(out).push_back(std::move(entry)); + } + int push(const DoutPrefixProvider *dpp, int index, entries&& items) override { + auto r = fifos[index].push(dpp, std::get(items), null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to push to FIFO: " << get_oid(index) + << ": " << cpp_strerror(-r) << dendl; + } + return r; + } + int push(const DoutPrefixProvider *dpp, int index, ceph::real_time, + const std::string&, + ceph::buffer::list&& bl) override { + auto r = fifos[index].push(dpp, std::move(bl), null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to push to FIFO: " << get_oid(index) + << ": " << cpp_strerror(-r) << dendl; + } + return r; + } + int list(const DoutPrefixProvider *dpp, int index, int max_entries, + std::vector& entries, + std::optional marker, + std::string* out_marker, bool* truncated) override { + std::vector log_entries; + bool more = false; + auto r = fifos[index].list(dpp, max_entries, marker, &log_entries, &more, + null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to list FIFO: " << get_oid(index) + << ": " << cpp_strerror(-r) << dendl; + return r; + } + for (const auto& entry : log_entries) { + rgw_data_change_log_entry log_entry; + log_entry.log_id = entry.marker; + log_entry.log_timestamp = entry.mtime; + auto liter = entry.data.cbegin(); + try { + decode(log_entry.entry, liter); + } catch (const buffer::error& err) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": failed to decode data changes log entry: " + << err.what() << dendl; + return -EIO; + } + entries.push_back(std::move(log_entry)); + } + if (truncated) + *truncated = more; + if (out_marker && !log_entries.empty()) { + *out_marker = log_entries.back().marker; + } + return 0; + } + int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) override { + auto& fifo = fifos[index]; + auto r = fifo.read_meta(dpp, null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to get FIFO metadata: " << get_oid(index) + << ": " << cpp_strerror(-r) << dendl; + return r; + } + rados::cls::fifo::info m; + fifo.meta(dpp, m, null_yield); + auto p = m.head_part_num; + if (p < 0) { + info->marker = ""; + info->last_update = ceph::real_clock::zero(); + return 0; + } + rgw::cls::fifo::part_info h; + r = fifo.get_part_info(dpp, p, &h, null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to get part info: " << get_oid(index) << "/" << p + << ": " << cpp_strerror(-r) << dendl; + return r; + } + info->marker = rgw::cls::fifo::marker{p, h.last_ofs}.to_string(); + info->last_update = h.max_time; + return 0; + } + int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) override { + auto r = fifos[index].trim(dpp, marker, false, null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to trim FIFO: " << get_oid(index) + << ": " << cpp_strerror(-r) << 
dendl; + } + return r; + } + int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, + librados::AioCompletion* c) override { + int r = 0; + if (marker == rgw::cls::fifo::marker(0, 0).to_string()) { + rgw_complete_aio_completion(c, -ENODATA); + } else { + fifos[index].trim(dpp, marker, false, c, null_yield); + } + return r; + } + std::string_view max_marker() const override { + static const std::string mm = + rgw::cls::fifo::marker::max().to_string(); + return std::string_view(mm); + } + int is_empty(const DoutPrefixProvider *dpp) override { + std::vector log_entries; + bool more = false; + for (auto shard = 0u; shard < fifos.size(); ++shard) { + auto r = fifos[shard].list(dpp, 1, {}, &log_entries, &more, + null_yield); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": unable to list FIFO: " << get_oid(shard) + << ": " << cpp_strerror(-r) << dendl; + return r; + } + if (!log_entries.empty()) { + return 0; + } + } + return 1; + } +}; + +RGWDataChangesLog::RGWDataChangesLog(CephContext* cct) + : cct(cct), + num_shards(cct->_conf->rgw_data_log_num_shards), + prefix(get_prefix()), + changes(cct->_conf->rgw_data_log_changes_size) {} + +bs::error_code DataLogBackends::handle_init(entries_t e) noexcept { + std::unique_lock l(m); + + for (const auto& [gen_id, gen] : e) { + if (gen.pruned) { + lderr(datalog.cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": ERROR: given empty generation: gen_id=" << gen_id << dendl; + } + if (count(gen_id) != 0) { + lderr(datalog.cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": ERROR: generation already exists: gen_id=" << gen_id << dendl; + } + try { + switch (gen.type) { + case log_type::omap: + emplace(gen_id, new RGWDataChangesOmap(ioctx, datalog, gen_id, shards)); + break; + case log_type::fifo: + emplace(gen_id, new RGWDataChangesFIFO(ioctx, datalog, gen_id, shards)); + break; + default: + lderr(datalog.cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": IMPOSSIBLE: invalid log type: gen_id=" << gen_id + << ", type" << gen.type << dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + } catch (const bs::system_error& err) { + lderr(datalog.cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": error setting up backend: gen_id=" << gen_id + << ", err=" << err.what() << dendl; + return err.code(); + } + } + return {}; +} +bs::error_code DataLogBackends::handle_new_gens(entries_t e) noexcept { + return handle_init(std::move(e)); +} +bs::error_code DataLogBackends::handle_empty_to(uint64_t new_tail) noexcept { + std::unique_lock l(m); + auto i = cbegin(); + if (i->first < new_tail) { + return {}; + } + if (new_tail >= (cend() - 1)->first) { + lderr(datalog.cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": ERROR: attempt to trim head: new_tail=" << new_tail << dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + erase(i, upper_bound(new_tail)); + return {}; +} + + +int RGWDataChangesLog::start(const DoutPrefixProvider *dpp, const RGWZone* _zone, + const RGWZoneParams& zoneparams, + librados::Rados* lr) +{ + zone = _zone; + ceph_assert(zone); + auto defbacking = to_log_type( + cct->_conf.get_val("rgw_default_data_log_backing")); + // Should be guaranteed by `set_enum_allowed` + ceph_assert(defbacking); + auto log_pool = zoneparams.log_pool; + auto r = rgw_init_ioctx(dpp, lr, log_pool, ioctx, true, false); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ + << ": Failed to initialized ioctx, r=" << r + << ", pool=" << log_pool << dendl; + return -r; + } + + auto 
besr = logback_generations::init( + dpp, ioctx, metadata_log_oid(), [this](uint64_t gen_id, int shard) { + return get_oid(gen_id, shard); + }, + num_shards, *defbacking, null_yield, *this); + + + if (!besr) { + lderr(cct) << __PRETTY_FUNCTION__ + << ": Error initializing backends: " + << besr.error().message() << dendl; + return ceph::from_error_code(besr.error()); + } + + bes = std::move(*besr); + renew_thread = make_named_thread("rgw_dt_lg_renew", + &RGWDataChangesLog::renew_run, this); + return 0; +} + +int RGWDataChangesLog::choose_oid(const rgw_bucket_shard& bs) { + const auto& name = bs.bucket.name; + auto shard_shift = (bs.shard_id > 0 ? bs.shard_id : 0); + auto r = (ceph_str_hash_linux(name.data(), name.size()) + + shard_shift) % num_shards; + return static_cast(r); +} + +int RGWDataChangesLog::renew_entries(const DoutPrefixProvider *dpp) +{ + if (!zone->log_data) + return 0; + + /* we can't keep the bucket name as part of the cls_log_entry, and we need + * it later, so we keep two lists under the map */ + bc::flat_map, + RGWDataChangesBE::entries>> m; + + std::unique_lock l(lock); + decltype(cur_cycle) entries; + entries.swap(cur_cycle); + l.unlock(); + + auto ut = real_clock::now(); + auto be = bes->head(); + for (const auto& [bs, gen] : entries) { + auto index = choose_oid(bs); + + rgw_data_change change; + bufferlist bl; + change.entity_type = ENTITY_TYPE_BUCKET; + change.key = bs.get_key(); + change.timestamp = ut; + change.gen = gen; + encode(change, bl); + + m[index].first.push_back({bs, gen}); + be->prepare(ut, change.key, std::move(bl), m[index].second); + } + + for (auto& [index, p] : m) { + auto& [buckets, entries] = p; + + auto now = real_clock::now(); + + auto ret = be->push(dpp, index, std::move(entries)); + if (ret < 0) { + /* we don't really need to have a special handling for failed cases here, + * as this is just an optimization. 
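+   * If the push fails we simply drop this renew cycle for the affected
+   * shards; once the data-log window passes, the next add_entry() on a
+   * bucket shard sends a fresh entry anyway.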
*/ + ldpp_dout(dpp, -1) << "ERROR: svc.cls->timelog.add() returned " << ret << dendl; + return ret; + } + + auto expiration = now; + expiration += ceph::make_timespan(cct->_conf->rgw_data_log_window); + for (auto& [bs, gen] : buckets) { + update_renewed(bs, gen, expiration); + } + } + + return 0; +} + +auto RGWDataChangesLog::_get_change(const rgw_bucket_shard& bs, + uint64_t gen) + -> ChangeStatusPtr +{ + ceph_assert(ceph_mutex_is_locked(lock)); + ChangeStatusPtr status; + if (!changes.find({bs, gen}, status)) { + status = std::make_shared(); + changes.add({bs, gen}, status); + } + return status; +} + +void RGWDataChangesLog::register_renew(const rgw_bucket_shard& bs, + const rgw::bucket_log_layout_generation& gen) +{ + std::scoped_lock l{lock}; + cur_cycle.insert({bs, gen.gen}); +} + +void RGWDataChangesLog::update_renewed(const rgw_bucket_shard& bs, + uint64_t gen, + real_time expiration) +{ + std::unique_lock l{lock}; + auto status = _get_change(bs, gen); + l.unlock(); + + ldout(cct, 20) << "RGWDataChangesLog::update_renewd() bucket_name=" + << bs.bucket.name << " shard_id=" << bs.shard_id + << " expiration=" << expiration << dendl; + + std::unique_lock sl(status->lock); + status->cur_expiration = expiration; +} + +int RGWDataChangesLog::get_log_shard_id(rgw_bucket& bucket, int shard_id) { + rgw_bucket_shard bs(bucket, shard_id); + return choose_oid(bs); +} + +bool RGWDataChangesLog::filter_bucket(const DoutPrefixProvider *dpp, + const rgw_bucket& bucket, + optional_yield y) const +{ + if (!bucket_filter) { + return true; + } + + return bucket_filter(bucket, y, dpp); +} + +std::string RGWDataChangesLog::get_oid(uint64_t gen_id, int i) const { + return (gen_id > 0 ? + fmt::format("{}@G{}.{}", prefix, gen_id, i) : + fmt::format("{}.{}", prefix, i)); +} + +int RGWDataChangesLog::add_entry(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + const rgw::bucket_log_layout_generation& gen, + int shard_id) +{ + auto& bucket = bucket_info.bucket; + + if (!filter_bucket(dpp, bucket, null_yield)) { + return 0; + } + + if (observer) { + observer->on_bucket_changed(bucket.get_key()); + } + + rgw_bucket_shard bs(bucket, shard_id); + + int index = choose_oid(bs); + + mark_modified(index, bs, gen.gen); + + std::unique_lock l(lock); + + auto status = _get_change(bs, gen.gen); + l.unlock(); + + auto now = real_clock::now(); + + std::unique_lock sl(status->lock); + + ldpp_dout(dpp, 20) << "RGWDataChangesLog::add_entry() bucket.name=" << bucket.name + << " shard_id=" << shard_id << " now=" << now + << " cur_expiration=" << status->cur_expiration << dendl; + + if (now < status->cur_expiration) { + /* no need to send, recently completed */ + sl.unlock(); + register_renew(bs, gen); + return 0; + } + + RefCountedCond* cond; + + if (status->pending) { + cond = status->cond; + + ceph_assert(cond); + + status->cond->get(); + sl.unlock(); + + int ret = cond->wait(); + cond->put(); + if (!ret) { + register_renew(bs, gen); + } + return ret; + } + + status->cond = new RefCountedCond; + status->pending = true; + + ceph::real_time expiration; + + int ret; + + do { + status->cur_sent = now; + + expiration = now; + expiration += ceph::make_timespan(cct->_conf->rgw_data_log_window); + + sl.unlock(); + + ceph::buffer::list bl; + rgw_data_change change; + change.entity_type = ENTITY_TYPE_BUCKET; + change.key = bs.get_key(); + change.timestamp = now; + change.gen = gen.gen; + encode(change, bl); + + ldpp_dout(dpp, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" 
<< expiration << dendl; + + auto be = bes->head(); + ret = be->push(dpp, index, now, change.key, std::move(bl)); + + now = real_clock::now(); + + sl.lock(); + + } while (!ret && real_clock::now() > expiration); + + cond = status->cond; + + status->pending = false; + /* time of when operation started, not completed */ + status->cur_expiration = status->cur_sent; + status->cur_expiration += make_timespan(cct->_conf->rgw_data_log_window); + status->cond = nullptr; + sl.unlock(); + + cond->done(ret); + cond->put(); + + return ret; +} + +int DataLogBackends::list(const DoutPrefixProvider *dpp, int shard, int max_entries, + std::vector& entries, + std::string_view marker, + std::string* out_marker, + bool* truncated) +{ + const auto [start_id, start_cursor] = cursorgen(marker); + auto gen_id = start_id; + std::string out_cursor; + while (max_entries > 0) { + std::vector gentries; + std::unique_lock l(m); + auto i = lower_bound(gen_id); + if (i == end()) return 0; + auto be = i->second; + l.unlock(); + gen_id = be->gen_id; + auto r = be->list(dpp, shard, max_entries, gentries, + gen_id == start_id ? start_cursor : std::string{}, + &out_cursor, truncated); + if (r < 0) + return r; + + if (out_marker && !out_cursor.empty()) { + *out_marker = gencursor(gen_id, out_cursor); + } + for (auto& g : gentries) { + g.log_id = gencursor(gen_id, g.log_id); + } + if (int s = gentries.size(); s < 0 || s > max_entries) + max_entries = 0; + else + max_entries -= gentries.size(); + + std::move(gentries.begin(), gentries.end(), + std::back_inserter(entries)); + ++gen_id; + } + return 0; +} + +int RGWDataChangesLog::list_entries(const DoutPrefixProvider *dpp, int shard, int max_entries, + std::vector& entries, + std::string_view marker, + std::string* out_marker, bool* truncated) +{ + assert(shard < num_shards); + return bes->list(dpp, shard, max_entries, entries, marker, out_marker, truncated); +} + +int RGWDataChangesLog::list_entries(const DoutPrefixProvider *dpp, int max_entries, + std::vector& entries, + LogMarker& marker, bool *ptruncated) +{ + bool truncated; + entries.clear(); + for (; marker.shard < num_shards && int(entries.size()) < max_entries; + marker.shard++, marker.marker.clear()) { + int ret = list_entries(dpp, marker.shard, max_entries - entries.size(), + entries, marker.marker, NULL, &truncated); + if (ret == -ENOENT) { + continue; + } + if (ret < 0) { + return ret; + } + if (!truncated) { + *ptruncated = false; + return 0; + } + } + *ptruncated = (marker.shard < num_shards); + return 0; +} + +int RGWDataChangesLog::get_info(const DoutPrefixProvider *dpp, int shard_id, RGWDataChangesLogInfo *info) +{ + assert(shard_id < num_shards); + auto be = bes->head(); + auto r = be->get_info(dpp, shard_id, info); + if (!info->marker.empty()) { + info->marker = gencursor(be->gen_id, info->marker); + } + return r; +} + +int DataLogBackends::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker) +{ + auto [target_gen, cursor] = cursorgen(marker); + std::unique_lock l(m); + const auto head_gen = (end() - 1)->second->gen_id; + const auto tail_gen = begin()->first; + if (target_gen < tail_gen) return 0; + auto r = 0; + for (auto be = lower_bound(0)->second; + be->gen_id <= target_gen && be->gen_id <= head_gen && r >= 0; + be = upper_bound(be->gen_id)->second) { + l.unlock(); + auto c = be->gen_id == target_gen ? 
cursor : be->max_marker(); + r = be->trim(dpp, shard_id, c); + if (r == -ENOENT) + r = -ENODATA; + if (r == -ENODATA && be->gen_id < target_gen) + r = 0; + if (be->gen_id == target_gen) + break; + l.lock(); + }; + return r; +} + +int RGWDataChangesLog::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker) +{ + assert(shard_id < num_shards); + return bes->trim_entries(dpp, shard_id, marker); +} + +class GenTrim : public rgw::cls::fifo::Completion { +public: + DataLogBackends* const bes; + const int shard_id; + const uint64_t target_gen; + const std::string cursor; + const uint64_t head_gen; + const uint64_t tail_gen; + boost::intrusive_ptr be; + + GenTrim(const DoutPrefixProvider *dpp, DataLogBackends* bes, int shard_id, uint64_t target_gen, + std::string cursor, uint64_t head_gen, uint64_t tail_gen, + boost::intrusive_ptr be, + lr::AioCompletion* super) + : Completion(dpp, super), bes(bes), shard_id(shard_id), target_gen(target_gen), + cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen), + be(std::move(be)) {} + + void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { + auto gen_id = be->gen_id; + be.reset(); + if (r == -ENOENT) + r = -ENODATA; + if (r == -ENODATA && gen_id < target_gen) + r = 0; + if (r < 0) { + complete(std::move(p), r); + return; + } + + { + std::unique_lock l(bes->m); + auto i = bes->upper_bound(gen_id); + if (i == bes->end() || i->first > target_gen || i->first > head_gen) { + l.unlock(); + complete(std::move(p), -ENODATA); + return; + } + be = i->second; + } + auto c = be->gen_id == target_gen ? cursor : be->max_marker(); + be->trim(dpp, shard_id, c, call(std::move(p))); + } +}; + +void DataLogBackends::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, + librados::AioCompletion* c) +{ + auto [target_gen, cursor] = cursorgen(marker); + std::unique_lock l(m); + const auto head_gen = (end() - 1)->second->gen_id; + const auto tail_gen = begin()->first; + if (target_gen < tail_gen) { + l.unlock(); + rgw_complete_aio_completion(c, -ENODATA); + return; + } + auto be = begin()->second; + l.unlock(); + auto gt = std::make_unique(dpp, this, shard_id, target_gen, + std::string(cursor), head_gen, tail_gen, + be, c); + + auto cc = be->gen_id == target_gen ? 
cursor : be->max_marker(); + be->trim(dpp, shard_id, cc, GenTrim::call(std::move(gt))); +} + +int DataLogBackends::trim_generations(const DoutPrefixProvider *dpp, std::optional& through) { + if (size() != 1) { + std::vector candidates; + { + std::scoped_lock l(m); + auto e = cend() - 1; + for (auto i = cbegin(); i < e; ++i) { + candidates.push_back(i->second); + } + } + + std::optional highest; + for (auto& be : candidates) { + auto r = be->is_empty(dpp); + if (r < 0) { + return r; + } else if (r == 1) { + highest = be->gen_id; + } else { + break; + } + } + + through = highest; + if (!highest) { + return 0; + } + auto ec = empty_to(dpp, *highest, null_yield); + if (ec) { + return ceph::from_error_code(ec); + } + } + + return ceph::from_error_code(remove_empty(dpp, null_yield)); +} + + +int RGWDataChangesLog::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, + librados::AioCompletion* c) +{ + assert(shard_id < num_shards); + bes->trim_entries(dpp, shard_id, marker, c); + return 0; +} + +bool RGWDataChangesLog::going_down() const +{ + return down_flag; +} + +RGWDataChangesLog::~RGWDataChangesLog() { + down_flag = true; + if (renew_thread.joinable()) { + renew_stop(); + renew_thread.join(); + } +} + +void RGWDataChangesLog::renew_run() noexcept { + static constexpr auto runs_per_prune = 150; + auto run = 0; + for (;;) { + const DoutPrefix dp(cct, dout_subsys, "rgw data changes log: "); + ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl; + int r = renew_entries(&dp); + if (r < 0) { + ldpp_dout(&dp, 0) << "ERROR: RGWDataChangesLog::renew_entries returned error r=" << r << dendl; + } + + if (going_down()) + break; + + if (run == runs_per_prune) { + std::optional through; + ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: pruning old generations" << dendl; + trim_generations(&dp, through); + if (r < 0) { + derr << "RGWDataChangesLog::ChangesRenewThread: failed pruning r=" + << r << dendl; + } else if (through) { + ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: pruned generations " + << "through " << *through << "." << dendl; + } else { + ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: nothing to prune." 
+ << dendl; + } + run = 0; + } else { + ++run; + } + + int interval = cct->_conf->rgw_data_log_window * 3 / 4; + std::unique_lock locker{renew_lock}; + renew_cond.wait_for(locker, std::chrono::seconds(interval)); + } +} + +void RGWDataChangesLog::renew_stop() +{ + std::lock_guard l{renew_lock}; + renew_cond.notify_all(); +} + +void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs, uint64_t gen) +{ + if (!cct->_conf->rgw_data_notify_interval_msec) { + return; + } + + auto key = bs.get_key(); + { + std::shared_lock rl{modified_lock}; // read lock to check for existence + auto shard = modified_shards.find(shard_id); + if (shard != modified_shards.end() && shard->second.count({key, gen})) { + return; + } + } + + std::unique_lock wl{modified_lock}; // write lock for insertion + modified_shards[shard_id].insert(rgw_data_notify_entry{key, gen}); +} + +std::string RGWDataChangesLog::max_marker() const { + return gencursor(std::numeric_limits::max(), + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); +} + +int RGWDataChangesLog::change_format(const DoutPrefixProvider *dpp, log_type type, optional_yield y) { + return ceph::from_error_code(bes->new_backing(dpp, type, y)); +} + +int RGWDataChangesLog::trim_generations(const DoutPrefixProvider *dpp, std::optional& through) { + return bes->trim_generations(dpp, through); +} + +void RGWDataChangesLogInfo::dump(Formatter *f) const +{ + encode_json("marker", marker, f); + utime_t ut(last_update); + encode_json("last_update", ut, f); +} + +void RGWDataChangesLogInfo::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("marker", marker, obj); + utime_t ut; + JSONDecoder::decode_json("last_update", ut, obj); + last_update = ut.to_real_time(); +} + + diff --git a/src/rgw/driver/rados/rgw_datalog.h b/src/rgw/driver/rados/rgw_datalog.h new file mode 100644 index 00000000000..0bc4837c9c1 --- /dev/null +++ b/src/rgw/driver/rados/rgw_datalog.h @@ -0,0 +1,386 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_DATALOG_H +#define CEPH_RGW_DATALOG_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/function2.hpp" + +#include "include/rados/librados.hpp" + +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/ceph_time.h" +#include "common/Formatter.h" +#include "common/lru_map.h" +#include "common/RefCountedObj.h" + +#include "cls/log/cls_log_types.h" + +#include "rgw_basic_types.h" +#include "rgw_log_backing.h" +#include "rgw_sync_policy.h" +#include "rgw_zone.h" +#include "rgw_trim_bilog.h" + +namespace bc = boost::container; + +enum DataLogEntityType { + ENTITY_TYPE_UNKNOWN = 0, + ENTITY_TYPE_BUCKET = 1, +}; + +struct rgw_data_change { + DataLogEntityType entity_type; + std::string key; + ceph::real_time timestamp; + uint64_t gen = 0; + + void encode(ceph::buffer::list& bl) const { + // require decoders to recognize v2 when gen>0 + const uint8_t compat = (gen == 0) ? 
1 : 2; + ENCODE_START(2, compat, bl); + auto t = std::uint8_t(entity_type); + encode(t, bl); + encode(key, bl); + encode(timestamp, bl); + encode(gen, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + std::uint8_t t; + decode(t, bl); + entity_type = DataLogEntityType(t); + decode(key, bl); + decode(timestamp, bl); + if (struct_v < 2) { + gen = 0; + } else { + decode(gen, bl); + } + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(rgw_data_change) + +struct rgw_data_change_log_entry { + std::string log_id; + ceph::real_time log_timestamp; + rgw_data_change entry; + + void encode(ceph::buffer::list& bl) const { + ENCODE_START(1, 1, bl); + encode(log_id, bl); + encode(log_timestamp, bl); + encode(entry, bl); + ENCODE_FINISH(bl); + } + + void decode(ceph::buffer::list::const_iterator& bl) { + DECODE_START(1, bl); + decode(log_id, bl); + decode(log_timestamp, bl); + decode(entry, bl); + DECODE_FINISH(bl); + } + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; +WRITE_CLASS_ENCODER(rgw_data_change_log_entry) + +struct RGWDataChangesLogInfo { + std::string marker; + ceph::real_time last_update; + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); +}; + +struct RGWDataChangesLogMarker { + int shard = 0; + std::string marker; + + RGWDataChangesLogMarker() = default; +}; + +class RGWDataChangesLog; + +struct rgw_data_notify_entry { + std::string key; + uint64_t gen = 0; + + void dump(ceph::Formatter* f) const; + void decode_json(JSONObj* obj); + + rgw_data_notify_entry& operator=(const rgw_data_notify_entry&) = default; + + bool operator <(const rgw_data_notify_entry& d) const { + if (key < d.key) { + return true; + } + if (d.key < key) { + return false; + } + return gen < d.gen; + } + friend std::ostream& operator <<(std::ostream& m, + const rgw_data_notify_entry& e) { + return m << "[key: " << e.key << ", gen: " << e.gen << "]"; + } +}; + +class RGWDataChangesBE; + +class DataLogBackends final + : public logback_generations, + private bc::flat_map> { + friend class logback_generations; + friend class GenTrim; + + std::mutex m; + RGWDataChangesLog& datalog; + + DataLogBackends(librados::IoCtx& ioctx, + std::string oid, + fu2::unique_function&& get_oid, + int shards, RGWDataChangesLog& datalog) noexcept + : logback_generations(ioctx, oid, std::move(get_oid), + shards), datalog(datalog) {} +public: + + boost::intrusive_ptr head() { + std::unique_lock l(m); + auto i = end(); + --i; + return i->second; + } + int list(const DoutPrefixProvider *dpp, int shard, int max_entries, + std::vector& entries, + std::string_view marker, + std::string* out_marker, bool* truncated); + int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker); + void trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, + librados::AioCompletion* c); + void set_zero(RGWDataChangesBE* be) { + emplace(0, be); + } + + bs::error_code handle_init(entries_t e) noexcept override; + bs::error_code handle_new_gens(entries_t e) noexcept override; + bs::error_code handle_empty_to(uint64_t new_tail) noexcept override; + + int trim_generations(const DoutPrefixProvider *dpp, std::optional& through); +}; + +struct BucketGen { + rgw_bucket_shard shard; + uint64_t gen; + + BucketGen(const rgw_bucket_shard& shard, uint64_t gen) + : shard(shard), gen(gen) {} + + BucketGen(rgw_bucket_shard&& shard, uint64_t gen) + : 
shard(std::move(shard)), gen(gen) {}
+
+  BucketGen(const BucketGen&) = default;
+  BucketGen(BucketGen&&) = default;
+  BucketGen& operator =(const BucketGen&) = default;
+  BucketGen& operator =(BucketGen&&) = default;
+
+  ~BucketGen() = default;
+};
+
+inline bool operator ==(const BucketGen& l, const BucketGen& r) {
+  return (l.shard == r.shard) && (l.gen == r.gen);
+}
+
+inline bool operator <(const BucketGen& l, const BucketGen& r) {
+  if (l.shard < r.shard) {
+    return true;
+  } else if (l.shard == r.shard) {
+    return l.gen < r.gen;
+  } else {
+    return false;
+  }
+}
+
+class RGWDataChangesLog {
+  friend DataLogBackends;
+  CephContext *cct;
+  librados::IoCtx ioctx;
+  rgw::BucketChangeObserver *observer = nullptr;
+  const RGWZone* zone;
+  std::unique_ptr<DataLogBackends> bes;
+
+  const int num_shards;
+  std::string get_prefix() {
+    auto prefix = cct->_conf->rgw_data_log_obj_prefix;
+    return prefix.empty() ? "data_log" : prefix;
+  }
+  std::string metadata_log_oid() {
+    return get_prefix() + "generations_metadata";
+  }
+  std::string prefix;
+
+  ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock");
+  ceph::shared_mutex modified_lock =
+    ceph::make_shared_mutex("RGWDataChangesLog::modified_lock");
+  bc::flat_map<int, bc::flat_set<rgw_data_notify_entry>> modified_shards;
+
+  std::atomic<bool> down_flag = { false };
+
+  struct ChangeStatus {
+    std::shared_ptr<const rgw_sync_policy_info> sync_policy;
+    ceph::real_time cur_expiration;
+    ceph::real_time cur_sent;
+    bool pending = false;
+    RefCountedCond* cond = nullptr;
+    ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::ChangeStatus");
+  };
+
+  using ChangeStatusPtr = std::shared_ptr<ChangeStatus>;
+
+  lru_map<BucketGen, ChangeStatusPtr> changes;
+
+  bc::flat_set<BucketGen> cur_cycle;
+
+  ChangeStatusPtr _get_change(const rgw_bucket_shard& bs, uint64_t gen);
+  void register_renew(const rgw_bucket_shard& bs,
+		      const rgw::bucket_log_layout_generation& gen);
+  void update_renewed(const rgw_bucket_shard& bs,
+		      uint64_t gen,
+		      ceph::real_time expiration);
+
+  ceph::mutex renew_lock = ceph::make_mutex("ChangesRenewThread::lock");
+  ceph::condition_variable renew_cond;
+  void renew_run() noexcept;
+  void renew_stop();
+  std::thread renew_thread;
+
+  std::function<bool(const rgw_bucket& bucket, optional_yield y,
+		     const DoutPrefixProvider *dpp)> bucket_filter;
+  bool going_down() const;
+  bool filter_bucket(const DoutPrefixProvider *dpp, const rgw_bucket& bucket, optional_yield y) const;
+  int renew_entries(const DoutPrefixProvider *dpp);
+
+public:
+
+  RGWDataChangesLog(CephContext* cct);
+  ~RGWDataChangesLog();
+
+  int start(const DoutPrefixProvider *dpp, const RGWZone* _zone, const RGWZoneParams& zoneparams,
+	    librados::Rados* lr);
+  int choose_oid(const rgw_bucket_shard& bs);
+  int add_entry(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+		const rgw::bucket_log_layout_generation& gen, int shard_id);
+  int get_log_shard_id(rgw_bucket& bucket, int shard_id);
+  int list_entries(const DoutPrefixProvider *dpp, int shard, int max_entries,
+		   std::vector<rgw_data_change_log_entry>& entries,
+		   std::string_view marker,
+		   std::string* out_marker, bool* truncated);
+  int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker);
+  int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker,
+		   librados::AioCompletion* c); // :(
+  int get_info(const DoutPrefixProvider *dpp, int shard_id, RGWDataChangesLogInfo *info);
+
+  using LogMarker = RGWDataChangesLogMarker;
+
+  int list_entries(const DoutPrefixProvider *dpp, int max_entries,
+		   std::vector<rgw_data_change_log_entry>& entries,
+		   LogMarker& marker, bool* ptruncated);
+
+  void mark_modified(int shard_id, const rgw_bucket_shard& bs, uint64_t gen);
+  auto read_clear_modified() {
+    std::unique_lock
wl{modified_lock}; + decltype(modified_shards) modified; + modified.swap(modified_shards); + modified_shards.clear(); + return modified; + } + + void set_observer(rgw::BucketChangeObserver *observer) { + this->observer = observer; + } + + void set_bucket_filter(decltype(bucket_filter)&& f) { + bucket_filter = std::move(f); + } + // a marker that compares greater than any other + std::string max_marker() const; + std::string get_oid(uint64_t gen_id, int shard_id) const; + + + int change_format(const DoutPrefixProvider *dpp, log_type type, optional_yield y); + int trim_generations(const DoutPrefixProvider *dpp, std::optional& through); +}; + +class RGWDataChangesBE : public boost::intrusive_ref_counter { +protected: + librados::IoCtx& ioctx; + CephContext* const cct; + RGWDataChangesLog& datalog; + + std::string get_oid(int shard_id) { + return datalog.get_oid(gen_id, shard_id); + } +public: + using entries = std::variant, + std::vector>; + + const uint64_t gen_id; + + RGWDataChangesBE(librados::IoCtx& ioctx, + RGWDataChangesLog& datalog, + uint64_t gen_id) + : ioctx(ioctx), cct(static_cast(ioctx.cct())), + datalog(datalog), gen_id(gen_id) {} + virtual ~RGWDataChangesBE() = default; + + virtual void prepare(ceph::real_time now, + const std::string& key, + ceph::buffer::list&& entry, + entries& out) = 0; + virtual int push(const DoutPrefixProvider *dpp, int index, entries&& items) = 0; + virtual int push(const DoutPrefixProvider *dpp, int index, ceph::real_time now, + const std::string& key, + ceph::buffer::list&& bl) = 0; + virtual int list(const DoutPrefixProvider *dpp, int shard, int max_entries, + std::vector& entries, + std::optional marker, + std::string* out_marker, bool* truncated) = 0; + virtual int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) = 0; + virtual int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) = 0; + virtual int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, + librados::AioCompletion* c) = 0; + virtual std::string_view max_marker() const = 0; + // 1 on empty, 0 on non-empty, negative on error. 
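+  // (DataLogBackends::trim_generations() above relies on this convention:
+  // it walks the non-head generations in order and prunes up to the highest
+  // contiguous one that reports 1.)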
+  virtual int is_empty(const DoutPrefixProvider *dpp) = 0;
+};
+
+
+#endif
diff --git a/src/rgw/driver/rados/rgw_datalog_notify.cc b/src/rgw/driver/rados/rgw_datalog_notify.cc
new file mode 100644
index 00000000000..12cdc532f3c
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_datalog_notify.cc
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_datalog_notify.h"
+#include "rgw_datalog.h"
+
+// custom encoding for v1 notify API
+struct EntryEncoderV1 {
+  const rgw_data_notify_entry& entry;
+};
+struct SetEncoderV1 {
+  const bc::flat_set<rgw_data_notify_entry>& entries;
+};
+
+// encode rgw_data_notify_entry as string
+void encode_json(const char *name, const EntryEncoderV1& e, Formatter *f)
+{
+  f->dump_string(name, e.entry.key); // encode the key only
+}
+// encode set<rgw_data_notify_entry> as set<string>
+void encode_json(const char *name, const SetEncoderV1& e, Formatter *f)
+{
+  f->open_array_section(name);
+  for (auto& entry : e.entries) {
+    encode_json("obj", EntryEncoderV1{entry}, f);
+  }
+  f->close_section();
+}
+// encode map<int, set<rgw_data_notify_entry>> as map<int, set<string>>
+void encode_json(const char *name, const rgw_data_notify_v1_encoder& e, Formatter *f)
+{
+  f->open_array_section(name);
+  for (auto& [key, val] : e.shards) {
+    f->open_object_section("entry");
+    encode_json("key", key, f);
+    encode_json("val", SetEncoderV1{val}, f);
+    f->close_section();
+  }
+  f->close_section();
+}
+
+struct EntryDecoderV1 {
+  rgw_data_notify_entry& entry;
+};
+struct SetDecoderV1 {
+  bc::flat_set<rgw_data_notify_entry>& entries;
+};
+
+// decode string into rgw_data_notify_entry
+void decode_json_obj(EntryDecoderV1& d, JSONObj *obj)
+{
+  decode_json_obj(d.entry.key, obj);
+  d.entry.gen = 0;
+}
+// decode set<string> into set<rgw_data_notify_entry>
+void decode_json_obj(SetDecoderV1& d, JSONObj *obj)
+{
+  for (JSONObjIter o = obj->find_first(); !o.end(); ++o) {
+    rgw_data_notify_entry val;
+    auto decoder = EntryDecoderV1{val};
+    decode_json_obj(decoder, *o);
+    d.entries.insert(std::move(val));
+  }
+}
+// decode map<int, set<string>> into map<int, set<rgw_data_notify_entry>>
+void decode_json_obj(rgw_data_notify_v1_decoder& d, JSONObj *obj)
+{
+  for (JSONObjIter o = obj->find_first(); !o.end(); ++o) {
+    int shard_id = 0;
+    JSONDecoder::decode_json("key", shard_id, *o);
+    bc::flat_set<rgw_data_notify_entry> val;
+    SetDecoderV1 decoder{val};
+    JSONDecoder::decode_json("val", decoder, *o);
+    d.shards[shard_id] = std::move(val);
+  }
+}
diff --git a/src/rgw/driver/rados/rgw_datalog_notify.h b/src/rgw/driver/rados/rgw_datalog_notify.h
new file mode 100644
index 00000000000..4cd1b3c110f
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_datalog_notify.h
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+
+#include "rgw_datalog.h"
+
+namespace bc = boost::container;
+
+namespace ceph { class Formatter; }
+class JSONObj;
+
+class RGWCoroutine;
+class RGWHTTPManager;
+class RGWRESTConn;
+
+struct rgw_data_notify_entry;
+
+// json encoder and decoder for notify v1 API
+struct rgw_data_notify_v1_encoder {
+  const bc::flat_map<int, bc::flat_set<rgw_data_notify_entry>>& shards;
+};
+void encode_json(const char *name, const rgw_data_notify_v1_encoder& e,
+                 ceph::Formatter *f);
+struct rgw_data_notify_v1_decoder {
+  bc::flat_map<int, bc::flat_set<rgw_data_notify_entry>>& shards;
+};
+void decode_json_obj(rgw_data_notify_v1_decoder& d, JSONObj *obj);
diff --git a/src/rgw/driver/rados/rgw_etag_verifier.cc b/src/rgw/driver/rados/rgw_etag_verifier.cc
new file mode 100644
index 00000000000..52f7c794842
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_etag_verifier.cc
@@ -0,0 +1,191 @@
+// -*- mode:C++; tab-width:8;
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_etag_verifier.h" +#include "rgw_obj_manifest.h" + +#define dout_subsys ceph_subsys_rgw + +namespace rgw::putobj { + +int create_etag_verifier(const DoutPrefixProvider *dpp, + CephContext* cct, rgw::sal::DataProcessor* filter, + const bufferlist& manifest_bl, + const std::optional& compression, + etag_verifier_ptr& verifier) +{ + RGWObjManifest manifest; + + try { + auto miter = manifest_bl.cbegin(); + decode(manifest, miter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: couldn't decode manifest" << dendl; + return -EIO; + } + + RGWObjManifestRule rule; + bool found = manifest.get_rule(0, &rule); + if (!found) { + ldpp_dout(dpp, -1) << "ERROR: manifest->get_rule() could not find rule" << dendl; + return -EIO; + } + + if (rule.start_part_num == 0) { + /* Atomic object */ + verifier.emplace(cct, filter); + return 0; + } + + uint64_t cur_part_ofs = UINT64_MAX; + std::vector part_ofs; + + /* + * We must store the offset of each part to calculate the ETAGs for each + * MPU part. These part ETags then become the input for the MPU object + * Etag. + */ + for (auto mi = manifest.obj_begin(dpp); mi != manifest.obj_end(dpp); ++mi) { + if (cur_part_ofs == mi.get_part_ofs()) + continue; + cur_part_ofs = mi.get_part_ofs(); + ldpp_dout(dpp, 20) << "MPU Part offset:" << cur_part_ofs << dendl; + part_ofs.push_back(cur_part_ofs); + } + + if (compression) { + // if the source object was compressed, the manifest is storing + // compressed part offsets. transform the compressed offsets back to + // their original offsets by finding the first block of each part + const auto& blocks = compression->blocks; + auto block = blocks.begin(); + for (auto& ofs : part_ofs) { + // find the compression_block with new_ofs == ofs + constexpr auto less = [] (const compression_block& block, uint64_t ofs) { + return block.new_ofs < ofs; + }; + block = std::lower_bound(block, blocks.end(), ofs, less); + if (block == blocks.end() || block->new_ofs != ofs) { + ldpp_dout(dpp, 4) << "no match for compressed offset " << ofs + << ", disabling etag verification" << dendl; + return -EIO; + } + ofs = block->old_ofs; + ldpp_dout(dpp, 20) << "MPU Part uncompressed offset:" << ofs << dendl; + } + } + + verifier.emplace(cct, std::move(part_ofs), filter); + return 0; +} + +int ETagVerifier_Atomic::process(bufferlist&& in, uint64_t logical_offset) +{ + bufferlist out; + if (in.length() > 0) + hash.Update((const unsigned char *)in.c_str(), in.length()); + + return Pipe::process(std::move(in), logical_offset); +} + +void ETagVerifier_Atomic::calculate_etag() +{ + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + + /* Return early if ETag has already been calculated */ + if (!calculated_etag.empty()) + return; + + hash.Final(m); + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); + calculated_etag = calc_md5; + ldout(cct, 20) << "Single part object: " << " etag:" << calculated_etag + << dendl; +} + +void ETagVerifier_MPU::process_end_of_MPU_part() +{ + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char calc_md5_part[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + std::string calculated_etag_part; + + hash.Final(m); + mpu_etag_hash.Update((const unsigned char *)m, sizeof(m)); + hash.Restart(); + + if (cct->_conf->subsys.should_gather(dout_subsys, 20)) { + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5_part); + calculated_etag_part = calc_md5_part; + ldout(cct, 20) << "Part etag: " << 
calculated_etag_part << dendl; + } + + cur_part_index++; + next_part_index++; +} + +int ETagVerifier_MPU::process(bufferlist&& in, uint64_t logical_offset) +{ + uint64_t bl_end = in.length() + logical_offset; + + /* Handle the last MPU part */ + if (size_t(next_part_index) == part_ofs.size()) { + hash.Update((const unsigned char *)in.c_str(), in.length()); + goto done; + } + + /* Incoming bufferlist spans two MPU parts. Calculate separate ETags */ + if (bl_end > part_ofs[next_part_index]) { + + uint64_t part_one_len = part_ofs[next_part_index] - logical_offset; + hash.Update((const unsigned char *)in.c_str(), part_one_len); + process_end_of_MPU_part(); + + hash.Update((const unsigned char *)in.c_str() + part_one_len, + bl_end - part_ofs[cur_part_index]); + /* + * If we've moved to the last part of the MPU, avoid usage of + * parts_ofs[next_part_index] as it will lead to our-of-range access. + */ + if (size_t(next_part_index) == part_ofs.size()) + goto done; + } else { + hash.Update((const unsigned char *)in.c_str(), in.length()); + } + + /* Update the MPU Etag if the current part has ended */ + if (logical_offset + in.length() + 1 == part_ofs[next_part_index]) + process_end_of_MPU_part(); + +done: + return Pipe::process(std::move(in), logical_offset); +} + +void ETagVerifier_MPU::calculate_etag() +{ + const uint32_t parts = part_ofs.size(); + constexpr auto digits10 = std::numeric_limits::digits10; + constexpr auto extra = 2 + digits10; // add "-%u\0" at the end + + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE], mpu_m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + extra]; + + /* Return early if ETag has already been calculated */ + if (!calculated_etag.empty()) + return; + + hash.Final(m); + mpu_etag_hash.Update((const unsigned char *)m, sizeof(m)); + + /* Refer RGWCompleteMultipart::execute() for ETag calculation for MPU object */ + mpu_etag_hash.Final(mpu_m); + buf_to_hex(mpu_m, CEPH_CRYPTO_MD5_DIGESTSIZE, final_etag_str); + snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], + sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, + "-%u", parts); + + calculated_etag = final_etag_str; + ldout(cct, 20) << "MPU calculated ETag:" << calculated_etag << dendl; +} + +} // namespace rgw::putobj diff --git a/src/rgw/driver/rados/rgw_etag_verifier.h b/src/rgw/driver/rados/rgw_etag_verifier.h new file mode 100644 index 00000000000..56a679ebddd --- /dev/null +++ b/src/rgw/driver/rados/rgw_etag_verifier.h @@ -0,0 +1,92 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * RGW Etag Verifier is an RGW filter which enables the objects copied using + * multisite sync to be verified using their ETag from source i.e. the MD5 + * checksum of the object is computed at the destination and is verified to be + * identical to the ETag stored in the object HEAD at source cluster. + * + * For MPU objects, a different filter named RGWMultipartEtagFilter is applied + * which re-computes ETag using RGWObjManifest. This computes the ETag using the + * same algorithm used at the source cluster i.e. MD5 sum of the individual ETag + * on the MPU parts. 
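+ *
+ * For example, with a three-part upload the final ETag is
+ * hex(MD5(md5(part1) || md5(part2) || md5(part3))) + "-3", matching what
+ * RGWCompleteMultipart::execute() computes at the source.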
+ */ +#ifndef CEPH_RGW_ETAG_VERIFIER_H +#define CEPH_RGW_ETAG_VERIFIER_H + +#include "rgw_putobj.h" +#include "rgw_op.h" +#include "common/static_ptr.h" + +namespace rgw::putobj { + +class ETagVerifier : public rgw::putobj::Pipe +{ +protected: + CephContext* cct; + MD5 hash; + std::string calculated_etag; + +public: + ETagVerifier(CephContext* cct_, rgw::sal::DataProcessor *next) + : Pipe(next), cct(cct_) { + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + } + + virtual void calculate_etag() = 0; + std::string get_calculated_etag() { return calculated_etag;} + +}; /* ETagVerifier */ + +class ETagVerifier_Atomic : public ETagVerifier +{ +public: + ETagVerifier_Atomic(CephContext* cct_, rgw::sal::DataProcessor *next) + : ETagVerifier(cct_, next) {} + + int process(bufferlist&& data, uint64_t logical_offset) override; + void calculate_etag() override; + +}; /* ETagVerifier_Atomic */ + +class ETagVerifier_MPU : public ETagVerifier +{ + std::vector part_ofs; + uint64_t cur_part_index{0}, next_part_index{1}; + MD5 mpu_etag_hash; + + void process_end_of_MPU_part(); + +public: + ETagVerifier_MPU(CephContext* cct, + std::vector part_ofs, + rgw::sal::DataProcessor *next) + : ETagVerifier(cct, next), + part_ofs(std::move(part_ofs)) + { + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + } + + int process(bufferlist&& data, uint64_t logical_offset) override; + void calculate_etag() override; + +}; /* ETagVerifier_MPU */ + +constexpr auto max_etag_verifier_size = std::max( + sizeof(ETagVerifier_Atomic), + sizeof(ETagVerifier_MPU) + ); +using etag_verifier_ptr = ceph::static_ptr; + +int create_etag_verifier(const DoutPrefixProvider *dpp, + CephContext* cct, rgw::sal::DataProcessor* next, + const bufferlist& manifest_bl, + const std::optional& compression, + etag_verifier_ptr& verifier); + +} // namespace rgw::putobj + +#endif /* CEPH_RGW_ETAG_VERIFIER_H */ diff --git a/src/rgw/driver/rados/rgw_gc.cc b/src/rgw/driver/rados/rgw_gc.cc new file mode 100644 index 00000000000..bd16bde1bd5 --- /dev/null +++ b/src/rgw/driver/rados/rgw_gc.cc @@ -0,0 +1,811 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_gc.h" + +#include "rgw_tools.h" +#include "include/scope_guard.h" +#include "include/rados/librados.hpp" +#include "cls/rgw/cls_rgw_client.h" +#include "cls/rgw_gc/cls_rgw_gc_client.h" +#include "cls/refcount/cls_refcount_client.h" +#include "cls/version/cls_version_client.h" +#include "rgw_perf_counters.h" +#include "cls/lock/cls_lock_client.h" +#include "include/random.h" +#include "rgw_gc_log.h" + +#include // XXX +#include +#include "xxhash.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; +using namespace librados; + +static string gc_oid_prefix = "gc"; +static string gc_index_lock_name = "gc_process"; + +void RGWGC::initialize(CephContext *_cct, RGWRados *_store) { + cct = _cct; + store = _store; + + max_objs = min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max()); + + obj_names = new string[max_objs]; + + for (int i = 0; i < max_objs; i++) { + obj_names[i] = gc_oid_prefix; + char buf[32]; + snprintf(buf, 32, ".%d", i); + obj_names[i].append(buf); + + auto it = transitioned_objects_cache.begin() + i; + transitioned_objects_cache.insert(it, false); + + //version = 0 -> not ready for transition + //version = 1 -> marked ready 
for transition + librados::ObjectWriteOperation op; + op.create(false); + const uint64_t queue_size = cct->_conf->rgw_gc_max_queue_size, num_deferred_entries = cct->_conf->rgw_gc_max_deferred; + gc_log_init2(op, queue_size, num_deferred_entries); + store->gc_operate(this, obj_names[i], &op); + } +} + +void RGWGC::finalize() +{ + delete[] obj_names; +} + +int RGWGC::tag_index(const string& tag) +{ + return rgw_shards_mod(XXH64(tag.c_str(), tag.size(), seed), max_objs); +} + +std::tuple> RGWGC::send_split_chain(const cls_rgw_obj_chain& chain, const std::string& tag) +{ + ldpp_dout(this, 20) << "RGWGC::send_split_chain - tag is: " << tag << dendl; + + if (cct->_conf->rgw_max_chunk_size) { + cls_rgw_obj_chain broken_chain; + ldpp_dout(this, 20) << "RGWGC::send_split_chain - rgw_max_chunk_size is: " << cct->_conf->rgw_max_chunk_size << dendl; + + for (auto it = chain.objs.begin(); it != chain.objs.end(); it++) { + ldpp_dout(this, 20) << "RGWGC::send_split_chain - adding obj with name: " << it->key << dendl; + broken_chain.objs.emplace_back(*it); + cls_rgw_gc_obj_info info; + info.tag = tag; + info.chain = broken_chain; + cls_rgw_gc_set_entry_op op; + op.info = info; + size_t total_encoded_size = op.estimate_encoded_size(); + ldpp_dout(this, 20) << "RGWGC::send_split_chain - total_encoded_size is: " << total_encoded_size << dendl; + + if (total_encoded_size > cct->_conf->rgw_max_chunk_size) { //dont add to chain, and send to gc + broken_chain.objs.pop_back(); + --it; + ldpp_dout(this, 20) << "RGWGC::send_split_chain - more than, dont add to broken chain and send chain" << dendl; + auto ret = send_chain(broken_chain, tag); + if (ret < 0) { + broken_chain.objs.insert(broken_chain.objs.end(), it, chain.objs.end()); // add all the remainder objs to the list to be deleted inline + ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; + return {ret, {broken_chain}}; + } + broken_chain.objs.clear(); + } + } + if (!broken_chain.objs.empty()) { //when the chain is smaller than or equal to rgw_max_chunk_size + ldpp_dout(this, 20) << "RGWGC::send_split_chain - sending leftover objects" << dendl; + auto ret = send_chain(broken_chain, tag); + if (ret < 0) { + ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; + return {ret, {broken_chain}}; + } + } + } else { + auto ret = send_chain(chain, tag); + if (ret < 0) { + ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; + return {ret, {std::move(chain)}}; + } + } + return {0, {}}; +} + +int RGWGC::send_chain(const cls_rgw_obj_chain& chain, const string& tag) +{ + ObjectWriteOperation op; + cls_rgw_gc_obj_info info; + info.chain = chain; + info.tag = tag; + gc_log_enqueue2(op, cct->_conf->rgw_gc_obj_min_wait, info); + + int i = tag_index(tag); + + ldpp_dout(this, 20) << "RGWGC::send_chain - on object name: " << obj_names[i] << "tag is: " << tag << dendl; + + auto ret = store->gc_operate(this, obj_names[i], &op); + if (ret != -ECANCELED && ret != -EPERM) { + return ret; + } + ObjectWriteOperation set_entry_op; + cls_rgw_gc_set_entry(set_entry_op, cct->_conf->rgw_gc_obj_min_wait, info); + return store->gc_operate(this, obj_names[i], &set_entry_op); +} + +struct defer_chain_state { + librados::AioCompletion* completion = nullptr; + // TODO: hold a reference on the state in RGWGC to avoid use-after-free if + // RGWGC destructs before this completion fires + RGWGC* gc = nullptr; + cls_rgw_gc_obj_info info; + + ~defer_chain_state() { + if 
(completion) { + completion->release(); + } + } +}; + +static void async_defer_callback(librados::completion_t, void* arg) +{ + std::unique_ptr state{static_cast(arg)}; + if (state->completion->get_return_value() == -ECANCELED) { + state->gc->on_defer_canceled(state->info); + } +} + +void RGWGC::on_defer_canceled(const cls_rgw_gc_obj_info& info) +{ + const std::string& tag = info.tag; + const int i = tag_index(tag); + + // ECANCELED from cls_version_check() tells us that we've transitioned + transitioned_objects_cache[i] = true; + + ObjectWriteOperation op; + cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); + cls_rgw_gc_remove(op, {tag}); + + auto c = librados::Rados::aio_create_completion(nullptr, nullptr); + store->gc_aio_operate(obj_names[i], c, &op); + c->release(); +} + +int RGWGC::async_defer_chain(const string& tag, const cls_rgw_obj_chain& chain) +{ + const int i = tag_index(tag); + cls_rgw_gc_obj_info info; + info.chain = chain; + info.tag = tag; + + // if we've transitioned this shard object, we can rely on the cls_rgw_gc queue + if (transitioned_objects_cache[i]) { + ObjectWriteOperation op; + cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); + + // this tag may still be present in omap, so remove it once the cls_rgw_gc + // enqueue succeeds + cls_rgw_gc_remove(op, {tag}); + + auto c = librados::Rados::aio_create_completion(nullptr, nullptr); + int ret = store->gc_aio_operate(obj_names[i], c, &op); + c->release(); + return ret; + } + + // if we haven't seen the transition yet, write the defer to omap with cls_rgw + ObjectWriteOperation op; + + // assert that we haven't initialized cls_rgw_gc queue. this prevents us + // from writing new entries to omap after the transition + gc_log_defer1(op, cct->_conf->rgw_gc_obj_min_wait, info); + + // prepare a callback to detect the transition via ECANCELED from cls_version_check() + auto state = std::make_unique(); + state->gc = this; + state->info.chain = chain; + state->info.tag = tag; + state->completion = librados::Rados::aio_create_completion( + state.get(), async_defer_callback); + + int ret = store->gc_aio_operate(obj_names[i], state->completion, &op); + if (ret == 0) { + state.release(); // release ownership until async_defer_callback() + } + return ret; +} + +int RGWGC::remove(int index, const std::vector& tags, AioCompletion **pc) +{ + ObjectWriteOperation op; + cls_rgw_gc_remove(op, tags); + + auto c = librados::Rados::aio_create_completion(nullptr, nullptr); + int ret = store->gc_aio_operate(obj_names[index], c, &op); + if (ret < 0) { + c->release(); + } else { + *pc = c; + } + return ret; +} + +int RGWGC::remove(int index, int num_entries) +{ + ObjectWriteOperation op; + cls_rgw_gc_queue_remove_entries(op, num_entries); + + return store->gc_operate(this, obj_names[index], &op); +} + +int RGWGC::list(int *index, string& marker, uint32_t max, bool expired_only, std::list& result, bool *truncated, bool& processing_queue) +{ + result.clear(); + string next_marker; + bool check_queue = false; + + for (; *index < max_objs && result.size() < max; (*index)++, marker.clear(), check_queue = false) { + std::list entries, queue_entries; + int ret = 0; + + //processing_queue is set to true from previous iteration if the queue was under process and probably has more elements in it. + if (! transitioned_objects_cache[*index] && ! check_queue && ! 
processing_queue) { + ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, max - result.size(), expired_only, entries, truncated, next_marker); + if (ret != -ENOENT && ret < 0) { + return ret; + } + obj_version objv; + cls_version_read(store->gc_pool_ctx, obj_names[*index], &objv); + if (ret == -ENOENT || entries.size() == 0) { + if (objv.ver == 0) { + continue; + } else { + if (! expired_only) { + transitioned_objects_cache[*index] = true; + marker.clear(); + } else { + std::list non_expired_entries; + ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, 1, false, non_expired_entries, truncated, next_marker); + if (non_expired_entries.size() == 0) { + transitioned_objects_cache[*index] = true; + marker.clear(); + } + } + } + } + if ((objv.ver == 1) && (entries.size() < max - result.size())) { + check_queue = true; + marker.clear(); + } + } + if (transitioned_objects_cache[*index] || check_queue || processing_queue) { + processing_queue = false; + ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[*index], marker, (max - result.size()) - entries.size(), expired_only, queue_entries, truncated, next_marker); + if (ret < 0) { + return ret; + } + } + if (entries.size() == 0 && queue_entries.size() == 0) + continue; + + std::list::iterator iter; + for (iter = entries.begin(); iter != entries.end(); ++iter) { + result.push_back(*iter); + } + + for (iter = queue_entries.begin(); iter != queue_entries.end(); ++iter) { + result.push_back(*iter); + } + + marker = next_marker; + + if (*index == max_objs - 1) { + if (queue_entries.size() > 0 && *truncated) { + processing_queue = true; + } else { + processing_queue = false; + } + /* we cut short here, truncated will hold the correct value */ + return 0; + } + + if (result.size() == max) { + if (queue_entries.size() > 0 && *truncated) { + processing_queue = true; + } else { + processing_queue = false; + *index += 1; //move to next gc object + } + + /* close approximation, it might be that the next of the objects don't hold + * anything, in this case truncated should have been false, but we can find + * that out on the next iteration + */ + *truncated = true; + return 0; + } + } + *truncated = false; + processing_queue = false; + + return 0; +} + +class RGWGCIOManager { + const DoutPrefixProvider* dpp; + CephContext *cct; + RGWGC *gc; + + struct IO { + enum Type { + UnknownIO = 0, + TailIO = 1, + IndexIO = 2, + } type{UnknownIO}; + librados::AioCompletion *c{nullptr}; + string oid; + int index{-1}; + string tag; + }; + + deque ios; + vector > remove_tags; + /* tracks the number of remaining shadow objects for a given tag in order to + * only remove the tag once all shadow objects have themselves been removed + */ + vector > tag_io_size; + +#define MAX_AIO_DEFAULT 10 + size_t max_aio{MAX_AIO_DEFAULT}; + +public: + RGWGCIOManager(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp), + cct(_cct), + gc(_gc) { + max_aio = cct->_conf->rgw_gc_max_concurrent_io; + remove_tags.resize(min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max())); + tag_io_size.resize(min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max())); + } + + ~RGWGCIOManager() { + for (auto io : ios) { + io.c->release(); + } + } + + int schedule_io(IoCtx *ioctx, const string& oid, ObjectWriteOperation *op, + int index, const string& tag) { + while (ios.size() > max_aio) { + if (gc->going_down()) { + return 0; + } + auto ret = handle_next_completion(); + //Return error if we are using queue, else ignore it + if 
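
RGWGCIOManager's core job is to cap the number of in-flight AIOs: before issuing a new operation it retires the oldest completion, and draining empties the queue. The same shape, with `std::async` futures standing in for librados completions (a sketch, not the RGW API):

```cpp
#include <cstddef>
#include <deque>
#include <functional>
#include <future>

// Keep at most `max_in_flight` operations outstanding; before issuing a new
// one, wait on the oldest. Mirrors schedule_io()/drain_ios() above.
class BoundedIoQueue {
  std::deque<std::future<int>> in_flight;
  std::size_t max_in_flight;

public:
  explicit BoundedIoQueue(std::size_t max) : max_in_flight(max) {}

  int schedule(std::function<int()> op) {
    while (in_flight.size() >= max_in_flight) {
      int r = in_flight.front().get();  // retire the oldest completion
      in_flight.pop_front();
      if (r < 0) {
        return r;                       // surface the failure to the caller
      }
    }
    in_flight.push_back(std::async(std::launch::async, std::move(op)));
    return 0;
  }

  int drain() {                         // like drain_ios(): wait for all
    int ret = 0;
    while (!in_flight.empty()) {
      int r = in_flight.front().get();
      in_flight.pop_front();
      if (r < 0) ret = r;
    }
    return ret;
  }
};
```
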
(gc->transitioned_objects_cache[index] && ret < 0) { + return ret; + } + } + + auto c = librados::Rados::aio_create_completion(nullptr, nullptr); + int ret = ioctx->aio_operate(oid, c, op); + if (ret < 0) { + return ret; + } + ios.push_back(IO{IO::TailIO, c, oid, index, tag}); + + return 0; + } + + int handle_next_completion() { + ceph_assert(!ios.empty()); + IO& io = ios.front(); + io.c->wait_for_complete(); + int ret = io.c->get_return_value(); + io.c->release(); + + if (ret == -ENOENT) { + ret = 0; + } + + if (io.type == IO::IndexIO && ! gc->transitioned_objects_cache[io.index]) { + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: gc cleanup of tags on gc shard index=" << + io.index << " returned error, ret=" << ret << dendl; + } + goto done; + } + + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: gc could not remove oid=" << io.oid << + ", ret=" << ret << dendl; + goto done; + } + + if (! gc->transitioned_objects_cache[io.index]) { + schedule_tag_removal(io.index, io.tag); + } + + done: + ios.pop_front(); + return ret; + } + + /* This is a request to schedule a tag removal. It will be called once when + * there are no shadow objects. But it will also be called for every shadow + * object when there are any. Since we do not want the tag to be removed + * until all shadow objects have been successfully removed, the scheduling + * will not happen until the shadow object count goes down to zero + */ + void schedule_tag_removal(int index, string tag) { + auto& ts = tag_io_size[index]; + auto ts_it = ts.find(tag); + if (ts_it != ts.end()) { + auto& size = ts_it->second; + --size; + // wait all shadow obj delete return + if (size != 0) + return; + + ts.erase(ts_it); + } + + auto& rt = remove_tags[index]; + + rt.push_back(tag); + if (rt.size() >= (size_t)cct->_conf->rgw_gc_max_trim_chunk) { + flush_remove_tags(index, rt); + } + } + + void add_tag_io_size(int index, string tag, size_t size) { + auto& ts = tag_io_size[index]; + ts.emplace(tag, size); + } + + int drain_ios() { + int ret_val = 0; + while (!ios.empty()) { + if (gc->going_down()) { + return -EAGAIN; + } + auto ret = handle_next_completion(); + if (ret < 0) { + ret_val = ret; + } + } + return ret_val; + } + + void drain() { + drain_ios(); + flush_remove_tags(); + /* the tags draining might have generated more ios, drain those too */ + drain_ios(); + } + + void flush_remove_tags(int index, vector& rt) { + IO index_io; + index_io.type = IO::IndexIO; + index_io.index = index; + + ldpp_dout(dpp, 20) << __func__ << + " removing entries from gc log shard index=" << index << ", size=" << + rt.size() << ", entries=" << rt << dendl; + + auto rt_guard = make_scope_guard( + [&] + { + rt.clear(); + } + ); + + int ret = gc->remove(index, rt, &index_io.c); + if (ret < 0) { + /* we already cleared list of tags, this prevents us from + * ballooning in case of a persistent problem + */ + ldpp_dout(dpp, 0) << "WARNING: failed to remove tags on gc shard index=" << + index << " ret=" << ret << dendl; + return; + } + if (perfcounter) { + /* log the count of tags retired for rate estimation */ + perfcounter->inc(l_rgw_gc_retire, rt.size()); + } + ios.push_back(index_io); + } + + void flush_remove_tags() { + int index = 0; + for (auto& rt : remove_tags) { + if (! 
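
schedule_tag_removal() is a countdown: a tag covering N shadow objects may only be queued for removal after the Nth object deletion has reported in. An isolated sketch of that bookkeeping, with illustrative names rather than the RGW API:

```cpp
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Only emit a tag for removal once the last shadow object it covers is gone.
class TagRetirement {
  std::map<std::string, std::size_t> pending;  // tag -> shadow objects left
  std::vector<std::string> ready;              // tags now safe to remove

public:
  void add(const std::string& tag, std::size_t shadow_objs) {
    pending.emplace(tag, shadow_objs);         // like add_tag_io_size()
  }

  // Called once per deleted shadow object (or once if there were none).
  void on_object_removed(const std::string& tag) {
    auto it = pending.find(tag);
    if (it != pending.end()) {
      if (--it->second > 0) {
        return;                  // still waiting on other shadow objects
      }
      pending.erase(it);
    }
    ready.push_back(tag);        // all covered objects are gone
  }

  std::vector<std::string> take_ready() {
    return std::exchange(ready, {});
  }
};
```
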
gc->transitioned_objects_cache[index]) { + flush_remove_tags(index, rt); + } + ++index; + } + } + + int remove_queue_entries(int index, int num_entries) { + int ret = gc->remove(index, num_entries); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to remove queue entries on index=" << + index << " ret=" << ret << dendl; + return ret; + } + if (perfcounter) { + /* log the count of tags retired for rate estimation */ + perfcounter->inc(l_rgw_gc_retire, num_entries); + } + return 0; + } +}; // class RGWGCIOManger + +int RGWGC::process(int index, int max_secs, bool expired_only, + RGWGCIOManager& io_manager) +{ + ldpp_dout(this, 20) << "RGWGC::process entered with GC index_shard=" << + index << ", max_secs=" << max_secs << ", expired_only=" << + expired_only << dendl; + + rados::cls::lock::Lock l(gc_index_lock_name); + utime_t end = ceph_clock_now(); + + /* max_secs should be greater than zero. We don't want a zero max_secs + * to be translated as no timeout, since we'd then need to break the + * lock and that would require a manual intervention. In this case + * we can just wait it out. */ + if (max_secs <= 0) + return -EAGAIN; + + end += max_secs; + utime_t time(max_secs, 0); + l.set_duration(time); + + int ret = l.lock_exclusive(&store->gc_pool_ctx, obj_names[index]); + if (ret == -EBUSY) { /* already locked by another gc processor */ + ldpp_dout(this, 10) << "RGWGC::process failed to acquire lock on " << + obj_names[index] << dendl; + return 0; + } + if (ret < 0) + return ret; + + string marker; + string next_marker; + bool truncated; + IoCtx *ctx = new IoCtx; + do { + int max = 100; + std::list entries; + + int ret = 0; + + if (! transitioned_objects_cache[index]) { + ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker); + ldpp_dout(this, 20) << + "RGWGC::process cls_rgw_gc_list returned with returned:" << ret << + ", entries.size=" << entries.size() << ", truncated=" << truncated << + ", next_marker='" << next_marker << "'" << dendl; + obj_version objv; + cls_version_read(store->gc_pool_ctx, obj_names[index], &objv); + if ((objv.ver == 1) && entries.size() == 0) { + std::list non_expired_entries; + ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, 1, false, non_expired_entries, &truncated, next_marker); + if (non_expired_entries.size() == 0) { + transitioned_objects_cache[index] = true; + marker.clear(); + ldpp_dout(this, 20) << "RGWGC::process cls_rgw_gc_list returned NO non expired entries, so setting cache entry to TRUE" << dendl; + } else { + ret = 0; + goto done; + } + } + if ((objv.ver == 0) && (ret == -ENOENT || entries.size() == 0)) { + ret = 0; + goto done; + } + } + + if (transitioned_objects_cache[index]) { + ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker); + ldpp_dout(this, 20) << + "RGWGC::process cls_rgw_gc_queue_list_entries returned with return value:" << ret << + ", entries.size=" << entries.size() << ", truncated=" << truncated << + ", next_marker='" << next_marker << "'" << dendl; + if (entries.size() == 0) { + ret = 0; + goto done; + } + } + + if (ret < 0) + goto done; + + marker = next_marker; + + string last_pool; + std::list::iterator iter; + for (iter = entries.begin(); iter != entries.end(); ++iter) { + cls_rgw_gc_obj_info& info = *iter; + + ldpp_dout(this, 20) << "RGWGC::process iterating over entry tag='" << + info.tag << "', time=" << info.time << ", chain.objs.size()=" << + 
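
RGWGC::process(int, ...) treats `-EBUSY` from the timed exclusive lock as "another gc worker owns this shard" and deliberately reports success so the scan just moves on; only real errors propagate. That convention, factored out as a sketch (the three callables are stand-ins for the cls_lock calls):

```cpp
#include <cerrno>
#include <functional>

// Run `body` under a shard lock; a busy lock is not an error, it simply
// means some other worker is already processing this shard.
int with_shard_lock(const std::function<int()>& try_lock,
                    const std::function<int()>& body,
                    const std::function<void()>& unlock)
{
  int r = try_lock();
  if (r == -EBUSY) {
    return 0;        // skip the shard; another gc processor holds it
  }
  if (r < 0) {
    return r;        // a genuine failure
  }
  r = body();
  unlock();          // the lease would also expire on its own
  return r;
}
```
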
info.chain.objs.size() << dendl; + + std::list::iterator liter; + cls_rgw_obj_chain& chain = info.chain; + + utime_t now = ceph_clock_now(); + if (now >= end) { + goto done; + } + if (! transitioned_objects_cache[index]) { + if (chain.objs.empty()) { + io_manager.schedule_tag_removal(index, info.tag); + } else { + io_manager.add_tag_io_size(index, info.tag, chain.objs.size()); + } + } + if (! chain.objs.empty()) { + for (liter = chain.objs.begin(); liter != chain.objs.end(); ++liter) { + cls_rgw_obj& obj = *liter; + + if (obj.pool != last_pool) { + delete ctx; + ctx = new IoCtx; + ret = rgw_init_ioctx(this, store->get_rados_handle(), obj.pool, *ctx); + if (ret < 0) { + if (transitioned_objects_cache[index]) { + goto done; + } + last_pool = ""; + ldpp_dout(this, 0) << "ERROR: failed to create ioctx pool=" << + obj.pool << dendl; + continue; + } + last_pool = obj.pool; + } + + ctx->locator_set_key(obj.loc); + + const string& oid = obj.key.name; /* just stored raw oid there */ + + ldpp_dout(this, 5) << "RGWGC::process removing " << obj.pool << + ":" << obj.key.name << dendl; + ObjectWriteOperation op; + cls_refcount_put(op, info.tag, true); + + ret = io_manager.schedule_io(ctx, oid, &op, index, info.tag); + if (ret < 0) { + ldpp_dout(this, 0) << + "WARNING: failed to schedule deletion for oid=" << oid << dendl; + if (transitioned_objects_cache[index]) { + //If deleting oid failed for any of them, we will not delete queue entries + goto done; + } + } + if (going_down()) { + // leave early, even if tag isn't removed, it's ok since it + // will be picked up next time around + goto done; + } + } // chains loop + } // else -- chains not empty + } // entries loop + if (transitioned_objects_cache[index] && entries.size() > 0) { + ret = io_manager.drain_ios(); + if (ret < 0) { + goto done; + } + //Remove the entries from the queue + ldpp_dout(this, 5) << "RGWGC::process removing entries, marker: " << marker << dendl; + ret = io_manager.remove_queue_entries(index, entries.size()); + if (ret < 0) { + ldpp_dout(this, 0) << + "WARNING: failed to remove queue entries" << dendl; + goto done; + } + } + } while (truncated); + +done: + /* we don't drain here, because if we're going down we don't want to + * hold the system if backend is unresponsive + */ + l.unlock(&store->gc_pool_ctx, obj_names[index]); + delete ctx; + + return 0; +} + +int RGWGC::process(bool expired_only) +{ + int max_secs = cct->_conf->rgw_gc_processor_max_time; + + const int start = ceph::util::generate_random_number(0, max_objs - 1); + + RGWGCIOManager io_manager(this, store->ctx(), this); + + for (int i = 0; i < max_objs; i++) { + int index = (i + start) % max_objs; + int ret = process(index, max_secs, expired_only, io_manager); + if (ret < 0) + return ret; + } + if (!going_down()) { + io_manager.drain(); + } + + return 0; +} + +bool RGWGC::going_down() +{ + return down_flag; +} + +void RGWGC::start_processor() +{ + worker = new GCWorker(this, cct, this); + worker->create("rgw_gc"); +} + +void RGWGC::stop_processor() +{ + down_flag = true; + if (worker) { + worker->stop(); + worker->join(); + } + delete worker; + worker = NULL; +} + +unsigned RGWGC::get_subsys() const +{ + return dout_subsys; +} + +std::ostream& RGWGC::gen_prefix(std::ostream& out) const +{ + return out << "garbage collection: "; +} + +void *RGWGC::GCWorker::entry() { + do { + utime_t start = ceph_clock_now(); + ldpp_dout(dpp, 2) << "garbage collection: start" << dendl; + int r = gc->process(true); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: garbage collection 
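
process(bool) below starts its shard walk at a random index so concurrent workers (and successive runs) don't all contend on shard 0 first, while still visiting every shard exactly once via wraparound. A compact version of the walk, with `process_one` standing in for the per-shard `process` overload:

```cpp
#include <functional>
#include <random>

// Visit all shards once, starting at a random offset to spread contention.
int process_all(int max_objs, const std::function<int(int)>& process_one)
{
  static thread_local std::mt19937 rng{std::random_device{}()};
  std::uniform_int_distribution<int> dist(0, max_objs - 1);
  const int start = dist(rng);

  for (int i = 0; i < max_objs; i++) {
    int index = (i + start) % max_objs;   // wraps around past the end
    if (int r = process_one(index); r < 0) {
      return r;
    }
  }
  return 0;
}
```
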
process() returned error r=" << r << dendl; + } + ldpp_dout(dpp, 2) << "garbage collection: stop" << dendl; + + if (gc->going_down()) + break; + + utime_t end = ceph_clock_now(); + end -= start; + int secs = cct->_conf->rgw_gc_processor_period; + + if (secs <= end.sec()) + continue; // next round + + secs -= end.sec(); + + std::unique_lock locker{lock}; + cond.wait_for(locker, std::chrono::seconds(secs)); + } while (!gc->going_down()); + + return NULL; +} + +void RGWGC::GCWorker::stop() +{ + std::lock_guard l{lock}; + cond.notify_all(); +} diff --git a/src/rgw/driver/rados/rgw_gc.h b/src/rgw/driver/rados/rgw_gc.h new file mode 100644 index 00000000000..196f2802c16 --- /dev/null +++ b/src/rgw/driver/rados/rgw_gc.h @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_GC_H +#define CEPH_RGW_GC_H + + +#include "include/types.h" +#include "include/rados/librados.hpp" +#include "common/ceph_mutex.h" +#include "common/Cond.h" +#include "common/Thread.h" +#include "rgw_common.h" +#include "rgw_sal.h" +#include "rgw_rados.h" +#include "cls/rgw/cls_rgw_types.h" + +#include + +class RGWGCIOManager; + +class RGWGC : public DoutPrefixProvider { + CephContext *cct; + RGWRados *store; + int max_objs; + std::string *obj_names; + std::atomic down_flag = { false }; + + static constexpr uint64_t seed = 8675309; + + int tag_index(const std::string& tag); + int send_chain(const cls_rgw_obj_chain& chain, const std::string& tag); + + class GCWorker : public Thread { + const DoutPrefixProvider *dpp; + CephContext *cct; + RGWGC *gc; + ceph::mutex lock = ceph::make_mutex("GCWorker"); + ceph::condition_variable cond; + + public: + GCWorker(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp), cct(_cct), gc(_gc) {} + void *entry() override; + void stop(); + }; + + GCWorker *worker; +public: + RGWGC() : cct(NULL), store(NULL), max_objs(0), obj_names(NULL), worker(NULL) {} + ~RGWGC() { + stop_processor(); + finalize(); + } + std::vector transitioned_objects_cache; + std::tuple> send_split_chain(const cls_rgw_obj_chain& chain, const std::string& tag); + + // asynchronously defer garbage collection on an object that's still being read + int async_defer_chain(const std::string& tag, const cls_rgw_obj_chain& info); + + // callback for when async_defer_chain() fails with ECANCELED + void on_defer_canceled(const cls_rgw_gc_obj_info& info); + + int remove(int index, const std::vector& tags, librados::AioCompletion **pc); + int remove(int index, int num_entries); + + void initialize(CephContext *_cct, RGWRados *_store); + void finalize(); + + int list(int *index, std::string& marker, uint32_t max, bool expired_only, std::list& result, bool *truncated, bool& processing_queue); + void list_init(int *index) { *index = 0; } + int process(int index, int process_max_secs, bool expired_only, + RGWGCIOManager& io_manager); + int process(bool expired_only); + + bool going_down(); + void start_processor(); + void stop_processor(); + + CephContext *get_cct() const override { return store->ctx(); } + unsigned get_subsys() const; + + std::ostream& gen_prefix(std::ostream& out) const; + +}; + + +#endif diff --git a/src/rgw/driver/rados/rgw_gc_log.cc b/src/rgw/driver/rados/rgw_gc_log.cc new file mode 100644 index 00000000000..ad819eddc05 --- /dev/null +++ b/src/rgw/driver/rados/rgw_gc_log.cc @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include 
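
GCWorker::entry is the classic periodic-worker loop: run the job, subtract the elapsed time from the period, then sleep on a condition variable so stop() can wake it early. The same loop with standard library types; as in entry(), a spurious wakeup merely starts the next round early, and stop() must be called before the object is destroyed:

```cpp
#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>

// Run `job` roughly every `period`, waking early when stop() is called.
class PeriodicWorker {
  std::mutex lock;
  std::condition_variable cond;
  bool down = false;
  std::thread thr;

public:
  PeriodicWorker(std::chrono::seconds period, std::function<void()> job) {
    thr = std::thread([this, period, job] {
      std::unique_lock<std::mutex> l(lock);
      while (!down) {
        l.unlock();
        auto start = std::chrono::steady_clock::now();
        job();                               // runs without the lock held
        auto elapsed = std::chrono::steady_clock::now() - start;
        l.lock();
        if (elapsed < period && !down) {
          cond.wait_for(l, period - elapsed);  // early wakeup via stop()
        }
      }
    });
  }

  void stop() {
    {
      std::lock_guard<std::mutex> g(lock);
      down = true;
    }
    cond.notify_all();
    thr.join();
  }
};
```
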
"rgw_gc_log.h" + +#include "cls/rgw/cls_rgw_client.h" +#include "cls/rgw_gc/cls_rgw_gc_client.h" +#include "cls/version/cls_version_client.h" + + +void gc_log_init2(librados::ObjectWriteOperation& op, + uint64_t max_size, uint64_t max_deferred) +{ + obj_version objv; // objv.ver = 0 + cls_version_check(op, objv, VER_COND_EQ); + cls_rgw_gc_queue_init(op, max_size, max_deferred); + objv.ver = 1; + cls_version_set(op, objv); +} + +void gc_log_enqueue1(librados::ObjectWriteOperation& op, + uint32_t expiration, cls_rgw_gc_obj_info& info) +{ + obj_version objv; // objv.ver = 0 + cls_version_check(op, objv, VER_COND_EQ); + cls_rgw_gc_set_entry(op, expiration, info); +} + +void gc_log_enqueue2(librados::ObjectWriteOperation& op, + uint32_t expiration, const cls_rgw_gc_obj_info& info) +{ + obj_version objv; + objv.ver = 1; + cls_version_check(op, objv, VER_COND_EQ); + cls_rgw_gc_queue_enqueue(op, expiration, info); +} + +void gc_log_defer1(librados::ObjectWriteOperation& op, + uint32_t expiration, const cls_rgw_gc_obj_info& info) +{ + obj_version objv; // objv.ver = 0 + cls_version_check(op, objv, VER_COND_EQ); + cls_rgw_gc_defer_entry(op, expiration, info.tag); +} + +void gc_log_defer2(librados::ObjectWriteOperation& op, + uint32_t expiration, const cls_rgw_gc_obj_info& info) +{ + obj_version objv; + objv.ver = 1; + cls_version_check(op, objv, VER_COND_EQ); + cls_rgw_gc_queue_defer_entry(op, expiration, info); + // TODO: conditional on whether omap is known to be empty + cls_rgw_gc_remove(op, {info.tag}); +} diff --git a/src/rgw/driver/rados/rgw_lc_tier.cc b/src/rgw/driver/rados/rgw_lc_tier.cc new file mode 100644 index 00000000000..0ad21693123 --- /dev/null +++ b/src/rgw/driver/rados/rgw_lc_tier.cc @@ -0,0 +1,1336 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include +#include +#include + +#include "common/Formatter.h" +#include +#include "rgw_lc.h" +#include "rgw_lc_tier.h" +#include "rgw_string.h" +#include "rgw_zone.h" +#include "rgw_common.h" +#include "rgw_rest.h" +#include "svc_zone.h" + +#include +#include +#include + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +struct rgw_lc_multipart_part_info { + int part_num{0}; + uint64_t ofs{0}; + uint64_t size{0}; + std::string etag; +}; + +struct rgw_lc_obj_properties { + ceph::real_time mtime; + std::string etag; + uint64_t versioned_epoch{0}; + std::map& target_acl_mappings; + std::string target_storage_class; + + rgw_lc_obj_properties(ceph::real_time _mtime, std::string _etag, + uint64_t _versioned_epoch, std::map& _t_acl_mappings, + std::string _t_storage_class) : + mtime(_mtime), etag(_etag), + versioned_epoch(_versioned_epoch), + target_acl_mappings(_t_acl_mappings), + target_storage_class(_t_storage_class) {} +}; + +struct rgw_lc_multipart_upload_info { + std::string upload_id; + uint64_t obj_size; + ceph::real_time mtime; + std::string etag; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(upload_id, bl); + encode(obj_size, bl); + encode(mtime, bl); + encode(etag, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(upload_id, bl); + decode(obj_size, bl); + decode(mtime, bl); + decode(etag, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_lc_multipart_upload_info) + +static inline string get_key_instance(const rgw_obj_key& key) +{ + if (!key.instance.empty() && + !key.have_null_instance()) { + return "-" + key.instance; + } + 
return ""; +} + +static inline string get_key_oid(const rgw_obj_key& key) +{ + string oid = key.name; + if (!key.instance.empty() && + !key.have_null_instance()) { + oid += string("-") + key.instance; + } + return oid; +} + +static inline string obj_to_aws_path(const rgw_obj& obj) +{ + string path = obj.bucket.name + "/" + get_key_oid(obj.key); + return path; +} + +static int read_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, + const rgw_raw_obj *status_obj, rgw_lc_multipart_upload_info *status) +{ + int ret = 0; + rgw::sal::RadosStore *rados = dynamic_cast(driver); + + if (!rados) { + ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." << dendl; + return -1; + } + + auto& pool = status_obj->pool; + const auto oid = status_obj->oid; + auto sysobj = rados->svc()->sysobj; + bufferlist bl; + + ret = rgw_get_system_obj(sysobj, pool, oid, bl, nullptr, nullptr, + null_yield, dpp); + + if (ret < 0) { + return ret; + } + + if (bl.length() > 0) { + try { + auto p = bl.cbegin(); + status->decode(p); + } catch (buffer::error& e) { + ldpp_dout(dpp, 10) << "failed to decode status obj: " + << e.what() << dendl; + return -EIO; + } + } else { + return -EIO; + } + + return 0; +} + +static int put_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, + const rgw_raw_obj *status_obj, rgw_lc_multipart_upload_info *status) +{ + int ret = 0; + rgw::sal::RadosStore *rados = dynamic_cast(driver); + + if (!rados) { + ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." << dendl; + return -1; + } + + auto& pool = status_obj->pool; + const auto oid = status_obj->oid; + auto sysobj = rados->svc()->sysobj; + bufferlist bl; + status->encode(bl); + + ret = rgw_put_system_obj(dpp, sysobj, pool, oid, bl, true, nullptr, + real_time{}, null_yield); + + return ret; +} + +static int delete_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, + const rgw_raw_obj *status_obj) +{ + int ret = 0; + rgw::sal::RadosStore *rados = dynamic_cast(driver); + + if (!rados) { + ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." 
<< dendl; + return -1; + } + + auto& pool = status_obj->pool; + const auto oid = status_obj->oid; + auto sysobj = rados->svc()->sysobj; + + ret = rgw_delete_system_obj(dpp, sysobj, pool, oid, nullptr, null_yield); + + return ret; +} + +static std::set keep_headers = { "CONTENT_TYPE", + "CONTENT_ENCODING", + "CONTENT_DISPOSITION", + "CONTENT_LANGUAGE" }; + +/* + * mapping between rgw object attrs and output http fields + * + static const struct rgw_http_attr base_rgw_to_http_attrs[] = { + { RGW_ATTR_CONTENT_LANG, "Content-Language" }, + { RGW_ATTR_EXPIRES, "Expires" }, + { RGW_ATTR_CACHE_CONTROL, "Cache-Control" }, + { RGW_ATTR_CONTENT_DISP, "Content-Disposition" }, + { RGW_ATTR_CONTENT_ENC, "Content-Encoding" }, + { RGW_ATTR_USER_MANIFEST, "X-Object-Manifest" }, + { RGW_ATTR_X_ROBOTS_TAG , "X-Robots-Tag" }, + { RGW_ATTR_STORAGE_CLASS , "X-Amz-Storage-Class" }, +// RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode: +// S3 endpoint: x-amz-website-redirect-location +// S3Website endpoint: Location +{ RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" }, +}; */ + +static void init_headers(map& attrs, + map& headers) +{ + for (auto& kv : attrs) { + const char * name = kv.first.c_str(); + const auto aiter = rgw_to_http_attrs.find(name); + + if (aiter != std::end(rgw_to_http_attrs)) { + headers[aiter->second] = rgw_bl_str(kv.second); + } else if (strncmp(name, RGW_ATTR_META_PREFIX, + sizeof(RGW_ATTR_META_PREFIX)-1) == 0) { + name += sizeof(RGW_ATTR_META_PREFIX) - 1; + string sname(name); + string name_prefix = RGW_ATTR_META_PREFIX; + char full_name_buf[name_prefix.size() + sname.size() + 1]; + snprintf(full_name_buf, sizeof(full_name_buf), "%.*s%.*s", + static_cast(name_prefix.length()), + name_prefix.data(), + static_cast(sname.length()), + sname.data()); + headers[full_name_buf] = rgw_bl_str(kv.second); + } else if (strcmp(name,RGW_ATTR_CONTENT_TYPE) == 0) { + headers["CONTENT_TYPE"] = rgw_bl_str(kv.second); + } + } +} + +/* Read object or just head from remote endpoint. For now initializes only headers, + * but can be extended to fetch etag, mtime etc if needed. 
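
init_headers() above performs a small translation: a fixed table maps well-known attrs to HTTP names, and anything carrying the metadata prefix is kept with the prefix stripped and re-rooted. A condensed version of that mapping; the prefix and table entries here are illustrative, not the real `rgw_to_http_attrs` contents:

```cpp
#include <map>
#include <string>

static const char META_PREFIX[] = "user.rgw.x-amz-meta-";  // stand-in prefix

// Translate stored attributes into outgoing HTTP headers: fixed names via a
// lookup table, user metadata by stripping/re-rooting the prefix.
std::map<std::string, std::string>
attrs_to_headers(const std::map<std::string, std::string>& attrs)
{
  static const std::map<std::string, std::string> fixed = {
    {"user.rgw.content_type", "CONTENT_TYPE"},
    {"user.rgw.content_lang", "Content-Language"},
  };

  std::map<std::string, std::string> headers;
  for (const auto& [name, value] : attrs) {
    auto f = fixed.find(name);
    if (f != fixed.end()) {
      headers[f->second] = value;
    } else if (name.compare(0, sizeof(META_PREFIX) - 1, META_PREFIX) == 0) {
      // user metadata survives, re-rooted under the outgoing prefix
      headers["x-amz-meta-" + name.substr(sizeof(META_PREFIX) - 1)] = value;
    }                                   // everything else is dropped
  }
  return headers;
}
```
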
+ */ +static int cloud_tier_get_object(RGWLCCloudTierCtx& tier_ctx, bool head, + std::map& headers) { + RGWRESTConn::get_obj_params req_params; + RGWBucketInfo b; + std::string target_obj_name; + int ret = 0; + std::unique_ptr dest_bucket; + std::unique_ptr dest_obj; + rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, + tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, + tier_ctx.target_storage_class); + std::string etag; + RGWRESTStreamRWRequest *in_req; + + b.bucket.name = tier_ctx.target_bucket_name; + target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + + tier_ctx.obj->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance(tier_ctx.obj->get_key()); + } + + ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , reterr = " << ret << dendl; + return ret; + } + + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } + /* init input connection */ + req_params.get_op = !head; + req_params.prepend_metadata = true; + req_params.rgwx_stat = true; + req_params.sync_manifest = true; + req_params.skip_decrypt = true; + + ret = tier_ctx.conn.get_obj(tier_ctx.dpp, dest_obj.get(), req_params, true /* send */, &in_req); + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: " << __func__ << "(): conn.get_obj() returned ret=" << ret << dendl; + return ret; + } + + /* fetch headers */ + ret = tier_ctx.conn.complete_request(in_req, nullptr, nullptr, nullptr, nullptr, &headers, null_yield); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(tier_ctx.dpp, 20) << "ERROR: " << __func__ << "(): conn.complete_request() returned ret=" << ret << dendl; + return ret; + } + return 0; +} + +static bool is_already_tiered(const DoutPrefixProvider *dpp, + std::map& headers, + ceph::real_time& mtime) { + char buf[32]; + map attrs = headers; + + for (const auto& a : attrs) { + ldpp_dout(dpp, 20) << "GetCrf attr[" << a.first << "] = " << a.second <iterate(dpp, ofs, end, out_cb, null_yield); + return ret; +} + +int RGWLCCloudStreamPut::init() { + /* init output connection */ + if (multipart.is_multipart) { + char buf[32]; + snprintf(buf, sizeof(buf), "%d", multipart.part_num); + rgw_http_param_pair params[] = { { "uploadId", multipart.upload_id.c_str() }, + { "partNumber", buf }, + { nullptr, nullptr } }; + conn.put_obj_send_init(dest_obj, params, &out_req); + } else { + conn.put_obj_send_init(dest_obj, nullptr, &out_req); + } + + return 0; +} + +bool RGWLCCloudStreamPut::keep_attr(const string& h) { + return (keep_headers.find(h) != keep_headers.end() || + boost::algorithm::starts_with(h, "X_AMZ_")); +} + +void RGWLCCloudStreamPut::init_send_attrs(const DoutPrefixProvider *dpp, + const rgw_rest_obj& rest_obj, + const rgw_lc_obj_properties& obj_properties, + std::map& attrs) { + + map& acl_mappings(obj_properties.target_acl_mappings); + const std::string& target_storage_class = obj_properties.target_storage_class; + + attrs.clear(); + + for (auto& hi : rest_obj.attrs) { + if (keep_attr(hi.first)) { + attrs.insert(hi); + } + } + + const auto acl = rest_obj.acls.get_acl(); + + map > access_map; + + if (!acl_mappings.empty()) { + for (auto& grant : acl.get_grant_map()) { + auto& orig_grantee = grant.first; + auto& perm = grant.second; + + string grantee; + + const 
auto& am = acl_mappings; + + const auto iter = am.find(orig_grantee); + if (iter == am.end()) { + ldpp_dout(dpp, 20) << "acl_mappings: Could not find " << orig_grantee << " .. ignoring" << dendl; + continue; + } + + grantee = iter->second.dest_id; + + string type; + + switch (iter->second.type) { + case ACL_TYPE_CANON_USER: + type = "id"; + break; + case ACL_TYPE_EMAIL_USER: + type = "emailAddress"; + break; + case ACL_TYPE_GROUP: + type = "uri"; + break; + default: + continue; + } + + string tv = type + "=" + grantee; + + int flags = perm.get_permission().get_permissions(); + if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) { + access_map[flags].push_back(tv); + continue; + } + + for (int i = 1; i <= RGW_PERM_WRITE_ACP; i <<= 1) { + if (flags & i) { + access_map[i].push_back(tv); + } + } + } + } + + for (const auto& aiter : access_map) { + int grant_type = aiter.first; + + string header_str("x-amz-grant-"); + + switch (grant_type) { + case RGW_PERM_READ: + header_str.append("read"); + break; + case RGW_PERM_WRITE: + header_str.append("write"); + break; + case RGW_PERM_READ_ACP: + header_str.append("read-acp"); + break; + case RGW_PERM_WRITE_ACP: + header_str.append("write-acp"); + break; + case RGW_PERM_FULL_CONTROL: + header_str.append("full-control"); + break; + } + + string s; + + for (const auto& viter : aiter.second) { + if (!s.empty()) { + s.append(", "); + } + s.append(viter); + } + + ldpp_dout(dpp, 20) << "acl_mappings: set acl: " << header_str << "=" << s << dendl; + + attrs[header_str] = s; + } + + /* Copy target storage class */ + if (!target_storage_class.empty()) { + attrs["x-amz-storage-class"] = target_storage_class; + } else { + attrs["x-amz-storage-class"] = "STANDARD"; + } + + /* New attribute to specify its transitioned from RGW */ + attrs["x-amz-meta-rgwx-source"] = "rgw"; + + char buf[32]; + snprintf(buf, sizeof(buf), "%llu", (long long)obj_properties.versioned_epoch); + attrs["x-amz-meta-rgwx-versioned-epoch"] = buf; + + utime_t ut(obj_properties.mtime); + snprintf(buf, sizeof(buf), "%lld.%09lld", + (long long)ut.sec(), + (long long)ut.nsec()); + + attrs["x-amz-meta-rgwx-source-mtime"] = buf; + attrs["x-amz-meta-rgwx-source-etag"] = obj_properties.etag; + attrs["x-amz-meta-rgwx-source-key"] = rest_obj.key.name; + if (!rest_obj.key.instance.empty()) { + attrs["x-amz-meta-rgwx-source-version-id"] = rest_obj.key.instance; + } + for (const auto& a : attrs) { + ldpp_dout(dpp, 30) << "init_send_attrs attr[" << a.first << "] = " << a.second <(out_req); + + std::map new_attrs; + if (!multipart.is_multipart) { + init_send_attrs(dpp, rest_obj, obj_properties, new_attrs); + } + + r->set_send_length(rest_obj.content_len); + + RGWAccessControlPolicy policy; + + r->send_ready(dpp, conn.get_key(), new_attrs, policy); +} + +void RGWLCCloudStreamPut::handle_headers(const map& headers) { + for (const auto& h : headers) { + if (h.first == "ETAG") { + etag = h.second; + } + } +} + +bool RGWLCCloudStreamPut::get_etag(string *petag) { + if (etag.empty()) { + return false; + } + *petag = etag; + return true; +} + +void RGWLCCloudStreamPut::set_multipart(const string& upload_id, int part_num, uint64_t part_size) { + multipart.is_multipart = true; + multipart.upload_id = upload_id; + multipart.part_num = part_num; + multipart.part_size = part_size; +} + +int RGWLCCloudStreamPut::send() { + int ret = RGWHTTP::send(out_req); + return ret; +} + +RGWGetDataCB *RGWLCCloudStreamPut::get_cb() { + return out_req->get_out_cb(); +} + +int RGWLCCloudStreamPut::complete_request() { + int 
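
The grant loop in init_send_attrs groups grantees by permission bit and then emits one comma-joined `x-amz-grant-*` header per group. A simplified rendering of just that aggregation; the FULL_CONTROL special case and the grantee type-mapping step are omitted, and the permission values are illustrative:

```cpp
#include <map>
#include <string>

enum perm : int { P_READ = 1, P_WRITE = 2, P_READ_ACP = 4, P_WRITE_ACP = 8 };

// Input: one entry per grant, keyed by permission bitmask, valued
// "type=grantee". Output: one comma-joined header per permission bit.
std::map<std::string, std::string>
grants_to_headers(const std::multimap<int, std::string>& grants)
{
  static const std::map<int, std::string> header_for = {
    {P_READ, "x-amz-grant-read"},
    {P_WRITE, "x-amz-grant-write"},
    {P_READ_ACP, "x-amz-grant-read-acp"},
    {P_WRITE_ACP, "x-amz-grant-write-acp"},
  };

  std::map<std::string, std::string> headers;
  for (const auto& [flags, grantee] : grants) {
    for (const auto& [bit, header] : header_for) {
      if (flags & bit) {
        auto& v = headers[header];
        if (!v.empty()) v += ", ";      // join multiple grantees
        v += grantee;
      }
    }
  }
  return headers;
}
```
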
ret = conn.complete_request(out_req, etag, &obj_properties.mtime, null_yield); + return ret; +} + +/* Read local copy and write to Cloud endpoint */ +static int cloud_tier_transfer_object(const DoutPrefixProvider* dpp, + RGWLCStreamRead* readf, RGWLCCloudStreamPut* writef) { + std::string url; + bufferlist bl; + bool sent_attrs{false}; + int ret{0}; + off_t ofs; + off_t end; + + ret = readf->init(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: fail to initialize in_crf, ret = " << ret << dendl; + return ret; + } + readf->get_range(ofs, end); + rgw_rest_obj& rest_obj = readf->get_rest_obj(); + if (!sent_attrs) { + ret = writef->init(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: fail to initialize out_crf, ret = " << ret << dendl; + return ret; + } + + writef->send_ready(dpp, rest_obj); + ret = writef->send(); + if (ret < 0) { + return ret; + } + sent_attrs = true; + } + + ret = readf->read(ofs, end, writef->get_cb()); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: fail to read from in_crf, ret = " << ret << dendl; + return ret; + } + + ret = writef->complete_request(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: fail to complete request, ret = " << ret << dendl; + return ret; + } + + return 0; +} + +static int cloud_tier_plain_transfer(RGWLCCloudTierCtx& tier_ctx) { + int ret; + std::unique_ptr dest_bucket; + std::unique_ptr dest_obj; + + rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, + tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, + tier_ctx.target_storage_class); + RGWBucketInfo b; + std::string target_obj_name; + + b.bucket.name = tier_ctx.target_bucket_name; + target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + + tier_ctx.obj->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance(tier_ctx.obj->get_key()); + } + + ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , ret = " << ret << dendl; + return ret; + } + + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } + + tier_ctx.obj->set_atomic(); + + /* Prepare Read from source */ + /* TODO: Define readf, writef as stack variables. For some reason, + * when used as stack variables (esp., readf), the transition seems to + * be taking lot of time eventually erroring out at times. 
+ */ + std::shared_ptr readf; + readf.reset(new RGWLCStreamRead(tier_ctx.cct, tier_ctx.dpp, + tier_ctx.obj, tier_ctx.o.meta.mtime)); + + std::shared_ptr writef; + writef.reset(new RGWLCCloudStreamPut(tier_ctx.dpp, obj_properties, tier_ctx.conn, + dest_obj.get())); + + /* actual Read & Write */ + ret = cloud_tier_transfer_object(tier_ctx.dpp, readf.get(), writef.get()); + + return ret; +} + +static int cloud_tier_send_multipart_part(RGWLCCloudTierCtx& tier_ctx, + const std::string& upload_id, + const rgw_lc_multipart_part_info& part_info, + std::string *petag) { + int ret; + std::unique_ptr dest_bucket; + std::unique_ptr dest_obj; + + rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, + tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, + tier_ctx.target_storage_class); + RGWBucketInfo b; + std::string target_obj_name; + off_t end; + + b.bucket.name = tier_ctx.target_bucket_name; + target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + + tier_ctx.obj->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance(tier_ctx.obj->get_key()); + } + + ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , ret = " << ret << dendl; + return ret; + } + + dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); + if (!dest_obj) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; + return -1; + } + + tier_ctx.obj->set_atomic(); + + /* TODO: Define readf, writef as stack variables. For some reason, + * when used as stack variables (esp., readf), the transition seems to + * be taking lot of time eventually erroring out at times. 
*/ + std::shared_ptr readf; + readf.reset(new RGWLCStreamRead(tier_ctx.cct, tier_ctx.dpp, + tier_ctx.obj, tier_ctx.o.meta.mtime)); + + std::shared_ptr writef; + writef.reset(new RGWLCCloudStreamPut(tier_ctx.dpp, obj_properties, tier_ctx.conn, + dest_obj.get())); + + /* Prepare Read from source */ + end = part_info.ofs + part_info.size - 1; + readf->set_multipart(part_info.size, part_info.ofs, end); + + /* Prepare write */ + writef->set_multipart(upload_id, part_info.part_num, part_info.size); + + /* actual Read & Write */ + ret = cloud_tier_transfer_object(tier_ctx.dpp, readf.get(), writef.get()); + if (ret < 0) { + return ret; + } + + if (!(writef->get_etag(petag))) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to get etag from PUT request" << dendl; + return -EIO; + } + + return 0; +} + +static int cloud_tier_abort_multipart(const DoutPrefixProvider *dpp, + RGWRESTConn& dest_conn, const rgw_obj& dest_obj, + const std::string& upload_id) { + int ret; + bufferlist out_bl; + bufferlist bl; + rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; + + string resource = obj_to_aws_path(dest_obj); + ret = dest_conn.send_resource(dpp, "DELETE", resource, params, nullptr, + out_bl, &bl, nullptr, null_yield); + + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload for dest object=" << dest_obj << " (ret=" << ret << ")" << dendl; + return ret; + } + + return 0; +} + +static int cloud_tier_init_multipart(const DoutPrefixProvider *dpp, + RGWRESTConn& dest_conn, const rgw_obj& dest_obj, + uint64_t obj_size, std::map& attrs, + std::string& upload_id) { + bufferlist out_bl; + bufferlist bl; + + struct InitMultipartResult { + std::string bucket; + std::string key; + std::string upload_id; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Bucket", bucket, obj); + RGWXMLDecoder::decode_xml("Key", key, obj); + RGWXMLDecoder::decode_xml("UploadId", upload_id, obj); + } + } result; + + int ret; + rgw_http_param_pair params[] = { { "uploads", nullptr }, {nullptr, nullptr} }; + + string resource = obj_to_aws_path(dest_obj); + + ret = dest_conn.send_resource(dpp, "POST", resource, params, &attrs, + out_bl, &bl, nullptr, null_yield); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl; + return ret; + } + /* + * If one of the following fails we cannot abort upload, as we cannot + * extract the upload id. If one of these fail it's very likely that that's + * the least of our problem. 
+ */ + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; + return -EIO; + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: failed to parse xml initmultipart: " << str << dendl; + return -EIO; + } + + try { + RGWXMLDecoder::decode_xml("InitiateMultipartUploadResult", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return -EIO; + } + + ldpp_dout(dpp, 20) << "init multipart result: bucket=" << result.bucket << " key=" << result.key << " upload_id=" << result.upload_id << dendl; + + upload_id = result.upload_id; + + return 0; +} + +static int cloud_tier_complete_multipart(const DoutPrefixProvider *dpp, + RGWRESTConn& dest_conn, const rgw_obj& dest_obj, + std::string& upload_id, + const std::map& parts) { + rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; + + stringstream ss; + XMLFormatter formatter; + int ret; + + bufferlist bl, out_bl; + string resource = obj_to_aws_path(dest_obj); + + struct CompleteMultipartReq { + std::map parts; + + explicit CompleteMultipartReq(const std::map& _parts) : parts(_parts) {} + + void dump_xml(Formatter *f) const { + for (const auto& p : parts) { + f->open_object_section("Part"); + encode_xml("PartNumber", p.first, f); + encode_xml("ETag", p.second.etag, f); + f->close_section(); + }; + } + } req_enc(parts); + + struct CompleteMultipartResult { + std::string location; + std::string bucket; + std::string key; + std::string etag; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Location", bucket, obj); + RGWXMLDecoder::decode_xml("Bucket", bucket, obj); + RGWXMLDecoder::decode_xml("Key", key, obj); + RGWXMLDecoder::decode_xml("ETag", etag, obj); + } + } result; + + encode_xml("CompleteMultipartUpload", req_enc, &formatter); + + formatter.flush(ss); + bl.append(ss.str()); + + ret = dest_conn.send_resource(dpp, "POST", resource, params, nullptr, + out_bl, &bl, nullptr, null_yield); + + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to complete multipart upload for dest object=" << dest_obj << dendl; + return ret; + } + /* + * If one of the following fails we cannot abort upload, as we cannot + * extract the upload id. If one of these fail it's very likely that that's + * the least of our problem. 
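
cloud_tier_complete_multipart POSTs a `CompleteMultipartUpload` document with one `<Part>` element per uploaded part. Built by hand instead of with XMLFormatter, the body has this shape (a sketch of the wire format, matching the `dump_xml` above):

```cpp
#include <map>
#include <sstream>
#include <string>

// Render the CompleteMultipartUpload request body from part-number -> etag.
std::string complete_multipart_body(const std::map<int, std::string>& etags)
{
  std::ostringstream ss;
  ss << "<CompleteMultipartUpload>";
  for (const auto& [num, etag] : etags) {
    ss << "<Part>"
       << "<PartNumber>" << num << "</PartNumber>"
       << "<ETag>" << etag << "</ETag>"
       << "</Part>";
  }
  ss << "</CompleteMultipartUpload>";
  return ss.str();
}
```
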
+ */ + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; + return -EIO; + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: failed to parse xml Completemultipart: " << str << dendl; + return -EIO; + } + + try { + RGWXMLDecoder::decode_xml("CompleteMultipartUploadResult", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return -EIO; + } + + ldpp_dout(dpp, 20) << "complete multipart result: location=" << result.location << " bucket=" << result.bucket << " key=" << result.key << " etag=" << result.etag << dendl; + + return ret; +} + +static int cloud_tier_abort_multipart_upload(RGWLCCloudTierCtx& tier_ctx, + const rgw_obj& dest_obj, const rgw_raw_obj& status_obj, + const std::string& upload_id) { + int ret; + + ret = cloud_tier_abort_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, upload_id); + + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to abort multipart upload dest obj=" << dest_obj << " upload_id=" << upload_id << " ret=" << ret << dendl; + /* ignore error, best effort */ + } + /* remove status obj */ + ret = delete_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj); + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " ret=" << ret << dendl; + // ignore error, best effort + } + return 0; +} + +static int cloud_tier_multipart_transfer(RGWLCCloudTierCtx& tier_ctx) { + rgw_obj src_obj; + rgw_obj dest_obj; + + uint64_t obj_size; + std::string src_etag; + rgw_rest_obj rest_obj; + + rgw_lc_multipart_upload_info status; + + std::map new_attrs; + + rgw_raw_obj status_obj; + + RGWBucketInfo b; + std::string target_obj_name; + rgw_bucket target_bucket; + + int ret; + + rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, + tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, + tier_ctx.target_storage_class); + + uint32_t part_size{0}; + uint32_t num_parts{0}; + + int cur_part{0}; + uint64_t cur_ofs{0}; + std::map parts; + + obj_size = tier_ctx.o.meta.size; + + target_bucket.name = tier_ctx.target_bucket_name; + + target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + + tier_ctx.obj->get_name(); + if (!tier_ctx.o.is_current()) { + target_obj_name += get_key_instance(tier_ctx.obj->get_key()); + } + dest_obj.init(target_bucket, target_obj_name); + + rgw_pool pool = static_cast(tier_ctx.driver)->svc()->zone->get_zone_params().log_pool; + status_obj = rgw_raw_obj(pool, "lc_multipart_" + tier_ctx.obj->get_oid()); + + ret = read_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj, &status); + + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to read sync status of object " << src_obj << " ret=" << ret << dendl; + return ret; + } + + if (ret >= 0) { + // check here that mtime and size did not change + if (status.mtime != obj_properties.mtime || status.obj_size != obj_size || + status.etag != obj_properties.etag) { + cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id); + ret = -ENOENT; + } + } + + if (ret == -ENOENT) { + RGWLCStreamRead readf(tier_ctx.cct, tier_ctx.dpp, tier_ctx.obj, tier_ctx.o.meta.mtime); + + readf.init(); + + rest_obj = readf.get_rest_obj(); + + 
RGWLCCloudStreamPut::init_send_attrs(tier_ctx.dpp, rest_obj, obj_properties, new_attrs);
+
+    ret = cloud_tier_init_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, obj_size, new_attrs, status.upload_id);
+    if (ret < 0) {
+      return ret;
+    }
+
+    status.obj_size = obj_size;
+    status.mtime = obj_properties.mtime;
+    status.etag = obj_properties.etag;
+
+    ret = put_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj, &status);
+
+    if (ret < 0) {
+      ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to store multipart upload state, ret=" << ret << dendl;
+      // continue with upload anyway
+    }
+
+#define MULTIPART_MAX_PARTS 10000
+    uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS;
+    uint64_t min_conf_size = tier_ctx.multipart_min_part_size;
+
+    if (min_conf_size < MULTIPART_MIN_POSSIBLE_PART_SIZE) {
+      min_conf_size = MULTIPART_MIN_POSSIBLE_PART_SIZE;
+    }
+
+    part_size = std::max(min_conf_size, min_part_size);
+    num_parts = (obj_size + part_size - 1) / part_size;
+    cur_part = 1;
+    cur_ofs = 0;
+  }
+
+  for (; (uint32_t)cur_part <= num_parts; ++cur_part) {
+    ldpp_dout(tier_ctx.dpp, 20) << "cur_part = " << cur_part << ", info.ofs = " << cur_ofs << ", info.size = " << part_size << ", obj size = " << obj_size << ", num_parts: " << num_parts << dendl;
+    rgw_lc_multipart_part_info& cur_part_info = parts[cur_part];
+    cur_part_info.part_num = cur_part;
+    cur_part_info.ofs = cur_ofs;
+    cur_part_info.size = std::min((uint64_t)part_size, obj_size - cur_ofs);
+
+    cur_ofs += cur_part_info.size;
+
+    ret = cloud_tier_send_multipart_part(tier_ctx,
+                                         status.upload_id,
+                                         cur_part_info,
+                                         &cur_part_info.etag);
+
+    if (ret < 0) {
+      ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to send multipart part of obj=" << tier_ctx.obj << ", sync via multipart upload, upload_id=" << status.upload_id << " part number " << cur_part << " (error: " << cpp_strerror(-ret) << ")" << dendl;
+      cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id);
+      return ret;
+    }
+
+  }
+
+  ret = cloud_tier_complete_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, status.upload_id, parts);
+  if (ret < 0) {
+    ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to complete multipart upload of obj=" << tier_ctx.obj << " (error: " << cpp_strerror(-ret) << ")" << dendl;
+    cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id);
+    return ret;
+  }
+
+  /* remove status obj */
+  ret = delete_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj);
+  if (ret < 0) {
+    ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to remove multipart upload status obj=" << status_obj << " upload_id=" << status.upload_id << " (" << cpp_strerror(-ret) << ")" << dendl;
+    // ignore error, best effort
+  }
+  return 0;
+}
+
+/* Check if object has already been transitioned */
+static int cloud_tier_check_object(RGWLCCloudTierCtx& tier_ctx, bool& already_tiered) {
+  int ret;
+  std::map<std::string, std::string> headers;
+
+  /* Fetch Head object */
+  ret = cloud_tier_get_object(tier_ctx, true, headers);
+
+  if (ret < 0) {
+    ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to fetch HEAD from cloud for obj=" << tier_ctx.obj << " , ret = " << ret << dendl;
+    return ret;
+  }
+
+  already_tiered = is_already_tiered(tier_ctx.dpp, headers, tier_ctx.o.meta.mtime);
+
+  if (already_tiered) {
+    ldpp_dout(tier_ctx.dpp, 20) << "is_already_tiered true" << dendl;
+  } else {
+    ldpp_dout(tier_ctx.dpp, 20) << "is_already_tiered false; proceeding with the transition write" << dendl;
+  }
+
+  return ret;
+}
+
+static int
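
The part layout above reduces to two lines of arithmetic: the part size is the largest of the configured minimum, the hard 5 MiB floor, and the size needed to fit the object into at most 10000 parts; the part count is then a ceiling division, with the final part absorbing the remainder. A sketch with a worked example; note it uses a ceiling division for the per-count minimum, where the code above uses a plain (flooring) division:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

constexpr uint64_t MAX_PARTS = 10000;
constexpr uint64_t MIN_PART = 5 * 1024 * 1024;   // S3-style 5 MiB floor

struct Layout { uint64_t part_size, num_parts; };

// Pick a part size that satisfies all three lower bounds, then count parts.
Layout plan_parts(uint64_t obj_size, uint64_t conf_min)
{
  uint64_t by_count = (obj_size + MAX_PARTS - 1) / MAX_PARTS;  // ceil
  uint64_t part_size = std::max({by_count, conf_min, MIN_PART});
  uint64_t num_parts = (obj_size + part_size - 1) / part_size; // ceil
  return {part_size, num_parts};
}

int main()
{
  // e.g. a 100 GiB object with the default 32 MiB configured part size:
  // the 32 MiB config wins over both lower bounds, giving exactly 3200
  // parts of 32 MiB each.
  auto l = plan_parts(100ULL << 30, 32 * 1024 * 1024);
  std::printf("part_size=%llu num_parts=%llu\n",
              (unsigned long long)l.part_size,
              (unsigned long long)l.num_parts);
}
```
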
cloud_tier_create_bucket(RGWLCCloudTierCtx& tier_ctx) { + bufferlist out_bl; + int ret = 0; + pair key(tier_ctx.storage_class, tier_ctx.target_bucket_name); + struct CreateBucketResult { + std::string code; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Code", code, obj); + } + } result; + + ldpp_dout(tier_ctx.dpp, 30) << "Cloud_tier_ctx: creating bucket:" << tier_ctx.target_bucket_name << dendl; + bufferlist bl; + string resource = tier_ctx.target_bucket_name; + + ret = tier_ctx.conn.send_resource(tier_ctx.dpp, "PUT", resource, nullptr, nullptr, + out_bl, &bl, nullptr, null_yield); + + if (ret < 0 ) { + ldpp_dout(tier_ctx.dpp, 0) << "create target bucket : " << tier_ctx.target_bucket_name << " returned ret:" << ret << dendl; + } + if (out_bl.length() > 0) { + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize xml parser for parsing create_bucket response from server" << dendl; + return -EIO; + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(tier_ctx.dpp, 5) << "ERROR: failed to parse xml createbucket: " << str << dendl; + return -EIO; + } + + try { + RGWXMLDecoder::decode_xml("Error", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(tier_ctx.dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return -EIO; + } + + if (result.code != "BucketAlreadyOwnedByYou" && result.code != "BucketAlreadyExists") { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: Creating target bucket failed with error: " << result.code << dendl; + return -EIO; + } + } + + return 0; +} + +int rgw_cloud_tier_transfer_object(RGWLCCloudTierCtx& tier_ctx, std::set& cloud_targets) { + int ret = 0; + + // check if target_path is already created + std::set::iterator it; + + it = cloud_targets.find(tier_ctx.target_bucket_name); + tier_ctx.target_bucket_created = (it != cloud_targets.end()); + + /* If run first time attempt to create the target bucket */ + if (!tier_ctx.target_bucket_created) { + ret = cloud_tier_create_bucket(tier_ctx); + + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to create target bucket on the cloud endpoint ret=" << ret << dendl; + return ret; + } + tier_ctx.target_bucket_created = true; + cloud_targets.insert(tier_ctx.target_bucket_name); + } + + /* Since multiple zones may try to transition the same object to the cloud, + * verify if the object is already transitioned. And since its just a best + * effort, do not bail out in case of any errors. 
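
cloud_tier_create_bucket only fails when the parsed error code is something other than the two "already exists" responses, which is what makes the PUT safe to repeat from multiple zones. The acceptance test, isolated as a sketch:

```cpp
#include <array>
#include <string>

// Decide whether a create-bucket response counts as success. An empty code
// means the PUT returned no <Error> body at all.
bool create_succeeded(const std::string& error_code)
{
  static const std::array<const char*, 3> benign = {
    "", "BucketAlreadyOwnedByYou", "BucketAlreadyExists",
  };
  for (const char* ok : benign) {
    if (error_code == ok) {
      return true;
    }
  }
  return false;   // any other code is a real failure (-EIO in the caller)
}
```
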
+ */ + bool already_tiered = false; + ret = cloud_tier_check_object(tier_ctx, already_tiered); + + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to check object on the cloud endpoint ret=" << ret << dendl; + } + + if (already_tiered) { + ldpp_dout(tier_ctx.dpp, 20) << "Object (" << tier_ctx.o.key << ") is already tiered" << dendl; + return 0; + } + + uint64_t size = tier_ctx.o.meta.size; + uint64_t multipart_sync_threshold = tier_ctx.multipart_sync_threshold; + + if (multipart_sync_threshold < MULTIPART_MIN_POSSIBLE_PART_SIZE) { + multipart_sync_threshold = MULTIPART_MIN_POSSIBLE_PART_SIZE; + } + + if (size < multipart_sync_threshold) { + ret = cloud_tier_plain_transfer(tier_ctx); + } else { + tier_ctx.is_multipart_upload = true; + ret = cloud_tier_multipart_transfer(tier_ctx); + } + + if (ret < 0) { + ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to transition object ret=" << ret << dendl; + } + + return ret; +} diff --git a/src/rgw/driver/rados/rgw_lc_tier.h b/src/rgw/driver/rados/rgw_lc_tier.h new file mode 100644 index 00000000000..1b21f262092 --- /dev/null +++ b/src/rgw/driver/rados/rgw_lc_tier.h @@ -0,0 +1,54 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_LC_TIER_H +#define CEPH_RGW_LC_TIER_H + +#include "rgw_lc.h" +#include "rgw_rest_conn.h" +#include "rgw_rados.h" +#include "rgw_zone.h" +#include "rgw_sal_rados.h" +#include "rgw_cr_rest.h" + +#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) +#define MULTIPART_MIN_POSSIBLE_PART_SIZE (5 * 1024 * 1024) + +struct RGWLCCloudTierCtx { + CephContext *cct; + const DoutPrefixProvider *dpp; + + /* Source */ + rgw_bucket_dir_entry& o; + rgw::sal::Driver *driver; + RGWBucketInfo& bucket_info; + std::string storage_class; + + rgw::sal::Object *obj; + + /* Remote */ + RGWRESTConn& conn; + std::string target_bucket_name; + std::string target_storage_class; + + std::map acl_mappings; + uint64_t multipart_min_part_size; + uint64_t multipart_sync_threshold; + + bool is_multipart_upload{false}; + bool target_bucket_created{true}; + + RGWLCCloudTierCtx(CephContext* _cct, const DoutPrefixProvider *_dpp, + rgw_bucket_dir_entry& _o, rgw::sal::Driver *_driver, + RGWBucketInfo &_binfo, rgw::sal::Object *_obj, + RGWRESTConn& _conn, std::string& _bucket, + std::string& _storage_class) : + cct(_cct), dpp(_dpp), o(_o), driver(_driver), bucket_info(_binfo), + obj(_obj), conn(_conn), target_bucket_name(_bucket), + target_storage_class(_storage_class) {} +}; + +/* Transition object to cloud endpoint */ +int rgw_cloud_tier_transfer_object(RGWLCCloudTierCtx& tier_ctx, std::set& cloud_targets); + +#endif diff --git a/src/rgw/driver/rados/rgw_log_backing.cc b/src/rgw/driver/rados/rgw_log_backing.cc new file mode 100644 index 00000000000..7c9dafe7e44 --- /dev/null +++ b/src/rgw/driver/rados/rgw_log_backing.cc @@ -0,0 +1,708 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "cls/log/cls_log_client.h" +#include "cls/version/cls_version_client.h" + +#include "rgw_log_backing.h" +#include "rgw_tools.h" +#include "cls_fifo_legacy.h" + +using namespace std::chrono_literals; +namespace cb = ceph::buffer; + +static constexpr auto dout_subsys = ceph_subsys_rgw; + +enum class shard_check { dne, omap, fifo, corrupt }; +inline std::ostream& operator <<(std::ostream& m, const shard_check& t) { + switch (t) { + case shard_check::dne: + return m << "shard_check::dne"; + case shard_check::omap: + return m 
<< "shard_check::omap"; + case shard_check::fifo: + return m << "shard_check::fifo"; + case shard_check::corrupt: + return m << "shard_check::corrupt"; + } + + return m << "shard_check::UNKNOWN=" << static_cast(t); +} + +namespace { +/// Return the shard type, and a bool to see whether it has entries. +shard_check +probe_shard(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + bool& fifo_unsupported, optional_yield y) +{ + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " probing oid=" << oid + << dendl; + if (!fifo_unsupported) { + std::unique_ptr fifo; + auto r = rgw::cls::fifo::FIFO::open(dpp, ioctx, oid, + &fifo, y, + std::nullopt, true); + switch (r) { + case 0: + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": oid=" << oid << " is FIFO" + << dendl; + return shard_check::fifo; + + case -ENODATA: + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": oid=" << oid << " is empty and therefore OMAP" + << dendl; + return shard_check::omap; + + case -ENOENT: + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": oid=" << oid << " does not exist" + << dendl; + return shard_check::dne; + + case -EPERM: + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": FIFO is unsupported, marking." + << dendl; + fifo_unsupported = true; + return shard_check::omap; + + default: + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": error probing: r=" << r + << ", oid=" << oid << dendl; + return shard_check::corrupt; + } + } else { + // Since FIFO is unsupported, OMAP is the only alternative + return shard_check::omap; + } +} + +tl::expected +handle_dne(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, + log_type def, + std::string oid, + bool fifo_unsupported, + optional_yield y) +{ + if (def == log_type::fifo) { + if (fifo_unsupported) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " WARNING: FIFO set as default but not supported by OSD. " + << "Falling back to OMAP." << dendl; + return log_type::omap; + } + std::unique_ptr fifo; + auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, + &fifo, y, + std::nullopt); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " error creating FIFO: r=" << r + << ", oid=" << oid << dendl; + return tl::unexpected(bs::error_code(-r, bs::system_category())); + } + } + return def; +} +} + +tl::expected +log_backing_type(const DoutPrefixProvider *dpp, + librados::IoCtx& ioctx, + log_type def, + int shards, + const fu2::unique_function& get_oid, + optional_yield y) +{ + auto check = shard_check::dne; + bool fifo_unsupported = false; + for (int i = 0; i < shards; ++i) { + auto c = probe_shard(dpp, ioctx, get_oid(i), fifo_unsupported, y); + if (c == shard_check::corrupt) + return tl::unexpected(bs::error_code(EIO, bs::system_category())); + if (c == shard_check::dne) continue; + if (check == shard_check::dne) { + check = c; + continue; + } + + if (check != c) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " clashing types: check=" << check + << ", c=" << c << dendl; + return tl::unexpected(bs::error_code(EIO, bs::system_category())); + } + } + if (check == shard_check::corrupt) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " should be unreachable!" 
<< dendl; + return tl::unexpected(bs::error_code(EIO, bs::system_category())); + } + + if (check == shard_check::dne) + return handle_dne(dpp, ioctx, + def, + get_oid(0), + fifo_unsupported, + y); + + return (check == shard_check::fifo ? log_type::fifo : log_type::omap); +} + +bs::error_code log_remove(const DoutPrefixProvider *dpp, + librados::IoCtx& ioctx, + int shards, + const fu2::unique_function& get_oid, + bool leave_zero, + optional_yield y) +{ + bs::error_code ec; + for (int i = 0; i < shards; ++i) { + auto oid = get_oid(i); + rados::cls::fifo::info info; + uint32_t part_header_size = 0, part_entry_overhead = 0; + + auto r = rgw::cls::fifo::get_meta(dpp, ioctx, oid, std::nullopt, &info, + &part_header_size, &part_entry_overhead, + 0, y, true); + if (r == -ENOENT) continue; + if (r == 0 && info.head_part_num > -1) { + for (auto j = info.tail_part_num; j <= info.head_part_num; ++j) { + librados::ObjectWriteOperation op; + op.remove(); + auto part_oid = info.part_oid(j); + auto subr = rgw_rados_operate(dpp, ioctx, part_oid, &op, null_yield); + if (subr < 0 && subr != -ENOENT) { + if (!ec) + ec = bs::error_code(-subr, bs::system_category()); + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed removing FIFO part: part_oid=" << part_oid + << ", subr=" << subr << dendl; + } + } + } + if (r < 0 && r != -ENODATA) { + if (!ec) + ec = bs::error_code(-r, bs::system_category()); + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed checking FIFO part: oid=" << oid + << ", r=" << r << dendl; + } + librados::ObjectWriteOperation op; + if (i == 0 && leave_zero) { + // Leave shard 0 in existence, but remove contents and + // omap. cls_lock stores things in the xattrs. And sync needs to + // rendezvous with locks on generation 0 shard 0. + op.omap_set_header({}); + op.omap_clear(); + op.truncate(0); + } else { + op.remove(); + } + r = rgw_rados_operate(dpp, ioctx, oid, &op, null_yield); + if (r < 0 && r != -ENOENT) { + if (!ec) + ec = bs::error_code(-r, bs::system_category()); + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed removing shard: oid=" << oid + << ", r=" << r << dendl; + } + } + return ec; +} + +logback_generations::~logback_generations() { + if (watchcookie > 0) { + auto cct = static_cast(ioctx.cct()); + auto r = ioctx.unwatch2(watchcookie); + if (r < 0) { + lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed unwatching oid=" << oid + << ", r=" << r << dendl; + } + } +} + +bs::error_code logback_generations::setup(const DoutPrefixProvider *dpp, + log_type def, + optional_yield y) noexcept +{ + try { + // First, read. + auto cct = static_cast(ioctx.cct()); + auto res = read(dpp, y); + if (!res && res.error() != bs::errc::no_such_file_or_directory) { + return res.error(); + } + if (res) { + std::unique_lock lock(m); + std::tie(entries_, version) = std::move(*res); + } else { + // Are we the first? Then create generation 0 and the generations + // metadata. 
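+        // The create path is deliberately racy: generation 0 and the version
+        // tag are written with exclusive create (op.create(true)), so exactly
+        // one racing daemon wins. Losers see -EEXIST and simply re-read the
+        // map written by the winner (see the r != 0 branch below).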
+ librados::ObjectWriteOperation op; + auto type = log_backing_type(dpp, ioctx, def, shards, + [this](int shard) { + return this->get_oid(0, shard); + }, y); + if (!type) + return type.error(); + + logback_generation l; + l.type = *type; + + std::unique_lock lock(m); + version.ver = 1; + static constexpr auto TAG_LEN = 24; + version.tag.clear(); + append_rand_alpha(cct, version.tag, version.tag, TAG_LEN); + op.create(true); + cls_version_set(op, version); + cb::list bl; + entries_.emplace(0, std::move(l)); + encode(entries_, bl); + lock.unlock(); + + op.write_full(bl); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed writing oid=" << oid + << ", r=" << r << dendl; + bs::system_error(-r, bs::system_category()); + } + // Did someone race us? Then re-read. + if (r != 0) { + res = read(dpp, y); + if (!res) + return res.error(); + if (res->first.empty()) + return bs::error_code(EIO, bs::system_category()); + auto l = res->first.begin()->second; + // In the unlikely event that someone raced us, created + // generation zero, incremented, then erased generation zero, + // don't leave generation zero lying around. + if (l.gen_id != 0) { + auto ec = log_remove(dpp, ioctx, shards, + [this](int shard) { + return this->get_oid(0, shard); + }, true, y); + if (ec) return ec; + } + std::unique_lock lock(m); + std::tie(entries_, version) = std::move(*res); + } + } + // Pass all non-empty generations to the handler + std::unique_lock lock(m); + auto i = lowest_nomempty(entries_); + entries_t e; + std::copy(i, entries_.cend(), + std::inserter(e, e.end())); + m.unlock(); + auto ec = watch(); + if (ec) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed to re-establish watch, unsafe to continue: oid=" + << oid << ", ec=" << ec.message() << dendl; + } + return handle_init(std::move(e)); + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } +} + +bs::error_code logback_generations::update(const DoutPrefixProvider *dpp, optional_yield y) noexcept +{ + try { + auto res = read(dpp, y); + if (!res) { + return res.error(); + } + + std::unique_lock l(m); + auto& [es, v] = *res; + if (v == version) { + // Nothing to do! + return {}; + } + + // Check consistency and prepare update + if (es.empty()) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": INCONSISTENCY! Read empty update." << dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + auto cur_lowest = lowest_nomempty(entries_); + // Straight up can't happen + assert(cur_lowest != entries_.cend()); + auto new_lowest = lowest_nomempty(es); + if (new_lowest == es.cend()) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": INCONSISTENCY! Read update with no active head." << dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + if (new_lowest->first < cur_lowest->first) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": INCONSISTENCY! Tail moved wrong way." << dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + + std::optional highest_empty; + if (new_lowest->first > cur_lowest->first && new_lowest != es.begin()) { + --new_lowest; + highest_empty = new_lowest->first; + } + + entries_t new_entries; + + if ((es.end() - 1)->first < (entries_.end() - 1)->first) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": INCONSISTENCY! Head moved wrong way." 
<< dendl; + return bs::error_code(EFAULT, bs::system_category()); + } + + if ((es.end() - 1)->first > (entries_.end() - 1)->first) { + auto ei = es.lower_bound((entries_.end() - 1)->first + 1); + std::copy(ei, es.end(), std::inserter(new_entries, new_entries.end())); + } + + // Everything checks out! + + version = v; + entries_ = es; + l.unlock(); + + if (highest_empty) { + auto ec = handle_empty_to(*highest_empty); + if (ec) return ec; + } + + if (!new_entries.empty()) { + auto ec = handle_new_gens(std::move(new_entries)); + if (ec) return ec; + } + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } + return {}; +} + +auto logback_generations::read(const DoutPrefixProvider *dpp, optional_yield y) noexcept -> + tl::expected, bs::error_code> +{ + try { + librados::ObjectReadOperation op; + std::unique_lock l(m); + cls_version_check(op, version, VER_COND_GE); + l.unlock(); + obj_version v2; + cls_version_read(op, &v2); + cb::list bl; + op.read(0, 0, &bl, nullptr); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); + if (r < 0) { + if (r == -ENOENT) { + ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": oid=" << oid + << " not found" << dendl; + } else { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed reading oid=" << oid + << ", r=" << r << dendl; + } + return tl::unexpected(bs::error_code(-r, bs::system_category())); + } + auto bi = bl.cbegin(); + entries_t e; + try { + decode(e, bi); + } catch (const cb::error& err) { + return tl::unexpected(err.code()); + } + return std::pair{ std::move(e), std::move(v2) }; + } catch (const std::bad_alloc&) { + return tl::unexpected(bs::error_code(ENOMEM, bs::system_category())); + } +} + +bs::error_code logback_generations::write(const DoutPrefixProvider *dpp, entries_t&& e, + std::unique_lock&& l_, + optional_yield y) noexcept +{ + auto l = std::move(l_); + ceph_assert(l.mutex() == &m && + l.owns_lock()); + try { + librados::ObjectWriteOperation op; + cls_version_check(op, version, VER_COND_GE); + cb::list bl; + encode(e, bl); + op.write_full(bl); + cls_version_inc(op); + auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (r == 0) { + entries_ = std::move(e); + version.inc(); + return {}; + } + l.unlock(); + if (r < 0 && r != -ECANCELED) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed reading oid=" << oid + << ", r=" << r << dendl; + return { -r, bs::system_category() }; + } + if (r == -ECANCELED) { + auto ec = update(dpp, y); + if (ec) { + return ec; + } else { + return { ECANCELED, bs::system_category() }; + } + } + } catch (const std::bad_alloc&) { + return { ENOMEM, bs::system_category() }; + } + return {}; +} + + +bs::error_code logback_generations::watch() noexcept { + try { + auto cct = static_cast(ioctx.cct()); + auto r = ioctx.watch2(oid, &watchcookie, this); + if (r < 0) { + lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": failed to set watch oid=" << oid + << ", r=" << r << dendl; + return { -r, bs::system_category() }; + } + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } + return {}; +} + +bs::error_code logback_generations::new_backing(const DoutPrefixProvider *dpp, + log_type type, + optional_yield y) noexcept { + static constexpr auto max_tries = 10; + try { + auto ec = update(dpp, y); + if (ec) return ec; + auto tries = 0; + entries_t new_entries; + do { + std::unique_lock l(m); + auto last = entries_.end() - 1; + if (last->second.type == 
type) { + // Nothing to be done + return {}; + } + auto newgenid = last->first + 1; + logback_generation newgen; + newgen.gen_id = newgenid; + newgen.type = type; + new_entries.emplace(newgenid, newgen); + auto es = entries_; + es.emplace(newgenid, std::move(newgen)); + ec = write(dpp, std::move(es), std::move(l), y); + ++tries; + } while (ec == bs::errc::operation_canceled && + tries < max_tries); + if (tries >= max_tries) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": exhausted retry attempts." << dendl; + return ec; + } + + if (ec) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": write failed with ec=" << ec.message() << dendl; + return ec; + } + + cb::list bl, rbl; + + auto r = rgw_rados_notify(dpp, ioctx, oid, bl, 10'000, &rbl, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": notify failed with r=" << r << dendl; + return { -r, bs::system_category() }; + } + ec = handle_new_gens(new_entries); + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } + return {}; +} + +bs::error_code logback_generations::empty_to(const DoutPrefixProvider *dpp, + uint64_t gen_id, + optional_yield y) noexcept { + static constexpr auto max_tries = 10; + try { + auto ec = update(dpp, y); + if (ec) return ec; + auto tries = 0; + uint64_t newtail = 0; + do { + std::unique_lock l(m); + { + auto last = entries_.end() - 1; + if (gen_id >= last->first) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": Attempt to trim beyond the possible." << dendl; + return bs::error_code(EINVAL, bs::system_category()); + } + } + auto es = entries_; + auto ei = es.upper_bound(gen_id); + if (ei == es.begin()) { + // Nothing to be done. + return {}; + } + for (auto i = es.begin(); i < ei; ++i) { + newtail = i->first; + i->second.pruned = ceph::real_clock::now(); + } + ec = write(dpp, std::move(es), std::move(l), y); + ++tries; + } while (ec == bs::errc::operation_canceled && + tries < max_tries); + if (tries >= max_tries) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": exhausted retry attempts." 
<< dendl; + return ec; + } + + if (ec) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": write failed with ec=" << ec.message() << dendl; + return ec; + } + + cb::list bl, rbl; + + auto r = rgw_rados_notify(dpp, ioctx, oid, bl, 10'000, &rbl, y); + if (r < 0) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": notify failed with r=" << r << dendl; + return { -r, bs::system_category() }; + } + ec = handle_empty_to(newtail); + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } + return {}; +} + +bs::error_code logback_generations::remove_empty(const DoutPrefixProvider *dpp, optional_yield y) noexcept { + static constexpr auto max_tries = 10; + try { + auto ec = update(dpp, y); + if (ec) return ec; + auto tries = 0; + entries_t new_entries; + std::unique_lock l(m); + ceph_assert(!entries_.empty()); + { + auto i = lowest_nomempty(entries_); + if (i == entries_.begin()) { + return {}; + } + } + entries_t es; + auto now = ceph::real_clock::now(); + l.unlock(); + do { + std::copy_if(entries_.cbegin(), entries_.cend(), + std::inserter(es, es.end()), + [now](const auto& e) { + if (!e.second.pruned) + return false; + + auto pruned = *e.second.pruned; + return (now - pruned) >= 1h; + }); + auto es2 = entries_; + for (const auto& [gen_id, e] : es) { + ceph_assert(e.pruned); + auto ec = log_remove(dpp, ioctx, shards, + [this, gen_id = gen_id](int shard) { + return this->get_oid(gen_id, shard); + }, (gen_id == 0), y); + if (ec) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": Error pruning: gen_id=" << gen_id + << " ec=" << ec.message() << dendl; + } + if (auto i = es2.find(gen_id); i != es2.end()) { + es2.erase(i); + } + } + l.lock(); + es.clear(); + ec = write(dpp, std::move(es2), std::move(l), y); + ++tries; + } while (ec == bs::errc::operation_canceled && + tries < max_tries); + if (tries >= max_tries) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": exhausted retry attempts." << dendl; + return ec; + } + + if (ec) { + ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": write failed with ec=" << ec.message() << dendl; + return ec; + } + } catch (const std::bad_alloc&) { + return bs::error_code(ENOMEM, bs::system_category()); + } + return {}; +} + +void logback_generations::handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) +{ + auto cct = static_cast(ioctx.cct()); + const DoutPrefix dp(cct, dout_subsys, "logback generations handle_notify: "); + if (notifier_id != my_id) { + auto ec = update(&dp, null_yield); + if (ec) { + lderr(cct) + << __PRETTY_FUNCTION__ << ":" << __LINE__ + << ": update failed, no one to report to and no safe way to continue." 
+      << dendl;
+      abort();
+    }
+  }
+  cb::list rbl;
+  ioctx.notify_ack(oid, notify_id, watchcookie, rbl);
+}
+
+void logback_generations::handle_error(uint64_t cookie, int err) {
+  auto cct = static_cast<CephContext*>(ioctx.cct());
+  auto r = ioctx.unwatch2(watchcookie);
+  if (r < 0) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed to unwatch oid=" << oid
+               << ", r=" << r << dendl;
+  }
+
+  auto ec = watch();
+  if (ec) {
+    lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__
+               << ": failed to re-establish watch, unsafe to continue: oid="
+               << oid << ", ec=" << ec.message() << dendl;
+  }
+}
diff --git a/src/rgw/driver/rados/rgw_log_backing.h b/src/rgw/driver/rados/rgw_log_backing.h
new file mode 100644
index 00000000000..3fa67d7418b
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_log_backing.h
@@ -0,0 +1,399 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_LOGBACKING_H
+#define CEPH_RGW_LOGBACKING_H
+
+#include <optional>
+#include <iostream>
+#include <string>
+#include <string_view>
+
+#include <strings.h>
+
+#include <boost/container/flat_map.hpp>
+#include <boost/system/error_code.hpp>
+
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
+#include "include/rados/librados.hpp"
+#include "include/encoding.h"
+#include "include/expected.hpp"
+#include "include/function2.hpp"
+
+#include "cls/version/cls_version_types.h"
+
+#include "common/async/yield_context.h"
+#include "common/Formatter.h"
+#include "common/strtol.h"
+
+namespace bc = boost::container;
+namespace bs = boost::system;
+
+#include "cls_fifo_legacy.h"
+
+/// Type of log backing, stored in the mark used in the quick check,
+/// and passed to checking functions.
+enum class log_type {
+  omap = 0,
+  fifo = 1
+};
+
+inline void encode(const log_type& type, ceph::buffer::list& bl) {
+  auto t = static_cast<uint8_t>(type);
+  encode(t, bl);
+}
+
+inline void decode(log_type& type, bufferlist::const_iterator& bl) {
+  uint8_t t;
+  decode(t, bl);
+  type = static_cast<log_type>(t);
+}
+
+inline std::optional<log_type> to_log_type(std::string_view s) {
+  if (strncasecmp(s.data(), "omap", s.length()) == 0) {
+    return log_type::omap;
+  } else if (strncasecmp(s.data(), "fifo", s.length()) == 0) {
+    return log_type::fifo;
+  } else {
+    return std::nullopt;
+  }
+}
+inline std::ostream& operator <<(std::ostream& m, const log_type& t) {
+  switch (t) {
+  case log_type::omap:
+    return m << "log_type::omap";
+  case log_type::fifo:
+    return m << "log_type::fifo";
+  }
+
+  return m << "log_type::UNKNOWN=" << static_cast<int>(t);
+}
+
+/// Look over the shards in a log and determine the type.
+tl::expected<log_type, bs::error_code>
+log_backing_type(const DoutPrefixProvider *dpp,
+                 librados::IoCtx& ioctx,
+                 log_type def,
+                 int shards, //< Total number of shards
+                 /// A function taking a shard number and
+                 /// returning an oid.
+                 const fu2::unique_function<std::string(int) const>& get_oid,
+                 optional_yield y);
+
+/// Remove all log shards and associated parts of fifos.
+bs::error_code log_remove(const DoutPrefixProvider *dpp,
+                          librados::IoCtx& ioctx,
+                          int shards, //< Total number of shards
+                          /// A function taking a shard number and
+                          /// returning an oid.
+ const fu2::unique_function& get_oid, + bool leave_zero, + optional_yield y); + + +struct logback_generation { + uint64_t gen_id = 0; + log_type type; + std::optional pruned; + + void encode(ceph::buffer::list& bl) const { + ENCODE_START(1, 1, bl); + encode(gen_id, bl); + encode(type, bl); + encode(pruned, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(gen_id, bl); + decode(type, bl); + decode(pruned, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(logback_generation) +inline std::ostream& operator <<(std::ostream& m, const logback_generation& g) { + return m << "[" << g.gen_id << "," << g.type << "," + << (g.pruned ? "PRUNED" : "NOT PRUNED") << "]"; +} + +class logback_generations : public librados::WatchCtx2 { +public: + using entries_t = bc::flat_map; + +protected: + librados::IoCtx& ioctx; + logback_generations(librados::IoCtx& ioctx, + std::string oid, + fu2::unique_function&& get_oid, + int shards) noexcept + : ioctx(ioctx), oid(oid), get_oid(std::move(get_oid)), + shards(shards) {} + + uint64_t my_id = ioctx.get_instance_id(); + +private: + const std::string oid; + const fu2::unique_function get_oid; + +protected: + const int shards; + +private: + + uint64_t watchcookie = 0; + + obj_version version; + std::mutex m; + entries_t entries_; + + tl::expected, bs::error_code> + read(const DoutPrefixProvider *dpp, optional_yield y) noexcept; + bs::error_code write(const DoutPrefixProvider *dpp, entries_t&& e, std::unique_lock&& l_, + optional_yield y) noexcept; + bs::error_code setup(const DoutPrefixProvider *dpp, log_type def, optional_yield y) noexcept; + + bs::error_code watch() noexcept; + + auto lowest_nomempty(const entries_t& es) { + return std::find_if(es.begin(), es.end(), + [](const auto& e) { + return !e.second.pruned; + }); + } + +public: + + /// For the use of watch/notify. + + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) override final; + + void handle_error(uint64_t cookie, int err) override final; + + /// Public interface + + virtual ~logback_generations(); + + template + static tl::expected, bs::error_code> + init(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx_, std::string oid_, + fu2::unique_function&& get_oid_, + int shards_, log_type def, optional_yield y, + Args&& ...args) noexcept { + try { + T* lgp = new T(ioctx_, std::move(oid_), + std::move(get_oid_), + shards_, std::forward(args)...); + std::unique_ptr lg(lgp); + lgp = nullptr; + auto ec = lg->setup(dpp, def, y); + if (ec) + return tl::unexpected(ec); + // Obnoxiousness for C++ Compiler in Bionic Beaver + return tl::expected, bs::error_code>(std::move(lg)); + } catch (const std::bad_alloc&) { + return tl::unexpected(bs::error_code(ENOMEM, bs::system_category())); + } + } + + bs::error_code update(const DoutPrefixProvider *dpp, optional_yield y) noexcept; + + entries_t entries() const { + return entries_; + } + + bs::error_code new_backing(const DoutPrefixProvider *dpp, log_type type, optional_yield y) noexcept; + + bs::error_code empty_to(const DoutPrefixProvider *dpp, uint64_t gen_id, optional_yield y) noexcept; + + bs::error_code remove_empty(const DoutPrefixProvider *dpp, optional_yield y) noexcept; + + // Callbacks, to be defined by descendant. + + /// Handle initialization on startup + /// + /// @param e All non-empty generations + virtual bs::error_code handle_init(entries_t e) noexcept = 0; + + /// Handle new generations. 
+ /// + /// @param e Map of generations added since last update + virtual bs::error_code handle_new_gens(entries_t e) noexcept = 0; + + /// Handle generations being marked empty + /// + /// @param new_tail Lowest non-empty generation + virtual bs::error_code handle_empty_to(uint64_t new_tail) noexcept = 0; +}; + +inline std::string gencursor(uint64_t gen_id, std::string_view cursor) { + return (gen_id > 0 ? + fmt::format("G{:0>20}@{}", gen_id, cursor) : + std::string(cursor)); +} + +inline std::pair +cursorgen(std::string_view cursor_) { + if (cursor_.empty()) { + return { 0, "" }; + } + std::string_view cursor = cursor_; + if (cursor[0] != 'G') { + return { 0, cursor }; + } + cursor.remove_prefix(1); + auto gen_id = ceph::consume(cursor); + if (!gen_id || cursor[0] != '@') { + return { 0, cursor_ }; + } + cursor.remove_prefix(1); + return { *gen_id, cursor }; +} + +class LazyFIFO { + librados::IoCtx& ioctx; + std::string oid; + std::mutex m; + std::unique_ptr fifo; + + int lazy_init(const DoutPrefixProvider *dpp, optional_yield y) { + std::unique_lock l(m); + if (fifo) return 0; + auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, &fifo, y); + if (r) { + fifo.reset(); + } + return r; + } + +public: + + LazyFIFO(librados::IoCtx& ioctx, std::string oid) + : ioctx(ioctx), oid(std::move(oid)) {} + + int read_meta(const DoutPrefixProvider *dpp, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->read_meta(dpp, y); + } + + int meta(const DoutPrefixProvider *dpp, rados::cls::fifo::info& info, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + info = fifo->meta(); + return 0; + } + + int get_part_layout_info(const DoutPrefixProvider *dpp, + std::uint32_t& part_header_size, + std::uint32_t& part_entry_overhead, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + std::tie(part_header_size, part_entry_overhead) + = fifo->get_part_layout_info(); + return 0; + } + + int push(const DoutPrefixProvider *dpp, + const ceph::buffer::list& bl, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->push(dpp, bl, y); + } + + int push(const DoutPrefixProvider *dpp, + ceph::buffer::list& bl, + librados::AioCompletion* c, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->push(dpp, bl, c); + return 0; + } + + int push(const DoutPrefixProvider *dpp, + const std::vector& data_bufs, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->push(dpp, data_bufs, y); + } + + int push(const DoutPrefixProvider *dpp, + const std::vector& data_bufs, + librados::AioCompletion* c, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->push(dpp, data_bufs, c); + return 0; + } + + int list(const DoutPrefixProvider *dpp, + int max_entries, std::optional markstr, + std::vector* out, + bool* more, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->list(dpp, max_entries, markstr, out, more, y); + } + + int list(const DoutPrefixProvider *dpp, int max_entries, std::optional markstr, + std::vector* out, bool* more, + librados::AioCompletion* c, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->list(dpp, max_entries, markstr, out, more, c); + return 0; + } + + int trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->trim(dpp, markstr, exclusive, 
y); + } + + int trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, librados::AioCompletion* c, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->trim(dpp, markstr, exclusive, c); + return 0; + } + + int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, rados::cls::fifo::part_header* header, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + return fifo->get_part_info(dpp, part_num, header, y); + } + + int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, rados::cls::fifo::part_header* header, + librados::AioCompletion* c, optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->get_part_info(part_num, header, c); + return 0; + } + + int get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function< + void(int r, rados::cls::fifo::part_header&&)>&& f, + librados::AioCompletion* c, + optional_yield y) { + auto r = lazy_init(dpp, y); + if (r < 0) return r; + fifo->get_head_info(dpp, std::move(f), c); + return 0; + } +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_metadata.cc b/src/rgw/driver/rados/rgw_metadata.cc new file mode 100644 index 00000000000..e3e49316eac --- /dev/null +++ b/src/rgw/driver/rados/rgw_metadata.cc @@ -0,0 +1,233 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_metadata.h" + +#include "rgw_zone.h" +#include "rgw_mdlog.h" + +#include "services/svc_zone.h" +#include "services/svc_cls.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +const std::string RGWMetadataLogHistory::oid = "meta.history"; + +struct obj_version; + +void rgw_shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id) +{ + uint32_t val = ceph_str_hash_linux(key.c_str(), key.size()); + char buf[16]; + if (shard_id) { + *shard_id = val % max_shards; + } + snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards)); + name = prefix + buf; +} + +void rgw_shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name) +{ + uint32_t val = ceph_str_hash_linux(key.c_str(), key.size()); + val ^= ceph_str_hash_linux(section.c_str(), section.size()); + char buf[16]; + snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards)); + name = prefix + buf; +} + +void rgw_shard_name(const string& prefix, unsigned shard_id, string& name) +{ + char buf[16]; + snprintf(buf, sizeof(buf), "%u", shard_id); + name = prefix + buf; +} + +int RGWMetadataLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl) { + if (!svc.zone->need_to_log_metadata()) + return 0; + + string oid; + int shard_id; + + rgw_shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id); + mark_modified(shard_id); + real_time now = real_clock::now(); + return svc.cls->timelog.add(dpp, oid, now, section, key, bl, null_yield); +} + +int RGWMetadataLog::get_shard_id(const string& hash_key, int *shard_id) +{ + string oid; + + rgw_shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, shard_id); + return 0; +} + +int RGWMetadataLog::store_entries_in_shard(const DoutPrefixProvider *dpp, list& entries, int shard_id, librados::AioCompletion *completion) +{ + string oid; + + mark_modified(shard_id); + rgw_shard_name(prefix, shard_id, oid); + return svc.cls->timelog.add(dpp, oid, entries, completion, false, null_yield); +} + +void 
RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, + const string& marker, void **handle) +{ + LogListCtx *ctx = new LogListCtx(); + + ctx->cur_shard = shard_id; + ctx->from_time = from_time; + ctx->end_time = end_time; + ctx->marker = marker; + + get_shard_oid(ctx->cur_shard, ctx->cur_oid); + + *handle = (void *)ctx; +} + +void RGWMetadataLog::complete_list_entries(void *handle) { + LogListCtx *ctx = static_cast(handle); + delete ctx; +} + +int RGWMetadataLog::list_entries(const DoutPrefixProvider *dpp, void *handle, + int max_entries, + list& entries, + string *last_marker, + bool *truncated) { + LogListCtx *ctx = static_cast(handle); + + if (!max_entries) { + *truncated = false; + return 0; + } + + std::string next_marker; + int ret = svc.cls->timelog.list(dpp, ctx->cur_oid, ctx->from_time, ctx->end_time, + max_entries, entries, ctx->marker, + &next_marker, truncated, null_yield); + if ((ret < 0) && (ret != -ENOENT)) + return ret; + + ctx->marker = std::move(next_marker); + if (last_marker) { + *last_marker = ctx->marker; + } + + if (ret == -ENOENT) + *truncated = false; + + return 0; +} + +int RGWMetadataLog::get_info(const DoutPrefixProvider *dpp, int shard_id, RGWMetadataLogInfo *info) +{ + string oid; + get_shard_oid(shard_id, oid); + + cls_log_header header; + + int ret = svc.cls->timelog.info(dpp, oid, &header, null_yield); + if ((ret < 0) && (ret != -ENOENT)) + return ret; + + info->marker = header.max_marker; + info->last_update = header.max_time.to_real_time(); + + return 0; +} + +static void _mdlog_info_completion(librados::completion_t cb, void *arg) +{ + auto infoc = static_cast(arg); + infoc->finish(cb); + infoc->put(); // drop the ref from get_info_async() +} + +RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb) + : completion(librados::Rados::aio_create_completion((void *)this, + _mdlog_info_completion)), + callback(cb) +{ +} + +RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion() +{ + completion->release(); +} + +int RGWMetadataLog::get_info_async(const DoutPrefixProvider *dpp, int shard_id, RGWMetadataLogInfoCompletion *completion) +{ + string oid; + get_shard_oid(shard_id, oid); + + completion->get(); // hold a ref until the completion fires + + return svc.cls->timelog.info_async(dpp, completion->get_io_obj(), oid, + &completion->get_header(), + completion->get_completion()); +} + +int RGWMetadataLog::trim(const DoutPrefixProvider *dpp, int shard_id, const real_time& from_time, const real_time& end_time, + const string& start_marker, const string& end_marker) +{ + string oid; + get_shard_oid(shard_id, oid); + + return svc.cls->timelog.trim(dpp, oid, from_time, end_time, start_marker, + end_marker, nullptr, null_yield); +} + +int RGWMetadataLog::lock_exclusive(const DoutPrefixProvider *dpp, int shard_id, timespan duration, string& zone_id, string& owner_id) { + string oid; + get_shard_oid(shard_id, oid); + + return svc.cls->lock.lock_exclusive(dpp, svc.zone->get_zone_params().log_pool, oid, duration, zone_id, owner_id); +} + +int RGWMetadataLog::unlock(const DoutPrefixProvider *dpp, int shard_id, string& zone_id, string& owner_id) { + string oid; + get_shard_oid(shard_id, oid); + + return svc.cls->lock.unlock(dpp, svc.zone->get_zone_params().log_pool, oid, zone_id, owner_id); +} + +void RGWMetadataLog::mark_modified(int shard_id) +{ + lock.get_read(); + if (modified_shards.find(shard_id) != modified_shards.end()) { + lock.unlock(); + return; + } + lock.unlock(); + + std::unique_lock 
wl{lock}; + modified_shards.insert(shard_id); +} + +void RGWMetadataLog::read_clear_modified(set &modified) +{ + std::unique_lock wl{lock}; + modified.swap(modified_shards); + modified_shards.clear(); +} + +void RGWMetadataLogInfo::dump(Formatter *f) const +{ + encode_json("marker", marker, f); + utime_t ut(last_update); + encode_json("last_update", ut, f); +} + +void RGWMetadataLogInfo::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("marker", marker, obj); + utime_t ut; + JSONDecoder::decode_json("last_update", ut, obj); + last_update = ut.to_real_time(); +} + diff --git a/src/rgw/driver/rados/rgw_metadata.h b/src/rgw/driver/rados/rgw_metadata.h new file mode 100644 index 00000000000..72283702e7e --- /dev/null +++ b/src/rgw/driver/rados/rgw_metadata.h @@ -0,0 +1,300 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_METADATA_H +#define CEPH_RGW_METADATA_H + +#include +#include +#include + +#include "include/types.h" +#include "rgw_common.h" +#include "rgw_period_history.h" +#include "rgw_mdlog_types.h" +#include "cls/version/cls_version_types.h" +#include "cls/log/cls_log_types.h" +#include "common/RefCountedObj.h" +#include "common/ceph_time.h" +#include "services/svc_meta_be.h" +#include "rgw_sal_fwd.h" + + +class RGWCoroutine; +class JSONObj; +struct RGWObjVersionTracker; + +struct obj_version; + + +class RGWMetadataObject { +protected: + obj_version objv; + ceph::real_time mtime; + std::map *pattrs{nullptr}; + +public: + RGWMetadataObject() {} + RGWMetadataObject(const obj_version& v, + real_time m) : objv(v), mtime(m) {} + virtual ~RGWMetadataObject() {} + obj_version& get_version(); + real_time& get_mtime() { return mtime; } + void set_pattrs(std::map *_pattrs) { + pattrs = _pattrs; + } + std::map *get_pattrs() { + return pattrs; + } + + virtual void dump(Formatter *f) const {} +}; + +class RGWMetadataManager; + +class RGWMetadataHandler { + friend class RGWMetadataManager; + +protected: + CephContext *cct; + +public: + RGWMetadataHandler() {} + virtual ~RGWMetadataHandler(); + virtual std::string get_type() = 0; + + void base_init(CephContext *_cct) { + cct = _cct; + } + + virtual RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) = 0; + + virtual int get(std::string& entry, RGWMetadataObject **obj, optional_yield, const DoutPrefixProvider *dpp) = 0; + virtual int put(std::string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield, + const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, + bool from_remote_zone) = 0; + virtual int remove(std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp) = 0; + + virtual int mutate(const std::string& entry, + const ceph::real_time& mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogStatus op_type, + std::function f) = 0; + + virtual int list_keys_init(const DoutPrefixProvider *dpp, const std::string& marker, void **phandle) = 0; + virtual int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated) = 0; + virtual void list_keys_complete(void *handle) = 0; + + virtual std::string get_marker(void *handle) = 0; + + virtual int get_shard_id(const std::string& entry, int *shard_id) { + *shard_id = 0; + return 0; + } + virtual int attach(RGWMetadataManager *manager); +}; + +class RGWMetadataHandler_GenericMetaBE : public 
RGWMetadataHandler { + friend class RGWSI_MetaBackend; + friend class RGWMetadataManager; + friend class Put; + +public: + class Put; + +protected: + RGWSI_MetaBackend_Handler *be_handler; + + virtual int do_get(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) = 0; + virtual int do_put(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, optional_yield y, + const DoutPrefixProvider *dpp, RGWMDLogSyncType type, + bool from_remote_zone) = 0; + virtual int do_put_operate(Put *put_op, const DoutPrefixProvider *dpp); + virtual int do_remove(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) = 0; + +public: + RGWMetadataHandler_GenericMetaBE() {} + + void base_init(CephContext *_cct, + RGWSI_MetaBackend_Handler *_be_handler) { + RGWMetadataHandler::base_init(_cct); + be_handler = _be_handler; + } + + RGWSI_MetaBackend_Handler *get_be_handler() { + return be_handler; + } + + class Put { + protected: + RGWMetadataHandler_GenericMetaBE *handler; + RGWSI_MetaBackend_Handler::Op *op; + std::string& entry; + RGWMetadataObject *obj; + RGWObjVersionTracker& objv_tracker; + RGWMDLogSyncType apply_type; + optional_yield y; + bool from_remote_zone{false}; + + int get(RGWMetadataObject **obj, const DoutPrefixProvider *dpp) { + return handler->do_get(op, entry, obj, y, dpp); + } + public: + Put(RGWMetadataHandler_GenericMetaBE *_handler, RGWSI_MetaBackend_Handler::Op *_op, + std::string& _entry, RGWMetadataObject *_obj, + RGWObjVersionTracker& _objv_tracker, optional_yield _y, + RGWMDLogSyncType _type, bool from_remote_zone); + + virtual ~Put() {} + + virtual int put_pre(const DoutPrefixProvider *dpp) { + return 0; + } + virtual int put(const DoutPrefixProvider *dpp) { + return 0; + } + virtual int put_post(const DoutPrefixProvider *dpp) { + return 0; + } + virtual int finalize() { + return 0; + } + }; + + int get(std::string& entry, RGWMetadataObject **obj, optional_yield, const DoutPrefixProvider *dpp) override; + int put(std::string& entry, RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp, RGWMDLogSyncType type, bool from_remote_zone) override; + int remove(std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp) override; + + int mutate(const std::string& entry, + const ceph::real_time& mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogStatus op_type, + std::function f) override; + + int get_shard_id(const std::string& entry, int *shard_id) override; + + int list_keys_init(const DoutPrefixProvider *dpp, const std::string& marker, void **phandle) override; + int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated) override; + void list_keys_complete(void *handle) override; + + std::string get_marker(void *handle) override; + + /** + * Compare an incoming versus on-disk tag/version+mtime combo against + * the sync mode to see if the new one should replace the on-disk one. + * + * @return true if the update should proceed, false otherwise. 
+ */ + static bool check_versions(bool exists, + const obj_version& ondisk, const real_time& ondisk_time, + const obj_version& incoming, const real_time& incoming_time, + RGWMDLogSyncType sync_mode) { + switch (sync_mode) { + case APPLY_UPDATES: + if ((ondisk.tag != incoming.tag) || + (ondisk.ver >= incoming.ver)) + return false; + break; + case APPLY_NEWER: + if (ondisk_time >= incoming_time) + return false; + break; + case APPLY_EXCLUSIVE: + if (exists) + return false; + break; + case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! + default: break; + } + return true; + } +}; + +class RGWMetadataTopHandler; + +class RGWMetadataManager { + friend class RGWMetadataHandler; + + CephContext *cct; + RGWSI_Meta *meta_svc; + std::map handlers; + std::unique_ptr md_top_handler; + + int find_handler(const std::string& metadata_key, RGWMetadataHandler **handler, std::string& entry); + int register_handler(RGWMetadataHandler *handler); + +public: + RGWMetadataManager(RGWSI_Meta *_meta_svc); + ~RGWMetadataManager(); + + RGWMetadataHandler *get_handler(const std::string& type); + + int get(std::string& metadata_key, Formatter *f, optional_yield y, const DoutPrefixProvider *dpp); + int put(std::string& metadata_key, bufferlist& bl, optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogSyncType sync_mode, + bool from_remote_zone, + obj_version *existing_version = NULL); + int remove(std::string& metadata_key, optional_yield y, const DoutPrefixProvider *dpp); + + int mutate(const std::string& metadata_key, + const ceph::real_time& mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWMDLogStatus op_type, + std::function f); + + int list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, void **phandle); + int list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void **phandle); + int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated); + void list_keys_complete(void *handle); + + std::string get_marker(void *handle); + + void dump_log_entry(cls_log_entry& entry, Formatter *f); + + void get_sections(std::list& sections); + + void parse_metadata_key(const std::string& metadata_key, std::string& type, std::string& entry); + + int get_shard_id(const std::string& section, const std::string& key, int *shard_id); +}; + +class RGWMetadataHandlerPut_SObj : public RGWMetadataHandler_GenericMetaBE::Put +{ +protected: + std::unique_ptr oo; + RGWMetadataObject *old_obj{nullptr}; + bool exists{false}; + +public: + RGWMetadataHandlerPut_SObj(RGWMetadataHandler_GenericMetaBE *handler, RGWSI_MetaBackend_Handler::Op *op, + std::string& entry, RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, + optional_yield y, + RGWMDLogSyncType type, bool from_remote_zone); + ~RGWMetadataHandlerPut_SObj(); + + int put_pre(const DoutPrefixProvider *dpp) override; + int put(const DoutPrefixProvider *dpp) override; + virtual int put_check(const DoutPrefixProvider *dpp) { + return 0; + } + virtual int put_checked(const DoutPrefixProvider *dpp); + virtual void encode_obj(bufferlist *bl) {} +}; + +void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& key, std::string& name, int *shard_id); +void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& section, const std::string& key, std::string& name); +void rgw_shard_name(const std::string& prefix, unsigned shard_id, std::string& name); + 
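+/* Example of the sharding scheme implemented above (illustrative values):
+ * with prefix "meta.log." and max_shards = 64, a key hashing to 4242
+ * yields shard_id = 4242 % 64 = 18 and object name "meta.log.18". The
+ * section/key variant XORs the two hashes before taking the modulus, so
+ * entries from different sections spread independently across shards. */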
+#endif diff --git a/src/rgw/driver/rados/rgw_notify.cc b/src/rgw/driver/rados/rgw_notify.cc new file mode 100644 index 00000000000..253a3bc4035 --- /dev/null +++ b/src/rgw/driver/rados/rgw_notify.cc @@ -0,0 +1,1009 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "rgw_notify.h" +#include "cls/2pc_queue/cls_2pc_queue_client.h" +#include "cls/lock/cls_lock_client.h" +#include +#include +#include +#include +#include "rgw_sal_rados.h" +#include "rgw_pubsub.h" +#include "rgw_pubsub_push.h" +#include "rgw_perf_counters.h" +#include "common/dout.h" +#include + +#define dout_subsys ceph_subsys_rgw + +namespace rgw::notify { + +struct event_entry_t { + rgw_pubsub_s3_event event; + std::string push_endpoint; + std::string push_endpoint_args; + std::string arn_topic; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(event, bl); + encode(push_endpoint, bl); + encode(push_endpoint_args, bl); + encode(arn_topic, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(event, bl); + decode(push_endpoint, bl); + decode(push_endpoint_args, bl); + decode(arn_topic, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(event_entry_t) + +using queues_t = std::set; + +// use mmap/mprotect to allocate 128k coroutine stacks +auto make_stack_allocator() { + return boost::context::protected_fixedsize_stack{128*1024}; +} + +class Manager : public DoutPrefixProvider { + const size_t max_queue_size; + const uint32_t queues_update_period_ms; + const uint32_t queues_update_retry_ms; + const uint32_t queue_idle_sleep_us; + const utime_t failover_time; + CephContext* const cct; + librados::IoCtx& rados_ioctx; + static constexpr auto COOKIE_LEN = 16; + const std::string lock_cookie; + boost::asio::io_context io_context; + boost::asio::executor_work_guard work_guard; + const uint32_t worker_count; + std::vector workers; + const uint32_t stale_reservations_period_s; + const uint32_t reservations_cleanup_period_s; + + const std::string Q_LIST_OBJECT_NAME = "queues_list_object"; + + CephContext *get_cct() const override { return cct; } + unsigned get_subsys() const override { return dout_subsys; } + std::ostream& gen_prefix(std::ostream& out) const override { return out << "rgw notify: "; } + + // read the list of queues from the queue list object + int read_queue_list(queues_t& queues, optional_yield y) { + constexpr auto max_chunk = 1024U; + std::string start_after; + bool more = true; + int rval; + while (more) { + librados::ObjectReadOperation op; + queues_t queues_chunk; + op.omap_get_keys2(start_after, max_chunk, &queues_chunk, &more, &rval); + const auto ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, nullptr, y); + if (ret == -ENOENT) { + // queue list object was not created - nothing to do + return 0; + } + if (ret < 0) { + // TODO: do we need to check on rval as well as ret? + ldpp_dout(this, 1) << "ERROR: failed to read queue list. 
error: " << ret << dendl; + return ret; + } + queues.merge(queues_chunk); + } + return 0; + } + + // set m1 to be the minimum between m1 and m2 + static int set_min_marker(std::string& m1, const std::string m2) { + cls_queue_marker mr1; + cls_queue_marker mr2; + if (mr1.from_str(m1.c_str()) < 0 || mr2.from_str(m2.c_str()) < 0) { + return -EINVAL; + } + if (mr2.gen <= mr1.gen && mr2.offset < mr1.offset) { + m1 = m2; + } + return 0; + } + + using Clock = ceph::coarse_mono_clock; + using Executor = boost::asio::io_context::executor_type; + using Timer = boost::asio::basic_waitable_timer, Executor>; + + class tokens_waiter { + const std::chrono::hours infinite_duration; + size_t pending_tokens; + Timer timer; + + struct token { + tokens_waiter& waiter; + token(tokens_waiter& _waiter) : waiter(_waiter) { + ++waiter.pending_tokens; + } + + ~token() { + --waiter.pending_tokens; + if (waiter.pending_tokens == 0) { + waiter.timer.cancel(); + } + } + }; + + public: + + tokens_waiter(boost::asio::io_context& io_context) : + infinite_duration(1000), + pending_tokens(0), + timer(io_context) {} + + void async_wait(yield_context yield) { + if (pending_tokens == 0) { + return; + } + timer.expires_from_now(infinite_duration); + boost::system::error_code ec; + timer.async_wait(yield[ec]); + ceph_assert(ec == boost::system::errc::operation_canceled); + } + + token make_token() { + return token(*this); + } + }; + + // processing of a specific entry + // return whether processing was successfull (true) or not (false) + bool process_entry(const cls_queue_entry& entry, yield_context yield) { + event_entry_t event_entry; + auto iter = entry.data.cbegin(); + try { + decode(event_entry, iter); + } catch (buffer::error& err) { + ldpp_dout(this, 5) << "WARNING: failed to decode entry. error: " << err.what() << dendl; + return false; + } + try { + // TODO move endpoint creation to queue level + const auto push_endpoint = RGWPubSubEndpoint::create(event_entry.push_endpoint, event_entry.arn_topic, + RGWHTTPArgs(event_entry.push_endpoint_args, this), + cct); + ldpp_dout(this, 20) << "INFO: push endpoint created: " << event_entry.push_endpoint << + " for entry: " << entry.marker << dendl; + const auto ret = push_endpoint->send_to_completion_async(cct, event_entry.event, optional_yield(io_context, yield)); + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint + << " failed. error: " << ret << " (will retry)" << dendl; + return false; + } else { + ldpp_dout(this, 20) << "INFO: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint + << " ok" << dendl; + if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); + return true; + } + } catch (const RGWPubSubEndpoint::configuration_error& e) { + ldpp_dout(this, 5) << "WARNING: failed to create push endpoint: " + << event_entry.push_endpoint << " for entry: " << entry.marker << ". 
error: " << e.what() << " (will retry) " << dendl; + return false; + } + } + + // clean stale reservation from queue + void cleanup_queue(const std::string& queue_name, yield_context yield) { + while (true) { + ldpp_dout(this, 20) << "INFO: trying to perform stale reservation cleanup for queue: " << queue_name << dendl; + const auto now = ceph::coarse_real_time::clock::now(); + const auto stale_time = now - std::chrono::seconds(stale_reservations_period_s); + librados::ObjectWriteOperation op; + op.assert_exists(); + rados::cls::lock::assert_locked(&op, queue_name+"_lock", + ClsLockType::EXCLUSIVE, + lock_cookie, + "" /*no tag*/); + cls_2pc_queue_expire_reservations(op, stale_time); + // check ownership and do reservation cleanup in one batch + auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield)); + if (ret == -ENOENT) { + // queue was deleted + ldpp_dout(this, 5) << "INFO: queue: " + << queue_name << ". was removed. cleanup will stop" << dendl; + return; + } + if (ret == -EBUSY) { + ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + return; + } + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: failed to cleanup stale reservation from queue and/or lock queue: " << queue_name + << ". error: " << ret << dendl; + } + Timer timer(io_context); + timer.expires_from_now(std::chrono::seconds(reservations_cleanup_period_s)); + boost::system::error_code ec; + timer.async_wait(yield[ec]); + } + } + + // processing of a specific queue + void process_queue(const std::string& queue_name, yield_context yield) { + constexpr auto max_elements = 1024; + auto is_idle = false; + const std::string start_marker; + + // start a the cleanup coroutine for the queue + spawn::spawn(io_context, [this, queue_name](yield_context yield) { + cleanup_queue(queue_name, yield); + }, make_stack_allocator()); + + while (true) { + // if queue was empty the last time, sleep for idle timeout + if (is_idle) { + Timer timer(io_context); + timer.expires_from_now(std::chrono::microseconds(queue_idle_sleep_us)); + boost::system::error_code ec; + timer.async_wait(yield[ec]); + } + + // get list of entries in the queue + is_idle = true; + bool truncated = false; + std::string end_marker; + std::vector entries; + auto total_entries = 0U; + { + librados::ObjectReadOperation op; + op.assert_exists(); + bufferlist obl; + int rval; + rados::cls::lock::assert_locked(&op, queue_name+"_lock", + ClsLockType::EXCLUSIVE, + lock_cookie, + "" /*no tag*/); + cls_2pc_queue_list_entries(op, start_marker, max_elements, &obl, &rval); + // check ownership and list entries in one batch + auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, nullptr, optional_yield(io_context, yield)); + if (ret == -ENOENT) { + // queue was deleted + ldpp_dout(this, 5) << "INFO: queue: " + << queue_name << ". was removed. processing will stop" << dendl; + return; + } + if (ret == -EBUSY) { + ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + return; + } + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: failed to get list of entries in queue and/or lock queue: " + << queue_name << ". error: " << ret << " (will retry)" << dendl; + continue; + } + ret = cls_2pc_queue_list_entries_result(obl, entries, &truncated, end_marker); + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: failed to parse list of entries in queue: " + << queue_name << ". 
error: " << ret << " (will retry)" << dendl; + continue; + } + } + total_entries = entries.size(); + if (total_entries == 0) { + // nothing in the queue + continue; + } + // log when queue is not idle + ldpp_dout(this, 20) << "INFO: found: " << total_entries << " entries in: " << queue_name << + ". end marker is: " << end_marker << dendl; + + is_idle = false; + auto has_error = false; + auto remove_entries = false; + auto entry_idx = 1U; + tokens_waiter waiter(io_context); + for (auto& entry : entries) { + if (has_error) { + // bail out on first error + break; + } + // TODO pass entry pointer instead of by-value + spawn::spawn(yield, [this, &queue_name, entry_idx, total_entries, &end_marker, &remove_entries, &has_error, &waiter, entry](yield_context yield) { + const auto token = waiter.make_token(); + if (process_entry(entry, yield)) { + ldpp_dout(this, 20) << "INFO: processing of entry: " << + entry.marker << " (" << entry_idx << "/" << total_entries << ") from: " << queue_name << " ok" << dendl; + remove_entries = true; + } else { + if (set_min_marker(end_marker, entry.marker) < 0) { + ldpp_dout(this, 1) << "ERROR: cannot determin minimum between malformed markers: " << end_marker << ", " << entry.marker << dendl; + } else { + ldpp_dout(this, 20) << "INFO: new end marker for removal: " << end_marker << " from: " << queue_name << dendl; + } + has_error = true; + ldpp_dout(this, 20) << "INFO: processing of entry: " << + entry.marker << " (" << entry_idx << "/" << total_entries << ") from: " << queue_name << " failed" << dendl; + } + }, make_stack_allocator()); + ++entry_idx; + } + + // wait for all pending work to finish + waiter.async_wait(yield); + + // delete all published entries from queue + if (remove_entries) { + librados::ObjectWriteOperation op; + op.assert_exists(); + rados::cls::lock::assert_locked(&op, queue_name+"_lock", + ClsLockType::EXCLUSIVE, + lock_cookie, + "" /*no tag*/); + cls_2pc_queue_remove_entries(op, end_marker); + // check ownership and deleted entries in one batch + const auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield)); + if (ret == -ENOENT) { + // queue was deleted + ldpp_dout(this, 5) << "INFO: queue: " + << queue_name << ". was removed. processing will stop" << dendl; + return; + } + if (ret == -EBUSY) { + ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + return; + } + if (ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to remove entries and/or lock queue up to: " << end_marker << " from queue: " + << queue_name << ". error: " << ret << dendl; + } else { + ldpp_dout(this, 20) << "INFO: removed entries up to: " << end_marker << " from queue: " + << queue_name << dendl; + } + } + } + } + + // lits of owned queues + using owned_queues_t = std::unordered_set; + + // process all queues + // find which of the queues is owned by this daemon and process it + void process_queues(yield_context yield) { + auto has_error = false; + owned_queues_t owned_queues; + + // add randomness to the duration between queue checking + // to make sure that different daemons are not synced + std::random_device seed; + std::mt19937 rnd_gen(seed()); + const auto min_jitter = 100; // ms + const auto max_jitter = 500; // ms + std::uniform_int_distribution<> duration_jitter(min_jitter, max_jitter); + + std::vector queue_gc; + std::mutex queue_gc_lock; + while (true) { + Timer timer(io_context); + const auto duration = (has_error ? 
+  // list of owned queues
+  using owned_queues_t = std::unordered_set<std::string>;
+
+  // process all queues
+  // find which of the queues is owned by this daemon and process it
+  void process_queues(yield_context yield) {
+    auto has_error = false;
+    owned_queues_t owned_queues;
+
+    // add randomness to the duration between queue checking
+    // to make sure that different daemons are not synced
+    std::random_device seed;
+    std::mt19937 rnd_gen(seed());
+    const auto min_jitter = 100; // ms
+    const auto max_jitter = 500; // ms
+    std::uniform_int_distribution<> duration_jitter(min_jitter, max_jitter);
+
+    std::vector<std::string> queue_gc;
+    std::mutex queue_gc_lock;
+    while (true) {
+      Timer timer(io_context);
+      const auto duration = (has_error ?
+        std::chrono::milliseconds(queues_update_retry_ms) : std::chrono::milliseconds(queues_update_period_ms)) +
+        std::chrono::milliseconds(duration_jitter(rnd_gen));
+      timer.expires_from_now(duration);
+      const auto tp = ceph::coarse_real_time::clock::to_time_t(ceph::coarse_real_time::clock::now() + duration);
+      ldpp_dout(this, 20) << "INFO: next queues processing will happen at: " << std::ctime(&tp) << dendl;
+      boost::system::error_code ec;
+      timer.async_wait(yield[ec]);
+
+      queues_t queues;
+      auto ret = read_queue_list(queues, optional_yield(io_context, yield));
+      if (ret < 0) {
+        has_error = true;
+        continue;
+      }
+
+      for (const auto& queue_name : queues) {
+        // try to lock the queue to check if it is owned by this rgw
+        // or if ownership needs to be taken
+        librados::ObjectWriteOperation op;
+        op.assert_exists();
+        rados::cls::lock::lock(&op, queue_name+"_lock",
+          ClsLockType::EXCLUSIVE,
+          lock_cookie,
+          "" /*no tag*/,
+          "" /*no description*/,
+          failover_time,
+          LOCK_FLAG_MAY_RENEW);
+
+        ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield));
+        if (ret == -EBUSY) {
+          // lock is already taken by another RGW
+          ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " owned (locked) by another daemon" << dendl;
+          // if queue was owned by this RGW, processing should be stopped, queue would be deleted from list afterwards
+          continue;
+        }
+        if (ret == -ENOENT) {
+          // queue is deleted - processing will stop the next time we try to read from the queue
+          ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " should not be locked - already deleted" << dendl;
+          continue;
+        }
+        if (ret < 0) {
+          // failed to lock for another reason, continue to process other queues
+          ldpp_dout(this, 1) << "ERROR: failed to lock queue: " << queue_name << ". error: " << ret << dendl;
+          has_error = true;
+          continue;
+        }
+        // add queue to list of owned queues
+        if (owned_queues.insert(queue_name).second) {
+          ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " now owned (locked) by this daemon" << dendl;
+          // start processing this queue
+          spawn::spawn(io_context, [this, &queue_gc, &queue_gc_lock, queue_name](yield_context yield) {
+            process_queue(queue_name, yield);
+            // if queue processing ended, it means that the queue was removed or is not owned anymore
+            // mark it for deletion
+            std::lock_guard lock_guard(queue_gc_lock);
+            queue_gc.push_back(queue_name);
+            ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " marked for removal" << dendl;
+          }, make_stack_allocator());
+        } else {
+          ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " ownership (lock) renewed" << dendl;
+        }
+      }
+      // erase all queues that were deleted
+      {
+        std::lock_guard lock_guard(queue_gc_lock);
+        std::for_each(queue_gc.begin(), queue_gc.end(), [this, &owned_queues](const std::string& queue_name) {
+          owned_queues.erase(queue_name);
+          ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " removed" << dendl;
+        });
+        queue_gc.clear();
+      }
+    }
+  }
+
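+  // Threading model: all coroutines (queue discovery, per-queue processing,
+  // reservation cleanup, per-entry pushes) run on a single
+  // boost::asio::io_context serviced by worker_count "notif-worker" threads
+  // started in the constructor below. The work_guard keeps io_context::run()
+  // from returning while the manager is idle; the destructor resets it,
+  // stops the context and joins the workers.
+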
error: " << ret << dendl; + has_error = true; + continue; + } + // add queue to list of owned queues + if (owned_queues.insert(queue_name).second) { + ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " now owned (locked) by this daemon" << dendl; + // start processing this queue + spawn::spawn(io_context, [this, &queue_gc, &queue_gc_lock, queue_name](yield_context yield) { + process_queue(queue_name, yield); + // if queue processing ended, it measn that the queue was removed or not owned anymore + // mark it for deletion + std::lock_guard lock_guard(queue_gc_lock); + queue_gc.push_back(queue_name); + ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " marked for removal" << dendl; + }, make_stack_allocator()); + } else { + ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " ownership (lock) renewed" << dendl; + } + } + // erase all queue that were deleted + { + std::lock_guard lock_guard(queue_gc_lock); + std::for_each(queue_gc.begin(), queue_gc.end(), [this, &owned_queues](const std::string& queue_name) { + owned_queues.erase(queue_name); + ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " removed" << dendl; + }); + queue_gc.clear(); + } + } + } + +public: + + ~Manager() { + work_guard.reset(); + io_context.stop(); + std::for_each(workers.begin(), workers.end(), [] (auto& worker) { worker.join(); }); + } + + // ctor: start all threads + Manager(CephContext* _cct, uint32_t _max_queue_size, uint32_t _queues_update_period_ms, + uint32_t _queues_update_retry_ms, uint32_t _queue_idle_sleep_us, u_int32_t failover_time_ms, + uint32_t _stale_reservations_period_s, uint32_t _reservations_cleanup_period_s, + uint32_t _worker_count, rgw::sal::RadosStore* store) : + max_queue_size(_max_queue_size), + queues_update_period_ms(_queues_update_period_ms), + queues_update_retry_ms(_queues_update_retry_ms), + queue_idle_sleep_us(_queue_idle_sleep_us), + failover_time(std::chrono::milliseconds(failover_time_ms)), + cct(_cct), + rados_ioctx(store->getRados()->get_notif_pool_ctx()), + lock_cookie(gen_rand_alphanumeric(cct, COOKIE_LEN)), + work_guard(boost::asio::make_work_guard(io_context)), + worker_count(_worker_count), + stale_reservations_period_s(_stale_reservations_period_s), + reservations_cleanup_period_s(_reservations_cleanup_period_s) + { + spawn::spawn(io_context, [this] (yield_context yield) { + process_queues(yield); + }, make_stack_allocator()); + + // start the worker threads to do the actual queue processing + const std::string WORKER_THREAD_NAME = "notif-worker"; + for (auto worker_id = 0U; worker_id < worker_count; ++worker_id) { + workers.emplace_back([this]() { + try { + io_context.run(); + } catch (const std::exception& err) { + ldpp_dout(this, 10) << "Notification worker failed with error: " << err.what() << dendl; + throw(err); + } + }); + const auto rc = ceph_pthread_setname(workers.back().native_handle(), + (WORKER_THREAD_NAME+std::to_string(worker_id)).c_str()); + ceph_assert(rc == 0); + } + ldpp_dout(this, 10) << "Started notification manager with: " << worker_count << " workers" << dendl; + } + + int add_persistent_topic(const std::string& topic_name, optional_yield y) { + if (topic_name == Q_LIST_OBJECT_NAME) { + ldpp_dout(this, 1) << "ERROR: topic name cannot be: " << Q_LIST_OBJECT_NAME << " (conflict with queue list object name)" << dendl; + return -EINVAL; + } + librados::ObjectWriteOperation op; + op.create(true); + cls_2pc_queue_init(op, topic_name, max_queue_size); + auto ret = rgw_rados_operate(this, rados_ioctx, topic_name, &op, y); + if (ret == 
-EEXIST) { + // queue already exists - nothing to do + ldpp_dout(this, 20) << "INFO: queue for topic: " << topic_name << " already exists. nothing to do" << dendl; + return 0; + } + if (ret < 0) { + // failed to create queue + ldpp_dout(this, 1) << "ERROR: failed to create queue for topic: " << topic_name << ". error: " << ret << dendl; + return ret; + } + + bufferlist empty_bl; + std::map<std::string, bufferlist> new_topic{{topic_name, empty_bl}}; + op.omap_set(new_topic); + ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, y); + if (ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to add queue: " << topic_name << " to queue list. error: " << ret << dendl; + return ret; + } + ldpp_dout(this, 20) << "INFO: queue: " << topic_name << " added to queue list" << dendl; + return 0; + } + + int remove_persistent_topic(const std::string& topic_name, optional_yield y) { + librados::ObjectWriteOperation op; + op.remove(); + auto ret = rgw_rados_operate(this, rados_ioctx, topic_name, &op, y); + if (ret == -ENOENT) { + // queue already removed - nothing to do + ldpp_dout(this, 20) << "INFO: queue for topic: " << topic_name << " already removed. nothing to do" << dendl; + return 0; + } + if (ret < 0) { + // failed to remove queue + ldpp_dout(this, 1) << "ERROR: failed to remove queue for topic: " << topic_name << ". error: " << ret << dendl; + return ret; + } + + std::set<std::string> topic_to_remove{{topic_name}}; + op.omap_rm_keys(topic_to_remove); + ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, y); + if (ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to remove queue: " << topic_name << " from queue list. error: " << ret << dendl; + return ret; + } + ldpp_dout(this, 20) << "INFO: queue: " << topic_name << " removed from queue list" << dendl; + return 0; + } +}; + +// singleton manager +// note that the manager itself is not a singleton, and multiple instances may co-exist +// TODO make the pointer atomic in allocation and deallocation to avoid race conditions +static Manager* s_manager = nullptr; + +constexpr size_t MAX_QUEUE_SIZE = 128*1000*1000; // 128MB +constexpr uint32_t Q_LIST_UPDATE_MSEC = 1000*30; // check queue list every 30 seconds +constexpr uint32_t Q_LIST_RETRY_MSEC = 1000; // retry every second if queue list update failed +constexpr uint32_t IDLE_TIMEOUT_USEC = 100*1000; // idle sleep 100ms +constexpr uint32_t FAILOVER_TIME_MSEC = 3*Q_LIST_UPDATE_MSEC; // FAILOVER TIME 3x renew time +constexpr uint32_t WORKER_COUNT = 1; // 1 worker thread +constexpr uint32_t STALE_RESERVATIONS_PERIOD_S = 120; // cleanup reservations that are more than 2 minutes old +constexpr uint32_t RESERVATIONS_CLEANUP_PERIOD_S = 30; // reservation cleanup every 30 seconds + +bool init(CephContext* cct, rgw::sal::RadosStore* store, const DoutPrefixProvider *dpp) { + if (s_manager) { + return false; + } + // TODO: take conf from CephContext + s_manager = new Manager(cct, MAX_QUEUE_SIZE, + Q_LIST_UPDATE_MSEC, Q_LIST_RETRY_MSEC, + IDLE_TIMEOUT_USEC, FAILOVER_TIME_MSEC, + STALE_RESERVATIONS_PERIOD_S, RESERVATIONS_CLEANUP_PERIOD_S, + WORKER_COUNT, + store); + return true; +} + +void shutdown() { + delete s_manager; + s_manager = nullptr; +} + +int add_persistent_topic(const std::string& topic_name, optional_yield y) { + if (!s_manager) { + return -EAGAIN; + } + return s_manager->add_persistent_topic(topic_name, y); +} + +int remove_persistent_topic(const std::string& topic_name, optional_yield y) { + if (!s_manager) { + return -EAGAIN; + } + return s_manager->remove_persistent_topic(topic_name, y); +} + 
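+// Illustrative call sequence for the API above (an editorial sketch, not part
+// of the original change; it assumes "cct", "store" and "dpp" are valid
+// objects owned by the radosgw process, and uses null_yield for synchronous
+// execution):
+//
+//   if (rgw::notify::init(cct, store, dpp)) {
+//     rgw::notify::add_persistent_topic("my-topic", null_yield);
+//     // ... serve requests; the manager delivers queued notifications ...
+//     rgw::notify::remove_persistent_topic("my-topic", null_yield);
+//     rgw::notify::shutdown();
+//   }
+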
+rgw::sal::Object* get_object_with_attributes( + const reservation_t& res, rgw::sal::Object* obj) { + // in case of copy obj, the tags and metadata are taken from source + const auto src_obj = res.src_object ? res.src_object : obj; + if (src_obj->get_attrs().empty()) { + if (!src_obj->get_bucket()) { + src_obj->set_bucket(res.bucket); + } + const auto ret = src_obj->get_obj_attrs(res.yield, res.dpp); + if (ret < 0) { + ldpp_dout(res.dpp, 20) << "failed to get attributes from object: " << + src_obj->get_key() << ". ret = " << ret << dendl; + return nullptr; + } + } + return src_obj; +} + +static inline void metadata_from_attributes( + reservation_t& res, rgw::sal::Object* obj) { + auto& metadata = res.x_meta_map; + const auto src_obj = get_object_with_attributes(res, obj); + if (!src_obj) { + return; + } + res.metadata_fetched_from_attributes = true; + for (auto& attr : src_obj->get_attrs()) { + if (boost::algorithm::starts_with(attr.first, RGW_ATTR_META_PREFIX)) { + std::string_view key(attr.first); + key.remove_prefix(sizeof(RGW_ATTR_PREFIX)-1); + // we want to pass a null terminated version + // of the bufferlist, hence "to_str().c_str()" + metadata.emplace(key, attr.second.to_str().c_str()); + } + } +} + +static inline void tags_from_attributes( + const reservation_t& res, rgw::sal::Object* obj, KeyMultiValueMap& tags) { + const auto src_obj = get_object_with_attributes(res, obj); + if (!src_obj) { + return; + } + const auto& attrs = src_obj->get_attrs(); + const auto attr_iter = attrs.find(RGW_ATTR_TAGS); + if (attr_iter != attrs.end()) { + auto bliter = attr_iter->second.cbegin(); + RGWObjTags obj_tags; + try { + ::decode(obj_tags, bliter); + } catch(buffer::error&) { + // not able to decode tags + return; + } + tags = std::move(obj_tags.get_tags()); + } +} + +// populate event from request +static inline void populate_event(reservation_t& res, + rgw::sal::Object* obj, + uint64_t size, + const ceph::real_time& mtime, + const std::string& etag, + const std::string& version, + EventType event_type, + rgw_pubsub_s3_event& event) { + event.eventTime = mtime; + event.eventName = to_event_string(event_type); + event.userIdentity = res.user_id; // user that triggered the change + event.x_amz_request_id = res.req_id; // request ID of the original change + event.x_amz_id_2 = res.store->getRados()->host_id; // RGW on which the change was made + // configurationId is filled from notification configuration + event.bucket_name = res.bucket->get_name(); + event.bucket_ownerIdentity = res.bucket->get_owner() ? res.bucket->get_owner()->get_id().id : ""; + const auto region = res.store->get_zone()->get_zonegroup().get_api_name(); + rgw::ARN bucket_arn(res.bucket->get_key()); + bucket_arn.region = region; + event.bucket_arn = to_string(bucket_arn); + event.object_key = res.object_name ? 
*res.object_name : obj->get_name(); + event.object_size = size; + event.object_etag = etag; + event.object_versionId = version; + event.awsRegion = region; + // use timestamp as per key sequence id (hex encoded) + const utime_t ts(real_clock::now()); + boost::algorithm::hex((const char*)&ts, (const char*)&ts + sizeof(utime_t), + std::back_inserter(event.object_sequencer)); + set_event_id(event.id, etag, ts); + event.bucket_id = res.bucket->get_bucket_id(); + // pass meta data + if (!res.metadata_fetched_from_attributes) { + // either no metadata exist or no metadata filter was used + metadata_from_attributes(res, obj); + } + event.x_meta_map = res.x_meta_map; + // pass tags + if (!res.tagset || + (*res.tagset).get_tags().empty()) { + // try to fetch the tags from the attributes + tags_from_attributes(res, obj, event.tags); + } else { + event.tags = (*res.tagset).get_tags(); + } + // opaque data will be filled from topic configuration +} + +static inline bool notification_match(reservation_t& res, + const rgw_pubsub_topic_filter& filter, + EventType event, + const RGWObjTags* req_tags) { + if (!match(filter.events, event)) { + return false; + } + const auto obj = res.object; + if (!match(filter.s3_filter.key_filter, + res.object_name ? *res.object_name : obj->get_name())) { + return false; + } + + if (!filter.s3_filter.metadata_filter.kv.empty()) { + // metadata filter exists + if (res.s) { + res.x_meta_map = res.s->info.x_meta_map; + } + metadata_from_attributes(res, obj); + if (!match(filter.s3_filter.metadata_filter, res.x_meta_map)) { + return false; + } + } + + if (!filter.s3_filter.tag_filter.kv.empty()) { + // tag filter exists + if (req_tags) { + // tags in the request + if (!match(filter.s3_filter.tag_filter, req_tags->get_tags())) { + return false; + } + } else if (res.tagset && !(*res.tagset).get_tags().empty()) { + // tags were cached in req_state + if (!match(filter.s3_filter.tag_filter, (*res.tagset).get_tags())) { + return false; + } + } else { + // try to fetch tags from the attributes + KeyMultiValueMap tags; + tags_from_attributes(res, obj, tags); + if (!match(filter.s3_filter.tag_filter, tags)) { + return false; + } + } + } + + return true; +} + + int publish_reserve(const DoutPrefixProvider* dpp, + EventType event_type, + reservation_t& res, + const RGWObjTags* req_tags) +{ + RGWPubSub ps(res.store, res.user_tenant); + RGWPubSub::Bucket ps_bucket(&ps, res.bucket->get_key()); + rgw_pubsub_bucket_topics bucket_topics; + auto rc = ps_bucket.get_topics(&bucket_topics); + if (rc < 0) { + // failed to fetch bucket topics + return rc; + } + for (const auto& bucket_topic : bucket_topics.topics) { + const rgw_pubsub_topic_filter& topic_filter = bucket_topic.second; + const rgw_pubsub_topic& topic_cfg = topic_filter.topic; + if (!notification_match(res, topic_filter, event_type, req_tags)) { + // notification does not apply to req_state + continue; + } + ldpp_dout(res.dpp, 20) << "INFO: notification: '" << topic_filter.s3_id << + "' on topic: '" << topic_cfg.dest.arn_topic << + "' and bucket: '" << res.bucket->get_name() << + "' (unique topic: '" << topic_cfg.name << + "') apply to event of type: '" << to_string(event_type) << "'" << dendl; + + cls_2pc_reservation::id_t res_id; + if (topic_cfg.dest.persistent) { + // TODO: take default reservation size from conf + constexpr auto DEFAULT_RESERVATION = 4*1024U; // 4K + res.size = DEFAULT_RESERVATION; + librados::ObjectWriteOperation op; + bufferlist obl; + int rval; + const auto& queue_name = topic_cfg.dest.arn_topic; + 
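+      // phase one of the two-phase commit: reserve room for the pending
+      // notification in the queue; the reservation is later committed
+      // (publish_commit) or aborted (publish_abort, also invoked from the
+      // reservation_t dtor) once the outcome of the operation is known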
cls_2pc_queue_reserve(op, res.size, 1, &obl, &rval); + auto ret = rgw_rados_operate( + res.dpp, res.store->getRados()->get_notif_pool_ctx(), + queue_name, &op, res.yield, librados::OPERATION_RETURNVEC); + if (ret < 0) { + ldpp_dout(res.dpp, 1) << + "ERROR: failed to reserve notification on queue: " + << queue_name << ". error: " << ret << dendl; + // if no space is left in queue we ask client to slow down + return (ret == -ENOSPC) ? -ERR_RATE_LIMITED : ret; + } + ret = cls_2pc_queue_reserve_result(obl, res_id); + if (ret < 0) { + ldpp_dout(res.dpp, 1) << "ERROR: failed to parse reservation id. error: " << ret << dendl; + return ret; + } + } + res.topics.emplace_back(topic_filter.s3_id, topic_cfg, res_id); + } + return 0; +} + +int publish_commit(rgw::sal::Object* obj, + uint64_t size, + const ceph::real_time& mtime, + const std::string& etag, + const std::string& version, + EventType event_type, + reservation_t& res, + const DoutPrefixProvider* dpp) +{ + for (auto& topic : res.topics) { + if (topic.cfg.dest.persistent && + topic.res_id == cls_2pc_reservation::NO_ID) { + // nothing to commit or already committed/aborted + continue; + } + event_entry_t event_entry; + populate_event(res, obj, size, mtime, etag, version, event_type, event_entry.event); + event_entry.event.configurationId = topic.configurationId; + event_entry.event.opaque_data = topic.cfg.opaque_data; + if (topic.cfg.dest.persistent) { + event_entry.push_endpoint = std::move(topic.cfg.dest.push_endpoint); + event_entry.push_endpoint_args = + std::move(topic.cfg.dest.push_endpoint_args); + event_entry.arn_topic = topic.cfg.dest.arn_topic; + bufferlist bl; + encode(event_entry, bl); + const auto& queue_name = topic.cfg.dest.arn_topic; + if (bl.length() > res.size) { + // try to make a larger reservation, fail only if this is not possible + ldpp_dout(dpp, 5) << "WARNING: committed size: " << bl.length() + << " exceeded reserved size: " << res.size + << + " . trying to make a larger reservation on queue:" << queue_name + << dendl; + // first cancel the existing reservation + librados::ObjectWriteOperation op; + cls_2pc_queue_abort(op, topic.res_id); + auto ret = rgw_rados_operate( + dpp, res.store->getRados()->get_notif_pool_ctx(), + topic.cfg.dest.arn_topic, &op, + res.yield); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to abort reservation: " + << topic.res_id << + " when trying to make a larger reservation on queue: " << queue_name + << ". error: " << ret << dendl; + return ret; + } + // now try to make a bigger one + buffer::list obl; + int rval; + cls_2pc_queue_reserve(op, bl.length(), 1, &obl, &rval); + ret = rgw_rados_operate( + dpp, res.store->getRados()->get_notif_pool_ctx(), + queue_name, &op, res.yield, librados::OPERATION_RETURNVEC); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to reserve extra space on queue: " + << queue_name + << ". error: " << ret << dendl; + return (ret == -ENOSPC) ? -ERR_RATE_LIMITED : ret; + } + ret = cls_2pc_queue_reserve_result(obl, topic.res_id); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to parse reservation id for " + "extra space. 
error: " << ret << dendl; + return ret; + } + } + std::vector bl_data_vec{std::move(bl)}; + librados::ObjectWriteOperation op; + cls_2pc_queue_commit(op, bl_data_vec, topic.res_id); + const auto ret = rgw_rados_operate( + dpp, res.store->getRados()->get_notif_pool_ctx(), + queue_name, &op, res.yield); + topic.res_id = cls_2pc_reservation::NO_ID; + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to commit reservation to queue: " + << queue_name << ". error: " << ret + << dendl; + return ret; + } + } else { + try { + // TODO add endpoint LRU cache + const auto push_endpoint = RGWPubSubEndpoint::create( + topic.cfg.dest.push_endpoint, + topic.cfg.dest.arn_topic, + RGWHTTPArgs(topic.cfg.dest.push_endpoint_args, dpp), + dpp->get_cct()); + ldpp_dout(res.dpp, 20) << "INFO: push endpoint created: " + << topic.cfg.dest.push_endpoint << dendl; + const auto ret = push_endpoint->send_to_completion_async( + dpp->get_cct(), event_entry.event, res.yield); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: push to endpoint " + << topic.cfg.dest.push_endpoint + << " failed. error: " << ret << dendl; + if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); + return ret; + } + if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); + } catch (const RGWPubSubEndpoint::configuration_error& e) { + ldpp_dout(dpp, 1) << "ERROR: failed to create push endpoint: " + << topic.cfg.dest.push_endpoint << ". error: " << e.what() << dendl; + if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); + return -EINVAL; + } + } + } + return 0; +} + +int publish_abort(reservation_t& res) { + for (auto& topic : res.topics) { + if (!topic.cfg.dest.persistent || + topic.res_id == cls_2pc_reservation::NO_ID) { + // nothing to abort or already committed/aborted + continue; + } + const auto& queue_name = topic.cfg.dest.arn_topic; + librados::ObjectWriteOperation op; + cls_2pc_queue_abort(op, topic.res_id); + const auto ret = rgw_rados_operate( + res.dpp, res.store->getRados()->get_notif_pool_ctx(), + queue_name, &op, res.yield); + if (ret < 0) { + ldpp_dout(res.dpp, 1) << "ERROR: failed to abort reservation: " + << topic.res_id << + " from queue: " << queue_name << ". 
error: " << ret << dendl; + return ret; + } + topic.res_id = cls_2pc_reservation::NO_ID; + } + return 0; +} + +reservation_t::reservation_t(const DoutPrefixProvider* _dpp, + rgw::sal::RadosStore* _store, + const req_state* _s, + rgw::sal::Object* _object, + rgw::sal::Object* _src_object, + const std::string* _object_name) : + dpp(_s), store(_store), s(_s), size(0) /* XXX */, + object(_object), src_object(_src_object), bucket(_s->bucket.get()), + object_name(_object_name), + tagset(_s->tagset), + x_meta_map(_s->info.x_meta_map), + metadata_fetched_from_attributes(false), + user_id(_s->user->get_id().id), + user_tenant(_s->user->get_id().tenant), + req_id(_s->req_id), + yield(_s->yield) +{} + +reservation_t::reservation_t(const DoutPrefixProvider* _dpp, + rgw::sal::RadosStore* _store, + rgw::sal::Object* _object, + rgw::sal::Object* _src_object, + rgw::sal::Bucket* _bucket, + const std::string& _user_id, + const std::string& _user_tenant, + const std::string& _req_id, + optional_yield y) : + dpp(_dpp), store(_store), s(nullptr), size(0) /* XXX */, + object(_object), src_object(_src_object), bucket(_bucket), + object_name(nullptr), + metadata_fetched_from_attributes(false), + user_id(_user_id), + user_tenant(_user_tenant), + req_id(_req_id), + yield(y) +{} + +reservation_t::~reservation_t() { + publish_abort(*this); +} + +} // namespace rgw::notify diff --git a/src/rgw/driver/rados/rgw_notify.h b/src/rgw/driver/rados/rgw_notify.h new file mode 100644 index 00000000000..175dc11463d --- /dev/null +++ b/src/rgw/driver/rados/rgw_notify.h @@ -0,0 +1,117 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include +#include "common/ceph_time.h" +#include "include/common_fwd.h" +#include "rgw_notify_event_type.h" +#include "common/async/yield_context.h" +#include "cls/2pc_queue/cls_2pc_queue_types.h" +#include "rgw_pubsub.h" + +// forward declarations +namespace rgw::sal { + class RadosStore; + class RGWObject; +} + +class RGWRados; +struct rgw_obj_key; + +namespace rgw::notify { + +// initialize the notification manager +// notification manager is dequeing the 2-phase-commit queues +// and send the notifications to the endpoints +bool init(CephContext* cct, rgw::sal::RadosStore* store, const DoutPrefixProvider *dpp); + +// shutdown the notification manager +void shutdown(); + +// create persistent delivery queue for a topic (endpoint) +// this operation also add a topic name to the common (to all RGWs) list of all topics +int add_persistent_topic(const std::string& topic_name, optional_yield y); + +// remove persistent delivery queue for a topic (endpoint) +// this operation also remove the topic name from the common (to all RGWs) list of all topics +int remove_persistent_topic(const std::string& topic_name, optional_yield y); + +// struct holding reservation information +// populated in the publish_reserve call +// then used to commit or abort the reservation +struct reservation_t { + struct topic_t { + topic_t(const std::string& _configurationId, const rgw_pubsub_topic& _cfg, + cls_2pc_reservation::id_t _res_id) : + configurationId(_configurationId), cfg(_cfg), res_id(_res_id) {} + + const std::string configurationId; + const rgw_pubsub_topic cfg; + // res_id is reset after topic is committed/aborted + cls_2pc_reservation::id_t res_id; + }; + + const DoutPrefixProvider* const dpp; + std::vector topics; + rgw::sal::RadosStore* const store; + const req_state* const s; + size_t size; + rgw::sal::Object* const object; + 
rgw::sal::Object* const src_object; // may differ from object + rgw::sal::Bucket* const bucket; + const std::string* const object_name; + boost::optional<RGWObjTags> tagset; + meta_map_t x_meta_map; // metadata cached by value + bool metadata_fetched_from_attributes; + const std::string user_id; + const std::string user_tenant; + const std::string req_id; + optional_yield yield; + + /* ctor for rgw_op callers */ + reservation_t(const DoutPrefixProvider* _dpp, + rgw::sal::RadosStore* _store, + const req_state* _s, + rgw::sal::Object* _object, + rgw::sal::Object* _src_object, + const std::string* _object_name); + + /* ctor for non-request caller (e.g., lifecycle) */ + reservation_t(const DoutPrefixProvider* _dpp, + rgw::sal::RadosStore* _store, + rgw::sal::Object* _object, + rgw::sal::Object* _src_object, + rgw::sal::Bucket* _bucket, + const std::string& _user_id, + const std::string& _user_tenant, + const std::string& _req_id, + optional_yield y); + + // dtor doing resource leak guarding + // aborting the reservation if not already committed or aborted + ~reservation_t(); +}; + +// create a reservation on the 2-phase-commit queue + int publish_reserve(const DoutPrefixProvider *dpp, + EventType event_type, + reservation_t& reservation, + const RGWObjTags* req_tags); + +// commit the reservation to the queue +int publish_commit(rgw::sal::Object* obj, + uint64_t size, + const ceph::real_time& mtime, + const std::string& etag, + const std::string& version, + EventType event_type, + reservation_t& reservation, + const DoutPrefixProvider *dpp); + +// cancel the reservation +int publish_abort(reservation_t& reservation); + +} + diff --git a/src/rgw/driver/rados/rgw_obj_manifest.cc b/src/rgw/driver/rados/rgw_obj_manifest.cc new file mode 100644 index 00000000000..3838f5cf328 --- /dev/null +++ b/src/rgw/driver/rados/rgw_obj_manifest.cc @@ -0,0 +1,404 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_obj_manifest.h" + +#include "services/svc_zone.h" +#include "rgw_rados.h" +#include "rgw_bucket.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +int RGWObjManifest::generator::create_next(uint64_t ofs) +{ + if (ofs < last_ofs) /* only going forward */ + return -EINVAL; + + uint64_t max_head_size = manifest->get_max_head_size(); + + if (ofs < max_head_size) { + manifest->set_head_size(ofs); + } + + if (ofs >= max_head_size) { + manifest->set_head_size(max_head_size); + cur_stripe = (ofs - max_head_size) / rule.stripe_max_size; + cur_stripe_size = rule.stripe_max_size; + + if (cur_part_id == 0 && max_head_size > 0) { + cur_stripe++; + } + } + + last_ofs = ofs; + manifest->set_obj_size(ofs); + + manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, NULL, &cur_obj); + + return 0; +} + +int RGWObjManifest::append(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, + const RGWZoneParams& zone_params) +{ + if (explicit_objs || m.explicit_objs) { + return append_explicit(dpp, m, zonegroup, zone_params); + } + + if (rules.empty()) { + *this = m; + return 0; + } + + string override_prefix; + + if (prefix.empty()) { + prefix = m.prefix; + } + + if (prefix != m.prefix) { + override_prefix = m.prefix; + } + + map<uint64_t, RGWObjManifestRule>::iterator miter = m.rules.begin(); + if (miter == m.rules.end()) { + return append_explicit(dpp, m, zonegroup, zone_params); + } + + for (; miter != m.rules.end(); ++miter) { + map<uint64_t, RGWObjManifestRule>::reverse_iterator last_rule = rules.rbegin(); + + RGWObjManifestRule& 
rule = last_rule->second; + + if (rule.part_size == 0) { + rule.part_size = obj_size - rule.start_ofs; + } + + RGWObjManifestRule& next_rule = miter->second; + if (!next_rule.part_size) { + next_rule.part_size = m.obj_size - next_rule.start_ofs; + } + + string rule_prefix = prefix; + if (!rule.override_prefix.empty()) { + rule_prefix = rule.override_prefix; + } + + string next_rule_prefix = m.prefix; + if (!next_rule.override_prefix.empty()) { + next_rule_prefix = next_rule.override_prefix; + } + + if (rule.part_size != next_rule.part_size || + rule.stripe_max_size != next_rule.stripe_max_size || + rule_prefix != next_rule_prefix) { + if (next_rule_prefix != prefix) { + append_rules(m, miter, &next_rule_prefix); + } else { + append_rules(m, miter, NULL); + } + break; + } + + uint64_t expected_part_num = rule.start_part_num + 1; + if (rule.part_size > 0) { + expected_part_num = rule.start_part_num + (obj_size + next_rule.start_ofs - rule.start_ofs) / rule.part_size; + } + + if (expected_part_num != next_rule.start_part_num) { + append_rules(m, miter, NULL); + break; + } + } + + set_obj_size(obj_size + m.obj_size); + + return 0; +} + +void RGWObjManifest::append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& miter, + string *override_prefix) +{ + for (; miter != m.rules.end(); ++miter) { + RGWObjManifestRule rule = miter->second; + rule.start_ofs += obj_size; + if (override_prefix) + rule.override_prefix = *override_prefix; + rules[rule.start_ofs] = rule; + } +} + +void RGWObjManifest::convert_to_explicit(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) +{ + if (explicit_objs) { + return; + } + obj_iterator iter = obj_begin(dpp); + + while (iter != obj_end(dpp)) { + RGWObjManifestPart& part = objs[iter.get_stripe_ofs()]; + const rgw_obj_select& os = iter.get_location(); + const rgw_raw_obj& raw_loc = os.get_raw_obj(zonegroup, zone_params); + part.loc_ofs = 0; + + uint64_t ofs = iter.get_stripe_ofs(); + + if (ofs == 0) { + part.loc = obj; + } else { + RGWSI_Tier_RADOS::raw_obj_to_obj(tail_placement.bucket, raw_loc, &part.loc); + } + ++iter; + uint64_t next_ofs = iter.get_stripe_ofs(); + + part.size = next_ofs - ofs; + } + + explicit_objs = true; + rules.clear(); + prefix.clear(); +} + +int RGWObjManifest::append_explicit(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) +{ + if (!explicit_objs) { + convert_to_explicit(dpp, zonegroup, zone_params); + } + if (!m.explicit_objs) { + m.convert_to_explicit(dpp, zonegroup, zone_params); + } + map<uint64_t, RGWObjManifestPart>::iterator iter; + uint64_t base = obj_size; + for (iter = m.objs.begin(); iter != m.objs.end(); ++iter) { + RGWObjManifestPart& part = iter->second; + objs[base + iter->first] = part; + } + obj_size += m.obj_size; + + return 0; +} + +bool RGWObjManifest::get_rule(uint64_t ofs, RGWObjManifestRule *rule) +{ + if (rules.empty()) { + return false; + } + + map<uint64_t, RGWObjManifestRule>::iterator iter = rules.upper_bound(ofs); + if (iter != rules.begin()) { + --iter; + } + + *rule = iter->second; + + return true; +} + +int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, + const rgw_placement_rule& head_placement_rule, + const rgw_placement_rule *tail_placement_rule, + const rgw_bucket& _b, const rgw_obj& _obj) +{ + manifest = _m; + + if (!tail_placement_rule) { + manifest->set_tail_placement(head_placement_rule, _b); + } else { + rgw_placement_rule new_tail_rule = *tail_placement_rule; + new_tail_rule.inherit_from(head_placement_rule); + 
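+    // note: fields left empty on the explicit tail rule are inherited from
+    // the head placement rule by inherit_from() above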
manifest->set_tail_placement(new_tail_rule, _b); + } + + manifest->set_head(head_placement_rule, _obj, 0); + last_ofs = 0; + + if (manifest->get_prefix().empty()) { + char buf[33]; + gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); + + string oid_prefix = "."; + oid_prefix.append(buf); + oid_prefix.append("_"); + + manifest->set_prefix(oid_prefix); + } + + bool found = manifest->get_rule(0, &rule); + if (!found) { + derr << "ERROR: manifest->get_rule() could not find rule" << dendl; + return -EIO; + } + + uint64_t head_size = manifest->get_head_size(); + + if (head_size > 0) { + cur_stripe_size = head_size; + } else { + cur_stripe_size = rule.stripe_max_size; + } + + cur_part_id = rule.start_part_num; + + manifest->get_implicit_location(cur_part_id, cur_stripe, 0, NULL, &cur_obj); + + // Normal object which was not generated through a copy operation + manifest->set_tail_instance(_obj.key.instance); + + return 0; +} + +void RGWObjManifestPart::generate_test_instances(std::list<RGWObjManifestPart*>& o) +{ + o.push_back(new RGWObjManifestPart); + + RGWObjManifestPart *p = new RGWObjManifestPart; + rgw_bucket b; + init_bucket(&b, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12"); + + p->loc = rgw_obj(b, "object"); + p->loc_ofs = 512 * 1024; + p->size = 128 * 1024; + o.push_back(p); +} + +void RGWObjManifest::generate_test_instances(std::list<RGWObjManifest*>& o) +{ + RGWObjManifest *m = new RGWObjManifest; + map<uint64_t, RGWObjManifestPart> objs; + uint64_t total_size = 0; + for (int i = 0; i<10; i++) { + RGWObjManifestPart p; + rgw_bucket b; + init_bucket(&b, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12"); + p.loc = rgw_obj(b, "object"); + p.loc_ofs = 0; + p.size = 512 * 1024; + total_size += p.size; + objs[total_size] = p; + } + m->set_explicit(total_size, objs); + o.push_back(m); + o.push_back(new RGWObjManifest); +} + +void RGWObjManifestPart::dump(Formatter *f) const +{ + f->open_object_section("loc"); + loc.dump(f); + f->close_section(); + f->dump_unsigned("loc_ofs", loc_ofs); + f->dump_unsigned("size", size); +} + +void RGWObjManifest::obj_iterator::dump(Formatter *f) const +{ + f->dump_unsigned("part_ofs", part_ofs); + f->dump_unsigned("stripe_ofs", stripe_ofs); + f->dump_unsigned("ofs", ofs); + f->dump_unsigned("stripe_size", stripe_size); + f->dump_int("cur_part_id", cur_part_id); + f->dump_int("cur_stripe", cur_stripe); + f->dump_string("cur_override_prefix", cur_override_prefix); + f->dump_object("location", location); +} + +void RGWObjManifest::dump(Formatter *f) const +{ + map<uint64_t, RGWObjManifestPart>::const_iterator iter = objs.begin(); + f->open_array_section("objs"); + for (; iter != objs.end(); ++iter) { + f->dump_unsigned("ofs", iter->first); + f->open_object_section("part"); + iter->second.dump(f); + f->close_section(); + } + f->close_section(); + f->dump_unsigned("obj_size", obj_size); + ::encode_json("explicit_objs", explicit_objs, f); + ::encode_json("head_size", head_size, f); + ::encode_json("max_head_size", max_head_size, f); + ::encode_json("prefix", prefix, f); + ::encode_json("rules", rules, f); + ::encode_json("tail_instance", tail_instance, f); + ::encode_json("tail_placement", tail_placement, f); + + // nullptr being passed into iterators since there + // is no cct and we aren't doing anything with these + // iterators that would write to the log + f->dump_object("begin_iter", obj_begin(nullptr)); + f->dump_object("end_iter", obj_end(nullptr)); +} + +void RGWObjManifestRule::dump(Formatter *f) const +{ + encode_json("start_part_num", start_part_num, f); + encode_json("start_ofs", start_ofs, f); + encode_json("part_size", part_size, 
f); + encode_json("stripe_max_size", stripe_max_size, f); + encode_json("override_prefix", override_prefix, f); +} + +void rgw_obj_select::dump(Formatter *f) const +{ + f->dump_string("placement_rule", placement_rule.to_str()); + f->dump_object("obj", obj); + f->dump_object("raw_obj", raw_obj); + f->dump_bool("is_raw", is_raw); +} + +void RGWObjTier::dump(Formatter *f) const +{ + encode_json("name", name, f); + encode_json("tier_placement", tier_placement, f); + encode_json("is_multipart_upload", is_multipart_upload, f); +} + +// returns true on success, false on failure +static bool rgw_get_obj_data_pool(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params, + const rgw_placement_rule& head_placement_rule, + const rgw_obj& obj, rgw_pool *pool) +{ + if (!zone_params.get_head_data_pool(head_placement_rule, obj, pool)) { + RGWZonePlacementInfo placement; + if (!zone_params.get_placement(zonegroup.default_placement.name, &placement)) { + return false; + } + + if (!obj.in_extra_data) { + *pool = placement.get_data_pool(zonegroup.default_placement.storage_class); + } else { + *pool = placement.get_data_extra_pool(); + } + } + + return true; +} + +static bool rgw_obj_to_raw(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params, + const rgw_placement_rule& head_placement_rule, + const rgw_obj& obj, rgw_raw_obj *raw_obj) +{ + get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); + + return rgw_get_obj_data_pool(zonegroup, zone_params, head_placement_rule, obj, &raw_obj->pool); +} + +rgw_raw_obj rgw_obj_select::get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const +{ + if (!is_raw) { + rgw_raw_obj r; + rgw_obj_to_raw(zonegroup, zone_params, placement_rule, obj, &r); + return r; + } + return raw_obj; +} + +// returns true on success, false on failure +bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) +{ + return rgw_get_obj_data_pool(svc.zone->get_zonegroup(), svc.zone->get_zone_params(), placement_rule, obj, pool); +} + diff --git a/src/rgw/driver/rados/rgw_obj_manifest.h b/src/rgw/driver/rados/rgw_obj_manifest.h new file mode 100644 index 00000000000..ac73359305e --- /dev/null +++ b/src/rgw/driver/rados/rgw_obj_manifest.h @@ -0,0 +1,609 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include "rgw_common.h" +#include "rgw_compression_types.h" +#include "rgw_sal.h" +#include "rgw_zone.h" + +class RGWSI_Zone; +struct RGWZoneGroup; +struct RGWZoneParams; +class RGWRados; +namespace rgw { namespace sal { + class RadosStore; +} }; + +class rgw_obj_select { + rgw_placement_rule placement_rule; + rgw_obj obj; + rgw_raw_obj raw_obj; + bool is_raw; + +public: + rgw_obj_select() : is_raw(false) {} + explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {} + explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {} + rgw_obj_select(const rgw_obj_select& rhs) { + placement_rule = rhs.placement_rule; + is_raw = rhs.is_raw; + if (is_raw) { + raw_obj = rhs.raw_obj; + } else { + obj = rhs.obj; + } + } + + rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const; + rgw_raw_obj get_raw_obj(rgw::sal::RadosStore* store) const; + + rgw_obj_select& operator=(const rgw_obj& rhs) { + obj = rhs; + is_raw = false; + return *this; + } + + rgw_obj_select& operator=(const rgw_raw_obj& rhs) { + raw_obj = rhs; + is_raw = true; + return *this; + } + + void set_placement_rule(const rgw_placement_rule& rule) { + placement_rule = rule; + } + void dump(Formatter *f) const; +}; + +struct RGWObjManifestPart { + rgw_obj loc; /* the object where the data is located */ + uint64_t loc_ofs; /* the offset at that object where the data is located */ + uint64_t size; /* the part size */ + + RGWObjManifestPart() : loc_ofs(0), size(0) {} + + void encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + encode(loc, bl); + encode(loc_ofs, bl); + encode(size, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); + decode(loc, bl); + decode(loc_ofs, bl); + decode(size, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + static void generate_test_instances(std::list& o); +}; +WRITE_CLASS_ENCODER(RGWObjManifestPart) + +/* + The manifest defines a set of rules for structuring the object parts. + There are a few terms to note: + - head: the head part of the object, which is the part that contains + the first chunk of data. An object might not have a head (as in the + case of multipart-part objects). + - stripe: data portion of a single rgw object that resides on a single + rados object. + - part: a collection of stripes that make a contiguous part of an + object. A regular object will only have one part (although might have + many stripes), a multipart object might have many parts. Each part + has a fixed stripe size, although the last stripe of a part might + be smaller than that. Consecutive parts may be merged if their stripe + value is the same. 
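+
+ An illustrative layout (editorial example, the numbers are not taken from
+ the code): with a stripe_max_size of 4MB, a 10MB part is stored as two 4MB
+ stripes followed by one 2MB stripe, each stripe in its own rados object.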
+*/ + +struct RGWObjManifestRule { + uint32_t start_part_num; + uint64_t start_ofs; + uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */ + uint64_t stripe_max_size; /* underlying obj max size */ + std::string override_prefix; + + RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {} + RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) : + start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {} + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(start_part_num, bl); + encode(start_ofs, bl); + encode(part_size, bl); + encode(stripe_max_size, bl); + encode(override_prefix, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(start_part_num, bl); + decode(start_ofs, bl); + decode(part_size, bl); + decode(stripe_max_size, bl); + if (struct_v >= 2) + decode(override_prefix, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; +}; +WRITE_CLASS_ENCODER(RGWObjManifestRule) + +struct RGWObjTier { + std::string name; + RGWZoneGroupPlacementTier tier_placement; + bool is_multipart_upload{false}; + + RGWObjTier(): name("none") {} + + void encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + encode(name, bl); + encode(tier_placement, bl); + encode(is_multipart_upload, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + decode(name, bl); + decode(tier_placement, bl); + decode(is_multipart_upload, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; +}; +WRITE_CLASS_ENCODER(RGWObjTier) + +class RGWObjManifest { +protected: + bool explicit_objs{false}; /* really old manifest? 
*/ + std::map<uint64_t, RGWObjManifestPart> objs; + + uint64_t obj_size{0}; + + rgw_obj obj; + uint64_t head_size{0}; + rgw_placement_rule head_placement_rule; + + uint64_t max_head_size{0}; + std::string prefix; + rgw_bucket_placement tail_placement; /* might be different than the original bucket, + as object might have been copied across pools */ + std::map<uint64_t, RGWObjManifestRule> rules; + + std::string tail_instance; /* tail object's instance */ + + std::string tier_type; + RGWObjTier tier_config; + + void convert_to_explicit(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params); + int append_explicit(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params); + void append_rules(RGWObjManifest& m, std::map<uint64_t, RGWObjManifestRule>::iterator& iter, std::string *override_prefix); + +public: + + RGWObjManifest() = default; + RGWObjManifest(const RGWObjManifest& rhs) { + *this = rhs; + } + RGWObjManifest& operator=(const RGWObjManifest& rhs) { + explicit_objs = rhs.explicit_objs; + objs = rhs.objs; + obj_size = rhs.obj_size; + obj = rhs.obj; + head_size = rhs.head_size; + max_head_size = rhs.max_head_size; + prefix = rhs.prefix; + tail_placement = rhs.tail_placement; + rules = rhs.rules; + tail_instance = rhs.tail_instance; + tier_type = rhs.tier_type; + tier_config = rhs.tier_config; + return *this; + } + + std::map<uint64_t, RGWObjManifestPart>& get_explicit_objs() { + return objs; + } + + + void set_explicit(uint64_t _size, std::map<uint64_t, RGWObjManifestPart>& _objs) { + explicit_objs = true; + objs.swap(_objs); + set_obj_size(_size); + } + + void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, + std::string *override_prefix, rgw_obj_select *location) const; + + void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) { + RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size); + rules[0] = rule; + max_head_size = tail_ofs; + } + + void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) { + RGWObjManifestRule rule(0, 0, 0, stripe_max_size); + rule.start_part_num = part_num; + rules[0] = rule; + max_head_size = 0; + } + + void encode(bufferlist& bl) const { + ENCODE_START(8, 6, bl); + encode(obj_size, bl); + encode(objs, bl); + encode(explicit_objs, bl); + encode(obj, bl); + encode(head_size, bl); + encode(max_head_size, bl); + encode(prefix, bl); + encode(rules, bl); + bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket); + encode(encode_tail_bucket, bl); + if (encode_tail_bucket) { + encode(tail_placement.bucket, bl); + } + bool encode_tail_instance = (tail_instance != obj.key.instance); + encode(encode_tail_instance, bl); + if (encode_tail_instance) { + encode(tail_instance, bl); + } + encode(head_placement_rule, bl); + encode(tail_placement.placement_rule, bl); + encode(tier_type, bl); + encode(tier_config, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl); + decode(obj_size, bl); + decode(objs, bl); + if (struct_v >= 3) { + decode(explicit_objs, bl); + decode(obj, bl); + decode(head_size, bl); + decode(max_head_size, bl); + decode(prefix, bl); + decode(rules, bl); + } else { + explicit_objs = true; + if (!objs.empty()) { + std::map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin(); + obj = iter->second.loc; + head_size = iter->second.size; + max_head_size = head_size; + } + } + + if (explicit_objs && head_size > 0 && !objs.empty()) { + /* patch up manifest due to issue 16435: + * the first object in the explicit objs list might not be the one we need to access, use the + * head object 
instead if set. This would happen if we had an old object that was created + when the explicit objs manifest was around, and it got copied. + */ + rgw_obj& obj_0 = objs[0].loc; + if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) { + objs[0].loc = obj; + objs[0].size = head_size; + } + } + + if (struct_v >= 4) { + if (struct_v < 6) { + decode(tail_placement.bucket, bl); + } else { + bool need_to_decode; + decode(need_to_decode, bl); + if (need_to_decode) { + decode(tail_placement.bucket, bl); + } else { + tail_placement.bucket = obj.bucket; + } + } + } + + if (struct_v >= 5) { + if (struct_v < 6) { + decode(tail_instance, bl); + } else { + bool need_to_decode; + decode(need_to_decode, bl); + if (need_to_decode) { + decode(tail_instance, bl); + } else { + tail_instance = obj.key.instance; + } + } + } else { // old object created before 'tail_instance' field added to manifest + tail_instance = obj.key.instance; + } + + if (struct_v >= 7) { + decode(head_placement_rule, bl); + decode(tail_placement.placement_rule, bl); + } + + if (struct_v >= 8) { + decode(tier_type, bl); + decode(tier_config, bl); + } + + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + static void generate_test_instances(std::list<RGWObjManifest*>& o); + + int append(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, + const RGWZoneParams& zone_params); + + bool get_rule(uint64_t ofs, RGWObjManifestRule *rule); + + bool empty() const { + if (explicit_objs) + return objs.empty(); + return rules.empty(); + } + + bool has_explicit_objs() const { + return explicit_objs; + } + + bool has_tail() const { + if (explicit_objs) { + if (objs.size() == 1) { + auto iter = objs.begin(); + const rgw_obj& o = iter->second.loc; + return !(obj == o); + } + return (objs.size() >= 2); + } + return (obj_size > head_size); + } + + void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) { + head_placement_rule = placement_rule; + obj = _o; + head_size = _s; + + if (explicit_objs && head_size > 0) { + objs[0].loc = obj; + objs[0].size = head_size; + } + } + + const rgw_obj& get_obj() const { + return obj; + } + + void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) { + tail_placement.placement_rule = placement_rule; + tail_placement.bucket = _b; + } + + const rgw_bucket_placement& get_tail_placement() const { + return tail_placement; + } + + const rgw_placement_rule& get_head_placement_rule() const { + return head_placement_rule; + } + + void set_prefix(const std::string& _p) { + prefix = _p; + } + + const std::string& get_prefix() const { + return prefix; + } + + void set_tail_instance(const std::string& _ti) { + tail_instance = _ti; + } + + const std::string& get_tail_instance() const { + return tail_instance; + } + + void set_head_size(uint64_t _s) { + head_size = _s; + } + + void set_obj_size(uint64_t s) { + obj_size = s; + } + + uint64_t get_obj_size() const { + return obj_size; + } + + uint64_t get_head_size() const { + return head_size; + } + + uint64_t get_max_head_size() const { + return max_head_size; + } + + const std::string& get_tier_type() { + return tier_type; + } + + inline void set_tier_type(std::string value) { + /* Only "cloud-s3" tier-type is supported for now */ + if (value == "cloud-s3") { + tier_type = value; + } + } + + inline void set_tier_config(RGWObjTier t) { + /* Set only if tier_type set to "cloud-s3" */ + if (tier_type != "cloud-s3") + return; + + tier_config.name = t.name; + tier_config.tier_placement = t.tier_placement; 
+ tier_config.is_multipart_upload = t.is_multipart_upload; + } + + inline const void get_tier_config(RGWObjTier* t) { + if (tier_type != "cloud-s3") + return; + + t->name = tier_config.name; + t->tier_placement = tier_config.tier_placement; + t->is_multipart_upload = tier_config.is_multipart_upload; + } + + class obj_iterator { + const DoutPrefixProvider *dpp; + const RGWObjManifest *manifest = nullptr; + uint64_t part_ofs = 0; /* where current part starts */ + uint64_t stripe_ofs = 0; /* where current stripe starts */ + uint64_t ofs = 0; /* current position within the object */ + uint64_t stripe_size = 0; /* current part size */ + + int cur_part_id = 0; + int cur_stripe = 0; + std::string cur_override_prefix; + + rgw_obj_select location; + + std::map<uint64_t, RGWObjManifestRule>::const_iterator rule_iter; + std::map<uint64_t, RGWObjManifestRule>::const_iterator next_rule_iter; + std::map<uint64_t, RGWObjManifestPart>::const_iterator explicit_iter; + + void update_explicit_pos(); + + public: + obj_iterator() = default; + explicit obj_iterator(const DoutPrefixProvider *_dpp, const RGWObjManifest *_m) + : obj_iterator(_dpp, _m, 0) + {} + obj_iterator(const DoutPrefixProvider *_dpp, const RGWObjManifest *_m, uint64_t _ofs) : dpp(_dpp), manifest(_m) { + seek(_ofs); + } + void seek(uint64_t ofs); + + void operator++(); + bool operator==(const obj_iterator& rhs) const { + return (ofs == rhs.ofs); + } + bool operator!=(const obj_iterator& rhs) const { + return (ofs != rhs.ofs); + } + const rgw_obj_select& get_location() { + return location; + } + + /* where current part starts */ + uint64_t get_part_ofs() const { + return part_ofs; + } + + /* start of current stripe */ + uint64_t get_stripe_ofs() { + if (manifest->explicit_objs) { + return explicit_iter->first; + } + return stripe_ofs; + } + + /* current ofs relative to start of rgw object */ + uint64_t get_ofs() const { + return ofs; + } + + /* stripe number */ + int get_cur_stripe() const { + return cur_stripe; + } + + /* current stripe size */ + uint64_t get_stripe_size() { + if (manifest->explicit_objs) { + return explicit_iter->second.size; + } + return stripe_size; + } + + /* offset where data starts within current stripe */ + uint64_t location_ofs() { + if (manifest->explicit_objs) { + return explicit_iter->second.loc_ofs; + } + return 0; /* all stripes start at zero offset */ + } + + void update_location(); + + void dump(Formatter *f) const; + }; // class obj_iterator + + obj_iterator obj_begin(const DoutPrefixProvider *dpp) const { return obj_iterator{dpp, this}; } + obj_iterator obj_end(const DoutPrefixProvider *dpp) const { return obj_iterator{dpp, this, obj_size}; } + obj_iterator obj_find(const DoutPrefixProvider *dpp, uint64_t ofs) const { + return obj_iterator{dpp, this, std::min(ofs, obj_size)}; + } + + /* + * simple object generator. Using a simple single rule manifest. 
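+ *
+ * Illustrative usage (an editorial sketch, error handling omitted): call
+ * create_begin() once to set up the head and the initial rule, then call
+ * create_next(ofs) with non-decreasing offsets as data is appended;
+ * get_cur_obj() returns the rados object backing the current stripe.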
+ */ + class generator { + RGWObjManifest *manifest; + uint64_t last_ofs; + uint64_t cur_part_ofs; + int cur_part_id; + int cur_stripe; + uint64_t cur_stripe_size; + std::string cur_oid; + + std::string oid_prefix; + + rgw_obj_select cur_obj; + + RGWObjManifestRule rule; + + public: + generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0), + cur_stripe(0), cur_stripe_size(0) {} + int create_begin(CephContext *cct, RGWObjManifest *manifest, + const rgw_placement_rule& head_placement_rule, + const rgw_placement_rule *tail_placement_rule, + const rgw_bucket& bucket, + const rgw_obj& obj); + + int create_next(uint64_t ofs); + + rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); } + rgw_raw_obj get_cur_obj(rgw::sal::RadosStore* store) const { return cur_obj.get_raw_obj(store); } + + /* total max size of current stripe (including head obj) */ + uint64_t cur_stripe_max_size() const { + return cur_stripe_size; + } + }; +}; +WRITE_CLASS_ENCODER(RGWObjManifest) diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.cc b/src/rgw/driver/rados/rgw_object_expirer_core.cc new file mode 100644 index 00000000000..ec1bf3fb6dc --- /dev/null +++ b/src/rgw/driver/rados/rgw_object_expirer_core.cc @@ -0,0 +1,442 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include +#include +#include +#include + + +#include "auth/Crypto.h" + +#include "common/armor.h" +#include "common/ceph_json.h" +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "common/Formatter.h" +#include "common/errno.h" + +#include "global/global_init.h" + +#include "include/utime.h" +#include "include/str_list.h" + +#include "rgw_user.h" +#include "rgw_bucket.h" +#include "rgw_acl.h" +#include "rgw_acl_s3.h" +#include "rgw_log.h" +#include "rgw_formats.h" +#include "rgw_usage.h" +#include "rgw_object_expirer_core.h" +#include "rgw_zone.h" +#include "rgw_sal_rados.h" + +#include "services/svc_rados.h" +#include "services/svc_zone.h" +#include "services/svc_sys_obj.h" +#include "services/svc_bi_rados.h" + +#include "cls/lock/cls_lock_client.h" +#include "cls/timeindex/cls_timeindex_client.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +static string objexp_lock_name = "gc_process"; + +static string objexp_hint_get_shardname(int shard_num) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "obj_delete_at_hint.%010u", (unsigned)shard_num); + return buf; +} + +static int objexp_key_shard(const rgw_obj_index_key& key, int num_shards) +{ + string obj_key = key.name + key.instance; + return RGWSI_BucketIndex_RADOS::bucket_shard_index(obj_key, num_shards); +} + +static string objexp_hint_get_keyext(const string& tenant_name, + const string& bucket_name, + const string& bucket_id, + const rgw_obj_key& obj_key) { + return tenant_name + (tenant_name.empty() ? 
"" : ":") + bucket_name + ":" + bucket_id + + ":" + obj_key.name + ":" + obj_key.instance; +} + +static void objexp_get_shard(int shard_num, + string *shard) +{ + *shard = objexp_hint_get_shardname(shard_num); +} + +static int objexp_hint_parse(const DoutPrefixProvider *dpp, CephContext *cct, cls_timeindex_entry &ti_entry, + objexp_hint_entry *hint_entry) +{ + try { + auto iter = ti_entry.value.cbegin(); + decode(*hint_entry, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: couldn't decode avail_pools" << dendl; + } + + return 0; +} + +int RGWObjExpStore::objexp_hint_add(const DoutPrefixProvider *dpp, + const ceph::real_time& delete_at, + const string& tenant_name, + const string& bucket_name, + const string& bucket_id, + const rgw_obj_index_key& obj_key) +{ + const string keyext = objexp_hint_get_keyext(tenant_name, bucket_name, + bucket_id, obj_key); + objexp_hint_entry he = { + .tenant = tenant_name, + .bucket_name = bucket_name, + .bucket_id = bucket_id, + .obj_key = obj_key, + .exp_time = delete_at }; + bufferlist hebl; + encode(he, hebl); + librados::ObjectWriteOperation op; + cls_timeindex_add(op, utime_t(delete_at), keyext, hebl); + + string shard_name = objexp_hint_get_shardname(objexp_key_shard(obj_key, cct->_conf->rgw_objexp_hints_num_shards)); + auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, shard_name)); + int r = obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; + return r; + } + return obj.operate(dpp, &op, null_yield); +} + +int RGWObjExpStore::objexp_hint_list(const DoutPrefixProvider *dpp, + const string& oid, + const ceph::real_time& start_time, + const ceph::real_time& end_time, + const int max_entries, + const string& marker, + list& entries, /* out */ + string *out_marker, /* out */ + bool *truncated) /* out */ +{ + librados::ObjectReadOperation op; + cls_timeindex_list(op, utime_t(start_time), utime_t(end_time), marker, max_entries, entries, + out_marker, truncated); + + auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); + int r = obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; + return r; + } + bufferlist obl; + int ret = obj.operate(dpp, &op, &obl, null_yield); + + if ((ret < 0 ) && (ret != -ENOENT)) { + return ret; + } + + if ((ret == -ENOENT) && truncated) { + *truncated = false; + } + + return 0; +} + +static int cls_timeindex_trim_repeat(const DoutPrefixProvider *dpp, + rgw_rados_ref ref, + const string& oid, + const utime_t& from_time, + const utime_t& to_time, + const string& from_marker, + const string& to_marker) +{ + bool done = false; + do { + librados::ObjectWriteOperation op; + cls_timeindex_trim(op, from_time, to_time, from_marker, to_marker); + int r = rgw_rados_operate(dpp, ref.pool.ioctx(), oid, &op, null_yield); + if (r == -ENODATA) + done = true; + else if (r < 0) + return r; + } while (!done); + + return 0; +} + +int RGWObjExpStore::objexp_hint_trim(const DoutPrefixProvider *dpp, + const string& oid, + const ceph::real_time& start_time, + const ceph::real_time& end_time, + const string& from_marker, + const string& to_marker) +{ + auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); + int r = obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << 
")" << dendl; + return r; + } + auto& ref = obj.get_ref(); + int ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time), + from_marker, to_marker); + if ((ret < 0 ) && (ret != -ENOENT)) { + return ret; + } + + return 0; +} + +int RGWObjectExpirer::garbage_single_object(const DoutPrefixProvider *dpp, objexp_hint_entry& hint) +{ + RGWBucketInfo bucket_info; + std::unique_ptr bucket; + + int ret = driver->get_bucket(dpp, nullptr, rgw_bucket(hint.tenant, hint.bucket_name, hint.bucket_id), &bucket, null_yield); + if (-ENOENT == ret) { + ldpp_dout(dpp, 15) << "NOTICE: cannot find bucket = " \ + << hint.bucket_name << ". The object must be already removed" << dendl; + return -ERR_PRECONDITION_FAILED; + } else if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: could not init bucket = " \ + << hint.bucket_name << "due to ret = " << ret << dendl; + return ret; + } + + rgw_obj_key key = hint.obj_key; + if (key.instance.empty()) { + key.instance = "null"; + } + + std::unique_ptr obj = bucket->get_object(key); + obj->set_atomic(); + ret = obj->delete_object(dpp, null_yield); + + return ret; +} + +void RGWObjectExpirer::garbage_chunk(const DoutPrefixProvider *dpp, + list& entries, /* in */ + bool& need_trim) /* out */ +{ + need_trim = false; + + for (list::iterator iter = entries.begin(); + iter != entries.end(); + ++iter) + { + objexp_hint_entry hint; + ldpp_dout(dpp, 15) << "got removal hint for: " << iter->key_ts.sec() \ + << " - " << iter->key_ext << dendl; + + int ret = objexp_hint_parse(dpp, driver->ctx(), *iter, &hint); + if (ret < 0) { + ldpp_dout(dpp, 1) << "cannot parse removal hint for " << hint.obj_key << dendl; + continue; + } + + /* PRECOND_FAILED simply means that our hint is not valid. + * We can silently ignore that and move forward. 
+
+void RGWObjectExpirer::garbage_chunk(const DoutPrefixProvider *dpp,
+                                     list<cls_timeindex_entry>& entries, /* in  */
+                                     bool& need_trim)                    /* out */
+{
+  need_trim = false;
+
+  for (list<cls_timeindex_entry>::iterator iter = entries.begin();
+       iter != entries.end();
+       ++iter)
+  {
+    objexp_hint_entry hint;
+    ldpp_dout(dpp, 15) << "got removal hint for: " << iter->key_ts.sec() \
+        << " - " << iter->key_ext << dendl;
+
+    int ret = objexp_hint_parse(dpp, driver->ctx(), *iter, &hint);
+    if (ret < 0) {
+      ldpp_dout(dpp, 1) << "cannot parse removal hint for " << hint.obj_key << dendl;
+      continue;
+    }
+
+    /* PRECOND_FAILED simply means that our hint is not valid.
+     * We can silently ignore that and move forward. */
+    ret = garbage_single_object(dpp, hint);
+    if (ret == -ERR_PRECONDITION_FAILED) {
+      ldpp_dout(dpp, 15) << "not actual hint for object: " << hint.obj_key << dendl;
+    } else if (ret < 0) {
+      ldpp_dout(dpp, 1) << "cannot remove expired object: " << hint.obj_key << dendl;
+    }
+
+    need_trim = true;
+  }
+
+  return;
+}
+
+void RGWObjectExpirer::trim_chunk(const DoutPrefixProvider *dpp,
+                                  const string& shard,
+                                  const utime_t& from,
+                                  const utime_t& to,
+                                  const string& from_marker,
+                                  const string& to_marker)
+{
+  ldpp_dout(dpp, 20) << "trying to trim removal hints to=" << to
+                     << ", to_marker=" << to_marker << dendl;
+
+  real_time rt_from = from.to_real_time();
+  real_time rt_to = to.to_real_time();
+
+  int ret = exp_store.objexp_hint_trim(dpp, shard, rt_from, rt_to,
+                                       from_marker, to_marker);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR during trim: " << ret << dendl;
+  }
+
+  return;
+}
+
+bool RGWObjectExpirer::process_single_shard(const DoutPrefixProvider *dpp,
+                                            const string& shard,
+                                            const utime_t& last_run,
+                                            const utime_t& round_start)
+{
+  string marker;
+  string out_marker;
+  bool truncated = false;
+  bool done = true;
+
+  CephContext *cct = driver->ctx();
+  int num_entries = cct->_conf->rgw_objexp_chunk_size;
+
+  int max_secs = cct->_conf->rgw_objexp_gc_interval;
+  utime_t end = ceph_clock_now();
+  end += max_secs;
+
+  rados::cls::lock::Lock l(objexp_lock_name);
+
+  utime_t time(max_secs, 0);
+  l.set_duration(time);
+
+  int ret = l.lock_exclusive(&static_cast<rgw::sal::RadosStore*>(driver)->getRados()->objexp_pool_ctx, shard);
+  if (ret == -EBUSY) { /* already locked by another processor */
+    ldpp_dout(dpp, 5) << __func__ << "(): failed to acquire lock on " << shard << dendl;
+    return false;
+  }
+
+  do {
+    real_time rt_last = last_run.to_real_time();
+    real_time rt_start = round_start.to_real_time();
+
+    list<cls_timeindex_entry> entries;
+    ret = exp_store.objexp_hint_list(dpp, shard, rt_last, rt_start,
+                                     num_entries, marker, entries,
+                                     &out_marker, &truncated);
+    if (ret < 0) {
+      ldpp_dout(dpp, 10) << "cannot get removal hints from shard: " << shard
+                         << dendl;
+      continue;
+    }
+
+    bool need_trim;
+    garbage_chunk(dpp, entries, need_trim);
+
+    if (need_trim) {
+      trim_chunk(dpp, shard, last_run, round_start, marker, out_marker);
+    }
+
+    utime_t now = ceph_clock_now();
+    if (now >= end) {
+      done = false;
+      break;
+    }
+
+    marker = out_marker;
+  } while (truncated);
+
+  l.unlock(&static_cast<rgw::sal::RadosStore*>(driver)->getRados()->objexp_pool_ctx, shard);
+  return done;
+}
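+// Editor's sketch (illustrative): one pass over a shard is
+// lock -> list a chunk of hints -> delete expired objects -> trim the chunk,
+// repeated while the listing is truncated and the time budget lasts:
+//
+//   utime_t last_run;                       // hypothetical bookkeeping
+//   utime_t now = ceph_clock_now();
+//   bool done = process_single_shard(dpp, "obj_delete_at_hint.0000000000",
+//                                    last_run, now);
+//   // done == false means the shard lock was busy or the budget expired,
+//   // so the shard is revisited on the next round.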
+
+/* Returns true if all shards have been processed successfully. */
+bool RGWObjectExpirer::inspect_all_shards(const DoutPrefixProvider *dpp,
+                                          const utime_t& last_run,
+                                          const utime_t& round_start)
+{
+  CephContext * const cct = driver->ctx();
+  int num_shards = cct->_conf->rgw_objexp_hints_num_shards;
+  bool all_done = true;
+
+  for (int i = 0; i < num_shards; i++) {
+    string shard;
+    objexp_get_shard(i, &shard);
+
+    ldpp_dout(dpp, 20) << "processing shard = " << shard << dendl;
+
+    if (! process_single_shard(dpp, shard, last_run, round_start)) {
+      all_done = false;
+    }
+  }
+
+  return all_done;
+}
+
+bool RGWObjectExpirer::going_down()
+{
+  return down_flag;
+}
+
+void RGWObjectExpirer::start_processor()
+{
+  worker = new OEWorker(driver->ctx(), this);
+  worker->create("rgw_obj_expirer");
+}
+
+void RGWObjectExpirer::stop_processor()
+{
+  down_flag = true;
+  if (worker) {
+    worker->stop();
+    worker->join();
+  }
+  delete worker;
+  worker = NULL;
+}
+
+void *RGWObjectExpirer::OEWorker::entry() {
+  utime_t last_run;
+  do {
+    utime_t start = ceph_clock_now();
+    ldpp_dout(this, 2) << "object expiration: start" << dendl;
+    if (oe->inspect_all_shards(this, last_run, start)) {
+      /* All shards have been processed properly. Next time we can start
+       * from this moment. */
+      last_run = start;
+    }
+    ldpp_dout(this, 2) << "object expiration: stop" << dendl;
+
+    if (oe->going_down())
+      break;
+
+    utime_t end = ceph_clock_now();
+    end -= start;
+    int secs = cct->_conf->rgw_objexp_gc_interval;
+
+    if (secs <= end.sec())
+      continue; // next round
+
+    secs -= end.sec();
+
+    std::unique_lock l{lock};
+    cond.wait_for(l, std::chrono::seconds(secs));
+  } while (!oe->going_down());
+
+  return NULL;
+}
+
+void RGWObjectExpirer::OEWorker::stop()
+{
+  std::lock_guard l{lock};
+  cond.notify_all();
+}
+
+CephContext *RGWObjectExpirer::OEWorker::get_cct() const
+{
+  return cct;
+}
+
+unsigned RGWObjectExpirer::OEWorker::get_subsys() const
+{
+  return dout_subsys;
+}
+
+std::ostream& RGWObjectExpirer::OEWorker::gen_prefix(std::ostream& out) const
+{
+  return out << "rgw object expirer Worker thread: ";
+}
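+// Editor's note (illustrative arithmetic, restating the loop above): the
+// worker sleeps only for the remainder of rgw_objexp_gc_interval, so rounds
+// start on a roughly fixed cadence instead of drifting by processing time:
+//
+//   interval = 600s, round took  45s  ->  wait 600 - 45 = 555s
+//   interval = 600s, round took 700s  ->  no wait, start the next round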
diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.h b/src/rgw/driver/rados/rgw_object_expirer_core.h
new file mode 100644
index 00000000000..fccd4199e7e
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_object_expirer_core.h
@@ -0,0 +1,148 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_OBJEXP_H
+#define CEPH_OBJEXP_H
+
+#include <atomic>
+#include <string>
+#include <cerrno>
+#include <sstream>
+#include <iostream>
+
+#include "auth/Crypto.h"
+
+#include "common/armor.h"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "common/ceph_mutex.h"
+#include "common/Cond.h"
+#include "common/Thread.h"
+
+#include "global/global_init.h"
+
+#include "include/common_fwd.h"
+#include "include/utime.h"
+#include "include/str_list.h"
+
+#include "rgw_sal_rados.h"
+
+class RGWSI_RADOS;
+class RGWSI_Zone;
+class RGWBucketInfo;
+class cls_timeindex_entry;
+
+class RGWObjExpStore {
+  CephContext *cct;
+  RGWSI_RADOS *rados_svc;
+  rgw::sal::RadosStore* driver;
+public:
+  RGWObjExpStore(CephContext *_cct, RGWSI_RADOS *_rados_svc, rgw::sal::RadosStore* _driver) : cct(_cct),
+                                                                                              rados_svc(_rados_svc),
+                                                                                              driver(_driver) {}
+
+  int objexp_hint_add(const DoutPrefixProvider *dpp,
+                      const ceph::real_time& delete_at,
+                      const std::string& tenant_name,
+                      const std::string& bucket_name,
+                      const std::string& bucket_id,
+                      const rgw_obj_index_key& obj_key);
+
+  int objexp_hint_list(const DoutPrefixProvider *dpp,
+                       const std::string& oid,
+                       const ceph::real_time& start_time,
+                       const ceph::real_time& end_time,
+                       const int max_entries,
+                       const std::string& marker,
+                       std::list<cls_timeindex_entry>& entries, /* out */
+                       std::string *out_marker,                 /* out */
+                       bool *truncated);                        /* out */
+
+  int objexp_hint_trim(const DoutPrefixProvider *dpp,
+                       const std::string& oid,
+                       const ceph::real_time& start_time,
+                       const ceph::real_time& end_time,
+                       const std::string& from_marker,
+                       const std::string& to_marker);
+};
+
+class RGWObjectExpirer {
+protected:
+  rgw::sal::Driver* driver;
+  RGWObjExpStore exp_store;
+
+  class OEWorker : public Thread, public DoutPrefixProvider {
+    CephContext *cct;
+    RGWObjectExpirer *oe;
+    ceph::mutex lock = ceph::make_mutex("OEWorker");
+    ceph::condition_variable cond;
+
+  public:
+    OEWorker(CephContext * const cct,
+             RGWObjectExpirer * const oe)
+      : cct(cct),
+        oe(oe) {
+    }
+
+    void *entry() override;
+    void stop();
+
+    CephContext *get_cct() const override;
+    unsigned get_subsys() const override;
+    std::ostream& gen_prefix(std::ostream& out) const override;
+  };
+
+  OEWorker *worker{nullptr};
+  std::atomic<bool> down_flag = { false };
+
+public:
+  explicit RGWObjectExpirer(rgw::sal::Driver* _driver)
+    : driver(_driver),
+      exp_store(_driver->ctx(), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados, static_cast<rgw::sal::RadosStore*>(driver)),
+      worker(NULL) {
+  }
+  ~RGWObjectExpirer() {
+    stop_processor();
+  }
+
+  int hint_add(const DoutPrefixProvider *dpp,
+               const ceph::real_time& delete_at,
+               const std::string& tenant_name,
+               const std::string& bucket_name,
+               const std::string& bucket_id,
+               const rgw_obj_index_key& obj_key) {
+    return exp_store.objexp_hint_add(dpp, delete_at, tenant_name, bucket_name,
+                                     bucket_id, obj_key);
+  }
+
+  int garbage_single_object(const DoutPrefixProvider *dpp, objexp_hint_entry& hint);
+
+  void garbage_chunk(const DoutPrefixProvider *dpp,
+                     std::list<cls_timeindex_entry>& entries, /* in  */
+                     bool& need_trim);                        /* out */
+
+  void trim_chunk(const DoutPrefixProvider *dpp,
+                  const std::string& shard,
+                  const utime_t& from,
+                  const utime_t& to,
+                  const std::string& from_marker,
+                  const std::string& to_marker);
+
+  bool process_single_shard(const DoutPrefixProvider *dpp,
+                            const std::string& shard,
+                            const utime_t& last_run,
+                            const utime_t& round_start);
+
+  bool inspect_all_shards(const DoutPrefixProvider *dpp,
+                          const utime_t& last_run,
+                          const utime_t& round_start);
+
+  bool going_down();
+  void start_processor();
+  void stop_processor();
+};
+#endif /* CEPH_OBJEXP_H */
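+// Editor's sketch (illustrative; assumes a wired-up rgw::sal::Driver*):
+// other subsystems schedule an expiration through the thin hint_add()
+// wrapper and let the worker thread do the rest:
+//
+//   RGWObjectExpirer expirer(driver);
+//   expirer.hint_add(dpp, delete_at, "tenant", "bucket", "bucket-id", key);
+//   expirer.start_processor();   // spawns the "rgw_obj_expirer" thread
+//   ...
+//   expirer.stop_processor();    // also runs from the destructor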
diff --git a/src/rgw/driver/rados/rgw_otp.cc b/src/rgw/driver/rados/rgw_otp.cc
new file mode 100644
index 00000000000..07cc14f113b
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_otp.cc
@@ -0,0 +1,211 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include <errno.h>
+
+#include <string>
+#include <map>
+#include <boost/algorithm/string.hpp>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "rgw_otp.h"
+#include "rgw_zone.h"
+#include "rgw_metadata.h"
+
+#include "include/types.h"
+
+#include "rgw_common.h"
+#include "rgw_tools.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_meta.h"
+#include "services/svc_meta_be.h"
+#include "services/svc_meta_be_otp.h"
+#include "services/svc_otp.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+
+class RGWOTPMetadataHandler;
+
+class RGWOTPMetadataObject : public RGWMetadataObject {
+  friend class RGWOTPMetadataHandler;
+
+  otp_devices_list_t devices;
+public:
+  RGWOTPMetadataObject() {}
+  RGWOTPMetadataObject(otp_devices_list_t&& _devices, const obj_version& v, const real_time m) {
+    devices = std::move(_devices);
+    objv = v;
+    mtime = m;
+  }
+
+  void dump(Formatter *f) const override {
+    encode_json("devices", devices, f);
+  }
+
+  otp_devices_list_t& get_devs() {
+    return devices;
+  }
+};
+
+
+class RGWOTPMetadataHandler : public RGWOTPMetadataHandlerBase {
+  friend class RGWOTPCtl;
+
+  struct Svc {
+    RGWSI_Zone *zone;
+    RGWSI_MetaBackend *meta_be;
+    RGWSI_OTP *otp;
+  } svc;
+
+  int init(RGWSI_Zone *zone,
+           RGWSI_MetaBackend *_meta_be,
+           RGWSI_OTP *_otp) {
+    base_init(zone->ctx(), _otp->get_be_handler().get());
+    svc.zone = zone;
+    svc.meta_be = _meta_be;
+    svc.otp = _otp;
+    return 0;
+  }
+
+  int call(std::function<int(RGWSI_OTP_BE_Ctx&)> f) {
+    return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+      RGWSI_OTP_BE_Ctx ctx(op->ctx());
+      return f(ctx);
+    });
+  }
+
+  RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override {
+    otp_devices_list_t devices;
+    try {
+      JSONDecoder::decode_json("devices", devices, jo);
+    } catch (JSONDecoder::err& e) {
+      return nullptr;
+    }
+
+    return new RGWOTPMetadataObject(std::move(devices), objv, mtime);
+  }
+
+  int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override {
+    RGWObjVersionTracker objv_tracker;
+
+    std::unique_ptr<RGWOTPMetadataObject> mdo(new RGWOTPMetadataObject);
+
+    RGWSI_OTP_BE_Ctx be_ctx(op->ctx());
+
+    int ret = svc.otp->read_all(be_ctx,
+                                entry,
+                                &mdo->get_devs(),
+                                &mdo->get_mtime(),
+                                &objv_tracker,
+                                y,
+                                dpp);
+    if (ret < 0) {
+      return ret;
+    }
+
+    mdo->objv = objv_tracker.read_version;
+
+    *obj = mdo.release();
+
+    return 0;
+  }
+
+  int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry,
+             RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker,
+             optional_yield y,
+             const DoutPrefixProvider *dpp,
+             RGWMDLogSyncType type, bool from_remote_zone) override {
+    RGWOTPMetadataObject *obj = static_cast<RGWOTPMetadataObject *>(_obj);
+
+    RGWSI_OTP_BE_Ctx be_ctx(op->ctx());
+
+    int ret = svc.otp->store_all(dpp, be_ctx,
+                                 entry,
+                                 obj->devices,
+                                 obj->mtime,
+                                 &objv_tracker,
+                                 y);
+    if (ret < 0) {
+      return ret;
+    }
+
+    return STATUS_APPLIED;
+  }
+
+  int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker,
+                optional_yield y, const DoutPrefixProvider *dpp) override {
+    RGWSI_MBOTP_RemoveParams params;
+
+    RGWSI_OTP_BE_Ctx be_ctx(op->ctx());
+
+    return svc.otp->remove_all(dpp, be_ctx,
+                               entry,
+                               &objv_tracker,
+                               y);
+  }
+
+public:
+  RGWOTPMetadataHandler() {}
+
+  string get_type() override { return "otp"; }
+};
+
+
+RGWOTPCtl::RGWOTPCtl(RGWSI_Zone *zone_svc,
+                     RGWSI_OTP *otp_svc)
+{
+  svc.zone = zone_svc;
+  svc.otp = otp_svc;
+}
+
+
+void RGWOTPCtl::init(RGWOTPMetadataHandler *_meta_handler)
+{
+  meta_handler = _meta_handler;
+  be_handler = meta_handler->get_be_handler();
+}
+
+int RGWOTPCtl::read_all(const rgw_user& uid,
+                        RGWOTPInfo *info,
+                        optional_yield y,
+                        const DoutPrefixProvider *dpp,
+                        const GetParams& params)
+{
+  info->uid = uid;
+  return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) {
+    return svc.otp->read_all(ctx, uid, &info->devices, params.mtime, params.objv_tracker, y, dpp);
+  });
+}
+
+int RGWOTPCtl::store_all(const DoutPrefixProvider *dpp,
+                         const RGWOTPInfo& info,
+                         optional_yield y,
+                         const PutParams& params)
+{
+  return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) {
+    return svc.otp->store_all(dpp, ctx, info.uid, info.devices, params.mtime, params.objv_tracker, y);
+  });
+}
+
+int RGWOTPCtl::remove_all(const DoutPrefixProvider *dpp,
+                          const rgw_user& uid,
+                          optional_yield y,
+                          const RemoveParams& params)
+{
+  return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) {
+    return svc.otp->remove_all(dpp, ctx, uid, params.objv_tracker, y);
+  });
+}
+
+
+RGWMetadataHandler *RGWOTPMetaHandlerAllocator::alloc()
+{
+  return new RGWOTPMetadataHandler();
+}
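+// Editor's note (illustrative): the handler above follows the generic RGW
+// metadata pattern - do_get()/do_put()/do_remove() translate entries in the
+// "otp" metadata section into svc_otp calls, while RGWOTPCtl is the typed
+// front door, e.g.:
+//
+//   RGWOTPInfo info;                               // hypothetical caller
+//   int r = otp_ctl->read_all(rgw_user("tenant", "uid"), &info, y, dpp);
+//   // on r == 0, info.devices holds the user's TOTP devices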
diff --git a/src/rgw/driver/rados/rgw_otp.h b/src/rgw/driver/rados/rgw_otp.h
new file mode 100644
index 00000000000..eacff15314c
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_otp.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_OTP_H
+#define CEPH_RGW_OTP_H
+
+#include "rgw_sal_fwd.h"
+#include "cls/otp/cls_otp_types.h"
+#include "services/svc_meta_be_otp.h"
+
+#include "rgw_basic_types.h"
+#include "rgw_metadata.h"
+
+
+class RGWObjVersionTracker;
+class RGWMetadataHandler;
+class RGWOTPMetadataHandler;
+class RGWSI_Zone;
+class RGWSI_OTP;
+class RGWSI_MetaBackend;
+
+class RGWOTPMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE {
+public:
+  virtual ~RGWOTPMetadataHandlerBase() {}
+  virtual int init(RGWSI_Zone *zone,
+                   RGWSI_MetaBackend *_meta_be,
+                   RGWSI_OTP *_otp) = 0;
+};
+
+class RGWOTPMetaHandlerAllocator {
+public:
+  static RGWMetadataHandler *alloc();
+};
+
+struct RGWOTPInfo {
+  rgw_user uid;
+  otp_devices_list_t devices;
+};
+
+
+class RGWOTPCtl
+{
+  struct Svc {
+    RGWSI_Zone *zone{nullptr};
+    RGWSI_OTP *otp{nullptr};
+  } svc;
+
+  RGWOTPMetadataHandler *meta_handler;
+  RGWSI_MetaBackend_Handler *be_handler;
+
+public:
+  RGWOTPCtl(RGWSI_Zone *zone_svc,
+            RGWSI_OTP *otp_svc);
+
+  void init(RGWOTPMetadataHandler *_meta_handler);
+
+  struct GetParams {
+    RGWObjVersionTracker *objv_tracker{nullptr};
+    ceph::real_time *mtime{nullptr};
+
+    GetParams() {}
+
+    GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+      objv_tracker = _objv_tracker;
+      return *this;
+    }
+
+    GetParams& set_mtime(ceph::real_time *_mtime) {
+      mtime = _mtime;
+      return *this;
+    }
+  };
+
+  struct PutParams {
+    RGWObjVersionTracker *objv_tracker{nullptr};
+    ceph::real_time mtime;
+
+    PutParams() {}
+
+    PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+      objv_tracker = _objv_tracker;
+      return *this;
+    }
+
+    PutParams& set_mtime(const ceph::real_time& _mtime) {
+      mtime = _mtime;
+      return *this;
+    }
+  };
+
+  struct RemoveParams {
+    RGWObjVersionTracker *objv_tracker{nullptr};
+
+    RemoveParams() {}
+
+    RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+      objv_tracker = _objv_tracker;
+      return *this;
+    }
+  };
+
+  int read_all(const rgw_user& uid, RGWOTPInfo *info, optional_yield y,
+               const DoutPrefixProvider *dpp,
+               const GetParams& params = {});
+  int store_all(const DoutPrefixProvider *dpp,
+                const RGWOTPInfo& info, optional_yield y,
+                const PutParams& params = {});
+  int remove_all(const DoutPrefixProvider *dpp,
+                 const rgw_user& user, optional_yield y,
+                 const RemoveParams& params = {});
+};
+
+#endif
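+// Editor's sketch (illustrative): the nested *Params structs are small
+// fluent builders, so optional versioning/mtime plumbing stays readable at
+// the call site:
+//
+//   RGWObjVersionTracker objv;             // hypothetical
+//   ceph::real_time mtime;
+//   otp_ctl->read_all(uid, &info, y, dpp,
+//                     RGWOTPCtl::GetParams()
+//                         .set_objv_tracker(&objv)
+//                         .set_mtime(&mtime));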
diff --git a/src/rgw/driver/rados/rgw_period.cc b/src/rgw/driver/rados/rgw_period.cc
new file mode 100644
index 00000000000..61602b354e2
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_period.cc
@@ -0,0 +1,324 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_sync.h"
+
+#include "services/svc_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+using namespace rgw_zone_defaults;
+
+int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup,
+                             const string& zonegroup_id) const
+{
+  map<string, RGWZoneGroup>::const_iterator iter;
+  if (!zonegroup_id.empty()) {
+    iter = period_map.zonegroups.find(zonegroup_id);
+  } else {
+    iter = period_map.zonegroups.find("default");
+  }
+  if (iter != period_map.zonegroups.end()) {
+    zonegroup = iter->second;
+    return 0;
+  }
+
+  return -ENOENT;
+}
+
+int RGWPeriod::get_latest_epoch(const DoutPrefixProvider *dpp,
+                                epoch_t& latest_epoch, optional_yield y)
+{
+  RGWPeriodLatestEpochInfo info;
+
+  int ret = read_latest_epoch(dpp, info, y);
+  if (ret < 0) {
+    return ret;
+  }
+
+  latest_epoch = info.epoch;
+
+  return 0;
+}
+
+int RGWPeriod::delete_obj(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  rgw_pool pool(get_pool(cct));
+
+  // delete the object for each period epoch
+  for (epoch_t e = 1; e <= epoch; e++) {
+    RGWPeriod p{get_id(), e};
+    rgw_raw_obj oid{pool, p.get_period_oid()};
+    auto sysobj = sysobj_svc->get_obj(oid);
+    int ret = sysobj.wop().remove(dpp, y);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: failed to delete period object " << oid
+          << ": " << cpp_strerror(-ret) << dendl;
+    }
+  }
+
+  // delete the .latest_epoch object
+  rgw_raw_obj oid{pool, get_period_oid_prefix() + get_latest_epoch_oid()};
+  auto sysobj = sysobj_svc->get_obj(oid);
+  int ret = sysobj.wop().remove(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "WARNING: failed to delete period object " << oid
+        << ": " << cpp_strerror(-ret) << dendl;
+  }
+  return ret;
+}
+
+int RGWPeriod::add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y)
+{
+  if (zonegroup.realm_id != realm_id) {
+    return 0;
+  }
+  int ret = period_map.update(zonegroup, cct);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return store_info(dpp, false, y);
+}
+
+int RGWPeriod::update(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  auto zone_svc = sysobj_svc->get_zone_svc();
+  ldpp_dout(dpp, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl;
+  list<string> zonegroups;
+  int ret = zone_svc->list_zonegroups(dpp, zonegroups);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  // clear zone short ids of removed zones. period_map.update() will add the
+  // remaining zones back
+  period_map.short_zone_ids.clear();
+
+  for (auto& iter : zonegroups) {
+    RGWZoneGroup zg(string(), iter);
+    ret = zg.init(dpp, cct, sysobj_svc, y);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl;
+      continue;
+    }
+
+    if (zg.realm_id != realm_id) {
+      ldpp_dout(dpp, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl;
+      continue;
+    }
+
+    if (zg.master_zone.empty()) {
+      ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
+      return -EINVAL;
+    }
+
+    if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
+      ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name()
+          << " has a non existent master zone " << dendl;
+      return -EINVAL;
+    }
+
+    if (zg.is_master_zonegroup()) {
+      master_zonegroup = zg.get_id();
+      master_zone = zg.master_zone;
+    }
+
+    int ret = period_map.update(zg, cct);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  ret = period_config.read(dpp, sysobj_svc, realm_id, y);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to read period config: "
+        << cpp_strerror(ret) << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+void RGWPeriod::fork()
+{
+  ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl;
+  predecessor_uuid = id;
+  id = get_staging_id(realm_id);
+  period_map.reset();
+  realm_epoch++;
+}
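+// Editor's note (illustrative; the staging-id format is an assumption based
+// on get_staging_id()): fork() moves the in-memory period onto a
+// realm-scoped staging id and bumps realm_epoch:
+//
+//   RGWPeriod p = current;      // hypothetical copy
+//   p.fork();                   // id -> "<realm_id>:staging",
+//                               // predecessor_uuid -> old id, realm_epoch + 1
+//                               // period_map emptied until update() refills it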
+
+static int read_sync_status(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw_meta_sync_status *sync_status)
+{
+  rgw::sal::RadosStore* rados_store = static_cast<rgw::sal::RadosStore*>(driver);
+  // initialize a sync status manager to read the status
+  RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->rados->get_async_processor());
+  int r = mgr.init(dpp);
+  if (r < 0) {
+    return r;
+  }
+  r = mgr.read_sync_status(dpp, sync_status);
+  mgr.stop();
+  return r;
+}
+
+int RGWPeriod::update_sync_status(const DoutPrefixProvider *dpp,
+                                  rgw::sal::Driver* driver, /* for now */
+                                  const RGWPeriod &current_period,
+                                  std::ostream& error_stream,
+                                  bool force_if_stale)
+{
+  rgw_meta_sync_status status;
+  int r = read_sync_status(dpp, driver, &status);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "period failed to read sync status: "
+        << cpp_strerror(-r) << dendl;
+    return r;
+  }
+
+  std::vector<std::string> markers;
+
+  const auto current_epoch = current_period.get_realm_epoch();
+  if (current_epoch != status.sync_info.realm_epoch) {
+    // no sync status markers for the current period
+    ceph_assert(current_epoch > status.sync_info.realm_epoch);
+    const int behind = current_epoch - status.sync_info.realm_epoch;
+    if (!force_if_stale && current_epoch > 1) {
+      error_stream << "ERROR: This zone is " << behind << " period(s) behind "
+          "the current master zone in metadata sync. If this zone is promoted "
+          "to master, any metadata changes during that time are likely to "
+          "be lost.\n"
+          "Waiting for this zone to catch up on metadata sync (see "
+          "'radosgw-admin sync status') is recommended.\n"
+          "To promote this zone to master anyway, add the flag "
+          "--yes-i-really-mean-it." << std::endl;
+      return -EINVAL;
+    }
+    // empty sync status markers - other zones will skip this period during
+    // incremental metadata sync
+    markers.resize(status.sync_info.num_shards);
+  } else {
+    markers.reserve(status.sync_info.num_shards);
+    for (auto& i : status.sync_markers) {
+      auto& marker = i.second;
+      // filter out markers from other periods
+      if (marker.realm_epoch != current_epoch) {
+        marker.marker.clear();
+      }
+      markers.emplace_back(std::move(marker.marker));
+    }
+  }
+
+  std::swap(sync_status, markers);
+  return 0;
+}
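+// Editor's note (illustrative): the marker filtering above means a shard
+// whose sync marker was recorded under an older realm_epoch contributes an
+// empty marker, e.g. with current_epoch == 5:
+//
+//   shard 0: {realm_epoch: 5, marker: "1_163..."} -> kept
+//   shard 1: {realm_epoch: 4, marker: "1_162..."} -> cleared to ""
+//
+// so other zones resume incremental metadata sync only from positions that
+// are valid in the period being committed.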
+
+int RGWPeriod::commit(const DoutPrefixProvider *dpp,
+                      rgw::sal::Driver* driver,
+                      RGWRealm& realm, const RGWPeriod& current_period,
+                      std::ostream& error_stream, optional_yield y,
+                      bool force_if_stale)
+{
+  auto zone_svc = sysobj_svc->get_zone_svc();
+  ldpp_dout(dpp, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
+  // gateway must be in the master zone to commit
+  if (master_zone != zone_svc->get_zone_params().get_id()) {
+    error_stream << "Cannot commit period on zone "
+        << zone_svc->get_zone_params().get_id() << ", it must be sent to "
+        "the period's master zone " << master_zone << '.' << std::endl;
+    return -EINVAL;
+  }
+  // period predecessor must match current period
+  if (predecessor_uuid != current_period.get_id()) {
+    error_stream << "Period predecessor " << predecessor_uuid
+        << " does not match current period " << current_period.get_id()
+        << ". Use 'period pull' to get the latest period from the master, "
+        "reapply your changes, and try again." << std::endl;
+    return -EINVAL;
+  }
+  // realm epoch must be 1 greater than current period
+  if (realm_epoch != current_period.get_realm_epoch() + 1) {
+    error_stream << "Period's realm epoch " << realm_epoch
+        << " does not come directly after current realm epoch "
+        << current_period.get_realm_epoch() << ". Use 'realm pull' to get the "
+        "latest realm and period from the master zone, reapply your changes, "
+        "and try again." << std::endl;
+    return -EINVAL;
+  }
+  // did the master zone change?
+  if (master_zone != current_period.get_master_zone()) {
+    // store the current metadata sync status in the period
+    int r = update_sync_status(dpp, driver, current_period, error_stream, force_if_stale);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "failed to update metadata sync status: "
+          << cpp_strerror(-r) << dendl;
+      return r;
+    }
+    // create an object with a new period id
+    r = create(dpp, y, true);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl;
+      return r;
+    }
+    // set as current period
+    r = realm.set_current_period(dpp, *this, y);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "failed to update realm's current period: "
+          << cpp_strerror(-r) << dendl;
+      return r;
+    }
+    ldpp_dout(dpp, 4) << "Promoted to master zone and committed new period "
+        << id << dendl;
+    realm.notify_new_period(dpp, *this, y);
+    return 0;
+  }
+  // period must be based on current epoch
+  if (epoch != current_period.get_epoch()) {
+    error_stream << "Period epoch " << epoch << " does not match "
+        "predecessor epoch " << current_period.get_epoch()
+        << ". Use 'period pull' to get the latest epoch from the master zone, "
+        "reapply your changes, and try again." << std::endl;
+    return -EINVAL;
+  }
+  // set period as next epoch
+  set_id(current_period.get_id());
+  set_epoch(current_period.get_epoch() + 1);
+  set_predecessor(current_period.get_predecessor());
+  realm_epoch = current_period.get_realm_epoch();
+  // write the period to rados
+  int r = store_info(dpp, false, y);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "failed to store period: " << cpp_strerror(-r) << dendl;
+    return r;
+  }
+  // set as latest epoch
+  r = update_latest_epoch(dpp, epoch, y);
+  if (r == -EEXIST) {
+    // already have this epoch (or a more recent one)
+    return 0;
+  }
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl;
+    return r;
+  }
+  r = reflect(dpp, y);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl;
+    return r;
+  }
+  ldpp_dout(dpp, 4) << "Committed new epoch " << epoch
+      << " for period " << id << dendl;
+  realm.notify_new_period(dpp, *this, y);
+  return 0;
+}
+
+void RGWPeriod::generate_test_instances(list<RGWPeriod*> &o)
+{
+  RGWPeriod *z = new RGWPeriod;
+  o.push_back(z);
+  o.push_back(new RGWPeriod);
+}
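+// Editor's summary (not authoritative; restating the checks above): commit()
+// accepts a staged period only when
+//   1. this gateway runs in the period's master zone,
+//   2. predecessor_uuid matches the current period id,
+//   3. realm_epoch == current realm_epoch + 1, and
+//   4. (same master zone) epoch matches the current epoch, which then
+//      advances to epoch + 1;
+// a master-zone change instead stores the sync-status markers and mints a
+// brand new period id.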
diff --git a/src/rgw/driver/rados/rgw_rest_pubsub.cc b/src/rgw/driver/rados/rgw_rest_pubsub.cc
new file mode 100644
index 00000000000..23d56615ac9
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_rest_pubsub.cc
@@ -0,0 +1,1069 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <algorithm>
+#include <boost/tokenizer.hpp>
+#include <optional>
+#include "rgw_rest_pubsub.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_pubsub.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_arn.h"
+#include "rgw_auth_s3.h"
+#include "rgw_notify.h"
+#include "rgw_sal_rados.h"
+#include "services/svc_zone.h"
+#include "common/dout.h"
+#include "rgw_url.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+static const char* AWS_SNS_NS("https://sns.amazonaws.com/doc/2010-03-31/");
+
+bool verify_transport_security(CephContext *cct, const RGWEnv& env) {
+  const auto is_secure = rgw_transport_is_secure(cct, env);
+  if (!is_secure && g_conf().get_val<bool>("rgw_allow_notification_secrets_in_cleartext")) {
+    ldout(cct, 0) << "WARNING: bypassing endpoint validation, allows sending secrets over insecure transport" << dendl;
+    return true;
+  }
+  return is_secure;
+}
+
+// make sure that endpoint is a valid URL
+// make sure that if user/password are passed inside URL, it is over secure connection
+// update rgw_pubsub_sub_dest to indicate that a password is stored in the URL
+bool validate_and_update_endpoint_secret(rgw_pubsub_sub_dest& dest, CephContext *cct, const RGWEnv& env) {
+  if (dest.push_endpoint.empty()) {
+    return true;
+  }
+  std::string user;
+  std::string password;
+  if (!rgw::parse_url_userinfo(dest.push_endpoint, user, password)) {
+    ldout(cct, 1) << "endpoint validation error: malformed endpoint URL:" << dest.push_endpoint << dendl;
+    return false;
+  }
+  // this should be verified inside parse_url()
+  ceph_assert(user.empty() == password.empty());
+  if (!user.empty()) {
+    dest.stored_secret = true;
+    if (!verify_transport_security(cct, env)) {
+      ldout(cct, 1) << "endpoint validation error: sending secrets over insecure transport" << dendl;
+      return false;
+    }
+  }
+  return true;
+}
+
+bool topic_has_endpoint_secret(const rgw_pubsub_topic_subs& topic) {
+  return topic.topic.dest.stored_secret;
+}
+
+bool topics_has_endpoint_secret(const rgw_pubsub_topics& topics) {
+  for (const auto& topic : topics.topics) {
+    if (topic_has_endpoint_secret(topic.second)) return true;
+  }
+  return false;
+}
+
+// command (AWS compliant):
+// POST
+// Action=CreateTopic&Name=<topic-name>[&OpaqueData=data][&push-endpoint=<endpoint>[&persistent][&<arg1>=<value1>]]
+class RGWPSCreateTopicOp : public RGWOp {
+  private:
+  std::optional<RGWPubSub> ps;
+  std::string topic_name;
+  rgw_pubsub_sub_dest dest;
+  std::string topic_arn;
+  std::string opaque_data;
+
+  int get_params() {
+    topic_name = s->info.args.get("Name");
+    if (topic_name.empty()) {
+      ldpp_dout(this, 1) << "CreateTopic Action 'Name' argument is missing" << dendl;
+      return -EINVAL;
+    }
+
+    opaque_data = s->info.args.get("OpaqueData");
+
+    dest.push_endpoint = s->info.args.get("push-endpoint");
+    s->info.args.get_bool("persistent", &dest.persistent, false);
+
+    if (!validate_and_update_endpoint_secret(dest, s->cct, *(s->info.env))) {
+      return -EINVAL;
+    }
+    for (const auto& param : s->info.args.get_params()) {
+      if (param.first == "Action" || param.first == "Name" || param.first == "PayloadHash") {
+        continue;
+      }
+      dest.push_endpoint_args.append(param.first+"="+param.second+"&");
+    }
+
+    if (!dest.push_endpoint_args.empty()) {
+      // remove last separator
+      dest.push_endpoint_args.pop_back();
+    }
+    if (!dest.push_endpoint.empty() && dest.persistent) {
+      const auto ret = rgw::notify::add_persistent_topic(topic_name, s->yield);
+      if (ret < 0) {
+        ldpp_dout(this, 1) << "CreateTopic Action failed to create queue for persistent topics. error:" << ret << dendl;
+        return ret;
+      }
+    }
+
+    // dest object only stores endpoint info
+    dest.arn_topic = topic_name;
+    // the topic ARN will be sent in the reply
+    const rgw::ARN arn(rgw::Partition::aws, rgw::Service::sns,
+                       driver->get_zone()->get_zonegroup().get_name(),
+                       s->user->get_tenant(), topic_name);
+    topic_arn = arn.to_string();
+    return 0;
+  }
+
+  public:
+  int verify_permission(optional_yield) override {
+    return 0;
+  }
+
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+  void execute(optional_yield) override;
+
+  const char* name() const override { return "pubsub_topic_create"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_CREATE; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto f = s->formatter;
+    f->open_object_section_in_ns("CreateTopicResponse", AWS_SNS_NS);
+    f->open_object_section("CreateTopicResult");
+    encode_xml("TopicArn", topic_arn, f);
+    f->close_section(); // CreateTopicResult
+    f->open_object_section("ResponseMetadata");
+    encode_xml("RequestId", s->req_id, f);
+    f->close_section(); // ResponseMetadata
+    f->close_section(); // CreateTopicResponse
+    rgw_flush_formatter_and_reset(s, f);
+  }
+};
+
+void RGWPSCreateTopicOp::execute(optional_yield y) {
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  op_ret = ps->create_topic(this, topic_name, dest, topic_arn, opaque_data, y);
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  ldpp_dout(this, 20) << "successfully created topic '" << topic_name << "'" << dendl;
+}
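+// Editor's example (illustrative request, not taken from the original
+// change): a CreateTopic POST body handled by the op above could look like
+//
+//   Action=CreateTopic&Name=mytopic
+//     &push-endpoint=amqp://user:pass@broker:5672&persistent
+//     &OpaqueData=data
+//
+// Every extra parameter is folded into dest.push_endpoint_args, and the
+// reply carries an ARN of the form arn:aws:sns:<zonegroup>:<tenant>:mytopic.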
+
+// command (AWS compliant):
+// POST
+// Action=ListTopics
+class RGWPSListTopicsOp : public RGWOp {
+private:
+  std::optional<RGWPubSub> ps;
+  rgw_pubsub_topics result;
+
+public:
+  int verify_permission(optional_yield) override {
+    return 0;
+  }
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+  void execute(optional_yield) override;
+
+  const char* name() const override { return "pubsub_topics_list"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPICS_LIST; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto f = s->formatter;
+    f->open_object_section_in_ns("ListTopicsResponse", AWS_SNS_NS);
+    f->open_object_section("ListTopicsResult");
+    encode_xml("Topics", result, f);
+    f->close_section(); // ListTopicsResult
+    f->open_object_section("ResponseMetadata");
+    encode_xml("RequestId", s->req_id, f);
+    f->close_section(); // ResponseMetadata
+    f->close_section(); // ListTopicsResponse
+    rgw_flush_formatter_and_reset(s, f);
+  }
+};
+
+void RGWPSListTopicsOp::execute(optional_yield y) {
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  op_ret = ps->get_topics(&result);
+  // if there are no topics it is not considered an error
+  op_ret = op_ret == -ENOENT ? 0 : op_ret;
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to get topics, ret=" << op_ret << dendl;
+    return;
+  }
+  if (topics_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) {
+    ldpp_dout(this, 1) << "topics contain secrets and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM;
+    return;
+  }
+  ldpp_dout(this, 20) << "successfully got topics" << dendl;
+}
+
+// command (extension to AWS):
+// POST
+// Action=GetTopic&TopicArn=<topic-arn>
+class RGWPSGetTopicOp : public RGWOp {
+  private:
+  std::string topic_name;
+  std::optional<RGWPubSub> ps;
+  rgw_pubsub_topic_subs result;
+
+  int get_params() {
+    const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn")));
+
+    if (!topic_arn || topic_arn->resource.empty()) {
+      ldpp_dout(this, 1) << "GetTopic Action 'TopicArn' argument is missing or invalid" << dendl;
+      return -EINVAL;
+    }
+
+    topic_name = topic_arn->resource;
+    return 0;
+  }
+
+  public:
+  int verify_permission(optional_yield y) override {
+    return 0;
+  }
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+  void execute(optional_yield y) override;
+
+  const char* name() const override { return "pubsub_topic_get"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_GET; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto f = s->formatter;
+    f->open_object_section("GetTopicResponse");
+    f->open_object_section("GetTopicResult");
+    encode_xml("Topic", result.topic, f);
+    f->close_section();
+    f->open_object_section("ResponseMetadata");
+    encode_xml("RequestId", s->req_id, f);
+    f->close_section();
+    f->close_section();
+    rgw_flush_formatter_and_reset(s, f);
+  }
+};
+
+void RGWPSGetTopicOp::execute(optional_yield y) {
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  op_ret = ps->get_topic(topic_name, &result);
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  if (topic_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) {
+    ldpp_dout(this, 1) << "topic '" << topic_name << "' contains a secret and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM;
+    return;
+  }
+  ldpp_dout(this, 1) << "successfully got topic '" << topic_name << "'" << dendl;
+}
+
+// command (AWS compliant):
+// POST
+// Action=GetTopicAttributes&TopicArn=<topic-arn>
+class RGWPSGetTopicAttributesOp : public RGWOp {
+  private:
+  std::string topic_name;
+  std::optional<RGWPubSub> ps;
+  rgw_pubsub_topic_subs result;
+
+  int get_params() {
+    const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn")));
+
+    if (!topic_arn || topic_arn->resource.empty()) {
+      ldpp_dout(this, 1) << "GetTopicAttribute Action 'TopicArn' argument is missing or invalid" << dendl;
+      return -EINVAL;
+    }
+
+    topic_name = topic_arn->resource;
+    return 0;
+  }
+
+  public:
+  int verify_permission(optional_yield y) override {
+    return 0;
+  }
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+  void execute(optional_yield y) override;
+
+  const char* name() const override { return "pubsub_topic_get"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_GET; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto f = s->formatter;
+    f->open_object_section_in_ns("GetTopicAttributesResponse", AWS_SNS_NS);
+    f->open_object_section("GetTopicAttributesResult");
+    result.topic.dump_xml_as_attributes(f);
+    f->close_section(); // GetTopicAttributesResult
+    f->open_object_section("ResponseMetadata");
+    encode_xml("RequestId", s->req_id, f);
+    f->close_section(); // ResponseMetadata
+    f->close_section(); // GetTopicAttributesResponse
+    rgw_flush_formatter_and_reset(s, f);
+  }
+};
+
+void RGWPSGetTopicAttributesOp::execute(optional_yield y) {
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  op_ret = ps->get_topic(topic_name, &result);
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  if (topic_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) {
+    ldpp_dout(this, 1) << "topic '" << topic_name << "' contains a secret and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM;
+    return;
+  }
+  ldpp_dout(this, 1) << "successfully got topic '" << topic_name << "'" << dendl;
+}
error:" << ret << dendl; + return ret; + } + + return 0; + } + + public: + int verify_permission(optional_yield) override { + return 0; + } + void pre_exec() override { + rgw_bucket_object_pre_exec(s); + } + void execute(optional_yield y) override; + + const char* name() const override { return "pubsub_topic_delete"; } + RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_DELETE; } + uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; } + + void send_response() override { + if (op_ret) { + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s, this, "application/xml"); + + if (op_ret < 0) { + return; + } + + const auto f = s->formatter; + f->open_object_section_in_ns("DeleteTopicResponse", AWS_SNS_NS); + f->open_object_section("ResponseMetadata"); + encode_xml("RequestId", s->req_id, f); + f->close_section(); // ResponseMetadata + f->close_section(); // DeleteTopicResponse + rgw_flush_formatter_and_reset(s, f); + } +}; + +void RGWPSDeleteTopicOp::execute(optional_yield y) { + op_ret = get_params(); + if (op_ret < 0) { + return; + } + ps.emplace(static_cast(driver), s->owner.get_id().tenant); + op_ret = ps->remove_topic(this, topic_name, y); + if (op_ret < 0) { + ldpp_dout(this, 1) << "failed to remove topic '" << topic_name << ", ret=" << op_ret << dendl; + return; + } + ldpp_dout(this, 1) << "successfully removed topic '" << topic_name << "'" << dendl; +} + +namespace { +// utility classes and functions for handling parameters with the following format: +// Attributes.entry.{N}.{key|value}={VALUE} +// N - any unsigned number +// VALUE - url encoded string + +// and Attribute is holding key and value +// ctor and set are done according to the "type" argument +// if type is not "key" or "value" its a no-op +class Attribute { + std::string key; + std::string value; +public: + Attribute(const std::string& type, const std::string& key_or_value) { + set(type, key_or_value); + } + void set(const std::string& type, const std::string& key_or_value) { + if (type == "key") { + key = key_or_value; + } else if (type == "value") { + value = key_or_value; + } + } + const std::string& get_key() const { return key; } + const std::string& get_value() const { return value; } +}; + +using AttributeMap = std::map; + +// aggregate the attributes into a map +// the key and value are associated by the index (N) +// no assumptions are made on the order in which these parameters are added +void update_attribute_map(const std::string& input, AttributeMap& map) { + const boost::char_separator sep("."); + const boost::tokenizer tokens(input, sep); + auto token = tokens.begin(); + if (*token != "Attributes") { + return; + } + ++token; + + if (*token != "entry") { + return; + } + ++token; + + unsigned idx; + try { + idx = std::stoul(*token); + } catch (const std::invalid_argument&) { + return; + } + ++token; + + std::string key_or_value = ""; + // get the rest of the string regardless of dots + // this is to allow dots in the value + while (token != tokens.end()) { + key_or_value.append(*token+"."); + ++token; + } + // remove last separator + key_or_value.pop_back(); + + auto pos = key_or_value.find("="); + if (pos != std::string::npos) { + const auto key_or_value_lhs = key_or_value.substr(0, pos); + const auto key_or_value_rhs = url_decode(key_or_value.substr(pos + 1, key_or_value.size() - 1)); + const auto map_it = map.find(idx); + if (map_it == map.end()) { + // new entry + map.emplace(std::make_pair(idx, Attribute(key_or_value_lhs, key_or_value_rhs))); + } else { + // existing entry + 
+
+void RGWHandler_REST_PSTopic_AWS::rgw_topic_parse_input() {
+  if (post_body.size() > 0) {
+    ldpp_dout(s, 10) << "Content of POST: " << post_body << dendl;
+
+    if (post_body.find("Action") != std::string::npos) {
+      const boost::char_separator<char> sep("&");
+      const boost::tokenizer<boost::char_separator<char>> tokens(post_body, sep);
+      AttributeMap map;
+      for (const auto& t : tokens) {
+        auto pos = t.find("=");
+        if (pos != std::string::npos) {
+          const auto key = t.substr(0, pos);
+          if (key == "Action") {
+            s->info.args.append(key, t.substr(pos + 1, t.size() - 1));
+          } else if (key == "Name" || key == "TopicArn") {
+            const auto value = url_decode(t.substr(pos + 1, t.size() - 1));
+            s->info.args.append(key, value);
+          } else {
+            update_attribute_map(t, map);
+          }
+        }
+      }
+      // update the regular args with the content of the attribute map
+      for (const auto& attr : map) {
+        s->info.args.append(attr.second.get_key(), attr.second.get_value());
+      }
+    }
+    const auto payload_hash = rgw::auth::s3::calc_v4_payload_hash(post_body);
+    s->info.args.append("PayloadHash", payload_hash);
+  }
+}
+
+RGWOp* RGWHandler_REST_PSTopic_AWS::op_post() {
+  rgw_topic_parse_input();
+
+  if (s->info.args.exists("Action")) {
+    const auto action = s->info.args.get("Action");
+    if (action.compare("CreateTopic") == 0)
+      return new RGWPSCreateTopicOp();
+    if (action.compare("DeleteTopic") == 0)
+      return new RGWPSDeleteTopicOp;
+    if (action.compare("ListTopics") == 0)
+      return new RGWPSListTopicsOp();
+    if (action.compare("GetTopic") == 0)
+      return new RGWPSGetTopicOp();
+    if (action.compare("GetTopicAttributes") == 0)
+      return new RGWPSGetTopicAttributesOp();
+  }
+
+  return nullptr;
+}
+
+int RGWHandler_REST_PSTopic_AWS::authorize(const DoutPrefixProvider* dpp, optional_yield y) {
+  return RGW_Auth_S3::authorize(dpp, driver, auth_registry, s, y);
+}
+
+namespace {
+// return a unique topic by prefixing with the notification name: <notification>_<topic>
+std::string topic_to_unique(const std::string& topic, const std::string& notification) {
+  return notification + "_" + topic;
+}
+
+// extract the topic from a unique topic of the form: <notification>_<topic>
+[[maybe_unused]] std::string unique_to_topic(const std::string& unique_topic, const std::string& notification) {
+  if (unique_topic.find(notification + "_") == std::string::npos) {
+    return "";
+  }
+  return unique_topic.substr(notification.length() + 1);
+}
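+// Editor's example (illustrative): for notification id "notif1" attached to
+// topic "mytopic":
+//
+//   topic_to_unique("mytopic", "notif1")        -> "notif1_mytopic"
+//   unique_to_topic("notif1_mytopic", "notif1") -> "mytopic"
+//
+// so each S3 notification owns a private copy of the topic that carries its
+// filter configuration.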
+
+// from list of bucket topics, find the one that was auto-generated by a notification
+auto find_unique_topic(const rgw_pubsub_bucket_topics& bucket_topics, const std::string& notif_name) {
+  auto it = std::find_if(bucket_topics.topics.begin(), bucket_topics.topics.end(), [&](const auto& val) { return notif_name == val.second.s3_id; });
+  return it != bucket_topics.topics.end() ?
+      std::optional<std::reference_wrapper<const rgw_pubsub_topic_filter>>(it->second) :
+      std::nullopt;
+}
+}
+
+int remove_notification_by_topic(const DoutPrefixProvider *dpp, const std::string& topic_name, const RGWPubSub::BucketRef& b, optional_yield y, RGWPubSub& ps) {
+  int op_ret = b->remove_notification(dpp, topic_name, y);
+  if (op_ret < 0) {
+    ldpp_dout(dpp, 1) << "failed to remove notification of topic '" << topic_name << "', ret=" << op_ret << dendl;
+  }
+  op_ret = ps.remove_topic(dpp, topic_name, y);
+  if (op_ret < 0) {
+    ldpp_dout(dpp, 1) << "failed to remove auto-generated topic '" << topic_name << "', ret=" << op_ret << dendl;
+  }
+  return op_ret;
+}
+
+int delete_all_notifications(const DoutPrefixProvider *dpp, const rgw_pubsub_bucket_topics& bucket_topics, const RGWPubSub::BucketRef& b, optional_yield y, RGWPubSub& ps) {
+  // delete all notifications on a bucket
+  for (const auto& topic : bucket_topics.topics) {
+    const auto op_ret = remove_notification_by_topic(dpp, topic.first, b, y, ps);
+    if (op_ret < 0) {
+      return op_ret;
+    }
+  }
+  return 0;
+}
+
+// command (S3 compliant): PUT /<bucket>?notification
+// a "notification" and a subscription will be auto-generated
+// actual configuration is XML encoded in the body of the message
+class RGWPSCreateNotifOp : public RGWDefaultResponseOp {
+  private:
+  std::optional<RGWPubSub> ps;
+  std::string bucket_name;
+  RGWBucketInfo bucket_info;
+  rgw_pubsub_s3_notifications configurations;
+
+  int get_params() {
+    bool exists;
+    const auto no_value = s->info.args.get("notification", &exists);
+    if (!exists) {
+      ldpp_dout(this, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (no_value.length() > 0) {
+      ldpp_dout(this, 1) << "param 'notification' should not have any value" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldpp_dout(this, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+
+  public:
+  int verify_permission(optional_yield y) override;
+
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+
+  const char* name() const override { return "pubsub_notification_create_s3"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_CREATE; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+
+  int get_params_from_body() {
+    const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+    int r;
+    bufferlist data;
+    std::tie(r, data) = read_all_input(s, max_size, false);
+
+    if (r < 0) {
+      ldpp_dout(this, 1) << "failed to read XML payload" << dendl;
+      return r;
+    }
+    if (data.length() == 0) {
+      ldpp_dout(this, 1) << "XML payload missing" << dendl;
+      return -EINVAL;
+    }
+
+    RGWXMLDecoder::XMLParser parser;
+
+    if (!parser.init()) {
+      ldpp_dout(this, 1) << "failed to initialize XML parser" << dendl;
+      return -EINVAL;
+    }
+    if (!parser.parse(data.c_str(), data.length(), 1)) {
+      ldpp_dout(this, 1) << "failed to parse XML payload" << dendl;
+      return -ERR_MALFORMED_XML;
+    }
+    try {
+      // NotificationConfigurations is mandatory
+      // It can be empty which means we delete all the notifications
+      RGWXMLDecoder::decode_xml("NotificationConfiguration", configurations, &parser, true);
+    } catch (RGWXMLDecoder::err& err) {
+      ldpp_dout(this, 1) << "failed to parse XML payload. error: " << err << dendl;
+      return -ERR_MALFORMED_XML;
+    }
+    return 0;
+  }
+
+  void execute(optional_yield) override;
+};
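+// Editor's example (illustrative; element names follow the S3 notification
+// schema as the editor understands it): a PUT /<bucket>?notification body
+// parsed by get_params_from_body() above could be
+//
+//   <NotificationConfiguration>
+//     <TopicConfiguration>
+//       <Id>notif1</Id>
+//       <Topic>arn:aws:sns:default::mytopic</Topic>
+//       <Event>s3:ObjectCreated:*</Event>
+//     </TopicConfiguration>
+//   </NotificationConfiguration>
+//
+// An empty <NotificationConfiguration/> removes all notifications from the
+// bucket, as handled at the top of execute() below.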
error: " << err << dendl; + return -ERR_MALFORMED_XML; + } + return 0; + } + + void execute(optional_yield) override; +}; + +void RGWPSCreateNotifOp::execute(optional_yield y) { + op_ret = get_params_from_body(); + if (op_ret < 0) { + return; + } + + ps.emplace(static_cast(driver), s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); + ceph_assert(b); + + if(configurations.list.empty()) { + // get all topics on a bucket + rgw_pubsub_bucket_topics bucket_topics; + op_ret = b->get_topics(&bucket_topics); + if (op_ret < 0) { + ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl; + return; + } + + op_ret = delete_all_notifications(this, bucket_topics, b, y, *ps); + return; + } + + for (const auto& c : configurations.list) { + const auto& notif_name = c.id; + if (notif_name.empty()) { + ldpp_dout(this, 1) << "missing notification id" << dendl; + op_ret = -EINVAL; + return; + } + if (c.topic_arn.empty()) { + ldpp_dout(this, 1) << "missing topic ARN in notification: '" << notif_name << "'" << dendl; + op_ret = -EINVAL; + return; + } + + const auto arn = rgw::ARN::parse(c.topic_arn); + if (!arn || arn->resource.empty()) { + ldpp_dout(this, 1) << "topic ARN has invalid format: '" << c.topic_arn << "' in notification: '" << notif_name << "'" << dendl; + op_ret = -EINVAL; + return; + } + + if (std::find(c.events.begin(), c.events.end(), rgw::notify::UnknownEvent) != c.events.end()) { + ldpp_dout(this, 1) << "unknown event type in notification: '" << notif_name << "'" << dendl; + op_ret = -EINVAL; + return; + } + + const auto topic_name = arn->resource; + + // get topic information. destination information is stored in the topic + rgw_pubsub_topic topic_info; + op_ret = ps->get_topic(topic_name, &topic_info); + if (op_ret < 0) { + ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; + return; + } + // make sure that full topic configuration match + // TODO: use ARN match function + + // create unique topic name. this has 2 reasons: + // (1) topics cannot be shared between different S3 notifications because they hold the filter information + // (2) make topic clneaup easier, when notification is removed + const auto unique_topic_name = topic_to_unique(topic_name, notif_name); + // generate the internal topic. 
+    // generate the internal topic. destination is stored here for the "push-only" case
+    // when no subscription exists
+    // ARN is cached to make the "GET" method faster
+    op_ret = ps->create_topic(this, unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data, y);
+    if (op_ret < 0) {
+      ldpp_dout(this, 1) << "failed to auto-generate unique topic '" << unique_topic_name <<
+          "', ret=" << op_ret << dendl;
+      return;
+    }
+    ldpp_dout(this, 20) << "successfully auto-generated unique topic '" << unique_topic_name << "'" << dendl;
+    // generate the notification
+    rgw::notify::EventTypeList events;
+    op_ret = b->create_notification(this, unique_topic_name, c.events, std::make_optional(c.filter), notif_name, y);
+    if (op_ret < 0) {
+      ldpp_dout(this, 1) << "failed to auto-generate notification for unique topic '" << unique_topic_name <<
+          "', ret=" << op_ret << dendl;
+      // rollback generated topic (ignore return value)
+      ps->remove_topic(this, unique_topic_name, y);
+      return;
+    }
+    ldpp_dout(this, 20) << "successfully auto-generated notification for unique topic '" << unique_topic_name << "'" << dendl;
+  }
+}
+
+int RGWPSCreateNotifOp::verify_permission(optional_yield y) {
+  int ret = get_params();
+  if (ret < 0) {
+    return ret;
+  }
+
+  std::unique_ptr<rgw::sal::User> user = driver->get_user(s->owner.get_id());
+  std::unique_ptr<rgw::sal::Bucket> bucket;
+  ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name, &bucket, y);
+  if (ret < 0) {
+    ldpp_dout(this, 1) << "failed to get bucket info, cannot verify ownership" << dendl;
+    return ret;
+  }
+  bucket_info = bucket->get_info();
+
+  if (bucket_info.owner != s->owner.get_id()) {
+    ldpp_dout(this, 1) << "user doesn't own bucket, not allowed to create notification" << dendl;
+    return -EPERM;
+  }
+  return 0;
+}
+
+// command (extension to S3): DELETE /<bucket>?notification[=<notification-id>]
+class RGWPSDeleteNotifOp : public RGWDefaultResponseOp {
+  private:
+  std::optional<RGWPubSub> ps;
+  std::string bucket_name;
+  RGWBucketInfo bucket_info;
+  std::string notif_name;
+
+  public:
+  int verify_permission(optional_yield y) override;
+
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+
+  const char* name() const override { return "pubsub_notification_delete_s3"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_DELETE; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+
+  int get_params() {
+    bool exists;
+    notif_name = s->info.args.get("notification", &exists);
+    if (!exists) {
+      ldpp_dout(this, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldpp_dout(this, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+
+  void execute(optional_yield y) override;
+};
+
+void RGWPSDeleteNotifOp::execute(optional_yield y) {
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  auto b = ps->get_bucket(bucket_info.bucket);
+  ceph_assert(b);
+
+  // get all topics on a bucket
+  rgw_pubsub_bucket_topics bucket_topics;
+  op_ret = b->get_topics(&bucket_topics);
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl;
+    return;
+  }
+
+  if (!notif_name.empty()) {
+    // delete a specific notification
+    const auto unique_topic = find_unique_topic(bucket_topics, notif_name);
+    if (unique_topic) {
+      const auto unique_topic_name = unique_topic->get().topic.name;
+      op_ret =
remove_notification_by_topic(this, unique_topic_name, b, y, *ps);
+      return;
+    }
+    // notification to be removed is not found - considered success
+    ldpp_dout(this, 20) << "notification '" << notif_name << "' already removed" << dendl;
+    return;
+  }
+
+  op_ret = delete_all_notifications(this, bucket_topics, b, y, *ps);
+}
+
+int RGWPSDeleteNotifOp::verify_permission(optional_yield y) {
+  int ret = get_params();
+  if (ret < 0) {
+    return ret;
+  }
+
+  std::unique_ptr<rgw::sal::User> user = driver->get_user(s->owner.get_id());
+  std::unique_ptr<rgw::sal::Bucket> bucket;
+  ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name, &bucket, y);
+  if (ret < 0) {
+    return ret;
+  }
+  bucket_info = bucket->get_info();
+
+  if (bucket_info.owner != s->owner.get_id()) {
+    ldpp_dout(this, 1) << "user doesn't own bucket, cannot remove notification" << dendl;
+    return -EPERM;
+  }
+  return 0;
+}
+
+// command (S3 compliant): GET /<bucket>?notification[=<notification-id>]
+class RGWPSListNotifsOp : public RGWOp {
+private:
+  std::string bucket_name;
+  RGWBucketInfo bucket_info;
+  std::optional<RGWPubSub> ps;
+  std::string notif_name;
+  rgw_pubsub_s3_notifications notifications;
+
+  int get_params() {
+    bool exists;
+    notif_name = s->info.args.get("notification", &exists);
+    if (!exists) {
+      ldpp_dout(this, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldpp_dout(this, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+
+  public:
+  int verify_permission(optional_yield y) override;
+
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+
+  const char* name() const override { return "pubsub_notifications_get_s3"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_LIST; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+
+  void execute(optional_yield y) override;
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+    notifications.dump_xml(s->formatter);
+    rgw_flush_formatter_and_reset(s, s->formatter);
+  }
+};
+
+void RGWPSListNotifsOp::execute(optional_yield y) {
+  ps.emplace(static_cast<rgw::sal::RadosStore*>(driver), s->owner.get_id().tenant);
+  auto b = ps->get_bucket(bucket_info.bucket);
+  ceph_assert(b);
+
+  // get all topics on a bucket
+  rgw_pubsub_bucket_topics bucket_topics;
+  op_ret = b->get_topics(&bucket_topics);
+  if (op_ret < 0) {
+    ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  if (!notif_name.empty()) {
+    // get info of a specific notification
+    const auto unique_topic = find_unique_topic(bucket_topics, notif_name);
+    if (unique_topic) {
+      notifications.list.emplace_back(unique_topic->get());
+      return;
+    }
+    op_ret = -ENOENT;
+    ldpp_dout(this, 1) << "failed to get notification info for '" << notif_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  // loop through all topics of the bucket
+  for (const auto& topic : bucket_topics.topics) {
+    if (topic.second.s3_id.empty()) {
+      // not an s3 notification
+      continue;
+    }
+    notifications.list.emplace_back(topic.second);
+  }
+}
+
+int RGWPSListNotifsOp::verify_permission(optional_yield y) {
+  int ret = get_params();
+  if (ret < 0) {
+    return ret;
+  }
+
+  std::unique_ptr<rgw::sal::User> user = driver->get_user(s->owner.get_id());
+  std::unique_ptr<rgw::sal::Bucket> bucket;
+  ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name,
&bucket, y); + if (ret < 0) { + return ret; + } + bucket_info = bucket->get_info(); + + if (bucket_info.owner != s->owner.get_id()) { + ldpp_dout(this, 1) << "user doesn't own bucket, cannot get notification list" << dendl; + return -EPERM; + } + + return 0; +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::op_get() { + return new RGWPSListNotifsOp(); +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::op_put() { + return new RGWPSCreateNotifOp(); +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::op_delete() { + return new RGWPSDeleteNotifOp(); +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::create_get_op() { + return new RGWPSListNotifsOp(); +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::create_put_op() { + return new RGWPSCreateNotifOp(); +} + +RGWOp* RGWHandler_REST_PSNotifs_S3::create_delete_op() { + return new RGWPSDeleteNotifOp(); +} + diff --git a/src/rgw/driver/rados/rgw_rest_pubsub.h b/src/rgw/driver/rados/rgw_rest_pubsub.h new file mode 100644 index 00000000000..3b1a1bc9670 --- /dev/null +++ b/src/rgw/driver/rados/rgw_rest_pubsub.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +#include "rgw_rest_s3.h" + +// s3 compliant notification handler factory +class RGWHandler_REST_PSNotifs_S3 : public RGWHandler_REST_S3 { +protected: + int init_permissions(RGWOp* op, optional_yield y) override {return 0;} + int read_permissions(RGWOp* op, optional_yield y) override {return 0;} + bool supports_quota() override {return false;} + RGWOp* op_get() override; + RGWOp* op_put() override; + RGWOp* op_delete() override; +public: + using RGWHandler_REST_S3::RGWHandler_REST_S3; + virtual ~RGWHandler_REST_PSNotifs_S3() = default; + // following are used to generate the operations when invoked by another REST handler + static RGWOp* create_get_op(); + static RGWOp* create_put_op(); + static RGWOp* create_delete_op(); +}; + +// AWS compliant topics handler factory +class RGWHandler_REST_PSTopic_AWS : public RGWHandler_REST { + const rgw::auth::StrategyRegistry& auth_registry; + const std::string& post_body; + void rgw_topic_parse_input(); +protected: + RGWOp* op_post() override; +public: + RGWHandler_REST_PSTopic_AWS(const rgw::auth::StrategyRegistry& _auth_registry, const std::string& _post_body) : + auth_registry(_auth_registry), + post_body(_post_body) {} + virtual ~RGWHandler_REST_PSTopic_AWS() = default; + int postauth_init(optional_yield) override { return 0; } + int authorize(const DoutPrefixProvider* dpp, optional_yield y) override; +}; diff --git a/src/rgw/driver/rados/rgw_rest_realm.cc b/src/rgw/driver/rados/rgw_rest_realm.cc new file mode 100644 index 00000000000..79640a2a108 --- /dev/null +++ b/src/rgw/driver/rados/rgw_rest_realm.cc @@ -0,0 +1,376 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" +#include "rgw_rest_realm.h" +#include "rgw_rest_s3.h" +#include "rgw_rest_config.h" +#include "rgw_zone.h" +#include "rgw_sal_rados.h" + +#include "services/svc_zone.h" +#include "services/svc_mdlog.h" + +#include "include/ceph_assert.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +// reject 'period push' if we would have to fetch too many intermediate periods +static const uint32_t PERIOD_HISTORY_FETCH_MAX = 64; + +// base period op, shared between Get and Post +class RGWOp_Period_Base : public RGWRESTOp { + protected: + RGWPeriod period; + std::ostringstream error_stream; + public: + int verify_permission(optional_yield) override { return 
0; } + void send_response() override; +}; + +// reply with the period object on success +void RGWOp_Period_Base::send_response() +{ + set_req_state_err(s, op_ret, error_stream.str()); + dump_errno(s); + + if (op_ret < 0) { + if (!s->err.message.empty()) { + ldpp_dout(this, 4) << "Request failed with " << op_ret + << ": " << s->err.message << dendl; + } + end_header(s); + return; + } + + encode_json("period", period, s->formatter); + end_header(s, NULL, "application/json", s->formatter->get_len()); + flusher.flush(); +} + +// GET /admin/realm/period +class RGWOp_Period_Get : public RGWOp_Period_Base { + public: + void execute(optional_yield y) override; + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("zone", RGW_CAP_READ); + } + int verify_permission(optional_yield) override { + return check_caps(s->user->get_caps()); + } + const char* name() const override { return "get_period"; } +}; + +void RGWOp_Period_Get::execute(optional_yield y) +{ + string realm_id, realm_name, period_id; + epoch_t epoch = 0; + RESTArgs::get_string(s, "realm_id", realm_id, &realm_id); + RESTArgs::get_string(s, "realm_name", realm_name, &realm_name); + RESTArgs::get_string(s, "period_id", period_id, &period_id); + RESTArgs::get_uint32(s, "epoch", 0, &epoch); + + period.set_id(period_id); + period.set_epoch(epoch); + + op_ret = period.init(this, driver->ctx(), static_cast(driver)->svc()->sysobj, realm_id, y, realm_name); + if (op_ret < 0) + ldpp_dout(this, 5) << "failed to read period" << dendl; +} + +// POST /admin/realm/period +class RGWOp_Period_Post : public RGWOp_Period_Base { + public: + void execute(optional_yield y) override; + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("zone", RGW_CAP_WRITE); + } + int verify_permission(optional_yield) override { + return check_caps(s->user->get_caps()); + } + const char* name() const override { return "post_period"; } + RGWOpType get_type() override { return RGW_OP_PERIOD_POST; } +}; + +void RGWOp_Period_Post::execute(optional_yield y) +{ + auto cct = driver->ctx(); + + // initialize the period without reading from rados + period.init(this, cct, static_cast(driver)->svc()->sysobj, y, false); + + // decode the period from input + const auto max_size = cct->_conf->rgw_max_put_param_size; + bool empty; + op_ret = get_json_input(cct, s, period, max_size, &empty); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to decode period" << dendl; + return; + } + + // require period.realm_id to match our realm + if (period.get_realm() != static_cast(driver)->svc()->zone->get_realm().get_id()) { + error_stream << "period with realm id " << period.get_realm() + << " doesn't match current realm " << static_cast(driver)->svc()->zone->get_realm().get_id() << std::endl; + op_ret = -EINVAL; + return; + } + + // load the realm and current period from rados; there may be a more recent + // period that we haven't restarted with yet. 
we also don't want to modify + // the objects in use by RGWRados + RGWRealm realm(period.get_realm()); + op_ret = realm.init(this, cct, static_cast(driver)->svc()->sysobj, y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to read current realm: " + << cpp_strerror(-op_ret) << dendl; + return; + } + + RGWPeriod current_period; + op_ret = current_period.init(this, cct, static_cast(driver)->svc()->sysobj, realm.get_id(), y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to read current period: " + << cpp_strerror(-op_ret) << dendl; + return; + } + + // if period id is empty, handle as 'period commit' + if (period.get_id().empty()) { + op_ret = period.commit(this, driver, realm, current_period, error_stream, y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "master zone failed to commit period" << dendl; + } + return; + } + + // if it's not period commit, nobody is allowed to push to the master zone + if (period.get_master_zone() == static_cast(driver)->svc()->zone->get_zone_params().get_id()) { + ldpp_dout(this, 10) << "master zone rejecting period id=" + << period.get_id() << " epoch=" << period.get_epoch() << dendl; + op_ret = -EINVAL; // XXX: error code + return; + } + + // write the period to rados + op_ret = period.store_info(this, false, y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to store period " << period.get_id() << dendl; + return; + } + // set as latest epoch + op_ret = period.update_latest_epoch(this, period.get_epoch(), y); + if (op_ret == -EEXIST) { + // already have this epoch (or a more recent one) + ldpp_dout(this, 4) << "already have epoch >= " << period.get_epoch() + << " for period " << period.get_id() << dendl; + op_ret = 0; + return; + } + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to set latest epoch" << dendl; + return; + } + + auto period_history = static_cast(driver)->svc()->mdlog->get_period_history(); + + // decide whether we can set_current_period() or set_latest_epoch() + if (period.get_id() != current_period.get_id()) { + auto current_epoch = current_period.get_realm_epoch(); + // discard periods in the past + if (period.get_realm_epoch() < current_epoch) { + ldpp_dout(this, 10) << "discarding period " << period.get_id() + << " with realm epoch " << period.get_realm_epoch() + << " older than current epoch " << current_epoch << dendl; + // return success to ack that we have this period + return; + } + // discard periods too far in the future + if (period.get_realm_epoch() > current_epoch + PERIOD_HISTORY_FETCH_MAX) { + ldpp_dout(this, -1) << "discarding period " << period.get_id() + << " with realm epoch " << period.get_realm_epoch() << " too far in " + "the future from current epoch " << current_epoch << dendl; + op_ret = -ENOENT; // XXX: error code + return; + } + // attach a copy of the period into the period history + auto cursor = period_history->attach(this, RGWPeriod{period}, y); + if (!cursor) { + // we're missing some history between the new period and current_period + op_ret = cursor.get_error(); + ldpp_dout(this, -1) << "failed to collect the periods between current period " + << current_period.get_id() << " (realm epoch " << current_epoch + << ") and the new period " << period.get_id() + << " (realm epoch " << period.get_realm_epoch() + << "): " << cpp_strerror(-op_ret) << dendl; + return; + } + if (cursor.has_next()) { + // don't switch if we have a newer period in our history + ldpp_dout(this, 4) << "attached period " << period.get_id() + << " to history, but the history contains newer periods" << dendl; + return; 
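// Worked example of the acceptance checks above (numbers illustrative, not
// from the original change): with PERIOD_HISTORY_FETCH_MAX = 64 and a
// current realm epoch of 10, a pushed period with realm epoch 9 or lower is
// acked but discarded, epochs 10 through 74 can be attached to the period
// history, and epoch 75 or higher is rejected with -ENOENT until the
// intermediate periods have been fetched.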
+ } + // set as current period + op_ret = realm.set_current_period(this, period, y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to update realm's current period" << dendl; + return; + } + ldpp_dout(this, 4) << "period " << period.get_id() + << " is newer than current period " << current_period.get_id() + << ", updating realm's current period and notifying zone" << dendl; + realm.notify_new_period(this, period, y); + return; + } + // reflect the period into our local objects + op_ret = period.reflect(this, y); + if (op_ret < 0) { + ldpp_dout(this, -1) << "failed to update local objects: " + << cpp_strerror(-op_ret) << dendl; + return; + } + ldpp_dout(this, 4) << "period epoch " << period.get_epoch() + << " is newer than current epoch " << current_period.get_epoch() + << ", updating period's latest epoch and notifying zone" << dendl; + realm.notify_new_period(this, period, y); + // update the period history + period_history->insert(RGWPeriod{period}); +} + +class RGWHandler_Period : public RGWHandler_Auth_S3 { + protected: + using RGWHandler_Auth_S3::RGWHandler_Auth_S3; + + RGWOp *op_get() override { return new RGWOp_Period_Get; } + RGWOp *op_post() override { return new RGWOp_Period_Post; } +}; + +class RGWRESTMgr_Period : public RGWRESTMgr { + public: + RGWHandler_REST* get_handler(rgw::sal::Driver* driver, + req_state*, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string&) override { + return new RGWHandler_Period(auth_registry); + } +}; + + +// GET /admin/realm +class RGWOp_Realm_Get : public RGWRESTOp { + std::unique_ptr realm; +public: + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("zone", RGW_CAP_READ); + } + int verify_permission(optional_yield) override { + return check_caps(s->user->get_caps()); + } + void execute(optional_yield y) override; + void send_response() override; + const char* name() const override { return "get_realm"; } +}; + +void RGWOp_Realm_Get::execute(optional_yield y) +{ + string id; + RESTArgs::get_string(s, "id", id, &id); + string name; + RESTArgs::get_string(s, "name", name, &name); + + // read realm + realm.reset(new RGWRealm(id, name)); + op_ret = realm->init(this, g_ceph_context, static_cast(driver)->svc()->sysobj, y); + if (op_ret < 0) + ldpp_dout(this, -1) << "failed to read realm id=" << id + << " name=" << name << dendl; +} + +void RGWOp_Realm_Get::send_response() +{ + set_req_state_err(s, op_ret); + dump_errno(s); + + if (op_ret < 0) { + end_header(s); + return; + } + + encode_json("realm", *realm, s->formatter); + end_header(s, NULL, "application/json", s->formatter->get_len()); + flusher.flush(); +} + +// GET /admin/realm?list +class RGWOp_Realm_List : public RGWRESTOp { + std::string default_id; + std::list realms; +public: + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("zone", RGW_CAP_READ); + } + int verify_permission(optional_yield) override { + return check_caps(s->user->get_caps()); + } + void execute(optional_yield y) override; + void send_response() override; + const char* name() const override { return "list_realms"; } +}; + +void RGWOp_Realm_List::execute(optional_yield y) +{ + { + // read default realm + RGWRealm realm(driver->ctx(), static_cast(driver)->svc()->sysobj); + [[maybe_unused]] int ret = realm.read_default_id(this, default_id, y); + } + op_ret = static_cast(driver)->svc()->zone->list_realms(this, realms); + if (op_ret < 0) + ldpp_dout(this, -1) << "failed to list realms" << dendl; +} + +void RGWOp_Realm_List::send_response() +{ + 
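// The JSON written below has this shape (values illustrative):
//   { "default_info": "f2a65177-416d-4d36-8624-392a6b6452f9",
//     "realms": [ "gold", "silver" ] }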
set_req_state_err(s, op_ret); + dump_errno(s); + + if (op_ret < 0) { + end_header(s); + return; + } + + s->formatter->open_object_section("realms_list"); + encode_json("default_info", default_id, s->formatter); + encode_json("realms", realms, s->formatter); + s->formatter->close_section(); + end_header(s, NULL, "application/json", s->formatter->get_len()); + flusher.flush(); +} + +class RGWHandler_Realm : public RGWHandler_Auth_S3 { +protected: + using RGWHandler_Auth_S3::RGWHandler_Auth_S3; + RGWOp *op_get() override { + if (s->info.args.sub_resource_exists("list")) + return new RGWOp_Realm_List; + return new RGWOp_Realm_Get; + } +}; + +RGWRESTMgr_Realm::RGWRESTMgr_Realm() +{ + // add the /admin/realm/period resource + register_resource("period", new RGWRESTMgr_Period); +} + +RGWHandler_REST* +RGWRESTMgr_Realm::get_handler(rgw::sal::Driver* driver, + req_state*, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string&) +{ + return new RGWHandler_Realm(auth_registry); +} diff --git a/src/rgw/driver/rados/rgw_rest_realm.h b/src/rgw/driver/rados/rgw_rest_realm.h new file mode 100644 index 00000000000..a0d1dc1c92a --- /dev/null +++ b/src/rgw/driver/rados/rgw_rest_realm.h @@ -0,0 +1,16 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw_rest.h" + +class RGWRESTMgr_Realm : public RGWRESTMgr { +public: + RGWRESTMgr_Realm(); + + RGWHandler_REST* get_handler(rgw::sal::Driver* driver, + req_state*, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string&) override; +}; diff --git a/src/rgw/driver/rados/rgw_rest_user.cc b/src/rgw/driver/rados/rgw_rest_user.cc new file mode 100644 index 00000000000..c2aeece247d --- /dev/null +++ b/src/rgw/driver/rados/rgw_rest_user.cc @@ -0,0 +1,1109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/ceph_json.h" + +#include "rgw_op.h" +#include "rgw_user.h" +#include "rgw_rest_user.h" +#include "rgw_sal.h" + +#include "include/str_list.h" +#include "include/ceph_assert.h" + +#include "services/svc_zone.h" +#include "services/svc_sys_obj.h" +#include "rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +class RGWOp_User_List : public RGWRESTOp { + +public: + RGWOp_User_List() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_READ); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "list_user"; } +}; + +void RGWOp_User_List::execute(optional_yield y) +{ + RGWUserAdminOpState op_state(driver); + + uint32_t max_entries; + std::string marker; + RESTArgs::get_uint32(s, "max-entries", 1000, &max_entries); + RESTArgs::get_string(s, "marker", marker, &marker); + + op_state.max_entries = max_entries; + op_state.marker = marker; + op_ret = RGWUserAdminOp_User::list(this, driver, op_state, flusher); +} + +class RGWOp_User_Info : public RGWRESTOp { + +public: + RGWOp_User_Info() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_READ); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "get_user_info"; } +}; + +void RGWOp_User_Info::execute(optional_yield y) +{ + RGWUserAdminOpState op_state(driver); + + std::string uid_str, access_key_str; + bool fetch_stats; + bool sync_stats; + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + RESTArgs::get_string(s, "access-key", 
access_key_str, &access_key_str); + + // if uid was not supplied in rest argument, error out now, otherwise we'll + // end up initializing anonymous user, for which keys.init will eventually + // return -EACESS + if (uid_str.empty() && access_key_str.empty()){ + op_ret=-EINVAL; + return; + } + + rgw_user uid(uid_str); + + RESTArgs::get_bool(s, "stats", false, &fetch_stats); + + RESTArgs::get_bool(s, "sync", false, &sync_stats); + + op_state.set_user_id(uid); + op_state.set_access_key(access_key_str); + op_state.set_fetch_stats(fetch_stats); + op_state.set_sync_stats(sync_stats); + + op_ret = RGWUserAdminOp_User::info(s, driver, op_state, flusher, y); +} + +class RGWOp_User_Create : public RGWRESTOp { + +public: + RGWOp_User_Create() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "create_user"; } +}; + +void RGWOp_User_Create::execute(optional_yield y) +{ + std::string uid_str; + std::string display_name; + std::string email; + std::string access_key; + std::string secret_key; + std::string key_type_str; + std::string caps; + std::string tenant_name; + std::string op_mask_str; + std::string default_placement_str; + std::string placement_tags_str; + + bool gen_key; + bool suspended; + bool system; + bool exclusive; + + int32_t max_buckets; + const int32_t default_max_buckets = + s->cct->_conf.get_val("rgw_user_max_buckets"); + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "display-name", display_name, &display_name); + RESTArgs::get_string(s, "email", email, &email); + RESTArgs::get_string(s, "access-key", access_key, &access_key); + RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); + RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); + RESTArgs::get_string(s, "user-caps", caps, &caps); + RESTArgs::get_string(s, "tenant", tenant_name, &tenant_name); + RESTArgs::get_bool(s, "generate-key", true, &gen_key); + RESTArgs::get_bool(s, "suspended", false, &suspended); + RESTArgs::get_int32(s, "max-buckets", default_max_buckets, &max_buckets); + RESTArgs::get_bool(s, "system", false, &system); + RESTArgs::get_bool(s, "exclusive", false, &exclusive); + RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str); + RESTArgs::get_string(s, "default-placement", default_placement_str, &default_placement_str); + RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); + + if (!s->user->get_info().system && system) { + ldpp_dout(this, 0) << "cannot set system flag by non-system user" << dendl; + op_ret = -EINVAL; + return; + } + + if (!tenant_name.empty()) { + uid.tenant = tenant_name; + } + + // TODO: validate required args are passed in. (for eg. 
uid and display_name here)
+  op_state.set_user_id(uid);
+  op_state.set_display_name(display_name);
+  op_state.set_user_email(email);
+  op_state.set_caps(caps);
+  op_state.set_access_key(access_key);
+  op_state.set_secret_key(secret_key);
+
+  if (!op_mask_str.empty()) {
+    uint32_t op_mask;
+    int ret = rgw_parse_op_type_list(op_mask_str, &op_mask);
+    if (ret < 0) {
+      ldpp_dout(this, 0) << "failed to parse op_mask: " << ret << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+    op_state.set_op_mask(op_mask);
+  }
+
+  if (!key_type_str.empty()) {
+    int32_t key_type = KEY_TYPE_UNDEFINED;
+    if (key_type_str.compare("swift") == 0)
+      key_type = KEY_TYPE_SWIFT;
+    else if (key_type_str.compare("s3") == 0)
+      key_type = KEY_TYPE_S3;
+
+    op_state.set_key_type(key_type);
+  }
+
+  if (max_buckets != default_max_buckets) {
+    if (max_buckets < 0) {
+      max_buckets = -1;
+    }
+    op_state.set_max_buckets(max_buckets);
+  }
+  if (s->info.args.exists("suspended"))
+    op_state.set_suspension(suspended);
+
+  if (s->info.args.exists("system"))
+    op_state.set_system(system);
+
+  if (s->info.args.exists("exclusive"))
+    op_state.set_exclusive(exclusive);
+
+  if (gen_key)
+    op_state.set_generate_key();
+
+  if (!default_placement_str.empty()) {
+    rgw_placement_rule target_rule;
+    target_rule.from_str(default_placement_str);
+    if (!driver->valid_placement(target_rule)) {
+      ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+    op_state.set_default_placement(target_rule);
+  }
+
+  if (!placement_tags_str.empty()) {
+    list<string> placement_tags_list;
+    get_str_list(placement_tags_str, ",", placement_tags_list);
+    op_state.set_placement_tags(placement_tags_list);
+  }
+
+  bufferlist data;
+  op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y);
+  if (op_ret < 0) {
+    ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+    return;
+  }
+  op_ret = RGWUserAdminOp_User::create(s, driver, op_state, flusher, y);
+}
+
+class RGWOp_User_Modify : public RGWRESTOp {
+
+public:
+  RGWOp_User_Modify() {}
+
+  int check_caps(const RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute(optional_yield y) override;
+
+  const char* name() const override { return "modify_user"; }
+};
+
+void RGWOp_User_Modify::execute(optional_yield y)
+{
+  std::string uid_str;
+  std::string display_name;
+  std::string email;
+  std::string access_key;
+  std::string secret_key;
+  std::string key_type_str;
+  std::string op_mask_str;
+  std::string default_placement_str;
+  std::string placement_tags_str;
+
+  bool gen_key;
+  bool suspended;
+  bool system;
+  bool email_set;
+  bool quota_set;
+  int32_t max_buckets;
+
+  RGWUserAdminOpState op_state(driver);
+
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  RESTArgs::get_string(s, "display-name", display_name, &display_name);
+  RESTArgs::get_string(s, "email", email, &email, &email_set);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_bool(s, "generate-key", false, &gen_key);
+  RESTArgs::get_bool(s, "suspended", false, &suspended);
+  RESTArgs::get_int32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets, &quota_set);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+
+  RESTArgs::get_bool(s, "system", false, &system);
+  RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str);
+  RESTArgs::get_string(s,
"default-placement", default_placement_str, &default_placement_str); + RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); + + if (!s->user->get_info().system && system) { + ldpp_dout(this, 0) << "cannot set system flag by non-system user" << dendl; + op_ret = -EINVAL; + return; + } + + op_state.set_user_id(uid); + op_state.set_display_name(display_name); + + if (email_set) + op_state.set_user_email(email); + + op_state.set_access_key(access_key); + op_state.set_secret_key(secret_key); + + if (quota_set) { + if (max_buckets < 0 ) { + max_buckets = -1; + } + op_state.set_max_buckets(max_buckets); + } + if (gen_key) + op_state.set_generate_key(); + + if (!key_type_str.empty()) { + int32_t key_type = KEY_TYPE_UNDEFINED; + if (key_type_str.compare("swift") == 0) + key_type = KEY_TYPE_SWIFT; + else if (key_type_str.compare("s3") == 0) + key_type = KEY_TYPE_S3; + + op_state.set_key_type(key_type); + } + + if (!op_mask_str.empty()) { + uint32_t op_mask; + if (rgw_parse_op_type_list(op_mask_str, &op_mask) < 0) { + ldpp_dout(this, 0) << "failed to parse op_mask" << dendl; + op_ret = -EINVAL; + return; + } + op_state.set_op_mask(op_mask); + } + + if (s->info.args.exists("suspended")) + op_state.set_suspension(suspended); + + if (s->info.args.exists("system")) + op_state.set_system(system); + + if (!op_mask_str.empty()) { + uint32_t op_mask; + int ret = rgw_parse_op_type_list(op_mask_str, &op_mask); + if (ret < 0) { + ldpp_dout(this, 0) << "failed to parse op_mask: " << ret << dendl; + op_ret = -EINVAL; + return; + } + op_state.set_op_mask(op_mask); + } + + if (!default_placement_str.empty()) { + rgw_placement_rule target_rule; + target_rule.from_str(default_placement_str); + if (!driver->valid_placement(target_rule)) { + ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl; + op_ret = -EINVAL; + return; + } + op_state.set_default_placement(target_rule); + } + + if (!placement_tags_str.empty()) { + list placement_tags_list; + get_str_list(placement_tags_str, ",", placement_tags_list); + op_state.set_placement_tags(placement_tags_list); + } + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_User::modify(s, driver, op_state, flusher, y); +} + +class RGWOp_User_Remove : public RGWRESTOp { + +public: + RGWOp_User_Remove() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "remove_user"; } +}; + +void RGWOp_User_Remove::execute(optional_yield y) +{ + std::string uid_str; + bool purge_data; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_bool(s, "purge-data", false, &purge_data); + + // FIXME: no double checking + if (!uid.empty()) + op_state.set_user_id(uid); + + op_state.set_purge_data(purge_data); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_User::remove(s, driver, op_state, flusher, s->yield); +} + +class RGWOp_Subuser_Create : public RGWRESTOp { + +public: + 
RGWOp_Subuser_Create() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "create_subuser"; } +}; + +void RGWOp_Subuser_Create::execute(optional_yield y) +{ + std::string uid_str; + std::string subuser; + std::string secret_key; + std::string access_key; + std::string perm_str; + std::string key_type_str; + + bool gen_subuser = false; // FIXME placeholder + bool gen_secret; + bool gen_access; + + uint32_t perm_mask = 0; + int32_t key_type = KEY_TYPE_SWIFT; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "subuser", subuser, &subuser); + RESTArgs::get_string(s, "access-key", access_key, &access_key); + RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); + RESTArgs::get_string(s, "access", perm_str, &perm_str); + RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); + RESTArgs::get_bool(s, "generate-secret", false, &gen_secret); + RESTArgs::get_bool(s, "gen-access-key", false, &gen_access); + + perm_mask = rgw_str_to_perm(perm_str.c_str()); + op_state.set_perm(perm_mask); + + op_state.set_user_id(uid); + op_state.set_subuser(subuser); + op_state.set_access_key(access_key); + op_state.set_secret_key(secret_key); + op_state.set_generate_subuser(gen_subuser); + + if (gen_access) + op_state.set_gen_access(); + + if (gen_secret) + op_state.set_gen_secret(); + + if (!key_type_str.empty()) { + if (key_type_str.compare("swift") == 0) + key_type = KEY_TYPE_SWIFT; + else if (key_type_str.compare("s3") == 0) + key_type = KEY_TYPE_S3; + } + op_state.set_key_type(key_type); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_Subuser::create(s, driver, op_state, flusher, y); +} + +class RGWOp_Subuser_Modify : public RGWRESTOp { + +public: + RGWOp_Subuser_Modify() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "modify_subuser"; } +}; + +void RGWOp_Subuser_Modify::execute(optional_yield y) +{ + std::string uid_str; + std::string subuser; + std::string secret_key; + std::string key_type_str; + std::string perm_str; + + RGWUserAdminOpState op_state(driver); + + uint32_t perm_mask; + int32_t key_type = KEY_TYPE_SWIFT; + + bool gen_secret; + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "subuser", subuser, &subuser); + RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); + RESTArgs::get_string(s, "access", perm_str, &perm_str); + RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); + RESTArgs::get_bool(s, "generate-secret", false, &gen_secret); + + perm_mask = rgw_str_to_perm(perm_str.c_str()); + op_state.set_perm(perm_mask); + + op_state.set_user_id(uid); + op_state.set_subuser(subuser); + + if (!secret_key.empty()) + op_state.set_secret_key(secret_key); + + if (gen_secret) + op_state.set_gen_secret(); + + if (!key_type_str.empty()) { + if (key_type_str.compare("swift") == 0) + key_type = KEY_TYPE_SWIFT; + else if (key_type_str.compare("s3") == 0) + key_type = KEY_TYPE_S3; + } + 
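// Note (added for clarity): key_type defaults to KEY_TYPE_SWIFT in the
// subuser ops, so without an explicit key-type=s3 parameter a subuser gets
// Swift credentials.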
op_state.set_key_type(key_type); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_Subuser::modify(s, driver, op_state, flusher, y); +} + +class RGWOp_Subuser_Remove : public RGWRESTOp { + +public: + RGWOp_Subuser_Remove() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "remove_subuser"; } +}; + +void RGWOp_Subuser_Remove::execute(optional_yield y) +{ + std::string uid_str; + std::string subuser; + bool purge_keys; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "subuser", subuser, &subuser); + RESTArgs::get_bool(s, "purge-keys", true, &purge_keys); + + op_state.set_user_id(uid); + op_state.set_subuser(subuser); + + if (purge_keys) + op_state.set_purge_keys(); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_Subuser::remove(s, driver, op_state, flusher, y); +} + +class RGWOp_Key_Create : public RGWRESTOp { + +public: + RGWOp_Key_Create() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "create_access_key"; } +}; + +void RGWOp_Key_Create::execute(optional_yield y) +{ + std::string uid_str; + std::string subuser; + std::string access_key; + std::string secret_key; + std::string key_type_str; + + bool gen_key; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "subuser", subuser, &subuser); + RESTArgs::get_string(s, "access-key", access_key, &access_key); + RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); + RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); + RESTArgs::get_bool(s, "generate-key", true, &gen_key); + + op_state.set_user_id(uid); + op_state.set_subuser(subuser); + op_state.set_access_key(access_key); + op_state.set_secret_key(secret_key); + + if (gen_key) + op_state.set_generate_key(); + + if (!key_type_str.empty()) { + int32_t key_type = KEY_TYPE_UNDEFINED; + if (key_type_str.compare("swift") == 0) + key_type = KEY_TYPE_SWIFT; + else if (key_type_str.compare("s3") == 0) + key_type = KEY_TYPE_S3; + + op_state.set_key_type(key_type); + } + + op_ret = RGWUserAdminOp_Key::create(s, driver, op_state, flusher, y); +} + +class RGWOp_Key_Remove : public RGWRESTOp { + +public: + RGWOp_Key_Remove() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "remove_access_key"; } +}; + +void RGWOp_Key_Remove::execute(optional_yield y) +{ + std::string uid_str; + std::string subuser; + std::string access_key; + std::string key_type_str; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, 
"subuser", subuser, &subuser); + RESTArgs::get_string(s, "access-key", access_key, &access_key); + RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); + + op_state.set_user_id(uid); + op_state.set_subuser(subuser); + op_state.set_access_key(access_key); + + if (!key_type_str.empty()) { + int32_t key_type = KEY_TYPE_UNDEFINED; + if (key_type_str.compare("swift") == 0) + key_type = KEY_TYPE_SWIFT; + else if (key_type_str.compare("s3") == 0) + key_type = KEY_TYPE_S3; + + op_state.set_key_type(key_type); + } + + op_ret = RGWUserAdminOp_Key::remove(s, driver, op_state, flusher, y); +} + +class RGWOp_Caps_Add : public RGWRESTOp { + +public: + RGWOp_Caps_Add() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "add_user_caps"; } +}; + +void RGWOp_Caps_Add::execute(optional_yield y) +{ + std::string uid_str; + std::string caps; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "user-caps", caps, &caps); + + op_state.set_user_id(uid); + op_state.set_caps(caps); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_Caps::add(s, driver, op_state, flusher, y); +} + +class RGWOp_Caps_Remove : public RGWRESTOp { + +public: + RGWOp_Caps_Remove() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_WRITE); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "remove_user_caps"; } +}; + +void RGWOp_Caps_Remove::execute(optional_yield y) +{ + std::string uid_str; + std::string caps; + + RGWUserAdminOpState op_state(driver); + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + rgw_user uid(uid_str); + + RESTArgs::get_string(s, "user-caps", caps, &caps); + + op_state.set_user_id(uid); + op_state.set_caps(caps); + + bufferlist data; + op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; + return; + } + op_ret = RGWUserAdminOp_Caps::remove(s, driver, op_state, flusher, y); +} + +struct UserQuotas { + RGWQuota quota; + + UserQuotas() {} + + explicit UserQuotas(RGWUserInfo& info){ + quota.bucket_quota = info.quota.bucket_quota; + quota.user_quota = info.quota.user_quota; + } + + void dump(Formatter *f) const { + encode_json("bucket_quota", quota.bucket_quota, f); + encode_json("user_quota", quota.user_quota, f); + } + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("bucket_quota", quota.bucket_quota, obj); + JSONDecoder::decode_json("user_quota", quota.user_quota, obj); + } +}; + +class RGWOp_Quota_Info : public RGWRESTOp { + +public: + RGWOp_Quota_Info() {} + + int check_caps(const RGWUserCaps& caps) override { + return caps.check_cap("users", RGW_CAP_READ); + } + + void execute(optional_yield y) override; + + const char* name() const override { return "get_quota_info"; } +}; + + +void RGWOp_Quota_Info::execute(optional_yield y) +{ + RGWUserAdminOpState op_state(driver); + + std::string uid_str; + std::string quota_type; + + RESTArgs::get_string(s, "uid", uid_str, &uid_str); + 
RESTArgs::get_string(s, "quota-type", quota_type, &quota_type);
+
+  if (uid_str.empty()) {
+    op_ret = -EINVAL;
+    return;
+  }
+
+  rgw_user uid(uid_str);
+
+  bool show_all = quota_type.empty();
+  bool show_bucket = show_all || (quota_type == "bucket");
+  bool show_user = show_all || (quota_type == "user");
+
+  if (!(show_all || show_bucket || show_user)) {
+    op_ret = -EINVAL;
+    return;
+  }
+
+  op_state.set_user_id(uid);
+
+  RGWUser user;
+  op_ret = user.init(s, driver, op_state, y);
+  if (op_ret < 0)
+    return;
+
+  if (!op_state.has_existing_user()) {
+    op_ret = -ERR_NO_SUCH_USER;
+    return;
+  }
+
+  RGWUserInfo info;
+  string err_msg;
+  op_ret = user.info(info, &err_msg);
+  if (op_ret < 0)
+    return;
+
+  flusher.start(0);
+  if (show_all) {
+    UserQuotas quotas(info);
+    encode_json("quota", quotas, s->formatter);
+  } else if (show_user) {
+    encode_json("user_quota", info.quota.user_quota, s->formatter);
+  } else {
+    encode_json("bucket_quota", info.quota.bucket_quota, s->formatter);
+  }
+
+  flusher.flush();
+}
+
+class RGWOp_Quota_Set : public RGWRESTOp {
+
+public:
+  RGWOp_Quota_Set() {}
+
+  int check_caps(const RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute(optional_yield y) override;
+
+  const char* name() const override { return "set_quota_info"; }
+};
+
+/**
+ * set quota
+ *
+ * two different ways to set the quota info: as json struct in the message body or via http params.
+ *
+ * as json:
+ *
+ * PUT /admin/user?uid=<uid>[&quota-type=<type>]
+ *
+ * whereas quota-type is optional and is either user, or bucket
+ *
+ * if quota-type is not specified then we expect to get a structure that contains both quotas,
+ * otherwise we'll only get the relevant configuration.
+ *
+ * E.g., if quota type not specified:
+ * {
+ *    "user_quota" : {
+ *      "max_size_kb" : 4096,
+ *      "max_objects" : -1,
+ *      "enabled" : false
+ *    },
+ *    "bucket_quota" : {
+ *      "max_size_kb" : 1024,
+ *      "max_objects" : -1,
+ *      "enabled" : true
+ *    }
+ * }
+ *
+ *
+ * or if quota type is specified:
+ * {
+ *   "max_size_kb" : 4096,
+ *   "max_objects" : -1,
+ *   "enabled" : false
+ * }
+ *
+ * Another option is not to pass any body and set the following http params:
+ *
+ *
+ * max-size-kb=<size>
+ * max-objects=<max objects>
+ * enabled[={true,false}]
+ *
+ * all params are optional and default to the current settings. With this type of configuration the
+ * quota-type param is mandatory.
+ *
+ */
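// Illustrative requests for the two styles described above (uid and values
// hypothetical):
//
//   PUT /admin/user?quota&uid=jdoe&quota-type=user&max-objects=1000&enabled=true
//
// configures the user quota via http params alone, while
//
//   PUT /admin/user?quota&uid=jdoe
//   { "user_quota": { "max_size_kb": 4096, "max_objects": -1, "enabled": false },
//     "bucket_quota": { "max_size_kb": 1024, "max_objects": -1, "enabled": true } }
//
// omits quota-type and supplies both quotas as a JSON body.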
+
+void RGWOp_Quota_Set::execute(optional_yield y)
+{
+  RGWUserAdminOpState op_state(driver);
+
+  std::string uid_str;
+  std::string quota_type;
+
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "quota-type", quota_type, &quota_type);
+
+  if (uid_str.empty()) {
+    op_ret = -EINVAL;
+    return;
+  }
+
+  rgw_user uid(uid_str);
+
+  bool set_all = quota_type.empty();
+  bool set_bucket = set_all || (quota_type == "bucket");
+  bool set_user = set_all || (quota_type == "user");
+
+  if (!(set_all || set_bucket || set_user)) {
+    ldpp_dout(this, 20) << "invalid quota type" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
+  bool use_http_params;
+
+  if (s->content_length > 0) {
+    use_http_params = false;
+  } else {
+    const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+    use_http_params = (!encoding || strcmp(encoding, "chunked") != 0);
+  }
+
+  if (use_http_params && set_all) {
+    ldpp_dout(this, 20) << "quota type was not specified, can't set all quotas via http headers" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
+  op_state.set_user_id(uid);
+
+  RGWUser user;
+  op_ret = user.init(s, driver, op_state, y);
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "failed initializing user info: " << op_ret << dendl;
+    return;
+  }
+
+  if (!op_state.has_existing_user()) {
+    op_ret = -ERR_NO_SUCH_USER;
+    return;
+  }
+
+#define QUOTA_INPUT_MAX_LEN 1024
+  if (set_all) {
+    UserQuotas quotas;
+
+    if ((op_ret = get_json_input(driver->ctx(), s, quotas, QUOTA_INPUT_MAX_LEN, NULL)) < 0) {
+      ldpp_dout(this, 20) << "failed to retrieve input" << dendl;
+      return;
+    }
+
+    op_state.set_user_quota(quotas.quota.user_quota);
+    op_state.set_bucket_quota(quotas.quota.bucket_quota);
+  } else {
+    RGWQuotaInfo quota;
+
+    if (!use_http_params) {
+      bool empty;
+      op_ret = get_json_input(driver->ctx(), s, quota, QUOTA_INPUT_MAX_LEN, &empty);
+      if (op_ret < 0) {
+        ldpp_dout(this, 20) << "failed to retrieve input" << dendl;
+        if (!empty)
+          return;
+
+        /* was probably chunked input, but no content provided, configure via http params */
+        use_http_params = true;
+      }
+    }
+
+    if (use_http_params) {
+      RGWUserInfo info;
+      string err_msg;
+      op_ret = user.info(info, &err_msg);
+      if (op_ret < 0) {
+        ldpp_dout(this, 20) << "failed to get user info: " << op_ret << dendl;
+        return;
+      }
+      RGWQuotaInfo *old_quota;
+      if (set_user) {
+        old_quota = &info.quota.user_quota;
+      } else {
+        old_quota = &info.quota.bucket_quota;
+      }
+
+      RESTArgs::get_int64(s, "max-objects", old_quota->max_objects, &quota.max_objects);
+      RESTArgs::get_int64(s, "max-size", old_quota->max_size, &quota.max_size);
+      int64_t max_size_kb;
+      bool has_max_size_kb = false;
+      RESTArgs::get_int64(s, "max-size-kb", 0, &max_size_kb, &has_max_size_kb);
+      if (has_max_size_kb) {
+        quota.max_size = max_size_kb * 1024;
+      }
+      RESTArgs::get_bool(s, "enabled", old_quota->enabled, &quota.enabled);
+    }
+
+    if (set_user) {
+      op_state.set_user_quota(quota);
+    } else {
+      op_state.set_bucket_quota(quota);
+    }
+  }
+
+  string err;
+  op_ret = user.modify(s, op_state, y, &err);
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "failed updating user info: " << op_ret << ": " << err << dendl;
+    return;
+  }
+}
+
+RGWOp *RGWHandler_User::op_get()
+{
+  if (s->info.args.sub_resource_exists("quota"))
+    return new RGWOp_Quota_Info;
+
+  if (s->info.args.sub_resource_exists("list"))
+    return new RGWOp_User_List;
+
+  return new RGWOp_User_Info;
+}
+
+RGWOp *RGWHandler_User::op_put()
+{
+  if (s->info.args.sub_resource_exists("subuser"))
+    return new RGWOp_Subuser_Create;
+
+  if
(s->info.args.sub_resource_exists("key")) + return new RGWOp_Key_Create; + + if (s->info.args.sub_resource_exists("caps")) + return new RGWOp_Caps_Add; + + if (s->info.args.sub_resource_exists("quota")) + return new RGWOp_Quota_Set; + + return new RGWOp_User_Create; +} + +RGWOp *RGWHandler_User::op_post() +{ + if (s->info.args.sub_resource_exists("subuser")) + return new RGWOp_Subuser_Modify; + + return new RGWOp_User_Modify; +} + +RGWOp *RGWHandler_User::op_delete() +{ + if (s->info.args.sub_resource_exists("subuser")) + return new RGWOp_Subuser_Remove; + + if (s->info.args.sub_resource_exists("key")) + return new RGWOp_Key_Remove; + + if (s->info.args.sub_resource_exists("caps")) + return new RGWOp_Caps_Remove; + + return new RGWOp_User_Remove; +} + diff --git a/src/rgw/driver/rados/rgw_rest_user.h b/src/rgw/driver/rados/rgw_rest_user.h new file mode 100644 index 00000000000..ee585be4508 --- /dev/null +++ b/src/rgw/driver/rados/rgw_rest_user.h @@ -0,0 +1,36 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw_rest.h" +#include "rgw_rest_s3.h" + + +class RGWHandler_User : public RGWHandler_Auth_S3 { +protected: + RGWOp *op_get() override; + RGWOp *op_put() override; + RGWOp *op_post() override; + RGWOp *op_delete() override; +public: + using RGWHandler_Auth_S3::RGWHandler_Auth_S3; + ~RGWHandler_User() override = default; + + int read_permissions(RGWOp*, optional_yield) override { + return 0; + } +}; + +class RGWRESTMgr_User : public RGWRESTMgr { +public: + RGWRESTMgr_User() = default; + ~RGWRESTMgr_User() override = default; + + RGWHandler_REST *get_handler(rgw::sal::Driver* driver, + req_state*, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string&) override { + return new RGWHandler_User(auth_registry); + } +}; diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc new file mode 100644 index 00000000000..577569dd5d5 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -0,0 +1,3630 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/Clock.h" +#include "common/errno.h" + +#include "rgw_sal.h" +#include "rgw_sal_rados.h" +#include "rgw_bucket.h" +#include "rgw_multi.h" +#include "rgw_acl_s3.h" +#include "rgw_aio.h" +#include "rgw_aio_throttle.h" +#include "rgw_tracer.h" + +#include "rgw_zone.h" +#include "rgw_rest_conn.h" +#include "rgw_service.h" +#include "rgw_lc.h" +#include "rgw_lc_tier.h" +#include "rgw_rest_admin.h" +#include "rgw_rest_bucket.h" +#include "rgw_rest_metadata.h" +#include "rgw_rest_log.h" +#include "rgw_rest_config.h" +#include "rgw_rest_ratelimit.h" +#include "rgw_rest_realm.h" +#include "rgw_rest_user.h" +#include "services/svc_sys_obj.h" +#include "services/svc_meta.h" +#include "services/svc_meta_be_sobj.h" +#include "services/svc_cls.h" +#include "services/svc_zone.h" +#include "services/svc_tier_rados.h" +#include "services/svc_quota.h" +#include "services/svc_config_key.h" +#include "services/svc_zone_utils.h" +#include "services/svc_role_rados.h" +#include "services/svc_user.h" +#include "cls/rgw/cls_rgw_client.h" + +#include "rgw_pubsub.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +static string mp_ns = RGW_OBJ_NS_MULTIPART; + +namespace rgw::sal { + +// default number of entries to list with each bucket listing call +// (use marker to bridge between calls) +static constexpr size_t listing_max_entries = 1000; + +static int decode_policy(CephContext* cct, + bufferlist& bl, + RGWAccessControlPolicy* policy) +{ + auto iter = bl.cbegin(); + try { + policy->decode(iter); + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl; + return -EIO; + } + if (cct->_conf->subsys.should_gather()) { + ldout(cct, 15) << __func__ << " Read AccessControlPolicy"; + RGWAccessControlPolicy_S3* s3policy = static_cast(policy); + s3policy->to_xml(*_dout); + *_dout << dendl; + } + return 0; +} + +static int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider* dpp, + RadosStore* store, + User* user, + Attrs& bucket_attrs, + RGWAccessControlPolicy* policy, + optional_yield y) +{ + auto aiter = bucket_attrs.find(RGW_ATTR_ACL); + + if (aiter != bucket_attrs.end()) { + int ret = decode_policy(store->ctx(), aiter->second, policy); + if (ret < 0) + return ret; + } else { + ldout(store->ctx(), 0) << "WARNING: couldn't find acl header for bucket, generating default" << dendl; + /* object exists, but policy is broken */ + int r = user->load_user(dpp, y); + if (r < 0) + return r; + + policy->create_default(user->get_id(), user->get_display_name()); + } + return 0; +} + +int RadosCompletions::drain() +{ + int ret = 0; + while (!handles.empty()) { + librados::AioCompletion* handle = handles.front(); + handles.pop_front(); + handle->wait_for_complete(); + int r = handle->get_return_value(); + handle->release(); + if (r < 0) { + ret = r; + } + } + return ret; +} + +int RadosUser::list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, + const std::string& end_marker, uint64_t max, bool need_stats, + BucketList &buckets, optional_yield y) +{ + RGWUserBuckets ulist; + bool is_truncated = false; + int ret; + + buckets.clear(); + ret = store->ctl()->user->list_buckets(dpp, info.user_id, marker, end_marker, max, + need_stats, &ulist, &is_truncated, y); + if (ret < 0) + return ret; + + buckets.set_truncated(is_truncated); + for (const auto& ent : ulist.get_buckets()) { + buckets.add(std::unique_ptr(new 
RadosBucket(this->store, ent.second, this))); + } + + return 0; +} + +int RadosUser::create_bucket(const DoutPrefixProvider* dpp, + const rgw_bucket& b, + const std::string& zonegroup_id, + rgw_placement_rule& placement_rule, + std::string& swift_ver_location, + const RGWQuotaInfo * pquota_info, + const RGWAccessControlPolicy& policy, + Attrs& attrs, + RGWBucketInfo& info, + obj_version& ep_objv, + bool exclusive, + bool obj_lock_enabled, + bool* existed, + req_info& req_info, + std::unique_ptr* bucket_out, + optional_yield y) +{ + int ret; + bufferlist in_data; + RGWBucketInfo master_info; + rgw_bucket* pmaster_bucket; + uint32_t* pmaster_num_shards; + real_time creation_time; + std::unique_ptr bucket; + obj_version objv,* pobjv = NULL; + + /* If it exists, look it up; otherwise create it */ + ret = store->get_bucket(dpp, this, b, &bucket, y); + if (ret < 0 && ret != -ENOENT) + return ret; + + if (ret != -ENOENT) { + RGWAccessControlPolicy old_policy(store->ctx()); + *existed = true; + if (swift_ver_location.empty()) { + swift_ver_location = bucket->get_info().swift_ver_location; + } + placement_rule.inherit_from(bucket->get_info().placement_rule); + + // don't allow changes to the acl policy + int r = rgw_op_get_bucket_policy_from_attr(dpp, store, this, bucket->get_attrs(), + &old_policy, y); + if (r >= 0 && old_policy != policy) { + bucket_out->swap(bucket); + return -EEXIST; + } + } else { + bucket = std::unique_ptr(new RadosBucket(store, b, this)); + *existed = false; + bucket->set_attrs(attrs); + } + + if (!store->svc()->zone->is_meta_master()) { + JSONParser jp; + ret = store->forward_request_to_master(dpp, this, NULL, in_data, &jp, req_info, y); + if (ret < 0) { + return ret; + } + + JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp); + JSONDecoder::decode_json("object_ver", objv, &jp); + JSONDecoder::decode_json("bucket_info", master_info, &jp); + ldpp_dout(dpp, 20) << "parsed: objv.tag=" << objv.tag << " objv.ver=" << objv.ver << dendl; + std::time_t ctime = ceph::real_clock::to_time_t(master_info.creation_time); + ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl; + pmaster_bucket= &master_info.bucket; + creation_time = master_info.creation_time; + pmaster_num_shards = &master_info.layout.current_index.layout.normal.num_shards; + pobjv = &objv; + if (master_info.obj_lock_enabled()) { + info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; + } + } else { + pmaster_bucket = NULL; + pmaster_num_shards = NULL; + if (obj_lock_enabled) + info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; + } + + std::string zid = zonegroup_id; + if (zid.empty()) { + zid = store->svc()->zone->get_zonegroup().get_id(); + } + + if (*existed) { + rgw_placement_rule selected_placement_rule; + ret = store->svc()->zone->select_bucket_placement(dpp, this->get_info(), + zid, placement_rule, + &selected_placement_rule, nullptr, y); + if (selected_placement_rule != info.placement_rule) { + ret = -EEXIST; + bucket_out->swap(bucket); + return ret; + } + } else { + + ret = store->getRados()->create_bucket(this->get_info(), bucket->get_key(), + zid, placement_rule, swift_ver_location, pquota_info, + attrs, info, pobjv, &ep_objv, creation_time, + pmaster_bucket, pmaster_num_shards, y, dpp, + exclusive); + if (ret == -EEXIST) { + *existed = true; + /* bucket already existed, might have raced with another bucket creation, + * or might be partial bucket creation that never completed. 
Read existing + * bucket info, verify that the reported bucket owner is the current user. + * If all is ok then update the user's list of buckets. Otherwise inform + * client about a name conflict. + */ + if (info.owner.compare(this->get_id()) != 0) { + return -EEXIST; + } + ret = 0; + } else if (ret != 0) { + return ret; + } + } + + bucket->set_version(ep_objv); + bucket->get_info() = info; + + RadosBucket* rbucket = static_cast(bucket.get()); + ret = rbucket->link(dpp, this, y, false); + if (ret && !*existed && ret != -EEXIST) { + /* if it exists (or previously existed), don't remove it! */ + ret = rbucket->unlink(dpp, this, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: failed to unlink bucket: ret=" << ret + << dendl; + } + } else if (ret == -EEXIST || (ret == 0 && *existed)) { + ret = -ERR_BUCKET_EXISTS; + } + + bucket_out->swap(bucket); + + return ret; +} + +int RadosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) +{ + return store->ctl()->user->get_attrs_by_uid(dpp, get_id(), &attrs, y, &objv_tracker); +} + +int RadosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) +{ + for(auto& it : new_attrs) { + attrs[it.first] = it.second; + } + return store_user(dpp, y, false); +} + +int RadosUser::read_stats(const DoutPrefixProvider *dpp, + optional_yield y, RGWStorageStats* stats, + ceph::real_time* last_stats_sync, + ceph::real_time* last_stats_update) +{ + return store->ctl()->user->read_stats(dpp, get_id(), stats, y, last_stats_sync, last_stats_update); +} + +int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) +{ + return store->svc()->user->read_stats_async(dpp, get_id(), cb); +} + +int RadosUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) +{ + return store->svc()->user->complete_flush_stats(dpp, get_id(), y); +} + +int RadosUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, + RGWUsageIter& usage_iter, + map& usage) +{ + std::string bucket_name; + return store->getRados()->read_usage(dpp, get_id(), bucket_name, start_epoch, + end_epoch, max_entries, is_truncated, + usage_iter, usage); +} + +int RadosUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) +{ + std::string bucket_name; + + return store->getRados()->trim_usage(dpp, get_id(), bucket_name, start_epoch, end_epoch); +} + +int RadosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) +{ + return store->ctl()->user->get_info_by_uid(dpp, info.user_id, &info, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker).set_attrs(&attrs)); +} + +int RadosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info) +{ + return store->ctl()->user->store_info(dpp, info, y, + RGWUserCtl::PutParams().set_objv_tracker(&objv_tracker) + .set_exclusive(exclusive) + .set_attrs(&attrs) + .set_old_info(old_info)); +} + +int RadosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) +{ + return store->ctl()->user->remove_info(dpp, info, y, + RGWUserCtl::RemoveParams().set_objv_tracker(&objv_tracker)); +} + +int RadosUser::verify_mfa(const std::string& mfa_str, bool* verified, + const DoutPrefixProvider* dpp, optional_yield y) +{ + vector params; + get_str_vec(mfa_str, " ", params); + + if (params.size() != 2) { + ldpp_dout(dpp, 5) << "NOTICE: invalid mfa string provided: " << mfa_str << dendl; + return -EINVAL; + } + + string& serial 
= params[0]; + string& pin = params[1]; + + auto i = info.mfa_ids.find(serial); + if (i == info.mfa_ids.end()) { + ldpp_dout(dpp, 5) << "NOTICE: user does not have mfa device with serial=" << serial << dendl; + return -EACCES; + } + + int ret = store->svc()->cls->mfa.check_mfa(dpp, info.user_id, serial, pin, y); + if (ret < 0) { + ldpp_dout(dpp, 20) << "NOTICE: failed to check MFA, serial=" << serial << dendl; + return -EACCES; + } + + *verified = true; + + return 0; +} + +RadosBucket::~RadosBucket() {} + +int RadosBucket::remove_bucket(const DoutPrefixProvider* dpp, + bool delete_children, + bool forward_to_master, + req_info* req_info, + optional_yield y) +{ + int ret; + + // Refresh info + ret = load_bucket(dpp, y); + if (ret < 0) { + return ret; + } + + ListParams params; + params.list_versions = true; + params.allow_unordered = true; + + ListResults results; + + do { + results.objs.clear(); + + ret = list(dpp, params, 1000, results, y); + if (ret < 0) { + return ret; + } + + if (!results.objs.empty() && !delete_children) { + ldpp_dout(dpp, -1) << "ERROR: could not remove non-empty bucket " << info.bucket.name << + dendl; + return -ENOTEMPTY; + } + + for (const auto& obj : results.objs) { + rgw_obj_key key(obj.key); + /* xxx dang */ + ret = rgw_remove_object(dpp, store, this, key); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + } + } while(results.is_truncated); + + ret = abort_multiparts(dpp, store->ctx()); + if (ret < 0) { + return ret; + } + + // remove lifecycle config, if any (XXX note could be made generic) + (void) store->getRados()->get_lc()->remove_bucket_config( + this, get_attrs()); + + ret = store->ctl()->bucket->sync_user_stats(dpp, info.owner, info, y, nullptr); + if (ret < 0) { + ldout(store->ctx(), 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl; + } + + RGWObjVersionTracker ot; + + // if we deleted children above we will force delete, as any that + // remain is detrius from a prior bug + ret = store->getRados()->delete_bucket(info, ot, y, dpp, !delete_children); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not remove bucket " << + info.bucket.name << dendl; + return ret; + } + + // if bucket has notification definitions associated with it + // they should be removed (note that any pending notifications on the bucket are still going to be sent) + RGWPubSub ps(store, info.owner.tenant); + RGWPubSub::Bucket ps_bucket(&ps, info.bucket); + const auto ps_ret = ps_bucket.remove_notifications(dpp, y); + if (ps_ret < 0 && ps_ret != -ENOENT) { + ldpp_dout(dpp, -1) << "ERROR: unable to remove notifications from bucket. 
ret=" << ps_ret << dendl; + } + + ret = store->ctl()->bucket->unlink_bucket(info.owner, info.bucket, y, dpp, false); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: unable to remove user bucket information" << dendl; + } + + if (forward_to_master) { + bufferlist in_data; + ret = store->forward_request_to_master(dpp, owner, &ot.read_version, in_data, nullptr, *req_info, y); + if (ret < 0) { + if (ret == -ENOENT) { + /* adjust error, we want to return with NoSuchBucket and not + * NoSuchKey */ + ret = -ERR_NO_SUCH_BUCKET; + } + return ret; + } + } + + return ret; +} + +int RadosBucket::remove_bucket_bypass_gc(int concurrent_max, bool + keep_index_consistent, + optional_yield y, const + DoutPrefixProvider *dpp) +{ + int ret; + map stats; + map common_prefixes; + RGWObjectCtx obj_ctx(store); + CephContext *cct = store->ctx(); + + string bucket_ver, master_ver; + + ret = load_bucket(dpp, null_yield); + if (ret < 0) + return ret; + + const auto& index = info.get_current_index(); + ret = read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, NULL); + if (ret < 0) + return ret; + + ret = abort_multiparts(dpp, cct); + if (ret < 0) { + return ret; + } + + rgw::sal::Bucket::ListParams params; + rgw::sal::Bucket::ListResults results; + + params.list_versions = true; + params.allow_unordered = true; + + std::unique_ptr handles = store->get_completions(); + + int max_aio = concurrent_max; + results.is_truncated = true; + + while (results.is_truncated) { + ret = list(dpp, params, listing_max_entries, results, null_yield); + if (ret < 0) + return ret; + + std::vector::iterator it = results.objs.begin(); + for (; it != results.objs.end(); ++it) { + RGWObjState *astate = NULL; + RGWObjManifest *amanifest = nullptr; + std::unique_ptr obj = get_object((*it).key); + + ret = store->getRados()->get_obj_state(dpp, &obj_ctx, obj->get_bucket()->get_info(), + obj.get(), &astate, &amanifest, + false, y); + if (ret == -ENOENT) { + ldpp_dout(dpp, 1) << "WARNING: cannot find obj state for obj " << obj << dendl; + continue; + } + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: get obj state returned with error " << ret << dendl; + return ret; + } + + if (amanifest) { + RGWObjManifest& manifest = *amanifest; + RGWObjManifest::obj_iterator miter = manifest.obj_begin(dpp); + std::unique_ptr head_obj = get_object(manifest.get_obj().key); + rgw_raw_obj raw_head_obj; + dynamic_cast(head_obj.get())->get_raw_obj(&raw_head_obj); + + for (; miter != manifest.obj_end(dpp) && max_aio--; ++miter) { + if (!max_aio) { + ret = handles->drain(); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl; + return ret; + } + max_aio = concurrent_max; + } + + rgw_raw_obj last_obj = miter.get_location().get_raw_obj(store); + if (last_obj == raw_head_obj) { + // have the head obj deleted at the end + continue; + } + + ret = store->delete_raw_obj_aio(dpp, last_obj, handles.get()); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: delete obj aio failed with " << ret << dendl; + return ret; + } + } // for all shadow objs + + ret = head_obj->delete_obj_aio(dpp, astate, handles.get(), keep_index_consistent, null_yield); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: delete obj aio failed with " << ret << dendl; + return ret; + } + } + + if (!max_aio) { + ret = handles->drain(); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl; + return ret; + } + max_aio = concurrent_max; + } + 
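+ /* The max_aio counter above and below implements a simple completion
+  * throttle (summary, not new code): at most concurrent_max raw deletes
+  * are in flight; once the budget is spent, all outstanding aio handles
+  * are drained before more are issued. Equivalent shape:
+  *
+  *   if (--budget == 0) { ret = handles->drain(); budget = concurrent_max; }
+  */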
obj_ctx.invalidate(obj->get_obj()); + } // for all RGW objects in results + } // while is_truncated + + ret = handles->drain(); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl; + return ret; + } + + sync_user_stats(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl; + } + + RGWObjVersionTracker objv_tracker; + + // this function can only be run if caller wanted children to be + // deleted, so we can ignore the check for children as any that + // remain are detritus from a prior bug + ret = remove_bucket(dpp, true, false, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not remove bucket " << this << dendl; + return ret; + } + + return ret; +} + +int RadosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y, bool get_stats) +{ + int ret; + + RGWSI_MetaBackend_CtxParams bectx_params = RGWSI_MetaBackend_CtxParams_SObj(); + RGWObjVersionTracker ep_ot; + if (info.bucket.bucket_id.empty()) { + ret = store->ctl()->bucket->read_bucket_info(info.bucket, &info, y, dpp, + RGWBucketCtl::BucketInstance::GetParams() + .set_mtime(&mtime) + .set_attrs(&attrs) + .set_bectx_params(bectx_params), + &ep_ot); + } else { + ret = store->ctl()->bucket->read_bucket_instance_info(info.bucket, &info, y, dpp, + RGWBucketCtl::BucketInstance::GetParams() + .set_mtime(&mtime) + .set_attrs(&attrs) + .set_bectx_params(bectx_params)); + } + if (ret != 0) { + return ret; + } + + bucket_version = ep_ot.read_version; + + if (get_stats) { + ret = store->ctl()->bucket->read_bucket_stats(info.bucket, &ent, y, dpp); + } + + return ret; +} + +int RadosBucket::read_stats(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, std::string* bucket_ver, std::string* master_ver, + std::map& stats, + std::string* max_marker, bool* syncstopped) +{ + return store->getRados()->get_bucket_stats(dpp, info, idx_layout, shard_id, bucket_ver, master_ver, stats, max_marker, syncstopped); +} + +int RadosBucket::read_stats_async(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, RGWGetBucketStats_CB* ctx) +{ + return store->getRados()->get_bucket_stats_async(dpp, get_info(), idx_layout, shard_id, ctx); +} + +int RadosBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) +{ + return store->ctl()->bucket->sync_user_stats(dpp, owner->get_id(), info, y, &ent); +} + +int RadosBucket::update_container_stats(const DoutPrefixProvider* dpp) +{ + int ret; + map m; + + m[info.bucket.name] = ent; + ret = store->getRados()->update_containers_stats(m, dpp); + if (!ret) + return -EEXIST; + if (ret < 0) + return ret; + + map::iterator iter = m.find(info.bucket.name); + if (iter == m.end()) + return -EINVAL; + + ent.count = iter->second.count; + ent.size = iter->second.size; + ent.size_rounded = iter->second.size_rounded; + ent.creation_time = iter->second.creation_time; + ent.placement_rule = std::move(iter->second.placement_rule); + + info.creation_time = ent.creation_time; + info.placement_rule = ent.placement_rule; + + return 0; +} + +int RadosBucket::check_bucket_shards(const DoutPrefixProvider* dpp) +{ + return store->getRados()->check_bucket_shards(info, info.bucket, get_count(), dpp); +} + +int RadosBucket::link(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint, RGWObjVersionTracker* objv) +{ + RGWBucketEntryPoint ep; + ep.bucket 
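+/* Note on the two paths in load_bucket() above: a bucket is persisted as an
+ * entrypoint object (name -> current instance id, owner, link state) plus a
+ * bucket-instance object (the full RGWBucketInfo). An empty bucket_id means
+ * "resolve through the entrypoint"; a populated bucket_id reads the instance
+ * directly. Sketch:
+ *
+ *   rgw_bucket b;
+ *   b.tenant = "acme"; b.name = "photos"; // no id: entrypoint lookup
+ *   b.bucket_id = "66c3.4099";            // id set: instance lookup (hypothetical id)
+ */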
= info.bucket; + ep.owner = new_user->get_id(); + ep.creation_time = get_creation_time(); + ep.linked = true; + Attrs ep_attrs; + rgw_ep_info ep_data{ep, ep_attrs}; + + int r = store->ctl()->bucket->link_bucket(new_user->get_id(), info.bucket, + get_creation_time(), y, dpp, update_entrypoint, + &ep_data); + if (r < 0) + return r; + + if (objv) + *objv = ep_data.ep_objv; + + return r; +} + +int RadosBucket::unlink(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint) +{ + return store->ctl()->bucket->unlink_bucket(new_user->get_id(), info.bucket, y, dpp, update_entrypoint); +} + +int RadosBucket::chown(const DoutPrefixProvider* dpp, User* new_user, User* old_user, optional_yield y, const std::string* marker) +{ + std::string obj_marker; + + if (marker == nullptr) + marker = &obj_marker; + + int r = this->link(dpp, new_user, y); + if (r < 0) { + return r; + } + if (!old_user) { + return r; + } + + return store->ctl()->bucket->chown(store, this, new_user->get_id(), + old_user->get_display_name(), *marker, y, dpp); +} + +int RadosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::real_time _mtime) +{ + mtime = _mtime; + return store->getRados()->put_bucket_instance_info(info, exclusive, mtime, &attrs, dpp); +} + +/* Make sure to call get_bucket_info() if you need it first */ +bool RadosBucket::is_owner(User* user) +{ + return (info.owner.compare(user->get_id()) == 0); +} + +int RadosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) +{ + return store->getRados()->check_bucket_empty(dpp, info, y); +} + +int RadosBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, + optional_yield y, bool check_size_only) +{ + return store->getRados()->check_quota(dpp, owner->get_id(), get_key(), + quota, obj_size, y, check_size_only); +} + +int RadosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) +{ + for(auto& it : new_attrs) { + attrs[it.first] = it.second; + } + return store->ctl()->bucket->set_bucket_instance_attrs(get_info(), + new_attrs, &get_info().objv_tracker, y, dpp); +} + +int RadosBucket::try_refresh_info(const DoutPrefixProvider* dpp, ceph::real_time* pmtime) +{ + return store->getRados()->try_refresh_bucket_info(info, pmtime, dpp, &attrs); +} + +int RadosBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, + RGWUsageIter& usage_iter, + map& usage) +{ + return store->getRados()->read_usage(dpp, owner->get_id(), get_name(), start_epoch, + end_epoch, max_entries, is_truncated, + usage_iter, usage); +} + +int RadosBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) +{ + return store->getRados()->trim_usage(dpp, owner->get_id(), get_name(), start_epoch, end_epoch); +} + +int RadosBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) +{ + return store->getRados()->remove_objs_from_index(dpp, info, objs_to_unlink); +} + +int RadosBucket::check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) +{ + return store->getRados()->bucket_check_index(dpp, info, &existing_stats, &calculated_stats); +} + +int RadosBucket::rebuild_index(const DoutPrefixProvider *dpp) +{ + return store->getRados()->bucket_rebuild_index(dpp, info); +} + +int RadosBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) +{ + return 
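+/* chown() above is a two-step ownership move (summary of the code): first
+ * re-link the bucket entrypoint to new_user, then, when an old_user is
+ * supplied, hand off to RGWBucketCtl::chown() to rewrite per-object ACL
+ * owners, resuming from *marker if the caller passed one.
+ */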
store->getRados()->cls_obj_set_bucket_tag_timeout(dpp, info, timeout); +} + +int RadosBucket::purge_instance(const DoutPrefixProvider* dpp) +{ + int max_shards = (info.layout.current_index.layout.normal.num_shards > 0 ? info.layout.current_index.layout.normal.num_shards : 1); + for (int i = 0; i < max_shards; i++) { + RGWRados::BucketShard bs(store->getRados()); + int shard_id = (info.layout.current_index.layout.normal.num_shards > 0 ? i : -1); + int ret = bs.init(dpp, info, info.layout.current_index, shard_id); + if (ret < 0) { + cerr << "ERROR: bs.init(bucket=" << info.bucket << ", shard=" << shard_id + << "): " << cpp_strerror(-ret) << std::endl; + return ret; + } + ret = store->getRados()->bi_remove(dpp, bs); + if (ret < 0) { + cerr << "ERROR: failed to remove bucket index object: " + << cpp_strerror(-ret) << std::endl; + return ret; + } + } + return 0; +} + +int RadosBucket::set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy &acl, optional_yield y) +{ + bufferlist aclbl; + + acls = acl; + acl.encode(aclbl); + map& attrs = get_attrs(); + + attrs[RGW_ATTR_ACL] = aclbl; + info.owner = acl.get_owner().get_id(); + + int r = store->ctl()->bucket->store_bucket_instance_info(info.bucket, + info, y, dpp, + RGWBucketCtl::BucketInstance::PutParams().set_attrs(&attrs)); + if (r < 0) { + cerr << "ERROR: failed to set bucket owner: " << cpp_strerror(-r) << std::endl; + return r; + } + + return 0; +} + +std::unique_ptr RadosBucket::get_object(const rgw_obj_key& k) +{ + return std::make_unique(this->store, k, this); +} + +int RadosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max, ListResults& results, optional_yield y) +{ + RGWRados::Bucket target(store->getRados(), get_info()); + if (params.shard_id >= 0) { + target.set_shard_id(params.shard_id); + } + RGWRados::Bucket::List list_op(&target); + + list_op.params.prefix = params.prefix; + list_op.params.delim = params.delim; + list_op.params.marker = params.marker; + list_op.params.ns = params.ns; + list_op.params.end_marker = params.end_marker; + list_op.params.ns = params.ns; + list_op.params.enforce_ns = params.enforce_ns; + list_op.params.access_list_filter = params.access_list_filter; + list_op.params.force_check_filter = params.force_check_filter; + list_op.params.list_versions = params.list_versions; + list_op.params.allow_unordered = params.allow_unordered; + + int ret = list_op.list_objects(dpp, max, &results.objs, &results.common_prefixes, &results.is_truncated, y); + if (ret >= 0) { + results.next_marker = list_op.get_next_marker(); + params.marker = results.next_marker; + } + + return ret; +} + +std::unique_ptr RadosBucket::get_multipart_upload( + const std::string& oid, + std::optional upload_id, + ACLOwner owner, ceph::real_time mtime) +{ + return std::make_unique(this->store, this, oid, upload_id, + std::move(owner), mtime); +} + +int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp, + const string& prefix, + string& marker, + const string& delim, + const int& max_uploads, + vector>& uploads, + map *common_prefixes, + bool *is_truncated) +{ + rgw::sal::Bucket::ListParams params; + rgw::sal::Bucket::ListResults results; + MultipartMetaFilter mp_filter; + + params.prefix = prefix; + params.delim = delim; + params.marker = marker; + params.ns = RGW_OBJ_NS_MULTIPART; + params.access_list_filter = &mp_filter; + + int ret = list(dpp, params, max_uploads, results, null_yield); + + if (ret < 0) + return ret; + + if (!results.objs.empty()) { + for (const rgw_bucket_dir_entry& dentry : results.objs) { + 
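+ /* Pagination sketch for RadosBucket::list() above (assumed usage): loop
+  * while is_truncated; the method already feeds results.next_marker back
+  * into params.marker, so the loop body stays trivial:
+  *
+  *   rgw::sal::Bucket::ListParams p;
+  *   rgw::sal::Bucket::ListResults r;
+  *   do {
+  *     if (bucket->list(dpp, p, 1000, r, y) < 0) break;
+  *     // consume r.objs ...
+  *   } while (r.is_truncated);
+  */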
rgw_obj_key key(dentry.key); + ACLOwner owner(rgw_user(dentry.meta.owner)); + owner.set_name(dentry.meta.owner_display_name); + uploads.push_back(this->get_multipart_upload(key.name, + std::nullopt, std::move(owner))); + } + } + if (common_prefixes) { + *common_prefixes = std::move(results.common_prefixes); + } + *is_truncated = results.is_truncated; + marker = params.marker.name; + + return 0; +} + +int RadosBucket::abort_multiparts(const DoutPrefixProvider* dpp, + CephContext* cct) +{ + constexpr int max = 1000; + int ret, num_deleted = 0; + vector> uploads; + string marker; + bool is_truncated; + + const std::string empty_delim; + const std::string empty_prefix; + + do { + ret = list_multiparts(dpp, empty_prefix, marker, empty_delim, + max, uploads, nullptr, &is_truncated); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << + " ERROR : calling list_bucket_multiparts; ret=" << ret << + "; bucket=\"" << this << "\"" << dendl; + return ret; + } + ldpp_dout(dpp, 20) << __func__ << + " INFO: aborting and cleaning up multipart upload(s); bucket=\"" << + this << "\"; uploads.size()=" << uploads.size() << + "; is_truncated=" << is_truncated << dendl; + + if (!uploads.empty()) { + for (const auto& upload : uploads) { + ret = upload->abort(dpp, cct); + if (ret < 0) { + // we're doing a best-effort; if something cannot be found, + // log it and keep moving forward + if (ret != -ENOENT && ret != -ERR_NO_SUCH_UPLOAD) { + ldpp_dout(dpp, 0) << __func__ << + " ERROR : failed to abort and clean-up multipart upload \"" << + upload->get_meta() << "\"" << dendl; + return ret; + } else { + ldpp_dout(dpp, 10) << __func__ << + " NOTE : unable to find part(s) of " + "aborted multipart upload of \"" << upload->get_meta() << + "\" for cleaning up" << dendl; + } + } + num_deleted++; + } + if (num_deleted) { + ldpp_dout(dpp, 0) << __func__ << + " WARNING : aborted " << num_deleted << + " incomplete multipart uploads" << dendl; + } + } + } while (is_truncated); + + return 0; +} + +std::unique_ptr RadosStore::get_user(const rgw_user &u) +{ + return std::make_unique(this, u); +} + +std::string RadosStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) +{ + return getRados()->get_cluster_fsid(dpp, y); +} + +int RadosStore::get_user_by_access_key(const DoutPrefixProvider* dpp, const std::string& key, optional_yield y, std::unique_ptr* user) +{ + RGWUserInfo uinfo; + User* u; + RGWObjVersionTracker objv_tracker; + + int r = ctl()->user->get_info_by_access_key(dpp, key, &uinfo, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker)); + if (r < 0) + return r; + + u = new RadosUser(this, uinfo); + if (!u) + return -ENOMEM; + + u->get_version_tracker() = objv_tracker; + + user->reset(u); + return 0; +} + +int RadosStore::get_user_by_email(const DoutPrefixProvider* dpp, const std::string& email, optional_yield y, std::unique_ptr* user) +{ + RGWUserInfo uinfo; + User* u; + RGWObjVersionTracker objv_tracker; + + int r = ctl()->user->get_info_by_email(dpp, email, &uinfo, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker)); + if (r < 0) + return r; + + u = new RadosUser(this, uinfo); + if (!u) + return -ENOMEM; + + u->get_version_tracker() = objv_tracker; + + user->reset(u); + return 0; +} + +int RadosStore::get_user_by_swift(const DoutPrefixProvider* dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) +{ + RGWUserInfo uinfo; + User* u; + RGWObjVersionTracker objv_tracker; + + int r = ctl()->user->get_info_by_swift(dpp, user_str, &uinfo, y, 
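+/* What list_multiparts() above returns (sketch): a plain bucket listing
+ * restricted to the hidden multipart namespace (RGW_OBJ_NS_MULTIPART) and
+ * filtered by MultipartMetaFilter, so each entry is one in-progress upload's
+ * .meta object, roughly "<object>.<upload-id>.meta" (oid layout assumed from
+ * the surrounding code, not verified here).
+ */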
RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker));
+ if (r < 0)
+ return r;
+
+ u = new RadosUser(this, uinfo);
+ if (!u)
+ return -ENOMEM;
+
+ u->get_version_tracker() = objv_tracker;
+
+ user->reset(u);
+ return 0;
+}
+
+std::unique_ptr<Object> RadosStore::get_object(const rgw_obj_key& k)
+{
+ return std::make_unique<RadosObject>(this, k);
+}
+
+int RadosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y)
+{
+ int ret;
+ Bucket* bp;
+
+ bp = new RadosBucket(this, b, u);
+ ret = bp->load_bucket(dpp, y);
+ if (ret < 0) {
+ delete bp;
+ return ret;
+ }
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int RadosStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket)
+{
+ Bucket* bp;
+
+ bp = new RadosBucket(this, i, u);
+ /* Don't need to fetch the bucket info, use the provided one */
+
+ bucket->reset(bp);
+ return 0;
+}
+
+int RadosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, const std::string& tenant, const std::string& name, std::unique_ptr<Bucket>* bucket, optional_yield y)
+{
+ rgw_bucket b;
+
+ b.tenant = tenant;
+ b.name = name;
+
+ return get_bucket(dpp, u, b, bucket, y);
+}
+
+bool RadosStore::is_meta_master()
+{
+ return svc()->zone->is_meta_master();
+}
+
+int RadosStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv,
+ bufferlist& in_data,
+ JSONParser* jp, req_info& info,
+ optional_yield y)
+{
+ if (is_meta_master()) {
+ /* We're master, don't forward */
+ return 0;
+ }
+
+ if (!svc()->zone->get_master_conn()) {
+ ldpp_dout(dpp, 0) << "rest connection is invalid" << dendl;
+ return -EINVAL;
+ }
+ ldpp_dout(dpp, 0) << "sending request to master zonegroup" << dendl;
+ bufferlist response;
+ std::string uid_str = user->get_id().to_str();
+#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
+ int ret = svc()->zone->get_master_conn()->forward(dpp, rgw_user(uid_str), info,
+ objv, MAX_REST_RESPONSE,
+ &in_data, &response, y);
+ if (ret < 0)
+ return ret;
+
+ ldpp_dout(dpp, 20) << "response: " << response.c_str() << dendl;
+ if (jp && !jp->parse(response.c_str(), response.length())) {
+ ldpp_dout(dpp, 0) << "failed parsing response from master zonegroup" << dendl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int RadosStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
+ bufferlist& in_data,
+ RGWXMLDecoder::XMLParser* parser, req_info& info,
+ optional_yield y)
+{
+ if (is_meta_master()) {
+ /* We're master, don't forward */
+ return 0;
+ }
+
+ if (!svc()->zone->get_master_conn()) {
+ ldpp_dout(dpp, 0) << "rest connection is invalid" << dendl;
+ return -EINVAL;
+ }
+ ldpp_dout(dpp, 0) << "sending request to master zonegroup" << dendl;
+ bufferlist response;
+#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
+ int ret = svc()->zone->get_master_conn()->forward_iam_request(dpp, key, info,
+ objv, MAX_REST_RESPONSE,
+ &in_data, &response, y);
+ if (ret < 0)
+ return ret;
+
+ ldpp_dout(dpp, 20) << "response: " << response.c_str() << dendl;
+
+ std::string r = response.c_str();
+ std::string str_to_search = "&quot;";
+ std::string str_to_replace = "\"";
+ boost::replace_all(r, str_to_search, str_to_replace);
+ ldpp_dout(dpp, 20) << "r: " << r.c_str() << dendl;
+
+ if (parser && !parser->parse(r.c_str(), r.length(), 1)) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to parse response from master zonegroup" << dendl;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+std::string
RadosStore::zone_unique_id(uint64_t unique_num) +{ + return svc()->zone_utils->unique_id(unique_num); +} + +std::string RadosStore::zone_unique_trans_id(const uint64_t unique_num) +{ + return svc()->zone_utils->unique_trans_id(unique_num); +} + +int RadosStore::get_zonegroup(const std::string& id, + std::unique_ptr* zonegroup) +{ + ZoneGroup* zg; + RGWZoneGroup rzg; + int r = svc()->zone->get_zonegroup(id, rzg); + if (r < 0) + return r; + + zg = new RadosZoneGroup(this, rzg); + if (!zg) + return -ENOMEM; + + zonegroup->reset(zg); + return 0; +} + +int RadosStore::list_all_zones(const DoutPrefixProvider* dpp, std::list& zone_ids) +{ + return svc()->zone->list_zones(dpp, zone_ids); +} + +int RadosStore::cluster_stat(RGWClusterStat& stats) +{ + rados_cluster_stat_t rados_stats; + int ret; + + ret = rados->get_rados_handle()->cluster_stat(rados_stats); + if (ret < 0) + return ret; + + stats.kb = rados_stats.kb; + stats.kb_used = rados_stats.kb_used; + stats.kb_avail = rados_stats.kb_avail; + stats.num_objects = rados_stats.num_objects; + + return ret; +} + +std::unique_ptr RadosStore::get_lifecycle(void) +{ + return std::make_unique(this); +} + +std::unique_ptr RadosStore::get_completions(void) +{ + return std::make_unique(); +} + +std::unique_ptr RadosStore::get_notification( + rgw::sal::Object* obj, rgw::sal::Object* src_obj, req_state* s, rgw::notify::EventType event_type, const std::string* object_name) +{ + return std::make_unique(s, this, obj, src_obj, s, event_type, object_name); +} + +std::unique_ptr RadosStore::get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj, rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) +{ + return std::make_unique(dpp, this, obj, src_obj, event_type, _bucket, _user_id, _user_tenant, _req_id, y); +} + +int RadosStore::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj) +{ + return rados->delete_raw_obj(dpp, obj); +} + +int RadosStore::delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, Completions* aio) +{ + RadosCompletions* raio = static_cast(aio); + + return rados->delete_raw_obj_aio(dpp, obj, raio->handles); +} + +void RadosStore::get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj) +{ + rados->obj_to_raw(placement_rule, obj, raw_obj); +} + +int RadosStore::get_raw_chunk_size(const DoutPrefixProvider* dpp, const rgw_raw_obj& obj, uint64_t* chunk_size) +{ + return rados->get_max_chunk_size(obj.pool, chunk_size, dpp); +} + +int RadosStore::initialize(CephContext *cct, const DoutPrefixProvider *dpp) +{ + std::unique_ptr zg = + std::make_unique(this, svc()->zone->get_zonegroup()); + zone = make_unique(this, std::move(zg)); + return 0; +} + +int RadosStore::log_usage(const DoutPrefixProvider *dpp, map& usage_info) +{ + return rados->log_usage(dpp, usage_info); +} + +int RadosStore::log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) +{ + rgw_raw_obj obj(svc()->zone->get_zone_params().log_pool, oid); + + int ret = rados->append_async(dpp, obj, bl.length(), bl); + if (ret == -ENOENT) { + ret = rados->create_pool(dpp, svc()->zone->get_zone_params().log_pool); + if (ret < 0) + return ret; + // retry + ret = rados->append_async(dpp, obj, bl.length(), bl); + } + + return ret; +} + +int RadosStore::register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, + const map& meta) +{ + return 
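+/* The -ENOENT handling in log_op() above is the usual lazy-pool-creation
+ * pattern (summary): the first append fails because the log pool does not
+ * exist yet, the pool is created, and the append is retried exactly once;
+ * any further error propagates to the caller.
+ */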
rados->register_to_service_map(dpp, daemon_type, meta); +} + +void RadosStore::get_quota(RGWQuota& quota) +{ + quota.bucket_quota = svc()->quota->get_bucket_quota(); + quota.user_quota = svc()->quota->get_user_quota(); +} + +void RadosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) +{ + bucket_ratelimit = svc()->zone->get_current_period().get_config().bucket_ratelimit; + user_ratelimit = svc()->zone->get_current_period().get_config().user_ratelimit; + anon_ratelimit = svc()->zone->get_current_period().get_config().anon_ratelimit; +} + +int RadosStore::set_buckets_enabled(const DoutPrefixProvider* dpp, vector& buckets, bool enabled) +{ + return rados->set_buckets_enabled(buckets, enabled, dpp); +} + +int RadosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef* phandler, + optional_yield y) +{ + return ctl()->bucket->get_sync_policy_handler(zone, bucket, phandler, y, dpp); +} + +RGWDataSyncStatusManager* RadosStore::get_data_sync_manager(const rgw_zone_id& source_zone) +{ + return rados->get_data_sync_manager(source_zone); +} + +int RadosStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, + RGWUsageIter& usage_iter, + map& usage) +{ + rgw_user uid; + std::string bucket_name; + + return rados->read_usage(dpp, uid, bucket_name, start_epoch, end_epoch, max_entries, + is_truncated, usage_iter, usage); +} + +int RadosStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) +{ + rgw_user uid; + std::string bucket_name; + + return rados->trim_usage(dpp, uid, bucket_name, start_epoch, end_epoch); +} + +int RadosStore::get_config_key_val(std::string name, bufferlist* bl) +{ + return svc()->config_key->get(name, true, bl); +} + +int RadosStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) +{ + return ctl()->meta.mgr->list_keys_init(dpp, section, marker, phandle); +} + +int RadosStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list& keys, bool* truncated) +{ + return ctl()->meta.mgr->list_keys_next(dpp, handle, max, keys, truncated); +} + +void RadosStore::meta_list_keys_complete(void* handle) +{ + ctl()->meta.mgr->list_keys_complete(handle); +} + +std::string RadosStore::meta_get_marker(void* handle) +{ + return ctl()->meta.mgr->get_marker(handle); +} + +int RadosStore::meta_remove(const DoutPrefixProvider* dpp, std::string& metadata_key, optional_yield y) +{ + return ctl()->meta.mgr->remove(metadata_key, y, dpp); +} + +void RadosStore::finalize(void) +{ + if (rados) + rados->finalize(); +} + +void RadosStore::register_admin_apis(RGWRESTMgr* mgr) +{ + mgr->register_resource("user", new RGWRESTMgr_User); + mgr->register_resource("bucket", new RGWRESTMgr_Bucket); + /*Registering resource for /admin/metadata */ + mgr->register_resource("metadata", new RGWRESTMgr_Metadata); + mgr->register_resource("log", new RGWRESTMgr_Log); + /* XXX These may become global when cbodley is done with his zone work */ + mgr->register_resource("config", new RGWRESTMgr_Config); + mgr->register_resource("realm", new RGWRESTMgr_Realm); + mgr->register_resource("ratelimit", new RGWRESTMgr_Ratelimit); +} + +std::unique_ptr RadosStore::get_lua_manager() +{ + return std::make_unique(this); +} + +std::unique_ptr 
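+/* Iteration protocol for the metadata key APIs above (sketch; signatures as
+ * in this file):
+ *
+ *   void* handle = nullptr;
+ *   int r = store->meta_list_keys_init(dpp, "bucket", "", &handle);
+ *   bool truncated = true;
+ *   std::list<std::string> keys;
+ *   while (r >= 0 && truncated) {
+ *     keys.clear();
+ *     r = store->meta_list_keys_next(dpp, handle, 1000, keys, &truncated);
+ *     // consume keys ...
+ *   }
+ *   store->meta_list_keys_complete(handle);
+ */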
RadosStore::get_role(std::string name, + std::string tenant, + std::string path, + std::string trust_policy, + std::string max_session_duration_str, + std::multimap tags) +{ + return std::make_unique(this, name, tenant, path, trust_policy, max_session_duration_str, tags); +} + +std::unique_ptr RadosStore::get_role(std::string id) +{ + return std::make_unique(this, id); +} + +std::unique_ptr RadosStore::get_role(const RGWRoleInfo& info) +{ + return std::make_unique(this, info); +} + +int RadosStore::get_roles(const DoutPrefixProvider *dpp, + optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + vector>& roles) +{ + auto pool = svc()->zone->get_zone_params().roles_pool; + std::string prefix; + + // List all roles if path prefix is empty + if (! path_prefix.empty()) { + prefix = tenant + RGWRole::role_path_oid_prefix + path_prefix; + } else { + prefix = tenant + RGWRole::role_path_oid_prefix; + } + + //Get the filtered objects + list result; + bool is_truncated; + RGWListRawObjsCtx ctx; + do { + list oids; + int r = rados->list_raw_objects(dpp, pool, prefix, 1000, ctx, oids, &is_truncated); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: listing filtered objects failed: " + << prefix << ": " << cpp_strerror(-r) << dendl; + return r; + } + for (const auto& iter : oids) { + result.push_back(iter.substr(RGWRole::role_path_oid_prefix.size())); + } + } while (is_truncated); + + for (const auto& it : result) { + //Find the role oid prefix from the end + size_t pos = it.rfind(RGWRole::role_oid_prefix); + if (pos == std::string::npos) { + continue; + } + // Split the result into path and info_oid + id + std::string path = it.substr(0, pos); + + /*Make sure that prefix is part of path (False results could've been returned) + because of the role info oid + id appended to the path)*/ + if(path_prefix.empty() || path.find(path_prefix) != std::string::npos) { + //Get id from info oid prefix + id + std::string id = it.substr(pos + RGWRole::role_oid_prefix.length()); + + std::unique_ptr role = get_role(id); + int ret = role->read_info(dpp, y); + if (ret < 0) { + return ret; + } + roles.push_back(std::move(role)); + } + } + + return 0; +} + +std::unique_ptr RadosStore::get_oidc_provider() +{ + return std::make_unique(this); +} + +int RadosStore::get_oidc_providers(const DoutPrefixProvider *dpp, + const std::string& tenant, + vector>& providers) +{ + std::string prefix = tenant + RGWOIDCProvider::oidc_url_oid_prefix; + auto pool = svc()->zone->get_zone_params().oidc_pool; + + //Get the filtered objects + list result; + bool is_truncated; + RGWListRawObjsCtx ctx; + do { + list oids; + int r = rados->list_raw_objects(dpp, pool, prefix, 1000, ctx, oids, &is_truncated); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: listing filtered objects failed: OIDC pool: " + << pool.name << ": " << prefix << ": " << cpp_strerror(-r) << dendl; + return r; + } + for (const auto& iter : oids) { + std::unique_ptr provider = get_oidc_provider(); + bufferlist bl; + + r = rgw_get_system_obj(svc()->sysobj, pool, iter, bl, nullptr, nullptr, null_yield, dpp); + if (r < 0) { + return r; + } + + try { + using ceph::decode; + auto iter = bl.cbegin(); + decode(*provider, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode oidc provider info from pool: " + << pool.name << ": " << iter << dendl; + return -EIO; + } + + providers.push_back(std::move(provider)); + } + } while (is_truncated); + + return 0; +} + +std::unique_ptr RadosStore::get_append_writer(const DoutPrefixProvider 
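+/* Naming scheme assumed by the prefix arithmetic in get_roles() above
+ * (reconstructed from the code):
+ *
+ *   <tenant> + role_path_oid_prefix + <path> + role_oid_prefix + <id>
+ *
+ * rfind(role_oid_prefix) splits <path> from <id>; entries whose recovered
+ * path does not contain the requested path_prefix are false positives from
+ * the raw prefix listing and are skipped.
+ */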
*dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + const std::string& unique_tag, + uint64_t position, + uint64_t *cur_accounted_size) +{ + auto aio = rgw::make_throttle(ctx()->_conf->rgw_put_obj_min_window_size, y); + return std::make_unique(dpp, y, + std::move(_head_obj), + this, std::move(aio), owner, + ptail_placement_rule, + unique_tag, position, + cur_accounted_size); +} + +std::unique_ptr RadosStore::get_atomic_writer(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t olh_epoch, + const std::string& unique_tag) +{ + auto aio = rgw::make_throttle(ctx()->_conf->rgw_put_obj_min_window_size, y); + return std::make_unique(dpp, y, + std::move(_head_obj), + this, std::move(aio), owner, + ptail_placement_rule, + olh_epoch, unique_tag); +} + +const std::string& RadosStore::get_compression_type(const rgw_placement_rule& rule) +{ + return svc()->zone->get_zone_params().get_compression_type(rule); +} + +bool RadosStore::valid_placement(const rgw_placement_rule& rule) +{ + return svc()->zone->get_zone_params().valid_placement(rule); +} + +int RadosStore::get_obj_head_ioctx(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx* ioctx) +{ + return rados->get_obj_head_ioctx(dpp, bucket_info, obj, ioctx); +} + +RadosObject::~RadosObject() +{ + if (rados_ctx_owned) + delete rados_ctx; +} + +int RadosObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **pstate, optional_yield y, bool follow_olh) +{ + int ret = store->getRados()->get_obj_state(dpp, rados_ctx, bucket->get_info(), this, pstate, &manifest, follow_olh, y); + if (ret < 0) { + return ret; + } + + /* Don't overwrite obj, atomic, or prefetch */ + rgw_obj obj = get_obj(); + bool is_atomic = state.is_atomic; + bool prefetch_data = state.prefetch_data; + + state = **pstate; + + state.obj = obj; + state.is_atomic = is_atomic; + state.prefetch_data = prefetch_data; + return ret; +} + +int RadosObject::read_attrs(const DoutPrefixProvider* dpp, RGWRados::Object::Read &read_op, optional_yield y, rgw_obj* target_obj) +{ + read_op.params.attrs = &attrs; + read_op.params.target_obj = target_obj; + read_op.params.obj_size = &state.size; + read_op.params.lastmod = &state.mtime; + + return read_op.prepare(y, dpp); +} + +int RadosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) +{ + Attrs empty; + return store->getRados()->set_attrs(dpp, rados_ctx, + bucket->get_info(), + this, + setattrs ? *setattrs : empty, + delattrs ? 
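+/* Both writer factories above share one shape (summary): build an AIO
+ * throttle sized by rgw_put_obj_min_window_size, then hand it to the
+ * concrete writer, bounding the bytes of in-flight tail writes per PUT. The
+ * append writer is keyed by position/cur_accounted_size, the atomic writer
+ * by olh_epoch (versioning).
+ */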
delattrs : nullptr, + y); +} + +int RadosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj) +{ + RGWRados::Object op_target(store->getRados(), bucket, *rados_ctx, this); + RGWRados::Object::Read read_op(&op_target); + + return read_attrs(dpp, read_op, y, target_obj); +} + +int RadosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) +{ + rgw_obj target = get_obj(); + rgw_obj save = get_obj(); + int r = get_obj_attrs(y, dpp, &target); + if (r < 0) { + return r; + } + + /* Temporarily set target */ + state.obj = target; + set_atomic(); + attrs[attr_name] = attr_val; + r = set_obj_attrs(dpp, &attrs, nullptr, y); + /* Restore target */ + state.obj = save; + + return r; +} + +int RadosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) +{ + Attrs rmattr; + bufferlist bl; + + set_atomic(); + rmattr[attr_name] = bl; + return set_obj_attrs(dpp, nullptr, &rmattr, y); +} + +bool RadosObject::is_expired() { + auto iter = attrs.find(RGW_ATTR_DELETE_AT); + if (iter != attrs.end()) { + utime_t delete_at; + try { + auto bufit = iter->second.cbegin(); + decode(delete_at, bufit); + } catch (buffer::error& err) { + ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl; + return false; + } + + if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { + return true; + } + } + + return false; +} + +void RadosObject::gen_rand_obj_instance_name() +{ + store->getRados()->gen_rand_obj_instance_name(&state.obj.key); +} + +void RadosObject::raw_obj_to_obj(const rgw_raw_obj& raw_obj) +{ + rgw_obj tobj = get_obj(); + RGWSI_Tier_RADOS::raw_obj_to_obj(get_bucket()->get_key(), raw_obj, &tobj); + set_key(tobj.key); +} + +void RadosObject::get_raw_obj(rgw_raw_obj* raw_obj) +{ + store->getRados()->obj_to_raw((bucket->get_info()).placement_rule, get_obj(), raw_obj); +} + +int RadosObject::omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count, + std::map *m, + bool* pmore, optional_yield y) +{ + rgw_raw_obj raw_obj; + get_raw_obj(&raw_obj); + auto sysobj = store->svc()->sysobj->get_obj(raw_obj); + + return sysobj.omap().get_vals(dpp, marker, count, m, pmore, y); +} + +int RadosObject::omap_get_all(const DoutPrefixProvider *dpp, std::map *m, + optional_yield y) +{ + rgw_raw_obj raw_obj; + get_raw_obj(&raw_obj); + auto sysobj = store->svc()->sysobj->get_obj(raw_obj); + + return sysobj.omap().get_all(dpp, m, y); +} + +int RadosObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, + const std::set& keys, + Attrs* vals) +{ + int ret; + rgw_raw_obj head_obj; + librados::IoCtx cur_ioctx; + rgw_obj obj = get_obj(); + + store->getRados()->obj_to_raw(bucket->get_placement_rule(), obj, &head_obj); + ret = store->get_obj_head_ioctx(dpp, bucket->get_info(), obj, &cur_ioctx); + if (ret < 0) { + return ret; + } + + return cur_ioctx.omap_get_vals_by_keys(oid, keys, vals); +} + +int RadosObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) +{ + rgw_raw_obj raw_meta_obj; + rgw_obj obj = get_obj(); + + store->getRados()->obj_to_raw(bucket->get_placement_rule(), obj, &raw_meta_obj); + + auto sysobj = store->svc()->sysobj->get_obj(raw_meta_obj); + + return sysobj.omap().set_must_exist(must_exist).set(dpp, key, val, y); +} + +std::unique_ptr RadosObject::get_serializer(const DoutPrefixProvider 
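+/* modify_obj_attrs() above is a packaged read-modify-write (assumed usage;
+ * the attr name below is hypothetical):
+ *
+ *   bufferlist bl;
+ *   bl.append("value");
+ *   int r = obj->modify_obj_attrs("user.rgw.x-example", bl, y, dpp);
+ *   // internally: get_obj_attrs() -> set the one key -> set_obj_attrs(),
+ *   // with state.obj temporarily pointed at the read target.
+ */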
*dpp, const std::string& lock_name) +{ + return std::make_unique(dpp, store, this, lock_name); +} + +int RadosObject::transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider* dpp, + optional_yield y) +{ + return store->getRados()->transition_obj(*rados_ctx, bucket, *this, placement_rule, mtime, olh_epoch, dpp, y); +} + +int RadosObject::transition_to_cloud(Bucket* bucket, + rgw::sal::PlacementTier* tier, + rgw_bucket_dir_entry& o, + std::set& cloud_targets, + CephContext* cct, + bool update_object, + const DoutPrefixProvider* dpp, + optional_yield y) +{ + /* init */ + rgw::sal::RadosPlacementTier* rtier = static_cast(tier); + string id = "cloudid"; + string endpoint = rtier->get_rt().t.s3.endpoint; + RGWAccessKey key = rtier->get_rt().t.s3.key; + string region = rtier->get_rt().t.s3.region; + HostStyle host_style = rtier->get_rt().t.s3.host_style; + string bucket_name = rtier->get_rt().t.s3.target_path; + const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup(); + + if (bucket_name.empty()) { + bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() + + "-cloud-bucket"; + boost::algorithm::to_lower(bucket_name); + } + + /* Create RGW REST connection */ + S3RESTConn conn(cct, id, { endpoint }, key, zonegroup.get_id(), region, host_style); + + RGWLCCloudTierCtx tier_ctx(cct, dpp, o, store, bucket->get_info(), + this, conn, bucket_name, + rtier->get_rt().t.s3.target_storage_class); + tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings; + tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size; + tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold; + tier_ctx.storage_class = tier->get_storage_class(); + + ldpp_dout(dpp, 0) << "Transitioning object(" << o.key << ") to the cloud endpoint(" << endpoint << ")" << dendl; + + /* Transition object to cloud end point */ + int ret = rgw_cloud_tier_transfer_object(tier_ctx, cloud_targets); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to transfer object(" << o.key << ") to the cloud endpoint(" << endpoint << ") ret=" << ret << dendl; + return ret; + } + + if (update_object) { + real_time read_mtime; + + std::unique_ptr read_op(get_read_op()); + read_op->params.lastmod = &read_mtime; + + ret = read_op->prepare(null_yield, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: Updating tier object(" << o.key << ") failed ret=" << ret << dendl; + return ret; + } + + if (read_mtime != tier_ctx.o.meta.mtime) { + /* raced */ + ldpp_dout(dpp, 0) << "ERROR: Updating tier object(" << o.key << ") failed ret=" << -ECANCELED << dendl; + return -ECANCELED; + } + + rgw_placement_rule target_placement; + target_placement.inherit_from(tier_ctx.bucket_info.placement_rule); + target_placement.storage_class = tier->get_storage_class(); + + ret = write_cloud_tier(dpp, null_yield, tier_ctx.o.versioned_epoch, + tier, tier_ctx.is_multipart_upload, + target_placement, tier_ctx.obj); + + } + + return ret; +} + +int RadosObject::write_cloud_tier(const DoutPrefixProvider* dpp, + optional_yield y, + uint64_t olh_epoch, + PlacementTier* tier, + bool is_multipart_upload, + rgw_placement_rule& target_placement, + Object* head_obj) +{ + rgw::sal::RadosPlacementTier* rtier = static_cast(tier); + map attrs = get_attrs(); + RGWRados::Object op_target(store->getRados(), bucket, *rados_ctx, this); + RGWRados::Object::Write obj_op(&op_target); + + obj_op.meta.modify_tail = true; + 
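+ /* What follows builds a zero-sized "stub" head object whose manifest
+  * points at the cloud tier instead of local tail objects: category
+  * CloudTiered, empty data, manifest tier type "cloud-s3", obj_size 0,
+  * RGW_ATTR_STORAGE_CLASS rewritten, ID/tail tags dropped. Later reads are
+  * answered from this manifest rather than RADOS data. (Summary of the
+  * code below.)
+  */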
obj_op.meta.flags = PUT_OBJ_CREATE; + obj_op.meta.category = RGWObjCategory::CloudTiered; + obj_op.meta.delete_at = real_time(); + bufferlist blo; + obj_op.meta.data = &blo; + obj_op.meta.if_match = NULL; + obj_op.meta.if_nomatch = NULL; + obj_op.meta.user_data = NULL; + obj_op.meta.zones_trace = NULL; + obj_op.meta.delete_at = real_time(); + obj_op.meta.olh_epoch = olh_epoch; + + RGWObjManifest *pmanifest; + RGWObjManifest manifest; + + pmanifest = &manifest; + RGWObjTier tier_config; + tier_config.name = tier->get_storage_class(); + tier_config.tier_placement = rtier->get_rt(); + tier_config.is_multipart_upload = is_multipart_upload; + + pmanifest->set_tier_type("cloud-s3"); + pmanifest->set_tier_config(tier_config); + + /* check if its necessary */ + pmanifest->set_head(target_placement, head_obj->get_obj(), 0); + pmanifest->set_tail_placement(target_placement, head_obj->get_obj().bucket); + pmanifest->set_obj_size(0); + obj_op.meta.manifest = pmanifest; + + /* update storage class */ + bufferlist bl; + bl.append(tier->get_storage_class()); + attrs[RGW_ATTR_STORAGE_CLASS] = bl; + + attrs.erase(RGW_ATTR_ID_TAG); + attrs.erase(RGW_ATTR_TAIL_TAG); + + return obj_op.write_meta(dpp, 0, 0, attrs, y); +} + +int RadosObject::get_max_chunk_size(const DoutPrefixProvider* dpp, rgw_placement_rule placement_rule, uint64_t* max_chunk_size, uint64_t* alignment) +{ + return store->getRados()->get_max_chunk_size(placement_rule, get_obj(), max_chunk_size, dpp, alignment); +} + +void RadosObject::get_max_aligned_size(uint64_t size, uint64_t alignment, + uint64_t* max_size) +{ + store->getRados()->get_max_aligned_size(size, alignment, max_size); +} + +bool RadosObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) +{ + rgw_obj obj; + rgw_pool p1, p2; + + obj = get_obj(); + + if (r1 == r2) + return true; + + if (!store->getRados()->get_obj_data_pool(r1, obj, &p1)) { + return false; + } + if (!store->getRados()->get_obj_data_pool(r2, obj, &p2)) { + return false; + } + + return p1 == p2; +} + +int RadosObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) +{ + int ret; + RGWObjManifest *amanifest{nullptr}; + rgw_raw_obj head_obj; + + RGWRados::Object op_target(store->getRados(), get_bucket(), *rados_ctx, this); + RGWRados::Object::Read parent_op(&op_target); + uint64_t obj_size; + + parent_op.params.obj_size = &obj_size; + parent_op.params.attrs = &get_attrs(); + + ret = parent_op.prepare(y, dpp); + if (ret < 0) { + return ret; + } + + head_obj = parent_op.state.head_obj; + + ret = op_target.get_manifest(dpp, &amanifest, y); + if (ret < 0) { + return ret; + } + + ::encode_json("head", head_obj, f); + ::encode_json("manifest", *amanifest, f); + f->open_array_section("data_location"); + for (auto miter = amanifest->obj_begin(dpp); miter != amanifest->obj_end(dpp); ++miter) { + f->open_object_section("obj"); + rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(store); + uint64_t ofs = miter.get_ofs(); + uint64_t left = amanifest->get_obj_size() - ofs; + ::encode_json("ofs", miter.get_ofs(), f); + ::encode_json("loc", raw_loc, f); + ::encode_json("loc_ofs", miter.location_ofs(), f); + uint64_t loc_size = miter.get_stripe_size(); + if (loc_size > left) { + loc_size = left; + } + ::encode_json("loc_size", loc_size, f); + f->close_section(); + } + f->close_section(); + + return 0; +} + +std::unique_ptr RadosObject::get_read_op() +{ + return std::make_unique(this, rados_ctx); +} + +RadosObject::RadosReadOp::RadosReadOp(RadosObject *_source, RGWObjectCtx 
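+/* placement_rules_match() above compares rules by effect, not by name
+ * (summary): two rules match when they resolve to the same data pool for
+ * this object, so storage classes that alias one pool compare equal for
+ * transition purposes.
+ */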
*_rctx) : + source(_source), + rctx(_rctx), + op_target(_source->store->getRados(), + _source->get_bucket(), + *static_cast(rctx), + _source), + parent_op(&op_target) +{ } + +int RadosObject::RadosReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp) +{ + uint64_t obj_size; + + parent_op.conds.mod_ptr = params.mod_ptr; + parent_op.conds.unmod_ptr = params.unmod_ptr; + parent_op.conds.high_precision_time = params.high_precision_time; + parent_op.conds.mod_zone_id = params.mod_zone_id; + parent_op.conds.mod_pg_ver = params.mod_pg_ver; + parent_op.conds.if_match = params.if_match; + parent_op.conds.if_nomatch = params.if_nomatch; + parent_op.params.lastmod = params.lastmod; + parent_op.params.target_obj = params.target_obj; + parent_op.params.obj_size = &obj_size; + parent_op.params.attrs = &source->get_attrs(); + + int ret = parent_op.prepare(y, dpp); + if (ret < 0) + return ret; + + source->set_key(parent_op.state.obj.key); + source->set_obj_size(obj_size); + + return ret; +} + +int RadosObject::RadosReadOp::read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp) +{ + return parent_op.read(ofs, end, bl, y, dpp); +} + +int RadosObject::RadosReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) +{ + return parent_op.get_attr(dpp, name, dest, y); +} + +std::unique_ptr RadosObject::get_delete_op() +{ + return std::make_unique(this); +} + +RadosObject::RadosDeleteOp::RadosDeleteOp(RadosObject *_source) : + source(_source), + op_target(_source->store->getRados(), + _source->get_bucket(), + _source->get_ctx(), + _source), + parent_op(&op_target) +{ } + +int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) +{ + parent_op.params.bucket_owner = params.bucket_owner.get_id(); + parent_op.params.versioning_status = params.versioning_status; + parent_op.params.obj_owner = params.obj_owner; + parent_op.params.olh_epoch = params.olh_epoch; + parent_op.params.marker_version_id = params.marker_version_id; + parent_op.params.bilog_flags = params.bilog_flags; + parent_op.params.remove_objs = params.remove_objs; + parent_op.params.expiration_time = params.expiration_time; + parent_op.params.unmod_since = params.unmod_since; + parent_op.params.mtime = params.mtime; + parent_op.params.high_precision_time = params.high_precision_time; + parent_op.params.zones_trace = params.zones_trace; + parent_op.params.abortmp = params.abortmp; + parent_op.params.parts_accounted_size = params.parts_accounted_size; + + int ret = parent_op.delete_obj(y, dpp); + if (ret < 0) + return ret; + + result.delete_marker = parent_op.result.delete_marker; + result.version_id = parent_op.result.version_id; + + return ret; +} + +int RadosObject::delete_object(const DoutPrefixProvider* dpp, + optional_yield y, + bool prevent_versioning) +{ + RGWRados::Object del_target(store->getRados(), bucket, *rados_ctx, this); + RGWRados::Object::Delete del_op(&del_target); + + del_op.params.bucket_owner = bucket->get_info().owner; + del_op.params.versioning_status = prevent_versioning ? 
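+/* Typical read sequence through the ReadOp wrapper above (assumed usage):
+ *
+ *   std::unique_ptr<rgw::sal::Object::ReadOp> rop = obj->get_read_op();
+ *   rop->params.if_match = etag.c_str();   // optional conditional
+ *   int r = rop->prepare(y, dpp);          // stat + attrs; sets object size
+ *   if (r >= 0)
+ *     r = rop->read(0, obj->get_obj_size() - 1, bl, y, dpp);
+ */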
0 : bucket->get_info().versioning_status(); + + return del_op.delete_obj(y, dpp); +} + +int RadosObject::delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate, + Completions* aio, bool keep_index_consistent, + optional_yield y) +{ + RadosCompletions* raio = static_cast(aio); + + return store->getRados()->delete_obj_aio(dpp, get_obj(), bucket->get_info(), astate, + raio->handles, keep_index_consistent, y); +} + +int RadosObject::copy_object(User* user, + req_info* info, + const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, + rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, + const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, + ceph::real_time* mtime, + const ceph::real_time* mod_ptr, + const ceph::real_time* unmod_ptr, + bool high_precision_time, + const char* if_match, + const char* if_nomatch, + AttrsMod attrs_mod, + bool copy_if_newer, + Attrs& attrs, + RGWObjCategory category, + uint64_t olh_epoch, + boost::optional delete_at, + std::string* version_id, + std::string* tag, + std::string* etag, + void (*progress_cb)(off_t, void *), + void* progress_data, + const DoutPrefixProvider* dpp, + optional_yield y) +{ + return store->getRados()->copy_obj(*rados_ctx, + user->get_id(), + info, + source_zone, + dest_object, + this, + dest_bucket, + src_bucket, + dest_placement, + src_mtime, + mtime, + mod_ptr, + unmod_ptr, + high_precision_time, + if_match, + if_nomatch, + static_cast(attrs_mod), + copy_if_newer, + attrs, + category, + olh_epoch, + (delete_at ? *delete_at : real_time()), + version_id, + tag, + etag, + progress_cb, + progress_data, + dpp, + y); +} + +int RadosObject::RadosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t ofs, int64_t end, RGWGetDataCB* cb, optional_yield y) +{ + return parent_op.iterate(dpp, ofs, end, cb, y); +} + +int RadosObject::swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) +{ + return store->getRados()->swift_versioning_restore(*rados_ctx, + bucket->get_owner()->get_id(), + bucket, + this, + restored, + dpp); +} + +int RadosObject::swift_versioning_copy(const DoutPrefixProvider* dpp, optional_yield y) +{ + return store->getRados()->swift_versioning_copy(*rados_ctx, + bucket->get_info().owner, + bucket, + this, + dpp, + y); +} + +int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct) +{ + std::unique_ptr meta_obj = get_meta_obj(); + meta_obj->set_in_extra_data(true); + meta_obj->set_hash_source(mp_obj.get_key()); + cls_rgw_obj_chain chain; + list remove_objs; + bool truncated; + int marker = 0; + int ret; + uint64_t parts_accounted_size = 0; + + do { + ret = list_parts(dpp, cct, 1000, marker, &marker, &truncated); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << ": RadosMultipartUpload::list_parts returned " << + ret << dendl; + return (ret == -ENOENT) ? 
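+ /* Cleanup strategy for the loop below (summary): parts that carry a
+  * manifest contribute their tail objects to a cls_rgw GC chain and their
+  * head objects to remove_objs for bucket-index cleanup; manifest-less
+  * parts are deleted directly. Finally the .meta object is removed with
+  * abortmp set, so quota accounting drops by parts_accounted_size.
+  */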
-ERR_NO_SUCH_UPLOAD : ret;
+ }
+
+ for (auto part_it = parts.begin();
+ part_it != parts.end();
+ ++part_it) {
+ RadosMultipartPart* obj_part = dynamic_cast<RadosMultipartPart*>(part_it->second.get());
+ if (obj_part->info.manifest.empty()) {
+ std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(
+ rgw_obj_key(obj_part->oid, std::string(), RGW_OBJ_NS_MULTIPART));
+ obj->set_hash_source(mp_obj.get_key());
+ ret = obj->delete_object(dpp, null_yield);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+ } else {
+ auto target = meta_obj->get_obj();
+ store->getRados()->update_gc_chain(dpp, target, obj_part->info.manifest, &chain);
+ RGWObjManifest::obj_iterator oiter = obj_part->info.manifest.obj_begin(dpp);
+ if (oiter != obj_part->info.manifest.obj_end(dpp)) {
+ std::unique_ptr<rgw::sal::Object> head = bucket->get_object(rgw_obj_key());
+ rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store);
+ dynamic_cast<RadosObject*>(head.get())->raw_obj_to_obj(raw_head);
+
+ rgw_obj_index_key key;
+ head->get_key().get_index_key(&key);
+ remove_objs.push_back(key);
+ }
+ }
+ parts_accounted_size += obj_part->info.accounted_size;
+ }
+ } while (truncated);
+
+ if (store->getRados()->get_gc() == nullptr) {
+ // Delete objects inline if gc hasn't been initialised (when bypass-gc is specified)
+ store->getRados()->delete_objs_inline(dpp, chain, mp_obj.get_upload_id());
+ } else {
+ /* use upload id as tag and do it synchronously */
+ auto [ret, leftover_chain] = store->getRados()->send_chain_to_gc(chain, mp_obj.get_upload_id());
+ if (ret < 0 && leftover_chain) {
+ ldpp_dout(dpp, 5) << __func__ << ": gc->send_chain() returned " << ret << dendl;
+ if (ret == -ENOENT) {
+ return -ERR_NO_SUCH_UPLOAD;
+ }
+ // Delete objects inline if sending the chain to gc fails
+ store->getRados()->delete_objs_inline(dpp, *leftover_chain, mp_obj.get_upload_id());
+ }
+ }
+
+ std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op();
+ del_op->params.bucket_owner = bucket->get_acl_owner();
+ del_op->params.versioning_status = 0;
+ if (!remove_objs.empty()) {
+ del_op->params.remove_objs = &remove_objs;
+ }
+
+ del_op->params.abortmp = true;
+ del_op->params.parts_accounted_size = parts_accounted_size;
+
+ // and also remove the metadata obj
+ ret = del_op->delete_obj(dpp, null_yield);
+ if (ret < 0) {
+ ldpp_dout(dpp, 20) << __func__ << ": del_op.delete_obj returned " <<
+ ret << dendl;
+ }
+ return (ret == -ENOENT) ?
-ERR_NO_SUCH_UPLOAD : ret; +} + +std::unique_ptr RadosMultipartUpload::get_meta_obj() +{ + return bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns)); +} + +int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) +{ + int ret; + std::string oid = mp_obj.get_key(); + RGWObjectCtx obj_ctx(store); + + do { + char buf[33]; + string tmp_obj_name; + std::unique_ptr obj; + gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); + std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */ + upload_id.append(buf); + + mp_obj.init(oid, upload_id); + tmp_obj_name = mp_obj.get_meta(); + + obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns)); + // the meta object will be indexed with 0 size, we c + obj->set_in_extra_data(true); + obj->set_hash_source(oid); + + RGWRados::Object op_target(store->getRados(), + obj->get_bucket(), + obj_ctx, obj.get()); + RGWRados::Object::Write obj_op(&op_target); + + op_target.set_versioning_disabled(true); /* no versioning for multipart meta */ + obj_op.meta.owner = owner.get_id(); + obj_op.meta.category = RGWObjCategory::MultiMeta; + obj_op.meta.flags = PUT_OBJ_CREATE_EXCL; + obj_op.meta.mtime = &mtime; + + multipart_upload_info upload_info; + upload_info.dest_placement = dest_placement; + + bufferlist bl; + encode(upload_info, bl); + obj_op.meta.data = &bl; + + ret = obj_op.write_meta(dpp, bl.length(), 0, attrs, y); + } while (ret == -EEXIST); + + return ret; +} + +int RadosMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct, + int num_parts, int marker, + int *next_marker, bool *truncated, + bool assume_unsorted) +{ + map parts_map; + map::iterator iter; + + std::unique_ptr obj = bucket->get_object( + rgw_obj_key(get_meta(), std::string(), RGW_OBJ_NS_MULTIPART)); + obj->set_in_extra_data(true); + + bool sorted_omap = is_v2_upload_id(get_upload_id()) && !assume_unsorted; + + parts.clear(); + + int ret; + if (sorted_omap) { + string p; + p = "part."; + char buf[32]; + + snprintf(buf, sizeof(buf), "%08d", marker); + p.append(buf); + + ret = obj->omap_get_vals(dpp, p, num_parts + 1, &parts_map, + nullptr, null_yield); + } else { + ret = obj->omap_get_all(dpp, &parts_map, null_yield); + } + if (ret < 0) { + return ret; + } + + int i; + int last_num = 0; + + uint32_t expected_next = marker + 1; + + for (i = 0, iter = parts_map.begin(); + (i < num_parts || !sorted_omap) && iter != parts_map.end(); + ++iter, ++i) { + bufferlist& bl = iter->second; + auto bli = bl.cbegin(); + std::unique_ptr part = std::make_unique(); + try { + decode(part->info, bli); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: could not part info, caught buffer::error" << + dendl; + return -EIO; + } + if (sorted_omap) { + if (part->info.num != expected_next) { + /* ouch, we expected a specific part num here, but we got a + * different one. Either a part is missing, or it could be a + * case of mixed rgw versions working on the same upload, + * where one gateway doesn't support correctly sorted omap + * keys for multipart upload just assume data is unsorted. 
+ */ + return list_parts(dpp, cct, num_parts, marker, next_marker, truncated, true); + } + expected_next++; + } + if (sorted_omap || + (int)part->info.num > marker) { + last_num = part->info.num; + parts[part->info.num] = std::move(part); + } + } + + if (sorted_omap) { + if (truncated) { + *truncated = (iter != parts_map.end()); + } + } else { + /* rebuild a map with only num_parts entries */ + std::map> new_parts; + std::map>::iterator piter; + for (i = 0, piter = parts.begin(); + i < num_parts && piter != parts.end(); + ++i, ++piter) { + last_num = piter->first; + new_parts[piter->first] = std::move(piter->second); + } + + if (truncated) { + *truncated = (piter != parts.end()); + } + + parts.swap(new_parts); + } + + if (next_marker) { + *next_marker = last_num; + } + + return 0; +} + +int RadosMultipartUpload::complete(const DoutPrefixProvider *dpp, + optional_yield y, CephContext* cct, + map& part_etags, + list& remove_objs, + uint64_t& accounted_size, bool& compressed, + RGWCompressionInfo& cs_info, off_t& ofs, + std::string& tag, ACLOwner& owner, + uint64_t olh_epoch, + rgw::sal::Object* target_obj) +{ + char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; + std::string etag; + bufferlist etag_bl; + MD5 hash; + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + bool truncated; + int ret; + + int total_parts = 0; + int handled_parts = 0; + int max_parts = 1000; + int marker = 0; + uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; + auto etags_iter = part_etags.begin(); + rgw::sal::Attrs attrs = target_obj->get_attrs(); + + do { + ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); + if (ret == -ENOENT) { + ret = -ERR_NO_SUCH_UPLOAD; + } + if (ret < 0) + return ret; + + total_parts += parts.size(); + if (!truncated && total_parts != (int)part_etags.size()) { + ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts + << " expected: " << part_etags.size() << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + + for (auto obj_iter = parts.begin(); etags_iter != part_etags.end() && obj_iter != parts.end(); ++etags_iter, ++obj_iter, ++handled_parts) { + RadosMultipartPart* part = dynamic_cast(obj_iter->second.get()); + uint64_t part_size = part->get_size(); + if (handled_parts < (int)part_etags.size() - 1 && + part_size < min_part_size) { + ret = -ERR_TOO_SMALL; + return ret; + } + + char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; + if (etags_iter->first != (int)obj_iter->first) { + ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: " + << etags_iter->first << " next uploaded: " + << obj_iter->first << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + string part_etag = rgw_string_unquote(etags_iter->second); + if (part_etag.compare(part->get_etag()) != 0) { + ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first + << " etag: " << etags_iter->second << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + + hex_to_buf(part->get_etag().c_str(), petag, + CEPH_CRYPTO_MD5_DIGESTSIZE); + hash.Update((const unsigned char *)petag, sizeof(petag)); + + RGWUploadPartInfo& obj_part = part->info; + + /* update manifest for part */ + string oid = mp_obj.get_part(part->info.num); + rgw_obj src_obj; + src_obj.init_ns(bucket->get_key(), oid, mp_ns); + + if (obj_part.manifest.empty()) { + ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj=" + << src_obj << dendl; + ret = -ERR_INVALID_PART; 
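+ /* Note on the final ETag computed after this loop: S3 multipart ETags are
+  * the MD5 of the concatenated binary part MD5s with a "-<part count>"
+  * suffix, e.g. "9b2cf535f27731c974343645a3985328-3"; the hex_to_buf() /
+  * hash.Update(petag) calls above accumulate exactly that input.
+  */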
+ return ret; + } else { + manifest.append(dpp, obj_part.manifest, store->svc()->zone->get_zonegroup(), store->svc()->zone->get_zone_params()); + } + + bool part_compressed = (obj_part.cs_info.compression_type != "none"); + if ((handled_parts > 0) && + ((part_compressed != compressed) || + (cs_info.compression_type != obj_part.cs_info.compression_type))) { + ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload (" + << cs_info.compression_type << ">>" << obj_part.cs_info.compression_type << ")" << dendl; + ret = -ERR_INVALID_PART; + return ret; + } + + if (part_compressed) { + int64_t new_ofs; // offset in compression data for new part + if (cs_info.blocks.size() > 0) + new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len; + else + new_ofs = 0; + for (const auto& block : obj_part.cs_info.blocks) { + compression_block cb; + cb.old_ofs = block.old_ofs + cs_info.orig_size; + cb.new_ofs = new_ofs; + cb.len = block.len; + cs_info.blocks.push_back(cb); + new_ofs = cb.new_ofs + cb.len; + } + if (!compressed) + cs_info.compression_type = obj_part.cs_info.compression_type; + cs_info.orig_size += obj_part.cs_info.orig_size; + compressed = true; + } + + rgw_obj_index_key remove_key; + src_obj.key.get_index_key(&remove_key); + + remove_objs.push_back(remove_key); + + ofs += obj_part.size; + accounted_size += obj_part.accounted_size; + } + } while (truncated); + hash.Final((unsigned char *)final_etag); + + buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str); + snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], + sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, + "-%lld", (long long)part_etags.size()); + etag = final_etag_str; + ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; + + etag_bl.append(etag); + + attrs[RGW_ATTR_ETAG] = etag_bl; + + if (compressed) { + // write compression attribute to full object + bufferlist tmp; + encode(cs_info, tmp); + attrs[RGW_ATTR_COMPRESSION] = tmp; + } + + target_obj->set_atomic(); + + RGWRados::Object op_target(store->getRados(), + target_obj->get_bucket(), + dynamic_cast(target_obj)->get_ctx(), + target_obj); + RGWRados::Object::Write obj_op(&op_target); + + obj_op.meta.manifest = &manifest; + obj_op.meta.remove_objs = &remove_objs; + + obj_op.meta.ptag = &tag; /* use req_id as operation tag */ + obj_op.meta.owner = owner.get_id(); + obj_op.meta.flags = PUT_OBJ_CREATE; + obj_op.meta.modify_tail = true; + obj_op.meta.completeMultipart = true; + obj_op.meta.olh_epoch = olh_epoch; + + ret = obj_op.write_meta(dpp, ofs, accounted_size, attrs, y); + if (ret < 0) + return ret; + + return ret; +} + +int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs) +{ + if (!rule && !attrs) { + return 0; + } + + if (rule) { + if (!placement.empty()) { + *rule = &placement; + if (!attrs) { + /* Don't need attrs, done */ + return 0; + } + } else { + *rule = nullptr; + } + } + + /* We need either attributes or placement, so we need a read */ + std::unique_ptr meta_obj; + meta_obj = get_meta_obj(); + meta_obj->set_in_extra_data(true); + + multipart_upload_info upload_info; + bufferlist headbl; + + /* Read the obj head which contains the multipart_upload_info */ + std::unique_ptr read_op = meta_obj->get_read_op(); + meta_obj->set_prefetch_data(); + + int ret = read_op->prepare(y, dpp); + if (ret < 0) { + if (ret == -ENOENT) { + return -ERR_NO_SUCH_UPLOAD; + } + return ret; + } + + 
extract_span_context(meta_obj->get_attrs(), trace_ctx); + + if (attrs) { + /* Attrs are filled in by prepare */ + *attrs = meta_obj->get_attrs(); + if (!rule || *rule != nullptr) { + /* placement was cached; don't actually read */ + return 0; + } + } + + /* Now read the placement from the head */ + ret = read_op->read(0, store->ctx()->_conf->rgw_max_chunk_size, headbl, y, dpp); + if (ret < 0) { + if (ret == -ENOENT) { + return -ERR_NO_SUCH_UPLOAD; + } + return ret; + } + + if (headbl.length() <= 0) { + return -ERR_NO_SUCH_UPLOAD; + } + + /* Decode multipart_upload_info */ + auto hiter = headbl.cbegin(); + try { + decode(upload_info, hiter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode multipart upload info" << dendl; + return -EIO; + } + placement = upload_info.dest_placement; + *rule = &placement; + + return 0; +} + +std::unique_ptr RadosMultipartUpload::get_writer( + const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t part_num, + const std::string& part_num_str) +{ + auto aio = rgw::make_throttle(store->ctx()->_conf->rgw_put_obj_min_window_size, y); + return std::make_unique(dpp, y, this, + std::move(_head_obj), store, std::move(aio), owner, + ptail_placement_rule, part_num, part_num_str); +} + +MPRadosSerializer::MPRadosSerializer(const DoutPrefixProvider *dpp, RadosStore* store, RadosObject* obj, const std::string& lock_name) : + lock(lock_name) +{ + rgw_pool meta_pool; + rgw_raw_obj raw_obj; + + obj->get_raw_obj(&raw_obj); + oid = raw_obj.oid; + store->getRados()->get_obj_data_pool(obj->get_bucket()->get_placement_rule(), + obj->get_obj(), &meta_pool); + store->getRados()->open_pool_ctx(dpp, meta_pool, ioctx, true); +} + +int MPRadosSerializer::try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) +{ + op.assert_exists(); + lock.set_duration(dur); + lock.lock_exclusive(&op); + int ret = rgw_rados_operate(dpp, ioctx, oid, &op, y); + if (! 
ret) { + locked = true; + } + return ret; +} + +LCRadosSerializer::LCRadosSerializer(RadosStore* store, const std::string& _oid, const std::string& lock_name, const std::string& cookie) : + StoreLCSerializer(_oid), + lock(lock_name) +{ + ioctx = &store->getRados()->lc_pool_ctx; + lock.set_cookie(cookie); +} + +int LCRadosSerializer::try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) +{ + lock.set_duration(dur); + return lock.lock_exclusive(ioctx, oid); +} + +int RadosLifecycle::get_entry(const std::string& oid, const std::string& marker, + std::unique_ptr* entry) +{ + cls_rgw_lc_entry cls_entry; + int ret = cls_rgw_lc_get_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker, cls_entry); + if (ret) + return ret; + + LCEntry* e; + e = new StoreLCEntry(cls_entry.bucket, cls_entry.start_time, cls_entry.status); + if (!e) + return -ENOMEM; + + entry->reset(e); + return 0; +} + +int RadosLifecycle::get_next_entry(const std::string& oid, const std::string& marker, + std::unique_ptr* entry) +{ + cls_rgw_lc_entry cls_entry; + int ret = cls_rgw_lc_get_next_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker, + cls_entry); + + if (ret) + return ret; + + LCEntry* e; + e = new StoreLCEntry(cls_entry.bucket, cls_entry.start_time, cls_entry.status); + if (!e) + return -ENOMEM; + + entry->reset(e); + return 0; +} + +int RadosLifecycle::set_entry(const std::string& oid, LCEntry& entry) +{ + cls_rgw_lc_entry cls_entry; + + cls_entry.bucket = entry.get_bucket(); + cls_entry.start_time = entry.get_start_time(); + cls_entry.status = entry.get_status(); + + return cls_rgw_lc_set_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry); +} + +int RadosLifecycle::list_entries(const std::string& oid, const std::string& marker, + uint32_t max_entries, std::vector>& entries) +{ + entries.clear(); + + vector cls_entries; + int ret = cls_rgw_lc_list(*store->getRados()->get_lc_pool_ctx(), oid, marker, max_entries, cls_entries); + + if (ret < 0) + return ret; + + for (auto& entry : cls_entries) { + entries.push_back(std::make_unique(entry.bucket, oid, + entry.start_time, entry.status)); + } + + return ret; +} + +int RadosLifecycle::rm_entry(const std::string& oid, LCEntry& entry) +{ + cls_rgw_lc_entry cls_entry; + + cls_entry.bucket = entry.get_bucket(); + cls_entry.start_time = entry.get_start_time(); + cls_entry.status = entry.get_status(); + + return cls_rgw_lc_rm_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry); +} + +int RadosLifecycle::get_head(const std::string& oid, std::unique_ptr* head) +{ + cls_rgw_lc_obj_head cls_head; + int ret = cls_rgw_lc_get_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head); + if (ret) + return ret; + + LCHead* h; + h = new StoreLCHead(cls_head.start_date, cls_head.shard_rollover_date, cls_head.marker); + if (!h) + return -ENOMEM; + + head->reset(h); + return 0; +} + +int RadosLifecycle::put_head(const std::string& oid, LCHead& head) +{ + cls_rgw_lc_obj_head cls_head; + + cls_head.marker = head.get_marker(); + cls_head.start_date = head.get_start_date(); + cls_head.shard_rollover_date = head.get_shard_rollover_date(); + + return cls_rgw_lc_put_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head); +} + +std::unique_ptr RadosLifecycle::get_serializer(const std::string& lock_name, + const std::string& oid, + const std::string& cookie) +{ + return std::make_unique(store, oid, lock_name, cookie); +} + +int RadosNotification::publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags) +{ + return 
rgw::notify::publish_reserve(dpp, event_type, res, obj_tags); +} + +int RadosNotification::publish_commit(const DoutPrefixProvider* dpp, uint64_t size, + const ceph::real_time& mtime, const std::string& etag, const std::string& version) +{ + return rgw::notify::publish_commit(obj, size, mtime, etag, version, event_type, res, dpp); +} + +int RadosAtomicWriter::prepare(optional_yield y) +{ + return processor.prepare(y); +} + +int RadosAtomicWriter::process(bufferlist&& data, uint64_t offset) +{ + return processor.process(std::move(data), offset); +} + +int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) +{ + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, + if_match, if_nomatch, user_data, zones_trace, canceled, y); +} + +int RadosAppendWriter::prepare(optional_yield y) +{ + return processor.prepare(y); +} + +int RadosAppendWriter::process(bufferlist&& data, uint64_t offset) +{ + return processor.process(std::move(data), offset); +} + +int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) +{ + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, + if_match, if_nomatch, user_data, zones_trace, canceled, y); +} + +int RadosMultipartWriter::prepare(optional_yield y) +{ + return processor.prepare(y); +} + +int RadosMultipartWriter::process(bufferlist&& data, uint64_t offset) +{ + return processor.process(std::move(data), offset); +} + +int RadosMultipartWriter::complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) +{ + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, + if_match, if_nomatch, user_data, zones_trace, canceled, y); +} + +const std::string& RadosZoneGroup::get_endpoint() const +{ + if (!group.endpoints.empty()) { + return group.endpoints.front(); + } else { + // use zonegroup's master zone endpoints + auto z = group.zones.find(group.master_zone); + if (z != group.zones.end() && !z->second.endpoints.empty()) { + return z->second.endpoints.front(); + } + } + return empty; +} + +bool RadosZoneGroup::placement_target_exists(std::string& target) const +{ + return !!group.placement_targets.count(target); +} + +int RadosZoneGroup::get_placement_target_names(std::set& names) const +{ + for (const auto& target : group.placement_targets) { + names.emplace(target.second.name); + } + + return 0; +} + +int RadosZoneGroup::get_placement_tier(const rgw_placement_rule& rule, + std::unique_ptr* tier) +{ + std::map::const_iterator titer; + titer = group.placement_targets.find(rule.name); + if (titer == group.placement_targets.end()) { + return -ENOENT; + } + + const auto& target_rule = titer->second; + std::map::const_iterator ttier; + ttier = target_rule.tier_targets.find(rule.storage_class); + if (ttier == 
target_rule.tier_targets.end()) { + // not found + return -ENOENT; + } + + PlacementTier* t; + t = new RadosPlacementTier(store, ttier->second); + if (!t) + return -ENOMEM; + + tier->reset(t); + return 0; +} + +int RadosZoneGroup::get_zone_by_id(const std::string& id, std::unique_ptr* zone) +{ + RGWZone* rz = store->svc()->zone->find_zone(id); + if (!rz) + return -ENOENT; + + Zone* z = new RadosZone(store, clone(), *rz); + zone->reset(z); + return 0; +} + +int RadosZoneGroup::get_zone_by_name(const std::string& name, std::unique_ptr* zone) +{ + rgw_zone_id id; + int ret = store->svc()->zone->find_zone_id_by_name(name, &id); + if (ret < 0) + return ret; + + RGWZone* rz = store->svc()->zone->find_zone(id.id); + if (!rz) + return -ENOENT; + + Zone* z = new RadosZone(store, clone(), *rz); + zone->reset(z); + return 0; +} + +int RadosZoneGroup::list_zones(std::list& zone_ids) +{ + for (const auto& entry : group.zones) + { + zone_ids.push_back(entry.second.id); + } + return 0; +} + +std::unique_ptr RadosZone::clone() +{ + if (local_zone) + return std::make_unique(store, group->clone()); + + return std::make_unique(store, group->clone(), rgw_zone); +} + +const std::string& RadosZone::get_id() +{ + if (local_zone) + return store->svc()->zone->zone_id().id; + + return rgw_zone.id; +} + +const std::string& RadosZone::get_name() const +{ + if (local_zone) + return store->svc()->zone->zone_name(); + + return rgw_zone.name; +} + +bool RadosZone::is_writeable() +{ + if (local_zone) + return store->svc()->zone->zone_is_writeable(); + + return !rgw_zone.read_only; +} + +bool RadosZone::get_redirect_endpoint(std::string* endpoint) +{ + if (local_zone) + return store->svc()->zone->get_redirect_zone_endpoint(endpoint); + + endpoint = &rgw_zone.redirect_zone; + return true; +} + +bool RadosZone::has_zonegroup_api(const std::string& api) const +{ + return store->svc()->zone->has_zonegroup_api(api); +} + +const std::string& RadosZone::get_current_period_id() +{ + return store->svc()->zone->get_current_period_id(); +} + +const RGWAccessKey& RadosZone::get_system_key() +{ + return store->svc()->zone->get_zone_params().system_key; +} + +const std::string& RadosZone::get_realm_name() +{ + return store->svc()->zone->get_realm().get_name(); +} + +const std::string& RadosZone::get_realm_id() +{ + return store->svc()->zone->get_realm().get_id(); +} + +const std::string_view RadosZone::get_tier_type() +{ + if (local_zone) + return store->svc()->zone->get_zone().tier_type; + + return rgw_zone.id; +} + +RGWBucketSyncPolicyHandlerRef RadosZone::get_sync_policy_handler() +{ + return store->svc()->zone->get_sync_policy_handler(get_id()); +} + +RadosLuaManager::RadosLuaManager(RadosStore* _s) : + store(_s), + pool((store->svc() && store->svc()->zone) ? 
store->svc()->zone->get_zone_params().log_pool : rgw_pool()) +{ } + +int RadosLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) +{ + if (pool.empty()) { + ldpp_dout(dpp, 10) << "WARNING: missing pool when reading lua script " << dendl; + return 0; + } + bufferlist bl; + + int r = rgw_get_system_obj(store->svc()->sysobj, pool, key, bl, nullptr, nullptr, y, dpp); + if (r < 0) { + return r; + } + + auto iter = bl.cbegin(); + try { + ceph::decode(script, iter); + } catch (buffer::error& err) { + return -EIO; + } + + return 0; +} + +int RadosLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) +{ + if (pool.empty()) { + ldpp_dout(dpp, 10) << "WARNING: missing pool when writing lua script " << dendl; + return 0; + } + bufferlist bl; + ceph::encode(script, bl); + + int r = rgw_put_system_obj(dpp, store->svc()->sysobj, pool, key, bl, false, nullptr, real_time(), y); + if (r < 0) { + return r; + } + + return 0; +} + +int RadosLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) +{ + if (pool.empty()) { + ldpp_dout(dpp, 10) << "WARNING: missing pool when deleting lua script " << dendl; + return 0; + } + int r = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, key, nullptr, y); + if (r < 0 && r != -ENOENT) { + return r; + } + + return 0; +} + +const std::string PACKAGE_LIST_OBJECT_NAME = "lua_package_allowlist"; + +int RadosLuaManager::add_package(const DoutPrefixProvider *dpp, optional_yield y, const std::string& package_name) +{ + // add package to list + const bufferlist empty_bl; + std::map new_package{{package_name, empty_bl}}; + librados::ObjectWriteOperation op; + op.omap_set(new_package); + auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()), + PACKAGE_LIST_OBJECT_NAME, &op, y); + + if (ret < 0) { + return ret; + } + return 0; +} + +int RadosLuaManager::remove_package(const DoutPrefixProvider *dpp, optional_yield y, const std::string& package_name) +{ + librados::ObjectWriteOperation op; + size_t pos = package_name.find(" "); + if (pos != package_name.npos) { + // remove specfic version of the the package + op.omap_rm_keys(std::set({package_name})); + auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()), + PACKAGE_LIST_OBJECT_NAME, &op, y); + if (ret < 0) { + return ret; + } + return 0; + } + // otherwise, remove any existing versions of the package + rgw::lua::packages_t packages; + auto ret = list_packages(dpp, y, packages); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + for(const auto& package : packages) { + const std::string package_no_version = package.substr(0, package.find(" ")); + if (package_no_version.compare(package_name) == 0) { + op.omap_rm_keys(std::set({package})); + ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()), + PACKAGE_LIST_OBJECT_NAME, &op, y); + if (ret < 0) { + return ret; + } + } + } + return 0; +} + +int RadosLuaManager::list_packages(const DoutPrefixProvider *dpp, optional_yield y, rgw::lua::packages_t& packages) +{ + constexpr auto max_chunk = 1024U; + std::string start_after; + bool more = true; + int rval; + while (more) { + librados::ObjectReadOperation op; + rgw::lua::packages_t packages_chunk; + op.omap_get_keys2(start_after, max_chunk, &packages_chunk, &more, &rval); + const auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()), + PACKAGE_LIST_OBJECT_NAME, &op, nullptr, y); + + if (ret 
< 0) { + return ret; + } + + packages.merge(packages_chunk); + } + + return 0; +} + +int RadosOIDCProvider::store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) +{ + auto sysobj = store->svc()->sysobj; + std::string oid = tenant + get_url_oid_prefix() + url; + + bufferlist bl; + using ceph::encode; + encode(*this, bl); + return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().oidc_pool, oid, bl, exclusive, nullptr, real_time(), y); +} + +int RadosOIDCProvider::read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) +{ + auto sysobj = store->svc()->sysobj; + auto& pool = store->svc()->zone->get_zone_params().oidc_pool; + std::string oid = tenant + get_url_oid_prefix() + url; + bufferlist bl; + + int ret = rgw_get_system_obj(sysobj, pool, oid, bl, nullptr, nullptr, null_yield, dpp); + if (ret < 0) { + return ret; + } + + try { + using ceph::decode; + auto iter = bl.cbegin(); + decode(*this, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode oidc provider info from pool: " << pool.name << + ": " << url << dendl; + return -EIO; + } + + return 0; +} + +int RadosOIDCProvider::delete_obj(const DoutPrefixProvider *dpp, optional_yield y) +{ + auto& pool = store->svc()->zone->get_zone_params().oidc_pool; + + std::string url, tenant; + auto ret = get_tenant_url_from_arn(tenant, url); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to parse arn" << dendl; + return -EINVAL; + } + + if (this->tenant != tenant) { + ldpp_dout(dpp, 0) << "ERROR: tenant in arn doesn't match that of user " << this->tenant << ", " + << tenant << ": " << dendl; + return -EINVAL; + } + + // Delete url + std::string oid = tenant + get_url_oid_prefix() + url; + ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: deleting oidc url from pool: " << pool.name << ": " + << provider_url << ": " << cpp_strerror(-ret) << dendl; + } + + return ret; +} + +int RadosRole::store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) +{ + using ceph::encode; + std::string oid; + + oid = info.id; + + bufferlist bl; + encode(this->info, bl); + + if (!this->info.tags.empty()) { + bufferlist bl_tags; + encode(this->info.tags, bl_tags); + map attrs; + attrs.emplace("tagging", bl_tags); + + RGWSI_MBSObj_PutParams params(bl, &attrs, info.mtime, exclusive); + std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); + ctx->init(store->svc()->role->get_be_handler()); + return store->svc()->role->svc.meta_be->put(ctx.get(), oid, params, &info.objv_tracker, y, dpp); + } else { + RGWSI_MBSObj_PutParams params(bl, nullptr, info.mtime, exclusive); + std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); + ctx->init(store->svc()->role->get_be_handler()); + return store->svc()->role->svc.meta_be->put(ctx.get(), oid, params, &info.objv_tracker, y, dpp); + } +} + +int RadosRole::store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) +{ + auto sysobj = store->svc()->sysobj; + RGWNameToId nameToId; + nameToId.obj_id = info.id; + + std::string oid = info.tenant + get_names_oid_prefix() + info.name; + + bufferlist bl; + using ceph::encode; + encode(nameToId, bl); + + return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, exclusive, &info.objv_tracker, real_time(), y); +} + +int RadosRole::store_path(const DoutPrefixProvider *dpp, bool exclusive, 
optional_yield y) +{ + auto sysobj = store->svc()->sysobj; + std::string oid = info.tenant + get_path_oid_prefix() + info.path + get_info_oid_prefix() + info.id; + + bufferlist bl; + + return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, exclusive, &info.objv_tracker, real_time(), y); +} + +int RadosRole::read_id(const DoutPrefixProvider *dpp, const std::string& role_name, const std::string& tenant, std::string& role_id, optional_yield y) +{ + auto sysobj = store->svc()->sysobj; + std::string oid = info.tenant + get_names_oid_prefix() + role_name; + bufferlist bl; + + int ret = rgw_get_system_obj(sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, nullptr, nullptr, null_yield, dpp); + if (ret < 0) { + return ret; + } + + RGWNameToId nameToId; + try { + auto iter = bl.cbegin(); + using ceph::decode; + decode(nameToId, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode role from Role pool: " << role_name << dendl; + return -EIO; + } + role_id = nameToId.obj_id; + return 0; +} + +int RadosRole::read_name(const DoutPrefixProvider *dpp, optional_yield y) +{ + auto sysobj = store->svc()->sysobj; + std::string oid = info.tenant + get_names_oid_prefix() + info.name; + bufferlist bl; + + int ret = rgw_get_system_obj(sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, nullptr, nullptr, null_yield, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed reading role name from Role pool: " << info.name << + ": " << cpp_strerror(-ret) << dendl; + return ret; + } + + RGWNameToId nameToId; + try { + using ceph::decode; + auto iter = bl.cbegin(); + decode(nameToId, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode role name from Role pool: " << info.name << dendl; + return -EIO; + } + info.id = nameToId.obj_id; + return 0; +} + +int RadosRole::read_info(const DoutPrefixProvider *dpp, optional_yield y) +{ + std::string oid; + + oid = info.id; + ldpp_dout(dpp, 20) << "INFO: oid in read_info is: " << oid << dendl; + + bufferlist bl; + + RGWSI_MBSObj_GetParams params(&bl, &info.attrs, &info.mtime); + std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); + ctx->init(store->svc()->role->get_be_handler()); + int ret = store->svc()->role->svc.meta_be->get(ctx.get(), oid, params, &info.objv_tracker, y, dpp, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed reading role info from Role pool: " << info.id << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + + try { + using ceph::decode; + auto iter = bl.cbegin(); + decode(this->info, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode role info from Role pool: " << info.id << dendl; + return -EIO; + } + + auto it = info.attrs.find("tagging"); + if (it != info.attrs.end()) { + bufferlist bl_tags = it->second; + try { + using ceph::decode; + auto iter = bl_tags.cbegin(); + decode(info.tags, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode attrs" << info.id << dendl; + return -EIO; + } + } + + return 0; +} + +int RadosRole::create(const DoutPrefixProvider *dpp, bool exclusive, const std::string& role_id, optional_yield y) +{ + int ret; + + if (! 
validate_input(dpp)) { + return -EINVAL; + } + + if (!role_id.empty()) { + info.id = role_id; + } + + /* check to see the name is not used */ + ret = read_id(dpp, info.name, info.tenant, info.id, y); + if (exclusive && ret == 0) { + ldpp_dout(dpp, 0) << "ERROR: name " << info.name << " already in use for role id " + << info.id << dendl; + return -EEXIST; + } else if ( ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "failed reading role id " << info.id << ": " + << cpp_strerror(-ret) << dendl; + return ret; + } + + if (info.id.empty()) { + /* create unique id */ + uuid_d new_uuid; + char uuid_str[37]; + new_uuid.generate_random(); + new_uuid.print(uuid_str); + info.id = uuid_str; + } + + //arn + info.arn = role_arn_prefix + info.tenant + ":role" + info.path + info.name; + + // Creation time + real_clock::time_point t = real_clock::now(); + + struct timeval tv; + real_clock::to_timeval(t, tv); + + char buf[30]; + struct tm result; + gmtime_r(&tv.tv_sec, &result); + strftime(buf,30,"%Y-%m-%dT%H:%M:%S", &result); + sprintf(buf + strlen(buf),".%dZ",(int)tv.tv_usec/1000); + info.creation_date.assign(buf, strlen(buf)); + + auto& pool = store->svc()->zone->get_zone_params().roles_pool; + ret = store_info(dpp, exclusive, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: storing role info in Role pool: " + << info.id << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + + ret = store_name(dpp, exclusive, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: storing role name in Role pool: " + << info.name << ": " << cpp_strerror(-ret) << dendl; + + //Delete the role info that was stored in the previous call + std::string oid = get_info_oid_prefix() + info.id; + int info_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (info_ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: cleanup of role id from Role pool: " + << info.id << ": " << cpp_strerror(-info_ret) << dendl; + } + return ret; + } + + ret = store_path(dpp, exclusive, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: storing role path in Role pool: " + << info.path << ": " << cpp_strerror(-ret) << dendl; + //Delete the role info that was stored in the previous call + std::string oid = get_info_oid_prefix() + info.id; + int info_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (info_ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: cleanup of role id from Role pool: " + << info.id << ": " << cpp_strerror(-info_ret) << dendl; + } + //Delete role name that was stored in previous call + oid = info.tenant + get_names_oid_prefix() + info.name; + int name_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (name_ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: cleanup of role name from Role pool: " + << info.name << ": " << cpp_strerror(-name_ret) << dendl; + } + return ret; + } + return 0; +} + +int RadosRole::delete_obj(const DoutPrefixProvider *dpp, optional_yield y) +{ + auto& pool = store->svc()->zone->get_zone_params().roles_pool; + + int ret = read_name(dpp, y); + if (ret < 0) { + return ret; + } + + ret = read_info(dpp, y); + if (ret < 0) { + return ret; + } + + if (! 
info.perm_policy_map.empty()) { + return -ERR_DELETE_CONFLICT; + } + + // Delete id + std::string oid = get_info_oid_prefix() + info.id; + ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: deleting role id from Role pool: " + << info.id << ": " << cpp_strerror(-ret) << dendl; + } + + // Delete name + oid = info.tenant + get_names_oid_prefix() + info.name; + ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: deleting role name from Role pool: " + << info.name << ": " << cpp_strerror(-ret) << dendl; + } + + // Delete path + oid = info.tenant + get_path_oid_prefix() + info.path + get_info_oid_prefix() + info.id; + ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: deleting role path from Role pool: " + << info.path << ": " << cpp_strerror(-ret) << dendl; + } + return ret; +} + +} // namespace rgw::sal + +extern "C" { + +void* newRadosStore(void) +{ + rgw::sal::RadosStore* store = new rgw::sal::RadosStore(); + if (store) { + RGWRados* rados = new RGWRados(); + + if (!rados) { + delete store; store = nullptr; + } else { + store->setRados(rados); + rados->set_store(store); + } + } + + return store; +} + +} diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h new file mode 100644 index 00000000000..499e0994807 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -0,0 +1,959 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#pragma once + +#include "rgw_sal_store.h" +#include "rgw_rados.h" +#include "rgw_notify.h" +#include "rgw_oidc_provider.h" +#include "rgw_role.h" +#include "rgw_multi.h" +#include "rgw_putobj_processor.h" +#include "services/svc_tier_rados.h" +#include "cls/lock/cls_lock_client.h" + +namespace rgw { namespace sal { + +class RadosMultipartUpload; + +class RadosCompletions : public Completions { + public: + std::list handles; + RadosCompletions() {} + ~RadosCompletions() = default; + virtual int drain() override; +}; + +class RadosPlacementTier: public StorePlacementTier { + RadosStore* store; + RGWZoneGroupPlacementTier tier; +public: + RadosPlacementTier(RadosStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {} + virtual ~RadosPlacementTier() = default; + + virtual const std::string& get_tier_type() { return tier.tier_type; } + virtual const std::string& get_storage_class() { return tier.storage_class; } + virtual bool retain_head_object() { return tier.retain_head_object; } + RGWZoneGroupPlacementTier& get_rt() { return tier; } +}; + +class RadosZoneGroup : public StoreZoneGroup { + RadosStore* store; + const RGWZoneGroup group; + std::string empty; +public: + RadosZoneGroup(RadosStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {} + virtual ~RadosZoneGroup() = default; + + virtual const std::string& get_id() const override { return group.get_id(); }; + virtual const std::string& get_name() const override { return group.get_name(); }; + virtual int equals(const std::string& other_zonegroup) const override { + return group.equals(other_zonegroup); + }; + /** Get the endpoint from zonegroup, or from master zone if not set */ + virtual const std::string& get_endpoint() const override; + virtual bool placement_target_exists(std::string& target) const override; + virtual bool is_master_zonegroup() const override { + return group.is_master_zonegroup(); + }; + virtual const std::string& get_api_name() const override { return group.api_name; }; + virtual int get_placement_target_names(std::set& names) const override; + virtual const std::string& get_default_placement_name() const override { + return group.default_placement.name; }; + virtual int get_hostnames(std::list& names) const override { + names = group.hostnames; + return 0; + }; + virtual int get_s3website_hostnames(std::list& names) const override { + names = group.hostnames_s3website; + return 0; + }; + virtual int get_zone_count() const override { + return group.zones.size(); + } + virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr* tier); + virtual int get_zone_by_id(const std::string& id, std::unique_ptr* zone) override; + virtual int get_zone_by_name(const std::string& name, std::unique_ptr* zone) override; + virtual int list_zones(std::list& zone_ids) override; + virtual std::unique_ptr clone() override { + return std::make_unique(store, group); + } + const RGWZoneGroup& get_group() const { return group; } +}; + +class RadosZone : public StoreZone { + protected: + RadosStore* store; + std::unique_ptr group; + RGWZone rgw_zone; + bool local_zone{false}; + public: + RadosZone(RadosStore* _store, std::unique_ptr _zg) : store(_store), group(std::move(_zg)), local_zone(true) {} + RadosZone(RadosStore* _store, std::unique_ptr _zg, RGWZone& z) : store(_store), group(std::move(_zg)), rgw_zone(z) {} + ~RadosZone() = default; + + virtual std::unique_ptr clone() override; + virtual ZoneGroup& get_zonegroup() override { return *(group.get()); } + 
virtual const std::string& get_id() override; + virtual const std::string& get_name() const override; + virtual bool is_writeable() override; + virtual bool get_redirect_endpoint(std::string* endpoint) override; + virtual bool has_zonegroup_api(const std::string& api) const override; + virtual const std::string& get_current_period_id() override; + virtual const RGWAccessKey& get_system_key() override; + virtual const std::string& get_realm_name() override; + virtual const std::string& get_realm_id() override; + virtual const std::string_view get_tier_type() override; + virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() override; +}; + +class RadosStore : public StoreDriver { + private: + RGWRados* rados; + RGWUserCtl* user_ctl; + std::string luarocks_path; + std::unique_ptr zone; + + public: + RadosStore() + : rados(nullptr) { + } + ~RadosStore() { + delete rados; + } + + virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) override; + virtual const std::string get_name() const override { + return "rados"; + } + virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual std::unique_ptr get_user(const rgw_user& u) override; + virtual int get_user_by_access_key(const DoutPrefixProvider* dpp, const std::string& key, optional_yield y, std::unique_ptr* user) override; + virtual int get_user_by_email(const DoutPrefixProvider* dpp, const std::string& email, optional_yield y, std::unique_ptr* user) override; + virtual int get_user_by_swift(const DoutPrefixProvider* dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) override; + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) override; + virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) override; + virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr* bucket, optional_yield y) override; + virtual bool is_meta_master() override; + virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv, + bufferlist& in_data, JSONParser* jp, req_info& info, + optional_yield y) override; + virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, + bufferlist& in_data, + RGWXMLDecoder::XMLParser* parser, req_info& info, + optional_yield y) override; + virtual Zone* get_zone() { return zone.get(); } + virtual std::string zone_unique_id(uint64_t unique_num) override; + virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; + virtual int get_zonegroup(const std::string& id, std::unique_ptr* zonegroup) override; + virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list& zone_ids) override; + virtual int cluster_stat(RGWClusterStat& stats) override; + virtual std::unique_ptr get_lifecycle(void) override; + virtual std::unique_ptr get_completions(void) override; + virtual std::unique_ptr get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj, req_state* s, rgw::notify::EventType event_type, const std::string* object_name=nullptr) override; + virtual std::unique_ptr get_notification( + const DoutPrefixProvider* dpp, rgw::sal::Object* obj, rgw::sal::Object* src_obj, + rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, + 
std::string& _req_id, optional_yield y) override; + virtual RGWLC* get_rgwlc(void) override { return rados->get_lc(); } + virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return rados->get_cr_registry(); } + + virtual int log_usage(const DoutPrefixProvider *dpp, std::map& usage_info) override; + virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override; + virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, + const std::map& meta) override; + virtual void get_quota(RGWQuota& quota) override; + virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override; + virtual int set_buckets_enabled(const DoutPrefixProvider* dpp, std::vector& buckets, bool enabled) override; + virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp, + std::optional zone, + std::optional bucket, + RGWBucketSyncPolicyHandlerRef* phandler, + optional_yield y) override; + virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override; + virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { rados->wakeup_meta_sync_shards(shard_ids); } + virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, const rgw_zone_id& source_zone, boost::container::flat_map>& shard_ids) override { rados->wakeup_data_sync_shards(dpp, source_zone, shard_ids); } + virtual int clear_usage(const DoutPrefixProvider *dpp) override { return rados->clear_usage(dpp); } + virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, + uint32_t max_entries, bool* is_truncated, + RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + virtual int get_config_key_val(std::string name, bufferlist* bl) override; + virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override; + virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list& keys, bool* truncated) override; + virtual void meta_list_keys_complete(void* handle) override; + virtual std::string meta_get_marker(void* handle) override; + virtual int meta_remove(const DoutPrefixProvider* dpp, std::string& metadata_key, optional_yield y) override; + virtual const RGWSyncModuleInstanceRef& get_sync_module() { return rados->get_sync_module(); } + virtual std::string get_host_id() { return rados->host_id; } + virtual std::unique_ptr get_lua_manager() override; + virtual std::unique_ptr get_role(std::string name, + std::string tenant, + std::string path="", + std::string trust_policy="", + std::string max_session_duration_str="", + std::multimap tags={}) override; + virtual std::unique_ptr get_role(std::string id) override; + virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; + virtual int get_roles(const DoutPrefixProvider *dpp, + optional_yield y, + const std::string& path_prefix, + const std::string& tenant, + std::vector>& roles) override; + virtual std::unique_ptr get_oidc_provider() override; + virtual int get_oidc_providers(const DoutPrefixProvider *dpp, + const std::string& tenant, + std::vector>& providers) override; + virtual std::unique_ptr get_append_writer(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const 
rgw_placement_rule *ptail_placement_rule, + const std::string& unique_tag, + uint64_t position, + uint64_t *cur_accounted_size) override; + virtual std::unique_ptr get_atomic_writer(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t olh_epoch, + const std::string& unique_tag) override; + virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override; + virtual bool valid_placement(const rgw_placement_rule& rule) override; + + virtual void finalize(void) override; + + virtual CephContext* ctx(void) override { return rados->ctx(); } + + virtual const std::string& get_luarocks_path() const override { + return luarocks_path; + } + + virtual void set_luarocks_path(const std::string& path) override { + luarocks_path = path; + } + virtual void register_admin_apis(RGWRESTMgr* mgr) override; + + /* Unique to RadosStore */ + int get_obj_head_ioctx(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, + librados::IoCtx* ioctx); + int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj); + int delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, Completions* aio); + void get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj); + int get_raw_chunk_size(const DoutPrefixProvider* dpp, const rgw_raw_obj& obj, uint64_t* chunk_size); + + void setRados(RGWRados * st) { rados = st; } + RGWRados* getRados(void) { return rados; } + + RGWServices* svc() { return &rados->svc; } + const RGWServices* svc() const { return &rados->svc; } + RGWCtl* ctl() { return &rados->ctl; } + const RGWCtl* ctl() const { return &rados->ctl; } + + void setUserCtl(RGWUserCtl *_ctl) { user_ctl = _ctl; } +}; + +class RadosUser : public StoreUser { + private: + RadosStore* store; + + public: + RadosUser(RadosStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { } + RadosUser(RadosStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { } + RadosUser(RadosStore *_st) : store(_st) { } + RadosUser(RadosUser& _o) = default; + + virtual std::unique_ptr clone() override { + return std::unique_ptr(new RadosUser(*this)); + } + int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, const std::string& end_marker, + uint64_t max, bool need_stats, BucketList& buckets, + optional_yield y) override; + virtual int create_bucket(const DoutPrefixProvider* dpp, + const rgw_bucket& b, + const std::string& zonegroup_id, + rgw_placement_rule& placement_rule, + std::string& swift_ver_location, + const RGWQuotaInfo * pquota_info, + const RGWAccessControlPolicy& policy, + Attrs& attrs, + RGWBucketInfo& info, + obj_version& ep_objv, + bool exclusive, + bool obj_lock_enabled, + bool* existed, + req_info& req_info, + std::unique_ptr* bucket, + optional_yield y) override; + virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override; + virtual int read_stats(const DoutPrefixProvider *dpp, + optional_yield y, RGWStorageStats* stats, + ceph::real_time* last_stats_sync = nullptr, + ceph::real_time* last_stats_update = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; + virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int read_usage(const 
DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, + bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + + virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override; + virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override; + + friend class RadosBucket; +}; + +class RadosObject : public StoreObject { + private: + RadosStore* store; + RGWAccessControlPolicy acls; + RGWObjManifest *manifest{nullptr}; + RGWObjectCtx* rados_ctx; + bool rados_ctx_owned; + + public: + + struct RadosReadOp : public ReadOp { + private: + RadosObject* source; + RGWObjectCtx* rctx; + RGWRados::Object op_target; + RGWRados::Object::Read parent_op; + + public: + RadosReadOp(RadosObject *_source, RGWObjectCtx *_rctx); + + virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override; + + /* + * Both `read` and `iterate` read up through index `end` + * *inclusive*. The number of bytes that could be returned is + * `end - ofs + 1`. + */ + virtual int read(int64_t ofs, int64_t end, + bufferlist& bl, optional_yield y, + const DoutPrefixProvider* dpp) override; + virtual int iterate(const DoutPrefixProvider* dpp, + int64_t ofs, int64_t end, + RGWGetDataCB* cb, optional_yield y) override; + + virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override; + }; + + struct RadosDeleteOp : public DeleteOp { + private: + RadosObject* source; + RGWRados::Object op_target; + RGWRados::Object::Delete parent_op; + + public: + RadosDeleteOp(RadosObject* _source); + + virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override; + }; + + RadosObject(RadosStore *_st, const rgw_obj_key& _k) + : StoreObject(_k), + store(_st), + acls(), + rados_ctx(new RGWObjectCtx(dynamic_cast(store))), + rados_ctx_owned(true) { + } + RadosObject(RadosStore *_st, const rgw_obj_key& _k, Bucket* _b) + : StoreObject(_k, _b), + store(_st), + acls(), + rados_ctx(new RGWObjectCtx(dynamic_cast(store))) , + rados_ctx_owned(true) { + } + RadosObject(RadosObject& _o) : StoreObject(_o) { + store = _o.store; + acls = _o.acls; + manifest = _o.manifest; + rados_ctx = _o.rados_ctx; + rados_ctx_owned = false; + } + + virtual ~RadosObject(); + + virtual void invalidate() override { + StoreObject::invalidate(); + rados_ctx->invalidate(get_obj()); + } + virtual int delete_object(const DoutPrefixProvider* dpp, + optional_yield y, bool prevent_versioning) override; + virtual int delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate, Completions* aio, + bool keep_index_consistent, optional_yield y) override; + virtual int copy_object(User* user, + req_info* info, const rgw_zone_id& source_zone, + rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, + rgw::sal::Bucket* src_bucket, + const rgw_placement_rule& dest_placement, + ceph::real_time* src_mtime, ceph::real_time* mtime, + const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, + bool high_precision_time, + const char* if_match, const char* if_nomatch, + AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, + RGWObjCategory 
category, uint64_t olh_epoch, + boost::optional delete_at, + std::string* version_id, std::string* tag, std::string* etag, + void (*progress_cb)(off_t, void *), void* progress_data, + const DoutPrefixProvider* dpp, optional_yield y) override; + virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } + virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } + virtual void set_atomic() override { + rados_ctx->set_atomic(state.obj); + StoreObject::set_atomic(); + } + virtual void set_prefetch_data() override { + rados_ctx->set_prefetch_data(state.obj); + StoreObject::set_prefetch_data(); + } + virtual void set_compressed() override { + rados_ctx->set_compressed(state.obj); + StoreObject::set_compressed(); + } + + virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **state, optional_yield y, bool follow_olh = true) override; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; + virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; + virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; + virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; + virtual bool is_expired() override; + virtual void gen_rand_obj_instance_name() override; + void get_raw_obj(rgw_raw_obj* raw_obj); + virtual std::unique_ptr clone() override { + return std::unique_ptr(new RadosObject(*this)); + } + virtual std::unique_ptr get_serializer(const DoutPrefixProvider *dpp, + const std::string& lock_name) override; + virtual int transition(Bucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual int transition_to_cloud(Bucket* bucket, + rgw::sal::PlacementTier* tier, + rgw_bucket_dir_entry& o, + std::set& cloud_targets, + CephContext* cct, + bool update_object, + const DoutPrefixProvider* dpp, + optional_yield y) override; + virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override; + virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override; + + /* Swift versioning */ + virtual int swift_versioning_restore(bool& restored, + const DoutPrefixProvider* dpp) override; + virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, + optional_yield y) override; + + /* OPs */ + virtual std::unique_ptr get_read_op() override; + virtual std::unique_ptr get_delete_op() override; + + /* OMAP */ + virtual int omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count, + std::map *m, + bool* pmore, optional_yield y) override; + virtual int omap_get_all(const DoutPrefixProvider *dpp, std::map *m, + optional_yield y) override; + virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, + const std::set& keys, + Attrs* vals) override; + virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) override; + + /* Internal to RadosStore */ + int get_max_chunk_size(const DoutPrefixProvider* dpp, + rgw_placement_rule placement_rule, + uint64_t* max_chunk_size, + uint64_t* alignment = nullptr); + void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t* max_size); + void 
raw_obj_to_obj(const rgw_raw_obj& raw_obj); + int write_cloud_tier(const DoutPrefixProvider* dpp, + optional_yield y, + uint64_t olh_epoch, + rgw::sal::PlacementTier* tier, + bool is_multipart_upload, + rgw_placement_rule& target_placement, + Object* head_obj); + RGWObjManifest* get_manifest() { return manifest; } + RGWObjectCtx& get_ctx() { return *rados_ctx; } + + private: + int read_attrs(const DoutPrefixProvider* dpp, RGWRados::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr); +}; + +class RadosBucket : public StoreBucket { + private: + RadosStore* store; + RGWAccessControlPolicy acls; + + public: + RadosBucket(RadosStore *_st) + : store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, User* _u) + : StoreBucket(_u), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const rgw_bucket& _b) + : StoreBucket(_b), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const RGWBucketEnt& _e) + : StoreBucket(_e), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const RGWBucketInfo& _i) + : StoreBucket(_i), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const rgw_bucket& _b, User* _u) + : StoreBucket(_b, _u), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const RGWBucketEnt& _e, User* _u) + : StoreBucket(_e, _u), + store(_st), + acls() { + } + + RadosBucket(RadosStore *_st, const RGWBucketInfo& _i, User* _u) + : StoreBucket(_i, _u), + store(_st), + acls() { + } + + virtual ~RadosBucket(); + virtual std::unique_ptr get_object(const rgw_obj_key& k) override; + virtual int list(const DoutPrefixProvider* dpp, ListParams&, int, ListResults&, optional_yield y) override; + virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override; + virtual int remove_bucket_bypass_gc(int concurrent_max, bool + keep_index_consistent, + optional_yield y, const + DoutPrefixProvider *dpp) override; + virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } + virtual int set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy& acl, optional_yield y) override; + virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y, bool get_stats = false) override; + virtual int read_stats(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, std::string* bucket_ver, std::string* master_ver, + std::map& stats, + std::string* max_marker = nullptr, + bool* syncstopped = nullptr) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, + const bucket_index_layout_generation& idx_layout, + int shard_id, RGWGetBucketStats_CB* ctx) override; + virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int update_container_stats(const DoutPrefixProvider* dpp) override; + virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override; + virtual int chown(const DoutPrefixProvider* dpp, User* new_user, User* old_user, optional_yield y, const std::string* marker = nullptr) override; + virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::real_time mtime) override; + virtual bool is_owner(User* user) override; + virtual int check_empty(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override; + virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, 
Attrs& attrs, optional_yield y) override; + virtual int try_refresh_info(const DoutPrefixProvider* dpp, ceph::real_time* pmtime) override; + virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, + bool* is_truncated, RGWUsageIter& usage_iter, + std::map& usage) override; + virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; + virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) override; + virtual int check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) override; + virtual int rebuild_index(const DoutPrefixProvider *dpp) override; + virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override; + virtual int purge_instance(const DoutPrefixProvider* dpp) override; + virtual std::unique_ptr clone() override { + return std::make_unique(*this); + } + virtual std::unique_ptr get_multipart_upload( + const std::string& oid, + std::optional upload_id=std::nullopt, + ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override; + virtual int list_multiparts(const DoutPrefixProvider *dpp, + const std::string& prefix, + std::string& marker, + const std::string& delim, + const int& max_uploads, + std::vector>& uploads, + std::map *common_prefixes, + bool *is_truncated) override; + virtual int abort_multiparts(const DoutPrefixProvider* dpp, + CephContext* cct) override; + + private: + int link(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint = true, RGWObjVersionTracker* objv = nullptr); + int unlink(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint = true); + friend class RadosUser; +}; + +class RadosMultipartPart : public StoreMultipartPart { +protected: + RGWUploadPartInfo info; + +public: + RadosMultipartPart() = default; + virtual ~RadosMultipartPart() = default; + + virtual uint32_t get_num() { return info.num; } + virtual uint64_t get_size() { return info.accounted_size; } + virtual const std::string& get_etag() { return info.etag; } + virtual ceph::real_time& get_mtime() { return info.modified; } + + /* For RadosStore code */ + RGWObjManifest& get_manifest() { return info.manifest; } + + friend class RadosMultipartUpload; +}; + +class RadosMultipartUpload : public StoreMultipartUpload { + RadosStore* store; + RGWMPObj mp_obj; + ACLOwner owner; + ceph::real_time mtime; + rgw_placement_rule placement; + RGWObjManifest manifest; + +public: + RadosMultipartUpload(RadosStore* _store, Bucket* _bucket, const std::string& oid, + std::optional upload_id, ACLOwner owner, + ceph::real_time _mtime) + : StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), + owner(owner), mtime(_mtime) {} + virtual ~RadosMultipartUpload() = default; + + virtual const std::string& get_meta() const override { return mp_obj.get_meta(); } + virtual const std::string& get_key() const override { return mp_obj.get_key(); } + virtual const std::string& get_upload_id() const override { return mp_obj.get_upload_id(); } + virtual const ACLOwner& get_owner() const override { return owner; } + virtual ceph::real_time& get_mtime() override { return mtime; } + virtual std::unique_ptr get_meta_obj() override; + virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override; + virtual int list_parts(const DoutPrefixProvider* dpp, 
CephContext* cct, + int num_parts, int marker, + int* next_marker, bool* truncated, + bool assume_unsorted = false) override; + virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; + virtual int complete(const DoutPrefixProvider* dpp, + optional_yield y, CephContext* cct, + std::map& part_etags, + std::list& remove_objs, + uint64_t& accounted_size, bool& compressed, + RGWCompressionInfo& cs_info, off_t& ofs, + std::string& tag, ACLOwner& owner, + uint64_t olh_epoch, + rgw::sal::Object* target_obj) override; + virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override; + virtual std::unique_ptr get_writer(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t part_num, + const std::string& part_num_str) override; +}; + +class MPRadosSerializer : public StoreMPSerializer { + librados::IoCtx ioctx; + rados::cls::lock::Lock lock; + librados::ObjectWriteOperation op; + +public: + MPRadosSerializer(const DoutPrefixProvider *dpp, RadosStore* store, RadosObject* obj, const std::string& lock_name); + + virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override; + virtual int unlock() override { + return lock.unlock(&ioctx, oid); + } +}; + +class LCRadosSerializer : public StoreLCSerializer { + librados::IoCtx* ioctx; + rados::cls::lock::Lock lock; + +public: + LCRadosSerializer(RadosStore* store, const std::string& oid, const std::string& lock_name, const std::string& cookie); + + virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override; + virtual int unlock() override { + return lock.unlock(ioctx, oid); + } +}; + +class RadosLifecycle : public StoreLifecycle { + RadosStore* store; + +public: + RadosLifecycle(RadosStore* _st) : store(_st) {} + + using StoreLifecycle::get_entry; + virtual int get_entry(const std::string& oid, const std::string& marker, std::unique_ptr* entry) override; + virtual int get_next_entry(const std::string& oid, const std::string& marker, std::unique_ptr* entry) override; + virtual int set_entry(const std::string& oid, LCEntry& entry) override; + virtual int list_entries(const std::string& oid, const std::string& marker, + uint32_t max_entries, + std::vector>& entries) override; + virtual int rm_entry(const std::string& oid, LCEntry& entry) override; + virtual int get_head(const std::string& oid, std::unique_ptr* head) override; + virtual int put_head(const std::string& oid, LCHead& head) override; + virtual std::unique_ptr get_serializer(const std::string& lock_name, + const std::string& oid, + const std::string& cookie) override; +}; + +class RadosNotification : public StoreNotification { + RadosStore* store; + /* XXX it feels incorrect to me that rgw::notify::reservation_t is + * currently RADOS-specific; instead, I think notification types such as + * reservation_t should be generally visible, whereas the internal + * notification behavior should be made portable (e.g., notification + * to non-RADOS message sinks) */ + rgw::notify::reservation_t res; + + public: + RadosNotification(const DoutPrefixProvider* _dpp, RadosStore* _store, Object* _obj, Object* _src_obj, req_state* _s, rgw::notify::EventType _type, const std::string* object_name=nullptr) : + StoreNotification(_obj, _src_obj, _type), store(_store), res(_dpp, _store, _s, _obj, _src_obj, object_name) { } + + RadosNotification(const 
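MPRadosSerializer and LCRadosSerializer above both wrap an exclusive `rados::cls::lock` lock on a control object. Roughly what their try_lock()/unlock() pairs reduce to, with assumed `ioctx`, `oid` and `cookie`; this is a sketch, not the method bodies from this patch:

```cpp
#include "cls/lock/cls_lock_client.h"

// Take and drop an exclusive, time-limited cls lock; -EBUSY means another
// holder is still live. Illustrative only.
int demo_cls_lock(librados::IoCtx& ioctx, const std::string& oid,
                  const std::string& lock_name, const std::string& cookie,
                  utime_t duration)
{
  rados::cls::lock::Lock l(lock_name);
  l.set_cookie(cookie);
  l.set_duration(duration);
  int r = l.lock_exclusive(&ioctx, oid);
  if (r < 0) {
    return r;
  }
  return l.unlock(&ioctx, oid);  // mirrors the unlock() overrides above
}
```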
DoutPrefixProvider* _dpp, RadosStore* _store, Object* _obj, Object* _src_obj, rgw::notify::EventType _type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) : + StoreNotification(_obj, _src_obj, _type), store(_store), res(_dpp, _store, _obj, _src_obj, _bucket, _user_id, _user_tenant, _req_id, y) {} + + ~RadosNotification() = default; + + rgw::notify::reservation_t& get_reservation(void) { + return res; + } + + virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override; + virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, + const ceph::real_time& mtime, const std::string& etag, const std::string& version) override; +}; + +class RadosAtomicWriter : public StoreWriter { +protected: + rgw::sal::RadosStore* store; + std::unique_ptr aio; + RGWObjectCtx* obj_ctx; + rgw::putobj::AtomicObjectProcessor processor; + +public: + RadosAtomicWriter(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + RadosStore* _store, std::unique_ptr _aio, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t olh_epoch, + const std::string& unique_tag) : + StoreWriter(dpp, y), + store(_store), + aio(std::move(_aio)), + obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), + processor(&*aio, store, + ptail_placement_rule, owner, + *obj_ctx, + std::move(_head_obj), olh_epoch, unique_tag, + dpp, y) + {} + ~RadosAtomicWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) override; +}; + +class RadosAppendWriter : public StoreWriter { +protected: + rgw::sal::RadosStore* store; + std::unique_ptr aio; + RGWObjectCtx* obj_ctx; + rgw::putobj::AppendObjectProcessor processor; + +public: + RadosAppendWriter(const DoutPrefixProvider *dpp, + optional_yield y, + std::unique_ptr _head_obj, + RadosStore* _store, std::unique_ptr _aio, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + const std::string& unique_tag, + uint64_t position, + uint64_t *cur_accounted_size) : + StoreWriter(dpp, y), + store(_store), + aio(std::move(_aio)), + obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), + processor(&*aio, store, + ptail_placement_rule, owner, + *obj_ctx, + std::move(_head_obj), unique_tag, position, + cur_accounted_size, dpp, y) + {} + ~RadosAppendWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) override; +}; + +class RadosMultipartWriter : public 
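The atomic and append writers above, and the multipart writer that follows, share one contract, also spelled out in their inline comments: prepare() once, process() per chunk, an empty process() to flush the tail, then complete(). A hedged sketch of a caller, filling complete()'s long parameter list with neutral defaults:

```cpp
// Illustrative driver for any rgw::sal::Writer; inputs are assumed.
int demo_write(rgw::sal::Writer* writer, bufferlist data, optional_yield y)
{
  const uint64_t len = data.length();
  int r = writer->prepare(y);
  if (r < 0) return r;
  r = writer->process(std::move(data), 0);  // offset of this chunk
  if (r < 0) return r;
  r = writer->process({}, len);             // empty bufferlist == flush tail
  if (r < 0) return r;

  rgw::sal::Attrs attrs;
  bool canceled = false;
  return writer->complete(len, "" /* etag */, nullptr /* mtime out */,
                          ceph::real_time() /* set_mtime */, attrs,
                          ceph::real_time() /* delete_at */,
                          nullptr /* if_match */, nullptr /* if_nomatch */,
                          nullptr /* user_data */, nullptr /* zones_trace */,
                          &canceled, y);
}
```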
StoreWriter { +protected: + rgw::sal::RadosStore* store; + std::unique_ptr aio; + RGWObjectCtx* obj_ctx; + rgw::putobj::MultipartObjectProcessor processor; + +public: + RadosMultipartWriter(const DoutPrefixProvider *dpp, + optional_yield y, MultipartUpload* upload, + std::unique_ptr _head_obj, + RadosStore* _store, std::unique_ptr _aio, + const rgw_user& owner, + const rgw_placement_rule *ptail_placement_rule, + uint64_t part_num, const std::string& part_num_str) : + StoreWriter(dpp, y), + store(_store), + aio(std::move(_aio)), + obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), + processor(&*aio, store, + ptail_placement_rule, owner, + *obj_ctx, + std::move(_head_obj), upload->get_upload_id(), + part_num, part_num_str, dpp, y) + {} + ~RadosMultipartWriter() = default; + + // prepare to start processing object data + virtual int prepare(optional_yield y) override; + + // Process a bufferlist + virtual int process(bufferlist&& data, uint64_t offset) override; + + // complete the operation and make its result visible to clients + virtual int complete(size_t accounted_size, const std::string& etag, + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map& attrs, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + optional_yield y) override; +}; + +class RadosLuaManager : public StoreLuaManager { + RadosStore* const store; + rgw_pool pool; + +public: + RadosLuaManager(RadosStore* _s); + virtual ~RadosLuaManager() = default; + + virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script); + virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script); + virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key); + virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name); + virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name); + virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages); +}; + +class RadosOIDCProvider : public RGWOIDCProvider { + RadosStore* store; +public: + RadosOIDCProvider(RadosStore* _store) : store(_store) {} + ~RadosOIDCProvider() = default; + + virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override; + virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override; + virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override; + void encode(bufferlist& bl) const { + RGWOIDCProvider::encode(bl); + } + void decode(bufferlist::const_iterator& bl) { + RGWOIDCProvider::decode(bl); + } +}; + +class RadosRole : public RGWRole { + RadosStore* store; +public: + RadosRole(RadosStore* _store, std::string name, + std::string tenant, + std::string path, + std::string trust_policy, + std::string max_session_duration, + std::multimap tags) : RGWRole(name, tenant, path, trust_policy, max_session_duration, tags), store(_store) {} + RadosRole(RadosStore* _store, std::string id) : RGWRole(id), store(_store) {} + RadosRole(RadosStore* _store, const RGWRoleInfo& info) : RGWRole(info), store(_store) {} + RadosRole(RadosStore* _store) : store(_store) {} + ~RadosRole() = default; + + virtual int store_info(const DoutPrefixProvider *dpp, bool 
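RadosLuaManager persists per-key Lua scripts and a package list in RADOS. A hypothetical round-trip through the manager interface declared above, assuming the SAL base class exposes the same virtuals and that `mgr` comes from the driver; the key name is an illustration only:

```cpp
// Store a script, then read it back; hypothetical key and inputs.
int demo_lua(const DoutPrefixProvider* dpp, rgw::sal::LuaManager* mgr,
             optional_yield y)
{
  const std::string key = "prerequest";   // hypothetical script key
  int r = mgr->put_script(dpp, y, key, "function f() end");
  if (r < 0) {
    return r;
  }
  std::string body;
  return mgr->get_script(dpp, y, key, body);
}
```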
exclusive, optional_yield y) override; + virtual int store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) override; + virtual int store_path(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) override; + virtual int read_id(const DoutPrefixProvider *dpp, const std::string& role_name, const std::string& tenant, std::string& role_id, optional_yield y) override; + virtual int read_name(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int read_info(const DoutPrefixProvider *dpp, optional_yield y) override; + virtual int create(const DoutPrefixProvider *dpp, bool exclusive, const std::string& role_id, optional_yield y) override; + virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override; +}; +}} // namespace rgw::sal + +WRITE_CLASS_ENCODER(rgw::sal::RadosOIDCProvider) diff --git a/src/rgw/driver/rados/rgw_service.cc b/src/rgw/driver/rados/rgw_service.cc new file mode 100644 index 00000000000..4fcb1ebdef7 --- /dev/null +++ b/src/rgw/driver/rados/rgw_service.cc @@ -0,0 +1,476 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_service.h" + +#include "services/svc_finisher.h" +#include "services/svc_bi_rados.h" +#include "services/svc_bilog_rados.h" +#include "services/svc_bucket_sobj.h" +#include "services/svc_bucket_sync_sobj.h" +#include "services/svc_cls.h" +#include "services/svc_config_key_rados.h" +#include "services/svc_mdlog.h" +#include "services/svc_meta.h" +#include "services/svc_meta_be.h" +#include "services/svc_meta_be_sobj.h" +#include "services/svc_meta_be_otp.h" +#include "services/svc_notify.h" +#include "services/svc_otp.h" +#include "services/svc_rados.h" +#include "services/svc_zone.h" +#include "services/svc_zone_utils.h" +#include "services/svc_quota.h" +#include "services/svc_sync_modules.h" +#include "services/svc_sys_obj.h" +#include "services/svc_sys_obj_cache.h" +#include "services/svc_sys_obj_core.h" +#include "services/svc_user_rados.h" +#include "services/svc_role_rados.h" + +#include "common/errno.h" + +#include "rgw_bucket.h" +#include "rgw_datalog.h" +#include "rgw_metadata.h" +#include "rgw_otp.h" +#include "rgw_user.h" +#include "rgw_role.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +RGWServices_Def::RGWServices_Def() = default; +RGWServices_Def::~RGWServices_Def() +{ + shutdown(); +} + +int RGWServices_Def::init(CephContext *cct, + bool have_cache, + bool raw, + bool run_sync, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + finisher = std::make_unique(cct); + bucket_sobj = std::make_unique(cct); + bucket_sync_sobj = std::make_unique(cct); + bi_rados = std::make_unique(cct); + bilog_rados = std::make_unique(cct); + cls = std::make_unique(cct); + config_key_rados = std::make_unique(cct); + datalog_rados = std::make_unique(cct); + mdlog = std::make_unique(cct, run_sync); + meta = std::make_unique(cct); + meta_be_sobj = std::make_unique(cct); + meta_be_otp = std::make_unique(cct); + notify = std::make_unique(cct); + otp = std::make_unique(cct); + rados = std::make_unique(cct); + zone = std::make_unique(cct); + zone_utils = std::make_unique(cct); + quota = std::make_unique(cct); + sync_modules = std::make_unique(cct); + sysobj = std::make_unique(cct); + sysobj_core = std::make_unique(cct); + user_rados = std::make_unique(cct); + role_rados = std::make_unique(cct); + + if (have_cache) { + sysobj_cache = std::make_unique(dpp, cct); + } + + vector meta_bes{meta_be_sobj.get(), 
meta_be_otp.get()}; + + finisher->init(); + bi_rados->init(zone.get(), rados.get(), bilog_rados.get(), datalog_rados.get()); + bilog_rados->init(bi_rados.get()); + bucket_sobj->init(zone.get(), sysobj.get(), sysobj_cache.get(), + bi_rados.get(), meta.get(), meta_be_sobj.get(), + sync_modules.get(), bucket_sync_sobj.get()); + bucket_sync_sobj->init(zone.get(), + sysobj.get(), + sysobj_cache.get(), + bucket_sobj.get()); + cls->init(zone.get(), rados.get()); + config_key_rados->init(rados.get()); + mdlog->init(rados.get(), zone.get(), sysobj.get(), cls.get()); + meta->init(sysobj.get(), mdlog.get(), meta_bes); + meta_be_sobj->init(sysobj.get(), mdlog.get()); + meta_be_otp->init(sysobj.get(), mdlog.get(), cls.get()); + notify->init(zone.get(), rados.get(), finisher.get()); + otp->init(zone.get(), meta.get(), meta_be_otp.get()); + rados->init(); + zone->init(sysobj.get(), rados.get(), sync_modules.get(), bucket_sync_sobj.get()); + zone_utils->init(rados.get(), zone.get()); + quota->init(zone.get()); + sync_modules->init(zone.get()); + sysobj_core->core_init(rados.get(), zone.get()); + if (have_cache) { + sysobj_cache->init(rados.get(), zone.get(), notify.get()); + sysobj->init(rados.get(), sysobj_cache.get()); + } else { + sysobj->init(rados.get(), sysobj_core.get()); + } + user_rados->init(rados.get(), zone.get(), sysobj.get(), sysobj_cache.get(), + meta.get(), meta_be_sobj.get(), sync_modules.get()); + role_rados->init(zone.get(), meta.get(), meta_be_sobj.get(), sysobj.get()); + + can_shutdown = true; + + int r = finisher->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start finisher service (" << cpp_strerror(-r) << dendl; + return r; + } + + if (!raw) { + r = notify->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start notify service (" << cpp_strerror(-r) << dendl; + return r; + } + } + + r = rados->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start rados service (" << cpp_strerror(-r) << dendl; + return r; + } + + if (!raw) { + r = zone->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start zone service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = datalog_rados->start(dpp, &zone->get_zone(), + zone->get_zone_params(), + rados->get_rados_handle()); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = mdlog->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start mdlog service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = sync_modules->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start sync modules service (" << cpp_strerror(-r) << dendl; + return r; + } + } + + r = cls->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start cls service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = config_key_rados->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start config_key service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = zone_utils->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start zone_utils service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = quota->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start quota service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = sysobj_core->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj_core service (" << cpp_strerror(-r) << dendl; + 
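RGWServices_Def::init() shows the two-phase bring-up used across these services: everything is constructed first, dependencies are wired through init() calls that do no I/O, and only then is start() run in dependency order with fail-fast error handling. Distilled into a hypothetical pair of services, not the real RGWSI_* types:

```cpp
#include <cerrno>
#include <memory>

struct SvcA { int start() { return 0; } };
struct SvcB {
  SvcA* a = nullptr;
  void init(SvcA* dep) { a = dep; }        // phase 1: wiring only, no I/O
  int start() { return a ? 0 : -EINVAL; }  // phase 2: may touch the cluster
};

int bring_up() {
  auto a = std::make_unique<SvcA>();
  auto b = std::make_unique<SvcB>();
  b->init(a.get());                        // wire everything first
  if (int r = a->start(); r < 0) return r; // then start in dependency order
  if (int r = b->start(); r < 0) return r;
  return 0;
}
```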
return r; + } + + if (have_cache) { + r = sysobj_cache->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj_cache service (" << cpp_strerror(-r) << dendl; + return r; + } + } + + r = sysobj->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj service (" << cpp_strerror(-r) << dendl; + return r; + } + + if (!raw) { + r = meta_be_sobj->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start meta_be_sobj service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = meta->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start meta service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = bucket_sobj->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start bucket service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = bucket_sync_sobj->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start bucket_sync service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = user_rados->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start user_rados service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = otp->start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start otp service (" << cpp_strerror(-r) << dendl; + return r; + } + + r = role_rados->start(y, dpp); + if (r < 0) { + ldout(cct, 0) << "ERROR: failed to start role_rados service (" << cpp_strerror(-r) << dendl; + return r; + } + + } + + /* cache or core services will be started by sysobj */ + + return 0; +} + +void RGWServices_Def::shutdown() +{ + if (!can_shutdown) { + return; + } + + if (has_shutdown) { + return; + } + + role_rados->shutdown(); + datalog_rados.reset(); + user_rados->shutdown(); + sync_modules->shutdown(); + otp->shutdown(); + notify->shutdown(); + meta_be_otp->shutdown(); + meta_be_sobj->shutdown(); + meta->shutdown(); + mdlog->shutdown(); + config_key_rados->shutdown(); + cls->shutdown(); + bilog_rados->shutdown(); + bi_rados->shutdown(); + bucket_sync_sobj->shutdown(); + bucket_sobj->shutdown(); + finisher->shutdown(); + + sysobj->shutdown(); + sysobj_core->shutdown(); + notify->shutdown(); + if (sysobj_cache) { + sysobj_cache->shutdown(); + } + quota->shutdown(); + zone_utils->shutdown(); + zone->shutdown(); + rados->shutdown(); + + has_shutdown = true; + +} + + +int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) +{ + cct = _cct; + + int r = _svc.init(cct, have_cache, raw, run_sync, y, dpp); + if (r < 0) { + return r; + } + + finisher = _svc.finisher.get(); + bi_rados = _svc.bi_rados.get(); + bi = bi_rados; + bilog_rados = _svc.bilog_rados.get(); + bucket_sobj = _svc.bucket_sobj.get(); + bucket = bucket_sobj; + bucket_sync_sobj = _svc.bucket_sync_sobj.get(); + bucket_sync = bucket_sync_sobj; + cls = _svc.cls.get(); + config_key_rados = _svc.config_key_rados.get(); + config_key = config_key_rados; + datalog_rados = _svc.datalog_rados.get(); + mdlog = _svc.mdlog.get(); + meta = _svc.meta.get(); + meta_be_sobj = _svc.meta_be_sobj.get(); + meta_be_otp = _svc.meta_be_otp.get(); + notify = _svc.notify.get(); + otp = _svc.otp.get(); + rados = _svc.rados.get(); + zone = _svc.zone.get(); + zone_utils = _svc.zone_utils.get(); + quota = _svc.quota.get(); + sync_modules = _svc.sync_modules.get(); + sysobj = _svc.sysobj.get(); + cache = _svc.sysobj_cache.get(); + core = _svc.sysobj_core.get(); + user = _svc.user_rados.get(); + role = 
_svc.role_rados.get();
+
+  return 0;
+}
+
+RGWServiceInstance::~RGWServiceInstance() {}
+
+int RGWServiceInstance::start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  if (start_state != StateInit) {
+    return 0;
+  }
+
+  start_state = StateStarting; /* setting started prior to do_start() on purpose so that circular
+                                  references can call start() on each other */
+
+  int r = do_start(y, dpp);
+  if (r < 0) {
+    return r;
+  }
+
+  start_state = StateStarted;
+
+  return 0;
+}
+
+RGWCtlDef::RGWCtlDef() {}
+RGWCtlDef::~RGWCtlDef() {}
+RGWCtlDef::_meta::_meta() {}
+RGWCtlDef::_meta::~_meta() {}
+
+
+int RGWCtlDef::init(RGWServices& svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp)
+{
+  meta.mgr.reset(new RGWMetadataManager(svc.meta));
+
+  meta.user.reset(RGWUserMetaHandlerAllocator::alloc(svc.user));
+
+  auto sync_module = svc.sync_modules->get_sync_module();
+  if (sync_module) {
+    meta.bucket.reset(sync_module->alloc_bucket_meta_handler());
+    meta.bucket_instance.reset(sync_module->alloc_bucket_instance_meta_handler(driver));
+  } else {
+    meta.bucket.reset(RGWBucketMetaHandlerAllocator::alloc());
+    meta.bucket_instance.reset(RGWBucketInstanceMetaHandlerAllocator::alloc(driver));
+  }
+
+  meta.otp.reset(RGWOTPMetaHandlerAllocator::alloc());
+  meta.role = std::make_unique<RGWRoleMetadataHandler>(driver, svc.role);
+
+  user.reset(new RGWUserCtl(svc.zone, svc.user, (RGWUserMetadataHandler *)meta.user.get()));
+  bucket.reset(new RGWBucketCtl(svc.zone,
+                                svc.bucket,
+                                svc.bucket_sync,
+                                svc.bi, svc.user));
+  otp.reset(new RGWOTPCtl(svc.zone, svc.otp));
+
+  RGWBucketMetadataHandlerBase *bucket_meta_handler =
+    static_cast<RGWBucketMetadataHandlerBase *>(meta.bucket.get());
+  RGWBucketInstanceMetadataHandlerBase *bi_meta_handler =
+    static_cast<RGWBucketInstanceMetadataHandlerBase *>(meta.bucket_instance.get());
+
+  bucket_meta_handler->init(svc.bucket, bucket.get());
+  bi_meta_handler->init(svc.zone, svc.bucket, svc.bi);
+
+  RGWOTPMetadataHandlerBase *otp_handler =
+    static_cast<RGWOTPMetadataHandlerBase *>(meta.otp.get());
+  otp_handler->init(svc.zone, svc.meta_be_otp, svc.otp);
+
+  user->init(bucket.get());
+  bucket->init(user.get(),
+               (RGWBucketMetadataHandler *)bucket_meta_handler,
+               (RGWBucketInstanceMetadataHandler *)bi_meta_handler,
+               svc.datalog_rados,
+               dpp);
+
+  otp->init((RGWOTPMetadataHandler *)meta.otp.get());
+
+  return 0;
+}
+
+int RGWCtl::init(RGWServices *_svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp)
+{
+  svc = _svc;
+  cct = svc->cct;
+
+  int r = _ctl.init(*svc, driver, dpp);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to init ctls (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  meta.mgr = _ctl.meta.mgr.get();
+  meta.user = _ctl.meta.user.get();
+  meta.bucket = _ctl.meta.bucket.get();
+  meta.bucket_instance = _ctl.meta.bucket_instance.get();
+  meta.otp = _ctl.meta.otp.get();
+  meta.role = _ctl.meta.role.get();
+
+  user = _ctl.user.get();
+  bucket = _ctl.bucket.get();
+  otp = _ctl.otp.get();
+
+  r = meta.user->attach(meta.mgr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to init meta.user ctl (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = meta.bucket->attach(meta.mgr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to init meta.bucket ctl (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = meta.bucket_instance->attach(meta.mgr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to init meta.bucket_instance ctl (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = meta.otp->attach(meta.mgr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to init meta.otp ctl (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = meta.role->attach(meta.mgr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to init meta.role ctl (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+  return 0;
+}
+
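RGWServiceInstance::start() above guards against double and re-entrant starts: the state is bumped to StateStarting before do_start() runs precisely so mutually dependent services can call back into each other's start() without recursing. A condensed standalone version of that guard, made atomic here for illustration (the original runs single-threaded at startup):

```cpp
#include <atomic>
#include <functional>

// Re-entrant calls see a non-initial state and return 0 immediately.
int start_once(std::atomic<int>& state, const std::function<int()>& do_start)
{
  int expected = 0;                         // StateInit
  if (!state.compare_exchange_strong(expected, 1 /* StateStarting */)) {
    return 0;                               // already starting or started
  }
  int r = do_start();
  if (r < 0) {
    return r;
  }
  state = 2;                                // StateStarted
  return 0;
}
```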
diff --git a/src/rgw/driver/rados/rgw_service.h b/src/rgw/driver/rados/rgw_service.h
new file mode 100644
index 00000000000..dc4991388a9
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_service.h
@@ -0,0 +1,219 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_SERVICE_H
+#define CEPH_RGW_SERVICE_H
+
+
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "common/async/yield_context.h"
+
+#include "rgw_common.h"
+
+struct RGWServices_Def;
+
+class RGWServiceInstance
+{
+  friend struct RGWServices_Def;
+
+protected:
+  CephContext *cct;
+
+  enum StartState {
+    StateInit = 0,
+    StateStarting = 1,
+    StateStarted = 2,
+  } start_state{StateInit};
+
+  virtual void shutdown() {}
+  virtual int do_start(optional_yield, const DoutPrefixProvider *dpp) {
+    return 0;
+  }
+public:
+  RGWServiceInstance(CephContext *_cct) : cct(_cct) {}
+  virtual ~RGWServiceInstance();
+
+  int start(optional_yield y, const DoutPrefixProvider *dpp);
+  bool is_started() {
+    return (start_state == StateStarted);
+  }
+
+  CephContext *ctx() {
+    return cct;
+  }
+};
+
+class RGWSI_Finisher;
+class RGWSI_Bucket;
+class RGWSI_Bucket_SObj;
+class RGWSI_Bucket_Sync;
+class RGWSI_Bucket_Sync_SObj;
+class RGWSI_BucketIndex;
+class RGWSI_BucketIndex_RADOS;
+class RGWSI_BILog_RADOS;
+class RGWSI_Cls;
+class RGWSI_ConfigKey;
+class RGWSI_ConfigKey_RADOS;
+class RGWSI_MDLog;
+class RGWSI_Meta;
+class RGWSI_MetaBackend;
+class RGWSI_MetaBackend_SObj;
+class RGWSI_MetaBackend_OTP;
+class RGWSI_Notify;
+class RGWSI_OTP;
+class RGWSI_RADOS;
+class RGWSI_Zone;
+class RGWSI_ZoneUtils;
+class RGWSI_Quota;
+class RGWSI_SyncModules;
+class RGWSI_SysObj;
+class RGWSI_SysObj_Core;
+class RGWSI_SysObj_Cache;
+class RGWSI_User;
+class RGWSI_User_RADOS;
+class RGWDataChangesLog;
+class RGWSI_Role_RADOS;
+
+struct RGWServices_Def
+{
+  bool can_shutdown{false};
+  bool has_shutdown{false};
+
+  std::unique_ptr<RGWSI_Finisher> finisher;
+  std::unique_ptr<RGWSI_Bucket_SObj> bucket_sobj;
+  std::unique_ptr<RGWSI_Bucket_Sync_SObj> bucket_sync_sobj;
+  std::unique_ptr<RGWSI_BucketIndex_RADOS> bi_rados;
+  std::unique_ptr<RGWSI_BILog_RADOS> bilog_rados;
+  std::unique_ptr<RGWSI_Cls> cls;
+  std::unique_ptr<RGWSI_ConfigKey_RADOS> config_key_rados;
+  std::unique_ptr<RGWSI_MDLog> mdlog;
+  std::unique_ptr<RGWSI_Meta> meta;
+  std::unique_ptr<RGWSI_MetaBackend_SObj> meta_be_sobj;
+  std::unique_ptr<RGWSI_MetaBackend_OTP> meta_be_otp;
+  std::unique_ptr<RGWSI_Notify> notify;
+  std::unique_ptr<RGWSI_OTP> otp;
+  std::unique_ptr<RGWSI_RADOS> rados;
+  std::unique_ptr<RGWSI_Zone> zone;
+  std::unique_ptr<RGWSI_ZoneUtils> zone_utils;
+  std::unique_ptr<RGWSI_Quota> quota;
+  std::unique_ptr<RGWSI_SyncModules> sync_modules;
+  std::unique_ptr<RGWSI_SysObj> sysobj;
+  std::unique_ptr<RGWSI_SysObj_Core> sysobj_core;
+  std::unique_ptr<RGWSI_SysObj_Cache> sysobj_cache;
+  std::unique_ptr<RGWSI_User_RADOS> user_rados;
+  std::unique_ptr<RGWDataChangesLog> datalog_rados;
+  std::unique_ptr<RGWSI_Role_RADOS> role_rados;
+
+  RGWServices_Def();
+  ~RGWServices_Def();
+
+  int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp);
+  void shutdown();
+};
+
+
+struct RGWServices
+{
+  RGWServices_Def _svc;
+
+  CephContext *cct;
+
+  RGWSI_Finisher *finisher{nullptr};
+  RGWSI_Bucket *bucket{nullptr};
+  RGWSI_Bucket_SObj *bucket_sobj{nullptr};
+  RGWSI_Bucket_Sync *bucket_sync{nullptr};
+  RGWSI_Bucket_Sync_SObj *bucket_sync_sobj{nullptr};
+  RGWSI_BucketIndex *bi{nullptr};
+  RGWSI_BucketIndex_RADOS *bi_rados{nullptr};
+  RGWSI_BILog_RADOS *bilog_rados{nullptr};
+  RGWSI_Cls *cls{nullptr};
+  RGWSI_ConfigKey_RADOS *config_key_rados{nullptr};
+  RGWSI_ConfigKey *config_key{nullptr};
+  RGWDataChangesLog *datalog_rados{nullptr};
RGWSI_MDLog *mdlog{nullptr}; + RGWSI_Meta *meta{nullptr}; + RGWSI_MetaBackend *meta_be_sobj{nullptr}; + RGWSI_MetaBackend *meta_be_otp{nullptr}; + RGWSI_Notify *notify{nullptr}; + RGWSI_OTP *otp{nullptr}; + RGWSI_RADOS *rados{nullptr}; + RGWSI_Zone *zone{nullptr}; + RGWSI_ZoneUtils *zone_utils{nullptr}; + RGWSI_Quota *quota{nullptr}; + RGWSI_SyncModules *sync_modules{nullptr}; + RGWSI_SysObj *sysobj{nullptr}; + RGWSI_SysObj_Cache *cache{nullptr}; + RGWSI_SysObj_Core *core{nullptr}; + RGWSI_User *user{nullptr}; + RGWSI_Role_RADOS *role{nullptr}; + + int do_init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp); + + int init(CephContext *cct, bool have_cache, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) { + return do_init(cct, have_cache, false, run_sync, y, dpp); + } + + int init_raw(CephContext *cct, bool have_cache, optional_yield y, const DoutPrefixProvider *dpp) { + return do_init(cct, have_cache, true, false, y, dpp); + } + void shutdown() { + _svc.shutdown(); + } +}; + +class RGWMetadataManager; +class RGWMetadataHandler; +class RGWUserCtl; +class RGWBucketCtl; +class RGWOTPCtl; + +struct RGWCtlDef { + struct _meta { + std::unique_ptr mgr; + std::unique_ptr bucket; + std::unique_ptr bucket_instance; + std::unique_ptr user; + std::unique_ptr otp; + std::unique_ptr role; + + _meta(); + ~_meta(); + } meta; + + std::unique_ptr user; + std::unique_ptr bucket; + std::unique_ptr otp; + + RGWCtlDef(); + ~RGWCtlDef(); + + int init(RGWServices& svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp); +}; + +struct RGWCtl { + CephContext *cct{nullptr}; + RGWServices *svc{nullptr}; + + RGWCtlDef _ctl; + + struct _meta { + RGWMetadataManager *mgr{nullptr}; + + RGWMetadataHandler *bucket{nullptr}; + RGWMetadataHandler *bucket_instance{nullptr}; + RGWMetadataHandler *user{nullptr}; + RGWMetadataHandler *otp{nullptr}; + RGWMetadataHandler *role{nullptr}; + } meta; + + RGWUserCtl *user{nullptr}; + RGWBucketCtl *bucket{nullptr}; + RGWOTPCtl *otp{nullptr}; + + int init(RGWServices *_svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp); +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_sync.cc b/src/rgw/driver/rados/rgw_sync.cc new file mode 100644 index 00000000000..065d20985c4 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync.cc @@ -0,0 +1,2567 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_sync.h" +#include "rgw_rest_conn.h" +#include "rgw_cr_rados.h" +#include "rgw_cr_rest.h" + +#include "services/svc_zone.h" +#include "services/svc_mdlog.h" +#include "services/svc_cls.h" + +#include + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "meta sync: ") + +using namespace std; + +static string mdlog_sync_status_oid = "mdlog.sync-status"; +static string mdlog_sync_status_shard_prefix = "mdlog.sync-status.shard"; +static string mdlog_sync_full_sync_index_prefix = "meta.full-sync.index"; + +RGWContinuousLeaseCR::~RGWContinuousLeaseCR() {} + +RGWSyncErrorLogger::RGWSyncErrorLogger(rgw::sal::RadosStore* _store, const string &oid_prefix, int _num_shards) : store(_store), num_shards(_num_shards) { + for (int i = 0; i < num_shards; i++) { + oids.push_back(get_shard_oid(oid_prefix, i)); + } +} +string RGWSyncErrorLogger::get_shard_oid(const string& oid_prefix, int shard_id) { + char buf[oid_prefix.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%d", oid_prefix.c_str(), shard_id); + return 
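The sync machinery shards its status and error-log objects by appending the shard id to a fixed prefix, as get_shard_oid() above does with snprintf into a variable-length stack buffer. An equivalent helper without the VLA:

```cpp
#include <string>

// e.g. shard_oid("mdlog.sync-status.shard", 3) -> "mdlog.sync-status.shard.3"
std::string shard_oid(const std::string& prefix, int shard_id)
{
  return prefix + "." + std::to_string(shard_id);
}
```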
string(buf); +} + +RGWCoroutine *RGWSyncErrorLogger::log_error_cr(const DoutPrefixProvider *dpp, const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message) { + cls_log_entry entry; + + rgw_sync_error_info info(source_zone, error_code, message); + bufferlist bl; + encode(info, bl); + store->svc()->cls->timelog.prepare_entry(entry, real_clock::now(), section, name, bl); + + uint32_t shard_id = ++counter % num_shards; + + + return new RGWRadosTimelogAddCR(dpp, store, oids[shard_id], entry); +} + +void RGWSyncBackoff::update_wait_time() +{ + if (cur_wait == 0) { + cur_wait = 1; + } else { + cur_wait = (cur_wait << 1); + } + if (cur_wait >= max_secs) { + cur_wait = max_secs; + } +} + +void RGWSyncBackoff::backoff_sleep() +{ + update_wait_time(); + sleep(cur_wait); +} + +void RGWSyncBackoff::backoff(RGWCoroutine *op) +{ + update_wait_time(); + op->wait(utime_t(cur_wait, 0)); +} + +int RGWBackoffControlCR::operate(const DoutPrefixProvider *dpp) { + reenter(this) { + // retry the operation until it succeeds + while (true) { + yield { + std::lock_guard l{lock}; + cr = alloc_cr(); + cr->get(); + call(cr); + } + { + std::lock_guard l{lock}; + cr->put(); + cr = NULL; + } + if (retcode >= 0) { + break; + } + if (retcode != -EBUSY && retcode != -EAGAIN) { + ldout(cct, 0) << "ERROR: RGWBackoffControlCR called coroutine returned " << retcode << dendl; + if (exit_on_error) { + return set_cr_error(retcode); + } + } + if (reset_backoff) { + backoff.reset(); + } + yield backoff.backoff(this); + } + + // run an optional finisher + yield call(alloc_finisher_cr()); + if (retcode < 0) { + ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; +} + +void rgw_mdlog_info::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("num_objects", num_shards, obj); + JSONDecoder::decode_json("period", period, obj); + JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); +} + +void rgw_mdlog_entry::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("id", id, obj); + JSONDecoder::decode_json("section", section, obj); + JSONDecoder::decode_json("name", name, obj); + utime_t ut; + JSONDecoder::decode_json("timestamp", ut, obj); + timestamp = ut.to_real_time(); + JSONDecoder::decode_json("data", log_data, obj); +} + +void rgw_mdlog_shard_data::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("truncated", truncated, obj); + JSONDecoder::decode_json("entries", entries, obj); +}; + +int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) { + reenter(this) { + while (spawn_next()) { + current_running++; + + if (current_running >= max_concurrent) { + int child_ret; + yield wait_for_child(); + if (collect_next(&child_ret)) { + current_running--; + child_ret = handle_result(child_ret); + if (child_ret < 0) { + status = child_ret; + } + } + } + } + while (current_running > 0) { + int child_ret; + yield wait_for_child(); + if (collect_next(&child_ret)) { + current_running--; + child_ret = handle_result(child_ret); + if (child_ret < 0) { + status = child_ret; + } + } + } + if (status < 0) { + return set_cr_error(status); + } + return set_cr_done(); + } + return 0; +} + +class RGWReadRemoteMDLogInfoCR : public RGWShardCollectCR { + RGWMetaSyncEnv *sync_env; + + const std::string& period; + int num_shards; + map *mdlog_info; + + int shard_id; +#define READ_MDLOG_MAX_CONCURRENT 10 + + int handle_result(int r) 
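RGWSyncBackoff doubles the wait on every failure and saturates at max_secs, so retries sleep 1, 2, 4, ... up to the cap. The same policy as a pure function:

```cpp
#include <algorithm>
#include <cstdint>

// Same doubling-with-saturation as update_wait_time() above.
uint32_t next_backoff(uint32_t cur_wait, uint32_t max_secs)
{
  cur_wait = (cur_wait == 0) ? 1 : (cur_wait << 1);
  return std::min(cur_wait, max_secs);
}
```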
override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch mdlog status: " << cpp_strerror(r) << dendl; + } + return r; + } +public: + RGWReadRemoteMDLogInfoCR(RGWMetaSyncEnv *_sync_env, + const std::string& period, int _num_shards, + map *_mdlog_info) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT), + sync_env(_sync_env), + period(period), num_shards(_num_shards), + mdlog_info(_mdlog_info), shard_id(0) {} + bool spawn_next() override; +}; + +class RGWListRemoteMDLogCR : public RGWShardCollectCR { + RGWMetaSyncEnv *sync_env; + + const std::string& period; + map shards; + int max_entries_per_shard; + map *result; + + map::iterator iter; +#define READ_MDLOG_MAX_CONCURRENT 10 + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list remote mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } +public: + RGWListRemoteMDLogCR(RGWMetaSyncEnv *_sync_env, + const std::string& period, map& _shards, + int _max_entries_per_shard, + map *_result) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT), + sync_env(_sync_env), period(period), + max_entries_per_shard(_max_entries_per_shard), + result(_result) { + shards.swap(_shards); + iter = shards.begin(); + } + bool spawn_next() override; +}; + +int RGWRemoteMetaLog::read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info) +{ + rgw_http_param_pair pairs[] = { { "type", "metadata" }, + { NULL, NULL } }; + + int ret = conn->get_json_resource(dpp, "/admin/log", pairs, null_yield, *log_info); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog info" << dendl; + return ret; + } + + ldpp_dout(dpp, 20) << "remote mdlog, num_shards=" << log_info->num_shards << dendl; + + return 0; +} + +int RGWRemoteMetaLog::read_master_log_shards_info(const DoutPrefixProvider *dpp, const string &master_period, map *shards_info) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + + rgw_mdlog_info log_info; + int ret = read_log_info(dpp, &log_info); + if (ret < 0) { + return ret; + } + + return run(dpp, new RGWReadRemoteMDLogInfoCR(&sync_env, master_period, log_info.num_shards, shards_info)); +} + +int RGWRemoteMetaLog::read_master_log_shards_next(const DoutPrefixProvider *dpp, const string& period, map shard_markers, map *result) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + + return run(dpp, new RGWListRemoteMDLogCR(&sync_env, period, shard_markers, 1, result)); +} + +int RGWRemoteMetaLog::init() +{ + conn = store->svc()->zone->get_master_conn(); + + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + + error_logger = new RGWSyncErrorLogger(store, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS); + + init_sync_env(&sync_env); + + tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "meta"); + + return 0; +} + +#define CLONE_MAX_ENTRIES 100 + +int RGWMetaSyncStatusManager::init(const DoutPrefixProvider *dpp) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + + if (!store->svc()->zone->get_master_conn()) { + ldpp_dout(dpp, -1) << "no REST connection to master zone" << dendl; + return -EIO; + } + + int r = rgw_init_ioctx(dpp, store->getRados()->get_rados_handle(), store->svc()->zone->get_zone_params().log_pool, ioctx, true); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed 
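Both collectors above inherit RGWShardCollectCR's bounded fan-out: at most READ_MDLOG_MAX_CONCURRENT children from spawn_next() are kept in flight, and one completion is harvested before the next spawn. A thread-based analogue of that loop, for illustration only:

```cpp
#include <deque>
#include <functional>
#include <future>
#include <vector>

// Run jobs with at most max_concurrent in flight; remember the last failure.
int run_bounded(std::vector<std::function<int()>> jobs, size_t max_concurrent)
{
  std::deque<std::future<int>> running;
  int status = 0;
  for (auto& job : jobs) {
    if (running.size() >= max_concurrent) {      // cf. wait_for_child()
      if (int r = running.front().get(); r < 0) {
        status = r;                              // cf. handle_result()
      }
      running.pop_front();
    }
    running.push_back(std::async(std::launch::async, std::move(job)));
  }
  while (!running.empty()) {                     // drain the stragglers
    if (int r = running.front().get(); r < 0) {
      status = r;
    }
    running.pop_front();
  }
  return status;
}
```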
to open log pool (" << store->svc()->zone->get_zone_params().log_pool << " ret=" << r << dendl; + return r; + } + + r = master_log.init(); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to init remote log, r=" << r << dendl; + return r; + } + + RGWMetaSyncEnv& sync_env = master_log.get_sync_env(); + + rgw_meta_sync_status sync_status; + r = read_sync_status(dpp, &sync_status); + if (r < 0 && r != -ENOENT) { + ldpp_dout(dpp, -1) << "ERROR: failed to read sync status, r=" << r << dendl; + return r; + } + + int num_shards = sync_status.sync_info.num_shards; + + for (int i = 0; i < num_shards; i++) { + shard_objs[i] = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env.shard_obj_name(i)); + } + + std::unique_lock wl{ts_to_shard_lock}; + for (int i = 0; i < num_shards; i++) { + clone_markers.push_back(string()); + utime_shard ut; + ut.shard_id = i; + ts_to_shard[ut] = i; + } + + return 0; +} + +void RGWMetaSyncEnv::init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _store, RGWRESTConn *_conn, + RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, + RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer) { + dpp = _dpp; + cct = _cct; + store = _store; + conn = _conn; + async_rados = _async_rados; + http_manager = _http_manager; + error_logger = _error_logger; + sync_tracer = _sync_tracer; +} + +string RGWMetaSyncEnv::status_oid() +{ + return mdlog_sync_status_oid; +} + +string RGWMetaSyncEnv::shard_obj_name(int shard_id) +{ + char buf[mdlog_sync_status_shard_prefix.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%d", mdlog_sync_status_shard_prefix.c_str(), shard_id); + + return string(buf); +} + +class RGWAsyncReadMDLogEntries : public RGWAsyncRadosRequest { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWMetadataLog *mdlog; + int shard_id; + int max_entries; + +protected: + int _send_request(const DoutPrefixProvider *dpp) override { + real_time from_time; + real_time end_time; + + void *handle; + + mdlog->init_list_entries(shard_id, from_time, end_time, marker, &handle); + + int ret = mdlog->list_entries(dpp, handle, max_entries, entries, &marker, &truncated); + + mdlog->complete_list_entries(handle); + + return ret; + } +public: + string marker; + list entries; + bool truncated; + + RGWAsyncReadMDLogEntries(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + RGWMetadataLog* mdlog, int _shard_id, + std::string _marker, int _max_entries) + : RGWAsyncRadosRequest(caller, cn), dpp(dpp), store(_store), mdlog(mdlog), + shard_id(_shard_id), max_entries(_max_entries), marker(std::move(_marker)) {} +}; + +class RGWReadMDLogEntriesCR : public RGWSimpleCoroutine { + RGWMetaSyncEnv *sync_env; + RGWMetadataLog *const mdlog; + int shard_id; + string marker; + string *pmarker; + int max_entries; + list *entries; + bool *truncated; + + RGWAsyncReadMDLogEntries *req{nullptr}; + +public: + RGWReadMDLogEntriesCR(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog, + int _shard_id, string*_marker, int _max_entries, + list *_entries, bool *_truncated) + : RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), mdlog(mdlog), + shard_id(_shard_id), pmarker(_marker), max_entries(_max_entries), + entries(_entries), truncated(_truncated) {} + + ~RGWReadMDLogEntriesCR() override { + if (req) { + req->finish(); + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + marker = *pmarker; + req = new RGWAsyncReadMDLogEntries(dpp, this, 
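RGWReadMDLogEntriesCR is the usual RGWSimpleCoroutine pairing: send_request() queues an RGWAsyncRadosRequest on the async-rados pool and returns immediately, and request_complete() harvests the status once the completion notifier fires. A plain std::future analogue of that split, illustrative only:

```cpp
#include <future>

class SimpleOp {
  std::future<int> fut;
public:
  void send_request() {                     // cf. send_request(): enqueue
    fut = std::async(std::launch::async, [] { return 0; /* the I/O */ });
  }
  int request_complete() {                  // cf. request_complete(): harvest
    return fut.get();
  }
};
```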
stack->create_completion_notifier(), + sync_env->store, mdlog, shard_id, marker, + max_entries); + sync_env->async_rados->queue(req); + return 0; + } + + int request_complete() override { + *pmarker = std::move(req->marker); + *entries = std::move(req->entries); + *truncated = req->truncated; + return req->get_ret_status(); + } +}; + + +class RGWReadRemoteMDLogShardInfoCR : public RGWCoroutine { + RGWMetaSyncEnv *env; + RGWRESTReadResource *http_op; + + const std::string& period; + int shard_id; + RGWMetadataLogInfo *shard_info; + +public: + RGWReadRemoteMDLogShardInfoCR(RGWMetaSyncEnv *env, const std::string& period, + int _shard_id, RGWMetadataLogInfo *_shard_info) + : RGWCoroutine(env->store->ctx()), env(env), http_op(NULL), + period(period), shard_id(_shard_id), shard_info(_shard_info) {} + + int operate(const DoutPrefixProvider *dpp) override { + auto store = env->store; + RGWRESTConn *conn = store->svc()->zone->get_master_conn(); + reenter(this) { + yield { + char buf[16]; + snprintf(buf, sizeof(buf), "%d", shard_id); + rgw_http_param_pair pairs[] = { { "type" , "metadata" }, + { "id", buf }, + { "period", period.c_str() }, + { "info" , NULL }, + { NULL, NULL } }; + + string p = "/admin/log/"; + + http_op = new RGWRESTReadResource(conn, p, pairs, NULL, + env->http_manager); + + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(env->dpp, 0) << "ERROR: failed to read from " << p << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + http_op->put(); + return set_cr_error(ret); + } + + return io_block(0); + } + yield { + int ret = http_op->wait(shard_info, null_yield); + http_op->put(); + if (ret < 0) { + return set_cr_error(ret); + } + return set_cr_done(); + } + } + return 0; + } +}; + +RGWCoroutine* create_read_remote_mdlog_shard_info_cr(RGWMetaSyncEnv *env, + const std::string& period, + int shard_id, + RGWMetadataLogInfo* info) +{ + return new RGWReadRemoteMDLogShardInfoCR(env, period, shard_id, info); +} + +class RGWListRemoteMDLogShardCR : public RGWSimpleCoroutine { + RGWMetaSyncEnv *sync_env; + RGWRESTReadResource *http_op; + + const std::string& period; + int shard_id; + string marker; + uint32_t max_entries; + rgw_mdlog_shard_data *result; + +public: + RGWListRemoteMDLogShardCR(RGWMetaSyncEnv *env, const std::string& period, + int _shard_id, const string& _marker, uint32_t _max_entries, + rgw_mdlog_shard_data *_result) + : RGWSimpleCoroutine(env->store->ctx()), sync_env(env), http_op(NULL), + period(period), shard_id(_shard_id), marker(_marker), max_entries(_max_entries), result(_result) {} + + int send_request(const DoutPrefixProvider *dpp) override { + RGWRESTConn *conn = sync_env->conn; + + char buf[32]; + snprintf(buf, sizeof(buf), "%d", shard_id); + + char max_entries_buf[32]; + snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", (int)max_entries); + + const char *marker_key = (marker.empty() ? 
"" : "marker"); + + rgw_http_param_pair pairs[] = { { "type", "metadata" }, + { "id", buf }, + { "period", period.c_str() }, + { "max-entries", max_entries_buf }, + { marker_key, marker.c_str() }, + { NULL, NULL } }; + + string p = "/admin/log/"; + + http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + http_op->put(); + return ret; + } + + return 0; + } + + int request_complete() override { + int ret = http_op->wait(result, null_yield); + http_op->put(); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to list remote mdlog shard, ret=" << ret << dendl; + return ret; + } + return 0; + } +}; + +RGWCoroutine* create_list_remote_mdlog_shard_cr(RGWMetaSyncEnv *env, + const std::string& period, + int shard_id, + const std::string& marker, + uint32_t max_entries, + rgw_mdlog_shard_data *result) +{ + return new RGWListRemoteMDLogShardCR(env, period, shard_id, marker, + max_entries, result); +} + +bool RGWReadRemoteMDLogInfoCR::spawn_next() { + if (shard_id >= num_shards) { + return false; + } + spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, period, shard_id, &(*mdlog_info)[shard_id]), false); + shard_id++; + return true; +} + +bool RGWListRemoteMDLogCR::spawn_next() { + if (iter == shards.end()) { + return false; + } + + spawn(new RGWListRemoteMDLogShardCR(sync_env, period, iter->first, iter->second, max_entries_per_shard, &(*result)[iter->first]), false); + ++iter; + return true; +} + +class RGWInitSyncStatusCoroutine : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + + rgw_meta_sync_info status; + vector shards_info; + boost::intrusive_ptr lease_cr; + boost::intrusive_ptr lease_stack; +public: + RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, + const rgw_meta_sync_info &status) + : RGWCoroutine(_sync_env->store->ctx()), sync_env(_sync_env), + status(status), shards_info(status.num_shards), + lease_cr(nullptr), lease_stack(nullptr) {} + + ~RGWInitSyncStatusCoroutine() override { + if (lease_cr) { + lease_cr->abort(); + } + } + + int operate(const DoutPrefixProvider *dpp) override { + int ret; + reenter(this) { + yield { + set_status("acquiring sync lock"); + uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; + string lock_name = "sync_lock"; + rgw::sal::RadosStore* store = sync_env->store; + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); + } + while (!lease_cr->is_locked()) { + if (lease_cr->is_done()) { + ldpp_dout(dpp, 5) << "failed to take lease" << dendl; + set_status("lease lock failed, early abort"); + return set_cr_error(lease_cr->get_ret_status()); + } + set_sleeping(true); + yield; + } + yield { + set_status("writing sync status"); + rgw::sal::RadosStore* store = sync_env->store; + call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, store->svc()->sysobj, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), + status)); + } + + if (retcode < 0) { + set_status("failed to write sync status"); + ldpp_dout(dpp, 0) << "ERROR: failed to write sync status, retcode=" << retcode << dendl; + yield lease_cr->go_down(); + return 
set_cr_error(retcode); + } + /* fetch current position in logs */ + set_status("fetching remote log position"); + yield { + for (int i = 0; i < (int)status.num_shards; i++) { + spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, status.period, i, + &shards_info[i]), false); + } + } + + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ + + yield { + set_status("updating sync status"); + for (int i = 0; i < (int)status.num_shards; i++) { + rgw_meta_sync_marker marker; + RGWMetadataLogInfo& info = shards_info[i]; + marker.next_step_marker = info.marker; + marker.timestamp = info.last_update; + rgw::sal::RadosStore* store = sync_env->store; + spawn(new RGWSimpleRadosWriteCR(dpp, + sync_env->async_rados, + store->svc()->sysobj, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->shard_obj_name(i)), + marker), true); + } + } + yield { + set_status("changing sync state: build full sync maps"); + status.state = rgw_meta_sync_info::StateBuildingFullSyncMaps; + rgw::sal::RadosStore* store = sync_env->store; + call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, store->svc()->sysobj, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), + status)); + } + set_status("drop lock lease"); + yield lease_cr->go_down(); + while (collect(&ret, NULL)) { + if (ret < 0) { + return set_cr_error(ret); + } + yield; + } + drain_all(); + return set_cr_done(); + } + return 0; + } +}; + +class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + RGWMetaSyncEnv *env; + const int num_shards; + int shard_id{0}; + map& markers; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read metadata sync markers: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards, + map& markers) + : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS), + env(env), num_shards(num_shards), markers(markers) + {} + bool spawn_next() override; +}; + +bool RGWReadSyncStatusMarkersCR::spawn_next() +{ + if (shard_id >= num_shards) { + return false; + } + using CR = RGWSimpleRadosReadCR; + rgw_raw_obj obj{env->store->svc()->zone->get_zone_params().log_pool, + env->shard_obj_name(shard_id)}; + spawn(new CR(env->dpp, env->async_rados, env->store->svc()->sysobj, obj, &markers[shard_id]), false); + shard_id++; + return true; +} + +class RGWReadSyncStatusCoroutine : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + rgw_meta_sync_status *sync_status; + +public: + RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, + rgw_meta_sync_status *_status) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), sync_status(_status) + {} + int operate(const DoutPrefixProvider *dpp) override; +}; + +int RGWReadSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // read sync info + using ReadInfoCR = RGWSimpleRadosReadCR; + yield { + bool empty_on_enoent = false; // fail on ENOENT + rgw_raw_obj obj{sync_env->store->svc()->zone->get_zone_params().log_pool, + sync_env->status_oid()}; + call(new ReadInfoCR(dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, obj, + &sync_status->sync_info, empty_on_enoent)); + } + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to read sync status info with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + // read shard markers + using ReadMarkersCR = 
RGWReadSyncStatusMarkersCR; + yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards, + sync_status->sync_markers)); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to read sync status markers with " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; +} + +class RGWFetchAllMetaCR : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + + int num_shards; + + + int ret_status; + + list sections; + list::iterator sections_iter; + + struct meta_list_result { + list keys; + string marker; + uint64_t count{0}; + bool truncated{false}; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("keys", keys, obj); + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("count", count, obj); + JSONDecoder::decode_json("truncated", truncated, obj); + } + } result; + list::iterator iter; + + std::unique_ptr entries_index; + + boost::intrusive_ptr lease_cr; + boost::intrusive_ptr lease_stack; + bool lost_lock; + bool failed; + + string marker; + + map& markers; + + RGWSyncTraceNodeRef tn; + +public: + RGWFetchAllMetaCR(RGWMetaSyncEnv *_sync_env, int _num_shards, + map& _markers, + RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), + num_shards(_num_shards), + ret_status(0), lease_cr(nullptr), lease_stack(nullptr), + lost_lock(false), failed(false), markers(_markers) { + tn = sync_env->sync_tracer->add_node(_tn_parent, "fetch_all_meta"); + } + + ~RGWFetchAllMetaCR() override { + } + + void append_section_from_set(set& all_sections, const string& name) { + set::iterator iter = all_sections.find(name); + if (iter != all_sections.end()) { + sections.emplace_back(std::move(*iter)); + all_sections.erase(iter); + } + } + /* + * meta sync should go in the following order: user, bucket.instance, bucket + * then whatever other sections exist (if any) + */ + void rearrange_sections() { + set all_sections; + std::move(sections.begin(), sections.end(), + std::inserter(all_sections, all_sections.end())); + sections.clear(); + + append_section_from_set(all_sections, "user"); + append_section_from_set(all_sections, "bucket.instance"); + append_section_from_set(all_sections, "bucket"); + append_section_from_set(all_sections, "roles"); + + std::move(all_sections.begin(), all_sections.end(), + std::back_inserter(sections)); + } + + int operate(const DoutPrefixProvider *dpp) override { + RGWRESTConn *conn = sync_env->conn; + + reenter(this) { + yield { + set_status(string("acquiring lock (") + sync_env->status_oid() + ")"); + uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; + string lock_name = "sync_lock"; + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, + sync_env->store, + rgw_raw_obj(sync_env->store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); + } + while (!lease_cr->is_locked()) { + if (lease_cr->is_done()) { + ldpp_dout(dpp, 5) << "failed to take lease" << dendl; + set_status("lease lock failed, early abort"); + return set_cr_error(lease_cr->get_ret_status()); + } + set_sleeping(true); + yield; + } + entries_index.reset(new RGWShardedOmapCRManager(sync_env->async_rados, sync_env->store, this, num_shards, + sync_env->store->svc()->zone->get_zone_params().log_pool, + mdlog_sync_full_sync_index_prefix)); + yield { + call(new RGWReadRESTResourceCR >(cct, conn, sync_env->http_manager, + "/admin/metadata", NULL, §ions)); + } + if (get_ret_status() 
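rearrange_sections() above encodes the dependency order called out in its comment: users before bucket instances, instances before buckets, then roles, then whatever other sections the remote advertised. A standalone equivalent:

```cpp
#include <list>
#include <set>
#include <string>

std::list<std::string> order_sections(std::list<std::string> sections)
{
  std::set<std::string> all(sections.begin(), sections.end());
  std::list<std::string> out;
  for (const char* name : {"user", "bucket.instance", "bucket", "roles"}) {
    if (auto it = all.find(name); it != all.end()) {
      out.push_back(*it);
      all.erase(it);
    }
  }
  out.insert(out.end(), all.begin(), all.end());  // any leftover sections
  return out;
}
```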
< 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch metadata sections" << dendl; + yield entries_index->finish(); + yield lease_cr->go_down(); + drain_all(); + return set_cr_error(get_ret_status()); + } + rearrange_sections(); + sections_iter = sections.begin(); + for (; sections_iter != sections.end(); ++sections_iter) { + do { + yield { +#define META_FULL_SYNC_CHUNK_SIZE "1000" + string entrypoint = string("/admin/metadata/") + *sections_iter; + rgw_http_param_pair pairs[] = { { "max-entries", META_FULL_SYNC_CHUNK_SIZE }, + { "marker", result.marker.c_str() }, + { NULL, NULL } }; + result.keys.clear(); + call(new RGWReadRESTResourceCR(cct, conn, sync_env->http_manager, + entrypoint, pairs, &result)); + } + ret_status = get_ret_status(); + if (ret_status == -ENOENT) { + set_retcode(0); /* reset coroutine status so that we don't return it */ + ret_status = 0; + } + if (ret_status < 0) { + tn->log(0, SSTR("ERROR: failed to fetch metadata section: " << *sections_iter)); + yield entries_index->finish(); + yield lease_cr->go_down(); + drain_all(); + return set_cr_error(ret_status); + } + iter = result.keys.begin(); + for (; iter != result.keys.end(); ++iter) { + if (!lease_cr->is_locked()) { + lost_lock = true; + tn->log(1, "lease is lost, abort"); + break; + } + yield; // allow entries_index consumer to make progress + + tn->log(20, SSTR("list metadata: section=" << *sections_iter << " key=" << *iter)); + string s = *sections_iter + ":" + *iter; + int shard_id; + rgw::sal::RadosStore* store = sync_env->store; + int ret = store->ctl()->meta.mgr->get_shard_id(*sections_iter, *iter, &shard_id); + if (ret < 0) { + tn->log(0, SSTR("ERROR: could not determine shard id for " << *sections_iter << ":" << *iter)); + ret_status = ret; + break; + } + if (!entries_index->append(s, shard_id)) { + break; + } + } + } while (result.truncated); + } + yield { + if (!entries_index->finish()) { + failed = true; + } + } + if (!failed) { + for (map::iterator iter = markers.begin(); iter != markers.end(); ++iter) { + int shard_id = (int)iter->first; + rgw_meta_sync_marker& marker = iter->second; + marker.total_entries = entries_index->get_total_entries(shard_id); + spawn(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, + rgw_raw_obj(sync_env->store->svc()->zone->get_zone_params().log_pool, sync_env->shard_obj_name(shard_id)), + marker), true); + } + } + + drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ + + yield lease_cr->go_down(); + + int ret; + while (collect(&ret, NULL)) { + if (ret < 0) { + return set_cr_error(ret); + } + yield; + } + drain_all(); + if (failed) { + yield return set_cr_error(-EIO); + } + if (lost_lock) { + yield return set_cr_error(-EBUSY); + } + + if (ret_status < 0) { + yield return set_cr_error(ret_status); + } + + yield return set_cr_done(); + } + return 0; + } +}; + +static string full_sync_index_shard_oid(int shard_id) +{ + char buf[mdlog_sync_full_sync_index_prefix.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%d", mdlog_sync_full_sync_index_prefix.c_str(), shard_id); + return string(buf); +} + +class RGWReadRemoteMetadataCR : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + + RGWRESTReadResource *http_op; + + string section; + string key; + + bufferlist *pbl; + + RGWSyncTraceNodeRef tn; + +public: + RGWReadRemoteMetadataCR(RGWMetaSyncEnv *_sync_env, + const string& _section, const string& _key, bufferlist *_pbl, + const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), + 
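Each discovered entry is recorded as "<section>:<key>" and routed to one of the full-sync omap shards via the metadata manager's get_shard_id(). The mapping has the shape below; the hash here is a stand-in for illustration, not RGW's actual shard function:

```cpp
#include <functional>
#include <string>

int shard_for(const std::string& section, const std::string& key,
              int num_shards)
{
  const std::string composite = section + ":" + key;   // as built above
  return static_cast<int>(std::hash<std::string>{}(composite) % num_shards);
}
```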
http_op(NULL), + section(_section), + key(_key), + pbl(_pbl) { + tn = sync_env->sync_tracer->add_node(_tn_parent, "read_remote_meta", + section + ":" + key); + } + + int operate(const DoutPrefixProvider *dpp) override { + RGWRESTConn *conn = sync_env->conn; + reenter(this) { + yield { + string key_encode; + url_encode(key, key_encode); + rgw_http_param_pair pairs[] = { { "key" , key.c_str()}, + { NULL, NULL } }; + + string p = string("/admin/metadata/") + section + "/" + key_encode; + + http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); + + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + http_op->put(); + return set_cr_error(ret); + } + + return io_block(0); + } + yield { + int ret = http_op->wait(pbl, null_yield); + http_op->put(); + if (ret < 0) { + return set_cr_error(ret); + } + return set_cr_done(); + } + } + return 0; + } +}; + +class RGWAsyncMetaStoreEntry : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + string raw_key; + bufferlist bl; + const DoutPrefixProvider *dpp; +protected: + int _send_request(const DoutPrefixProvider *dpp) override { + int ret = store->ctl()->meta.mgr->put(raw_key, bl, null_yield, dpp, RGWMDLogSyncType::APPLY_ALWAYS, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: can't store key: " << raw_key << " ret=" << ret << dendl; + return ret; + } + return 0; + } +public: + RGWAsyncMetaStoreEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + const string& _raw_key, + bufferlist& _bl, + const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), store(_store), + raw_key(_raw_key), bl(_bl), dpp(dpp) {} +}; + + +class RGWMetaStoreEntryCR : public RGWSimpleCoroutine { + RGWMetaSyncEnv *sync_env; + string raw_key; + bufferlist bl; + + RGWAsyncMetaStoreEntry *req; + +public: + RGWMetaStoreEntryCR(RGWMetaSyncEnv *_sync_env, + const string& _raw_key, + bufferlist& _bl) : RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), + raw_key(_raw_key), bl(_bl), req(NULL) { + } + + ~RGWMetaStoreEntryCR() override { + if (req) { + req->finish(); + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncMetaStoreEntry(this, stack->create_completion_notifier(), + sync_env->store, raw_key, bl, dpp); + sync_env->async_rados->queue(req); + return 0; + } + + int request_complete() override { + return req->get_ret_status(); + } +}; + +class RGWAsyncMetaRemoveEntry : public RGWAsyncRadosRequest { + rgw::sal::RadosStore* store; + string raw_key; + const DoutPrefixProvider *dpp; +protected: + int _send_request(const DoutPrefixProvider *dpp) override { + int ret = store->ctl()->meta.mgr->remove(raw_key, null_yield, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: can't remove key: " << raw_key << " ret=" << ret << dendl; + return ret; + } + return 0; + } +public: + RGWAsyncMetaRemoveEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, + const string& _raw_key, const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), store(_store), + raw_key(_raw_key), dpp(dpp) {} +}; + + +class RGWMetaRemoveEntryCR : public RGWSimpleCoroutine { + RGWMetaSyncEnv *sync_env; + string raw_key; + + RGWAsyncMetaRemoveEntry *req; + +public: + RGWMetaRemoveEntryCR(RGWMetaSyncEnv *_sync_env, + const string& _raw_key) : 
RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), + raw_key(_raw_key), req(NULL) { + } + + ~RGWMetaRemoveEntryCR() override { + if (req) { + req->finish(); + } + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new RGWAsyncMetaRemoveEntry(this, stack->create_completion_notifier(), + sync_env->store, raw_key, dpp); + sync_env->async_rados->queue(req); + return 0; + } + + int request_complete() override { + int r = req->get_ret_status(); + if (r == -ENOENT) { + r = 0; + } + return r; + } +}; + +#define META_SYNC_UPDATE_MARKER_WINDOW 10 + + +int RGWLastCallerWinsCR::operate(const DoutPrefixProvider *dpp) { + RGWCoroutine *call_cr; + reenter(this) { + while (cr) { + call_cr = cr; + cr = nullptr; + yield call(call_cr); + /* cr might have been modified at this point */ + } + return set_cr_done(); + } + return 0; +} + +class RGWMetaSyncShardMarkerTrack : public RGWSyncShardMarkerTrack { + RGWMetaSyncEnv *sync_env; + + string marker_oid; + rgw_meta_sync_marker sync_marker; + + RGWSyncTraceNodeRef tn; + +public: + RGWMetaSyncShardMarkerTrack(RGWMetaSyncEnv *_sync_env, + const string& _marker_oid, + const rgw_meta_sync_marker& _marker, + RGWSyncTraceNodeRef& _tn) : RGWSyncShardMarkerTrack(META_SYNC_UPDATE_MARKER_WINDOW), + sync_env(_sync_env), + marker_oid(_marker_oid), + sync_marker(_marker), + tn(_tn){} + + RGWCoroutine *store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override { + sync_marker.marker = new_marker; + if (index_pos > 0) { + sync_marker.pos = index_pos; + } + + if (!real_clock::is_zero(timestamp)) { + sync_marker.timestamp = timestamp; + } + + ldpp_dout(sync_env->dpp, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << " realm_epoch=" << sync_marker.realm_epoch << dendl; + tn->log(20, SSTR("new marker=" << new_marker)); + rgw::sal::RadosStore* store = sync_env->store; + return new RGWSimpleRadosWriteCR(sync_env->dpp, sync_env->async_rados, + store->svc()->sysobj, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, marker_oid), + sync_marker); + } + + RGWOrderCallCR *allocate_order_control_cr() override { + return new RGWLastCallerWinsCR(sync_env->cct); + } +}; + +RGWMetaSyncSingleEntryCR::RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env, + const string& _raw_key, const string& _entry_marker, + const RGWMDLogStatus& _op_status, + RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), + sync_env(_sync_env), + raw_key(_raw_key), entry_marker(_entry_marker), + op_status(_op_status), + pos(0), sync_status(0), + marker_tracker(_marker_tracker), tries(0) { + error_injection = (sync_env->cct->_conf->rgw_sync_meta_inject_err_probability > 0); + tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", raw_key); +} + +int RGWMetaSyncSingleEntryCR::operate(const DoutPrefixProvider *dpp) { + reenter(this) { +#define NUM_TRANSIENT_ERROR_RETRIES 10 + + if (error_injection && + rand() % 10000 < cct->_conf->rgw_sync_meta_inject_err_probability * 10000.0) { + return set_cr_error(-EIO); + } + + if (op_status != MDLOG_STATUS_COMPLETE) { + tn->log(20, "skipping pending operation"); + yield call(marker_tracker->finish(entry_marker)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + tn->set_flag(RGW_SNS_FLAG_ACTIVE); + for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) { + yield { + pos = raw_key.find(':'); + section = raw_key.substr(0, pos); + key = raw_key.substr(pos + 
1); + tn->log(10, SSTR("fetching remote metadata entry" << (tries == 0 ? "" : " (retry)"))); + call(new RGWReadRemoteMetadataCR(sync_env, section, key, &md_bl, tn)); + } + + sync_status = retcode; + + if (sync_status == -ENOENT) { + break; + } + + if (sync_status < 0) { + if (tries < NUM_TRANSIENT_ERROR_RETRIES - 1) { + ldpp_dout(dpp, 20) << *this << ": failed to fetch remote metadata entry: " << section << ":" << key << ", will retry" << dendl; + continue; + } + + tn->log(10, SSTR("failed to read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status)); + log_error() << "failed to read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status << std::endl; + yield call(sync_env->error_logger->log_error_cr(dpp, sync_env->conn->get_remote_id(), section, key, -sync_status, + string("failed to read remote metadata entry: ") + cpp_strerror(-sync_status))); + return set_cr_error(sync_status); + } + + break; + } + + retcode = 0; + for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) { + if (sync_status != -ENOENT) { + tn->log(10, SSTR("storing local metadata entry: " << section << ":" << key)); + yield call(new RGWMetaStoreEntryCR(sync_env, raw_key, md_bl)); + } else { + tn->log(10, SSTR("removing local metadata entry:" << section << ":" << key)); + yield call(new RGWMetaRemoveEntryCR(sync_env, raw_key)); + if (retcode == -ENOENT) { + retcode = 0; + break; + } + } + if ((retcode < 0) && (tries < NUM_TRANSIENT_ERROR_RETRIES - 1)) { + ldpp_dout(dpp, 20) << *this << ": failed to store metadata entry: " << section << ":" << key << ", got retcode=" << retcode << ", will retry" << dendl; + continue; + } + break; + } + + sync_status = retcode; + + if (sync_status == 0 && marker_tracker) { + /* update marker */ + yield call(marker_tracker->finish(entry_marker)); + sync_status = retcode; + } + if (sync_status < 0) { + tn->log(10, SSTR("failed, status=" << sync_status)); + return set_cr_error(sync_status); + } + tn->log(10, "success"); + return set_cr_done(); + } + return 0; +} + +class RGWCloneMetaLogCoroutine : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + RGWMetadataLog *mdlog; + + const std::string& period; + int shard_id; + string marker; + bool truncated = false; + string *new_marker; + + int max_entries = CLONE_MAX_ENTRIES; + + RGWRESTReadResource *http_op = nullptr; + boost::intrusive_ptr completion; + + RGWMetadataLogInfo shard_info; + rgw_mdlog_shard_data data; + +public: + RGWCloneMetaLogCoroutine(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog, + const std::string& period, int _id, + const string& _marker, string *_new_marker) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), mdlog(mdlog), + period(period), shard_id(_id), marker(_marker), new_marker(_new_marker) { + if (new_marker) { + *new_marker = marker; + } + } + ~RGWCloneMetaLogCoroutine() override { + if (http_op) { + http_op->put(); + } + if (completion) { + completion->cancel(); + } + } + + int operate(const DoutPrefixProvider *dpp) override; + + int state_init(); + int state_read_shard_status(); + int state_read_shard_status_complete(); + int state_send_rest_request(const DoutPrefixProvider *dpp); + int state_receive_rest_response(); + int state_store_mdlog_entries(); + int state_store_mdlog_entries_complete(); +}; + +class RGWMetaSyncShardCR : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + + const rgw_pool& pool; + const std::string& period; //< currently syncing period id + const epoch_t realm_epoch; //< realm_epoch of period + 
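+  // the members below back a two-phase state machine: full_sync() drains a
+  // sharded omap index of every metadata key, then incremental_sync() tails
+  // the mdlog for this shard; both phases checkpoint their position through
+  // RGWMetaSyncShardMarkerTrack, so a restarted shard resumes from the last
+  // flushed marker instead of starting over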
RGWMetadataLog* mdlog; //< log of syncing period + uint32_t shard_id; + rgw_meta_sync_marker& sync_marker; + boost::optional temp_marker; //< for pending updates + string marker; + string max_marker; + const std::string& period_marker; //< max marker stored in next period + + RGWRadosGetOmapKeysCR::ResultPtr omapkeys; + std::set entries; + std::set::iterator iter; + + string oid; + + RGWMetaSyncShardMarkerTrack *marker_tracker = nullptr; + + list log_entries; + list::iterator log_iter; + bool truncated = false; + + string mdlog_marker; + string raw_key; + rgw_mdlog_entry mdlog_entry; + + ceph::mutex inc_lock = ceph::make_mutex("RGWMetaSyncShardCR::inc_lock"); + ceph::condition_variable inc_cond; + + boost::asio::coroutine incremental_cr; + boost::asio::coroutine full_cr; + + boost::intrusive_ptr lease_cr; + boost::intrusive_ptr lease_stack; + + bool lost_lock = false; + + bool *reset_backoff; + + // hold a reference to the cr stack while it's in the map + using StackRef = boost::intrusive_ptr; + map stack_to_pos; + map pos_to_prev; + + bool can_adjust_marker = false; + bool done_with_period = false; + + int total_entries = 0; + + RGWSyncTraceNodeRef tn; +public: + RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, + const std::string& period, epoch_t realm_epoch, + RGWMetadataLog* mdlog, uint32_t _shard_id, + rgw_meta_sync_marker& _marker, + const std::string& period_marker, bool *_reset_backoff, + RGWSyncTraceNodeRef& _tn) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), pool(_pool), + period(period), realm_epoch(realm_epoch), mdlog(mdlog), + shard_id(_shard_id), sync_marker(_marker), + period_marker(period_marker), + reset_backoff(_reset_backoff), tn(_tn) { + *reset_backoff = false; + } + + ~RGWMetaSyncShardCR() override { + delete marker_tracker; + if (lease_cr) { + lease_cr->abort(); + } + } + + void set_marker_tracker(RGWMetaSyncShardMarkerTrack *mt) { + delete marker_tracker; + marker_tracker = mt; + } + + int operate(const DoutPrefixProvider *dpp) override { + int r; + while (true) { + switch (sync_marker.state) { + case rgw_meta_sync_marker::FullSync: + r = full_sync(); + if (r < 0) { + ldpp_dout(dpp, 10) << "sync: full_sync: shard_id=" << shard_id << " r=" << r << dendl; + return set_cr_error(r); + } + return 0; + case rgw_meta_sync_marker::IncrementalSync: + r = incremental_sync(); + if (r < 0) { + ldpp_dout(dpp, 10) << "sync: incremental_sync: shard_id=" << shard_id << " r=" << r << dendl; + return set_cr_error(r); + } + return 0; + } + } + /* unreachable */ + return 0; + } + + void collect_children() + { + int child_ret; + RGWCoroutinesStack *child; + while (collect_next(&child_ret, &child)) { + auto iter = stack_to_pos.find(child); + if (iter == stack_to_pos.end()) { + /* some other stack that we don't care about */ + continue; + } + + string& pos = iter->second; + + if (child_ret < 0) { + ldpp_dout(sync_env->dpp, 0) << *this << ": child operation stack=" << child << " entry=" << pos << " returned " << child_ret << dendl; + // on any error code from RGWMetaSyncSingleEntryCR, we do not advance + // the sync status marker past this entry, and set + // can_adjust_marker=false to exit out of RGWMetaSyncShardCR. 
+ // RGWMetaSyncShardControlCR will rerun RGWMetaSyncShardCR from the + // previous marker and retry + can_adjust_marker = false; + } + + map::iterator prev_iter = pos_to_prev.find(pos); + ceph_assert(prev_iter != pos_to_prev.end()); + + if (pos_to_prev.size() == 1) { + if (can_adjust_marker) { + sync_marker.marker = pos; + } + pos_to_prev.erase(prev_iter); + } else { + ceph_assert(pos_to_prev.size() > 1); + pos_to_prev.erase(prev_iter); + prev_iter = pos_to_prev.begin(); + if (can_adjust_marker) { + sync_marker.marker = prev_iter->second; + } + } + + ldpp_dout(sync_env->dpp, 4) << *this << ": adjusting marker pos=" << sync_marker.marker << dendl; + stack_to_pos.erase(iter); + } + } + + int full_sync() { +#define OMAP_GET_MAX_ENTRIES 100 + int max_entries = OMAP_GET_MAX_ENTRIES; + reenter(&full_cr) { + set_status("full_sync"); + tn->log(10, "start full sync"); + oid = full_sync_index_shard_oid(shard_id); + can_adjust_marker = true; + /* grab lock */ + yield { + uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; + string lock_name = "sync_lock"; + rgw::sal::RadosStore* store = sync_env->store; + lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, + rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), + lock_name, lock_duration, this)); + lease_stack.reset(spawn(lease_cr.get(), false)); + lost_lock = false; + } + while (!lease_cr->is_locked()) { + if (lease_cr->is_done()) { + drain_all(); + tn->log(5, "failed to take lease"); + return lease_cr->get_ret_status(); + } + set_sleeping(true); + yield; + } + tn->log(10, "took lease"); + + /* lock succeeded, a retry now should avoid previous backoff status */ + *reset_backoff = true; + + /* prepare marker tracker */ + set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env, + sync_env->shard_obj_name(shard_id), + sync_marker, tn)); + + marker = sync_marker.marker; + + total_entries = sync_marker.pos; + + /* sync! */ + do { + if (!lease_cr->is_locked()) { + tn->log(1, "lease is lost, abort"); + lost_lock = true; + break; + } + omapkeys = std::make_shared(); + yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, oid), + marker, max_entries, omapkeys)); + if (retcode < 0) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: " << __func__ << "(): RGWRadosGetOmapKeysCR() returned ret=" << retcode << dendl; + tn->log(0, SSTR("ERROR: failed to list omap keys, status=" << retcode)); + yield lease_cr->go_down(); + drain_all(); + return retcode; + } + entries = std::move(omapkeys->entries); + tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync")); + if (entries.size() > 0) { + tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ + } + iter = entries.begin(); + for (; iter != entries.end(); ++iter) { + marker = *iter; + tn->log(20, SSTR("full sync: " << marker)); + total_entries++; + if (!marker_tracker->start(marker, total_entries, real_time())) { + tn->log(0, SSTR("ERROR: cannot start syncing " << marker << ". 
Duplicate entry?")); + } else { + // fetch remote and write locally + yield { + RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, marker, marker, MDLOG_STATUS_COMPLETE, marker_tracker, tn), false); + // stack_to_pos holds a reference to the stack + stack_to_pos[stack] = marker; + pos_to_prev[marker] = marker; + } + // limit spawn window + while (num_spawned() > static_cast(cct->_conf->rgw_meta_sync_spawn_window)) { + yield wait_for_child(); + collect_children(); + } + } + } + collect_children(); + } while (omapkeys->more && can_adjust_marker); + + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ + + while (num_spawned() > 1) { + yield wait_for_child(); + collect_children(); + } + + if (!lost_lock) { + /* update marker to reflect we're done with full sync */ + if (can_adjust_marker) { + // apply updates to a temporary marker, or operate() will send us + // to incremental_sync() after we yield + temp_marker = sync_marker; + temp_marker->state = rgw_meta_sync_marker::IncrementalSync; + temp_marker->marker = std::move(temp_marker->next_step_marker); + temp_marker->next_step_marker.clear(); + temp_marker->realm_epoch = realm_epoch; + ldpp_dout(sync_env->dpp, 4) << *this << ": saving marker pos=" << temp_marker->marker << " realm_epoch=" << realm_epoch << dendl; + + using WriteMarkerCR = RGWSimpleRadosWriteCR; + yield call(new WriteMarkerCR(sync_env->dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, + rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), + *temp_marker)); + } + + if (retcode < 0) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to set sync marker: retcode=" << retcode << dendl; + yield lease_cr->go_down(); + drain_all(); + return retcode; + } + // clean up full sync index + yield { + auto oid = full_sync_index_shard_oid(shard_id); + call(new RGWRadosRemoveCR(sync_env->store, {pool, oid})); + } + } + + /* + * if we reached here, it means that lost_lock is true, otherwise the state + * change in the previous block will prevent us from reaching here + */ + + yield lease_cr->go_down(); + + lease_cr.reset(); + + drain_all(); + + if (!can_adjust_marker) { + return -EAGAIN; + } + + if (lost_lock) { + return -EBUSY; + } + + tn->log(10, "full sync complete"); + + // apply the sync marker update + ceph_assert(temp_marker); + sync_marker = std::move(*temp_marker); + temp_marker = boost::none; + // must not yield after this point! 
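+      // (rationale: operate() dispatches on sync_marker.state, so once the
+      // state above is IncrementalSync, a yield here would let the next
+      // re-entry jump straight to incremental_sync() and abandon the rest
+      // of this frame)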
+    }
+    return 0;
+  }
+
+
+  int incremental_sync() {
+    reenter(&incremental_cr) {
+      set_status("incremental_sync");
+      tn->log(10, "start incremental sync");
+      can_adjust_marker = true;
+      /* grab lock */
+      if (!lease_cr) { /* could have had a lease_cr lock from previous state */
+        yield {
+          uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+          string lock_name = "sync_lock";
+          rgw::sal::RadosStore* store = sync_env->store;
+          lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
+                                                  rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
+                                                  lock_name, lock_duration, this));
+          lease_stack.reset(spawn(lease_cr.get(), false));
+          lost_lock = false;
+        }
+        while (!lease_cr->is_locked()) {
+          if (lease_cr->is_done()) {
+            drain_all();
+            tn->log(5, "failed to take lease");
+            return lease_cr->get_ret_status();
+          }
+          set_sleeping(true);
+          yield;
+        }
+      }
+      tn->log(10, "took lease");
+      // if the period has advanced, we can't use the existing marker
+      if (sync_marker.realm_epoch < realm_epoch) {
+        ldpp_dout(sync_env->dpp, 4) << "clearing marker=" << sync_marker.marker
+            << " from old realm_epoch=" << sync_marker.realm_epoch
+            << " (now " << realm_epoch << ')' << dendl;
+        sync_marker.realm_epoch = realm_epoch;
+        sync_marker.marker.clear();
+      }
+      mdlog_marker = sync_marker.marker;
+      set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env,
+                                                         sync_env->shard_obj_name(shard_id),
+                                                         sync_marker, tn));
+
+      /*
+       * mdlog_marker: the remote sync marker position
+       * sync_marker: the local sync marker position
+       * max_marker: the max mdlog position that we fetched
+       * marker: the current position we try to sync
+       * period_marker: the last marker before the next period begins (optional)
+       */
+      marker = max_marker = sync_marker.marker;
+      /* inc sync */
+      do {
+        if (!lease_cr->is_locked()) {
+          lost_lock = true;
+          tn->log(1, "lease is lost, abort");
+          break;
+        }
+#define INCREMENTAL_MAX_ENTRIES 100
+        ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << " truncated=" << truncated << dendl;
+        if (!period_marker.empty() && period_marker <= mdlog_marker) {
+          tn->log(10, SSTR("finished syncing current period: mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker << " period_marker=" << period_marker));
+          done_with_period = true;
+          break;
+        }
+        if (mdlog_marker <= max_marker || !truncated) {
+          /* we're at the tip, try to bring more entries */
+          ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " syncing mdlog for shard_id=" << shard_id << dendl;
+          yield call(new RGWCloneMetaLogCoroutine(sync_env, mdlog,
+                                                  period, shard_id,
+                                                  mdlog_marker, &mdlog_marker));
+        }
+        if (retcode < 0) {
+          tn->log(10, SSTR(*this << ": failed to fetch more log entries, retcode=" << retcode));
+          yield lease_cr->go_down();
+          drain_all();
+          *reset_backoff = false; // back off and try again later
+          return retcode;
+        }
+        truncated = true;
+        *reset_backoff = true; /* if we got to this point, all systems function */
+        if (mdlog_marker > max_marker) {
+          tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+          tn->log(20, SSTR("mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker));
+          marker = max_marker;
+          yield call(new RGWReadMDLogEntriesCR(sync_env, mdlog, shard_id,
+                                               &max_marker, INCREMENTAL_MAX_ENTRIES,
+                                               &log_entries, &truncated));
+          if (retcode < 0) {
+            tn->log(10, SSTR("failed to
list mdlog entries, retcode=" << retcode)); + yield lease_cr->go_down(); + drain_all(); + *reset_backoff = false; // back off and try again later + return retcode; + } + for (log_iter = log_entries.begin(); log_iter != log_entries.end() && !done_with_period; ++log_iter) { + if (!period_marker.empty() && period_marker <= log_iter->id) { + done_with_period = true; + if (period_marker < log_iter->id) { + tn->log(10, SSTR("found key=" << log_iter->id + << " past period_marker=" << period_marker)); + break; + } + ldpp_dout(sync_env->dpp, 10) << "found key at period_marker=" << period_marker << dendl; + // sync this entry, then return control to RGWMetaSyncCR + } + if (!mdlog_entry.convert_from(*log_iter)) { + tn->log(0, SSTR("ERROR: failed to convert mdlog entry, shard_id=" << shard_id << " log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp << " ... skipping entry")); + continue; + } + tn->log(20, SSTR("log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp)); + if (!marker_tracker->start(log_iter->id, 0, log_iter->timestamp.to_real_time())) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: cannot start syncing " << log_iter->id << ". Duplicate entry?" << dendl; + } else { + raw_key = log_iter->section + ":" + log_iter->name; + yield { + RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, raw_key, log_iter->id, mdlog_entry.log_data.status, marker_tracker, tn), false); + ceph_assert(stack); + // stack_to_pos holds a reference to the stack + stack_to_pos[stack] = log_iter->id; + pos_to_prev[log_iter->id] = marker; + } + // limit spawn window + while (num_spawned() > static_cast(cct->_conf->rgw_meta_sync_spawn_window)) { + yield wait_for_child(); + collect_children(); + } + } + marker = log_iter->id; + } + } + collect_children(); + ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " max_marker=" << max_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl; + if (done_with_period) { + // return control to RGWMetaSyncCR and advance to the next period + tn->log(10, SSTR(*this << ": done with period")); + break; + } + if (mdlog_marker == max_marker && can_adjust_marker) { + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + yield wait(utime_t(cct->_conf->rgw_meta_sync_poll_interval, 0)); + } + } while (can_adjust_marker); + + tn->unset_flag(RGW_SNS_FLAG_ACTIVE); + + while (num_spawned() > 1) { + yield wait_for_child(); + collect_children(); + } + + yield lease_cr->go_down(); + + drain_all(); + + if (lost_lock) { + return -EBUSY; + } + + if (!can_adjust_marker) { + return -EAGAIN; + } + + return set_cr_done(); + } + /* TODO */ + return 0; + } +}; + +class RGWMetaSyncShardControlCR : public RGWBackoffControlCR +{ + RGWMetaSyncEnv *sync_env; + + const rgw_pool& pool; + const std::string& period; + epoch_t realm_epoch; + RGWMetadataLog* mdlog; + uint32_t shard_id; + rgw_meta_sync_marker sync_marker; + const std::string period_marker; + + RGWSyncTraceNodeRef tn; + + static constexpr bool exit_on_error = false; // retry on all errors +public: + RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, + const std::string& period, epoch_t realm_epoch, + RGWMetadataLog* mdlog, uint32_t _shard_id, + const rgw_meta_sync_marker& _marker, + std::string&& period_marker, + RGWSyncTraceNodeRef& _tn_parent) + : RGWBackoffControlCR(_sync_env->cct, 
exit_on_error), sync_env(_sync_env), + pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog), + shard_id(_shard_id), sync_marker(_marker), + period_marker(std::move(period_marker)) { + tn = sync_env->sync_tracer->add_node(_tn_parent, "shard", + std::to_string(shard_id)); + } + + RGWCoroutine *alloc_cr() override { + return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog, + shard_id, sync_marker, period_marker, backoff_ptr(), tn); + } + + RGWCoroutine *alloc_finisher_cr() override { + rgw::sal::RadosStore* store = sync_env->store; + return new RGWSimpleRadosReadCR(sync_env->dpp, sync_env->async_rados, store->svc()->sysobj, + rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), + &sync_marker); + } +}; + +class RGWMetaSyncCR : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + const rgw_pool& pool; + RGWPeriodHistory::Cursor cursor; //< sync position in period history + RGWPeriodHistory::Cursor next; //< next period in history + rgw_meta_sync_status sync_status; + RGWSyncTraceNodeRef tn; + + std::mutex mutex; //< protect access to shard_crs + + // TODO: it should be enough to hold a reference on the stack only, as calling + // RGWCoroutinesStack::wakeup() doesn't refer to the RGWCoroutine if it has + // already completed + using ControlCRRef = boost::intrusive_ptr; + using StackRef = boost::intrusive_ptr; + using RefPair = std::pair; + map shard_crs; + int ret{0}; + +public: + RGWMetaSyncCR(RGWMetaSyncEnv *_sync_env, const RGWPeriodHistory::Cursor &cursor, + const rgw_meta_sync_status& _sync_status, RGWSyncTraceNodeRef& _tn) + : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), + pool(sync_env->store->svc()->zone->get_zone_params().log_pool), + cursor(cursor), sync_status(_sync_status), tn(_tn) {} + + ~RGWMetaSyncCR() { + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + // loop through one period at a time + tn->log(1, "start"); + for (;;) { + if (cursor == sync_env->store->svc()->mdlog->get_period_history()->get_current()) { + next = RGWPeriodHistory::Cursor{}; + if (cursor) { + ldpp_dout(dpp, 10) << "RGWMetaSyncCR on current period=" + << cursor.get_period().get_id() << dendl; + } else { + ldpp_dout(dpp, 10) << "RGWMetaSyncCR with no period" << dendl; + } + } else { + next = cursor; + next.next(); + ldpp_dout(dpp, 10) << "RGWMetaSyncCR on period=" + << cursor.get_period().get_id() << ", next=" + << next.get_period().get_id() << dendl; + } + + yield { + // get the mdlog for the current period (may be empty) + auto& period_id = sync_status.sync_info.period; + auto realm_epoch = sync_status.sync_info.realm_epoch; + auto mdlog = sync_env->store->svc()->mdlog->get_log(period_id); + + tn->log(1, SSTR("realm epoch=" << realm_epoch << " period id=" << period_id)); + + // prevent wakeup() from accessing shard_crs while we're spawning them + std::lock_guard lock(mutex); + + // sync this period on each shard + for (const auto& m : sync_status.sync_markers) { + uint32_t shard_id = m.first; + auto& marker = m.second; + + std::string period_marker; + if (next) { + // read the maximum marker from the next period's sync status + period_marker = next.get_period().get_sync_status()[shard_id]; + if (period_marker.empty()) { + // no metadata changes have occurred on this shard, skip it + ldpp_dout(dpp, 10) << "RGWMetaSyncCR: skipping shard " << shard_id + << " with empty period marker" << dendl; + continue; + } + } + + using ShardCR = RGWMetaSyncShardControlCR; + auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch, + mdlog, shard_id, 
marker, + std::move(period_marker), tn); + auto stack = spawn(cr, false); + shard_crs[shard_id] = RefPair{cr, stack}; + } + } + // wait for each shard to complete + while (ret == 0 && num_spawned() > 0) { + yield wait_for_child(); + collect(&ret, nullptr); + } + drain_all(); + { + // drop shard cr refs under lock + std::lock_guard lock(mutex); + shard_crs.clear(); + } + if (ret < 0) { + return set_cr_error(ret); + } + // advance to the next period + ceph_assert(next); + cursor = next; + + // write the updated sync info + sync_status.sync_info.period = cursor.get_period().get_id(); + sync_status.sync_info.realm_epoch = cursor.get_epoch(); + yield call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, + sync_env->store->svc()->sysobj, + rgw_raw_obj(pool, sync_env->status_oid()), + sync_status.sync_info)); + } + } + return 0; + } + + void wakeup(int shard_id) { + std::lock_guard lock(mutex); + auto iter = shard_crs.find(shard_id); + if (iter == shard_crs.end()) { + return; + } + iter->second.first->wakeup(); + } +}; + +void RGWRemoteMetaLog::init_sync_env(RGWMetaSyncEnv *env) { + env->dpp = dpp; + env->cct = store->ctx(); + env->store = store; + env->conn = conn; + env->async_rados = async_rados; + env->http_manager = &http_manager; + env->error_logger = error_logger; + env->sync_tracer = store->getRados()->get_sync_tracer(); +} + +int RGWRemoteMetaLog::read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + // cannot run concurrently with run_sync(), so run in a separate manager + RGWCoroutinesManager crs(store->ctx(), store->getRados()->get_cr_registry()); + RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); + int ret = http_manager.start(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; + return ret; + } + RGWMetaSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + tn->log(20, "read sync status"); + ret = crs.run(dpp, new RGWReadSyncStatusCoroutine(&sync_env_local, sync_status)); + http_manager.stop(); + return ret; +} + +int RGWRemoteMetaLog::init_sync_status(const DoutPrefixProvider *dpp) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + + rgw_mdlog_info mdlog_info; + int r = read_log_info(dpp, &mdlog_info); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; + return r; + } + + rgw_meta_sync_info sync_info; + sync_info.num_shards = mdlog_info.num_shards; + auto cursor = store->svc()->mdlog->get_period_history()->get_current(); + if (cursor) { + sync_info.period = cursor.get_period().get_id(); + sync_info.realm_epoch = cursor.get_epoch(); + } + + return run(dpp, new RGWInitSyncStatusCoroutine(&sync_env, sync_info)); +} + +int RGWRemoteMetaLog::store_sync_info(const DoutPrefixProvider *dpp, const rgw_meta_sync_info& sync_info) +{ + tn->log(20, "store sync info"); + return run(dpp, new RGWSimpleRadosWriteCR(dpp, async_rados, store->svc()->sysobj, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env.status_oid()), + sync_info)); +} + +// return a cursor to the period at our sync position +static RGWPeriodHistory::Cursor get_period_at(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + const rgw_meta_sync_info& info, + optional_yield y) +{ + if (info.period.empty()) { + // return an empty cursor with error=0 + return RGWPeriodHistory::Cursor{}; + } + + // look for an existing period in our history + 
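+  // note that lookup() is keyed on realm_epoch rather than on the period id
+  // itself; the id comparison below catches a stale status object whose
+  // epoch points at a different period lineage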
auto cursor = store->svc()->mdlog->get_period_history()->lookup(info.realm_epoch); + if (cursor) { + // verify that the period ids match + auto& existing = cursor.get_period().get_id(); + if (existing != info.period) { + ldpp_dout(dpp, -1) << "ERROR: sync status period=" << info.period + << " does not match period=" << existing + << " in history at realm epoch=" << info.realm_epoch << dendl; + return RGWPeriodHistory::Cursor{-EEXIST}; + } + return cursor; + } + + // read the period from rados or pull it from the master + RGWPeriod period; + int r = store->svc()->mdlog->pull_period(dpp, info.period, period, y); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to read period id " + << info.period << ": " << cpp_strerror(r) << dendl; + return RGWPeriodHistory::Cursor{r}; + } + // attach the period to our history + cursor = store->svc()->mdlog->get_period_history()->attach(dpp, std::move(period), y); + if (!cursor) { + r = cursor.get_error(); + ldpp_dout(dpp, -1) << "ERROR: failed to read period history back to " + << info.period << ": " << cpp_strerror(r) << dendl; + } + return cursor; +} + +int RGWRemoteMetaLog::run_sync(const DoutPrefixProvider *dpp, optional_yield y) +{ + if (store->svc()->zone->is_meta_master()) { + return 0; + } + + int r = 0; + + // get shard count and oldest log period from master + rgw_mdlog_info mdlog_info; + for (;;) { + if (going_down) { + ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl; + return 0; + } + r = read_log_info(dpp, &mdlog_info); + if (r == -EIO || r == -ENOENT) { + // keep retrying if master isn't alive or hasn't initialized the log + ldpp_dout(dpp, 10) << __func__ << "(): waiting for master.." << dendl; + backoff.backoff_sleep(); + continue; + } + backoff.reset(); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; + return r; + } + break; + } + + rgw_meta_sync_status sync_status; + do { + if (going_down) { + ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl; + return 0; + } + r = run(dpp, new RGWReadSyncStatusCoroutine(&sync_env, &sync_status)); + if (r < 0 && r != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch sync status r=" << r << dendl; + return r; + } + + if (!mdlog_info.period.empty()) { + // restart sync if the remote has a period, but: + // a) our status does not, or + // b) our sync period comes before the remote's oldest log period + if (sync_status.sync_info.period.empty() || + sync_status.sync_info.realm_epoch < mdlog_info.realm_epoch) { + sync_status.sync_info.state = rgw_meta_sync_info::StateInit; + string reason; + if (sync_status.sync_info.period.empty()) { + reason = "period is empty"; + } else { + reason = SSTR("sync_info realm epoch is behind: " << sync_status.sync_info.realm_epoch << " < " << mdlog_info.realm_epoch); + } + tn->log(1, "initialize sync (reason: " + reason + ")"); + ldpp_dout(dpp, 1) << "epoch=" << sync_status.sync_info.realm_epoch + << " in sync status comes before remote's oldest mdlog epoch=" + << mdlog_info.realm_epoch << ", restarting sync" << dendl; + } + } + + if (sync_status.sync_info.state == rgw_meta_sync_info::StateInit) { + ldpp_dout(dpp, 20) << __func__ << "(): init" << dendl; + sync_status.sync_info.num_shards = mdlog_info.num_shards; + auto cursor = store->svc()->mdlog->get_period_history()->get_current(); + if (cursor) { + // run full sync, then start incremental from the current period/epoch + sync_status.sync_info.period = cursor.get_period().get_id(); + sync_status.sync_info.realm_epoch = cursor.get_epoch(); + 
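+      // (if the realm has no period history yet, the cursor is empty and the
+      // period/realm_epoch fields stay unset; the restart check above then
+      // re-initializes the status once the master reports a period)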
} + r = run(dpp, new RGWInitSyncStatusCoroutine(&sync_env, sync_status.sync_info)); + if (r == -EBUSY) { + backoff.backoff_sleep(); + continue; + } + backoff.reset(); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to init sync status r=" << r << dendl; + return r; + } + } + } while (sync_status.sync_info.state == rgw_meta_sync_info::StateInit); + + auto num_shards = sync_status.sync_info.num_shards; + if (num_shards != mdlog_info.num_shards) { + ldpp_dout(dpp, -1) << "ERROR: can't sync, mismatch between num shards, master num_shards=" << mdlog_info.num_shards << " local num_shards=" << num_shards << dendl; + return -EINVAL; + } + + RGWPeriodHistory::Cursor cursor; + do { + r = run(dpp, new RGWReadSyncStatusCoroutine(&sync_env, &sync_status)); + if (r < 0 && r != -ENOENT) { + tn->log(0, SSTR("ERROR: failed to fetch sync status r=" << r)); + return r; + } + + switch ((rgw_meta_sync_info::SyncState)sync_status.sync_info.state) { + case rgw_meta_sync_info::StateBuildingFullSyncMaps: + tn->log(20, "building full sync maps"); + r = run(dpp, new RGWFetchAllMetaCR(&sync_env, num_shards, sync_status.sync_markers, tn)); + if (r == -EBUSY || r == -EIO) { + backoff.backoff_sleep(); + continue; + } + backoff.reset(); + if (r < 0) { + tn->log(0, SSTR("ERROR: failed to fetch all metadata keys (r=" << r << ")")); + return r; + } + + sync_status.sync_info.state = rgw_meta_sync_info::StateSync; + r = store_sync_info(dpp, sync_status.sync_info); + if (r < 0) { + tn->log(0, SSTR("ERROR: failed to update sync status (r=" << r << ")")); + return r; + } + /* fall through */ + case rgw_meta_sync_info::StateSync: + tn->log(20, "sync"); + // find our position in the period history (if any) + cursor = get_period_at(dpp, store, sync_status.sync_info, y); + r = cursor.get_error(); + if (r < 0) { + return r; + } + meta_sync_cr = new RGWMetaSyncCR(&sync_env, cursor, sync_status, tn); + r = run(dpp, meta_sync_cr); + if (r < 0) { + tn->log(0, "ERROR: failed to fetch all metadata keys"); + return r; + } + break; + default: + tn->log(0, "ERROR: bad sync state!"); + return -EIO; + } + } while (!going_down); + + return 0; +} + +void RGWRemoteMetaLog::wakeup(int shard_id) +{ + if (!meta_sync_cr) { + return; + } + meta_sync_cr->wakeup(shard_id); +} + +int RGWCloneMetaLogCoroutine::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + do { + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": init request" << dendl; + return state_init(); + } + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status" << dendl; + return state_read_shard_status(); + } + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status complete" << dendl; + return state_read_shard_status_complete(); + } + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": sending rest request" << dendl; + return state_send_rest_request(dpp); + } + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": receiving rest response" << dendl; + return state_receive_rest_response(); + } + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries" << dendl; + return state_store_mdlog_entries(); + } + } while (truncated); + yield { + ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries complete" << dendl; + return state_store_mdlog_entries_complete(); + } + } + + return 0; +} + +int RGWCloneMetaLogCoroutine::state_init() +{ + data = 
rgw_mdlog_shard_data(); + + return 0; +} + +int RGWCloneMetaLogCoroutine::state_read_shard_status() +{ + const bool add_ref = false; // default constructs with refs=1 + + completion.reset(new RGWMetadataLogInfoCompletion( + [this](int ret, const cls_log_header& header) { + if (ret < 0) { + if (ret != -ENOENT) { + ldpp_dout(sync_env->dpp, 1) << "ERROR: failed to read mdlog info with " + << cpp_strerror(ret) << dendl; + } + } else { + shard_info.marker = header.max_marker; + shard_info.last_update = header.max_time.to_real_time(); + } + // wake up parent stack + io_complete(); + }), add_ref); + + int ret = mdlog->get_info_async(sync_env->dpp, shard_id, completion.get()); + if (ret < 0) { + ldpp_dout(sync_env->dpp, 0) << "ERROR: mdlog->get_info_async() returned ret=" << ret << dendl; + return set_cr_error(ret); + } + + return io_block(0); +} + +int RGWCloneMetaLogCoroutine::state_read_shard_status_complete() +{ + completion.reset(); + + ldpp_dout(sync_env->dpp, 20) << "shard_id=" << shard_id << " marker=" << shard_info.marker << " last_update=" << shard_info.last_update << dendl; + + marker = shard_info.marker; + + return 0; +} + +int RGWCloneMetaLogCoroutine::state_send_rest_request(const DoutPrefixProvider *dpp) +{ + RGWRESTConn *conn = sync_env->conn; + + char buf[32]; + snprintf(buf, sizeof(buf), "%d", shard_id); + + char max_entries_buf[32]; + snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", max_entries); + + const char *marker_key = (marker.empty() ? "" : "marker"); + + rgw_http_param_pair pairs[] = { { "type", "metadata" }, + { "id", buf }, + { "period", period.c_str() }, + { "max-entries", max_entries_buf }, + { marker_key, marker.c_str() }, + { NULL, NULL } }; + + http_op = new RGWRESTReadResource(conn, "/admin/log", pairs, NULL, sync_env->http_manager); + + init_new_io(http_op); + + int ret = http_op->aio_read(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl; + log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; + http_op->put(); + http_op = NULL; + return set_cr_error(ret); + } + + return io_block(0); +} + +int RGWCloneMetaLogCoroutine::state_receive_rest_response() +{ + int ret = http_op->wait(&data, null_yield); + if (ret < 0) { + error_stream << "http operation failed: " << http_op->to_str() << " status=" << http_op->get_http_status() << std::endl; + ldpp_dout(sync_env->dpp, 5) << "failed to wait for op, ret=" << ret << dendl; + http_op->put(); + http_op = NULL; + return set_cr_error(ret); + } + http_op->put(); + http_op = NULL; + + ldpp_dout(sync_env->dpp, 20) << "remote mdlog, shard_id=" << shard_id << " num of shard entries: " << data.entries.size() << dendl; + + truncated = ((int)data.entries.size() == max_entries); + + if (data.entries.empty()) { + if (new_marker) { + *new_marker = marker; + } + return set_cr_done(); + } + + if (new_marker) { + *new_marker = data.entries.back().id; + } + + return 0; +} + + +int RGWCloneMetaLogCoroutine::state_store_mdlog_entries() +{ + list dest_entries; + + vector::iterator iter; + for (iter = data.entries.begin(); iter != data.entries.end(); ++iter) { + rgw_mdlog_entry& entry = *iter; + ldpp_dout(sync_env->dpp, 20) << "entry: name=" << entry.name << dendl; + + cls_log_entry dest_entry; + dest_entry.id = entry.id; + dest_entry.section = entry.section; + dest_entry.name = entry.name; + dest_entry.timestamp = utime_t(entry.timestamp); + + encode(entry.log_data, dest_entry.data); + + dest_entries.push_back(dest_entry); + + marker = entry.id; + 
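+    // 'marker' advances to the last entry converted, so the next REST fetch
+    // in the clone loop resumes past this batch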
} + + RGWAioCompletionNotifier *cn = stack->create_completion_notifier(); + + int ret = mdlog->store_entries_in_shard(sync_env->dpp, dest_entries, shard_id, cn->completion()); + if (ret < 0) { + cn->put(); + ldpp_dout(sync_env->dpp, 10) << "failed to store md log entries shard_id=" << shard_id << " ret=" << ret << dendl; + return set_cr_error(ret); + } + return io_block(0); +} + +int RGWCloneMetaLogCoroutine::state_store_mdlog_entries_complete() +{ + return set_cr_done(); +} + +void rgw_meta_sync_info::decode_json(JSONObj *obj) +{ + string s; + JSONDecoder::decode_json("status", s, obj); + if (s == "init") { + state = StateInit; + } else if (s == "building-full-sync-maps") { + state = StateBuildingFullSyncMaps; + } else if (s == "sync") { + state = StateSync; + } + JSONDecoder::decode_json("num_shards", num_shards, obj); + JSONDecoder::decode_json("period", period, obj); + JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); +} + +void rgw_meta_sync_info::dump(Formatter *f) const +{ + string s; + switch ((SyncState)state) { + case StateInit: + s = "init"; + break; + case StateBuildingFullSyncMaps: + s = "building-full-sync-maps"; + break; + case StateSync: + s = "sync"; + break; + default: + s = "unknown"; + break; + } + encode_json("status", s, f); + encode_json("num_shards", num_shards, f); + encode_json("period", period, f); + encode_json("realm_epoch", realm_epoch, f); +} + + +void rgw_meta_sync_marker::decode_json(JSONObj *obj) +{ + int s; + JSONDecoder::decode_json("state", s, obj); + state = s; + JSONDecoder::decode_json("marker", marker, obj); + JSONDecoder::decode_json("next_step_marker", next_step_marker, obj); + JSONDecoder::decode_json("total_entries", total_entries, obj); + JSONDecoder::decode_json("pos", pos, obj); + utime_t ut; + JSONDecoder::decode_json("timestamp", ut, obj); + timestamp = ut.to_real_time(); + JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); +} + +void rgw_meta_sync_marker::dump(Formatter *f) const +{ + encode_json("state", (int)state, f); + encode_json("marker", marker, f); + encode_json("next_step_marker", next_step_marker, f); + encode_json("total_entries", total_entries, f); + encode_json("pos", pos, f); + encode_json("timestamp", utime_t(timestamp), f); + encode_json("realm_epoch", realm_epoch, f); +} + +void rgw_meta_sync_status::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("info", sync_info, obj); + JSONDecoder::decode_json("markers", sync_markers, obj); +} + +void rgw_meta_sync_status::dump(Formatter *f) const { + encode_json("info", sync_info, f); + encode_json("markers", sync_markers, f); +} + +void rgw_sync_error_info::dump(Formatter *f) const { + encode_json("source_zone", source_zone, f); + encode_json("error_code", error_code, f); + encode_json("message", message, f); +} + diff --git a/src/rgw/driver/rados/rgw_sync.h b/src/rgw/driver/rados/rgw_sync.h new file mode 100644 index 00000000000..8c4e511ae3e --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync.h @@ -0,0 +1,549 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_H +#define CEPH_RGW_SYNC_H + +#include + +#include "include/stringify.h" + +#include "rgw_coroutine.h" +#include "rgw_http_client.h" +#include "rgw_metadata.h" +#include "rgw_meta_sync_status.h" +#include "rgw_sal.h" +#include "rgw_sal_rados.h" +#include "rgw_sync_trace.h" +#include "rgw_mdlog.h" + +#define ERROR_LOGGER_SHARDS 32 +#define RGW_SYNC_ERROR_LOG_SHARD_PREFIX "sync.error-log" + +struct rgw_mdlog_info { + 
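+  // decoded from the master zone's mdlog info response (read_log_info());
+  // realm_epoch identifies the oldest period the master still holds log
+  // entries for, which run_sync() compares against the local sync status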
uint32_t num_shards; + std::string period; //< period id of the master's oldest metadata log + epoch_t realm_epoch; //< realm epoch of oldest metadata log + + rgw_mdlog_info() : num_shards(0), realm_epoch(0) {} + + void decode_json(JSONObj *obj); +}; + + +struct rgw_mdlog_entry { + std::string id; + std::string section; + std::string name; + ceph::real_time timestamp; + RGWMetadataLogData log_data; + + void decode_json(JSONObj *obj); + + bool convert_from(cls_log_entry& le) { + id = le.id; + section = le.section; + name = le.name; + timestamp = le.timestamp.to_real_time(); + try { + auto iter = le.data.cbegin(); + decode(log_data, iter); + } catch (buffer::error& err) { + return false; + } + return true; + } +}; + +struct rgw_mdlog_shard_data { + std::string marker; + bool truncated; + std::vector entries; + + void decode_json(JSONObj *obj); +}; + +class RGWAsyncRadosProcessor; +class RGWMetaSyncStatusManager; +class RGWMetaSyncCR; +class RGWRESTConn; +class RGWSyncTraceManager; + +class RGWSyncErrorLogger { + rgw::sal::RadosStore* store; + + std::vector oids; + int num_shards; + + std::atomic counter = { 0 }; +public: + RGWSyncErrorLogger(rgw::sal::RadosStore* _store, const std::string &oid_prefix, int _num_shards); + RGWCoroutine *log_error_cr(const DoutPrefixProvider *dpp, const std::string& source_zone, const std::string& section, const std::string& name, uint32_t error_code, const std::string& message); + + static std::string get_shard_oid(const std::string& oid_prefix, int shard_id); +}; + +struct rgw_sync_error_info { + std::string source_zone; + uint32_t error_code; + std::string message; + + rgw_sync_error_info() : error_code(0) {} + rgw_sync_error_info(const std::string& _source_zone, uint32_t _error_code, const std::string& _message) : source_zone(_source_zone), error_code(_error_code), message(_message) {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(source_zone, bl); + encode(error_code, bl); + encode(message, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(source_zone, bl); + decode(error_code, bl); + decode(message, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; +}; +WRITE_CLASS_ENCODER(rgw_sync_error_info) + +#define DEFAULT_BACKOFF_MAX 30 + +class RGWSyncBackoff { + int cur_wait; + int max_secs; + + void update_wait_time(); +public: + explicit RGWSyncBackoff(int _max_secs = DEFAULT_BACKOFF_MAX) : cur_wait(0), max_secs(_max_secs) {} + + void backoff_sleep(); + void reset() { + cur_wait = 0; + } + + void backoff(RGWCoroutine *op); +}; + +class RGWBackoffControlCR : public RGWCoroutine +{ + RGWCoroutine *cr; + ceph::mutex lock; + + RGWSyncBackoff backoff; + bool reset_backoff; + + bool exit_on_error; + +protected: + bool *backoff_ptr() { + return &reset_backoff; + } + + ceph::mutex& cr_lock() { + return lock; + } + + RGWCoroutine *get_cr() { + return cr; + } + +public: + RGWBackoffControlCR(CephContext *_cct, bool _exit_on_error) + : RGWCoroutine(_cct), + cr(nullptr), + lock(ceph::make_mutex("RGWBackoffControlCR::lock:" + stringify(this))), + reset_backoff(false), exit_on_error(_exit_on_error) { + } + + ~RGWBackoffControlCR() override { + if (cr) { + cr->put(); + } + } + + virtual RGWCoroutine *alloc_cr() = 0; + virtual RGWCoroutine *alloc_finisher_cr() { return NULL; } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +struct RGWMetaSyncEnv { + const DoutPrefixProvider *dpp; + CephContext *cct{nullptr}; + rgw::sal::RadosStore* store{nullptr}; + 
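+  // conn and http_manager talk to the master zone, while store and
+  // async_rados act on the local cluster; the env is copied wholesale where
+  // a separate coroutine manager is needed (see read_sync_status()), so it
+  // holds only raw, non-owning pointers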
RGWRESTConn *conn{nullptr}; + RGWAsyncRadosProcessor *async_rados{nullptr}; + RGWHTTPManager *http_manager{nullptr}; + RGWSyncErrorLogger *error_logger{nullptr}; + RGWSyncTraceManager *sync_tracer{nullptr}; + + RGWMetaSyncEnv() {} + + void init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _store, RGWRESTConn *_conn, + RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, + RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer); + + std::string shard_obj_name(int shard_id); + std::string status_oid(); +}; + +class RGWRemoteMetaLog : public RGWCoroutinesManager { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWRESTConn *conn; + RGWAsyncRadosProcessor *async_rados; + + RGWHTTPManager http_manager; + RGWMetaSyncStatusManager *status_manager; + RGWSyncErrorLogger *error_logger{nullptr}; + RGWSyncTraceManager *sync_tracer{nullptr}; + + RGWMetaSyncCR *meta_sync_cr{nullptr}; + + RGWSyncBackoff backoff; + + RGWMetaSyncEnv sync_env; + + void init_sync_env(RGWMetaSyncEnv *env); + int store_sync_info(const DoutPrefixProvider *dpp, const rgw_meta_sync_info& sync_info); + + std::atomic going_down = { false }; + + RGWSyncTraceNodeRef tn; + +public: + RGWRemoteMetaLog(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* _store, + RGWAsyncRadosProcessor *async_rados, + RGWMetaSyncStatusManager *_sm) + : RGWCoroutinesManager(_store->ctx(), _store->getRados()->get_cr_registry()), + dpp(dpp), store(_store), conn(NULL), async_rados(async_rados), + http_manager(store->ctx(), completion_mgr), + status_manager(_sm) {} + + virtual ~RGWRemoteMetaLog() override; + + int init(); + void finish(); + + int read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info); + int read_master_log_shards_info(const DoutPrefixProvider *dpp, const std::string& master_period, std::map *shards_info); + int read_master_log_shards_next(const DoutPrefixProvider *dpp, const std::string& period, std::map shard_markers, std::map *result); + int read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status); + int init_sync_status(const DoutPrefixProvider *dpp); + int run_sync(const DoutPrefixProvider *dpp, optional_yield y); + + void wakeup(int shard_id); + + RGWMetaSyncEnv& get_sync_env() { + return sync_env; + } +}; + +class RGWMetaSyncStatusManager : public DoutPrefixProvider { + rgw::sal::RadosStore* store; + librados::IoCtx ioctx; + + RGWRemoteMetaLog master_log; + + std::map shard_objs; + + struct utime_shard { + real_time ts; + int shard_id; + + utime_shard() : shard_id(-1) {} + + bool operator<(const utime_shard& rhs) const { + if (ts == rhs.ts) { + return shard_id < rhs.shard_id; + } + return ts < rhs.ts; + } + }; + + ceph::shared_mutex ts_to_shard_lock = ceph::make_shared_mutex("ts_to_shard_lock"); + std::map ts_to_shard; + std::vector clone_markers; + +public: + RGWMetaSyncStatusManager(rgw::sal::RadosStore* _store, RGWAsyncRadosProcessor *async_rados) + : store(_store), master_log(this, store, async_rados, this) + {} + + virtual ~RGWMetaSyncStatusManager() override; + + int init(const DoutPrefixProvider *dpp); + + int read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status) { + return master_log.read_sync_status(dpp, sync_status); + } + int init_sync_status(const DoutPrefixProvider *dpp) { return master_log.init_sync_status(dpp); } + int read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info) { + return master_log.read_log_info(dpp, log_info); + } + int 
read_master_log_shards_info(const DoutPrefixProvider *dpp, const std::string& master_period, std::map<int, RGWMetadataLogInfo> *shards_info) {
+    return master_log.read_master_log_shards_info(dpp, master_period, shards_info);
+  }
+  int read_master_log_shards_next(const DoutPrefixProvider *dpp, const std::string& period, std::map<int, std::string> shard_markers, std::map<int, rgw_mdlog_shard_data> *result) {
+    return master_log.read_master_log_shards_next(dpp, period, shard_markers, result);
+  }
+
+  int run(const DoutPrefixProvider *dpp, optional_yield y) { return master_log.run_sync(dpp, y); }
+
+
+  // implements DoutPrefixProvider
+  CephContext *get_cct() const override { return store->ctx(); }
+  unsigned get_subsys() const override;
+  std::ostream& gen_prefix(std::ostream& out) const override;
+
+  void wakeup(int shard_id) { return master_log.wakeup(shard_id); }
+  void stop() {
+    master_log.finish();
+  }
+};
+
+class RGWOrderCallCR : public RGWCoroutine
+{
+public:
+  RGWOrderCallCR(CephContext *cct) : RGWCoroutine(cct) {}
+
+  virtual void call_cr(RGWCoroutine *_cr) = 0;
+};
+
+class RGWLastCallerWinsCR : public RGWOrderCallCR
+{
+  RGWCoroutine *cr{nullptr};
+
+public:
+  explicit RGWLastCallerWinsCR(CephContext *cct) : RGWOrderCallCR(cct) {}
+  ~RGWLastCallerWinsCR() {
+    if (cr) {
+      cr->put();
+    }
+  }
+
+  int operate(const DoutPrefixProvider *dpp) override;
+
+  void call_cr(RGWCoroutine *_cr) override {
+    if (cr) {
+      cr->put();
+    }
+    cr = _cr;
+  }
+};
+
+template <class T, class K>
+class RGWSyncShardMarkerTrack {
+  struct marker_entry {
+    uint64_t pos;
+    real_time timestamp;
+
+    marker_entry() : pos(0) {}
+    marker_entry(uint64_t _p, const real_time& _ts) : pos(_p), timestamp(_ts) {}
+  };
+  typename std::map<T, marker_entry> pending;
+
+  std::map<T, marker_entry> finish_markers;
+
+  int window_size;
+  int updates_since_flush;
+
+  RGWOrderCallCR *order_cr{nullptr};
+
+protected:
+  typename std::set<K> need_retry_set;
+
+  virtual RGWCoroutine *store_marker(const T& new_marker, uint64_t index_pos, const real_time& timestamp) = 0;
+  virtual RGWOrderCallCR *allocate_order_control_cr() = 0;
+  virtual void handle_finish(const T& marker) { }
+
+public:
+  RGWSyncShardMarkerTrack(int _window_size) : window_size(_window_size), updates_since_flush(0) {}
+  virtual ~RGWSyncShardMarkerTrack() {
+    if (order_cr) {
+      order_cr->put();
+    }
+  }
+
+  bool start(const T& pos, int index_pos, const real_time& timestamp) {
+    if (pending.find(pos) != pending.end()) {
+      return false;
+    }
+    pending[pos] = marker_entry(index_pos, timestamp);
+    return true;
+  }
+
+  void try_update_high_marker(const T& pos, int index_pos, const real_time& timestamp) {
+    finish_markers[pos] = marker_entry(index_pos, timestamp);
+  }
+
+  RGWCoroutine *finish(const T& pos) {
+    if (pending.empty()) {
+      /* can happen, due to a bug that ended up with multiple objects with the same name and version
+       * -- which can happen when versioning is enabled and the version is 'null'.
+ */ + return NULL; + } + + typename std::map<T, marker_entry>::iterator iter = pending.begin(); + + bool is_first = (pos == iter->first); + + typename std::map<T, marker_entry>::iterator pos_iter = pending.find(pos); + if (pos_iter == pending.end()) { + /* see pending.empty() comment */ + return NULL; + } + + finish_markers[pos] = pos_iter->second; + + pending.erase(pos); + + handle_finish(pos); + + updates_since_flush++; + + if (is_first && (updates_since_flush >= window_size || pending.empty())) { + return flush(); + } + return NULL; + } + + RGWCoroutine *flush() { + if (finish_markers.empty()) { + return NULL; + } + + typename std::map<T, marker_entry>::iterator i; + + if (pending.empty()) { + i = finish_markers.end(); + } else { + i = finish_markers.lower_bound(pending.begin()->first); + } + if (i == finish_markers.begin()) { + return NULL; + } + updates_since_flush = 0; + + auto last = i; + --i; + const T& high_marker = i->first; + marker_entry& high_entry = i->second; + RGWCoroutine *cr = order(store_marker(high_marker, high_entry.pos, high_entry.timestamp)); + finish_markers.erase(finish_markers.begin(), last); + return cr; + } + + /* + * a key needs retry if it was being processed when another marker pointing + * to the same bucket shard arrived. Instead of processing the new entry, we + * mark the key as need_retry so that when we finish processing the original, + * we reprocess the same bucket shard, in case more entries arrived in the + * meantime. This closes the race window. + */ + bool need_retry(const K& key) { + return (need_retry_set.find(key) != need_retry_set.end()); + } + + void set_need_retry(const K& key) { + need_retry_set.insert(key); + } + + void reset_need_retry(const K& key) { + need_retry_set.erase(key); + } + + RGWCoroutine *order(RGWCoroutine *cr) { + /* either returns a new order control cr (RGWLastCallerWinsCR), or updates the existing one, in which case it returns + * nullptr and the existing one will call the cr + */ + if (order_cr && order_cr->is_done()) { + order_cr->put(); + order_cr = nullptr; + } + if (!order_cr) { + order_cr = allocate_order_control_cr(); + order_cr->get(); + order_cr->call_cr(cr); + return order_cr; + } + order_cr->call_cr(cr); + return nullptr; /* don't call it a second time */ + } +}; + +class RGWMetaSyncShardMarkerTrack; + +class RGWMetaSyncSingleEntryCR : public RGWCoroutine { + RGWMetaSyncEnv *sync_env; + + std::string raw_key; + std::string entry_marker; + RGWMDLogStatus op_status; + + ssize_t pos; + std::string section; + std::string key; + + int sync_status; + + bufferlist md_bl; + + RGWMetaSyncShardMarkerTrack *marker_tracker; + + int tries; + + bool error_injection; + + RGWSyncTraceNodeRef tn; + +public: + RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env, + const std::string& _raw_key, const std::string& _entry_marker, + const RGWMDLogStatus& _op_status, + RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent); + + int operate(const DoutPrefixProvider *dpp) override; +}; + +class RGWShardCollectCR : public RGWCoroutine { + int current_running = 0; + protected: + int max_concurrent; + int status = 0; + + // called with the result of each child. error codes can be ignored by + // returning 0. if handle_result() returns a negative value, it's + // treated as an error and stored in 'status'. 
the last such error is + // reported to the caller with set_cr_error() + virtual int handle_result(int r) = 0; + public: + RGWShardCollectCR(CephContext *_cct, int _max_concurrent) + : RGWCoroutine(_cct), max_concurrent(_max_concurrent) + {} + + virtual bool spawn_next() = 0; + int operate(const DoutPrefixProvider *dpp) override; +}; + +// factory functions for meta sync coroutines needed in mdlog trimming + +RGWCoroutine* create_read_remote_mdlog_shard_info_cr(RGWMetaSyncEnv *env, + const std::string& period, + int shard_id, + RGWMetadataLogInfo* info); + +RGWCoroutine* create_list_remote_mdlog_shard_cr(RGWMetaSyncEnv *env, + const std::string& period, + int shard_id, + const std::string& marker, + uint32_t max_entries, + rgw_mdlog_shard_data *result); + +#endif diff --git a/src/rgw/driver/rados/rgw_sync_counters.cc b/src/rgw/driver/rados/rgw_sync_counters.cc new file mode 100644 index 00000000000..1d23d58dcfb --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_counters.cc @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/ceph_context.h" +#include "rgw_sync_counters.h" + +namespace sync_counters { + +PerfCountersRef build(CephContext *cct, const std::string& name) +{ + PerfCountersBuilder b(cct, name, l_first, l_last); + + // share these counters with ceph-mgr + b.set_prio_default(PerfCountersBuilder::PRIO_USEFUL); + + b.add_u64_avg(l_fetch, "fetch_bytes", "Number of object bytes replicated"); + b.add_u64_counter(l_fetch_not_modified, "fetch_not_modified", "Number of objects already replicated"); + b.add_u64_counter(l_fetch_err, "fetch_errors", "Number of object replication errors"); + + b.add_time_avg(l_poll, "poll_latency", "Average latency of replication log requests"); + b.add_u64_counter(l_poll_err, "poll_errors", "Number of replication log request errors"); + + auto logger = PerfCountersRef{ b.create_perf_counters(), cct }; + cct->get_perfcounters_collection()->add(logger.get()); + return logger; +} + +} // namespace sync_counters diff --git a/src/rgw/driver/rados/rgw_sync_counters.h b/src/rgw/driver/rados/rgw_sync_counters.h new file mode 100644 index 00000000000..df3acc68023 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_counters.h @@ -0,0 +1,25 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "common/perf_counters_collection.h" + +namespace sync_counters { + +enum { + l_first = 805000, + + l_fetch, + l_fetch_not_modified, + l_fetch_err, + + l_poll, + l_poll_err, + + l_last, +}; + +PerfCountersRef build(CephContext *cct, const std::string& name); + +} // namespace sync_counters diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.cc b/src/rgw/driver/rados/rgw_sync_error_repo.cc new file mode 100644 index 00000000000..44305b60b6b --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_error_repo.cc @@ -0,0 +1,205 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ */ + +#include "rgw_sync_error_repo.h" +#include "rgw_coroutine.h" +#include "rgw_sal.h" +#include "services/svc_rados.h" +#include "cls/cmpomap/client.h" + +namespace rgw::error_repo { + +// prefix for the binary encoding of keys. this particular value is not +// valid as the first byte of a utf8 code point, so we use this to +// differentiate the binary encoding from existing string keys for +// backward-compatibility +constexpr uint8_t binary_key_prefix = 0x80; + +struct key_type { + rgw_bucket_shard bs; + std::optional<uint64_t> gen; +}; + +void encode(const key_type& k, bufferlist& bl, uint64_t f=0) +{ + ENCODE_START(1, 1, bl); + encode(k.bs, bl); + encode(k.gen, bl); + ENCODE_FINISH(bl); +} + +void decode(key_type& k, bufferlist::const_iterator& bl) +{ + DECODE_START(1, bl); + decode(k.bs, bl); + decode(k.gen, bl); + DECODE_FINISH(bl); +} + +std::string encode_key(const rgw_bucket_shard& bs, + std::optional<uint64_t> gen) +{ + using ceph::encode; + const auto key = key_type{bs, gen}; + bufferlist bl; + encode(binary_key_prefix, bl); + encode(key, bl); + return bl.to_str(); +} + +int decode_key(std::string encoded, + rgw_bucket_shard& bs, + std::optional<uint64_t>& gen) +{ + using ceph::decode; + key_type key; + const auto bl = bufferlist::static_from_string(encoded); + auto p = bl.cbegin(); + try { + uint8_t prefix; + decode(prefix, p); + if (prefix != binary_key_prefix) { + return -EINVAL; + } + decode(key, p); + } catch (const buffer::error&) { + return -EIO; + } + if (!p.end()) { + return -EIO; // buffer contained unexpected bytes + } + bs = std::move(key.bs); + gen = key.gen; + return 0; +} + +ceph::real_time decode_value(const bufferlist& bl) +{ + uint64_t value; + try { + using ceph::decode; + decode(value, bl); + } catch (const buffer::error&) { + value = 0; // empty buffer = 0 + } + return ceph::real_clock::zero() + ceph::timespan(value); +} + +int write(librados::ObjectWriteOperation& op, + const std::string& key, + ceph::real_time timestamp) +{ + // overwrite the existing timestamp if value is greater + const uint64_t value = timestamp.time_since_epoch().count(); + using namespace ::cls::cmpomap; + const bufferlist zero = u64_buffer(0); // compare against 0 for missing keys + return cmp_set_vals(op, Mode::U64, Op::GT, {{key, u64_buffer(value)}}, zero); +} + +int remove(librados::ObjectWriteOperation& op, + const std::string& key, + ceph::real_time timestamp) +{ + // remove the omap key if value >= existing + const uint64_t value = timestamp.time_since_epoch().count(); + using namespace ::cls::cmpomap; + return cmp_rm_keys(op, Mode::U64, Op::GTE, {{key, u64_buffer(value)}}); +} + +class RGWErrorRepoWriteCR : public RGWSimpleCoroutine { + RGWSI_RADOS::Obj obj; + std::string key; + ceph::real_time timestamp; + + boost::intrusive_ptr<RGWAioCompletionNotifier> cn; + public: + RGWErrorRepoWriteCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, + const std::string& key, ceph::real_time timestamp) + : RGWSimpleCoroutine(rados->ctx()), + obj(rados->obj(raw_obj)), + key(key), timestamp(timestamp) + {} + + int send_request(const DoutPrefixProvider *dpp) override { + librados::ObjectWriteOperation op; + int r = write(op, key, timestamp); + if (r < 0) { + return r; + } + r = obj.open(dpp); + if (r < 0) { + return r; + } + + cn = stack->create_completion_notifier(); + return obj.aio_operate(cn->completion(), &op); + } + + int request_complete() override { + return cn->completion()->get_return_value(); + } +}; + +RGWCoroutine* write_cr(RGWSI_RADOS* rados, + const rgw_raw_obj& obj, + const std::string& key, + ceph::real_time timestamp) +{ + 
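+ // factory: wraps the conditional omap write above in a coroutine that the sync machinery can schedule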
return new RGWErrorRepoWriteCR(rados, obj, key, timestamp); +} + + +class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine { + RGWSI_RADOS::Obj obj; + std::string key; + ceph::real_time timestamp; + + boost::intrusive_ptr<RGWAioCompletionNotifier> cn; + public: + RGWErrorRepoRemoveCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, + const std::string& key, ceph::real_time timestamp) + : RGWSimpleCoroutine(rados->ctx()), + obj(rados->obj(raw_obj)), + key(key), timestamp(timestamp) + {} + + int send_request(const DoutPrefixProvider *dpp) override { + librados::ObjectWriteOperation op; + int r = remove(op, key, timestamp); + if (r < 0) { + return r; + } + r = obj.open(dpp); + if (r < 0) { + return r; + } + + cn = stack->create_completion_notifier(); + return obj.aio_operate(cn->completion(), &op); + } + + int request_complete() override { + return cn->completion()->get_return_value(); + } +}; + +RGWCoroutine* remove_cr(RGWSI_RADOS* rados, + const rgw_raw_obj& obj, + const std::string& key, + ceph::real_time timestamp) +{ + return new RGWErrorRepoRemoveCR(rados, obj, key, timestamp); +} + +} // namespace rgw::error_repo diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.h b/src/rgw/driver/rados/rgw_sync_error_repo.h new file mode 100644 index 00000000000..60525d281f0 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_error_repo.h @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#pragma once + +#include <optional> +#include "include/rados/librados_fwd.hpp" +#include "include/buffer_fwd.h" +#include "common/ceph_time.h" + +class RGWSI_RADOS; +class RGWCoroutine; +struct rgw_raw_obj; +struct rgw_bucket_shard; + +namespace rgw::error_repo { + +// binary-encode a bucket/shard/gen and return it as a string +std::string encode_key(const rgw_bucket_shard& bs, + std::optional<uint64_t> gen); + +// try to decode a key. 
returns -EINVAL if not in binary format +int decode_key(std::string encoded, + rgw_bucket_shard& bs, + std::optional<uint64_t>& gen); + +// decode a timestamp as a uint64_t for CMPXATTR_MODE_U64 +ceph::real_time decode_value(const ceph::bufferlist& bl); + +// write an omap key iff the given timestamp is newer +int write(librados::ObjectWriteOperation& op, + const std::string& key, + ceph::real_time timestamp); +RGWCoroutine* write_cr(RGWSI_RADOS* rados, + const rgw_raw_obj& obj, + const std::string& key, + ceph::real_time timestamp); + +// remove an omap key iff there isn't a newer timestamp +int remove(librados::ObjectWriteOperation& op, + const std::string& key, + ceph::real_time timestamp); +RGWCoroutine* remove_cr(RGWSI_RADOS* rados, + const rgw_raw_obj& obj, + const std::string& key, + ceph::real_time timestamp); + +} // namespace rgw::error_repo diff --git a/src/rgw/driver/rados/rgw_sync_module.cc b/src/rgw/driver/rados/rgw_sync_module.cc new file mode 100644 index 00000000000..5a1e70be34e --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module.cc @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_common.h" +#include "rgw_coroutine.h" +#include "rgw_cr_rados.h" +#include "rgw_sync_module.h" +#include "rgw_data_sync.h" +#include "rgw_bucket.h" + +#include "rgw_sync_module_log.h" +#include "rgw_sync_module_es.h" +#include "rgw_sync_module_aws.h" + +#include <boost/asio/yield.hpp> + +#define dout_subsys ceph_subsys_rgw + +RGWMetadataHandler *RGWSyncModuleInstance::alloc_bucket_meta_handler() +{ + return RGWBucketMetaHandlerAllocator::alloc(); +} + +RGWBucketInstanceMetadataHandlerBase* RGWSyncModuleInstance::alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver) +{ + return RGWBucketInstanceMetaHandlerAllocator::alloc(driver); +} + +RGWStatRemoteObjCBCR::RGWStatRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + src_bucket(_src_bucket), key(_key) { +} + +RGWCallStatRemoteObjCR::RGWCallStatRemoteObjCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + src_bucket(_src_bucket), key(_key) { +} + +int RGWCallStatRemoteObjCR::operate(const DoutPrefixProvider *dpp) { + reenter(this) { + yield { + call(new RGWStatRemoteObjCR(sync_env->async_rados, sync_env->driver, + sc->source_zone, + src_bucket, key, &mtime, &size, &etag, &attrs, &headers)); + } + if (retcode < 0) { + ldpp_dout(dpp, 10) << "RGWStatRemoteObjCR() returned " << retcode << dendl; + return set_cr_error(retcode); + } + ldpp_dout(dpp, 20) << "stat of remote obj: z=" << sc->source_zone + << " b=" << src_bucket << " k=" << key + << " size=" << size << " mtime=" << mtime << dendl; + yield { + RGWStatRemoteObjCBCR *cb = allocate_callback(); + if (cb) { + cb->set_result(mtime, size, etag, std::move(attrs), std::move(headers)); + call(cb); + } + } + if (retcode < 0) { + ldpp_dout(dpp, 10) << "RGWStatRemoteObjCR() callback returned " << retcode << dendl; + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; +} + +void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager) +{ + RGWSyncModuleRef default_module(std::make_shared<RGWDefaultSyncModule>()); + modules_manager->register_module("rgw", default_module, true); + + RGWSyncModuleRef archive_module(std::make_shared<RGWArchiveSyncModule>()); + modules_manager->register_module("archive", archive_module); + + RGWSyncModuleRef log_module(std::make_shared<RGWLogSyncModule>()); + 
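+ // 'log' is a trivial module that just writes sync operations to the debug log; useful for testing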
modules_manager->register_module("log", log_module); + + RGWSyncModuleRef es_module(std::make_shared()); + modules_manager->register_module("elasticsearch", es_module); + + RGWSyncModuleRef aws_module(std::make_shared()); + modules_manager->register_module("cloud", aws_module); +} diff --git a/src/rgw/driver/rados/rgw_sync_module.h b/src/rgw/driver/rados/rgw_sync_module.h new file mode 100644 index 00000000000..6d974c39a27 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module.h @@ -0,0 +1,202 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_MODULE_H +#define CEPH_RGW_SYNC_MODULE_H + +#include "rgw_common.h" +#include "rgw_coroutine.h" + +class RGWBucketInfo; +class RGWRemoteDataLog; +struct RGWDataSyncCtx; +struct RGWDataSyncEnv; +struct rgw_bucket_entry_owner; +struct rgw_obj_key; +struct rgw_bucket_sync_pipe; + + +class RGWDataSyncModule { +public: + RGWDataSyncModule() {} + virtual ~RGWDataSyncModule() {} + + virtual void init(RGWDataSyncCtx *sync_env, uint64_t instance_id) {} + + virtual RGWCoroutine *init_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) { + return nullptr; + } + + virtual RGWCoroutine *start_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) { + return nullptr; + } + virtual RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) = 0; + virtual RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& bucket_info, rgw_obj_key& key, real_time& mtime, + bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0; + virtual RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& bucket_info, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0; +}; + +class RGWRESTMgr; +class RGWMetadataHandler; +class RGWBucketInstanceMetadataHandlerBase; + +class RGWSyncModuleInstance { +public: + RGWSyncModuleInstance() {} + virtual ~RGWSyncModuleInstance() {} + virtual RGWDataSyncModule *get_data_handler() = 0; + virtual RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) { + return orig; + } + virtual bool supports_user_writes() { + return false; + } + virtual RGWMetadataHandler *alloc_bucket_meta_handler(); + virtual RGWBucketInstanceMetadataHandlerBase *alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver); + + // indication whether the sync module start with full sync (default behavior) + // incremental sync would follow anyway + virtual bool should_full_sync() const { + return true; + } +}; + +typedef std::shared_ptr RGWSyncModuleInstanceRef; + +class JSONFormattable; + +class RGWSyncModule { + +public: + RGWSyncModule() {} + virtual ~RGWSyncModule() {} + + virtual bool supports_writes() { + return false; + } + virtual bool supports_data_export() = 0; + virtual int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) = 0; +}; + +typedef std::shared_ptr RGWSyncModuleRef; + + +class RGWSyncModulesManager { + ceph::mutex lock = ceph::make_mutex("RGWSyncModulesManager"); + + std::map modules; +public: + RGWSyncModulesManager() = default; + + void register_module(const std::string& name, RGWSyncModuleRef& module, bool is_default = false) { + std::lock_guard l{lock}; + 
modules[name] = module; + if (is_default) { + modules[std::string()] = module; + } + } + + bool get_module(const std::string& name, RGWSyncModuleRef *module) { + std::lock_guard l{lock}; + auto iter = modules.find(name); + if (iter == modules.end()) { + return false; + } + if (module != nullptr) { + *module = iter->second; + } + return true; + } + + + bool supports_data_export(const std::string& name) { + RGWSyncModuleRef module; + if (!get_module(name, &module)) { + return false; + } + + return module->supports_data_export(); + } + + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const std::string& name, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { + RGWSyncModuleRef module; + if (!get_module(name, &module)) { + return -ENOENT; + } + + return module.get()->create_instance(dpp, cct, config, instance); + } + + std::vector get_registered_module_names() const { + std::vector names; + for (auto& i: modules) { + if (!i.first.empty()) { + names.push_back(i.first); + } + } + return names; + } +}; + +class RGWStatRemoteObjCBCR : public RGWCoroutine { +protected: + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + rgw_bucket src_bucket; + rgw_obj_key key; + + ceph::real_time mtime; + uint64_t size = 0; + std::string etag; + std::map attrs; + std::map headers; +public: + RGWStatRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key); + ~RGWStatRemoteObjCBCR() override {} + + void set_result(ceph::real_time& _mtime, + uint64_t _size, + const std::string& _etag, + std::map&& _attrs, + std::map&& _headers) { + mtime = _mtime; + size = _size; + etag = _etag; + attrs = std::move(_attrs); + headers = std::move(_headers); + } +}; + +class RGWCallStatRemoteObjCR : public RGWCoroutine { + ceph::real_time mtime; + uint64_t size{0}; + std::string etag; + std::map attrs; + std::map headers; + +protected: + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + + rgw_bucket src_bucket; + rgw_obj_key key; + +public: + RGWCallStatRemoteObjCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key); + + ~RGWCallStatRemoteObjCR() override {} + + int operate(const DoutPrefixProvider *dpp) override; + + virtual RGWStatRemoteObjCBCR *allocate_callback() { + return nullptr; + } +}; + +void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager); + +#endif diff --git a/src/rgw/driver/rados/rgw_sync_module_aws.cc b/src/rgw/driver/rados/rgw_sync_module_aws.cc new file mode 100644 index 00000000000..6827f7f3a1a --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_aws.cc @@ -0,0 +1,1836 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" + +#include "rgw_common.h" +#include "rgw_coroutine.h" +#include "rgw_sync_module.h" +#include "rgw_data_sync.h" +#include "rgw_sync_module_aws.h" +#include "rgw_cr_rados.h" +#include "rgw_rest_conn.h" +#include "rgw_cr_rest.h" +#include "rgw_acl.h" +#include "rgw_zone.h" + +#include "services/svc_zone.h" + +#include + +#define dout_subsys ceph_subsys_rgw + + +#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) + +using namespace std; + +static string default_target_path = "rgw-${zonegroup}-${sid}/${bucket}"; + +static string get_key_oid(const rgw_obj_key& key) +{ + string oid = key.name; + if (!key.instance.empty() && + !key.have_null_instance()) { + oid += string(":") + key.instance; + } + return oid; +} + +static string obj_to_aws_path(rgw::sal::Object* obj) +{ + string path = 
obj->get_bucket()->get_name() + "/" + get_key_oid(obj->get_key()); + + + return path; +} + +/* + + json configuration definition: + + { + "connection": { + "access_key": <access key>, + "secret": <secret>, + "endpoint": <endpoint>, + "host_style": <path | virtual>, + }, + "acls": [ { "type": <id | email | uri>, + "source_id": <source_id>, + "dest_id": <dest_id> } ... ], # optional, acl mappings, no mappings if it does not exist + "target_path": <target_path>, # override default + + + # anything below here is for non-trivial configuration + # and can be used in conjunction with the above + + "default": { + "connection": { + "access_key": <access key>, + "secret": <secret>, + "endpoint": <endpoint>, + "host_style": <path | virtual>, + }, + "acls": [ # list of source uids and how they map into destination uids in the dest objects acls + { + "type": <id | email | uri>, # optional, default is id + "source_id": <source_id>, + "dest_id": <dest_id> + } ... ], + "target_path": "rgwx-${sid}/${bucket}" # how a bucket name is mapped to destination path, + # final object name will be target_path + "/" + obj + }, + "connections": [ + { + "id": <connection id>, + "access_key": <access key>, + "secret": <secret>, + "endpoint": <endpoint>, + } ... ], + "acl_profiles": [ + { + "id": <profile id>, # acl mappings + "acls": [ { + "type": <id | email | uri>, + "source_id": <source_id>, + "dest_id": <dest_id> + } ... ] + } + ], + "profiles": [ + { + "source_bucket": <source bucket or prefix>, # can specify either specific bucket name (foo), or prefix (foo*) + "target_path": <dest path>, # (override default) + "connection_id": <connection id>, # optional, if empty references default connection + "acls_id": <mappings id>, # optional, if empty references default mappings + } ... ], + } + +target path optional variables: + +(evaluated at init) +sid: sync instance id, randomly generated by sync process on first sync initialization +zonegroup: zonegroup name +zonegroup_id: zonegroup id +zone: zone name +zone_id: zone id + +(evaluated when syncing) +bucket: bucket name +owner: bucket owner + +*/ + +struct ACLMapping { + ACLGranteeTypeEnum type{ACL_TYPE_CANON_USER}; + string source_id; + string dest_id; + + ACLMapping() = default; + + ACLMapping(ACLGranteeTypeEnum t, + const string& s, + const string& d) : type(t), + source_id(s), + dest_id(d) {} + + void init(const JSONFormattable& config) { + const string& t = config["type"]; + + if (t == "email") { + type = ACL_TYPE_EMAIL_USER; + } else if (t == "uri") { + type = ACL_TYPE_GROUP; + } else { + type = ACL_TYPE_CANON_USER; + } + + source_id = config["source_id"]; + dest_id = config["dest_id"]; + } + + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ObjectSection os(jf, "acl_mapping"); + string s; + switch (type) { + case ACL_TYPE_EMAIL_USER: + s = "email"; + break; + case ACL_TYPE_GROUP: + s = "uri"; + break; + default: + s = "id"; + break; + } + encode_json("type", s, &jf); + encode_json("source_id", source_id, &jf); + encode_json("dest_id", dest_id, &jf); + } +}; + +struct ACLMappings { + map<string, ACLMapping> acl_mappings; + + void init(const JSONFormattable& config) { + for (auto& c : config.array()) { + ACLMapping m; + m.init(c); + + acl_mappings.emplace(std::make_pair(m.source_id, m)); + } + } + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ArraySection os(jf, "acls"); + + for (auto& i : acl_mappings) { + i.second.dump_conf(cct, jf); + } + } +}; + +struct AWSSyncConfig_ACLProfiles { + map<string, std::shared_ptr<ACLMappings> > acl_profiles; + + void init(const JSONFormattable& config) { + for (auto& c : config.array()) { + const string& profile_id = c["id"]; + + std::shared_ptr<ACLMappings> ap{new ACLMappings}; + ap->init(c["acls"]); + + acl_profiles[profile_id] = ap; + } + } + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ArraySection section(jf, "acl_profiles"); + + for (auto& p : acl_profiles) 
{ + Formatter::ObjectSection section(jf, "profile"); + encode_json("id", p.first, &jf); + p.second->dump_conf(cct, jf); + } + } + + bool find(const string& profile_id, ACLMappings *result) const { + auto iter = acl_profiles.find(profile_id); + if (iter == acl_profiles.end()) { + return false; + } + *result = *iter->second; + return true; + } +}; + +struct AWSSyncConfig_Connection { + string connection_id; + string endpoint; + RGWAccessKey key; + std::optional region; + HostStyle host_style{PathStyle}; + + bool has_endpoint{false}; + bool has_key{false}; + bool has_host_style{false}; + + void init(const JSONFormattable& config) { + has_endpoint = config.exists("endpoint"); + has_key = config.exists("access_key") || config.exists("secret"); + has_host_style = config.exists("host_style"); + + connection_id = config["id"]; + endpoint = config["endpoint"]; + + key = RGWAccessKey(config["access_key"], config["secret"]); + + if (config.exists("region")) { + region = config["region"]; + } else { + region.reset(); + } + + string host_style_str = config["host_style"]; + if (host_style_str != "virtual") { + host_style = PathStyle; + } else { + host_style = VirtualStyle; + } + } + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ObjectSection section(jf, "connection"); + encode_json("id", connection_id, &jf); + encode_json("endpoint", endpoint, &jf); + string s = (host_style == PathStyle ? "path" : "virtual"); + encode_json("region", region, &jf); + encode_json("host_style", s, &jf); + + { + Formatter::ObjectSection os(jf, "key"); + encode_json("access_key", key.id, &jf); + string secret = (key.key.empty() ? "" : "******"); + encode_json("secret", secret, &jf); + } + } +}; + +static int conf_to_uint64(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, const string& key, uint64_t *pval) +{ + string sval; + if (config.find(key, &sval)) { + string err; + uint64_t val = strict_strtoll(sval.c_str(), 10, &err); + if (!err.empty()) { + ldpp_dout(dpp, 0) << "ERROR: could not parse configurable value for cloud sync module: " << key << ": " << sval << dendl; + return -EINVAL; + } + *pval = val; + } + return 0; +} + +struct AWSSyncConfig_S3 { + uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE}; + uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE}; + + int init(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) { + int r = conf_to_uint64(dpp, cct, config, "multipart_sync_threshold", &multipart_sync_threshold); + if (r < 0) { + return r; + } + + r = conf_to_uint64(dpp, cct, config, "multipart_min_part_size", &multipart_min_part_size); + if (r < 0) { + return r; + } +#define MULTIPART_MIN_POSSIBLE_PART_SIZE (5 * 1024 * 1024) + if (multipart_min_part_size < MULTIPART_MIN_POSSIBLE_PART_SIZE) { + multipart_min_part_size = MULTIPART_MIN_POSSIBLE_PART_SIZE; + } + return 0; + } + + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ObjectSection section(jf, "s3"); + encode_json("multipart_sync_threshold", multipart_sync_threshold, &jf); + encode_json("multipart_min_part_size", multipart_min_part_size, &jf); + } +}; + +struct AWSSyncConfig_Profile { + string source_bucket; + bool prefix{false}; + string target_path; + string connection_id; + string acls_id; + + std::shared_ptr conn_conf; + std::shared_ptr acls; + + std::shared_ptr conn; + + void init(const JSONFormattable& config) { + source_bucket = config["source_bucket"]; + + prefix = (!source_bucket.empty() && 
source_bucket[source_bucket.size() - 1] == '*'); + + if (prefix) { + source_bucket = source_bucket.substr(0, source_bucket.size() - 1); + } + + target_path = config["target_path"]; + connection_id = config["connection_id"]; + acls_id = config["acls_id"]; + + if (config.exists("connection")) { + conn_conf = make_shared(); + conn_conf->init(config["connection"]); + } + + if (config.exists("acls")) { + acls = make_shared(); + acls->init(config["acls"]); + } + } + + void dump_conf(CephContext *cct, JSONFormatter& jf, const char *section = "config") const { + Formatter::ObjectSection config(jf, section); + string sb{source_bucket}; + if (prefix) { + sb.append("*"); + } + encode_json("source_bucket", sb, &jf); + encode_json("target_path", target_path, &jf); + encode_json("connection_id", connection_id, &jf); + encode_json("acls_id", acls_id, &jf); + if (conn_conf.get()) { + conn_conf->dump_conf(cct, jf); + } + if (acls.get()) { + acls->dump_conf(cct, jf); + } + } +}; + +static void find_and_replace(const string& src, const string& find, const string& replace, string *dest) +{ + string s = src; + + size_t pos = s.find(find); + while (pos != string::npos) { + size_t next_ofs = pos + find.size(); + s = s.substr(0, pos) + replace + s.substr(next_ofs); + pos = s.find(find, next_ofs); + } + + *dest = s; +} + +static void apply_meta_param(const string& src, const string& param, const string& val, string *dest) +{ + string s = string("${") + param + "}"; + find_and_replace(src, s, val, dest); +} + + +struct AWSSyncConfig { + AWSSyncConfig_Profile default_profile; + std::shared_ptr root_profile; + + map > connections; + AWSSyncConfig_ACLProfiles acl_profiles; + + map > explicit_profiles; + + AWSSyncConfig_S3 s3; + + int init_profile(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& profile_conf, AWSSyncConfig_Profile& profile, + bool connection_must_exist) { + if (!profile.connection_id.empty()) { + if (profile.conn_conf) { + ldpp_dout(dpp, 0) << "ERROR: ambiguous profile connection configuration, connection_id=" << profile.connection_id << dendl; + return -EINVAL; + } + if (connections.find(profile.connection_id) == connections.end()) { + ldpp_dout(dpp, 0) << "ERROR: profile configuration reference non-existent connection_id=" << profile.connection_id << dendl; + return -EINVAL; + } + profile.conn_conf = connections[profile.connection_id]; + } else if (!profile.conn_conf) { + profile.connection_id = default_profile.connection_id; + auto i = connections.find(profile.connection_id); + if (i != connections.end()) { + profile.conn_conf = i->second; + } + } + + if (connection_must_exist && !profile.conn_conf) { + ldpp_dout(dpp, 0) << "ERROR: remote connection undefined for sync profile" << dendl; + return -EINVAL; + } + + if (profile.conn_conf && default_profile.conn_conf) { + if (!profile.conn_conf->has_endpoint) { + profile.conn_conf->endpoint = default_profile.conn_conf->endpoint; + } + if (!profile.conn_conf->has_host_style) { + profile.conn_conf->host_style = default_profile.conn_conf->host_style; + } + if (!profile.conn_conf->has_key) { + profile.conn_conf->key = default_profile.conn_conf->key; + } + } + + ACLMappings acl_mappings; + + if (!profile.acls_id.empty()) { + if (!acl_profiles.find(profile.acls_id, &acl_mappings)) { + ldpp_dout(dpp, 0) << "ERROR: profile configuration reference non-existent acls id=" << profile.acls_id << dendl; + return -EINVAL; + } + profile.acls = acl_profiles.acl_profiles[profile.acls_id]; + } else if (!profile.acls) { + if (default_profile.acls) { + 
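+ // no ACL mapping of its own: inherit the default profile's mappings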
profile.acls = default_profile.acls; + profile.acls_id = default_profile.acls_id; + } + } + + if (profile.target_path.empty()) { + profile.target_path = default_profile.target_path; + } + if (profile.target_path.empty()) { + profile.target_path = default_target_path; + } + + return 0; + } + + int init_target(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& profile_conf, std::shared_ptr *ptarget) { + std::shared_ptr profile; + profile.reset(new AWSSyncConfig_Profile); + profile->init(profile_conf); + + int ret = init_profile(dpp, cct, profile_conf, *profile, true); + if (ret < 0) { + return ret; + } + + auto& sb = profile->source_bucket; + + if (explicit_profiles.find(sb) != explicit_profiles.end()) { + ldpp_dout(dpp, 0) << "WARNING: duplicate target configuration in sync module" << dendl; + } + + explicit_profiles[sb] = profile; + if (ptarget) { + *ptarget = profile; + } + return 0; + } + + bool do_find_profile(const rgw_bucket bucket, std::shared_ptr *result) { + const string& name = bucket.name; + auto iter = explicit_profiles.upper_bound(name); + if (iter == explicit_profiles.begin()) { + return false; + } + + --iter; + if (iter->first.size() > name.size()) { + return false; + } + if (name.compare(0, iter->first.size(), iter->first) != 0) { + return false; + } + + std::shared_ptr& target = iter->second; + + if (!target->prefix && + name.size() != iter->first.size()) { + return false; + } + + *result = target; + return true; + } + + void find_profile(const rgw_bucket bucket, std::shared_ptr *result) { + if (!do_find_profile(bucket, result)) { + *result = root_profile; + } + } + + AWSSyncConfig() {} + + int init(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) { + auto& default_conf = config["default"]; + + if (config.exists("default")) { + default_profile.init(default_conf); + init_profile(dpp, cct, default_conf, default_profile, false); + } + + for (auto& conn : config["connections"].array()) { + auto new_conn = conn; + + std::shared_ptr c{new AWSSyncConfig_Connection}; + c->init(new_conn); + + connections[new_conn["id"]] = c; + } + + acl_profiles.init(config["acl_profiles"]); + + int r = s3.init(dpp, cct, config["s3"]); + if (r < 0) { + return r; + } + + auto new_root_conf = config; + + r = init_target(dpp, cct, new_root_conf, &root_profile); /* the root profile config */ + if (r < 0) { + return r; + } + + for (auto target_conf : config["profiles"].array()) { + int r = init_target(dpp, cct, target_conf, nullptr); + if (r < 0) { + return r; + } + } + + JSONFormatter jf(true); + dump_conf(cct, jf); + stringstream ss; + jf.flush(ss); + + ldpp_dout(dpp, 5) << "sync module config (parsed representation):\n" << ss.str() << dendl; + + return 0; + } + + void expand_target(RGWDataSyncCtx *sc, const string& sid, const string& path, string *dest) { + apply_meta_param(path, "sid", sid, dest); + + const RGWZoneGroup& zg = sc->env->svc->zone->get_zonegroup(); + apply_meta_param(path, "zonegroup", zg.get_name(), dest); + apply_meta_param(path, "zonegroup_id", zg.get_id(), dest); + + const RGWZone& zone = sc->env->svc->zone->get_zone(); + apply_meta_param(path, "zone", zone.name, dest); + apply_meta_param(path, "zone_id", zone.id, dest); + } + + void update_config(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, const string& sid) { + expand_target(sc, sid, root_profile->target_path, &root_profile->target_path); + ldpp_dout(dpp, 20) << "updated target: (root) -> " << root_profile->target_path << dendl; + for (auto& t : explicit_profiles) { + 
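+ // expand the ${...} variables in each per-bucket target path as well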
expand_target(sc, sid, t.second->target_path, &t.second->target_path); + ldpp_dout(dpp, 20) << "updated target: " << t.first << " -> " << t.second->target_path << dendl; + } + } + + void dump_conf(CephContext *cct, JSONFormatter& jf) const { + Formatter::ObjectSection config(jf, "config"); + root_profile->dump_conf(cct, jf); + jf.open_array_section("connections"); + for (auto c : connections) { + c.second->dump_conf(cct, jf); + } + jf.close_section(); + + acl_profiles.dump_conf(cct, jf); + + { // targets + Formatter::ArraySection as(jf, "profiles"); + for (auto& t : explicit_profiles) { + Formatter::ObjectSection target_section(jf, "profile"); + encode_json("name", t.first, &jf); + t.second->dump_conf(cct, jf); + } + } + } + + string get_path(std::shared_ptr& profile, + const RGWBucketInfo& bucket_info, + const rgw_obj_key& obj) { + string bucket_str; + string owner; + if (!bucket_info.owner.tenant.empty()) { + bucket_str = owner = bucket_info.owner.tenant + "-"; + owner += bucket_info.owner.id; + } + bucket_str += bucket_info.bucket.name; + + const string& path = profile->target_path; + + string new_path; + apply_meta_param(path, "bucket", bucket_str, &new_path); + apply_meta_param(new_path, "owner", owner, &new_path); + + new_path += string("/") + get_key_oid(obj); + + return new_path; + } + + void get_target(std::shared_ptr& profile, + const RGWBucketInfo& bucket_info, + const rgw_obj_key& obj, + string *bucket_name, + string *obj_name) { + string path = get_path(profile, bucket_info, obj); + size_t pos = path.find('/'); + + *bucket_name = path.substr(0, pos); + *obj_name = path.substr(pos + 1); + } + + void init_conns(RGWDataSyncCtx *sc, const string& id) { + auto sync_env = sc->env; + + update_config(sync_env->dpp, sc, id); + + auto& root_conf = root_profile->conn_conf; + + root_profile->conn.reset(new S3RESTConn(sc->cct, + id, + { root_conf->endpoint }, + root_conf->key, + sync_env->svc->zone->get_zonegroup().get_id(), + root_conf->region, + root_conf->host_style)); + + for (auto i : explicit_profiles) { + auto& c = i.second; + + c->conn.reset(new S3RESTConn(sc->cct, + id, + { c->conn_conf->endpoint }, + c->conn_conf->key, + sync_env->svc->zone->get_zonegroup().get_id(), + c->conn_conf->region, + c->conn_conf->host_style)); + } + } +}; + + +struct AWSSyncInstanceEnv { + AWSSyncConfig conf; + string id; + + explicit AWSSyncInstanceEnv(AWSSyncConfig& _conf) : conf(_conf) {} + + void init(RGWDataSyncCtx *sc, uint64_t instance_id) { + char buf[32]; + snprintf(buf, sizeof(buf), "%llx", (unsigned long long)instance_id); + id = buf; + + conf.init_conns(sc, id); + } + + void get_profile(const rgw_bucket& bucket, std::shared_ptr *ptarget) { + conf.find_profile(bucket, ptarget); + ceph_assert(ptarget); + } +}; + +static int do_decode_rest_obj(const DoutPrefixProvider *dpp, CephContext *cct, map& attrs, map& headers, rgw_rest_obj *info) +{ + for (auto header : headers) { + const string& val = header.second; + if (header.first == "RGWX_OBJECT_SIZE") { + info->content_len = atoi(val.c_str()); + } else { + info->attrs[header.first] = val; + } + } + + info->acls.set_ctx(cct); + auto aiter = attrs.find(RGW_ATTR_ACL); + if (aiter != attrs.end()) { + bufferlist& bl = aiter->second; + auto bliter = bl.cbegin(); + try { + info->acls.decode(bliter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode policy off attrs" << dendl; + return -EIO; + } + } else { + ldpp_dout(dpp, 0) << "WARNING: acl attrs not provided" << dendl; + } + + return 0; +} + +class RGWRESTStreamGetCRF : 
public RGWStreamReadHTTPResourceCRF +{ + RGWDataSyncCtx *sc; + RGWRESTConn *conn; + rgw::sal::Object* src_obj; + RGWRESTConn::get_obj_params req_params; + + rgw_sync_aws_src_obj_properties src_properties; +public: + RGWRESTStreamGetCRF(CephContext *_cct, + RGWCoroutinesEnv *_env, + RGWCoroutine *_caller, + RGWDataSyncCtx *_sc, + RGWRESTConn *_conn, + rgw::sal::Object* _src_obj, + const rgw_sync_aws_src_obj_properties& _src_properties) : RGWStreamReadHTTPResourceCRF(_cct, _env, _caller, + _sc->env->http_manager, _src_obj->get_key()), + sc(_sc), conn(_conn), src_obj(_src_obj), + src_properties(_src_properties) { + } + + int init(const DoutPrefixProvider *dpp) override { + /* init input connection */ + + + req_params.get_op = true; + req_params.prepend_metadata = true; + + req_params.unmod_ptr = &src_properties.mtime; + req_params.etag = src_properties.etag; + req_params.mod_zone_id = src_properties.zone_short_id; + req_params.mod_pg_ver = src_properties.pg_ver; + + if (range.is_set) { + req_params.range_is_set = true; + req_params.range_start = range.ofs; + req_params.range_end = range.ofs + range.size - 1; + } + + RGWRESTStreamRWRequest *in_req; + int ret = conn->get_obj(dpp, src_obj, req_params, false /* send */, &in_req); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): conn->get_obj() returned ret=" << ret << dendl; + return ret; + } + + set_req(in_req); + + return RGWStreamReadHTTPResourceCRF::init(dpp); + } + + int decode_rest_obj(const DoutPrefixProvider *dpp, map& headers, bufferlist& extra_data) override { + map src_attrs; + + ldpp_dout(dpp, 20) << __func__ << ":" << " headers=" << headers << " extra_data.length()=" << extra_data.length() << dendl; + + if (extra_data.length() > 0) { + JSONParser jp; + if (!jp.parse(extra_data.c_str(), extra_data.length())) { + ldpp_dout(dpp, 0) << "ERROR: failed to parse response extra data. 
len=" << extra_data.length() << " data=" << extra_data.c_str() << dendl; + return -EIO; + } + + JSONDecoder::decode_json("attrs", src_attrs, &jp); + } + return do_decode_rest_obj(dpp, sc->cct, src_attrs, headers, &rest_obj); + } + + bool need_extra_data() override { + return true; + } +}; + +static std::set keep_headers = { "CONTENT_TYPE", + "CONTENT_ENCODING", + "CONTENT_DISPOSITION", + "CONTENT_LANGUAGE" }; + +class RGWAWSStreamPutCRF : public RGWStreamWriteHTTPResourceCRF +{ + RGWDataSyncCtx *sc; + rgw_sync_aws_src_obj_properties src_properties; + std::shared_ptr target; + rgw::sal::Object* dest_obj; + string etag; +public: + RGWAWSStreamPutCRF(CephContext *_cct, + RGWCoroutinesEnv *_env, + RGWCoroutine *_caller, + RGWDataSyncCtx *_sc, + const rgw_sync_aws_src_obj_properties& _src_properties, + std::shared_ptr& _target, + rgw::sal::Object* _dest_obj) : RGWStreamWriteHTTPResourceCRF(_cct, _env, _caller, _sc->env->http_manager), + sc(_sc), src_properties(_src_properties), target(_target), dest_obj(_dest_obj) { + } + + int init() override { + /* init output connection */ + RGWRESTStreamS3PutObj *out_req{nullptr}; + + if (multipart.is_multipart) { + char buf[32]; + snprintf(buf, sizeof(buf), "%d", multipart.part_num); + rgw_http_param_pair params[] = { { "uploadId", multipart.upload_id.c_str() }, + { "partNumber", buf }, + { nullptr, nullptr } }; + target->conn->put_obj_send_init(dest_obj, params, &out_req); + } else { + target->conn->put_obj_send_init(dest_obj, nullptr, &out_req); + } + + set_req(out_req); + + return RGWStreamWriteHTTPResourceCRF::init(); + } + + static bool keep_attr(const string& h) { + return (keep_headers.find(h) != keep_headers.end() || + boost::algorithm::starts_with(h, "X_AMZ_")); + } + + static void init_send_attrs(const DoutPrefixProvider *dpp, + CephContext *cct, + const rgw_rest_obj& rest_obj, + const rgw_sync_aws_src_obj_properties& src_properties, + const AWSSyncConfig_Profile *target, + map *attrs) { + auto& new_attrs = *attrs; + + new_attrs.clear(); + + for (auto& hi : rest_obj.attrs) { + if (keep_attr(hi.first)) { + new_attrs.insert(hi); + } + } + + auto acl = rest_obj.acls.get_acl(); + + map > access_map; + + if (target->acls) { + for (auto& grant : acl.get_grant_map()) { + auto& orig_grantee = grant.first; + auto& perm = grant.second; + + string grantee; + + const auto& am = target->acls->acl_mappings; + + auto iter = am.find(orig_grantee); + if (iter == am.end()) { + ldpp_dout(dpp, 20) << "acl_mappings: Could not find " << orig_grantee << " .. 
ignoring" << dendl; + continue; + } + + grantee = iter->second.dest_id; + + string type; + + switch (iter->second.type) { + case ACL_TYPE_CANON_USER: + type = "id"; + break; + case ACL_TYPE_EMAIL_USER: + type = "emailAddress"; + break; + case ACL_TYPE_GROUP: + type = "uri"; + break; + default: + continue; + } + + string tv = type + "=" + grantee; + + int flags = perm.get_permission().get_permissions(); + if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) { + access_map[flags].push_back(tv); + continue; + } + + for (int i = 1; i <= RGW_PERM_WRITE_ACP; i <<= 1) { + if (flags & i) { + access_map[i].push_back(tv); + } + } + } + } + + for (auto aiter : access_map) { + int grant_type = aiter.first; + + string header_str("x-amz-grant-"); + + switch (grant_type) { + case RGW_PERM_READ: + header_str.append("read"); + break; + case RGW_PERM_WRITE: + header_str.append("write"); + break; + case RGW_PERM_READ_ACP: + header_str.append("read-acp"); + break; + case RGW_PERM_WRITE_ACP: + header_str.append("write-acp"); + break; + case RGW_PERM_FULL_CONTROL: + header_str.append("full-control"); + break; + } + + string s; + + for (auto viter : aiter.second) { + if (!s.empty()) { + s.append(", "); + } + s.append(viter); + } + + ldpp_dout(dpp, 20) << "acl_mappings: set acl: " << header_str << "=" << s << dendl; + + new_attrs[header_str] = s; + } + + char buf[32]; + snprintf(buf, sizeof(buf), "%llu", (long long)src_properties.versioned_epoch); + new_attrs["x-amz-meta-rgwx-versioned-epoch"] = buf; + + utime_t ut(src_properties.mtime); + snprintf(buf, sizeof(buf), "%lld.%09lld", + (long long)ut.sec(), + (long long)ut.nsec()); + + new_attrs["x-amz-meta-rgwx-source-mtime"] = buf; + new_attrs["x-amz-meta-rgwx-source-etag"] = src_properties.etag; + new_attrs["x-amz-meta-rgwx-source-key"] = rest_obj.key.name; + if (!rest_obj.key.instance.empty()) { + new_attrs["x-amz-meta-rgwx-source-version-id"] = rest_obj.key.instance; + } + } + + void send_ready(const DoutPrefixProvider *dpp, const rgw_rest_obj& rest_obj) override { + RGWRESTStreamS3PutObj *r = static_cast(req); + + map new_attrs; + if (!multipart.is_multipart) { + init_send_attrs(dpp, sc->cct, rest_obj, src_properties, target.get(), &new_attrs); + } + + r->set_send_length(rest_obj.content_len); + + RGWAccessControlPolicy policy; + + r->send_ready(dpp, target->conn->get_key(), new_attrs, policy); + } + + void handle_headers(const map& headers) { + for (auto h : headers) { + if (h.first == "ETAG") { + etag = h.second; + } + } + } + + bool get_etag(string *petag) { + if (etag.empty()) { + return false; + } + *petag = etag; + return true; + } +}; + + +class RGWAWSStreamObjToCloudPlainCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *source_conn; + std::shared_ptr target; + rgw::sal::Object* src_obj; + rgw::sal::Object* dest_obj; + + rgw_sync_aws_src_obj_properties src_properties; + + std::shared_ptr in_crf; + std::shared_ptr out_crf; + +public: + RGWAWSStreamObjToCloudPlainCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_source_conn, + rgw::sal::Object* _src_obj, + const rgw_sync_aws_src_obj_properties& _src_properties, + std::shared_ptr _target, + rgw::sal::Object* _dest_obj) : RGWCoroutine(_sc->cct), + sc(_sc), + source_conn(_source_conn), + target(_target), + src_obj(_src_obj), + dest_obj(_dest_obj), + src_properties(_src_properties) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + /* init input */ + in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sc, + source_conn, src_obj, + src_properties)); + + /* init 
output */ + out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sc, + src_properties, target, dest_obj)); + + yield call(new RGWStreamSpliceCR(cct, sc->env->http_manager, in_crf, out_crf)); + if (retcode < 0) { + return set_cr_error(retcode); + } + + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSStreamObjToCloudMultipartPartCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *source_conn; + std::shared_ptr target; + rgw::sal::Object* src_obj; + rgw::sal::Object* dest_obj; + + rgw_sync_aws_src_obj_properties src_properties; + + string upload_id; + + rgw_sync_aws_multipart_part_info part_info; + + std::shared_ptr in_crf; + std::shared_ptr out_crf; + + string *petag; + +public: + RGWAWSStreamObjToCloudMultipartPartCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_source_conn, + rgw::sal::Object* _src_obj, + std::shared_ptr& _target, + rgw::sal::Object* _dest_obj, + const rgw_sync_aws_src_obj_properties& _src_properties, + const string& _upload_id, + const rgw_sync_aws_multipart_part_info& _part_info, + string *_petag) : RGWCoroutine(_sc->cct), + sc(_sc), + source_conn(_source_conn), + target(_target), + src_obj(_src_obj), + dest_obj(_dest_obj), + src_properties(_src_properties), + upload_id(_upload_id), + part_info(_part_info), + petag(_petag) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + /* init input */ + in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sc, + source_conn, src_obj, + src_properties)); + + in_crf->set_range(part_info.ofs, part_info.size); + + /* init output */ + out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sc, + src_properties, target, dest_obj)); + + out_crf->set_multipart(upload_id, part_info.part_num, part_info.size); + + yield call(new RGWStreamSpliceCR(cct, sc->env->http_manager, in_crf, out_crf)); + if (retcode < 0) { + return set_cr_error(retcode); + } + + if (!(static_cast(out_crf.get()))->get_etag(petag)) { + ldpp_dout(dpp, 0) << "ERROR: failed to get etag from PUT request" << dendl; + return set_cr_error(-EIO); + } + + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSAbortMultipartCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *dest_conn; + rgw::sal::Object* dest_obj; + + string upload_id; + +public: + RGWAWSAbortMultipartCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_dest_conn, + rgw::sal::Object* _dest_obj, + const string& _upload_id) : RGWCoroutine(_sc->cct), + sc(_sc), + dest_conn(_dest_conn), + dest_obj(_dest_obj), + upload_id(_upload_id) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + + yield { + rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; + bufferlist bl; + call(new RGWDeleteRESTResourceCR(sc->cct, dest_conn, sc->env->http_manager, + obj_to_aws_path(dest_obj), params)); + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload for dest object=" << dest_obj << " (retcode=" << retcode << ")" << dendl; + return set_cr_error(retcode); + } + + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSInitMultipartCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *dest_conn; + rgw::sal::Object* dest_obj; + + uint64_t obj_size; + map attrs; + + bufferlist out_bl; + + string *upload_id; + + struct InitMultipartResult { + string bucket; + string key; + string upload_id; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Bucket", bucket, obj); + RGWXMLDecoder::decode_xml("Key", key, obj); + 
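+ // UploadId is what the caller consumes; Bucket/Key are decoded only for the debug log below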
RGWXMLDecoder::decode_xml("UploadId", upload_id, obj); + } + } result; + +public: + RGWAWSInitMultipartCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_dest_conn, + rgw::sal::Object* _dest_obj, + uint64_t _obj_size, + const map& _attrs, + string *_upload_id) : RGWCoroutine(_sc->cct), + sc(_sc), + dest_conn(_dest_conn), + dest_obj(_dest_obj), + obj_size(_obj_size), + attrs(_attrs), + upload_id(_upload_id) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + + yield { + rgw_http_param_pair params[] = { { "uploads", nullptr }, {nullptr, nullptr} }; + bufferlist bl; + call(new RGWPostRawRESTResourceCR (sc->cct, dest_conn, sc->env->http_manager, + obj_to_aws_path(dest_obj), params, &attrs, bl, &out_bl)); + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl; + return set_cr_error(retcode); + } + { + /* + * If one of the following fails we cannot abort upload, as we cannot + * extract the upload id. If one of these fail it's very likely that that's + * the least of our problem. + */ + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; + return set_cr_error(-EIO); + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; + return set_cr_error(-EIO); + } + + try { + RGWXMLDecoder::decode_xml("InitiateMultipartUploadResult", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return set_cr_error(-EIO); + } + } + + ldpp_dout(dpp, 20) << "init multipart result: bucket=" << result.bucket << " key=" << result.key << " upload_id=" << result.upload_id << dendl; + + *upload_id = result.upload_id; + + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSCompleteMultipartCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *dest_conn; + rgw::sal::Object* dest_obj; + + bufferlist out_bl; + + string upload_id; + + struct CompleteMultipartReq { + map parts; + + explicit CompleteMultipartReq(const map& _parts) : parts(_parts) {} + + void dump_xml(Formatter *f) const { + for (auto p : parts) { + f->open_object_section("Part"); + encode_xml("PartNumber", p.first, f); + encode_xml("ETag", p.second.etag, f); + f->close_section(); + }; + } + } req_enc; + + struct CompleteMultipartResult { + string location; + string bucket; + string key; + string etag; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Location", bucket, obj); + RGWXMLDecoder::decode_xml("Bucket", bucket, obj); + RGWXMLDecoder::decode_xml("Key", key, obj); + RGWXMLDecoder::decode_xml("ETag", etag, obj); + } + } result; + +public: + RGWAWSCompleteMultipartCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_dest_conn, + rgw::sal::Object* _dest_obj, + string _upload_id, + const map& _parts) : RGWCoroutine(_sc->cct), + sc(_sc), + dest_conn(_dest_conn), + dest_obj(_dest_obj), + upload_id(_upload_id), + req_enc(_parts) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + + yield { + rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; + stringstream ss; + XMLFormatter formatter; + + encode_xml("CompleteMultipartUpload", req_enc, &formatter); + + formatter.flush(ss); + + bufferlist bl; + 
bl.append(ss.str()); + + call(new RGWPostRawRESTResourceCR (sc->cct, dest_conn, sc->env->http_manager, + obj_to_aws_path(dest_obj), params, nullptr, bl, &out_bl)); + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to complete multipart upload for dest object=" << dest_obj << dendl; + return set_cr_error(retcode); + } + { + /* + * If one of the following fails we cannot abort the upload, as we cannot + * extract the upload id. If one of these fails, it's very likely the least + * of our problems. + */ + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart complete response from server" << dendl; + return set_cr_error(-EIO); + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; + return set_cr_error(-EIO); + } + + try { + RGWXMLDecoder::decode_xml("CompleteMultipartUploadResult", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return set_cr_error(-EIO); + } + } + + ldpp_dout(dpp, 20) << "complete multipart result: location=" << result.location << " bucket=" << result.bucket << " key=" << result.key << " etag=" << result.etag << dendl; + + return set_cr_done(); + } + + return 0; + } +}; + + +class RGWAWSStreamAbortMultipartUploadCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWRESTConn *dest_conn; + rgw::sal::Object* dest_obj; + const rgw_raw_obj status_obj; + + string upload_id; + +public: + + RGWAWSStreamAbortMultipartUploadCR(RGWDataSyncCtx *_sc, + RGWRESTConn *_dest_conn, + rgw::sal::Object* _dest_obj, + const rgw_raw_obj& _status_obj, + const string& _upload_id) : RGWCoroutine(_sc->cct), sc(_sc), + dest_conn(_dest_conn), + dest_obj(_dest_obj), + status_obj(_status_obj), + upload_id(_upload_id) {} + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield call(new RGWAWSAbortMultipartCR(sc, dest_conn, dest_obj, upload_id)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload dest obj=" << dest_obj << " upload_id=" << upload_id << " retcode=" << retcode << dendl; + /* ignore error, best effort */ + } + yield call(new RGWRadosRemoveCR(sc->env->driver, status_obj)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " retcode=" << retcode << dendl; + /* ignore error, best effort */ + } + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSStreamObjToCloudMultipartCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + AWSSyncConfig& conf; + RGWRESTConn *source_conn; + std::shared_ptr<AWSSyncConfig_Profile> target; + rgw::sal::Object* src_obj; + rgw::sal::Object* dest_obj; + + uint64_t obj_size; + string src_etag; + rgw_sync_aws_src_obj_properties src_properties; + rgw_rest_obj rest_obj; + + rgw_sync_aws_multipart_upload_info status; + + map<string, string> new_attrs; + + rgw_sync_aws_multipart_part_info *pcur_part_info{nullptr}; + + int ret_err{0}; + + rgw_raw_obj status_obj; + +public: + RGWAWSStreamObjToCloudMultipartCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, + AWSSyncConfig& _conf, + RGWRESTConn *_source_conn, + rgw::sal::Object* _src_obj, + std::shared_ptr<AWSSyncConfig_Profile>& _target, + rgw::sal::Object* _dest_obj, + uint64_t _obj_size, + const rgw_sync_aws_src_obj_properties& _src_properties, + const rgw_rest_obj& 
_rest_obj) : RGWCoroutine(_sc->cct), + sc(_sc), + sync_env(_sc->env), + conf(_conf), + source_conn(_source_conn), + target(_target), + src_obj(_src_obj), + dest_obj(_dest_obj), + obj_size(_obj_size), + src_properties(_src_properties), + rest_obj(_rest_obj), + status_obj(sync_env->svc->zone->get_zone_params().log_pool, + RGWBucketPipeSyncStatusManager::obj_status_oid(_sync_pipe, sc->source_zone, src_obj)) { + } + + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + yield call(new RGWSimpleRadosReadCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, + status_obj, &status, false)); + + if (retcode < 0 && retcode != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: failed to read sync status of object " << src_obj << " retcode=" << retcode << dendl; + return retcode; + } + + if (retcode >= 0) { + /* check here that mtime and size did not change */ + + if (status.src_properties.mtime != src_properties.mtime || status.obj_size != obj_size || + status.src_properties.etag != src_properties.etag) { + yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); + retcode = -ENOENT; + } + } + + if (retcode == -ENOENT) { + RGWAWSStreamPutCRF::init_send_attrs(dpp, sc->cct, rest_obj, src_properties, target.get(), &new_attrs); + + yield call(new RGWAWSInitMultipartCR(sc, target->conn.get(), dest_obj, status.obj_size, std::move(new_attrs), &status.upload_id)); + if (retcode < 0) { + return set_cr_error(retcode); + } + + status.obj_size = obj_size; + status.src_properties = src_properties; +#define MULTIPART_MAX_PARTS 10000 + uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS; + status.part_size = std::max(conf.s3.multipart_min_part_size, min_part_size); + status.num_parts = (obj_size + status.part_size - 1) / status.part_size; + status.cur_part = 1; + } + + for (; (uint32_t)status.cur_part <= status.num_parts; ++status.cur_part) { + yield { + rgw_sync_aws_multipart_part_info& cur_part_info = status.parts[status.cur_part]; + cur_part_info.part_num = status.cur_part; + cur_part_info.ofs = status.cur_ofs; + cur_part_info.size = std::min((uint64_t)status.part_size, status.obj_size - status.cur_ofs); + + pcur_part_info = &cur_part_info; + + status.cur_ofs += status.part_size; + + call(new RGWAWSStreamObjToCloudMultipartPartCR(sc, + source_conn, src_obj, + target, + dest_obj, + status.src_properties, + status.upload_id, + cur_part_info, + &cur_part_info.etag)); + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to sync obj=" << src_obj << ", sync via multipart upload, upload_id=" << status.upload_id << " part number " << status.cur_part << " (error: " << cpp_strerror(-retcode) << ")" << dendl; + ret_err = retcode; + yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); + return set_cr_error(ret_err); + } + + yield call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, status_obj, status)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to store multipart upload state, retcode=" << retcode << dendl; + /* continue with upload anyway */ + } + ldpp_dout(dpp, 20) << "sync of object=" << src_obj << " via multipart upload, finished sending part #" << status.cur_part << " etag=" << pcur_part_info->etag << dendl; + } + + yield call(new RGWAWSCompleteMultipartCR(sc, target->conn.get(), dest_obj, status.upload_id, status.parts)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to complete multipart upload of obj=" << 
src_obj << " (error: " << cpp_strerror(-retcode) << ")" << dendl; + ret_err = retcode; + yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); + return set_cr_error(ret_err); + } + + /* remove status obj */ + yield call(new RGWRadosRemoveCR(sync_env->driver, status_obj)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload obj=" << src_obj << " upload_id=" << status.upload_id << " part number " << status.cur_part << " (" << cpp_strerror(-retcode) << ")" << dendl; + /* ignore error, best effort */ + } + return set_cr_done(); + } + + return 0; + } +}; +template +int decode_attr(map& attrs, const char *attr_name, T *result, T def_val) +{ + map::iterator iter = attrs.find(attr_name); + if (iter == attrs.end()) { + *result = def_val; + return 0; + } + bufferlist& bl = iter->second; + if (bl.length() == 0) { + *result = def_val; + return 0; + } + auto bliter = bl.cbegin(); + try { + decode(*result, bliter); + } catch (buffer::error& err) { + return -EIO; + } + return 0; +} + +// maybe use Fetch Remote Obj instead? +class RGWAWSHandleRemoteObjCBCR: public RGWStatRemoteObjCBCR { + rgw_bucket_sync_pipe sync_pipe; + AWSSyncInstanceEnv& instance; + + uint64_t versioned_epoch{0}; + + RGWRESTConn *source_conn{nullptr}; + std::shared_ptr target; + bufferlist res; + unordered_map bucket_created; + string target_bucket_name; + string target_obj_name; + rgw_rest_obj rest_obj; + int ret{0}; + + uint32_t src_zone_short_id{0}; + uint64_t src_pg_ver{0}; + + bufferlist out_bl; + + struct CreateBucketResult { + string code; + + void decode_xml(XMLObj *obj) { + RGWXMLDecoder::decode_xml("Code", code, obj); + } + } result; + + rgw_bucket target_bucket; + std::unique_ptr bucket; + std::unique_ptr src_obj; + std::unique_ptr dest_bucket; + std::unique_ptr dest_obj; + + +public: + RGWAWSHandleRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, + rgw_obj_key& _key, + AWSSyncInstanceEnv& _instance, + uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sc, _sync_pipe.info.source_bs.bucket, _key), + sync_pipe(_sync_pipe), + instance(_instance), versioned_epoch(_versioned_epoch) + {} + + ~RGWAWSHandleRemoteObjCBCR(){ + } + + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + ret = decode_attr(attrs, RGW_ATTR_PG_VER, &src_pg_ver, (uint64_t)0); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode pg ver attr, ignoring" << dendl; + } else { + ret = decode_attr(attrs, RGW_ATTR_SOURCE_ZONE, &src_zone_short_id, (uint32_t)0); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode source zone short_id attr, ignoring" << dendl; + src_pg_ver = 0; /* all or nothing */ + } + } + ldpp_dout(dpp, 4) << "AWS: download begin: z=" << sc->source_zone + << " b=" << src_bucket << " k=" << key << " size=" << size + << " mtime=" << mtime << " etag=" << etag + << " zone_short_id=" << src_zone_short_id << " pg_ver=" << src_pg_ver + << dendl; + + source_conn = sync_env->svc->zone->get_zone_conn(sc->source_zone); + if (!source_conn) { + ldpp_dout(dpp, 0) << "ERROR: cannot find http connection to zone " << sc->source_zone << dendl; + return set_cr_error(-EINVAL); + } + + instance.get_profile(sync_pipe.info.source_bs.bucket, &target); + instance.conf.get_target(target, sync_pipe.dest_bucket_info, key, &target_bucket_name, &target_obj_name); + + if (bucket_created.find(target_bucket_name) == bucket_created.end()){ + yield { + ldpp_dout(dpp, 0) << "AWS: creating bucket " << target_bucket_name << 
dendl; + bufferlist bl; + call(new RGWPutRawRESTResourceCR (sc->cct, target->conn.get(), + sync_env->http_manager, + target_bucket_name, nullptr, bl, &out_bl)); + } + if (retcode < 0 ) { + RGWXMLDecoder::XMLParser parser; + if (!parser.init()) { + ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; + return set_cr_error(retcode); + } + + if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; + return set_cr_error(retcode); + } + + try { + RGWXMLDecoder::decode_xml("Error", result, &parser, true); + } catch (RGWXMLDecoder::err& err) { + string str(out_bl.c_str(), out_bl.length()); + ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; + return set_cr_error(retcode); + } + + if (result.code != "BucketAlreadyOwnedByYou") { + return set_cr_error(retcode); + } + } + + bucket_created[target_bucket_name] = true; + } + + yield { + bucket.reset(new rgw::sal::RadosBucket(sync_env->driver, src_bucket)); + src_obj.reset(new rgw::sal::RadosObject(sync_env->driver, key, bucket.get())); + + /* init output */ + target_bucket.name = target_bucket_name; /* this is only possible because we only use bucket name for + uri resolution */ + dest_bucket.reset(new rgw::sal::RadosBucket(sync_env->driver, target_bucket)); + dest_obj.reset(new rgw::sal::RadosObject(sync_env->driver, rgw_obj_key(target_obj_name), dest_bucket.get())); + + rgw_sync_aws_src_obj_properties src_properties; + src_properties.mtime = mtime; + src_properties.etag = etag; + src_properties.zone_short_id = src_zone_short_id; + src_properties.pg_ver = src_pg_ver; + src_properties.versioned_epoch = versioned_epoch; + + if (size < instance.conf.s3.multipart_sync_threshold) { + call(new RGWAWSStreamObjToCloudPlainCR(sc, source_conn, src_obj.get(), + src_properties, + target, + dest_obj.get())); + } else { + rgw_rest_obj rest_obj; + rest_obj.init(key); + if (do_decode_rest_obj(dpp, sc->cct, attrs, headers, &rest_obj)) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode rest obj out of headers=" << headers << ", attrs=" << attrs << dendl; + return set_cr_error(-EINVAL); + } + call(new RGWAWSStreamObjToCloudMultipartCR(sc, sync_pipe, instance.conf, source_conn, src_obj.get(), + target, dest_obj.get(), size, src_properties, rest_obj)); + } + } + if (retcode < 0) { + return set_cr_error(retcode); + } + + return set_cr_done(); + } + + return 0; + } +}; + +class RGWAWSHandleRemoteObjCR : public RGWCallStatRemoteObjCR { + rgw_bucket_sync_pipe sync_pipe; + AWSSyncInstanceEnv& instance; + uint64_t versioned_epoch; +public: + RGWAWSHandleRemoteObjCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, + AWSSyncInstanceEnv& _instance, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sc, _sync_pipe.info.source_bs.bucket, _key), + sync_pipe(_sync_pipe), + instance(_instance), versioned_epoch(_versioned_epoch) { + } + + ~RGWAWSHandleRemoteObjCR() {} + + RGWStatRemoteObjCBCR *allocate_callback() override { + return new RGWAWSHandleRemoteObjCBCR(sc, sync_pipe, key, instance, versioned_epoch); + } +}; + +class RGWAWSRemoveRemoteObjCBCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + std::shared_ptr target; + rgw_bucket_sync_pipe sync_pipe; + rgw_obj_key key; + ceph::real_time mtime; + AWSSyncInstanceEnv& instance; + int ret{0}; +public: + RGWAWSRemoveRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, const 
ceph::real_time& _mtime,
+                            AWSSyncInstanceEnv& _instance) : RGWCoroutine(_sc->cct), sc(_sc),
+                                                             sync_pipe(_sync_pipe), key(_key),
+                                                             mtime(_mtime), instance(_instance) {}
+  int operate(const DoutPrefixProvider *dpp) override {
+    reenter(this) {
+      ldpp_dout(dpp, 0) << ": remove remote obj: z=" << sc->source_zone
+                        << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << dendl;
+      yield {
+        instance.get_profile(sync_pipe.info.source_bs.bucket, &target);
+        string path = instance.conf.get_path(target, sync_pipe.dest_bucket_info, key);
+        ldpp_dout(dpp, 0) << "AWS: removing aws object at " << path << dendl;
+
+        call(new RGWDeleteRESTResourceCR(sc->cct, target->conn.get(),
+                                         sc->env->http_manager,
+                                         path, nullptr /* params */));
+      }
+      if (retcode < 0) {
+        return set_cr_error(retcode);
+      }
+      return set_cr_done();
+    }
+    return 0;
+  }
+
+};
+
+
+class RGWAWSDataSyncModule: public RGWDataSyncModule {
+  CephContext *cct;
+  AWSSyncInstanceEnv instance;
+public:
+  RGWAWSDataSyncModule(CephContext *_cct, AWSSyncConfig& _conf) :
+                  cct(_cct),
+                  instance(_conf) {
+  }
+
+  void init(RGWDataSyncCtx *sc, uint64_t instance_id) override {
+    instance.init(sc, instance_id);
+  }
+
+  ~RGWAWSDataSyncModule() {}
+
+  RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key,
+                            std::optional<uint64_t> versioned_epoch,
+                            rgw_zone_set *zones_trace) override {
+    ldout(sc->cct, 0) << instance.id << ": sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
+    return new RGWAWSHandleRemoteObjCR(sc, sync_pipe, key, instance, versioned_epoch.value_or(0));
+  }
+  RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch,
+                              rgw_zone_set *zones_trace) override {
+    ldout(sc->cct, 0) << "rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+    return new RGWAWSRemoveRemoteObjCBCR(sc, sync_pipe, key, mtime, instance);
+  }
+  RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
+                                     rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch,
+                                     rgw_zone_set *zones_trace) override {
+    ldout(sc->cct, 0) << "AWS Not implemented: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime
+                      << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+    return NULL;
+  }
+};
+
+class RGWAWSSyncModuleInstance : public RGWSyncModuleInstance {
+  RGWAWSDataSyncModule data_handler;
+public:
+  RGWAWSSyncModuleInstance(CephContext *cct, AWSSyncConfig& _conf) : data_handler(cct, _conf) {}
+  RGWDataSyncModule *get_data_handler() override {
+    return &data_handler;
+  }
+};
+
+int RGWAWSSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance){
+  AWSSyncConfig conf;
+
+  int r = conf.init(dpp, cct, config);
+  if (r < 0) {
+    return r;
+  }
+
+  instance->reset(new RGWAWSSyncModuleInstance(cct, conf));
+  return 0;
+}
diff --git a/src/rgw/driver/rados/rgw_sync_module_aws.h b/src/rgw/driver/rados/rgw_sync_module_aws.h
new file mode 100644
index 00000000000..48f0145fdf9
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_sync_module_aws.h
@@ -0,0 +1,111 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef RGW_SYNC_MODULE_AWS_H
+#define RGW_SYNC_MODULE_AWS_H
+
+#include "rgw_sync_module.h"
+
+struct rgw_sync_aws_multipart_part_info {
+  int part_num{0};
+  uint64_t ofs{0};
+  uint64_t size{0};
+  
std::string etag; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(part_num, bl); + encode(ofs, bl); + encode(size, bl); + encode(etag, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(part_num, bl); + decode(ofs, bl); + decode(size, bl); + decode(etag, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_part_info) + +struct rgw_sync_aws_src_obj_properties { + ceph::real_time mtime; + std::string etag; + uint32_t zone_short_id{0}; + uint64_t pg_ver{0}; + uint64_t versioned_epoch{0}; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(mtime, bl); + encode(etag, bl); + encode(zone_short_id, bl); + encode(pg_ver, bl); + encode(versioned_epoch, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(mtime, bl); + decode(etag, bl); + decode(zone_short_id, bl); + decode(pg_ver, bl); + decode(versioned_epoch, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_sync_aws_src_obj_properties) + +struct rgw_sync_aws_multipart_upload_info { + std::string upload_id; + uint64_t obj_size; + rgw_sync_aws_src_obj_properties src_properties; + uint32_t part_size{0}; + uint32_t num_parts{0}; + + int cur_part{0}; + uint64_t cur_ofs{0}; + + std::map parts; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(upload_id, bl); + encode(obj_size, bl); + encode(src_properties, bl); + encode(part_size, bl); + encode(num_parts, bl); + encode(cur_part, bl); + encode(cur_ofs, bl); + encode(parts, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(upload_id, bl); + decode(obj_size, bl); + decode(src_properties, bl); + decode(part_size, bl); + decode(num_parts, bl); + decode(cur_part, bl); + decode(cur_ofs, bl); + decode(parts, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_upload_info) + +class RGWAWSSyncModule : public RGWSyncModule { + public: + RGWAWSSyncModule() {} + bool supports_data_export() override { return false;} + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; +}; + +#endif /* RGW_SYNC_MODULE_AWS_H */ diff --git a/src/rgw/driver/rados/rgw_sync_module_es.cc b/src/rgw/driver/rados/rgw_sync_module_es.cc new file mode 100644 index 00000000000..3c294bbbc19 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_es.cc @@ -0,0 +1,962 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_b64.h" +#include "rgw_common.h" +#include "rgw_coroutine.h" +#include "rgw_sync_module.h" +#include "rgw_data_sync.h" +#include "rgw_sync_module_es.h" +#include "rgw_sync_module_es_rest.h" +#include "rgw_rest_conn.h" +#include "rgw_cr_rest.h" +#include "rgw_op.h" +#include "rgw_es_query.h" +#include "rgw_zone.h" + +#include "services/svc_zone.h" + +#include "include/str_list.h" + +#include + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +/* + * allowlist utility. Config string is a list of entries, where an entry is either an item, + * a prefix, or a suffix. An item would be the name of the entity that we'd look up, + * a prefix would be a string ending with an asterisk, a suffix would be a string starting + * with an asterisk. 
For example: + * + * bucket1, bucket2, foo*, *bar + */ +class ItemList { + bool approve_all{false}; + + set entries; + set prefixes; + set suffixes; + + void parse(const string& str) { + list l; + + get_str_list(str, ",", l); + + for (auto& entry : l) { + entry = rgw_trim_whitespace(entry); + if (entry.empty()) { + continue; + } + + if (entry == "*") { + approve_all = true; + return; + } + + if (entry[0] == '*') { + suffixes.insert(entry.substr(1)); + continue; + } + + if (entry.back() == '*') { + prefixes.insert(entry.substr(0, entry.size() - 1)); + continue; + } + + entries.insert(entry); + } + } + +public: + ItemList() {} + void init(const string& str, bool def_val) { + if (str.empty()) { + approve_all = def_val; + } else { + parse(str); + } + } + + bool exists(const string& entry) { + if (approve_all) { + return true; + } + + if (entries.find(entry) != entries.end()) { + return true; + } + + auto i = prefixes.upper_bound(entry); + if (i != prefixes.begin()) { + --i; + if (boost::algorithm::starts_with(entry, *i)) { + return true; + } + } + + for (i = suffixes.begin(); i != suffixes.end(); ++i) { + if (boost::algorithm::ends_with(entry, *i)) { + return true; + } + } + + return false; + } +}; + +#define ES_NUM_SHARDS_MIN 5 + +#define ES_NUM_SHARDS_DEFAULT 16 +#define ES_NUM_REPLICAS_DEFAULT 1 + +using ESVersion = std::pair; +static constexpr ESVersion ES_V5{5,0}; +static constexpr ESVersion ES_V7{7,0}; + +struct ESInfo { + std::string name; + std::string cluster_name; + std::string cluster_uuid; + ESVersion version; + + void decode_json(JSONObj *obj); + + std::string get_version_str(){ + return std::to_string(version.first) + "." + std::to_string(version.second); + } +}; + +// simple wrapper structure to wrap the es version nested type +struct es_version_decoder { + ESVersion version; + + int parse_version(const std::string& s) { + int major, minor; + int ret = sscanf(s.c_str(), "%d.%d", &major, &minor); + if (ret < 0) { + return ret; + } + version = std::make_pair(major,minor); + return 0; + } + + void decode_json(JSONObj *obj) { + std::string s; + JSONDecoder::decode_json("number",s,obj); + if (parse_version(s) < 0) + throw JSONDecoder::err("Failed to parse ElasticVersion"); + } +}; + + +void ESInfo::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("name", name, obj); + JSONDecoder::decode_json("cluster_name", cluster_name, obj); + JSONDecoder::decode_json("cluster_uuid", cluster_uuid, obj); + es_version_decoder esv; + JSONDecoder::decode_json("version", esv, obj); + version = std::move(esv.version); +} + +struct ElasticConfig { + uint64_t sync_instance{0}; + string id; + string index_path; + std::unique_ptr conn; + bool explicit_custom_meta{true}; + string override_index_path; + ItemList index_buckets; + ItemList allow_owners; + uint32_t num_shards{0}; + uint32_t num_replicas{0}; + std::map default_headers = {{ "Content-Type", "application/json" }}; + ESInfo es_info; + + void init(CephContext *cct, const JSONFormattable& config) { + string elastic_endpoint = config["endpoint"]; + id = string("elastic:") + elastic_endpoint; + conn.reset(new RGWRESTConn(cct, (rgw::sal::Driver*)nullptr, id, { elastic_endpoint }, nullopt /* region */ )); + explicit_custom_meta = config["explicit_custom_meta"](true); + index_buckets.init(config["index_buckets_list"], true); /* approve all buckets by default */ + allow_owners.init(config["approved_owners_list"], true); /* approve all bucket owners by default */ + override_index_path = config["override_index_path"]; + num_shards = 
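/* note: JSONFormattable's call operator returns the configured value, falling back to the supplied default */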
config["num_shards"](ES_NUM_SHARDS_DEFAULT); + if (num_shards < ES_NUM_SHARDS_MIN) { + num_shards = ES_NUM_SHARDS_MIN; + } + num_replicas = config["num_replicas"](ES_NUM_REPLICAS_DEFAULT); + if (string user = config["username"], pw = config["password"]; + !user.empty() && !pw.empty()) { + auto auth_string = user + ":" + pw; + default_headers.emplace("AUTHORIZATION", "Basic " + rgw::to_base64(auth_string)); + } + + } + + void init_instance(const RGWRealm& realm, uint64_t instance_id) { + sync_instance = instance_id; + + if (!override_index_path.empty()) { + index_path = override_index_path; + return; + } + + char buf[32]; + snprintf(buf, sizeof(buf), "-%08x", (uint32_t)(sync_instance & 0xFFFFFFFF)); + + index_path = "/rgw-" + realm.get_name() + buf; + } + + string get_index_path() { + return index_path; + } + + map& get_request_headers() { + return default_headers; + } + + string get_obj_path(const RGWBucketInfo& bucket_info, const rgw_obj_key& key) { + if (es_info.version >= ES_V7) { + return index_path+ "/_doc/" + url_encode(bucket_info.bucket.bucket_id + ":" + key.name + ":" + (key.instance.empty() ? "null" : key.instance)); +; + } else { + return index_path + "/object/" + url_encode(bucket_info.bucket.bucket_id + ":" + key.name + ":" + (key.instance.empty() ? "null" : key.instance)); + } + } + + bool should_handle_operation(RGWBucketInfo& bucket_info) { + return index_buckets.exists(bucket_info.bucket.name) && + allow_owners.exists(bucket_info.owner.to_str()); + } +}; + +using ElasticConfigRef = std::shared_ptr; + +static const char *es_type_to_str(const ESType& t) { + switch (t) { + case ESType::String: return "string"; + case ESType::Text: return "text"; + case ESType::Keyword: return "keyword"; + case ESType::Long: return "long"; + case ESType::Integer: return "integer"; + case ESType::Short: return "short"; + case ESType::Byte: return "byte"; + case ESType::Double: return "double"; + case ESType::Float: return "float"; + case ESType::Half_Float: return "half_float"; + case ESType::Scaled_Float: return "scaled_float"; + case ESType::Date: return "date"; + case ESType::Boolean: return "boolean"; + case ESType::Integer_Range: return "integer_range"; + case ESType::Float_Range: return "float_range"; + case ESType::Double_Range: return "date_range"; + case ESType::Date_Range: return "date_range"; + case ESType::Geo_Point: return "geo_point"; + case ESType::Ip: return "ip"; + default: + return ""; + } +} + +struct es_type_v2 { + ESType estype; + const char *format{nullptr}; + std::optional analyzed; + + es_type_v2(ESType et) : estype(et) {} + + void dump(Formatter *f) const { + const char *type_str = es_type_to_str(estype); + encode_json("type", type_str, f); + if (format) { + encode_json("format", format, f); + } + + auto is_analyzed = analyzed; + + if (estype == ESType::String && + !is_analyzed) { + is_analyzed = false; + } + + if (is_analyzed) { + encode_json("index", (is_analyzed.value() ? "analyzed" : "not_analyzed"), f); + } + } +}; + +struct es_type_v5 { + ESType estype; + const char *format{nullptr}; + std::optional analyzed; + std::optional index; + + es_type_v5(ESType et) : estype(et) {} + + void dump(Formatter *f) const { + ESType new_estype; + if (estype != ESType::String) { + new_estype = estype; + } else { + bool is_analyzed = analyzed.value_or(false); + new_estype = (is_analyzed ? ESType::Text : ESType::Keyword); + /* index = true; ... 
Not setting index=true, because that's the default, + * and dumping a boolean value *might* be a problem when backporting this + * because value might get quoted + */ + } + + const char *type_str = es_type_to_str(new_estype); + encode_json("type", type_str, f); + if (format) { + encode_json("format", format, f); + } + if (index) { + encode_json("index", index.value(), f); + } + } +}; + +template +struct es_type : public T { + es_type(T t) : T(t) {} + es_type& set_format(const char *f) { + T::format = f; + return *this; + } + + es_type& set_analyzed(bool a) { + T::analyzed = a; + return *this; + } +}; + +template +struct es_index_mappings { + ESVersion es_version; + ESType string_type {ESType::String}; + + es_index_mappings(ESVersion esv):es_version(esv) { + } + + es_type est(ESType t) const { + return es_type(t); + } + + void dump_custom(const char *section, ESType type, const char *format, Formatter *f) const { + f->open_object_section(section); + ::encode_json("type", "nested", f); + f->open_object_section("properties"); + encode_json("name", est(string_type), f); + encode_json("value", est(type).set_format(format), f); + f->close_section(); // entry + f->close_section(); // custom-string + } + + void dump(Formatter *f) const { + if (es_version <= ES_V7) + f->open_object_section("object"); + f->open_object_section("properties"); + encode_json("bucket", est(string_type), f); + encode_json("name", est(string_type), f); + encode_json("instance", est(string_type), f); + encode_json("versioned_epoch", est(ESType::Long), f); + f->open_object_section("meta"); + f->open_object_section("properties"); + encode_json("cache_control", est(string_type), f); + encode_json("content_disposition", est(string_type), f); + encode_json("content_encoding", est(string_type), f); + encode_json("content_language", est(string_type), f); + encode_json("content_type", est(string_type), f); + encode_json("storage_class", est(string_type), f); + encode_json("etag", est(string_type), f); + encode_json("expires", est(string_type), f); + encode_json("mtime", est(ESType::Date) + .set_format("strict_date_optional_time||epoch_millis"), f); + encode_json("size", est(ESType::Long), f); + dump_custom("custom-string", string_type, nullptr, f); + dump_custom("custom-int", ESType::Long, nullptr, f); + dump_custom("custom-date", ESType::Date, "strict_date_optional_time||epoch_millis", f); + f->close_section(); // properties + f->close_section(); // meta + f->close_section(); // properties + + if (es_version <= ES_V7) + f->close_section(); // object + } +}; + +struct es_index_settings { + uint32_t num_replicas; + uint32_t num_shards; + + es_index_settings(uint32_t _replicas, uint32_t _shards) : num_replicas(_replicas), num_shards(_shards) {} + + void dump(Formatter *f) const { + encode_json("number_of_replicas", num_replicas, f); + encode_json("number_of_shards", num_shards, f); + } +}; + +struct es_index_config_base { + virtual ~es_index_config_base() {} + virtual void dump(Formatter *f) const = 0; +}; + +template +struct es_index_config : public es_index_config_base { + es_index_settings settings; + es_index_mappings mappings; + + es_index_config(es_index_settings& _s, ESVersion esv) : settings(_s), mappings(esv) { + } + + void dump(Formatter *f) const { + encode_json("settings", settings, f); + encode_json("mappings", mappings, f); + } +}; + +static bool is_sys_attr(const std::string& attr_name){ + static constexpr std::initializer_list rgw_sys_attrs = + {RGW_ATTR_PG_VER, + RGW_ATTR_SOURCE_ZONE, + RGW_ATTR_ID_TAG, + 
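/* internal rgw attrs: is_sys_attr() keeps these out of the indexed metadata */
+     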
RGW_ATTR_TEMPURL_KEY1, + RGW_ATTR_TEMPURL_KEY2, + RGW_ATTR_UNIX1, + RGW_ATTR_UNIX_KEY1 + }; + + return std::find(rgw_sys_attrs.begin(), rgw_sys_attrs.end(), attr_name) != rgw_sys_attrs.end(); +} + +static size_t attr_len(const bufferlist& val) +{ + size_t len = val.length(); + if (len && val[len - 1] == '\0') { + --len; + } + + return len; +} + +struct es_obj_metadata { + const DoutPrefixProvider *dpp; + CephContext *cct; + ElasticConfigRef es_conf; + RGWBucketInfo bucket_info; + rgw_obj_key key; + ceph::real_time mtime; + uint64_t size; + map attrs; + uint64_t versioned_epoch; + + es_obj_metadata(CephContext *_cct, ElasticConfigRef _es_conf, const RGWBucketInfo& _bucket_info, + const rgw_obj_key& _key, ceph::real_time& _mtime, uint64_t _size, + map& _attrs, uint64_t _versioned_epoch) : cct(_cct), es_conf(_es_conf), bucket_info(_bucket_info), key(_key), + mtime(_mtime), size(_size), attrs(std::move(_attrs)), versioned_epoch(_versioned_epoch) {} + + void dump(Formatter *f) const { + map out_attrs; + map custom_meta; + RGWAccessControlPolicy policy; + set permissions; + RGWObjTags obj_tags; + + for (auto i : attrs) { + const string& attr_name = i.first; + bufferlist& val = i.second; + + if (!boost::algorithm::starts_with(attr_name, RGW_ATTR_PREFIX)) { + continue; + } + + if (boost::algorithm::starts_with(attr_name, RGW_ATTR_META_PREFIX)) { + custom_meta.emplace(attr_name.substr(sizeof(RGW_ATTR_META_PREFIX) - 1), + string(val.c_str(), attr_len(val))); + continue; + } + + if (boost::algorithm::starts_with(attr_name, RGW_ATTR_CRYPT_PREFIX)) { + continue; + } + + if (boost::algorithm::starts_with(attr_name, RGW_ATTR_OLH_PREFIX)) { + // skip versioned object olh info + continue; + } + + if (attr_name == RGW_ATTR_ACL) { + try { + auto i = val.cbegin(); + decode(policy, i); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode acl for " << bucket_info.bucket << "/" << key << dendl; + continue; + } + + const RGWAccessControlList& acl = policy.get_acl(); + + permissions.insert(policy.get_owner().get_id().to_str()); + for (auto acliter : acl.get_grant_map()) { + const ACLGrant& grant = acliter.second; + if (grant.get_type().get_type() == ACL_TYPE_CANON_USER && + ((uint32_t)grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) { + rgw_user user; + if (grant.get_id(user)) { + permissions.insert(user.to_str()); + } + } + } + } else if (attr_name == RGW_ATTR_TAGS) { + try { + auto tags_bl = val.cbegin(); + decode(obj_tags, tags_bl); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode obj tags for " + << bucket_info.bucket << "/" << key << dendl; + continue; + } + } else if (attr_name == RGW_ATTR_COMPRESSION) { + RGWCompressionInfo cs_info; + try { + auto vals_bl = val.cbegin(); + decode(cs_info, vals_bl); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode compression attr for " + << bucket_info.bucket << "/" << key << dendl; + continue; + } + out_attrs.emplace("compression",std::move(cs_info.compression_type)); + } else { + if (!is_sys_attr(attr_name)) { + out_attrs.emplace(attr_name.substr(sizeof(RGW_ATTR_PREFIX) - 1), + std::string(val.c_str(), attr_len(val))); + } + } + } + ::encode_json("bucket", bucket_info.bucket.name, f); + ::encode_json("name", key.name, f); + string instance = key.instance; + if (instance.empty()) + instance = "null"; + ::encode_json("instance", instance, f); + ::encode_json("versioned_epoch", versioned_epoch, f); + ::encode_json("owner", policy.get_owner(), f); + 
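/* "permissions" lists every user with read access (the owner plus any
+     * canon-user grant carrying READ); the metadata search REST op further
+     * below filters on this field for non-system users */
+    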
::encode_json("permissions", permissions, f); + f->open_object_section("meta"); + ::encode_json("size", size, f); + + string mtime_str; + rgw_to_iso8601(mtime, &mtime_str); + ::encode_json("mtime", mtime_str, f); + for (auto i : out_attrs) { + ::encode_json(i.first.c_str(), i.second, f); + } + map custom_str; + map custom_int; + map custom_date; + + for (auto i : custom_meta) { + auto config = bucket_info.mdsearch_config.find(i.first); + if (config == bucket_info.mdsearch_config.end()) { + if (!es_conf->explicit_custom_meta) { + /* default custom meta is of type string */ + custom_str[i.first] = i.second; + } else { + ldpp_dout(dpp, 20) << "custom meta entry key=" << i.first << " not found in bucket mdsearch config: " << bucket_info.mdsearch_config << dendl; + } + continue; + } + switch (config->second) { + case ESEntityTypeMap::ES_ENTITY_DATE: + custom_date[i.first] = i.second; + break; + case ESEntityTypeMap::ES_ENTITY_INT: + custom_int[i.first] = i.second; + break; + default: + custom_str[i.first] = i.second; + } + } + + if (!custom_str.empty()) { + f->open_array_section("custom-string"); + for (auto i : custom_str) { + f->open_object_section("entity"); + ::encode_json("name", i.first.c_str(), f); + ::encode_json("value", i.second, f); + f->close_section(); + } + f->close_section(); + } + if (!custom_int.empty()) { + f->open_array_section("custom-int"); + for (auto i : custom_int) { + f->open_object_section("entity"); + ::encode_json("name", i.first.c_str(), f); + ::encode_json("value", i.second, f); + f->close_section(); + } + f->close_section(); + } + if (!custom_date.empty()) { + f->open_array_section("custom-date"); + for (auto i : custom_date) { + /* + * try to exlicitly parse date field, otherwise elasticsearch could reject the whole doc, + * which will end up with failed sync + */ + real_time t; + int r = parse_time(i.second.c_str(), &t); + if (r < 0) { + ldpp_dout(dpp, 20) << __func__ << "(): failed to parse time (" << i.second << "), skipping encoding of custom date attribute" << dendl; + continue; + } + + string time_str; + rgw_to_iso8601(t, &time_str); + + f->open_object_section("entity"); + ::encode_json("name", i.first.c_str(), f); + ::encode_json("value", time_str.c_str(), f); + f->close_section(); + } + f->close_section(); + } + f->close_section(); // meta + const auto& m = obj_tags.get_tags(); + if (m.size() > 0){ + f->open_array_section("tagging"); + for (const auto &it : m) { + f->open_object_section("tag"); + ::encode_json("key", it.first, f); + ::encode_json("value",it.second, f); + f->close_section(); + } + f->close_section(); // tagging + } + } +}; + +class RGWElasticGetESInfoCBCR : public RGWCoroutine { +public: + RGWElasticGetESInfoCBCR(RGWDataSyncCtx *_sc, + ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + conf(_conf) {} + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + ldpp_dout(dpp, 5) << conf->id << ": get elasticsearch info for zone: " << sc->source_zone << dendl; + yield call(new RGWReadRESTResourceCR (sync_env->cct, + conf->conn.get(), + sync_env->http_manager, + "/", nullptr /*params*/, + &(conf->default_headers), + &(conf->es_info))); + if (retcode < 0) { + ldpp_dout(dpp, 5) << conf->id << ": get elasticsearch failed: " << retcode << dendl; + return set_cr_error(retcode); + } + + ldpp_dout(dpp, 5) << conf->id << ": got elastic version=" << conf->es_info.get_version_str() << dendl; + return set_cr_done(); + } + return 0; + } +private: + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + 
ElasticConfigRef conf; +}; + +class RGWElasticPutIndexCBCR : public RGWCoroutine { +public: + RGWElasticPutIndexCBCR(RGWDataSyncCtx *_sc, + ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + conf(_conf) {} + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + ldpp_dout(dpp, 5) << conf->id << ": put elasticsearch index for zone: " << sc->source_zone << dendl; + + yield { + string path = conf->get_index_path(); + es_index_settings settings(conf->num_replicas, conf->num_shards); + std::unique_ptr index_conf; + + if (conf->es_info.version >= ES_V5) { + ldpp_dout(dpp, 0) << "elasticsearch: index mapping: version >= 5" << dendl; + index_conf.reset(new es_index_config(settings, conf->es_info.version)); + } else { + ldpp_dout(dpp, 0) << "elasticsearch: index mapping: version < 5" << dendl; + index_conf.reset(new es_index_config(settings, conf->es_info.version)); + } + call(new RGWPutRESTResourceCR (sc->cct, + conf->conn.get(), + sync_env->http_manager, + path, nullptr /*params*/, + &(conf->default_headers), + *index_conf, nullptr, &err_response)); + } + if (retcode < 0) { + + if (err_response.error.type != "index_already_exists_exception" && + err_response.error.type != "resource_already_exists_exception") { + ldpp_dout(dpp, 0) << "elasticsearch: failed to initialize index: response.type=" << err_response.error.type << " response.reason=" << err_response.error.reason << dendl; + return set_cr_error(retcode); + } + + ldpp_dout(dpp, 0) << "elasticsearch: index already exists, assuming external initialization" << dendl; + } + return set_cr_done(); + } + return 0; + } + +private: + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + ElasticConfigRef conf; + + struct _err_response { + struct err_reason { + vector root_cause; + string type; + string reason; + string index; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("root_cause", root_cause, obj); + JSONDecoder::decode_json("type", type, obj); + JSONDecoder::decode_json("reason", reason, obj); + JSONDecoder::decode_json("index", index, obj); + } + } error; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("error", error, obj); + } + } err_response; +}; + +class RGWElasticInitConfigCBCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + ElasticConfigRef conf; + +public: + RGWElasticInitConfigCBCR(RGWDataSyncCtx *_sc, + ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), + sc(_sc), sync_env(_sc->env), + conf(_conf) {} + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + + yield call(new RGWElasticGetESInfoCBCR(sc, conf)); + + if (retcode < 0) { + return set_cr_error(retcode); + } + + yield call(new RGWElasticPutIndexCBCR(sc, conf)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } + +}; + +class RGWElasticHandleRemoteObjCBCR : public RGWStatRemoteObjCBCR { + rgw_bucket_sync_pipe sync_pipe; + ElasticConfigRef conf; + uint64_t versioned_epoch; +public: + RGWElasticHandleRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, + ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sc, _sync_pipe.info.source_bs.bucket, _key), + sync_pipe(_sync_pipe), conf(_conf), + versioned_epoch(_versioned_epoch) {} + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + ldpp_dout(dpp, 10) << ": stat of remote obj: z=" << sc->source_zone + << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key + << " size=" << size 
<< " mtime=" << mtime << dendl; + + yield { + string path = conf->get_obj_path(sync_pipe.dest_bucket_info, key); + es_obj_metadata doc(sync_env->cct, conf, sync_pipe.dest_bucket_info, key, mtime, size, attrs, versioned_epoch); + + call(new RGWPutRESTResourceCR(sync_env->cct, conf->conn.get(), + sync_env->http_manager, + path, nullptr /* params */, + &(conf->default_headers), + doc, nullptr /* result */)); + + } + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +class RGWElasticHandleRemoteObjCR : public RGWCallStatRemoteObjCR { + rgw_bucket_sync_pipe sync_pipe; + ElasticConfigRef conf; + uint64_t versioned_epoch; +public: + RGWElasticHandleRemoteObjCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, + ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sc, _sync_pipe.info.source_bs.bucket, _key), + sync_pipe(_sync_pipe), + conf(_conf), versioned_epoch(_versioned_epoch) { + } + + ~RGWElasticHandleRemoteObjCR() override {} + + RGWStatRemoteObjCBCR *allocate_callback() override { + return new RGWElasticHandleRemoteObjCBCR(sc, sync_pipe, key, conf, versioned_epoch); + } +}; + +class RGWElasticRemoveRemoteObjCBCR : public RGWCoroutine { + RGWDataSyncCtx *sc; + RGWDataSyncEnv *sync_env; + rgw_bucket_sync_pipe sync_pipe; + rgw_obj_key key; + ceph::real_time mtime; + ElasticConfigRef conf; +public: + RGWElasticRemoveRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, const ceph::real_time& _mtime, + ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), + sync_pipe(_sync_pipe), key(_key), + mtime(_mtime), conf(_conf) {} + int operate(const DoutPrefixProvider *dpp) override { + reenter(this) { + ldpp_dout(dpp, 10) << ": remove remote obj: z=" << sc->source_zone + << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << dendl; + yield { + string path = conf->get_obj_path(sync_pipe.dest_bucket_info, key); + + call(new RGWDeleteRESTResourceCR(sync_env->cct, conf->conn.get(), + sync_env->http_manager, + path, nullptr /* params */)); + } + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } + +}; + +class RGWElasticDataSyncModule : public RGWDataSyncModule { + ElasticConfigRef conf; +public: + RGWElasticDataSyncModule(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) : conf(std::make_shared()) { + conf->init(cct, config); + } + ~RGWElasticDataSyncModule() override {} + + void init(RGWDataSyncCtx *sc, uint64_t instance_id) override { + conf->init_instance(sc->env->svc->zone->get_realm(), instance_id); + } + + RGWCoroutine *init_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) override { + ldpp_dout(dpp, 5) << conf->id << ": init" << dendl; + return new RGWElasticInitConfigCBCR(sc, conf); + } + + RGWCoroutine *start_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) override { + ldpp_dout(dpp, 5) << conf->id << ": start_sync" << dendl; + // try to get elastic search version + return new RGWElasticGetESInfoCBCR(sc, conf); + } + + RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override { + ldpp_dout(dpp, 10) << conf->id << ": sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl; + if 
(!conf->should_handle_operation(sync_pipe.dest_bucket_info)) { + ldpp_dout(dpp, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl; + return nullptr; + } + return new RGWElasticHandleRemoteObjCR(sc, sync_pipe, key, conf, versioned_epoch.value_or(0)); + } + RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { + /* versioned and versioned epoch params are useless in the elasticsearch backend case */ + ldpp_dout(dpp, 10) << conf->id << ": rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; + if (!conf->should_handle_operation(sync_pipe.dest_bucket_info)) { + ldpp_dout(dpp, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl; + return nullptr; + } + return new RGWElasticRemoveRemoteObjCBCR(sc, sync_pipe, key, mtime, conf); + } + RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { + ldpp_dout(dpp, 10) << conf->id << ": create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime + << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; + ldpp_dout(dpp, 10) << conf->id << ": skipping operation (not handled)" << dendl; + return NULL; + } + RGWRESTConn *get_rest_conn() { + return conf->conn.get(); + } + + string get_index_path() { + return conf->get_index_path(); + } + + map& get_request_headers() { + return conf->get_request_headers(); + } +}; + +RGWElasticSyncModuleInstance::RGWElasticSyncModuleInstance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) +{ + data_handler = std::unique_ptr(new RGWElasticDataSyncModule(dpp, cct, config)); +} + +RGWDataSyncModule *RGWElasticSyncModuleInstance::get_data_handler() +{ + return data_handler.get(); +} + +RGWRESTConn *RGWElasticSyncModuleInstance::get_rest_conn() +{ + return data_handler->get_rest_conn(); +} + +string RGWElasticSyncModuleInstance::get_index_path() { + return data_handler->get_index_path(); +} + +map& RGWElasticSyncModuleInstance::get_request_headers() { + return data_handler->get_request_headers(); +} + +RGWRESTMgr *RGWElasticSyncModuleInstance::get_rest_filter(int dialect, RGWRESTMgr *orig) { + if (dialect != RGW_REST_S3) { + return orig; + } + delete orig; + return new RGWRESTMgr_MDSearch_S3(); +} + +int RGWElasticSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { + string endpoint = config["endpoint"]; + instance->reset(new RGWElasticSyncModuleInstance(dpp, cct, config)); + return 0; +} + diff --git a/src/rgw/driver/rados/rgw_sync_module_es.h b/src/rgw/driver/rados/rgw_sync_module_es.h new file mode 100644 index 00000000000..6c0c422c39c --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_es.h @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_MODULE_ES_H +#define CEPH_RGW_SYNC_MODULE_ES_H + +#include "rgw_sync_module.h" + +enum class ESType { + /* string datatypes */ + String, /* Deprecated Since 5.X+ */ + 
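/* ES 5.x+ splits "string" into "text" (analyzed) and "keyword" (exact
+   * match); es_type_v5 in rgw_sync_module_es.cc performs that mapping */
+  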
Text, + Keyword, + + /* Numeric Types */ + Long, Integer, Short, Byte, Double, Float, Half_Float, Scaled_Float, + + /* Date Type */ + Date, + + /* Boolean */ + Boolean, + + /* Binary; Must Be Base64 Encoded */ + Binary, + + /* Range Types */ + Integer_Range, Float_Range, Long_Range, Double_Range, Date_Range, + + /* A Few Specialized Types */ + Geo_Point, + Ip +}; + + +class RGWElasticSyncModule : public RGWSyncModule { +public: + RGWElasticSyncModule() {} + bool supports_data_export() override { + return false; + } + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; +}; + +class RGWElasticDataSyncModule; +class RGWRESTConn; + +class RGWElasticSyncModuleInstance : public RGWSyncModuleInstance { + std::unique_ptr data_handler; +public: + RGWElasticSyncModuleInstance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config); + RGWDataSyncModule *get_data_handler() override; + RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) override; + RGWRESTConn *get_rest_conn(); + std::string get_index_path(); + std::map& get_request_headers(); + bool supports_user_writes() override { + return true; + } +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_sync_module_es_rest.cc b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc new file mode 100644 index 00000000000..db9d48adb36 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc @@ -0,0 +1,428 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_sync_module_es.h" +#include "rgw_sync_module_es_rest.h" +#include "rgw_es_query.h" +#include "rgw_op.h" +#include "rgw_rest.h" +#include "rgw_rest_s3.h" +#include "rgw_sal_rados.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +struct es_index_obj_response { + string bucket; + rgw_obj_key key; + uint64_t versioned_epoch{0}; + ACLOwner owner; + set read_permissions; + + struct { + uint64_t size{0}; + ceph::real_time mtime; + string etag; + string content_type; + string storage_class; + map custom_str; + map custom_int; + map custom_date; + + template + struct _custom_entry { + string name; + T value; + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("name", name, obj); + JSONDecoder::decode_json("value", value, obj); + } + }; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("size", size, obj); + string mtime_str; + JSONDecoder::decode_json("mtime", mtime_str, obj); + parse_time(mtime_str.c_str(), &mtime); + JSONDecoder::decode_json("etag", etag, obj); + JSONDecoder::decode_json("content_type", content_type, obj); + JSONDecoder::decode_json("storage_class", storage_class, obj); + list<_custom_entry > str_entries; + JSONDecoder::decode_json("custom-string", str_entries, obj); + for (auto& e : str_entries) { + custom_str[e.name] = e.value; + } + list<_custom_entry > int_entries; + JSONDecoder::decode_json("custom-int", int_entries, obj); + for (auto& e : int_entries) { + custom_int[e.name] = e.value; + } + list<_custom_entry > date_entries; + JSONDecoder::decode_json("custom-date", date_entries, obj); + for (auto& e : date_entries) { + custom_date[e.name] = e.value; + } + } + } meta; + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("bucket", bucket, obj); + JSONDecoder::decode_json("name", key.name, obj); + JSONDecoder::decode_json("instance", key.instance, obj); + JSONDecoder::decode_json("versioned_epoch", 
versioned_epoch, obj); + JSONDecoder::decode_json("permissions", read_permissions, obj); + JSONDecoder::decode_json("owner", owner, obj); + JSONDecoder::decode_json("meta", meta, obj); + } +}; + +struct es_search_response { + uint32_t took; + bool timed_out; + struct { + uint32_t total; + uint32_t successful; + uint32_t failed; + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("total", total, obj); + JSONDecoder::decode_json("successful", successful, obj); + JSONDecoder::decode_json("failed", failed, obj); + } + } shards; + struct obj_hit { + string index; + string type; + string id; + // double score + es_index_obj_response source; + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("_index", index, obj); + JSONDecoder::decode_json("_type", type, obj); + JSONDecoder::decode_json("_id", id, obj); + JSONDecoder::decode_json("_source", source, obj); + } + }; + struct { + uint32_t total; + // double max_score; + list hits; + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("total", total, obj); + // JSONDecoder::decode_json("max_score", max_score, obj); + JSONDecoder::decode_json("hits", hits, obj); + } + } hits; + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("took", took, obj); + JSONDecoder::decode_json("timed_out", timed_out, obj); + JSONDecoder::decode_json("_shards", shards, obj); + JSONDecoder::decode_json("hits", hits, obj); + } +}; + +class RGWMetadataSearchOp : public RGWOp { + RGWSyncModuleInstanceRef sync_module_ref; + RGWElasticSyncModuleInstance *es_module; +protected: + string expression; + string custom_prefix; +#define MAX_KEYS_DEFAULT 100 + uint64_t max_keys{MAX_KEYS_DEFAULT}; + string marker_str; + uint64_t marker{0}; + string next_marker; + bool is_truncated{false}; + string err; + + es_search_response response; + +public: + RGWMetadataSearchOp(const RGWSyncModuleInstanceRef& sync_module) : sync_module_ref(sync_module) { + es_module = static_cast(sync_module_ref.get()); + } + + int verify_permission(optional_yield) override { + return 0; + } + virtual int get_params() = 0; + void pre_exec() override; + void execute(optional_yield y) override; + + const char* name() const override { return "metadata_search"; } + virtual RGWOpType get_type() override { return RGW_OP_METADATA_SEARCH; } + virtual uint32_t op_mask() override { return RGW_OP_TYPE_READ; } +}; + +void RGWMetadataSearchOp::pre_exec() +{ + rgw_bucket_object_pre_exec(s); +} + +void RGWMetadataSearchOp::execute(optional_yield y) +{ + op_ret = get_params(); + if (op_ret < 0) + return; + + list > conds; + + if (!s->user->get_info().system) { + conds.push_back(make_pair("permissions", s->user->get_id().to_str())); + } + + if (!s->bucket_name.empty()) { + conds.push_back(make_pair("bucket", s->bucket_name)); + } + + ESQueryCompiler es_query(expression, &conds, custom_prefix); + + static map aliases = { + { "bucket", "bucket" }, /* forces lowercase */ + { "name", "name" }, + { "key", "name" }, + { "instance", "instance" }, + { "etag", "meta.etag" }, + { "size", "meta.size" }, + { "mtime", "meta.mtime" }, + { "lastmodified", "meta.mtime" }, + { "last_modified", "meta.mtime" }, + { "contenttype", "meta.content_type" }, + { "content_type", "meta.content_type" }, + { "storageclass", "meta.storage_class" }, + { "storage_class", "meta.storage_class" }, + }; + es_query.set_field_aliases(&aliases); + + static map generic_map = { {"bucket", ESEntityTypeMap::ES_ENTITY_STR}, + {"name", ESEntityTypeMap::ES_ENTITY_STR}, + {"instance", ESEntityTypeMap::ES_ENTITY_STR}, + {"permissions", 
ESEntityTypeMap::ES_ENTITY_STR}, + {"meta.etag", ESEntityTypeMap::ES_ENTITY_STR}, + {"meta.content_type", ESEntityTypeMap::ES_ENTITY_STR}, + {"meta.mtime", ESEntityTypeMap::ES_ENTITY_DATE}, + {"meta.size", ESEntityTypeMap::ES_ENTITY_INT}, + {"meta.storage_class", ESEntityTypeMap::ES_ENTITY_STR} }; + ESEntityTypeMap gm(generic_map); + es_query.set_generic_type_map(&gm); + + static set restricted_fields = { {"permissions"} }; + es_query.set_restricted_fields(&restricted_fields); + + map custom_map; + for (auto& i : s->bucket->get_info().mdsearch_config) { + custom_map[i.first] = (ESEntityTypeMap::EntityType)i.second; + } + + ESEntityTypeMap em(custom_map); + es_query.set_custom_type_map(&em); + + bool valid = es_query.compile(&err); + if (!valid) { + ldpp_dout(this, 10) << "invalid query, failed generating request json" << dendl; + op_ret = -EINVAL; + return; + } + + JSONFormatter f; + encode_json("root", es_query, &f); + + RGWRESTConn *conn = es_module->get_rest_conn(); + + bufferlist in; + bufferlist out; + + stringstream ss; + + f.flush(ss); + in.append(ss.str()); + + string resource = es_module->get_index_path() + "/_search"; + param_vec_t params; + static constexpr int BUFSIZE = 32; + char buf[BUFSIZE]; + snprintf(buf, sizeof(buf), "%lld", (long long)max_keys); + params.push_back(param_pair_t("size", buf)); + if (marker > 0) { + params.push_back(param_pair_t("from", marker_str.c_str())); + } + ldpp_dout(this, 20) << "sending request to elasticsearch, payload=" << string(in.c_str(), in.length()) << dendl; + auto& extra_headers = es_module->get_request_headers(); + op_ret = conn->get_resource(s, resource, ¶ms, &extra_headers, + out, &in, nullptr, y); + if (op_ret < 0) { + ldpp_dout(this, 0) << "ERROR: failed to fetch resource (r=" << resource << ", ret=" << op_ret << ")" << dendl; + return; + } + + ldpp_dout(this, 20) << "response: " << string(out.c_str(), out.length()) << dendl; + + JSONParser jparser; + if (!jparser.parse(out.c_str(), out.length())) { + ldpp_dout(this, 0) << "ERROR: failed to parse elasticsearch response" << dendl; + op_ret = -EINVAL; + return; + } + + try { + decode_json_obj(response, &jparser); + } catch (const JSONDecoder::err& e) { + ldpp_dout(this, 0) << "ERROR: failed to decode JSON input: " << e.what() << dendl; + op_ret = -EINVAL; + return; + } + +} + +class RGWMetadataSearch_ObjStore_S3 : public RGWMetadataSearchOp { +public: + explicit RGWMetadataSearch_ObjStore_S3(const RGWSyncModuleInstanceRef& _sync_module) : RGWMetadataSearchOp(_sync_module) { + custom_prefix = "x-amz-meta-"; + } + + int get_params() override { + expression = s->info.args.get("query"); + bool exists; + string max_keys_str = s->info.args.get("max-keys", &exists); +#define MAX_KEYS_MAX 10000 + if (exists) { + string err; + max_keys = strict_strtoll(max_keys_str.c_str(), 10, &err); + if (!err.empty()) { + return -EINVAL; + } + if (max_keys > MAX_KEYS_MAX) { + max_keys = MAX_KEYS_MAX; + } + } + marker_str = s->info.args.get("marker", &exists); + if (exists) { + string err; + marker = strict_strtoll(marker_str.c_str(), 10, &err); + if (!err.empty()) { + return -EINVAL; + } + } + uint64_t nm = marker + max_keys; + static constexpr int BUFSIZE = 32; + char buf[BUFSIZE]; + snprintf(buf, sizeof(buf), "%lld", (long long)nm); + next_marker = buf; + return 0; + } + void send_response() override { + if (op_ret) { + s->err.message = err; + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s, this, "application/xml"); + + if (op_ret < 0) { + return; + } + + is_truncated = 
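/* ES paging here is plain from/size with no continuation token, so a full page is treated as possibly truncated */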
(response.hits.hits.size() >= max_keys); + + s->formatter->open_object_section("SearchMetadataResponse"); + s->formatter->dump_string("Marker", marker_str); + s->formatter->dump_string("IsTruncated", (is_truncated ? "true" : "false")); + if (is_truncated) { + s->formatter->dump_string("NextMarker", next_marker); + } + if (s->format == RGWFormat::JSON) { + s->formatter->open_array_section("Objects"); + } + for (auto& i : response.hits.hits) { + s->formatter->open_object_section("Contents"); + es_index_obj_response& e = i.source; + s->formatter->dump_string("Bucket", e.bucket); + s->formatter->dump_string("Key", e.key.name); + string instance = (!e.key.instance.empty() ? e.key.instance : "null"); + s->formatter->dump_string("Instance", instance.c_str()); + s->formatter->dump_int("VersionedEpoch", e.versioned_epoch); + dump_time(s, "LastModified", e.meta.mtime); + s->formatter->dump_int("Size", e.meta.size); + s->formatter->dump_format("ETag", "\"%s\"", e.meta.etag.c_str()); + s->formatter->dump_string("ContentType", e.meta.content_type.c_str()); + s->formatter->dump_string("StorageClass", e.meta.storage_class.c_str()); + dump_owner(s, e.owner.get_id(), e.owner.get_display_name()); + s->formatter->open_array_section("CustomMetadata"); + for (auto& m : e.meta.custom_str) { + s->formatter->open_object_section("Entry"); + s->formatter->dump_string("Name", m.first.c_str()); + s->formatter->dump_string("Value", m.second); + s->formatter->close_section(); + } + for (auto& m : e.meta.custom_int) { + s->formatter->open_object_section("Entry"); + s->formatter->dump_string("Name", m.first.c_str()); + s->formatter->dump_int("Value", m.second); + s->formatter->close_section(); + } + for (auto& m : e.meta.custom_date) { + s->formatter->open_object_section("Entry"); + s->formatter->dump_string("Name", m.first.c_str()); + s->formatter->dump_string("Value", m.second); + s->formatter->close_section(); + } + s->formatter->close_section(); + rgw_flush_formatter(s, s->formatter); + s->formatter->close_section(); + }; + if (s->format == RGWFormat::JSON) { + s->formatter->close_section(); + } + s->formatter->close_section(); + rgw_flush_formatter_and_reset(s, s->formatter); + } +}; + +class RGWHandler_REST_MDSearch_S3 : public RGWHandler_REST_S3 { +protected: + RGWOp *op_get() override { + if (s->info.args.exists("query")) { + return new RGWMetadataSearch_ObjStore_S3(driver->get_sync_module()); + } + if (!s->init_state.url_bucket.empty() && + s->info.args.exists("mdsearch")) { + return new RGWGetBucketMetaSearch_ObjStore_S3; + } + return nullptr; + } + RGWOp *op_head() override { + return nullptr; + } + RGWOp *op_post() override { + return nullptr; + } +public: + explicit RGWHandler_REST_MDSearch_S3(const rgw::auth::StrategyRegistry& auth_registry) : RGWHandler_REST_S3(auth_registry) {} + virtual ~RGWHandler_REST_MDSearch_S3() {} +}; + + +RGWHandler_REST* RGWRESTMgr_MDSearch_S3::get_handler(rgw::sal::Driver* driver, + req_state* const s, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string& frontend_prefix) +{ + int ret = + RGWHandler_REST_S3::init_from_header(driver, s, + RGWFormat::XML, true); + if (ret < 0) { + return nullptr; + } + + if (!s->object->empty()) { + return nullptr; + } + + RGWHandler_REST *handler = new RGWHandler_REST_MDSearch_S3(auth_registry); + + ldpp_dout(s, 20) << __func__ << " handler=" << typeid(*handler).name() + << dendl; + return handler; +} + diff --git a/src/rgw/driver/rados/rgw_sync_module_es_rest.h b/src/rgw/driver/rados/rgw_sync_module_es_rest.h new file mode 
100644 index 00000000000..b18271a69cd --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_es_rest.h @@ -0,0 +1,18 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw_rest.h" + +class RGWElasticSyncModuleInstance; + +class RGWRESTMgr_MDSearch_S3 : public RGWRESTMgr { +public: + explicit RGWRESTMgr_MDSearch_S3() {} + + RGWHandler_REST *get_handler(rgw::sal::Driver* driver, + req_state* s, + const rgw::auth::StrategyRegistry& auth_registry, + const std::string& frontend_prefix) override; +}; diff --git a/src/rgw/driver/rados/rgw_sync_module_log.cc b/src/rgw/driver/rados/rgw_sync_module_log.cc new file mode 100644 index 00000000000..a21604cc228 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_log.cc @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_common.h" +#include "rgw_coroutine.h" +#include "rgw_cr_rados.h" +#include "rgw_sync_module.h" +#include "rgw_data_sync.h" +#include "rgw_sync_module_log.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +class RGWLogStatRemoteObjCBCR : public RGWStatRemoteObjCBCR { +public: + RGWLogStatRemoteObjCBCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWStatRemoteObjCBCR(_sc, _src_bucket, _key) {} + int operate(const DoutPrefixProvider *dpp) override { + ldpp_dout(dpp, 0) << "SYNC_LOG: stat of remote obj: z=" << sc->source_zone + << " b=" << src_bucket << " k=" << key << " size=" << size << " mtime=" << mtime + << " attrs=" << attrs << dendl; + return set_cr_done(); + } + +}; + +class RGWLogStatRemoteObjCR : public RGWCallStatRemoteObjCR { +public: + RGWLogStatRemoteObjCR(RGWDataSyncCtx *_sc, + rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCallStatRemoteObjCR(_sc, _src_bucket, _key) { + } + + ~RGWLogStatRemoteObjCR() override {} + + RGWStatRemoteObjCBCR *allocate_callback() override { + return new RGWLogStatRemoteObjCBCR(sc, src_bucket, key); + } +}; + +class RGWLogDataSyncModule : public RGWDataSyncModule { + string prefix; +public: + explicit RGWLogDataSyncModule(const string& _prefix) : prefix(_prefix) {} + + RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override { + ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl; + return new RGWLogStatRemoteObjCR(sc, sync_pipe.info.source_bs.bucket, key); + } + RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { + ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; + return NULL; + } + RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, + rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { + ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << 
mtime + << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; + return NULL; + } +}; + +class RGWLogSyncModuleInstance : public RGWSyncModuleInstance { + RGWLogDataSyncModule data_handler; +public: + explicit RGWLogSyncModuleInstance(const string& prefix) : data_handler(prefix) {} + RGWDataSyncModule *get_data_handler() override { + return &data_handler; + } +}; + +int RGWLogSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { + string prefix = config["prefix"]; + instance->reset(new RGWLogSyncModuleInstance(prefix)); + return 0; +} + diff --git a/src/rgw/driver/rados/rgw_sync_module_log.h b/src/rgw/driver/rados/rgw_sync_module_log.h new file mode 100644 index 00000000000..ecf3bb78911 --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_module_log.h @@ -0,0 +1,18 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_MODULE_LOG_H +#define CEPH_RGW_SYNC_MODULE_LOG_H + +#include "rgw_sync_module.h" + +class RGWLogSyncModule : public RGWSyncModule { +public: + RGWLogSyncModule() {} + bool supports_data_export() override { + return false; + } + int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; +}; + +#endif diff --git a/src/rgw/driver/rados/rgw_sync_trace.cc b/src/rgw/driver/rados/rgw_sync_trace.cc new file mode 100644 index 00000000000..b346835938d --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_trace.cc @@ -0,0 +1,290 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_TRACE_H +#define CEPH_RGW_SYNC_TRACE_H + +#include + +#include "common/debug.h" +#include "common/ceph_json.h" + +#include "rgw_sync_trace.h" +#include "rgw_rados.h" +#include "rgw_worker.h" + +#define dout_context g_ceph_context + +static constexpr auto dout_subsys = ceph_subsys_rgw; + +using namespace std; + + +RGWSyncTraceNode::RGWSyncTraceNode(CephContext *_cct, uint64_t _handle, + const RGWSyncTraceNodeRef& _parent, + const string& _type, const string& _id) : cct(_cct), + parent(_parent), + type(_type), + id(_id), + handle(_handle), + history(cct->_conf->rgw_sync_trace_per_node_log_size) +{ + if (parent.get()) { + prefix = parent->get_prefix(); + } + + if (!type.empty()) { + prefix += type; + if (!id.empty()) { + prefix += "[" + id + "]"; + } + prefix += ":"; + } +} + +void RGWSyncTraceNode::log(int level, const string& s) +{ + status = s; + history.push_back(status); + /* dump output on either rgw_sync, or rgw -- but only once */ + if (cct->_conf->subsys.should_gather(ceph_subsys_rgw_sync, level)) { + lsubdout(cct, rgw_sync, + ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl; + } else { + lsubdout(cct, rgw, + ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl; + } +} + + +class RGWSyncTraceServiceMapThread : public RGWRadosThread { + RGWRados *store; + RGWSyncTraceManager *manager; + + uint64_t interval_msec() override { + return cct->_conf->rgw_sync_trace_servicemap_update_interval * 1000; + } +public: + RGWSyncTraceServiceMapThread(RGWRados *_store, RGWSyncTraceManager *_manager) + : RGWRadosThread(_store, "sync-trace"), store(_store), manager(_manager) {} + + int process(const DoutPrefixProvider *dpp) override; +}; + +int RGWSyncTraceServiceMapThread::process(const DoutPrefixProvider *dpp) +{ + map status; + 
status["current_sync"] = manager->get_active_names(); + int ret = store->update_service_map(dpp, std::move(status)); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: update_service_map() returned ret=" << ret << dendl; + } + return 0; +} + +RGWSyncTraceNodeRef RGWSyncTraceManager::add_node(const RGWSyncTraceNodeRef& parent, + const std::string& type, + const std::string& id) +{ + shunique_lock wl(lock, ceph::acquire_unique); + auto handle = alloc_handle(); + RGWSyncTraceNodeRef& ref = nodes[handle]; + ref.reset(new RGWSyncTraceNode(cct, handle, parent, type, id)); + // return a separate shared_ptr that calls finish() on the node instead of + // deleting it. the lambda capture holds a reference to the original 'ref' + auto deleter = [ref, this] (RGWSyncTraceNode *node) { finish_node(node); }; + return {ref.get(), deleter}; +} + +bool RGWSyncTraceNode::match(const string& search_term, bool search_history) +{ + try { + std::regex expr(search_term); + std::smatch m; + + if (regex_search(prefix, m, expr)) { + return true; + } + if (regex_search(status, m,expr)) { + return true; + } + if (!search_history) { + return false; + } + + for (auto h : history) { + if (regex_search(h, m, expr)) { + return true; + } + } + } catch (const std::regex_error& e) { + ldout(cct, 5) << "NOTICE: sync trace: bad expression: bad regex search term" << dendl; + } + + return false; +} + +void RGWSyncTraceManager::init(RGWRados *store) +{ + service_map_thread = new RGWSyncTraceServiceMapThread(store, this); + service_map_thread->start(); +} + +RGWSyncTraceManager::~RGWSyncTraceManager() +{ + cct->get_admin_socket()->unregister_commands(this); + service_map_thread->stop(); + delete service_map_thread; + + nodes.clear(); +} + +int RGWSyncTraceManager::hook_to_admin_command() +{ + AdminSocket *admin_socket = cct->get_admin_socket(); + + admin_commands = { { "sync trace show name=search,type=CephString,req=false", "sync trace show [filter_str]: show current multisite tracing information" }, + { "sync trace history name=search,type=CephString,req=false", "sync trace history [filter_str]: show history of multisite tracing information" }, + { "sync trace active name=search,type=CephString,req=false", "show active multisite sync entities information" }, + { "sync trace active_short name=search,type=CephString,req=false", "show active multisite sync entities entries" } }; + for (auto cmd : admin_commands) { + int r = admin_socket->register_command(cmd[0], this, + cmd[1]); + if (r < 0) { + lderr(cct) << "ERROR: fail to register admin socket command (r=" << r << ")" << dendl; + return r; + } + } + return 0; +} + +static void dump_node(RGWSyncTraceNode *entry, bool show_history, Formatter *f) +{ + f->open_object_section("entry"); + ::encode_json("status", entry->to_str(), f); + if (show_history) { + f->open_array_section("history"); + for (auto h : entry->get_history()) { + ::encode_json("entry", h, f); + } + f->close_section(); + } + f->close_section(); +} + +string RGWSyncTraceManager::get_active_names() +{ + shunique_lock rl(lock, ceph::acquire_shared); + + stringstream ss; + JSONFormatter f; + + f.open_array_section("result"); + for (auto n : nodes) { + auto& entry = n.second; + + if (!entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { + continue; + } + const string& name = entry->get_resource_name(); + if (!name.empty()) { + ::encode_json("entry", name, &f); + } + f.flush(ss); + } + f.close_section(); + f.flush(ss); + + return ss.str(); +} + +int RGWSyncTraceManager::call(std::string_view command, const cmdmap_t& cmdmap, + const 
bufferlist&, + Formatter *f, + std::ostream& ss, + bufferlist& out) { + + bool show_history = (command == "sync trace history"); + bool show_short = (command == "sync trace active_short"); + bool show_active = (command == "sync trace active") || show_short; + + string search; + + auto si = cmdmap.find("search"); + if (si != cmdmap.end()) { + search = boost::get(si->second); + } + + shunique_lock rl(lock, ceph::acquire_shared); + + f->open_object_section("result"); + f->open_array_section("running"); + for (auto n : nodes) { + auto& entry = n.second; + + if (!search.empty() && !entry->match(search, show_history)) { + continue; + } + if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { + continue; + } + if (show_short) { + const string& name = entry->get_resource_name(); + if (!name.empty()) { + ::encode_json("entry", name, f); + } + } else { + dump_node(entry.get(), show_history, f); + } + f->flush(out); + } + f->close_section(); + + f->open_array_section("complete"); + for (auto& entry : complete_nodes) { + if (!search.empty() && !entry->match(search, show_history)) { + continue; + } + if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { + continue; + } + dump_node(entry.get(), show_history, f); + f->flush(out); + } + f->close_section(); + + f->close_section(); + + return 0; +} + +void RGWSyncTraceManager::finish_node(RGWSyncTraceNode *node) +{ + RGWSyncTraceNodeRef old_node; + + { + shunique_lock wl(lock, ceph::acquire_unique); + if (!node) { + return; + } + auto iter = nodes.find(node->handle); + if (iter == nodes.end()) { + /* not found, already finished */ + return; + } + + if (complete_nodes.full()) { + /* take a reference to the entry that is going to be evicted, + * can't let it get evicted under lock held, otherwise + * it's a deadlock as it will call finish_node() + */ + old_node = complete_nodes.front(); + } + + complete_nodes.push_back(iter->second); + nodes.erase(iter); + } +}; + +#endif + diff --git a/src/rgw/driver/rados/rgw_sync_trace.h b/src/rgw/driver/rados/rgw_sync_trace.h new file mode 100644 index 00000000000..9617dac70db --- /dev/null +++ b/src/rgw/driver/rados/rgw_sync_trace.h @@ -0,0 +1,145 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_SYNC_LOG_H +#define CEPH_RGW_SYNC_LOG_H + +#include + +#include "common/ceph_mutex.h" +#include "common/shunique_lock.h" +#include "common/admin_socket.h" + +#include +#include +#include +#include +#include + +#define SSTR(o) ({ \ + std::stringstream ss; \ + ss << o; \ + ss.str(); \ +}) + +#define RGW_SNS_FLAG_ACTIVE 1 +#define RGW_SNS_FLAG_ERROR 2 + +class RGWRados; +class RGWSyncTraceManager; +class RGWSyncTraceNode; +class RGWSyncTraceServiceMapThread; + +using RGWSyncTraceNodeRef = std::shared_ptr; + +class RGWSyncTraceNode final { + friend class RGWSyncTraceManager; + + CephContext *cct; + RGWSyncTraceNodeRef parent; + + uint16_t state{0}; + std::string status; + + ceph::mutex lock = ceph::make_mutex("RGWSyncTraceNode::lock"); + + std::string type; + std::string id; + + std::string prefix; + + std::string resource_name; + + uint64_t handle; + + boost::circular_buffer history; + + // private constructor, create with RGWSyncTraceManager::add_node() + RGWSyncTraceNode(CephContext *_cct, uint64_t _handle, + const RGWSyncTraceNodeRef& _parent, + const std::string& _type, const std::string& _id); + + public: + void set_resource_name(const std::string& s) { + resource_name = s; + } + + const std::string& get_resource_name() { + return 
resource_name; + } + + void set_flag(uint16_t s) { + state |= s; + } + void unset_flag(uint16_t s) { + state &= ~s; + } + bool test_flags(uint16_t f) { + return (state & f) == f; + } + void log(int level, const std::string& s); + + std::string to_str() { + return prefix + " " + status; + } + + const std::string& get_prefix() { + return prefix; + } + + std::ostream& operator<<(std::ostream& os) { + os << to_str(); + return os; + } + + boost::circular_buffer& get_history() { + return history; + } + + bool match(const std::string& search_term, bool search_history); +}; + +class RGWSyncTraceManager : public AdminSocketHook { + friend class RGWSyncTraceNode; + + mutable std::shared_timed_mutex lock; + using shunique_lock = ceph::shunique_lock; + + CephContext *cct; + RGWSyncTraceServiceMapThread *service_map_thread{nullptr}; + + std::map nodes; + boost::circular_buffer complete_nodes; + + std::atomic count = { 0 }; + + std::list > admin_commands; + + uint64_t alloc_handle() { + return ++count; + } + void finish_node(RGWSyncTraceNode *node); + +public: + RGWSyncTraceManager(CephContext *_cct, int max_lru) : cct(_cct), complete_nodes(max_lru) {} + ~RGWSyncTraceManager(); + + void init(RGWRados *store); + + const RGWSyncTraceNodeRef root_node; + + RGWSyncTraceNodeRef add_node(const RGWSyncTraceNodeRef& parent, + const std::string& type, + const std::string& id = ""); + + int hook_to_admin_command(); + int call(std::string_view command, const cmdmap_t& cmdmap, + const bufferlist&, + Formatter *f, + std::ostream& ss, + bufferlist& out) override; + std::string get_active_names(); +}; + + +#endif diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc new file mode 100644 index 00000000000..5a8aefaac3d --- /dev/null +++ b/src/rgw/driver/rados/rgw_tools.cc @@ -0,0 +1,414 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" +#include "librados/librados_asio.h" + +#include "include/stringify.h" + +#include "rgw_tools.h" +#include "rgw_acl_s3.h" +#include "rgw_aio_throttle.h" +#include "rgw_compression.h" + +#define dout_subsys ceph_subsys_rgw + +#define READ_CHUNK_LEN (512 * 1024) + +using namespace std; + +int rgw_init_ioctx(const DoutPrefixProvider *dpp, + librados::Rados *rados, const rgw_pool& pool, + librados::IoCtx& ioctx, bool create, + bool mostly_omap) +{ + int r = rados->ioctx_create(pool.name.c_str(), ioctx); + if (r == -ENOENT && create) { + r = rados->pool_create(pool.name.c_str()); + if (r == -ERANGE) { + ldpp_dout(dpp, 0) + << __func__ + << " ERROR: librados::Rados::pool_create returned " << cpp_strerror(-r) + << " (this can be due to a pool or placement group misconfiguration, e.g." 
+ << " pg_num < pgp_num or mon_max_pg_per_osd exceeded)" + << dendl; + } + if (r < 0 && r != -EEXIST) { + return r; + } + + r = rados->ioctx_create(pool.name.c_str(), ioctx); + if (r < 0) { + return r; + } + + r = ioctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false); + if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + if (mostly_omap) { + // set pg_autoscale_bias + bufferlist inbl; + float bias = g_conf().get_val("rgw_rados_pool_autoscale_bias"); + int r = rados->mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + + pool.name + "\", \"var\": \"pg_autoscale_bias\", \"val\": \"" + + stringify(bias) + "\"}", + inbl, NULL, NULL); + if (r < 0) { + ldpp_dout(dpp, 10) << __func__ << " warning: failed to set pg_autoscale_bias on " + << pool.name << dendl; + } + // set recovery_priority + int p = g_conf().get_val("rgw_rados_pool_recovery_priority"); + r = rados->mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + + pool.name + "\", \"var\": \"recovery_priority\": \"" + + stringify(p) + "\"}", + inbl, NULL, NULL); + if (r < 0) { + ldpp_dout(dpp, 10) << __func__ << " warning: failed to set recovery_priority on " + << pool.name << dendl; + } + } + } else if (r < 0) { + return r; + } + if (!pool.ns.empty()) { + ioctx.set_namespace(pool.ns); + } + return 0; +} + +map* no_change_attrs() { + static map no_change; + return &no_change; +} + +int rgw_put_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, + const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive, + RGWObjVersionTracker *objv_tracker, real_time set_mtime, optional_yield y, map *pattrs) +{ + map no_attrs; + if (!pattrs) { + pattrs = &no_attrs; + } + + rgw_raw_obj obj(pool, oid); + + auto sysobj = svc_sysobj->get_obj(obj); + int ret; + + if (pattrs != no_change_attrs()) { + ret = sysobj.wop() + .set_objv_tracker(objv_tracker) + .set_exclusive(exclusive) + .set_mtime(set_mtime) + .set_attrs(*pattrs) + .write(dpp, data, y); + } else { + ret = sysobj.wop() + .set_objv_tracker(objv_tracker) + .set_exclusive(exclusive) + .set_mtime(set_mtime) + .write_data(dpp, data, y); + } + + return ret; +} + +int rgw_stat_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, + const rgw_pool& pool, const std::string& key, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, optional_yield y, + std::map *pattrs) +{ + rgw_raw_obj obj(pool, key); + auto sysobj = svc_sysobj->get_obj(obj); + return sysobj.rop() + .set_attrs(pattrs) + .set_last_mod(pmtime) + .stat(y, dpp); +} + + +int rgw_get_system_obj(RGWSI_SysObj* svc_sysobj, const rgw_pool& pool, const string& key, bufferlist& bl, + RGWObjVersionTracker *objv_tracker, real_time *pmtime, optional_yield y, + const DoutPrefixProvider *dpp, map *pattrs, + rgw_cache_entry_info *cache_info, + boost::optional refresh_version, bool raw_attrs) +{ + const rgw_raw_obj obj(pool, key); + auto sysobj = svc_sysobj->get_obj(obj); + auto rop = sysobj.rop(); + return rop.set_attrs(pattrs) + .set_last_mod(pmtime) + .set_objv_tracker(objv_tracker) + .set_raw_attrs(raw_attrs) + .set_cache_info(cache_info) + .set_refresh_version(refresh_version) + .read(dpp, &bl, y); +} + +int rgw_delete_system_obj(const DoutPrefixProvider *dpp, + RGWSI_SysObj *sysobj_svc, const rgw_pool& pool, const string& oid, + RGWObjVersionTracker *objv_tracker, optional_yield y) +{ + auto sysobj = sysobj_svc->get_obj(rgw_raw_obj{pool, oid}); + rgw_raw_obj obj(pool, oid); + return sysobj.wop() + .set_objv_tracker(objv_tracker) + .remove(dpp, y); +} + +int 
rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectReadOperation *op, bufferlist* pbl, + optional_yield y, int flags) +{ + // given a yield_context, call async_operate() to yield the coroutine instead + // of blocking + if (y) { + auto& context = y.get_io_context(); + auto& yield = y.get_yield_context(); + boost::system::error_code ec; + auto bl = librados::async_operate( + context, ioctx, oid, op, flags, yield[ec]); + if (pbl) { + *pbl = std::move(bl); + } + return -ec.value(); + } + // work on asio threads should be asynchronous, so warn when they block + if (is_asio_thread) { + ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; + } + return ioctx.operate(oid, op, nullptr, flags); +} + +int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectWriteOperation *op, optional_yield y, + int flags) +{ + if (y) { + auto& context = y.get_io_context(); + auto& yield = y.get_yield_context(); + boost::system::error_code ec; + librados::async_operate(context, ioctx, oid, op, flags, yield[ec]); + return -ec.value(); + } + if (is_asio_thread) { + ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; + } + return ioctx.operate(oid, op, flags); +} + +int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, + optional_yield y) +{ + if (y) { + auto& context = y.get_io_context(); + auto& yield = y.get_yield_context(); + boost::system::error_code ec; + auto reply = librados::async_notify(context, ioctx, oid, + bl, timeout_ms, yield[ec]); + if (pbl) { + *pbl = std::move(reply); + } + return -ec.value(); + } + if (is_asio_thread) { + ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; + } + return ioctx.notify2(oid, bl, timeout_ms, pbl); +} + +void rgw_filter_attrset(map& unfiltered_attrset, const string& check_prefix, + map *attrset) +{ + attrset->clear(); + map::iterator iter; + for (iter = unfiltered_attrset.lower_bound(check_prefix); + iter != unfiltered_attrset.end(); ++iter) { + if (!boost::algorithm::starts_with(iter->first, check_prefix)) + break; + (*attrset)[iter->first] = iter->second; + } +} + +RGWDataAccess::RGWDataAccess(rgw::sal::Driver* _driver) : driver(_driver) +{ +} + + +int RGWDataAccess::Bucket::finish_init() +{ + auto iter = attrs.find(RGW_ATTR_ACL); + if (iter == attrs.end()) { + return 0; + } + + bufferlist::const_iterator bliter = iter->second.begin(); + try { + policy.decode(bliter); + } catch (buffer::error& err) { + return -EIO; + } + + return 0; +} + +int RGWDataAccess::Bucket::init(const DoutPrefixProvider *dpp, optional_yield y) +{ + std::unique_ptr bucket; + int ret = sd->driver->get_bucket(dpp, nullptr, tenant, name, &bucket, y); + if (ret < 0) { + return ret; + } + + bucket_info = bucket->get_info(); + mtime = bucket->get_modification_time(); + attrs = bucket->get_attrs(); + + return finish_init(); +} + +int RGWDataAccess::Bucket::init(const RGWBucketInfo& _bucket_info, + const map& _attrs) +{ + bucket_info = _bucket_info; + attrs = _attrs; + + return finish_init(); +} + +int RGWDataAccess::Bucket::get_object(const rgw_obj_key& key, + ObjectRef *obj) { + obj->reset(new Object(sd, shared_from_this(), key)); + return 0; +} + +int RGWDataAccess::Object::put(bufferlist& data, + map& attrs, + const DoutPrefixProvider *dpp, + optional_yield y) +{ + rgw::sal::Driver* driver = sd->driver; + CephContext *cct = 
driver->ctx(); + + string tag; + append_rand_alpha(cct, tag, tag, 32); + + RGWBucketInfo& bucket_info = bucket->bucket_info; + + rgw::BlockingAioThrottle aio(driver->ctx()->_conf->rgw_put_obj_min_window_size); + + std::unique_ptr b; + driver->get_bucket(NULL, bucket_info, &b); + std::unique_ptr obj = b->get_object(key); + + auto& owner = bucket->policy.get_owner(); + + string req_id = driver->zone_unique_id(driver->get_new_req_id()); + + std::unique_ptr processor; + processor = driver->get_atomic_writer(dpp, y, std::move(obj), + owner.get_id(), + nullptr, olh_epoch, req_id); + + int ret = processor->prepare(y); + if (ret < 0) + return ret; + + rgw::sal::DataProcessor *filter = processor.get(); + + CompressorRef plugin; + boost::optional compressor; + + const auto& compression_type = driver->get_compression_type(bucket_info.placement_rule); + if (compression_type != "none") { + plugin = Compressor::create(driver->ctx(), compression_type); + if (!plugin) { + ldpp_dout(dpp, 1) << "Cannot load plugin for compression type " + << compression_type << dendl; + } else { + compressor.emplace(driver->ctx(), plugin, filter); + filter = &*compressor; + } + } + + off_t ofs = 0; + auto obj_size = data.length(); + + RGWMD5Etag etag_calc; + + do { + size_t read_len = std::min(data.length(), (unsigned int)cct->_conf->rgw_max_chunk_size); + + bufferlist bl; + + data.splice(0, read_len, &bl); + etag_calc.update(bl); + + ret = filter->process(std::move(bl), ofs); + if (ret < 0) + return ret; + + ofs += read_len; + } while (data.length() > 0); + + ret = filter->process({}, ofs); + if (ret < 0) { + return ret; + } + bool has_etag_attr = false; + auto iter = attrs.find(RGW_ATTR_ETAG); + if (iter != attrs.end()) { + bufferlist& bl = iter->second; + etag = bl.to_str(); + has_etag_attr = true; + } + + if (!aclbl) { + RGWAccessControlPolicy_S3 policy(cct); + + policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */ + + policy.encode(aclbl.emplace()); + } + + if (etag.empty()) { + etag_calc.finish(&etag); + } + + if (!has_etag_attr) { + bufferlist etagbl; + etagbl.append(etag); + attrs[RGW_ATTR_ETAG] = etagbl; + } + attrs[RGW_ATTR_ACL] = *aclbl; + + string *puser_data = nullptr; + if (user_data) { + puser_data = &(*user_data); + } + + return processor->complete(obj_size, etag, + &mtime, mtime, + attrs, delete_at, + nullptr, nullptr, + puser_data, + nullptr, nullptr, y); +} + +void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy) +{ + policy.encode(aclbl.emplace()); +} + +void rgw_complete_aio_completion(librados::AioCompletion* c, int r) { + auto pc = c->pc; + librados::CB_AioCompleteAndSafe cb(pc); + cb(r); +} diff --git a/src/rgw/driver/rados/rgw_tools.h b/src/rgw/driver/rados/rgw_tools.h new file mode 100644 index 00000000000..6aeb9b89100 --- /dev/null +++ b/src/rgw/driver/rados/rgw_tools.h @@ -0,0 +1,277 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_TOOLS_H +#define CEPH_RGW_TOOLS_H + +#include + +#include "include/types.h" +#include "include/ceph_hash.h" + +#include "common/ceph_time.h" + +#include "rgw_common.h" +#include "rgw_sal_fwd.h" + +class RGWSI_SysObj; + +class RGWRados; +struct RGWObjVersionTracker; +class optional_yield; + +struct obj_version; + + +int rgw_init_ioctx(const DoutPrefixProvider *dpp, + librados::Rados *rados, const rgw_pool& pool, + librados::IoCtx& ioctx, + bool create = false, + bool mostly_omap = false); + +#define 
RGW_NO_SHARD -1 + +#define RGW_SHARDS_PRIME_0 7877 +#define RGW_SHARDS_PRIME_1 65521 + +extern const std::string MP_META_SUFFIX; + +inline int rgw_shards_max() +{ + return RGW_SHARDS_PRIME_1; +} + +// only called by rgw_shard_id and rgw_bucket_shard_index +static inline int rgw_shards_mod(unsigned hval, int max_shards) +{ + if (max_shards <= RGW_SHARDS_PRIME_0) { + return hval % RGW_SHARDS_PRIME_0 % max_shards; + } + return hval % RGW_SHARDS_PRIME_1 % max_shards; +} + +// used for logging and tagging +inline int rgw_shard_id(const std::string& key, int max_shards) +{ + return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), + max_shards); +} + +void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& key, std::string& name, int *shard_id); +void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& section, const std::string& key, std::string& name); +void rgw_shard_name(const std::string& prefix, unsigned shard_id, std::string& name); + +int rgw_put_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, + const rgw_pool& pool, const std::string& oid, + bufferlist& data, bool exclusive, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime, optional_yield y, + std::map<std::string, bufferlist> *pattrs = nullptr); +int rgw_get_system_obj(RGWSI_SysObj* svc_sysobj, const rgw_pool& pool, + const std::string& key, bufferlist& bl, + RGWObjVersionTracker *objv_tracker, real_time *pmtime, + optional_yield y, const DoutPrefixProvider *dpp, + std::map<std::string, bufferlist> *pattrs = nullptr, + rgw_cache_entry_info *cache_info = nullptr, + boost::optional<obj_version> refresh_version = boost::none, + bool raw_attrs=false); +int rgw_delete_system_obj(const DoutPrefixProvider *dpp, + RGWSI_SysObj *sysobj_svc, const rgw_pool& pool, const std::string& oid, + RGWObjVersionTracker *objv_tracker, optional_yield y); +int rgw_stat_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, + const rgw_pool& pool, const std::string& key, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, optional_yield y, + std::map<std::string, bufferlist> *pattrs = nullptr); + +const char *rgw_find_mime_by_ext(std::string& ext); + +void rgw_filter_attrset(std::map<std::string, bufferlist>& unfiltered_attrset, const std::string& check_prefix, + std::map<std::string, bufferlist> *attrset); + +/// indicates whether the current thread is in boost::asio::io_context::run(), +/// used to log warnings if synchronous librados calls are made +extern thread_local bool is_asio_thread; + +/// perform the rados operation, using the yield context when given +int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectReadOperation *op, bufferlist* pbl, + optional_yield y, int flags = 0); +int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + librados::ObjectWriteOperation *op, optional_yield y, + int flags = 0); +int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, + bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, + optional_yield y); + +int rgw_tools_init(const DoutPrefixProvider *dpp, CephContext *cct); +void rgw_tools_cleanup(); + +template <class H, size_t S> +class RGWEtag +{ + H hash; + +public: + RGWEtag() { + if constexpr (std::is_same_v<H, MD5>) { + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + } + } + + void update(const char *buf, size_t len) { + hash.Update((const unsigned char *)buf, len); + } + + void update(bufferlist& bl) { + if (bl.length() > 0) { 
+ update(bl.c_str(), bl.length()); + } + } + + void update(const std::string& s) { + if (!s.empty()) { + update(s.c_str(), s.size()); + } + } + void finish(std::string *etag) { + char etag_buf[S]; + char etag_buf_str[S * 2 + 16]; + + hash.Final((unsigned char *)etag_buf); + buf_to_hex((const unsigned char *)etag_buf, S, + etag_buf_str); + + *etag = etag_buf_str; + } +}; + +using RGWMD5Etag = RGWEtag; + +class RGWDataAccess +{ + rgw::sal::Driver* driver; + +public: + RGWDataAccess(rgw::sal::Driver* _driver); + + class Object; + class Bucket; + + using BucketRef = std::shared_ptr; + using ObjectRef = std::shared_ptr; + + class Bucket : public std::enable_shared_from_this { + friend class RGWDataAccess; + friend class Object; + + RGWDataAccess *sd{nullptr}; + RGWBucketInfo bucket_info; + std::string tenant; + std::string name; + std::string bucket_id; + ceph::real_time mtime; + std::map attrs; + + RGWAccessControlPolicy policy; + int finish_init(); + + Bucket(RGWDataAccess *_sd, + const std::string& _tenant, + const std::string& _name, + const std::string& _bucket_id) : sd(_sd), + tenant(_tenant), + name(_name), + bucket_id(_bucket_id) {} + Bucket(RGWDataAccess *_sd) : sd(_sd) {} + int init(const DoutPrefixProvider *dpp, optional_yield y); + int init(const RGWBucketInfo& _bucket_info, const std::map& _attrs); + public: + int get_object(const rgw_obj_key& key, + ObjectRef *obj); + + }; + + + class Object { + RGWDataAccess *sd{nullptr}; + BucketRef bucket; + rgw_obj_key key; + + ceph::real_time mtime; + std::string etag; + uint64_t olh_epoch{0}; + ceph::real_time delete_at; + std::optional user_data; + + std::optional aclbl; + + Object(RGWDataAccess *_sd, + BucketRef&& _bucket, + const rgw_obj_key& _key) : sd(_sd), + bucket(_bucket), + key(_key) {} + public: + int put(bufferlist& data, std::map& attrs, const DoutPrefixProvider *dpp, optional_yield y); /* might modify attrs */ + + void set_mtime(const ceph::real_time& _mtime) { + mtime = _mtime; + } + + void set_etag(const std::string& _etag) { + etag = _etag; + } + + void set_olh_epoch(uint64_t epoch) { + olh_epoch = epoch; + } + + void set_delete_at(ceph::real_time _delete_at) { + delete_at = _delete_at; + } + + void set_user_data(const std::string& _user_data) { + user_data = _user_data; + } + + void set_policy(const RGWAccessControlPolicy& policy); + + friend class Bucket; + }; + + int get_bucket(const DoutPrefixProvider *dpp, + const std::string& tenant, + const std::string name, + const std::string bucket_id, + BucketRef *bucket, + optional_yield y) { + bucket->reset(new Bucket(this, tenant, name, bucket_id)); + return (*bucket)->init(dpp, y); + } + + int get_bucket(const RGWBucketInfo& bucket_info, + const std::map& attrs, + BucketRef *bucket) { + bucket->reset(new Bucket(this)); + return (*bucket)->init(bucket_info, attrs); + } + friend class Bucket; + friend class Object; +}; + +using RGWDataAccessRef = std::shared_ptr; + +/// Complete an AioCompletion. To return error values or otherwise +/// satisfy the caller. Useful for making complicated asynchronous +/// calls and error handling. +void rgw_complete_aio_completion(librados::AioCompletion* c, int r); + +/// This returns a static, non-NULL pointer, recognized only by +/// rgw_put_system_obj(). When supplied instead of the attributes, the +/// attributes will be unmodified. +/// +// (Currently providing nullptr will wipe all attributes.) 
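// Editor's usage sketch (hypothetical pool/oid, error handling elided), not
// part of the original patch: passing no_change_attrs() rewrites a system
// object's data while leaving its xattrs untouched --
//
//   RGWObjVersionTracker objv;
//   int r = rgw_put_system_obj(dpp, svc_sysobj, pool, "myoid", bl,
//                              false /* exclusive */, &objv,
//                              ceph::real_clock::now(), y, no_change_attrs());
//
// whereas the default pattrs = nullptr stores the object with an empty
// attribute set, as the comment above warns.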
+ +std::map* no_change_attrs(); +#endif diff --git a/src/rgw/driver/rados/rgw_trim_bilog.cc b/src/rgw/driver/rados/rgw_trim_bilog.cc new file mode 100644 index 00000000000..6ddda5d6b17 --- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_bilog.cc @@ -0,0 +1,1445 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 Red Hat, Inc + * + * Author: Casey Bodley + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include +#include +#include + +#include "include/scope_guard.h" +#include "common/bounded_key_counter.h" +#include "common/errno.h" +#include "rgw_trim_bilog.h" +#include "rgw_cr_rados.h" +#include "rgw_cr_rest.h" +#include "rgw_cr_tools.h" +#include "rgw_data_sync.h" +#include "rgw_metadata.h" +#include "rgw_sal.h" +#include "rgw_zone.h" +#include "rgw_sync.h" +#include "rgw_bucket.h" + +#include "services/svc_zone.h" +#include "services/svc_meta.h" +#include "services/svc_bilog_rados.h" + +#include +#include "include/ceph_assert.h" + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "trim: ") + +using namespace std; + +using rgw::BucketTrimConfig; +using BucketChangeCounter = BoundedKeyCounter; + +const std::string rgw::BucketTrimStatus::oid = "bilog.trim"; +using rgw::BucketTrimStatus; + + +// watch/notify api for gateways to coordinate about which buckets to trim +enum TrimNotifyType { + NotifyTrimCounters = 0, + NotifyTrimComplete, +}; +WRITE_RAW_ENCODER(TrimNotifyType); + +struct TrimNotifyHandler { + virtual ~TrimNotifyHandler() = default; + + virtual void handle(bufferlist::const_iterator& input, bufferlist& output) = 0; +}; + +/// api to share the bucket trim counters between gateways in the same zone. 
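/// (Editor's note, an illustration rather than part of the original patch:
/// each notify payload is framed as a TrimNotifyType tag followed by the
/// request body, e.g.
///
///   bufferlist bl;
///   TrimNotifyType type = NotifyTrimCounters;
///   encode(type, bl);
///   TrimCounters::Request request{32};  // ask peers for their top 32 buckets
///   encode(request, bl);
///
/// which is how BucketTrimCR builds its broadcast further below.)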
+/// each gateway will process different datalog shards, so the gateway that runs +/// the trim process needs to accumulate their counters +struct TrimCounters { + /// counter for a single bucket + struct BucketCounter { + std::string bucket; //< bucket instance metadata key + int count{0}; + + BucketCounter() = default; + BucketCounter(const std::string& bucket, int count) + : bucket(bucket), count(count) {} + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& p); + }; + using Vector = std::vector; + + /// request bucket trim counters from peer gateways + struct Request { + uint16_t max_buckets; //< maximum number of bucket counters to return + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& p); + }; + + /// return the current bucket trim counters + struct Response { + Vector bucket_counters; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& p); + }; + + /// server interface to query the hottest buckets + struct Server { + virtual ~Server() = default; + + virtual void get_bucket_counters(int count, Vector& counters) = 0; + virtual void reset_bucket_counters() = 0; + }; + + /// notify handler + class Handler : public TrimNotifyHandler { + Server *const server; + public: + explicit Handler(Server *server) : server(server) {} + + void handle(bufferlist::const_iterator& input, bufferlist& output) override; + }; +}; +std::ostream& operator<<(std::ostream& out, const TrimCounters::BucketCounter& rhs) +{ + return out << rhs.bucket << ":" << rhs.count; +} + +void TrimCounters::BucketCounter::encode(bufferlist& bl) const +{ + using ceph::encode; + // no versioning to save space + encode(bucket, bl); + encode(count, bl); +} +void TrimCounters::BucketCounter::decode(bufferlist::const_iterator& p) +{ + using ceph::decode; + decode(bucket, p); + decode(count, p); +} +WRITE_CLASS_ENCODER(TrimCounters::BucketCounter); + +void TrimCounters::Request::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + encode(max_buckets, bl); + ENCODE_FINISH(bl); +} +void TrimCounters::Request::decode(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + decode(max_buckets, p); + DECODE_FINISH(p); +} +WRITE_CLASS_ENCODER(TrimCounters::Request); + +void TrimCounters::Response::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + encode(bucket_counters, bl); + ENCODE_FINISH(bl); +} +void TrimCounters::Response::decode(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + decode(bucket_counters, p); + DECODE_FINISH(p); +} +WRITE_CLASS_ENCODER(TrimCounters::Response); + +void TrimCounters::Handler::handle(bufferlist::const_iterator& input, + bufferlist& output) +{ + Request request; + decode(request, input); + auto count = std::min(request.max_buckets, 128); + + Response response; + server->get_bucket_counters(count, response.bucket_counters); + encode(response, output); +} + +/// api to notify peer gateways that trim has completed and their bucket change +/// counters can be reset +struct TrimComplete { + struct Request { + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& p); + }; + struct Response { + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& p); + }; + + /// server interface to reset bucket counters + using Server = TrimCounters::Server; + + /// notify handler + class Handler : public TrimNotifyHandler { + Server *const server; + public: + explicit Handler(Server *server) : server(server) {} + + void handle(bufferlist::const_iterator& input, 
bufferlist& output) override; + }; +}; + +void TrimComplete::Request::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + ENCODE_FINISH(bl); +} +void TrimComplete::Request::decode(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + DECODE_FINISH(p); +} +WRITE_CLASS_ENCODER(TrimComplete::Request); + +void TrimComplete::Response::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); + ENCODE_FINISH(bl); +} +void TrimComplete::Response::decode(bufferlist::const_iterator& p) +{ + DECODE_START(1, p); + DECODE_FINISH(p); +} +WRITE_CLASS_ENCODER(TrimComplete::Response); + +void TrimComplete::Handler::handle(bufferlist::const_iterator& input, + bufferlist& output) +{ + Request request; + decode(request, input); + + server->reset_bucket_counters(); + + Response response; + encode(response, output); +} + + +/// rados watcher for bucket trim notifications +class BucketTrimWatcher : public librados::WatchCtx2 { + rgw::sal::RadosStore* const store; + const rgw_raw_obj& obj; + rgw_rados_ref ref; + uint64_t handle{0}; + + using HandlerPtr = std::unique_ptr; + boost::container::flat_map handlers; + + public: + BucketTrimWatcher(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, + TrimCounters::Server *counters) + : store(store), obj(obj) { + handlers.emplace(NotifyTrimCounters, new TrimCounters::Handler(counters)); + handlers.emplace(NotifyTrimComplete, new TrimComplete::Handler(counters)); + } + + ~BucketTrimWatcher() { + stop(); + } + + int start(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + return r; + } + + // register a watch on the realm's control object + r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + if (r == -ENOENT) { + constexpr bool exclusive = true; + r = ref.pool.ioctx().create(ref.obj.oid, exclusive); + if (r == -EEXIST || r == 0) { + r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + } + } + if (r < 0) { + ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj + << " with " << cpp_strerror(-r) << dendl; + ref.pool.ioctx().close(); + return r; + } + + ldpp_dout(dpp, 10) << "Watching " << ref.obj.oid << dendl; + return 0; + } + + int restart() { + int r = ref.pool.ioctx().unwatch2(handle); + if (r < 0) { + lderr(store->ctx()) << "Failed to unwatch on " << ref.obj + << " with " << cpp_strerror(-r) << dendl; + } + r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + if (r < 0) { + lderr(store->ctx()) << "Failed to restart watch on " << ref.obj + << " with " << cpp_strerror(-r) << dendl; + ref.pool.ioctx().close(); + } + return r; + } + + void stop() { + if (handle) { + ref.pool.ioctx().unwatch2(handle); + ref.pool.ioctx().close(); + } + } + + /// respond to bucket trim notifications + void handle_notify(uint64_t notify_id, uint64_t cookie, + uint64_t notifier_id, bufferlist& bl) override { + if (cookie != handle) { + return; + } + bufferlist reply; + try { + auto p = bl.cbegin(); + TrimNotifyType type; + decode(type, p); + + auto handler = handlers.find(type); + if (handler != handlers.end()) { + handler->second->handle(p, reply); + } else { + lderr(store->ctx()) << "no handler for notify type " << type << dendl; + } + } catch (const buffer::error& e) { + lderr(store->ctx()) << "Failed to decode notification: " << e.what() << dendl; + } + ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply); + } + + /// reestablish the watch if it gets disconnected + void handle_error(uint64_t cookie, int err) override { + if (cookie != handle) { + return; + } + if (err == -ENOTCONN) { + 
ldout(store->ctx(), 4) << "Disconnected watch on " << ref.obj << dendl; + restart(); + } + } +}; + + +/// Interface to communicate with the trim manager about completed operations +struct BucketTrimObserver { + virtual ~BucketTrimObserver() = default; + + virtual void on_bucket_trimmed(std::string&& bucket_instance) = 0; + virtual bool trimmed_recently(const std::string_view& bucket_instance) = 0; +}; + +/// trim each bilog shard to the given marker, while limiting the number of +/// concurrent requests +class BucketTrimShardCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* const store; + const RGWBucketInfo& bucket_info; + rgw::bucket_index_layout_generation generation; + const std::vector& markers; //< shard markers to trim + size_t i{0}; //< index of current shard marker + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim bilog shard: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + BucketTrimShardCollectCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info, + const rgw::bucket_index_layout_generation& generation, + const std::vector& markers) + : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS), + dpp(dpp), store(store), bucket_info(bucket_info), + generation(generation), markers(markers) + {} + bool spawn_next() override; +}; + +bool BucketTrimShardCollectCR::spawn_next() +{ + while (i < markers.size()) { + const auto& marker = markers[i]; + const auto shard_id = i++; + + // skip empty markers + if (!marker.empty()) { + ldpp_dout(dpp, 10) << "trimming bilog shard " << shard_id + << " of " << bucket_info.bucket << " at marker " << marker << dendl; + spawn(new RGWRadosBILogTrimCR(dpp, store, bucket_info, shard_id, + generation, std::string{}, marker), + false); + return true; + } + } + return false; +} + +/// Delete a BI generation, limiting the number of requests in flight. 
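/// (Editor's note: like the trim collector above, this relies on the
/// RGWShardCollectCR pattern -- the base coroutine keeps calling
/// spawn_next() until it returns false while holding at most
/// MAX_CONCURRENT_SHARDS children in flight, and funnels each child's
/// return code through handle_result(), which is why -ENOENT from an
/// already-removed shard object is tolerated rather than fatal.)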
+class BucketCleanIndexCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* const store; + const RGWBucketInfo& bucket_info; + rgw::bucket_index_layout_generation index; + uint32_t shard = 0; + const uint32_t num_shards = rgw::num_shards(index); + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "clean index: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + BucketCleanIndexCollectCR(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + const RGWBucketInfo& bucket_info, + rgw::bucket_index_layout_generation index) + : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS), + dpp(dpp), store(store), bucket_info(bucket_info), + index(index) + {} + bool spawn_next() override { + if (shard < num_shards) { + RGWRados::BucketShard bs(store->getRados()); + bs.init(dpp, bucket_info, index, shard); + spawn(new RGWRadosRemoveOidCR(store, std::move(bs.bucket_obj), nullptr), + false); + ++shard; + return true; + } else { + return false; + } + } +}; + + +/// trim the bilog of all of the given bucket instance's shards +class BucketTrimInstanceCR : public RGWCoroutine { + static constexpr auto MAX_RETRIES = 25u; + rgw::sal::RadosStore* const store; + RGWHTTPManager *const http; + BucketTrimObserver *const observer; + std::string bucket_instance; + rgw_bucket_get_sync_policy_params get_policy_params; + std::shared_ptr source_policy; + rgw_bucket bucket; + const std::string& zone_id; //< my zone id + RGWBucketInfo _bucket_info; + const RGWBucketInfo *pbucket_info; //< pointer to bucket instance info to locate bucket indices + int child_ret = 0; + const DoutPrefixProvider *dpp; +public: + struct StatusShards { + uint64_t generation = 0; + std::vector shards; + }; +private: + std::vector peer_status; //< sync status for each peer + std::vector min_markers; //< min marker per shard + + /// The log generation to trim + rgw::bucket_log_layout_generation totrim; + + /// Generation to be cleaned/New bucket info (if any) + std::optional> clean_info; + /// Maximum number of times to attempt to put bucket info + unsigned retries = 0; + + int take_min_generation() { + // Initialize the min_generation to the bucket's current + // generation, used in case we have no peers. + auto min_generation = pbucket_info->layout.logs.back().gen; + + // Determine the minimum generation + if (auto m = std::min_element(peer_status.begin(), + peer_status.end(), + [](const StatusShards& l, + const StatusShards& r) { + return l.generation < r.generation; + }); m != peer_status.end()) { + min_generation = m->generation; + } + + auto& logs = pbucket_info->layout.logs; + auto log = std::find_if(logs.begin(), logs.end(), + rgw::matches_gen(min_generation)); + if (log == logs.end()) { + ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << "ERROR: No log layout for min_generation=" + << min_generation << dendl; + return -ENOENT; + } + + totrim = *log; + return 0; + } + + /// If there is a generation below the minimum, prepare to clean it up. + int maybe_remove_generation() { + if (clean_info) + return 0; + + + if (pbucket_info->layout.logs.front().gen < totrim.gen) { + clean_info = {*pbucket_info, {}}; + auto log = clean_info->first.layout.logs.cbegin(); + clean_info->second = *log; + + if (clean_info->first.layout.logs.size() == 1) { + ldpp_dout(dpp, -1) + << "Critical error! Attempt to remove only log generation! 
" + << "log.gen=" << log->gen << ", totrim.gen=" << totrim.gen + << dendl; + return -EIO; + } + clean_info->first.layout.logs.erase(log); + } + return 0; + } + + public: + BucketTrimInstanceCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, + BucketTrimObserver *observer, + const std::string& bucket_instance, + const DoutPrefixProvider *dpp) + : RGWCoroutine(store->ctx()), store(store), + http(http), observer(observer), + bucket_instance(bucket_instance), + zone_id(store->svc()->zone->get_zone().id), + dpp(dpp) { + rgw_bucket_parse_bucket_key(cct, bucket_instance, &bucket, nullptr); + source_policy = make_shared(); + } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +namespace { +/// populate the status with the minimum stable marker of each shard +int take_min_status( + CephContext *cct, + const uint64_t min_generation, + std::vector::const_iterator first, + std::vector::const_iterator last, + std::vector *status) { + for (auto peer = first; peer != last; ++peer) { + // Peers on later generations don't get a say in the matter + if (peer->generation > min_generation) { + continue; + } + if (peer->shards.size() != status->size()) { + // all peers must agree on the number of shards + return -EINVAL; + } + + auto m = status->begin(); + for (auto& shard : peer->shards) { + auto& marker = *m++; + // always take the first marker, or any later marker that's smaller + if (peer == first || marker > shard.inc_marker.position) { + marker = std::move(shard.inc_marker.position); + } + } + } + return 0; +} +} + +template<> +inline int parse_decode_json( + BucketTrimInstanceCR::StatusShards& s, bufferlist& bl) +{ + JSONParser p; + if (!p.parse(bl.c_str(), bl.length())) { + return -EINVAL; + } + + try { + bilog_status_v2 v; + decode_json_obj(v, &p); + s.generation = v.sync_status.incremental_gen; + s.shards = std::move(v.inc_status); + } catch (JSONDecoder::err& e) { + try { + // Fall back if we're talking to an old node that can't give v2 + // output. 
+ s.generation = 0; + decode_json_obj(s.shards, &p); + } catch (JSONDecoder::err& e) { + return -EINVAL; + } + } + return 0; +} + +int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + ldpp_dout(dpp, 4) << "starting trim on bucket=" << bucket_instance << dendl; + + get_policy_params.zone = zone_id; + get_policy_params.bucket = bucket; + yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->rados->get_async_processor(), + store, + get_policy_params, + source_policy, + dpp)); + if (retcode < 0) { + if (retcode != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: failed to fetch policy handler for bucket=" << bucket << dendl; + } + + return set_cr_error(retcode); + } + + if (auto& opt_bucket_info = source_policy->policy_handler->get_bucket_info(); + opt_bucket_info) { + pbucket_info = &(*opt_bucket_info); + } else { + /* this shouldn't really happen */ + return set_cr_error(-ENOENT); + } + + if (pbucket_info->layout.logs.empty()) { + return set_cr_done(); // no bilogs to trim + } + + // query peers for sync status + set_status("fetching sync status from relevant peers"); + yield { + const auto& all_dests = source_policy->policy_handler->get_all_dests(); + + vector zids; + rgw_zone_id last_zid; + for (auto& diter : all_dests) { + const auto& zid = diter.first; + if (zid == last_zid) { + continue; + } + last_zid = zid; + zids.push_back(zid); + } + + peer_status.resize(zids.size()); + + auto& zone_conn_map = store->svc()->zone->get_zone_conn_map(); + + auto p = peer_status.begin(); + for (auto& zid : zids) { + // query data sync status from each sync peer + rgw_http_param_pair params[] = { + { "type", "bucket-index" }, + { "status", nullptr }, + { "options", "merge" }, + { "bucket", bucket_instance.c_str() }, /* equal to source-bucket when `options==merge` and source-bucket + param is not provided */ + { "source-zone", zone_id.c_str() }, + { "version", "2" }, + { nullptr, nullptr } + }; + + auto ziter = zone_conn_map.find(zid); + if (ziter == zone_conn_map.end()) { + ldpp_dout(dpp, 0) << "WARNING: no connection to zone " << zid << ", can't trim bucket: " << bucket << dendl; + return set_cr_error(-ECANCELED); + } + + using StatusCR = RGWReadRESTResourceCR; + spawn(new StatusCR(cct, ziter->second, http, "/admin/log/", params, &*p), + false); + ++p; + } + } + // wait for a response from each peer. 
all must respond to attempt trim + while (num_spawned()) { + yield wait_for_child(); + collect(&child_ret, nullptr); + if (child_ret < 0) { + drain_all(); + return set_cr_error(child_ret); + } + } + + // Determine the minimum generation + retcode = take_min_generation(); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to find minimum generation" << dendl; + return set_cr_error(retcode); + } + retcode = maybe_remove_generation(); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "error removing old generation from log: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + if (clean_info) { + if (clean_info->second.layout.type != rgw::BucketLogType::InIndex) { + ldpp_dout(dpp, 0) << "Unable to convert log of unknown type " + << clean_info->second.layout.type + << " to rgw::bucket_index_layout_generation " << dendl; + return set_cr_error(-EINVAL); + } + + yield call(new BucketCleanIndexCollectCR(dpp, store, clean_info->first, + clean_info->second.layout.in_index)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "failed to remove previous generation: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + while (clean_info && retries < MAX_RETRIES) { + yield call(new RGWPutBucketInstanceInfoCR( + store->svc()->rados->get_async_processor(), + store, clean_info->first, false, {}, + no_change_attrs(), dpp)); + + // Raced, try again. + if (retcode == -ECANCELED) { + yield call(new RGWGetBucketInstanceInfoCR( + store->svc()->rados->get_async_processor(), + store, clean_info->first.bucket, + &(clean_info->first), nullptr, dpp)); + if (retcode < 0) { + ldpp_dout(dpp, 0) << "failed to get bucket info: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (clean_info->first.layout.logs.front().gen == + clean_info->second.gen) { + clean_info->first.layout.logs.erase( + clean_info->first.layout.logs.begin()); + ++retries; + continue; + } + // Raced, but someone else did what we needed to. + retcode = 0; + } + + if (retcode < 0) { + ldpp_dout(dpp, 0) << "failed to put bucket info: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + clean_info = std::nullopt; + } + } else { + if (totrim.layout.type != rgw::BucketLogType::InIndex) { + ldpp_dout(dpp, 0) << "Unable to convert log of unknown type " + << totrim.layout.type + << " to rgw::bucket_index_layout_generation " << dendl; + return set_cr_error(-EINVAL); + } + // To avoid hammering the OSD too hard, either trim old + // generations OR trim the current one. 
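// (Editor's worked example with hypothetical markers: if two peers on the
// generation being trimmed report per-shard positions {"A", "C"} and
// {"B", "B"}, take_min_status() below keeps the smaller marker for each
// shard, so trimming stops at {"A", "B"} and no entry a peer still needs
// is removed.)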
+ + // determine the minimum marker for each shard + + // initialize each shard with the maximum marker, which is only used when + // there are no peers syncing from us + min_markers.assign(std::max(1u, rgw::num_shards(totrim.layout.in_index)), + RGWSyncLogTrimCR::max_marker); + + + retcode = take_min_status(cct, totrim.gen, peer_status.cbegin(), + peer_status.cend(), &min_markers); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to correlate bucket sync status from peers" << dendl; + return set_cr_error(retcode); + } + + // trim shards with a ShardCollectCR + ldpp_dout(dpp, 10) << "trimming bilogs for bucket=" << pbucket_info->bucket + << " markers=" << min_markers << ", shards=" << min_markers.size() << dendl; + set_status("trimming bilog shards"); + yield call(new BucketTrimShardCollectCR(dpp, store, *pbucket_info, totrim.layout.in_index, + min_markers)); + // ENODATA just means there were no keys to trim + if (retcode == -ENODATA) { + retcode = 0; + } + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to trim bilog shards: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + } + + observer->on_bucket_trimmed(std::move(bucket_instance)); + return set_cr_done(); + } + return 0; +} + +/// trim each bucket instance while limiting the number of concurrent operations + +class BucketTrimInstanceCollectCR : public RGWShardCollectCR { + rgw::sal::RadosStore* const store; + RGWHTTPManager *const http; + BucketTrimObserver *const observer; + std::vector::const_iterator bucket; + std::vector::const_iterator end; + const DoutPrefixProvider *dpp; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim bucket instance: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + BucketTrimInstanceCollectCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, + BucketTrimObserver *observer, + const std::vector& buckets, + int max_concurrent, + const DoutPrefixProvider *dpp) + : RGWShardCollectCR(store->ctx(), max_concurrent), + store(store), http(http), observer(observer), + bucket(buckets.begin()), end(buckets.end()), + dpp(dpp) + {} + bool spawn_next() override; +}; + +bool BucketTrimInstanceCollectCR::spawn_next() +{ + if (bucket == end) { + return false; + } + spawn(new BucketTrimInstanceCR(store, http, observer, *bucket, dpp), false); + ++bucket; + return true; +} + +/// correlate the replies from each peer gateway into the given counter +int accumulate_peer_counters(bufferlist& bl, BucketChangeCounter& counter) +{ + counter.clear(); + + try { + // decode notify responses + auto p = bl.cbegin(); + std::map, bufferlist> replies; + std::set> timeouts; + decode(replies, p); + decode(timeouts, p); + + for (auto& peer : replies) { + auto q = peer.second.cbegin(); + TrimCounters::Response response; + decode(response, q); + for (const auto& b : response.bucket_counters) { + counter.insert(b.bucket, b.count); + } + } + } catch (const buffer::error& e) { + return -EIO; + } + return 0; +} + +/// metadata callback has the signature bool(string&& key, string&& marker) +using MetadataListCallback = std::function; + +/// lists metadata keys, passing each to a callback until it returns false. 
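/// (editor's usage sketch with hypothetical values: listing section
/// "bucket.instance" from start_marker "mybucket:123" invokes the callback
/// as callback("mybucket2:456", marker) for each following key, until the
/// callback declines or the wrap-around described next reaches the start;
/// BucketTrimCR below uses exactly this to pick cold buckets for trim.)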
+/// on reaching the end, it will restart at the beginning and list up to the +/// initial marker +class AsyncMetadataList : public RGWAsyncRadosRequest { + CephContext *const cct; + RGWMetadataManager *const mgr; + const std::string section; + const std::string start_marker; + MetadataListCallback callback; + + int _send_request(const DoutPrefixProvider *dpp) override; + public: + AsyncMetadataList(CephContext *cct, RGWCoroutine *caller, + RGWAioCompletionNotifier *cn, RGWMetadataManager *mgr, + const std::string& section, const std::string& start_marker, + const MetadataListCallback& callback) + : RGWAsyncRadosRequest(caller, cn), cct(cct), mgr(mgr), + section(section), start_marker(start_marker), callback(callback) + {} +}; + +int AsyncMetadataList::_send_request(const DoutPrefixProvider *dpp) +{ + void* handle = nullptr; + std::list keys; + bool truncated{false}; + std::string marker; + + // start a listing at the given marker + int r = mgr->list_keys_init(dpp, section, start_marker, &handle); + if (r == -EINVAL) { + // restart with empty marker below + } else if (r < 0) { + ldpp_dout(dpp, 10) << "failed to init metadata listing: " + << cpp_strerror(r) << dendl; + return r; + } else { + ldpp_dout(dpp, 20) << "starting metadata listing at " << start_marker << dendl; + + // release the handle when scope exits + auto g = make_scope_guard([=, this] { mgr->list_keys_complete(handle); }); + + do { + // get the next key and marker + r = mgr->list_keys_next(dpp, handle, 1, keys, &truncated); + if (r < 0) { + ldpp_dout(dpp, 10) << "failed to list metadata: " + << cpp_strerror(r) << dendl; + return r; + } + marker = mgr->get_marker(handle); + + if (!keys.empty()) { + ceph_assert(keys.size() == 1); + auto& key = keys.front(); + if (!callback(std::move(key), std::move(marker))) { + return 0; + } + } + } while (truncated); + + if (start_marker.empty()) { + // already listed all keys + return 0; + } + } + + // restart the listing from the beginning (empty marker) + handle = nullptr; + + r = mgr->list_keys_init(dpp, section, "", &handle); + if (r < 0) { + ldpp_dout(dpp, 10) << "failed to restart metadata listing: " + << cpp_strerror(r) << dendl; + return r; + } + ldpp_dout(dpp, 20) << "restarting metadata listing" << dendl; + + // release the handle when scope exits + auto g = make_scope_guard([=, this] { mgr->list_keys_complete(handle); }); + do { + // get the next key and marker + r = mgr->list_keys_next(dpp, handle, 1, keys, &truncated); + if (r < 0) { + ldpp_dout(dpp, 10) << "failed to list metadata: " + << cpp_strerror(r) << dendl; + return r; + } + marker = mgr->get_marker(handle); + + if (!keys.empty()) { + ceph_assert(keys.size() == 1); + auto& key = keys.front(); + // stop at original marker + if (marker > start_marker) { + return 0; + } + if (!callback(std::move(key), std::move(marker))) { + return 0; + } + } + } while (truncated); + + return 0; +} + +/// coroutine wrapper for AsyncMetadataList +class MetadataListCR : public RGWSimpleCoroutine { + RGWAsyncRadosProcessor *const async_rados; + RGWMetadataManager *const mgr; + const std::string& section; + const std::string& start_marker; + MetadataListCallback callback; + RGWAsyncRadosRequest *req{nullptr}; + public: + MetadataListCR(CephContext *cct, RGWAsyncRadosProcessor *async_rados, + RGWMetadataManager *mgr, const std::string& section, + const std::string& start_marker, + const MetadataListCallback& callback) + : RGWSimpleCoroutine(cct), async_rados(async_rados), mgr(mgr), + section(section), start_marker(start_marker), 
callback(callback) + {} + ~MetadataListCR() override { + request_cleanup(); + } + + int send_request(const DoutPrefixProvider *dpp) override { + req = new AsyncMetadataList(cct, this, stack->create_completion_notifier(), + mgr, section, start_marker, callback); + async_rados->queue(req); + return 0; + } + int request_complete() override { + return req->get_ret_status(); + } + void request_cleanup() override { + if (req) { + req->finish(); + req = nullptr; + } + } +}; + +class BucketTrimCR : public RGWCoroutine { + rgw::sal::RadosStore* const store; + RGWHTTPManager *const http; + const BucketTrimConfig& config; + BucketTrimObserver *const observer; + const rgw_raw_obj& obj; + ceph::mono_time start_time; + bufferlist notify_replies; + BucketChangeCounter counter; + std::vector buckets; //< buckets selected for trim + BucketTrimStatus status; + RGWObjVersionTracker objv; //< version tracker for trim status object + std::string last_cold_marker; //< position for next trim marker + const DoutPrefixProvider *dpp; + + static const std::string section; //< metadata section for bucket instances + public: + BucketTrimCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, + const BucketTrimConfig& config, BucketTrimObserver *observer, + const rgw_raw_obj& obj, const DoutPrefixProvider *dpp) + : RGWCoroutine(store->ctx()), store(store), http(http), config(config), + observer(observer), obj(obj), counter(config.counter_size), dpp(dpp) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +const std::string BucketTrimCR::section{"bucket.instance"}; + +int BucketTrimCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + start_time = ceph::mono_clock::now(); + + if (config.buckets_per_interval) { + // query watch/notify for hot buckets + ldpp_dout(dpp, 10) << "fetching active bucket counters" << dendl; + set_status("fetching active bucket counters"); + yield { + // request the top bucket counters from each peer gateway + const TrimNotifyType type = NotifyTrimCounters; + TrimCounters::Request request{32}; + bufferlist bl; + encode(type, bl); + encode(request, bl); + call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms, + ¬ify_replies)); + } + if (retcode < 0) { + ldpp_dout(dpp, 10) << "failed to fetch peer bucket counters" << dendl; + return set_cr_error(retcode); + } + + // select the hottest buckets for trim + retcode = accumulate_peer_counters(notify_replies, counter); + if (retcode < 0) { + ldout(cct, 4) << "failed to correlate peer bucket counters" << dendl; + return set_cr_error(retcode); + } + buckets.reserve(config.buckets_per_interval); + + const int max_count = config.buckets_per_interval - + config.min_cold_buckets_per_interval; + counter.get_highest(max_count, + [this] (const std::string& bucket, int count) { + buckets.push_back(bucket); + }); + } + + if (buckets.size() < config.buckets_per_interval) { + // read BucketTrimStatus for marker position + set_status("reading trim status"); + using ReadStatus = RGWSimpleRadosReadCR; + yield call(new ReadStatus(dpp, store->svc()->rados->get_async_processor(), store->svc()->sysobj, obj, + &status, true, &objv)); + if (retcode < 0) { + ldpp_dout(dpp, 10) << "failed to read bilog trim status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (status.marker == "MAX") { + status.marker.clear(); // restart at the beginning + } + ldpp_dout(dpp, 10) << "listing cold buckets from marker=" + << status.marker << dendl; + + set_status("listing cold buckets for trim"); + yield { + // capture a 
reference so 'this' remains valid in the callback + auto ref = boost::intrusive_ptr{this}; + // list cold buckets to consider for trim + auto cb = [this, ref] (std::string&& bucket, std::string&& marker) { + // filter out keys that we trimmed recently + if (observer->trimmed_recently(bucket)) { + return true; + } + // filter out active buckets that we've already selected + auto i = std::find(buckets.begin(), buckets.end(), bucket); + if (i != buckets.end()) { + return true; + } + buckets.emplace_back(std::move(bucket)); + // remember the last cold bucket spawned to update the status marker + last_cold_marker = std::move(marker); + // return true if there's room for more + return buckets.size() < config.buckets_per_interval; + }; + + call(new MetadataListCR(cct, store->svc()->rados->get_async_processor(), + store->ctl()->meta.mgr, + section, status.marker, cb)); + } + if (retcode < 0) { + ldout(cct, 4) << "failed to list bucket instance metadata: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + } + + // trim bucket instances with limited concurrency + set_status("trimming buckets"); + ldpp_dout(dpp, 4) << "collected " << buckets.size() << " buckets for trim" << dendl; + yield call(new BucketTrimInstanceCollectCR(store, http, observer, buckets, + config.concurrent_buckets, dpp)); + // ignore errors from individual buckets + + // write updated trim status + if (!last_cold_marker.empty() && status.marker != last_cold_marker) { + set_status("writing updated trim status"); + status.marker = std::move(last_cold_marker); + ldpp_dout(dpp, 20) << "writing bucket trim marker=" << status.marker << dendl; + using WriteStatus = RGWSimpleRadosWriteCR; + yield call(new WriteStatus(dpp, store->svc()->rados->get_async_processor(), store->svc()->sysobj, obj, + status, &objv)); + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to write updated trim status: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + } + + // notify peers that trim completed + set_status("trim completed"); + yield { + const TrimNotifyType type = NotifyTrimComplete; + TrimComplete::Request request; + bufferlist bl; + encode(type, bl); + encode(request, bl); + call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms, + nullptr)); + } + if (retcode < 0) { + ldout(cct, 10) << "failed to notify peers of trim completion" << dendl; + return set_cr_error(retcode); + } + + ldpp_dout(dpp, 4) << "bucket index log processing completed in " + << ceph::mono_clock::now() - start_time << dendl; + return set_cr_done(); + } + return 0; +} + +class BucketTrimPollCR : public RGWCoroutine { + rgw::sal::RadosStore* const store; + RGWHTTPManager *const http; + const BucketTrimConfig& config; + BucketTrimObserver *const observer; + const rgw_raw_obj& obj; + const std::string name{"trim"}; //< lock name + const std::string cookie; + const DoutPrefixProvider *dpp; + + public: + BucketTrimPollCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, + const BucketTrimConfig& config, + BucketTrimObserver *observer, const rgw_raw_obj& obj, + const DoutPrefixProvider *dpp) + : RGWCoroutine(store->ctx()), store(store), http(http), + config(config), observer(observer), obj(obj), + cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), + dpp(dpp) {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + for (;;) { + set_status("sleeping"); + wait(utime_t{static_cast(config.trim_interval_sec), 0}); + + // prevent 
others from trimming for our entire wait interval + set_status("acquiring trim lock"); + yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + obj, name, cookie, + config.trim_interval_sec)); + if (retcode < 0) { + ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl; + continue; + } + + set_status("trimming"); + yield call(new BucketTrimCR(store, http, config, observer, obj, dpp)); + if (retcode < 0) { + // on errors, unlock so other gateways can try + set_status("unlocking"); + yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, + obj, name, cookie)); + } + } + } + return 0; +} + +/// tracks a bounded list of events with timestamps. old events can be expired, +/// and recent events can be searched by key. expiration depends on events being +/// inserted in temporal order +template +class RecentEventList { + public: + using clock_type = Clock; + using time_point = typename clock_type::time_point; + + RecentEventList(size_t max_size, const ceph::timespan& max_duration) + : events(max_size), max_duration(max_duration) + {} + + /// insert an event at the given point in time. this time must be at least as + /// recent as the last inserted event + void insert(T&& value, const time_point& now) { + // ceph_assert(events.empty() || now >= events.back().time) + events.push_back(Event{std::move(value), now}); + } + + /// performs a linear search for an event matching the given key, whose type + /// U can be any that provides operator==(U, T) + template + bool lookup(const U& key) const { + for (const auto& event : events) { + if (key == event.value) { + return true; + } + } + return false; + } + + /// remove events that are no longer recent compared to the given point in time + void expire_old(const time_point& now) { + const auto expired_before = now - max_duration; + while (!events.empty() && events.front().time < expired_before) { + events.pop_front(); + } + } + + private: + struct Event { + T value; + time_point time; + }; + boost::circular_buffer events; + const ceph::timespan max_duration; +}; + +namespace rgw { + +// read bucket trim configuration from ceph context +void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config) +{ + const auto& conf = cct->_conf; + + config.trim_interval_sec = + conf.get_val("rgw_sync_log_trim_interval"); + config.counter_size = 512; + config.buckets_per_interval = + conf.get_val("rgw_sync_log_trim_max_buckets"); + config.min_cold_buckets_per_interval = + conf.get_val("rgw_sync_log_trim_min_cold_buckets"); + config.concurrent_buckets = + conf.get_val("rgw_sync_log_trim_concurrent_buckets"); + config.notify_timeout_ms = 10000; + config.recent_size = 128; + config.recent_duration = std::chrono::hours(2); +} + +class BucketTrimManager::Impl : public TrimCounters::Server, + public BucketTrimObserver { + public: + rgw::sal::RadosStore* const store; + const BucketTrimConfig config; + + const rgw_raw_obj status_obj; + + /// count frequency of bucket instance entries in the data changes log + BucketChangeCounter counter; + + using RecentlyTrimmedBucketList = RecentEventList; + using clock_type = RecentlyTrimmedBucketList::clock_type; + /// track recently trimmed buckets to focus trim activity elsewhere + RecentlyTrimmedBucketList trimmed; + + /// serve the bucket trim watch/notify api + BucketTrimWatcher watcher; + + /// protect data shared between data sync, trim, and watch/notify threads + std::mutex mutex; + + Impl(rgw::sal::RadosStore* store, const BucketTrimConfig& 
config) + : store(store), config(config), + status_obj(store->svc()->zone->get_zone_params().log_pool, BucketTrimStatus::oid), + counter(config.counter_size), + trimmed(config.recent_size, config.recent_duration), + watcher(store, status_obj, this) + {} + + /// TrimCounters::Server interface for watch/notify api + void get_bucket_counters(int count, TrimCounters::Vector& buckets) { + buckets.reserve(count); + std::lock_guard lock(mutex); + counter.get_highest(count, [&buckets] (const std::string& key, int count) { + buckets.emplace_back(key, count); + }); + ldout(store->ctx(), 20) << "get_bucket_counters: " << buckets << dendl; + } + + void reset_bucket_counters() override { + ldout(store->ctx(), 20) << "bucket trim completed" << dendl; + std::lock_guard lock(mutex); + counter.clear(); + trimmed.expire_old(clock_type::now()); + } + + /// BucketTrimObserver interface to remember successfully-trimmed buckets + void on_bucket_trimmed(std::string&& bucket_instance) override { + ldout(store->ctx(), 20) << "trimmed bucket instance " << bucket_instance << dendl; + std::lock_guard lock(mutex); + trimmed.insert(std::move(bucket_instance), clock_type::now()); + } + + bool trimmed_recently(const std::string_view& bucket_instance) override { + std::lock_guard lock(mutex); + return trimmed.lookup(bucket_instance); + } +}; + +BucketTrimManager::BucketTrimManager(rgw::sal::RadosStore* store, + const BucketTrimConfig& config) + : impl(new Impl(store, config)) +{ +} +BucketTrimManager::~BucketTrimManager() = default; + +int BucketTrimManager::init() +{ + return impl->watcher.start(this); +} + +void BucketTrimManager::on_bucket_changed(const std::string_view& bucket) +{ + std::lock_guard lock(impl->mutex); + // filter recently trimmed bucket instances out of bucket change counter + if (impl->trimmed.lookup(bucket)) { + return; + } + impl->counter.insert(std::string(bucket)); +} + +RGWCoroutine* BucketTrimManager::create_bucket_trim_cr(RGWHTTPManager *http) +{ + return new BucketTrimPollCR(impl->store, http, impl->config, + impl.get(), impl->status_obj, this); +} + +RGWCoroutine* BucketTrimManager::create_admin_bucket_trim_cr(RGWHTTPManager *http) +{ + // return the trim coroutine without any polling + return new BucketTrimCR(impl->store, http, impl->config, + impl.get(), impl->status_obj, this); +} + +CephContext* BucketTrimManager::get_cct() const +{ + return impl->store->ctx(); +} + +unsigned BucketTrimManager::get_subsys() const +{ + return dout_subsys; +} + +std::ostream& BucketTrimManager::gen_prefix(std::ostream& out) const +{ + return out << "rgw bucket trim manager: "; +} + +} // namespace rgw + +int bilog_trim(const DoutPrefixProvider* p, rgw::sal::RadosStore* store, + RGWBucketInfo& bucket_info, uint64_t gen, int shard_id, + std::string_view start_marker, std::string_view end_marker) +{ + auto& logs = bucket_info.layout.logs; + auto log = std::find_if(logs.begin(), logs.end(), rgw::matches_gen(gen)); + if (log == logs.end()) { + ldpp_dout(p, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << "ERROR: no log layout with gen=" << gen << dendl; + return -ENOENT; + } + + auto log_layout = *log; + + auto r = store->svc()->bilog_rados->log_trim(p, bucket_info, log_layout, shard_id, start_marker, end_marker); + if (r < 0) { + ldpp_dout(p, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << "ERROR: bilog_rados->log_trim returned r=" << r << dendl; + } + return r; +} diff --git a/src/rgw/driver/rados/rgw_trim_bilog.h b/src/rgw/driver/rados/rgw_trim_bilog.h new file mode 100644 index 00000000000..5b9c4cdd7ec 
--- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_bilog.h @@ -0,0 +1,124 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 Red Hat, Inc + * + * Author: Casey Bodley + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#ifndef RGW_SYNC_LOG_TRIM_H +#define RGW_SYNC_LOG_TRIM_H + +#include +#include + +#include "include/common_fwd.h" +#include "include/encoding.h" +#include "common/ceph_time.h" +#include "common/dout.h" +#include "rgw_common.h" + +class RGWCoroutine; +class RGWHTTPManager; + +namespace rgw { + +namespace sal { + class RadosStore; +} + +/// Interface to inform the trim process about which buckets are most active +struct BucketChangeObserver { + virtual ~BucketChangeObserver() = default; + + virtual void on_bucket_changed(const std::string_view& bucket_instance) = 0; +}; + +/// Configuration for BucketTrimManager +struct BucketTrimConfig { + /// time interval in seconds between bucket trim attempts + uint32_t trim_interval_sec{0}; + /// maximum number of buckets to track with BucketChangeObserver + size_t counter_size{0}; + /// maximum number of buckets to process each trim interval + uint32_t buckets_per_interval{0}; + /// minimum number of buckets to choose from the global bucket instance list + uint32_t min_cold_buckets_per_interval{0}; + /// maximum number of buckets to process in parallel + uint32_t concurrent_buckets{0}; + /// timeout in ms for bucket trim notify replies + uint64_t notify_timeout_ms{0}; + /// maximum number of recently trimmed buckets to remember (should be small + /// enough for a linear search) + size_t recent_size{0}; + /// maximum duration to consider a trim as 'recent' (should be some multiple + /// of the trim interval, at least) + ceph::timespan recent_duration{0}; +}; + +/// fill out the BucketTrimConfig from the ceph context +void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config); + +/// Determines the buckets on which to focus trim activity, using two sources of +/// input: the frequency of entries read from the data changes log, and a global +/// listing of the bucket.instance metadata. 
This allows us to trim active +/// buckets quickly, while also ensuring that all buckets will eventually trim +class BucketTrimManager : public BucketChangeObserver, public DoutPrefixProvider { + class Impl; + std::unique_ptr impl; + public: + BucketTrimManager(sal::RadosStore *store, const BucketTrimConfig& config); + ~BucketTrimManager(); + + int init(); + + /// increment a counter for the given bucket instance + void on_bucket_changed(const std::string_view& bucket_instance) override; + + /// create a coroutine to run the bucket trim process every trim interval + RGWCoroutine* create_bucket_trim_cr(RGWHTTPManager *http); + + /// create a coroutine to trim buckets directly via radosgw-admin + RGWCoroutine* create_admin_bucket_trim_cr(RGWHTTPManager *http); + + CephContext *get_cct() const override; + unsigned get_subsys() const; + std::ostream& gen_prefix(std::ostream& out) const; +}; + +/// provides persistent storage for the trim manager's current position in the +/// list of bucket instance metadata +struct BucketTrimStatus { + std::string marker; //< metadata key of current bucket instance + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(marker, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& p) { + DECODE_START(1, p); + decode(marker, p); + DECODE_FINISH(p); + } + + static const std::string oid; +}; + +} // namespace rgw + +WRITE_CLASS_ENCODER(rgw::BucketTrimStatus); + +int bilog_trim(const DoutPrefixProvider* p, rgw::sal::RadosStore* store, + RGWBucketInfo& bucket_info, uint64_t gen, int shard_id, + std::string_view start_marker, std::string_view end_marker); + +#endif // RGW_SYNC_LOG_TRIM_H diff --git a/src/rgw/driver/rados/rgw_trim_datalog.cc b/src/rgw/driver/rados/rgw_trim_datalog.cc new file mode 100644 index 00000000000..72a160039cf --- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_datalog.cc @@ -0,0 +1,252 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include +#include + +#include "common/errno.h" + +#include "rgw_trim_datalog.h" +#include "rgw_cr_rados.h" +#include "rgw_cr_rest.h" +#include "rgw_datalog.h" +#include "rgw_data_sync.h" +#include "rgw_zone.h" +#include "rgw_bucket.h" + +#include "services/svc_zone.h" + +#include + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "data trim: ") + +namespace { + +class DatalogTrimImplCR : public RGWSimpleCoroutine { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + boost::intrusive_ptr cn; + int shard; + std::string marker; + std::string* last_trim_marker; + + public: + DatalogTrimImplCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, int shard, + const std::string& marker, std::string* last_trim_marker) + : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), shard(shard), + marker(marker), last_trim_marker(last_trim_marker) { + set_description() << "Datalog trim shard=" << shard + << " marker=" << marker; + } + + int send_request(const DoutPrefixProvider *dpp) override { + set_status() << "sending request"; + cn = stack->create_completion_notifier(); + return store->svc()->datalog_rados->trim_entries(dpp, shard, marker, + cn->completion()); + } + int request_complete() override { + int r = cn->completion()->get_return_value(); + ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << "(): trim of shard=" << shard + << " marker=" << marker << " returned r=" << r << dendl; + + set_status() << "request complete; ret=" << r; + if (r != -ENODATA) { + 
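+    // note: both real errors and success (0) take this early return; only
+    // -ENODATA ("nothing left to trim") continues past the branch to
+    // advance last_trim_marker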
return r; + } + // nothing left to trim, update last_trim_marker + if (*last_trim_marker < marker && + marker != store->svc()->datalog_rados->max_marker()) { + *last_trim_marker = marker; + } + return 0; + } +}; + +/// return the marker that it's safe to trim up to +const std::string& get_stable_marker(const rgw_data_sync_marker& m) +{ + return m.state == m.FullSync ? m.next_step_marker : m.marker; +} + +/// populate the container starting with 'dest' with the minimum stable marker +/// of each shard for all of the peers in [first, last) +template +void take_min_markers(IterIn first, IterIn last, IterOut dest) +{ + if (first == last) { + return; + } + for (auto p = first; p != last; ++p) { + auto m = dest; + for (auto &shard : p->sync_markers) { + const auto& stable = get_stable_marker(shard.second); + if (*m > stable) { + *m = stable; + } + ++m; + } + } +} + +} // anonymous namespace + +class DataLogTrimCR : public RGWCoroutine { + using TrimCR = DatalogTrimImplCR; + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWHTTPManager *http; + const int num_shards; + const std::string& zone_id; //< my zone id + std::vector peer_status; //< sync status for each peer + std::vector min_shard_markers; //< min marker per shard + std::vector& last_trim; //< last trimmed marker per shard + int ret{0}; + + public: + DataLogTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, + int num_shards, std::vector& last_trim) + : RGWCoroutine(store->ctx()), dpp(dpp), store(store), http(http), + num_shards(num_shards), + zone_id(store->svc()->zone->get_zone().id), + peer_status(store->svc()->zone->get_zone_data_notify_to_map().size()), + min_shard_markers(num_shards, + std::string(store->svc()->datalog_rados->max_marker())), + last_trim(last_trim) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int DataLogTrimCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + ldpp_dout(dpp, 10) << "fetching sync status for zone " << zone_id << dendl; + set_status("fetching sync status"); + yield { + // query data sync status from each sync peer + rgw_http_param_pair params[] = { + { "type", "data" }, + { "status", nullptr }, + { "source-zone", zone_id.c_str() }, + { nullptr, nullptr } + }; + + auto p = peer_status.begin(); + for (auto& c : store->svc()->zone->get_zone_data_notify_to_map()) { + ldpp_dout(dpp, 20) << "query sync status from " << c.first << dendl; + using StatusCR = RGWReadRESTResourceCR; + spawn(new StatusCR(cct, c.second, http, "/admin/log/", params, &*p), + false); + ++p; + } + } + + // must get a successful reply from all peers to consider trimming + ret = 0; + while (ret == 0 && num_spawned() > 0) { + yield wait_for_child(); + collect_next(&ret); + } + drain_all(); + + if (ret < 0) { + ldpp_dout(dpp, 4) << "failed to fetch sync status from all peers" << dendl; + return set_cr_error(ret); + } + + ldpp_dout(dpp, 10) << "trimming log shards" << dendl; + set_status("trimming log shards"); + yield { + // determine the minimum marker for each shard + take_min_markers(peer_status.begin(), peer_status.end(), + min_shard_markers.begin()); + + for (int i = 0; i < num_shards; i++) { + const auto& m = min_shard_markers[i]; + if (m <= last_trim[i]) { + continue; + } + ldpp_dout(dpp, 10) << "trimming log shard " << i + << " at marker=" << m + << " last_trim=" << last_trim[i] << dendl; + spawn(new TrimCR(dpp, store, i, m, &last_trim[i]), + true); + } + } + return set_cr_done(); + } + return 0; +} + +RGWCoroutine* 
create_admin_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards, + std::vector& markers) +{ + return new DataLogTrimCR(dpp, store, http, num_shards, markers); +} + +class DataLogTrimPollCR : public RGWCoroutine { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* store; + RGWHTTPManager *http; + const int num_shards; + const utime_t interval; //< polling interval + const std::string lock_oid; //< use first data log shard for lock + const std::string lock_cookie; + std::vector last_trim; //< last trimmed marker per shard + + public: + DataLogTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, + int num_shards, utime_t interval) + : RGWCoroutine(store->ctx()), dpp(dpp), store(store), http(http), + num_shards(num_shards), interval(interval), + lock_oid(store->svc()->datalog_rados->get_oid(0, 0)), + lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), + last_trim(num_shards) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int DataLogTrimPollCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + for (;;) { + set_status("sleeping"); + wait(interval); + + // request a 'data_trim' lock that covers the entire wait interval to + // prevent other gateways from attempting to trim for the duration + set_status("acquiring trim lock"); + yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, lock_oid), + "data_trim", lock_cookie, + interval.sec())); + if (retcode < 0) { + // if the lock is already held, go back to sleep and try again later + ldpp_dout(dpp, 4) << "failed to lock " << lock_oid << ", trying again in " + << interval.sec() << "s" << dendl; + continue; + } + + set_status("trimming"); + yield call(new DataLogTrimCR(dpp, store, http, num_shards, last_trim)); + + // note that the lock is not released. 
this is intentional, as it avoids + // duplicating this work in other gateways + } + } + return 0; +} + +RGWCoroutine* create_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards, utime_t interval) +{ + return new DataLogTrimPollCR(dpp, store, http, num_shards, interval); +} diff --git a/src/rgw/driver/rados/rgw_trim_datalog.h b/src/rgw/driver/rados/rgw_trim_datalog.h new file mode 100644 index 00000000000..9f5bf7252fe --- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_datalog.h @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include +#include + +#include "common/dout.h" + +class RGWCoroutine; +class RGWRados; +class RGWHTTPManager; +class utime_t; +namespace rgw { namespace sal { + class RadosStore; +} } + +// DataLogTrimCR factory function +extern RGWCoroutine* create_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards, utime_t interval); + +// factory function for datalog trim via radosgw-admin +RGWCoroutine* create_admin_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards, + std::vector& markers); diff --git a/src/rgw/driver/rados/rgw_trim_mdlog.cc b/src/rgw/driver/rados/rgw_trim_mdlog.cc new file mode 100644 index 00000000000..d8e19594aea --- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_mdlog.cc @@ -0,0 +1,795 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" + +#include "rgw_trim_mdlog.h" +#include "rgw_sync.h" +#include "rgw_cr_rados.h" +#include "rgw_cr_rest.h" +#include "rgw_zone.h" +#include "services/svc_zone.h" +#include "services/svc_meta.h" +#include "services/svc_mdlog.h" +#include "services/svc_cls.h" + +#include + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "meta trim: ") + +/// purge all log shards for the given mdlog +class PurgeLogShardsCR : public RGWShardCollectCR { + rgw::sal::RadosStore* const store; + const RGWMetadataLog* mdlog; + const int num_shards; + rgw_raw_obj obj; + int i{0}; + + static constexpr int max_concurrent = 16; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to remove mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + PurgeLogShardsCR(rgw::sal::RadosStore* store, const RGWMetadataLog* mdlog, + const rgw_pool& pool, int num_shards) + : RGWShardCollectCR(store->ctx(), max_concurrent), + store(store), mdlog(mdlog), num_shards(num_shards), obj(pool, "") + {} + + bool spawn_next() override { + if (i == num_shards) { + return false; + } + mdlog->get_shard_oid(i++, obj.oid); + spawn(new RGWRadosRemoveCR(store, obj), false); + return true; + } +}; + +using Cursor = RGWPeriodHistory::Cursor; + +/// purge mdlogs from the oldest up to (but not including) the given realm_epoch +class PurgePeriodLogsCR : public RGWCoroutine { + struct Svc { + RGWSI_Zone *zone; + RGWSI_MDLog *mdlog; + } svc; + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* const store; + RGWMetadataManager *const metadata; + RGWObjVersionTracker objv; + Cursor cursor; + epoch_t realm_epoch; + epoch_t *last_trim_epoch; //< update last trim on success + + public: + PurgePeriodLogsCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, 
epoch_t realm_epoch, epoch_t *last_trim) + : RGWCoroutine(store->ctx()), dpp(dpp), store(store), metadata(store->ctl()->meta.mgr), + realm_epoch(realm_epoch), last_trim_epoch(last_trim) { + svc.zone = store->svc()->zone; + svc.mdlog = store->svc()->mdlog; + } + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int PurgePeriodLogsCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // read our current oldest log period + yield call(svc.mdlog->read_oldest_log_period_cr(dpp, &cursor, &objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + ceph_assert(cursor); + ldpp_dout(dpp, 20) << "oldest log realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + + // trim -up to- the given realm_epoch + while (cursor.get_epoch() < realm_epoch) { + ldpp_dout(dpp, 4) << "purging log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + yield { + const auto mdlog = svc.mdlog->get_log(cursor.get_period().get_id()); + const auto& pool = svc.zone->get_zone_params().log_pool; + auto num_shards = cct->_conf->rgw_md_log_max_shards; + call(new PurgeLogShardsCR(store, mdlog, pool, num_shards)); + } + if (retcode < 0) { + ldpp_dout(dpp, 1) << "failed to remove log shards: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + ldpp_dout(dpp, 10) << "removed log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + + // update our mdlog history + yield call(svc.mdlog->trim_log_period_cr(dpp, cursor, &objv)); + if (retcode == -ENOENT) { + // must have raced to update mdlog history. return success and allow the + // winner to continue purging + ldpp_dout(dpp, 10) << "already removed log shards for realm_epoch=" << cursor.get_epoch() + << " period=" << cursor.get_period().get_id() << dendl; + return set_cr_done(); + } else if (retcode < 0) { + ldpp_dout(dpp, 1) << "failed to remove log shards for realm_epoch=" + << cursor.get_epoch() << " period=" << cursor.get_period().get_id() + << " with: " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + if (*last_trim_epoch < cursor.get_epoch()) { + *last_trim_epoch = cursor.get_epoch(); + } + + ceph_assert(cursor.has_next()); // get_current() should always come after + cursor.next(); + } + return set_cr_done(); + } + return 0; +} + +namespace { + +using connection_map = std::map>; + +/// construct a RGWRESTConn for each zone in the realm +template +connection_map make_peer_connections(rgw::sal::RadosStore* store, + const Zonegroups& zonegroups) +{ + connection_map connections; + for (auto& g : zonegroups) { + for (auto& z : g.second.zones) { + std::unique_ptr conn{ + new RGWRESTConn(store->ctx(), store, z.first.id, z.second.endpoints, g.second.api_name)}; + connections.emplace(z.first.id, std::move(conn)); + } + } + return connections; +} + +/// return the marker that it's safe to trim up to +const std::string& get_stable_marker(const rgw_meta_sync_marker& m) +{ + return m.state == m.FullSync ? 
m.next_step_marker : m.marker; +} + +/// comparison operator for take_min_status() +bool operator<(const rgw_meta_sync_marker& lhs, const rgw_meta_sync_marker& rhs) +{ + // sort by stable marker + return get_stable_marker(lhs) < get_stable_marker(rhs); +} + +/// populate the status with the minimum stable marker of each shard for any +/// peer whose realm_epoch matches the minimum realm_epoch in the input +template +int take_min_status(CephContext *cct, Iter first, Iter last, + rgw_meta_sync_status *status) +{ + if (first == last) { + return -EINVAL; + } + const size_t num_shards = cct->_conf->rgw_md_log_max_shards; + + status->sync_info.realm_epoch = std::numeric_limits::max(); + for (auto p = first; p != last; ++p) { + // validate peer's shard count + if (p->sync_markers.size() != num_shards) { + ldout(cct, 1) << "take_min_status got peer status with " + << p->sync_markers.size() << " shards, expected " + << num_shards << dendl; + return -EINVAL; + } + if (p->sync_info.realm_epoch < status->sync_info.realm_epoch) { + // earlier epoch, take its entire status + *status = std::move(*p); + } else if (p->sync_info.realm_epoch == status->sync_info.realm_epoch) { + // same epoch, take any earlier markers + auto m = status->sync_markers.begin(); + for (auto& shard : p->sync_markers) { + if (shard.second < m->second) { + m->second = std::move(shard.second); + } + ++m; + } + } + } + return 0; +} + +struct TrimEnv { + const DoutPrefixProvider *dpp; + rgw::sal::RadosStore* const store; + RGWHTTPManager *const http; + int num_shards; + const rgw_zone_id& zone; + Cursor current; //< cursor to current period + epoch_t last_trim_epoch{0}; //< epoch of last mdlog that was purged + + TrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) + : dpp(dpp), store(store), http(http), num_shards(num_shards), + zone(store->svc()->zone->zone_id()), + current(store->svc()->mdlog->get_period_history()->get_current()) + {} +}; + +struct MasterTrimEnv : public TrimEnv { + connection_map connections; //< peer connections + std::vector peer_status; //< sync status for each peer + /// last trim marker for each shard, only applies to current period's mdlog + std::vector last_trim_markers; + + MasterTrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) + : TrimEnv(dpp, store, http, num_shards), + last_trim_markers(num_shards) + { + auto& period = current.get_period(); + connections = make_peer_connections(store, period.get_map().zonegroups); + connections.erase(zone.id); + peer_status.resize(connections.size()); + } +}; + +struct PeerTrimEnv : public TrimEnv { + /// last trim timestamp for each shard, only applies to current period's mdlog + std::vector last_trim_timestamps; + + PeerTrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) + : TrimEnv(dpp, store, http, num_shards), + last_trim_timestamps(num_shards) + {} + + void set_num_shards(int num_shards) { + this->num_shards = num_shards; + last_trim_timestamps.resize(num_shards); + } +}; + +} // anonymous namespace + + +/// spawn a trim cr for each shard that needs it, while limiting the number +/// of concurrent shards +class MetaMasterTrimShardCollectCR : public RGWShardCollectCR { + private: + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + MasterTrimEnv& env; + RGWMetadataLog *mdlog; + int shard_id{0}; + std::string oid; + const rgw_meta_sync_status& sync_status; + + int handle_result(int r) override { + if (r 
== -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog, + const rgw_meta_sync_status& sync_status) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), mdlog(mdlog), sync_status(sync_status) + {} + + bool spawn_next() override; +}; + +bool MetaMasterTrimShardCollectCR::spawn_next() +{ + while (shard_id < env.num_shards) { + auto m = sync_status.sync_markers.find(shard_id); + if (m == sync_status.sync_markers.end()) { + shard_id++; + continue; + } + auto& stable = get_stable_marker(m->second); + auto& last_trim = env.last_trim_markers[shard_id]; + + if (stable <= last_trim) { + // already trimmed + ldpp_dout(env.dpp, 20) << "skipping log shard " << shard_id + << " at marker=" << stable + << " last_trim=" << last_trim + << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; + shard_id++; + continue; + } + + mdlog->get_shard_oid(shard_id, oid); + + ldpp_dout(env.dpp, 10) << "trimming log shard " << shard_id + << " at marker=" << stable + << " last_trim=" << last_trim + << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; + spawn(new RGWSyncLogTrimCR(env.dpp, env.store, oid, stable, &last_trim), false); + shard_id++; + return true; + } + return false; +} + +/// spawn rest requests to read each peer's sync status +class MetaMasterStatusCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + MasterTrimEnv& env; + connection_map::iterator c; + std::vector::iterator s; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch metadata sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } + public: + explicit MetaMasterStatusCollectCR(MasterTrimEnv& env) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), c(env.connections.begin()), s(env.peer_status.begin()) + {} + + bool spawn_next() override { + if (c == env.connections.end()) { + return false; + } + static rgw_http_param_pair params[] = { + { "type", "metadata" }, + { "status", nullptr }, + { nullptr, nullptr } + }; + + ldout(cct, 20) << "query sync status from " << c->first << dendl; + auto conn = c->second.get(); + using StatusCR = RGWReadRESTResourceCR; + spawn(new StatusCR(cct, conn, env.http, "/admin/log/", params, &*s), + false); + ++c; + ++s; + return true; + } +}; + +class MetaMasterTrimCR : public RGWCoroutine { + MasterTrimEnv& env; + rgw_meta_sync_status min_status; //< minimum sync status of all peers + int ret{0}; + + public: + explicit MetaMasterTrimCR(MasterTrimEnv& env) + : RGWCoroutine(env.store->ctx()), env(env) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int MetaMasterTrimCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // TODO: detect this and fail before we spawn the trim thread? 
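+    // (editorial sketch for the TODO above, using only names from this
+    // file: the factory create_meta_log_trim_cr() could bail out before
+    // spawning, e.g.
+    //
+    //   auto current = store->svc()->mdlog->get_period_history()->get_current();
+    //   auto conns = make_peer_connections(store, current.get_period().get_map().zonegroups);
+    //   conns.erase(store->svc()->zone->zone_id().id);
+    //   if (conns.empty())
+    //     return nullptr;  // no peers to correlate sync status with
+    //
+    // left as a sketch only; not part of this change)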
+ if (env.connections.empty()) { + ldpp_dout(dpp, 4) << "no peers, exiting" << dendl; + return set_cr_done(); + } + + ldpp_dout(dpp, 10) << "fetching sync status for zone " << env.zone << dendl; + // query mdlog sync status from peers + yield call(new MetaMasterStatusCollectCR(env)); + + // must get a successful reply from all peers to consider trimming + if (ret < 0) { + ldpp_dout(dpp, 4) << "failed to fetch sync status from all peers" << dendl; + return set_cr_error(ret); + } + + // determine the minimum epoch and markers + ret = take_min_status(env.store->ctx(), env.peer_status.begin(), + env.peer_status.end(), &min_status); + if (ret < 0) { + ldpp_dout(dpp, 4) << "failed to calculate min sync status from peers" << dendl; + return set_cr_error(ret); + } + yield { + auto store = env.store; + auto epoch = min_status.sync_info.realm_epoch; + ldpp_dout(dpp, 4) << "realm epoch min=" << epoch + << " current=" << env.current.get_epoch()<< dendl; + if (epoch > env.last_trim_epoch + 1) { + // delete any prior mdlog periods + spawn(new PurgePeriodLogsCR(dpp, store, epoch, &env.last_trim_epoch), true); + } else { + ldpp_dout(dpp, 10) << "mdlogs already purged up to realm_epoch " + << env.last_trim_epoch << dendl; + } + + // if realm_epoch == current, trim mdlog based on markers + if (epoch == env.current.get_epoch()) { + auto mdlog = store->svc()->mdlog->get_log(env.current.get_period().get_id()); + spawn(new MetaMasterTrimShardCollectCR(env, mdlog, min_status), true); + } + } + // ignore any errors during purge/trim because we want to hold the lock open + return set_cr_done(); + } + return 0; +} + + +/// read the first entry of the master's mdlog shard and trim to that position +class MetaPeerTrimShardCR : public RGWCoroutine { + RGWMetaSyncEnv& env; + RGWMetadataLog *mdlog; + const std::string& period_id; + const int shard_id; + RGWMetadataLogInfo info; + ceph::real_time stable; //< safe timestamp to trim, according to master + ceph::real_time *last_trim; //< last trimmed timestamp, updated on trim + rgw_mdlog_shard_data result; //< result from master's mdlog listing + + public: + MetaPeerTrimShardCR(RGWMetaSyncEnv& env, RGWMetadataLog *mdlog, + const std::string& period_id, int shard_id, + ceph::real_time *last_trim) + : RGWCoroutine(env.store->ctx()), env(env), mdlog(mdlog), + period_id(period_id), shard_id(shard_id), last_trim(last_trim) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int MetaPeerTrimShardCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + // query master's first mdlog entry for this shard + yield call(create_list_remote_mdlog_shard_cr(&env, period_id, shard_id, + "", 1, &result)); + if (retcode < 0) { + ldpp_dout(dpp, 5) << "failed to read first entry from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (result.entries.empty()) { + // if there are no mdlog entries, we don't have a timestamp to compare. we + // can't just trim everything, because there could be racing updates since + // this empty reply. 
query the mdlog shard info to read its max timestamp, + // then retry the listing to make sure it's still empty before trimming to + // that + ldpp_dout(dpp, 10) << "empty master mdlog shard " << shard_id + << ", reading last timestamp from shard info" << dendl; + // read the mdlog shard info for the last timestamp + yield call(create_read_remote_mdlog_shard_info_cr(&env, period_id, shard_id, &info)); + if (retcode < 0) { + ldpp_dout(dpp, 5) << "failed to read info from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + if (ceph::real_clock::is_zero(info.last_update)) { + return set_cr_done(); // nothing to trim + } + ldpp_dout(dpp, 10) << "got mdlog shard info with last update=" + << info.last_update << dendl; + // re-read the master's first mdlog entry to make sure it hasn't changed + yield call(create_list_remote_mdlog_shard_cr(&env, period_id, shard_id, + "", 1, &result)); + if (retcode < 0) { + ldpp_dout(dpp, 5) << "failed to read first entry from master's mdlog shard " + << shard_id << " for period " << period_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + // if the mdlog is still empty, trim to max marker + if (result.entries.empty()) { + stable = info.last_update; + } else { + stable = result.entries.front().timestamp; + + // can only trim -up to- master's first timestamp, so subtract a second. + // (this is why we use timestamps instead of markers for the peers) + stable -= std::chrono::seconds(1); + } + } else { + stable = result.entries.front().timestamp; + stable -= std::chrono::seconds(1); + } + + if (stable <= *last_trim) { + ldpp_dout(dpp, 10) << "skipping log shard " << shard_id + << " at timestamp=" << stable + << " last_trim=" << *last_trim << dendl; + return set_cr_done(); + } + + ldpp_dout(dpp, 10) << "trimming log shard " << shard_id + << " at timestamp=" << stable + << " last_trim=" << *last_trim << dendl; + yield { + std::string oid; + mdlog->get_shard_oid(shard_id, oid); + call(new RGWRadosTimelogTrimCR(dpp, env.store, oid, real_time{}, stable, "", "")); + } + if (retcode < 0 && retcode != -ENODATA) { + ldpp_dout(dpp, 1) << "failed to trim mdlog shard " << shard_id + << ": " << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + *last_trim = stable; + return set_cr_done(); + } + return 0; +} + +class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { + static constexpr int MAX_CONCURRENT_SHARDS = 16; + + PeerTrimEnv& env; + RGWMetadataLog *mdlog; + const std::string& period_id; + RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR + int shard_id{0}; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } + public: + MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog) + : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), + env(env), mdlog(mdlog), period_id(env.current.get_period().get_id()) + { + meta_env.init(env.dpp, cct, env.store, env.store->svc()->zone->get_master_conn(), + env.store->svc()->rados->get_async_processor(), env.http, nullptr, + env.store->getRados()->get_sync_tracer()); + } + + bool spawn_next() override; +}; + +bool MetaPeerTrimShardCollectCR::spawn_next() +{ + if (shard_id >= env.num_shards) { + return false; + } + auto& last_trim = env.last_trim_timestamps[shard_id]; + spawn(new 
MetaPeerTrimShardCR(meta_env, mdlog, period_id, shard_id, &last_trim), + false); + shard_id++; + return true; +} + +class MetaPeerTrimCR : public RGWCoroutine { + PeerTrimEnv& env; + rgw_mdlog_info mdlog_info; //< master's mdlog info + + public: + explicit MetaPeerTrimCR(PeerTrimEnv& env) : RGWCoroutine(env.store->ctx()), env(env) {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int MetaPeerTrimCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + ldpp_dout(dpp, 10) << "fetching master mdlog info" << dendl; + yield { + // query mdlog_info from master for oldest_log_period + rgw_http_param_pair params[] = { + { "type", "metadata" }, + { nullptr, nullptr } + }; + + using LogInfoCR = RGWReadRESTResourceCR; + call(new LogInfoCR(cct, env.store->svc()->zone->get_master_conn(), env.http, + "/admin/log/", params, &mdlog_info)); + } + if (retcode < 0) { + ldpp_dout(dpp, 4) << "failed to read mdlog info from master" << dendl; + return set_cr_error(retcode); + } + // use master's shard count instead + env.set_num_shards(mdlog_info.num_shards); + + if (mdlog_info.realm_epoch > env.last_trim_epoch + 1) { + // delete any prior mdlog periods + yield call(new PurgePeriodLogsCR(dpp, env.store, mdlog_info.realm_epoch, + &env.last_trim_epoch)); + } else { + ldpp_dout(dpp, 10) << "mdlogs already purged through realm_epoch " + << env.last_trim_epoch << dendl; + } + + // if realm_epoch == current, trim mdlog based on master's markers + if (mdlog_info.realm_epoch == env.current.get_epoch()) { + yield { + auto mdlog = env.store->svc()->mdlog->get_log(env.current.get_period().get_id()); + call(new MetaPeerTrimShardCollectCR(env, mdlog)); + // ignore any errors during purge/trim because we want to hold the lock open + } + } + return set_cr_done(); + } + return 0; +} + +class MetaTrimPollCR : public RGWCoroutine { + rgw::sal::RadosStore* const store; + const utime_t interval; //< polling interval + const rgw_raw_obj obj; + const std::string name{"meta_trim"}; //< lock name + const std::string cookie; + + protected: + /// allocate the coroutine to run within the lease + virtual RGWCoroutine* alloc_cr() = 0; + + public: + MetaTrimPollCR(rgw::sal::RadosStore* store, utime_t interval) + : RGWCoroutine(store->ctx()), store(store), interval(interval), + obj(store->svc()->zone->get_zone_params().log_pool, RGWMetadataLogHistory::oid), + cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)) + {} + + int operate(const DoutPrefixProvider *dpp) override; +}; + +int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp) +{ + reenter(this) { + for (;;) { + set_status("sleeping"); + wait(interval); + + // prevent others from trimming for our entire wait interval + set_status("acquiring trim lock"); + yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + obj, name, cookie, interval.sec())); + if (retcode < 0) { + ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl; + continue; + } + + set_status("trimming"); + yield call(alloc_cr()); + + if (retcode < 0) { + // on errors, unlock so other gateways can try + set_status("unlocking"); + yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, + obj, name, cookie)); + } + } + } + return 0; +} + +class MetaMasterTrimPollCR : public MetaTrimPollCR { + MasterTrimEnv env; //< trim state to share between calls + RGWCoroutine* alloc_cr() override { + return new MetaMasterTrimCR(env); + } + public: + MetaMasterTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, 
RGWHTTPManager *http,
+                      int num_shards, utime_t interval)
+    : MetaTrimPollCR(store, interval),
+      env(dpp, store, http, num_shards)
+  {}
+};
+
+class MetaPeerTrimPollCR : public MetaTrimPollCR {
+  PeerTrimEnv env; //< trim state to share between calls
+  RGWCoroutine* alloc_cr() override {
+    return new MetaPeerTrimCR(env);
+  }
+ public:
+  MetaPeerTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http,
+                     int num_shards, utime_t interval)
+    : MetaTrimPollCR(store, interval),
+      env(dpp, store, http, num_shards)
+  {}
+};
+
+namespace {
+bool sanity_check_endpoints(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store) {
+  bool retval = true;
+  auto current = store->svc()->mdlog->get_period_history()->get_current();
+  const auto& period = current.get_period();
+  for (const auto& [_, zonegroup] : period.get_map().zonegroups) {
+    if (zonegroup.endpoints.empty()) {
+      ldpp_dout(dpp, -1)
+	<< __PRETTY_FUNCTION__ << ":" << __LINE__
+	<< " WARNING: Cluster is misconfigured! "
+	<< " Zonegroup " << zonegroup.get_name()
+	<< " (" << zonegroup.get_id() << ") in Realm "
+	<< period.get_realm_name() << " ( " << period.get_realm() << ") "
+	<< " has no endpoints!" << dendl;
+    }
+    for (const auto& [_, zone] : zonegroup.zones) {
+      if (zone.endpoints.empty()) {
+	ldpp_dout(dpp, -1)
+	  << __PRETTY_FUNCTION__ << ":" << __LINE__
+	  << " ERROR: Cluster is misconfigured! "
+	  << " Zone " << zone.name << " (" << zone.id << ") in Zonegroup "
+	  << zonegroup.get_name() << " ( " << zonegroup.get_id()
+	  << ") in Realm " << period.get_realm_name()
+	  << " ( " << period.get_realm() << ") "
+	  << " has no endpoints! Trimming is impossible." << dendl;
+	retval = false;
+      }
+    }
+  }
+  return retval;
+}
+}
+
+RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http,
+                                      int num_shards, utime_t interval)
+{
+  if (!sanity_check_endpoints(dpp, store)) {
+    ldpp_dout(dpp, -1)
+      << __PRETTY_FUNCTION__ << ":" << __LINE__
+      << " ERROR: Cluster is misconfigured! Refusing to trim." << dendl;
+    return nullptr;
+  }
+  if (store->svc()->zone->is_meta_master()) {
+    return new MetaMasterTrimPollCR(dpp, store, http, num_shards, interval);
+  }
+  return new MetaPeerTrimPollCR(dpp, store, http, num_shards, interval);
+}
+
+
+struct MetaMasterAdminTrimCR : private MasterTrimEnv, public MetaMasterTrimCR {
+  MetaMasterAdminTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards)
+    : MasterTrimEnv(dpp, store, http, num_shards),
+      MetaMasterTrimCR(*static_cast<MasterTrimEnv*>(this))
+  {}
+};
+
+struct MetaPeerAdminTrimCR : private PeerTrimEnv, public MetaPeerTrimCR {
+  MetaPeerAdminTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards)
+    : PeerTrimEnv(dpp, store, http, num_shards),
+      MetaPeerTrimCR(*static_cast<PeerTrimEnv*>(this))
+  {}
+};
+
+RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store,
+                                            RGWHTTPManager *http,
+                                            int num_shards)
+{
+  if (!sanity_check_endpoints(dpp, store)) {
+    ldpp_dout(dpp, -1)
+      << __PRETTY_FUNCTION__ << ":" << __LINE__
+      << " ERROR: Cluster is misconfigured! Refusing to trim."
<< dendl; + return nullptr; + } + if (store->svc()->zone->is_meta_master()) { + return new MetaMasterAdminTrimCR(dpp, store, http, num_shards); + } + return new MetaPeerAdminTrimCR(dpp, store, http, num_shards); +} diff --git a/src/rgw/driver/rados/rgw_trim_mdlog.h b/src/rgw/driver/rados/rgw_trim_mdlog.h new file mode 100644 index 00000000000..1dba8612bd3 --- /dev/null +++ b/src/rgw/driver/rados/rgw_trim_mdlog.h @@ -0,0 +1,25 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +class RGWCoroutine; +class DoutPrefixProvider; +class RGWRados; +class RGWHTTPManager; +class utime_t; +namespace rgw { namespace sal { + class RadosStore; +} } + +// MetaLogTrimCR factory function +RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards, utime_t interval); + +// factory function for mdlog trim via radosgw-admin +RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, + rgw::sal::RadosStore* store, + RGWHTTPManager *http, + int num_shards); diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc new file mode 100644 index 00000000000..7c36a52e31d --- /dev/null +++ b/src/rgw/driver/rados/rgw_user.cc @@ -0,0 +1,2768 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/errno.h" + +#include "rgw_user.h" + +#include "rgw_bucket.h" + +#include "services/svc_user.h" +#include "services/svc_meta.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace std; + +extern void op_type_to_str(uint32_t mask, char *buf, int len); + +static string key_type_to_str(int key_type) { + switch (key_type) { + case KEY_TYPE_SWIFT: + return "swift"; + break; + + default: + return "s3"; + break; + } +} + +static bool char_is_unreserved_url(char c) +{ + if (isalnum(c)) + return true; + + switch (c) { + case '-': + case '.': + case '_': + case '~': + return true; + default: + return false; + } +} + +static bool validate_access_key(string& key) +{ + const char *p = key.c_str(); + while (*p) { + if (!char_is_unreserved_url(*p)) + return false; + p++; + } + return true; +} + +static void set_err_msg(std::string *sink, std::string msg) +{ + if (sink && !msg.empty()) + *sink = msg; +} + +/* + * Dump either the full user info or a subset to a formatter. + * + * NOTE: It is the caller's responsibility to ensure that the + * formatter is flushed at the correct time. + */ + +static void dump_subusers_info(Formatter *f, RGWUserInfo &info) +{ + map::iterator uiter; + + f->open_array_section("subusers"); + for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) { + RGWSubUser& u = uiter->second; + f->open_object_section("user"); + string s; + info.user_id.to_str(s); + f->dump_format("id", "%s:%s", s.c_str(), u.name.c_str()); + char buf[256]; + rgw_perm_to_str(u.perm_mask, buf, sizeof(buf)); + f->dump_string("permissions", buf); + f->close_section(); + } + f->close_section(); +} + +static void dump_access_keys_info(Formatter *f, RGWUserInfo &info) +{ + map::iterator kiter; + f->open_array_section("keys"); + for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) { + RGWAccessKey& k = kiter->second; + const char *sep = (k.subuser.empty() ? "" : ":"); + const char *subuser = (k.subuser.empty() ? 
"" : k.subuser.c_str()); + f->open_object_section("key"); + string s; + info.user_id.to_str(s); + f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser); + f->dump_string("access_key", k.id); + f->dump_string("secret_key", k.key); + f->close_section(); + } + f->close_section(); +} + +static void dump_swift_keys_info(Formatter *f, RGWUserInfo &info) +{ + map::iterator kiter; + f->open_array_section("swift_keys"); + for (kiter = info.swift_keys.begin(); kiter != info.swift_keys.end(); ++kiter) { + RGWAccessKey& k = kiter->second; + const char *sep = (k.subuser.empty() ? "" : ":"); + const char *subuser = (k.subuser.empty() ? "" : k.subuser.c_str()); + f->open_object_section("key"); + string s; + info.user_id.to_str(s); + f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser); + f->dump_string("secret_key", k.key); + f->close_section(); + } + f->close_section(); +} + +static void dump_user_info(Formatter *f, RGWUserInfo &info, + RGWStorageStats *stats = NULL) +{ + f->open_object_section("user_info"); + encode_json("tenant", info.user_id.tenant, f); + encode_json("user_id", info.user_id.id, f); + encode_json("display_name", info.display_name, f); + encode_json("email", info.user_email, f); + encode_json("suspended", (int)info.suspended, f); + encode_json("max_buckets", (int)info.max_buckets, f); + + dump_subusers_info(f, info); + dump_access_keys_info(f, info); + dump_swift_keys_info(f, info); + + encode_json("caps", info.caps, f); + + char buf[256]; + op_type_to_str(info.op_mask, buf, sizeof(buf)); + encode_json("op_mask", (const char *)buf, f); + encode_json("system", (bool)info.system, f); + encode_json("admin", (bool)info.admin, f); + encode_json("default_placement", info.default_placement.name, f); + encode_json("default_storage_class", info.default_placement.storage_class, f); + encode_json("placement_tags", info.placement_tags, f); + encode_json("bucket_quota", info.quota.bucket_quota, f); + encode_json("user_quota", info.quota.user_quota, f); + encode_json("temp_url_keys", info.temp_url_keys, f); + + string user_source_type; + switch ((RGWIdentityType)info.type) { + case TYPE_RGW: + user_source_type = "rgw"; + break; + case TYPE_KEYSTONE: + user_source_type = "keystone"; + break; + case TYPE_LDAP: + user_source_type = "ldap"; + break; + case TYPE_NONE: + user_source_type = "none"; + break; + default: + user_source_type = "none"; + break; + } + encode_json("type", user_source_type, f); + encode_json("mfa_ids", info.mfa_ids, f); + if (stats) { + encode_json("stats", *stats, f); + } + f->close_section(); +} + +static int user_add_helper(RGWUserAdminOpState& op_state, std::string *err_msg) +{ + int ret = 0; + const rgw_user& uid = op_state.get_user_id(); + std::string user_email = op_state.get_user_email(); + std::string display_name = op_state.get_display_name(); + + // fail if the user exists already + if (op_state.has_existing_user()) { + if (op_state.found_by_email) { + set_err_msg(err_msg, "email: " + user_email + + " is the email address of an existing user"); + ret = -ERR_EMAIL_EXIST; + } else if (op_state.found_by_key) { + set_err_msg(err_msg, "duplicate key provided"); + ret = -ERR_KEY_EXIST; + } else { + set_err_msg(err_msg, "user: " + uid.to_str() + " exists"); + ret = -EEXIST; + } + return ret; + } + + // fail if the user_info has already been populated + if (op_state.is_populated()) { + set_err_msg(err_msg, "cannot overwrite already populated user"); + return -EEXIST; + } + + // fail if the display name was not included + if (display_name.empty()) { + 
set_err_msg(err_msg, "no display name specified"); + return -EINVAL; + } + + return ret; +} + +RGWAccessKeyPool::RGWAccessKeyPool(RGWUser* usr) +{ + if (!usr) { + return; + } + + user = usr; + + driver = user->get_driver(); +} + +int RGWAccessKeyPool::init(RGWUserAdminOpState& op_state) +{ + if (!op_state.is_initialized()) { + keys_allowed = false; + return -EINVAL; + } + + const rgw_user& uid = op_state.get_user_id(); + if (uid.compare(RGW_USER_ANON_ID) == 0) { + keys_allowed = false; + return -EINVAL; + } + + swift_keys = op_state.get_swift_keys(); + access_keys = op_state.get_access_keys(); + + keys_allowed = true; + + return 0; +} + +RGWUserAdminOpState::RGWUserAdminOpState(rgw::sal::Driver* driver) +{ + user = driver->get_user(rgw_user(RGW_USER_ANON_ID)); +} + +void RGWUserAdminOpState::set_user_id(const rgw_user& id) +{ + if (id.empty()) + return; + + user->get_info().user_id = id; +} + +void RGWUserAdminOpState::set_subuser(std::string& _subuser) +{ + if (_subuser.empty()) + return; + + size_t pos = _subuser.find(":"); + if (pos != string::npos) { + rgw_user tmp_id; + tmp_id.from_str(_subuser.substr(0, pos)); + if (tmp_id.tenant.empty()) { + user->get_info().user_id.id = tmp_id.id; + } else { + user->get_info().user_id = tmp_id; + } + subuser = _subuser.substr(pos+1); + } else { + subuser = _subuser; + } + + subuser_specified = true; +} + +void RGWUserAdminOpState::set_user_info(RGWUserInfo& user_info) +{ + user->get_info() = user_info; +} + +void RGWUserAdminOpState::set_user_version_tracker(RGWObjVersionTracker& objv_tracker) +{ + user->get_version_tracker() = objv_tracker; +} + +const rgw_user& RGWUserAdminOpState::get_user_id() +{ + return user->get_id(); +} + +RGWUserInfo& RGWUserAdminOpState::get_user_info() +{ + return user->get_info(); +} + +map* RGWUserAdminOpState::get_swift_keys() +{ + return &user->get_info().swift_keys; +} + +map* RGWUserAdminOpState::get_access_keys() +{ + return &user->get_info().access_keys; +} + +map* RGWUserAdminOpState::get_subusers() +{ + return &user->get_info().subusers; +} + +RGWUserCaps *RGWUserAdminOpState::get_caps_obj() +{ + return &user->get_info().caps; +} + +std::string RGWUserAdminOpState::build_default_swift_kid() +{ + if (user->get_id().empty() || subuser.empty()) + return ""; + + std::string kid; + user->get_id().to_str(kid); + kid.append(":"); + kid.append(subuser); + + return kid; +} + +std::string RGWUserAdminOpState::generate_subuser() { + if (user->get_id().empty()) + return ""; + + std::string generated_subuser; + user->get_id().to_str(generated_subuser); + std::string rand_suffix; + + int sub_buf_size = RAND_SUBUSER_LEN + 1; + char sub_buf[RAND_SUBUSER_LEN + 1]; + + gen_rand_alphanumeric_upper(g_ceph_context, sub_buf, sub_buf_size); + + rand_suffix = sub_buf; + if (rand_suffix.empty()) + return ""; + + generated_subuser.append(rand_suffix); + subuser = generated_subuser; + + return generated_subuser; +} + +/* + * Do a fairly exhaustive search for an existing key matching the parameters + * given. Also handles the case where no key type was specified and updates + * the operation state if needed. 
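+ *
+ * (When no key type is specified, the id is first looked up among the
+ * S3 access keys and then among the Swift keys, including the derived
+ * "<user>:<subuser>" Swift key id.)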
+ */ + +bool RGWAccessKeyPool::check_existing_key(RGWUserAdminOpState& op_state) +{ + bool existing_key = false; + + int key_type = op_state.get_key_type(); + std::string kid = op_state.get_access_key(); + std::map::iterator kiter; + std::string swift_kid = op_state.build_default_swift_kid(); + + RGWUserInfo dup_info; + + if (kid.empty() && swift_kid.empty()) + return false; + + switch (key_type) { + case KEY_TYPE_SWIFT: + kiter = swift_keys->find(swift_kid); + + existing_key = (kiter != swift_keys->end()); + if (existing_key) + op_state.set_access_key(swift_kid); + + break; + case KEY_TYPE_S3: + kiter = access_keys->find(kid); + existing_key = (kiter != access_keys->end()); + + break; + default: + kiter = access_keys->find(kid); + + existing_key = (kiter != access_keys->end()); + if (existing_key) { + op_state.set_key_type(KEY_TYPE_S3); + break; + } + + kiter = swift_keys->find(kid); + + existing_key = (kiter != swift_keys->end()); + if (existing_key) { + op_state.set_key_type(KEY_TYPE_SWIFT); + break; + } + + // handle the case where the access key was not provided in user:key format + if (swift_kid.empty()) + return false; + + kiter = swift_keys->find(swift_kid); + + existing_key = (kiter != swift_keys->end()); + if (existing_key) { + op_state.set_access_key(swift_kid); + op_state.set_key_type(KEY_TYPE_SWIFT); + } + } + + op_state.set_existing_key(existing_key); + + return existing_key; +} + +int RGWAccessKeyPool::check_op(RGWUserAdminOpState& op_state, + std::string *err_msg) +{ + RGWUserInfo dup_info; + + if (!op_state.is_populated()) { + set_err_msg(err_msg, "user info was not populated"); + return -EINVAL; + } + + if (!keys_allowed) { + set_err_msg(err_msg, "keys not allowed for this user"); + return -EACCES; + } + + int32_t key_type = op_state.get_key_type(); + + // if a key type wasn't specified + if (key_type < 0) { + if (op_state.has_subuser()) { + key_type = KEY_TYPE_SWIFT; + } else { + key_type = KEY_TYPE_S3; + } + } + + op_state.set_key_type(key_type); + + /* see if the access key was specified */ + if (key_type == KEY_TYPE_S3 && !op_state.will_gen_access() && + op_state.get_access_key().empty()) { + set_err_msg(err_msg, "empty access key"); + return -ERR_INVALID_ACCESS_KEY; + } + + // don't check for secret key because we may be doing a removal + + if (check_existing_key(op_state)) { + op_state.set_access_key_exist(); + } + return 0; +} + +// Generate a new random key +int RGWAccessKeyPool::generate_key(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, + optional_yield y, std::string *err_msg) +{ + std::string id; + std::string key; + + std::pair key_pair; + RGWAccessKey new_key; + std::unique_ptr duplicate_check; + + int key_type = op_state.get_key_type(); + bool gen_access = op_state.will_gen_access(); + bool gen_secret = op_state.will_gen_secret(); + + if (!keys_allowed) { + set_err_msg(err_msg, "access keys not allowed for this user"); + return -EACCES; + } + + if (op_state.has_existing_key()) { + set_err_msg(err_msg, "cannot create existing key"); + return -ERR_KEY_EXIST; + } + + if (!gen_access) { + id = op_state.get_access_key(); + } + + if (!id.empty()) { + switch (key_type) { + case KEY_TYPE_SWIFT: + if (driver->get_user_by_swift(dpp, id, y, &duplicate_check) >= 0) { + set_err_msg(err_msg, "existing swift key in RGW system:" + id); + return -ERR_KEY_EXIST; + } + break; + case KEY_TYPE_S3: + if (driver->get_user_by_access_key(dpp, id, y, &duplicate_check) >= 0) { + set_err_msg(err_msg, "existing S3 key in RGW system:" + id); + return -ERR_KEY_EXIST; + } + 
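+    // (Swift and S3 key ids live in separate maps, so each key type is
+    // only checked for duplicates against its own namespace)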
} + } + + //key's subuser + if (op_state.has_subuser()) { + //create user and subuser at the same time, user's s3 key should not be set this + if (!op_state.key_type_setbycontext || (key_type == KEY_TYPE_SWIFT)) { + new_key.subuser = op_state.get_subuser(); + } + } + + //Secret key + if (!gen_secret) { + if (op_state.get_secret_key().empty()) { + set_err_msg(err_msg, "empty secret key"); + return -ERR_INVALID_SECRET_KEY; + } + + key = op_state.get_secret_key(); + } else { + char secret_key_buf[SECRET_KEY_LEN + 1]; + gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, sizeof(secret_key_buf)); + key = secret_key_buf; + } + + // Generate the access key + if (key_type == KEY_TYPE_S3 && gen_access) { + char public_id_buf[PUBLIC_ID_LEN + 1]; + + do { + int id_buf_size = sizeof(public_id_buf); + gen_rand_alphanumeric_upper(g_ceph_context, public_id_buf, id_buf_size); + id = public_id_buf; + if (!validate_access_key(id)) + continue; + + } while (!driver->get_user_by_access_key(dpp, id, y, &duplicate_check)); + } + + if (key_type == KEY_TYPE_SWIFT) { + id = op_state.build_default_swift_kid(); + if (id.empty()) { + set_err_msg(err_msg, "empty swift access key"); + return -ERR_INVALID_ACCESS_KEY; + } + + // check that the access key doesn't exist + if (driver->get_user_by_swift(dpp, id, y, &duplicate_check) >= 0) { + set_err_msg(err_msg, "cannot create existing swift key"); + return -ERR_KEY_EXIST; + } + } + + // finally create the new key + new_key.id = id; + new_key.key = key; + + key_pair.first = id; + key_pair.second = new_key; + + if (key_type == KEY_TYPE_S3) { + access_keys->insert(key_pair); + } else if (key_type == KEY_TYPE_SWIFT) { + swift_keys->insert(key_pair); + } + + return 0; +} + +// modify an existing key +int RGWAccessKeyPool::modify_key(RGWUserAdminOpState& op_state, std::string *err_msg) +{ + std::string id; + std::string key = op_state.get_secret_key(); + int key_type = op_state.get_key_type(); + + RGWAccessKey modify_key; + + pair key_pair; + map::iterator kiter; + + switch (key_type) { + case KEY_TYPE_S3: + id = op_state.get_access_key(); + if (id.empty()) { + set_err_msg(err_msg, "no access key specified"); + return -ERR_INVALID_ACCESS_KEY; + } + break; + case KEY_TYPE_SWIFT: + id = op_state.build_default_swift_kid(); + if (id.empty()) { + set_err_msg(err_msg, "no subuser specified"); + return -EINVAL; + } + break; + default: + set_err_msg(err_msg, "invalid key type"); + return -ERR_INVALID_KEY_TYPE; + } + + if (!op_state.has_existing_key()) { + set_err_msg(err_msg, "key does not exist"); + return -ERR_INVALID_ACCESS_KEY; + } + + key_pair.first = id; + + if (key_type == KEY_TYPE_SWIFT) { + modify_key.id = id; + modify_key.subuser = op_state.get_subuser(); + } else if (key_type == KEY_TYPE_S3) { + kiter = access_keys->find(id); + if (kiter != access_keys->end()) { + modify_key = kiter->second; + } + } + + if (op_state.will_gen_secret()) { + char secret_key_buf[SECRET_KEY_LEN + 1]; + int key_buf_size = sizeof(secret_key_buf); + gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, key_buf_size); + key = secret_key_buf; + } + + if (key.empty()) { + set_err_msg(err_msg, "empty secret key"); + return -ERR_INVALID_SECRET_KEY; + } + + // update the access key with the new secret key + modify_key.key = key; + + key_pair.second = modify_key; + + + if (key_type == KEY_TYPE_S3) { + (*access_keys)[id] = modify_key; + } else if (key_type == KEY_TYPE_SWIFT) { + (*swift_keys)[id] = modify_key; + } + + return 0; +} + +int RGWAccessKeyPool::execute_add(const DoutPrefixProvider 
*dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, bool defer_user_update, + optional_yield y) +{ + int ret = 0; + + std::string subprocess_msg; + int key_op = GENERATE_KEY; + + // set the op + if (op_state.has_existing_key()) + key_op = MODIFY_KEY; + + switch (key_op) { + case GENERATE_KEY: + ret = generate_key(dpp, op_state, y, &subprocess_msg); + break; + case MODIFY_KEY: + ret = modify_key(op_state, &subprocess_msg); + break; + } + + if (ret < 0) { + set_err_msg(err_msg, subprocess_msg); + return ret; + } + + // store the updated info + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWAccessKeyPool::add(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return add(dpp, op_state, err_msg, false, y); +} + +int RGWAccessKeyPool::add(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, std::string *err_msg, + bool defer_user_update, optional_yield y) +{ + int ret; + std::string subprocess_msg; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); + return ret; + } + + ret = execute_add(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to add access key, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWAccessKeyPool::execute_remove(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, + bool defer_user_update, + optional_yield y) +{ + int ret = 0; + + int key_type = op_state.get_key_type(); + std::string id = op_state.get_access_key(); + map::iterator kiter; + map *keys_map; + + if (!op_state.has_existing_key()) { + set_err_msg(err_msg, "unable to find access key, with key type: " + + key_type_to_str(key_type)); + return -ERR_INVALID_ACCESS_KEY; + } + + if (key_type == KEY_TYPE_S3) { + keys_map = access_keys; + } else if (key_type == KEY_TYPE_SWIFT) { + keys_map = swift_keys; + } else { + keys_map = NULL; + set_err_msg(err_msg, "invalid access key"); + return -ERR_INVALID_ACCESS_KEY; + } + + kiter = keys_map->find(id); + if (kiter == keys_map->end()) { + set_err_msg(err_msg, "key not found"); + return -ERR_INVALID_ACCESS_KEY; + } + + keys_map->erase(kiter); + + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWAccessKeyPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return remove(dpp, op_state, err_msg, false, y); +} + +int RGWAccessKeyPool::remove(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, bool defer_user_update, + optional_yield y) +{ + int ret; + + std::string subprocess_msg; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); + return ret; + } + + ret = execute_remove(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to remove access key, " + subprocess_msg); + return ret; + } + + return 0; +} + +// remove all keys associated with a subuser +int RGWAccessKeyPool::remove_subuser_keys(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, + bool defer_user_update, + optional_yield y) +{ + int ret = 0; + + if (!op_state.is_populated()) { + set_err_msg(err_msg, "user info was not 
populated"); + return -EINVAL; + } + + if (!op_state.has_subuser()) { + set_err_msg(err_msg, "no subuser specified"); + return -EINVAL; + } + + std::string swift_kid = op_state.build_default_swift_kid(); + if (swift_kid.empty()) { + set_err_msg(err_msg, "empty swift access key"); + return -EINVAL; + } + + map::iterator kiter; + map *keys_map; + + // a subuser can have at most one swift key + keys_map = swift_keys; + kiter = keys_map->find(swift_kid); + if (kiter != keys_map->end()) { + keys_map->erase(kiter); + } + + // a subuser may have multiple s3 key pairs + std::string subuser_str = op_state.get_subuser(); + keys_map = access_keys; + RGWUserInfo user_info = op_state.get_user_info(); + auto user_kiter = user_info.access_keys.begin(); + for (; user_kiter != user_info.access_keys.end(); ++user_kiter) { + if (user_kiter->second.subuser == subuser_str) { + kiter = keys_map->find(user_kiter->first); + if (kiter != keys_map->end()) { + keys_map->erase(kiter); + } + } + } + + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +RGWSubUserPool::RGWSubUserPool(RGWUser *usr) +{ + if (!usr) { + return; + } + + user = usr; + + subusers_allowed = true; + driver = user->get_driver(); +} + +int RGWSubUserPool::init(RGWUserAdminOpState& op_state) +{ + if (!op_state.is_initialized()) { + subusers_allowed = false; + return -EINVAL; + } + + const rgw_user& uid = op_state.get_user_id(); + if (uid.compare(RGW_USER_ANON_ID) == 0) { + subusers_allowed = false; + return -EACCES; + } + + subuser_map = op_state.get_subusers(); + if (subuser_map == NULL) { + subusers_allowed = false; + return -EINVAL; + } + + subusers_allowed = true; + + return 0; +} + +bool RGWSubUserPool::exists(std::string subuser) +{ + if (subuser.empty()) + return false; + + if (!subuser_map) + return false; + + if (subuser_map->count(subuser)) + return true; + + return false; +} + +int RGWSubUserPool::check_op(RGWUserAdminOpState& op_state, + std::string *err_msg) +{ + bool existing = false; + std::string subuser = op_state.get_subuser(); + + if (!op_state.is_populated()) { + set_err_msg(err_msg, "user info was not populated"); + return -EINVAL; + } + + if (!subusers_allowed) { + set_err_msg(err_msg, "subusers not allowed for this user"); + return -EACCES; + } + + if (subuser.empty() && !op_state.will_gen_subuser()) { + set_err_msg(err_msg, "empty subuser name"); + return -EINVAL; + } + + if (op_state.get_subuser_perm() == RGW_PERM_INVALID) { + set_err_msg(err_msg, "invalid subuser access"); + return -EINVAL; + } + + //set key type when it not set or set by context + if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) { + op_state.set_key_type(KEY_TYPE_SWIFT); + op_state.key_type_setbycontext = true; + } + + // check if the subuser exists + if (!subuser.empty()) + existing = exists(subuser); + + op_state.set_existing_subuser(existing); + + return 0; +} + +int RGWSubUserPool::execute_add(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, bool defer_user_update, + optional_yield y) +{ + int ret = 0; + std::string subprocess_msg; + + RGWSubUser subuser; + std::pair subuser_pair; + std::string subuser_str = op_state.get_subuser(); + + subuser_pair.first = subuser_str; + + // assumes key should be created + if (op_state.has_key_op()) { + ret = user->keys.add(dpp, op_state, &subprocess_msg, true, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create subuser key, " + subprocess_msg); + return ret; + } + } + + // 
create the subuser + subuser.name = subuser_str; + + if (op_state.has_subuser_perm()) + subuser.perm_mask = op_state.get_subuser_perm(); + + // insert the subuser into user info + subuser_pair.second = subuser; + subuser_map->insert(subuser_pair); + + // attempt to save the subuser + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWSubUserPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return add(dpp, op_state, err_msg, false, y); +} + +int RGWSubUserPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update, optional_yield y) +{ + std::string subprocess_msg; + int ret; + int32_t key_type = op_state.get_key_type(); + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); + return ret; + } + + if (op_state.get_access_key_exist()) { + set_err_msg(err_msg, "cannot create existing key"); + return -ERR_KEY_EXIST; + } + + if (key_type == KEY_TYPE_S3 && op_state.get_access_key().empty()) { + op_state.set_gen_access(); + } + + if (op_state.get_secret_key().empty()) { + op_state.set_gen_secret(); + } + + ret = execute_add(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create subuser, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWSubUserPool::execute_remove(const DoutPrefixProvider *dpp, + RGWUserAdminOpState& op_state, + std::string *err_msg, bool defer_user_update, + optional_yield y) +{ + int ret = 0; + std::string subprocess_msg; + + std::string subuser_str = op_state.get_subuser(); + + map::iterator siter; + siter = subuser_map->find(subuser_str); + if (siter == subuser_map->end()){ + set_err_msg(err_msg, "subuser not found: " + subuser_str); + return -ERR_NO_SUCH_SUBUSER; + } + if (!op_state.has_existing_subuser()) { + set_err_msg(err_msg, "subuser not found: " + subuser_str); + return -ERR_NO_SUCH_SUBUSER; + } + + // always purge all associate keys + user->keys.remove_subuser_keys(dpp, op_state, &subprocess_msg, true, y); + + // remove the subuser from the user info + subuser_map->erase(siter); + + // attempt to save the subuser + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWSubUserPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return remove(dpp, op_state, err_msg, false, y); +} + +int RGWSubUserPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + bool defer_user_update, optional_yield y) +{ + std::string subprocess_msg; + int ret; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); + return ret; + } + + ret = execute_remove(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to remove subuser, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWSubUserPool::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update, optional_yield y) +{ + int ret = 0; + std::string subprocess_msg; + std::map::iterator siter; + std::pair subuser_pair; + + std::string subuser_str = op_state.get_subuser(); + RGWSubUser subuser; + + if 
(!op_state.has_existing_subuser()) { + set_err_msg(err_msg, "subuser does not exist"); + return -ERR_NO_SUCH_SUBUSER; + } + + subuser_pair.first = subuser_str; + + siter = subuser_map->find(subuser_str); + subuser = siter->second; + + if (op_state.has_key_op()) { + ret = user->keys.add(dpp, op_state, &subprocess_msg, true, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create subuser keys, " + subprocess_msg); + return ret; + } + } + + if (op_state.has_subuser_perm()) + subuser.perm_mask = op_state.get_subuser_perm(); + + subuser_pair.second = subuser; + + subuser_map->erase(siter); + subuser_map->insert(subuser_pair); + + // attempt to save the subuser + if (!defer_user_update) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWSubUserPool::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) +{ + return RGWSubUserPool::modify(dpp, op_state, y, err_msg, false); +} + +int RGWSubUserPool::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg, bool defer_user_update) +{ + std::string subprocess_msg; + int ret; + + RGWSubUser subuser; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); + return ret; + } + + ret = execute_modify(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to modify subuser, " + subprocess_msg); + return ret; + } + + return 0; +} + +RGWUserCapPool::RGWUserCapPool(RGWUser *usr) +{ + if (!usr) { + return; + } + user = usr; + caps_allowed = true; +} + +int RGWUserCapPool::init(RGWUserAdminOpState& op_state) +{ + if (!op_state.is_initialized()) { + caps_allowed = false; + return -EINVAL; + } + + const rgw_user& uid = op_state.get_user_id(); + if (uid.compare(RGW_USER_ANON_ID) == 0) { + caps_allowed = false; + return -EACCES; + } + + caps = op_state.get_caps_obj(); + if (!caps) { + caps_allowed = false; + return -ERR_INVALID_CAP; + } + + caps_allowed = true; + + return 0; +} + +int RGWUserCapPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return add(dpp, op_state, err_msg, false, y); +} + +int RGWUserCapPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + bool defer_save, optional_yield y) +{ + int ret = 0; + std::string caps_str = op_state.get_caps(); + + if (!op_state.is_populated()) { + set_err_msg(err_msg, "user info was not populated"); + return -EINVAL; + } + + if (!caps_allowed) { + set_err_msg(err_msg, "caps not allowed for this user"); + return -EACCES; + } + + if (caps_str.empty()) { + set_err_msg(err_msg, "empty user caps"); + return -ERR_INVALID_CAP; + } + + int r = caps->add_from_string(caps_str); + if (r < 0) { + set_err_msg(err_msg, "unable to add caps: " + caps_str); + return r; + } + + if (!defer_save) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +int RGWUserCapPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg) +{ + return remove(dpp, op_state, err_msg, false, y); +} + +int RGWUserCapPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + bool defer_save, optional_yield y) +{ + int ret = 0; + + std::string caps_str = op_state.get_caps(); + + if (!op_state.is_populated()) { + 
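+    // as with add(), the caps object is only usable once the user info
+    // has been loaded into the op state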
set_err_msg(err_msg, "user info was not populated"); + return -EINVAL; + } + + if (!caps_allowed) { + set_err_msg(err_msg, "caps not allowed for this user"); + return -EACCES; + } + + if (caps_str.empty()) { + set_err_msg(err_msg, "empty user caps"); + return -ERR_INVALID_CAP; + } + + int r = caps->remove_from_string(caps_str); + if (r < 0) { + set_err_msg(err_msg, "unable to remove caps: " + caps_str); + return r; + } + + if (!defer_save) + ret = user->update(dpp, op_state, err_msg, y); + + if (ret < 0) + return ret; + + return 0; +} + +RGWUser::RGWUser() : caps(this), keys(this), subusers(this) +{ + init_default(); +} + +int RGWUser::init(const DoutPrefixProvider *dpp, rgw::sal::Driver* _driver, + RGWUserAdminOpState& op_state, optional_yield y) +{ + init_default(); + int ret = init_storage(_driver); + if (ret < 0) + return ret; + + ret = init(dpp, op_state, y); + if (ret < 0) + return ret; + + return 0; +} + +void RGWUser::init_default() +{ + // use anonymous user info as a placeholder + rgw_get_anon_user(old_info); + user_id = RGW_USER_ANON_ID; + + clear_populated(); +} + +int RGWUser::init_storage(rgw::sal::Driver* _driver) +{ + if (!_driver) { + return -EINVAL; + } + + driver = _driver; + + clear_populated(); + + /* API wrappers */ + keys = RGWAccessKeyPool(this); + caps = RGWUserCapPool(this); + subusers = RGWSubUserPool(this); + + return 0; +} + +int RGWUser::init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y) +{ + bool found = false; + std::string swift_user; + user_id = op_state.get_user_id(); + std::string user_email = op_state.get_user_email(); + std::string access_key = op_state.get_access_key(); + std::string subuser = op_state.get_subuser(); + + int key_type = op_state.get_key_type(); + if (key_type == KEY_TYPE_SWIFT) { + swift_user = op_state.get_access_key(); + access_key.clear(); + } + + std::unique_ptr user; + + clear_populated(); + + if (user_id.empty() && !subuser.empty()) { + size_t pos = subuser.find(':'); + if (pos != string::npos) { + user_id = subuser.substr(0, pos); + op_state.set_user_id(user_id); + } + } + + if (!user_id.empty() && (user_id.compare(RGW_USER_ANON_ID) != 0)) { + user = driver->get_user(user_id); + found = (user->load_user(dpp, y) >= 0); + op_state.found_by_uid = found; + } + if (driver->ctx()->_conf.get_val("rgw_user_unique_email")) { + if (!user_email.empty() && !found) { + found = (driver->get_user_by_email(dpp, user_email, y, &user) >= 0); + op_state.found_by_email = found; + } + } + if (!swift_user.empty() && !found) { + found = (driver->get_user_by_swift(dpp, swift_user, y, &user) >= 0); + op_state.found_by_key = found; + } + if (!access_key.empty() && !found) { + found = (driver->get_user_by_access_key(dpp, access_key, y, &user) >= 0); + op_state.found_by_key = found; + } + + op_state.set_existing_user(found); + if (found) { + op_state.set_user_info(user->get_info()); + op_state.set_populated(); + op_state.objv = user->get_version_tracker(); + op_state.set_user_version_tracker(user->get_version_tracker()); + + old_info = user->get_info(); + set_populated(); + } + + if (user_id.empty()) { + user_id = user->get_id(); + } + op_state.set_initialized(); + + // this may have been called by a helper object + int ret = init_members(op_state); + if (ret < 0) + return ret; + + return 0; +} + +int RGWUser::init_members(RGWUserAdminOpState& op_state) +{ + int ret = 0; + + ret = keys.init(op_state); + if (ret < 0) + return ret; + + ret = subusers.init(op_state); + if (ret < 0) + return ret; + + ret = caps.init(op_state); 
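+  // keys, subusers and caps are thin wrappers over the same op state;
+  // all three must be re-initialized whenever the user info is reloaded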
+ if (ret < 0) + return ret; + + return 0; +} + +int RGWUser::update(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + optional_yield y) +{ + int ret; + std::string subprocess_msg; + rgw::sal::User* user = op_state.get_user(); + + if (!driver) { + set_err_msg(err_msg, "couldn't initialize storage"); + return -EINVAL; + } + + RGWUserInfo *pold_info = (is_populated() ? &old_info : nullptr); + + ret = user->store_user(dpp, y, false, pold_info); + op_state.objv = user->get_version_tracker(); + op_state.set_user_version_tracker(user->get_version_tracker()); + + if (ret < 0) { + set_err_msg(err_msg, "unable to store user info"); + return ret; + } + + old_info = user->get_info(); + set_populated(); + + return 0; +} + +int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg) +{ + int ret = 0; + const rgw_user& uid = op_state.get_user_id(); + + if (uid.compare(RGW_USER_ANON_ID) == 0) { + set_err_msg(err_msg, "unable to perform operations on the anonymous user"); + return -EINVAL; + } + + if (is_populated() && user_id.compare(uid) != 0) { + set_err_msg(err_msg, "user id mismatch, operation id: " + uid.to_str() + + " does not match: " + user_id.to_str()); + + return -EINVAL; + } + + ret = rgw_validate_tenant_name(uid.tenant); + if (ret) { + set_err_msg(err_msg, + "invalid tenant only alphanumeric and _ characters are allowed"); + return ret; + } + + //set key type when it not set or set by context + if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) { + op_state.set_key_type(KEY_TYPE_S3); + op_state.key_type_setbycontext = true; + } + + return 0; +} + +// update swift_keys with new user id +static void rename_swift_keys(const rgw_user& user, + std::map& keys) +{ + std::string user_id; + user.to_str(user_id); + + auto modify_keys = std::move(keys); + for ([[maybe_unused]] auto& [k, key] : modify_keys) { + std::string id = user_id + ":" + key.subuser; + key.id = id; + keys[id] = std::move(key); + } +} + +int RGWUser::execute_rename(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) +{ + int ret; + bool populated = op_state.is_populated(); + + if (!op_state.has_existing_user() && !populated) { + set_err_msg(err_msg, "user not found"); + return -ENOENT; + } + + if (!populated) { + ret = init(dpp, op_state, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to retrieve user info"); + return ret; + } + } + + std::unique_ptr old_user = driver->get_user(op_state.get_user_info().user_id); + std::unique_ptr new_user = driver->get_user(op_state.get_new_uid()); + if (old_user->get_tenant() != new_user->get_tenant()) { + set_err_msg(err_msg, "users have to be under the same tenant namespace " + + old_user->get_tenant() + " != " + new_user->get_tenant()); + return -EINVAL; + } + + // create a stub user and write only the uid index and buckets object + std::unique_ptr user; + user = driver->get_user(new_user->get_id()); + + const bool exclusive = !op_state.get_overwrite_new_user(); // overwrite if requested + + ret = user->store_user(dpp, y, exclusive); + if (ret == -EEXIST) { + set_err_msg(err_msg, "user name given by --new-uid already exists"); + return ret; + } + if (ret < 0) { + set_err_msg(err_msg, "unable to store new user info"); + return ret; + } + + RGWAccessControlPolicy policy_instance; + policy_instance.create_default(new_user->get_id(), old_user->get_display_name()); + + //unlink and link buckets to new user + string marker; + CephContext *cct = driver->ctx(); + size_t max_buckets 
= cct->_conf->rgw_list_buckets_max_chunk; + rgw::sal::BucketList buckets; + + do { + ret = old_user->list_buckets(dpp, marker, "", max_buckets, false, buckets, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to list user buckets"); + return ret; + } + + auto& m = buckets.get_buckets(); + + for (auto it = m.begin(); it != m.end(); ++it) { + auto& bucket = it->second; + marker = it->first; + + ret = bucket->load_bucket(dpp, y); + if (ret < 0) { + set_err_msg(err_msg, "failed to fetch bucket info for bucket=" + bucket->get_name()); + return ret; + } + + ret = bucket->set_acl(dpp, policy_instance, y); + if (ret < 0) { + set_err_msg(err_msg, "failed to set acl on bucket " + bucket->get_name()); + return ret; + } + + ret = bucket->chown(dpp, new_user.get(), old_user.get(), y); + if (ret < 0) { + set_err_msg(err_msg, "failed to run bucket chown" + cpp_strerror(-ret)); + return ret; + } + } + + } while (buckets.is_truncated()); + + // update the 'stub user' with all of the other fields and rewrite all of the + // associated index objects + RGWUserInfo& user_info = op_state.get_user_info(); + user_info.user_id = new_user->get_id(); + op_state.objv = user->get_version_tracker(); + op_state.set_user_version_tracker(user->get_version_tracker()); + + rename_swift_keys(new_user->get_id(), user_info.swift_keys); + + return update(dpp, op_state, err_msg, y); +} + +int RGWUser::execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + optional_yield y) +{ + const rgw_user& uid = op_state.get_user_id(); + std::string user_email = op_state.get_user_email(); + std::string display_name = op_state.get_display_name(); + + // set the user info + RGWUserInfo user_info; + user_id = uid; + user_info.user_id = user_id; + user_info.display_name = display_name; + user_info.type = TYPE_RGW; + + if (!user_email.empty()) + user_info.user_email = user_email; + + CephContext *cct = driver->ctx(); + if (op_state.max_buckets_specified) { + user_info.max_buckets = op_state.get_max_buckets(); + } else { + user_info.max_buckets = + cct->_conf.get_val("rgw_user_max_buckets"); + } + + user_info.suspended = op_state.get_suspension_status(); + user_info.admin = op_state.admin; + user_info.system = op_state.system; + + if (op_state.op_mask_specified) + user_info.op_mask = op_state.get_op_mask(); + + if (op_state.has_bucket_quota()) { + user_info.quota.bucket_quota = op_state.get_bucket_quota(); + } else { + rgw_apply_default_bucket_quota(user_info.quota.bucket_quota, cct->_conf); + } + + if (op_state.temp_url_key_specified) { + map::iterator iter; + for (iter = op_state.temp_url_keys.begin(); + iter != op_state.temp_url_keys.end(); ++iter) { + user_info.temp_url_keys[iter->first] = iter->second; + } + } + + if (op_state.has_user_quota()) { + user_info.quota.user_quota = op_state.get_user_quota(); + } else { + rgw_apply_default_user_quota(user_info.quota.user_quota, cct->_conf); + } + + if (op_state.default_placement_specified) { + user_info.default_placement = op_state.default_placement; + } + + if (op_state.placement_tags_specified) { + user_info.placement_tags = op_state.placement_tags; + } + + // update the request + op_state.set_user_info(user_info); + op_state.set_populated(); + + // update the helper objects + int ret = init_members(op_state); + if (ret < 0) { + set_err_msg(err_msg, "unable to initialize user"); + return ret; + } + + // see if we need to add an access key + std::string subprocess_msg; + bool defer_user_update = true; + if (op_state.has_key_op()) { + ret = 
keys.add(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create access key, " + subprocess_msg); + return ret; + } + } + + // see if we need to add some caps + if (op_state.has_caps_op()) { + ret = caps.add(dpp, op_state, &subprocess_msg, defer_user_update, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to add user capabilities, " + subprocess_msg); + return ret; + } + } + + ret = update(dpp, op_state, err_msg, y); + if (ret < 0) + return ret; + + return 0; +} + +int RGWUser::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) +{ + std::string subprocess_msg; + int ret = user_add_helper(op_state, &subprocess_msg); + if (ret != 0) { + set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); + return ret; + } + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); + return ret; + } + + ret = execute_add(dpp, op_state, &subprocess_msg, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create user, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWUser::rename(RGWUserAdminOpState& op_state, optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg) +{ + std::string subprocess_msg; + int ret; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); + return ret; + } + + ret = execute_rename(dpp, op_state, &subprocess_msg, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to rename user, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWUser::execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) +{ + int ret; + + bool purge_data = op_state.will_purge_data(); + rgw::sal::User* user = op_state.get_user(); + + if (!op_state.has_existing_user()) { + set_err_msg(err_msg, "user does not exist"); + return -ENOENT; + } + + rgw::sal::BucketList buckets; + string marker; + CephContext *cct = driver->ctx(); + size_t max_buckets = cct->_conf->rgw_list_buckets_max_chunk; + do { + ret = user->list_buckets(dpp, marker, string(), max_buckets, false, buckets, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to read user bucket info"); + return ret; + } + + auto& m = buckets.get_buckets(); + if (!m.empty() && !purge_data) { + set_err_msg(err_msg, "must specify purge data to remove user with buckets"); + return -EEXIST; // change to code that maps to 409: conflict + } + + for (auto it = m.begin(); it != m.end(); ++it) { + ret = it->second->remove_bucket(dpp, true, false, nullptr, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to delete user data"); + return ret; + } + + marker = it->first; + } + + } while (buckets.is_truncated()); + + ret = user->remove_user(dpp, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to remove user from RADOS"); + return ret; + } + + op_state.clear_populated(); + clear_populated(); + + return 0; +} + +int RGWUser::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) +{ + std::string subprocess_msg; + int ret; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); + return ret; + } + + ret = execute_remove(dpp, op_state, &subprocess_msg, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to remove user, " + subprocess_msg); + return ret; + } 
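+  // execute_remove() refuses to delete a user who still owns buckets
+  // unless purge-data is set, in which case the buckets are removed first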
+ + return 0; +} + +int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) +{ + bool populated = op_state.is_populated(); + int ret = 0; + std::string subprocess_msg; + std::string op_email = op_state.get_user_email(); + std::string display_name = op_state.get_display_name(); + + RGWUserInfo user_info; + std::unique_ptr duplicate_check; + + // ensure that the user info has been populated or is populate-able + if (!op_state.has_existing_user() && !populated) { + set_err_msg(err_msg, "user not found"); + return -ENOENT; + } + + // if the user hasn't already been populated...attempt to + if (!populated) { + ret = init(dpp, op_state, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to retrieve user info"); + return ret; + } + } + + // ensure that we can modify the user's attributes + if (user_id.compare(RGW_USER_ANON_ID) == 0) { + set_err_msg(err_msg, "unable to modify anonymous user's info"); + return -EACCES; + } + + user_info = old_info; + + std::string old_email = old_info.user_email; + if (!op_email.empty()) { + // make sure we are not adding a duplicate email + if (old_email != op_email) { + ret = driver->get_user_by_email(dpp, op_email, y, &duplicate_check); + if (ret >= 0 && duplicate_check->get_id().compare(user_id) != 0) { + set_err_msg(err_msg, "cannot add duplicate email"); + return -ERR_EMAIL_EXIST; + } + } + user_info.user_email = op_email; + } else if (op_email.empty() && op_state.user_email_specified) { + ldpp_dout(dpp, 10) << "removing email index: " << user_info.user_email << dendl; + /* will be physically removed later when calling update() */ + user_info.user_email.clear(); + } + + // update the remaining user info + if (!display_name.empty()) + user_info.display_name = display_name; + + if (op_state.max_buckets_specified) + user_info.max_buckets = op_state.get_max_buckets(); + + if (op_state.admin_specified) + user_info.admin = op_state.admin; + + if (op_state.system_specified) + user_info.system = op_state.system; + + if (op_state.temp_url_key_specified) { + map::iterator iter; + for (iter = op_state.temp_url_keys.begin(); + iter != op_state.temp_url_keys.end(); ++iter) { + user_info.temp_url_keys[iter->first] = iter->second; + } + } + + if (op_state.op_mask_specified) + user_info.op_mask = op_state.get_op_mask(); + + if (op_state.has_bucket_quota()) + user_info.quota.bucket_quota = op_state.get_bucket_quota(); + + if (op_state.has_user_quota()) + user_info.quota.user_quota = op_state.get_user_quota(); + + if (op_state.has_suspension_op()) { + __u8 suspended = op_state.get_suspension_status(); + user_info.suspended = suspended; + + rgw::sal::BucketList buckets; + + if (user_id.empty()) { + set_err_msg(err_msg, "empty user id passed...aborting"); + return -EINVAL; + } + + string marker; + CephContext *cct = driver->ctx(); + size_t max_buckets = cct->_conf->rgw_list_buckets_max_chunk; + std::unique_ptr user = driver->get_user(user_id); + do { + ret = user->list_buckets(dpp, marker, string(), max_buckets, false, buckets, y); + if (ret < 0) { + set_err_msg(err_msg, "could not get buckets for uid: " + user_id.to_str()); + return ret; + } + + auto& m = buckets.get_buckets(); + + vector bucket_names; + for (auto iter = m.begin(); iter != m.end(); ++iter) { + auto& bucket = iter->second; + bucket_names.push_back(bucket->get_key()); + + marker = iter->first; + } + + ret = driver->set_buckets_enabled(dpp, bucket_names, !suspended); + if (ret < 0) { + set_err_msg(err_msg, "failed to modify bucket"); + 
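+        // note: earlier pages of buckets have already been toggled at
+        // this point; there is no rollback on failure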
return ret; + } + + } while (buckets.is_truncated()); + } + + if (op_state.mfa_ids_specified) { + user_info.mfa_ids = op_state.mfa_ids; + } + + if (op_state.default_placement_specified) { + user_info.default_placement = op_state.default_placement; + } + + if (op_state.placement_tags_specified) { + user_info.placement_tags = op_state.placement_tags; + } + + op_state.set_user_info(user_info); + + // if we're supposed to modify keys, do so + if (op_state.has_key_op()) { + ret = keys.add(dpp, op_state, &subprocess_msg, true, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to create or modify keys, " + subprocess_msg); + return ret; + } + } + + ret = update(dpp, op_state, err_msg, y); + if (ret < 0) + return ret; + + return 0; +} + +int RGWUser::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) +{ + std::string subprocess_msg; + int ret; + + ret = check_op(op_state, &subprocess_msg); + if (ret < 0) { + set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); + return ret; + } + + ret = execute_modify(dpp, op_state, &subprocess_msg, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to modify user, " + subprocess_msg); + return ret; + } + + return 0; +} + +int RGWUser::info(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, + optional_yield y, std::string *err_msg) +{ + int ret = init(dpp, op_state, y); + if (ret < 0) { + set_err_msg(err_msg, "unable to fetch user info"); + return ret; + } + + fetched_info = op_state.get_user_info(); + + return 0; +} + +int RGWUser::info(RGWUserInfo& fetched_info, std::string *err_msg) +{ + if (!is_populated()) { + set_err_msg(err_msg, "no user info saved"); + return -EINVAL; + } + + fetched_info = old_info; + + return 0; +} + +int RGWUser::list(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher) +{ + Formatter *formatter = flusher.get_formatter(); + void *handle = nullptr; + std::string metadata_key = "user"; + if (op_state.max_entries > 1000) { + op_state.max_entries = 1000; + } + + int ret = driver->meta_list_keys_init(dpp, metadata_key, op_state.marker, &handle); + if (ret < 0) { + return ret; + } + + bool truncated = false; + uint64_t count = 0; + uint64_t left = 0; + flusher.start(0); + + // open the result object section + formatter->open_object_section("result"); + + // open the user id list array section + formatter->open_array_section("keys"); + do { + std::list keys; + left = op_state.max_entries - count; + ret = driver->meta_list_keys_next(dpp, handle, left, keys, &truncated); + if (ret < 0 && ret != -ENOENT) { + return ret; + } if (ret != -ENOENT) { + for (std::list::iterator iter = keys.begin(); iter != keys.end(); ++iter) { + formatter->dump_string("key", *iter); + ++count; + } + } + } while (truncated && left > 0); + // close user id list section + formatter->close_section(); + + formatter->dump_bool("truncated", truncated); + formatter->dump_int("count", count); + if (truncated) { + formatter->dump_string("marker", driver->meta_get_marker(handle)); + } + + // close result object section + formatter->close_section(); + + driver->meta_list_keys_complete(handle); + + flusher.flush(); + return 0; +} + +int RGWUserAdminOp_User::list(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher) +{ + RGWUser user; + + int ret = user.init_storage(driver); + if (ret < 0) + return ret; + + ret = user.list(dpp, op_state, flusher); + if (ret < 0) 
+ return ret; + + return 0; +} + +int RGWUserAdminOp_User::info(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + std::unique_ptr ruser; + + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + ruser = driver->get_user(info.user_id); + + if (op_state.sync_stats) { + ret = rgw_user_sync_all_stats(dpp, driver, ruser.get(), y); + if (ret < 0) { + return ret; + } + } + + RGWStorageStats stats; + RGWStorageStats *arg_stats = NULL; + if (op_state.fetch_stats) { + int ret = ruser->read_stats(dpp, y, &stats); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + + arg_stats = &stats; + } + + if (formatter) { + flusher.start(0); + + dump_user_info(formatter, info, arg_stats); + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_User::create(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.add(dpp, op_state, y, NULL); + if (ret < 0) { + if (ret == -EEXIST) + ret = -ERR_USER_EXIST; + return ret; + } + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + dump_user_info(formatter, info); + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_User::modify(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + Formatter *formatter = flusher.get_formatter(); + + ret = user.modify(dpp, op_state, y, NULL); + if (ret < 0) { + if (ret == -ENOENT) + ret = -ERR_NO_SUCH_USER; + return ret; + } + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + dump_user_info(formatter, info); + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_User::remove(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + + ret = user.remove(dpp, op_state, y, NULL); + + if (ret == -ENOENT) + ret = -ERR_NO_SUCH_USER; + return ret; +} + +int RGWUserAdminOp_Subuser::create(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.subusers.add(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + dump_subusers_info(formatter, info); + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_Subuser::modify(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, 
RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.subusers.modify(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + dump_subusers_info(formatter, info); + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_Subuser::remove(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + ret = user.subusers.remove(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + return 0; +} + +int RGWUserAdminOp_Key::create(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.keys.add(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + int key_type = op_state.get_key_type(); + + if (key_type == KEY_TYPE_SWIFT) + dump_swift_keys_info(formatter, info); + + else if (key_type == KEY_TYPE_S3) + dump_access_keys_info(formatter, info); + + flusher.flush(); + } + + return 0; +} + +int RGWUserAdminOp_Key::remove(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, + optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + + ret = user.keys.remove(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + return 0; +} + +int RGWUserAdminOp_Caps::add(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.caps.add(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + ret = user.info(info, NULL); + if (ret < 0) + return ret; + + if (formatter) { + flusher.start(0); + + info.caps.dump(formatter); + flusher.flush(); + } + + return 0; +} + + +int RGWUserAdminOp_Caps::remove(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, + RGWFormatterFlusher& flusher, optional_yield y) +{ + RGWUserInfo info; + RGWUser user; + int ret = user.init(dpp, driver, op_state, y); + if (ret < 0) + return ret; + + if (!op_state.has_existing_user()) + return -ERR_NO_SUCH_USER; + + Formatter *formatter = flusher.get_formatter(); + + ret = user.caps.remove(dpp, op_state, y, NULL); + if (ret < 0) + return ret; + + ret = user.info(info, NULL); + if (ret 
< 0) + return ret; + + if (formatter) { + flusher.start(0); + + info.caps.dump(formatter); + flusher.flush(); + } + + return 0; +} + +class RGWUserMetadataHandler : public RGWMetadataHandler_GenericMetaBE { +public: + struct Svc { + RGWSI_User *user{nullptr}; + } svc; + + RGWUserMetadataHandler(RGWSI_User *user_svc) { + base_init(user_svc->ctx(), user_svc->get_be_handler()); + svc.user = user_svc; + } + + ~RGWUserMetadataHandler() {} + + string get_type() override { return "user"; } + + int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override { + RGWUserCompleteInfo uci; + RGWObjVersionTracker objv_tracker; + real_time mtime; + + rgw_user user = RGWSI_User::user_from_meta_key(entry); + + int ret = svc.user->read_user_info(op->ctx(), user, &uci.info, &objv_tracker, + &mtime, nullptr, &uci.attrs, + y, dpp); + if (ret < 0) { + return ret; + } + + RGWUserMetadataObject *mdo = new RGWUserMetadataObject(uci, objv_tracker.read_version, mtime); + *obj = mdo; + + return 0; + } + + RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override { + RGWUserCompleteInfo uci; + + try { + decode_json_obj(uci, jo); + } catch (JSONDecoder::err& e) { + return nullptr; + } + + return new RGWUserMetadataObject(uci, objv, mtime); + } + + int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) override; + + int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) override { + RGWUserInfo info; + + rgw_user user = RGWSI_User::user_from_meta_key(entry); + + int ret = svc.user->read_user_info(op->ctx(), user, &info, nullptr, + nullptr, nullptr, nullptr, + y, dpp); + if (ret < 0) { + return ret; + } + + return svc.user->remove_user_info(op->ctx(), info, &objv_tracker, + y, dpp); + } +}; + +class RGWMetadataHandlerPut_User : public RGWMetadataHandlerPut_SObj +{ + RGWUserMetadataHandler *uhandler; + RGWUserMetadataObject *uobj; +public: + RGWMetadataHandlerPut_User(RGWUserMetadataHandler *_handler, + RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, + optional_yield y, + RGWMDLogSyncType type, bool from_remote_zone) : RGWMetadataHandlerPut_SObj(_handler, op, entry, obj, objv_tracker, y, type, from_remote_zone), + uhandler(_handler) { + uobj = static_cast(obj); + } + + int put_checked(const DoutPrefixProvider *dpp) override; +}; + +int RGWUserMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, + RGWMetadataObject *obj, + RGWObjVersionTracker& objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp, + RGWMDLogSyncType type, bool from_remote_zone) +{ + RGWMetadataHandlerPut_User put_op(this, op, entry, obj, objv_tracker, y, type, from_remote_zone); + return do_put_operate(&put_op, dpp); +} + +int RGWMetadataHandlerPut_User::put_checked(const DoutPrefixProvider *dpp) +{ + RGWUserMetadataObject *orig_obj = static_cast(old_obj); + RGWUserCompleteInfo& uci = uobj->get_uci(); + + map *pattrs{nullptr}; + if (uci.has_attrs) { + pattrs = &uci.attrs; + } + + RGWUserInfo *pold_info = (orig_obj ? 
+RGWUserCtl::RGWUserCtl(RGWSI_Zone *zone_svc,
+                       RGWSI_User *user_svc,
+                       RGWUserMetadataHandler *_umhandler) : umhandler(_umhandler) {
+  svc.zone = zone_svc;
+  svc.user = user_svc;
+  be_handler = umhandler->get_be_handler();
+}
+
+template <class T>
+class optional_default
+{
+  const std::optional<T>& opt;
+  std::optional<T> def;
+  const T *p;
+public:
+  optional_default(const std::optional<T>& _o) : opt(_o) {
+    if (opt) {
+      p = &(*opt);
+    } else {
+      def = T();
+      p = &(*def);
+    }
+  }
+
+  const T *operator->() {
+    return p;
+  }
+
+  const T& operator*() {
+    return *p;
+  }
+};
+
+int RGWUserCtl::get_info_by_uid(const DoutPrefixProvider *dpp,
+                                const rgw_user& uid,
+                                RGWUserInfo *info,
+                                optional_yield y,
+                                const GetParams& params)
+
+{
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->read_user_info(op->ctx(),
+                                    uid,
+                                    info,
+                                    params.objv_tracker,
+                                    params.mtime,
+                                    params.cache_info,
+                                    params.attrs,
+                                    y,
+                                    dpp);
+  });
+}
+
+int RGWUserCtl::get_info_by_email(const DoutPrefixProvider *dpp,
+                                  const string& email,
+                                  RGWUserInfo *info,
+                                  optional_yield y,
+                                  const GetParams& params)
+{
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->get_user_info_by_email(op->ctx(), email,
+                                            info,
+                                            params.objv_tracker,
+                                            params.mtime,
+                                            y,
+                                            dpp);
+  });
+}
+
+int RGWUserCtl::get_info_by_swift(const DoutPrefixProvider *dpp,
+                                  const string& swift_name,
+                                  RGWUserInfo *info,
+                                  optional_yield y,
+                                  const GetParams& params)
+{
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->get_user_info_by_swift(op->ctx(), swift_name,
+                                            info,
+                                            params.objv_tracker,
+                                            params.mtime,
+                                            y,
+                                            dpp);
+  });
+}
+
+int RGWUserCtl::get_info_by_access_key(const DoutPrefixProvider *dpp,
+                                       const string& access_key,
+                                       RGWUserInfo *info,
+                                       optional_yield y,
+                                       const GetParams& params)
+{
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->get_user_info_by_access_key(op->ctx(), access_key,
+                                                 info,
+                                                 params.objv_tracker,
+                                                 params.mtime,
+                                                 y,
+                                                 dpp);
+  });
+}
+
+int RGWUserCtl::get_attrs_by_uid(const DoutPrefixProvider *dpp,
+                                 const rgw_user& user_id,
+                                 map<string, bufferlist> *pattrs,
+                                 optional_yield y,
+                                 RGWObjVersionTracker *objv_tracker)
+{
+  RGWUserInfo user_info;
+
+  return get_info_by_uid(dpp, user_id, &user_info, y, RGWUserCtl::GetParams()
+                         .set_attrs(pattrs)
+                         .set_objv_tracker(objv_tracker));
+}
+
+int RGWUserCtl::store_info(const DoutPrefixProvider *dpp,
+                           const RGWUserInfo& info, optional_yield y,
+                           const PutParams& params)
+{
+  string key = RGWSI_User::get_meta_key(info.user_id);
+
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->store_user_info(op->ctx(), info,
+                                     params.old_info,
+                                     params.objv_tracker,
+                                     params.mtime,
+                                     params.exclusive,
+                                     params.attrs,
+                                     y,
+                                     dpp);
+  });
+}
+
+int RGWUserCtl::remove_info(const DoutPrefixProvider *dpp,
+                            const RGWUserInfo& info, optional_yield y,
+                            const RemoveParams& params)
+
+{
+  string key = RGWSI_User::get_meta_key(info.user_id);
+
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->remove_user_info(op->ctx(), info,
+                                      params.objv_tracker,
+                                      y, dpp);
+  });
+}
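`GetParams`/`PutParams` are fluent builders, so optional outputs are opt-in at the call site; `get_attrs_by_uid()` above is itself one example. Another sketch, with hypothetical `user_ctl`, `uid`, `y`, and `dpp` variables:

```cpp
// Sketch: fetch user info plus xattrs, tracking the object version so a
// later store_info() can be made conditional on it.
RGWObjVersionTracker objv;
std::map<std::string, bufferlist> attrs;
RGWUserInfo info;
int r = user_ctl->get_info_by_uid(dpp, uid, &info, y,
                                  RGWUserCtl::GetParams()
                                      .set_objv_tracker(&objv)
                                      .set_attrs(&attrs));
```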
+int RGWUserCtl::list_buckets(const DoutPrefixProvider *dpp,
+                             const rgw_user& user,
+                             const string& marker,
+                             const string& end_marker,
+                             uint64_t max,
+                             bool need_stats,
+                             RGWUserBuckets *buckets,
+                             bool *is_truncated,
+                             optional_yield y,
+                             uint64_t default_max)
+{
+  if (!max) {
+    max = default_max;
+  }
+
+  int ret = svc.user->list_buckets(dpp, user, marker, end_marker,
+                                   max, buckets, is_truncated, y);
+  if (ret < 0) {
+    return ret;
+  }
+  if (need_stats) {
+    map<string, RGWBucketEnt>& m = buckets->get_buckets();
+    ret = ctl.bucket->read_buckets_stats(m, y, dpp);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, 0) << "ERROR: could not get stats for buckets" << dendl;
+      return ret;
+    }
+  }
+  return 0;
+}
+
+int RGWUserCtl::read_stats(const DoutPrefixProvider *dpp,
+                           const rgw_user& user, RGWStorageStats *stats,
+                           optional_yield y,
+                           ceph::real_time *last_stats_sync,
+                           ceph::real_time *last_stats_update)
+{
+  return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) {
+    return svc.user->read_stats(dpp, op->ctx(), user, stats,
+                                last_stats_sync, last_stats_update, y);
+  });
+}
+
+RGWMetadataHandler *RGWUserMetaHandlerAllocator::alloc(RGWSI_User *user_svc) {
+  return new RGWUserMetadataHandler(user_svc);
+}
+
+void rgw_user::dump(Formatter *f) const
+{
+  ::encode_json("user", *this, f);
+}
+
diff --git a/src/rgw/driver/rados/rgw_user.h b/src/rgw/driver/rados/rgw_user.h
new file mode 100644
index 00000000000..110124cdbc7
--- /dev/null
+++ b/src/rgw/driver/rados/rgw_user.h
@@ -0,0 +1,887 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_USER_H
+#define CEPH_RGW_USER_H
+
+#include <string>
+#include <boost/algorithm/string.hpp>
+#include "include/ceph_assert.h"
+
+#include "include/types.h"
+#include "rgw_common.h"
+#include "rgw_tools.h"
+
+#include "rgw_string.h"
+
+#include "common/Formatter.h"
+#include "rgw_formats.h"
+#include "rgw_metadata.h"
+#include "rgw_sal_fwd.h"
+
+#define RGW_USER_ANON_ID "anonymous"
+
+#define SECRET_KEY_LEN 40
+#define PUBLIC_ID_LEN 20
+#define RAND_SUBUSER_LEN 5
+
+#define XMLNS_AWS_S3 "http://s3.amazonaws.com/doc/2006-03-01/"
+
+class RGWUserCtl;
+class RGWBucketCtl;
+class RGWUserBuckets;
+
+class RGWGetUserStats_CB;
+
+/**
+ * A string wrapper that includes encode/decode functions
+ * for easily accessing a UID in all forms
+ */
+struct RGWUID
+{
+  rgw_user user_id;
+  void encode(bufferlist& bl) const {
+    std::string s;
+    user_id.to_str(s);
+    using ceph::encode;
+    encode(s, bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    std::string s;
+    using ceph::decode;
+    decode(s, bl);
+    user_id.from_str(s);
+  }
+};
+WRITE_CLASS_ENCODER(RGWUID)
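Because `RGWUID` serializes the user id as a plain string, it round-trips through a `bufferlist` with the standard encoder machinery; a minimal sketch (variable names are illustrative):

```cpp
// Sketch: encode an RGWUID into a bufferlist and decode it back.
RGWUID in;
in.user_id = rgw_user("tenant$bob");  // "tenant$user" form parsed by from_str()

bufferlist bl;
encode(in, bl);                       // provided by WRITE_CLASS_ENCODER(RGWUID)

RGWUID out;
auto p = bl.cbegin();
decode(out, p);                       // out.user_id now equals in.user_id
```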
+/** Entry for bucket metadata collection */
+struct bucket_meta_entry {
+  size_t size;
+  size_t size_rounded;
+  ceph::real_time creation_time;
+  uint64_t count;
+};
+
+extern int rgw_user_sync_all_stats(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::User* user, optional_yield y);
+extern int rgw_user_get_all_buckets_stats(const DoutPrefixProvider *dpp,
+                                          rgw::sal::Driver* driver, rgw::sal::User* user,
+                                          std::map<std::string, bucket_meta_entry>& buckets_usage_map, optional_yield y);
+
+/**
+ * Get the anonymous (ie, unauthenticated) user info.
+ */
+extern void rgw_get_anon_user(RGWUserInfo& info);
+
+extern void rgw_perm_to_str(uint32_t mask, char *buf, int len);
+extern uint32_t rgw_str_to_perm(const char *str);
+
+extern int rgw_validate_tenant_name(const std::string& t);
+
+enum ObjectKeyType {
+  KEY_TYPE_SWIFT,
+  KEY_TYPE_S3,
+  KEY_TYPE_UNDEFINED
+};
+
+enum RGWKeyPoolOp {
+  GENERATE_KEY,
+  MODIFY_KEY
+};
+
+enum RGWUserId {
+  RGW_USER_ID,
+  RGW_SWIFT_USERNAME,
+  RGW_USER_EMAIL,
+  RGW_ACCESS_KEY,
+};
+
+/*
+ * An RGWUser class along with supporting classes created
+ * to support the creation of a RESTful administrative API
+ */
+struct RGWUserAdminOpState {
+  // user attributes
+  std::unique_ptr<rgw::sal::User> user;
+  std::string user_email;
+  std::string display_name;
+  rgw_user new_user_id;
+  bool overwrite_new_user = false;
+  int32_t max_buckets{RGW_DEFAULT_MAX_BUCKETS};
+  __u8 suspended{0};
+  __u8 admin{0};
+  __u8 system{0};
+  __u8 exclusive{0};
+  __u8 fetch_stats{0};
+  __u8 sync_stats{0};
+  std::string caps;
+  RGWObjVersionTracker objv;
+  uint32_t op_mask{0};
+  std::map<int, std::string> temp_url_keys;
+
+  // subuser attributes
+  std::string subuser;
+  uint32_t perm_mask{RGW_PERM_NONE};
+
+  // key_attributes
+  std::string id; // access key
+  std::string key; // secret key
+  int32_t key_type{-1};
+  bool access_key_exist = false;
+
+  std::set<std::string> mfa_ids;
+
+  // operation attributes
+  bool existing_user{false};
+  bool existing_key{false};
+  bool existing_subuser{false};
+  bool existing_email{false};
+  bool subuser_specified{false};
+  bool gen_secret{false};
+  bool gen_access{false};
+  bool gen_subuser{false};
+  bool id_specified{false};
+  bool key_specified{false};
+  bool type_specified{false};
+  bool key_type_setbycontext{false};   // key type set by user or subuser context
+  bool purge_data{false};
+  bool purge_keys{false};
+  bool display_name_specified{false};
+  bool user_email_specified{false};
+  bool max_buckets_specified{false};
+  bool perm_specified{false};
+  bool op_mask_specified{false};
+  bool caps_specified{false};
+  bool suspension_op{false};
+  bool admin_specified{false};
+  bool system_specified{false};
+  bool key_op{false};
+  bool temp_url_key_specified{false};
+  bool found_by_uid{false};
+  bool found_by_email{false};
+  bool found_by_key{false};
+  bool mfa_ids_specified{false};
+
+  // req parameters
+  bool populated{false};
+  bool initialized{false};
+  bool key_params_checked{false};
+  bool subuser_params_checked{false};
+  bool user_params_checked{false};
+
+  bool bucket_quota_specified{false};
+  bool user_quota_specified{false};
+  bool bucket_ratelimit_specified{false};
+  bool user_ratelimit_specified{false};
+
+  RGWQuota quota;
+  RGWRateLimitInfo user_ratelimit;
+  RGWRateLimitInfo bucket_ratelimit;
+
+  // req parameters for listing user
+  std::string marker{""};
+  uint32_t max_entries{1000};
+  rgw_placement_rule default_placement; // user default placement
+  bool default_placement_specified{false};
+
+  std::list<std::string> placement_tags; // user default placement_tags
+  bool placement_tags_specified{false};
+
+  void set_access_key(const std::string& access_key) {
+    if (access_key.empty())
+      return;
+
+    id = access_key;
+    id_specified = true;
+    gen_access = false;
+    key_op = true;
+  }
+
+  void set_secret_key(const std::string& secret_key) {
+    if (secret_key.empty())
+      return;
+
+    key = secret_key;
+    key_specified = true;
+    gen_secret = false;
+    key_op = true;
+  }
+
+  void set_user_id(const rgw_user& id);
+
+  void set_new_user_id(const rgw_user& id) {
+    if (id.empty())
+      return;
+
+    new_user_id = id;
+  }
+  void set_overwrite_new_user(bool b) {
+    overwrite_new_user = b;
+  }
+
+  void set_user_email(std::string& email) {
+    /* always lowercase email address */
+    boost::algorithm::to_lower(email);
+    user_email = email;
+    user_email_specified = true;
+  }
+
+  void set_display_name(const std::string& name) {
+    if (name.empty())
+      return;
+
+    display_name = name;
+    display_name_specified = true;
+  }
+
+  void set_subuser(std::string& _subuser);
+
+  void set_caps(const std::string& _caps) {
+    if (_caps.empty())
+      return;
+
+    caps = _caps;
+    caps_specified = true;
+  }
+
+  void set_perm(uint32_t perm) {
+    perm_mask = perm;
+    perm_specified = true;
+  }
+
+  void set_op_mask(uint32_t mask) {
+    op_mask = mask;
+    op_mask_specified = true;
+  }
+
+  void set_temp_url_key(const std::string& key, int index) {
+    temp_url_keys[index] = key;
+    temp_url_key_specified = true;
+  }
+
+  void set_key_type(int32_t type) {
+    key_type = type;
+    type_specified = true;
+  }
+
+  void set_access_key_exist() {
+    access_key_exist = true;
+  }
+
+  void set_suspension(__u8 is_suspended) {
+    suspended = is_suspended;
+    suspension_op = true;
+  }
+
+  void set_admin(__u8 is_admin) {
+    admin = is_admin;
+    admin_specified = true;
+  }
+
+  void set_system(__u8 is_system) {
+    system = is_system;
+    system_specified = true;
+  }
+
+  void set_exclusive(__u8 is_exclusive) {
+    exclusive = is_exclusive;
+  }
+
+  void set_fetch_stats(__u8 is_fetch_stats) {
+    fetch_stats = is_fetch_stats;
+  }
+
+  void set_sync_stats(__u8 is_sync_stats) {
+    sync_stats = is_sync_stats;
+  }
+
+  void set_user_info(RGWUserInfo& user_info);
+
+  void set_user_version_tracker(RGWObjVersionTracker& objv_tracker);
+
+  void set_max_buckets(int32_t mb) {
+    max_buckets = mb;
+    max_buckets_specified = true;
+  }
+
+  void set_gen_access() {
+    gen_access = true;
+    key_op = true;
+  }
+
+  void set_gen_secret() {
+    gen_secret = true;
+    key_op = true;
+  }
+
+  void set_generate_key() {
+    if (id.empty())
+      gen_access = true;
+    if (key.empty())
+      gen_secret = true;
+    key_op = true;
+  }
+
+  void clear_generate_key() {
+    gen_access = false;
+    gen_secret = false;
+  }
+
+  void set_purge_keys() {
+    purge_keys = true;
+    key_op = true;
+  }
+
+  void set_bucket_quota(RGWQuotaInfo& quotas) {
+    quota.bucket_quota = quotas;
+    bucket_quota_specified = true;
+  }
+
+  void set_user_quota(RGWQuotaInfo& quotas) {
+    quota.user_quota = quotas;
+    user_quota_specified = true;
+  }
+
+  void set_bucket_ratelimit(RGWRateLimitInfo& ratelimit) {
+    bucket_ratelimit = ratelimit;
+    bucket_ratelimit_specified = true;
+  }
+
+  void set_user_ratelimit(RGWRateLimitInfo& ratelimit) {
+    user_ratelimit = ratelimit;
+    user_ratelimit_specified = true;
+  }
+
+  void set_mfa_ids(const std::set<std::string>& ids) {
+    mfa_ids = ids;
+    mfa_ids_specified = true;
+  }
+
+  void set_default_placement(const rgw_placement_rule& _placement) {
+    default_placement = _placement;
+    default_placement_specified = true;
+  }
+
+  void set_placement_tags(const std::list<std::string>& _tags) {
+    placement_tags = _tags;
+    placement_tags_specified = true;
+  }
+
+  bool is_populated() { return populated; }
+  bool is_initialized() { return initialized; }
+  bool has_existing_user() { return existing_user; }
+  bool has_existing_key() { return existing_key; }
+  bool has_existing_subuser() { return existing_subuser; }
+  bool has_existing_email() { return existing_email; }
+  bool has_subuser() { return subuser_specified; }
+  bool has_key_op() { return key_op; }
+  bool has_caps_op() { return caps_specified; }
+  bool has_suspension_op() { return suspension_op; }
+  bool has_subuser_perm() { return perm_specified; }
+  bool has_op_mask() { return op_mask_specified; }
+  bool will_gen_access() { return gen_access; }
+  bool will_gen_secret() { return gen_secret; }
+  bool will_gen_subuser() { return gen_subuser; }
+  bool will_purge_keys() { return purge_keys; }
+  bool will_purge_data() { return purge_data; }
+  bool will_generate_subuser() { return gen_subuser; }
+  bool has_bucket_quota() { return bucket_quota_specified; }
+  bool has_user_quota() { return user_quota_specified; }
+  void set_populated() { populated = true; }
+  void clear_populated() { populated = false; }
+  void set_initialized() { initialized = true; }
+  void set_existing_user(bool flag) { existing_user = flag; }
+  void set_existing_key(bool flag) { existing_key = flag; }
+  void set_existing_subuser(bool flag) { existing_subuser = flag; }
+  void set_existing_email(bool flag) { existing_email = flag; }
+  void set_purge_data(bool flag) { purge_data = flag; }
+  void set_generate_subuser(bool flag) { gen_subuser = flag; }
+  __u8 get_suspension_status() { return suspended; }
+  int32_t get_key_type() { return key_type; }
+  bool get_access_key_exist() { return access_key_exist; }
+  uint32_t get_subuser_perm() { return perm_mask; }
+  int32_t get_max_buckets() { return max_buckets; }
+  uint32_t get_op_mask() { return op_mask; }
+  RGWQuotaInfo& get_bucket_quota() { return quota.bucket_quota; }
+  RGWQuotaInfo& get_user_quota() { return quota.user_quota; }
+  std::set<std::string>& get_mfa_ids() { return mfa_ids; }
+
+  rgw::sal::User* get_user() { return user.get(); }
+  const rgw_user& get_user_id();
+  std::string get_subuser() { return subuser; }
+  std::string get_access_key() { return id; }
+  std::string get_secret_key() { return key; }
+  std::string get_caps() { return caps; }
+  std::string get_user_email() { return user_email; }
+  std::string get_display_name() { return display_name; }
+  rgw_user& get_new_uid() { return new_user_id; }
+  bool get_overwrite_new_user() const { return overwrite_new_user; }
+  std::map<int, std::string>& get_temp_url_keys() { return temp_url_keys; }
+
+  RGWUserInfo& get_user_info();
+
+  std::map<std::string, RGWAccessKey>* get_swift_keys();
+  std::map<std::string, RGWAccessKey>* get_access_keys();
+  std::map<std::string, RGWSubUser>* get_subusers();
+
+  RGWUserCaps* get_caps_obj();
+
+  std::string build_default_swift_kid();
+
+  std::string generate_subuser();
+
+  RGWUserAdminOpState(rgw::sal::Driver* driver);
+};
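Callers never touch the `*_specified` flags directly; they drive `RGWUserAdminOpState` entirely through the setters above. A minimal sketch of preparing a key-generation plus quota change (the user id and sizes are illustrative, and `driver` is assumed to be an initialized `rgw::sal::Driver*`):

```cpp
// Sketch: ask for a fresh S3 key pair and a 1 GiB bucket quota.
RGWUserAdminOpState op_state(driver);
op_state.set_user_id(rgw_user("testid"));
op_state.set_key_type(KEY_TYPE_S3);
op_state.set_generate_key();      // gen_access/gen_secret only if id/key are empty

RGWQuotaInfo bucket_quota;
bucket_quota.enabled = true;
bucket_quota.max_size = 1024 * 1024 * 1024;  // bytes
bucket_quota.max_objects = -1;               // unlimited object count
op_state.set_bucket_quota(bucket_quota);     // sets bucket_quota_specified
```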
+
+class RGWUser;
+
+class RGWAccessKeyPool
+{
+  RGWUser *user{nullptr};
+
+  std::map<std::string, int, ltstr_nocase> key_type_map;
+  rgw_user user_id;
+  rgw::sal::Driver* driver{nullptr};
+
+  std::map<std::string, RGWAccessKey> *swift_keys{nullptr};
+  std::map<std::string, RGWAccessKey> *access_keys{nullptr};
+
+  // we don't want to allow keys for the anonymous user or a null user
+  bool keys_allowed{false};
+
+private:
+  int create_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+  int generate_key(const DoutPrefixProvider *dpp,
+                   RGWUserAdminOpState& op_state, optional_yield y,
+                   std::string *err_msg = NULL);
+  int modify_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+  int check_key_owner(RGWUserAdminOpState& op_state);
+  bool check_existing_key(RGWUserAdminOpState& op_state);
+  int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+  /* API Contract Fulfilment */
+  int execute_add(const DoutPrefixProvider *dpp,
+                  RGWUserAdminOpState& op_state, std::string *err_msg,
+                  bool defer_save, optional_yield y);
+  int execute_remove(const DoutPrefixProvider *dpp,
+                     RGWUserAdminOpState& op_state, std::string *err_msg,
+                     bool defer_save, optional_yield y);
+  int remove_subuser_keys(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg,
+                          bool
defer_save, optional_yield y); + + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, + optional_yield y); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + bool defer_save, optional_yield y); +public: + explicit RGWAccessKeyPool(RGWUser* usr); + + int init(RGWUserAdminOpState& op_state); + + /* API Contracted Methods */ + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg = NULL); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg = NULL); + + friend class RGWUser; + friend class RGWSubUserPool; +}; + +class RGWSubUserPool +{ + RGWUser *user{nullptr}; + + rgw_user user_id; + rgw::sal::Driver* driver{nullptr}; + bool subusers_allowed{false}; + + std::map *subuser_map{nullptr}; + +private: + int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL); + + /* API Contract Fulfillment */ + int execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); + int execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); + int execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); + + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, + optional_yield y); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); + int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg, bool defer_save); +public: + explicit RGWSubUserPool(RGWUser *user); + + bool exists(std::string subuser); + int init(RGWUserAdminOpState& op_state); + + /* API contracted methods */ + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg = NULL); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + + friend class RGWUser; +}; + +class RGWUserCapPool +{ + RGWUserCaps *caps{nullptr}; + bool caps_allowed{false}; + RGWUser *user{nullptr}; + +private: + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, + optional_yield y); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, + optional_yield y); + +public: + explicit RGWUserCapPool(RGWUser *user); + + int init(RGWUserAdminOpState& op_state); + + /* API contracted methods */ + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, + std::string *err_msg = NULL); + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + + friend class RGWUser; +}; + +class RGWUser +{ + +private: + RGWUserInfo old_info; + rgw::sal::Driver* driver{nullptr}; + + rgw_user user_id; + bool info_stored{false}; + + void set_populated() { info_stored = true; } + void clear_populated() { info_stored = false; } + bool is_populated() { return info_stored; } + + int check_op(RGWUserAdminOpState& req, 
std::string *err_msg); + int update(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); + + void clear_members(); + void init_default(); + + /* API Contract Fulfillment */ + int execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, + optional_yield y); + int execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, + std::string *err_msg, optional_yield y); + int execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); + int execute_rename(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); + +public: + RGWUser(); + + int init(const DoutPrefixProvider *dpp, rgw::sal::Driver* storage, RGWUserAdminOpState& op_state, + optional_yield y); + + int init_storage(rgw::sal::Driver* storage); + int init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y); + int init_members(RGWUserAdminOpState& op_state); + + rgw::sal::Driver* get_driver() { return driver; } + + /* API Contracted Members */ + RGWUserCapPool caps; + RGWAccessKeyPool keys; + RGWSubUserPool subusers; + + /* API Contracted Methods */ + int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + + int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + + int rename(RGWUserAdminOpState& op_state, optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); + + /* remove an already populated RGWUser */ + int remove(std::string *err_msg = NULL); + + int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); + + /* retrieve info from an existing user in the RGW system */ + int info(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, optional_yield y, + std::string *err_msg = NULL); + + /* info from an already populated RGWUser */ + int info (RGWUserInfo& fetched_info, std::string *err_msg = NULL); + + /* list the existing users */ + int list(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher); + + friend class RGWAccessKeyPool; + friend class RGWSubUserPool; + friend class RGWUserCapPool; +}; + +/* Wrappers for admin API functionality */ + +class RGWUserAdminOp_User +{ +public: + static int list(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher); + + static int info(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, + optional_yield y); + + static int create(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, + optional_yield y); + + static int modify(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y); + + static int remove(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y); +}; + +class RGWUserAdminOp_Subuser +{ +public: + static int create(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, + optional_yield y); + + static int modify(const DoutPrefixProvider *dpp, + 
rgw::sal::Driver* driver,
+                    RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                    optional_yield y);
+
+  static int remove(const DoutPrefixProvider *dpp,
+                    rgw::sal::Driver* driver,
+                    RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                    optional_yield y);
+};
+
+class RGWUserAdminOp_Key
+{
+public:
+  static int create(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver,
+                    RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                    optional_yield y);
+
+  static int remove(const DoutPrefixProvider *dpp,
+                    rgw::sal::Driver* driver,
+                    RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                    optional_yield y);
+};
+
+class RGWUserAdminOp_Caps
+{
+public:
+  static int add(const DoutPrefixProvider *dpp,
+                 rgw::sal::Driver* driver,
+                 RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                 optional_yield y);
+
+  static int remove(const DoutPrefixProvider *dpp,
+                    rgw::sal::Driver* driver,
+                    RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher,
+                    optional_yield y);
+};
+
+struct RGWUserCompleteInfo {
+  RGWUserInfo info;
+  std::map<std::string, bufferlist> attrs;
+  bool has_attrs{false};
+
+  void dump(Formatter * const f) const {
+    info.dump(f);
+    encode_json("attrs", attrs, f);
+  }
+
+  void decode_json(JSONObj *obj) {
+    decode_json_obj(info, obj);
+    has_attrs = JSONDecoder::decode_json("attrs", attrs, obj);
+  }
+};
+
+class RGWUserMetadataObject : public RGWMetadataObject {
+  RGWUserCompleteInfo uci;
+public:
+  RGWUserMetadataObject() {}
+  RGWUserMetadataObject(const RGWUserCompleteInfo& _uci, const obj_version& v, real_time m)
+      : uci(_uci) {
+    objv = v;
+    mtime = m;
+  }
+
+  void dump(Formatter *f) const override {
+    uci.dump(f);
+  }
+
+  RGWUserCompleteInfo& get_uci() {
+    return uci;
+  }
+};
+
+class RGWUserMetadataHandler;
+
+class RGWUserCtl
+{
+  struct Svc {
+    RGWSI_Zone *zone{nullptr};
+    RGWSI_User *user{nullptr};
+  } svc;
+
+  struct Ctl {
+    RGWBucketCtl *bucket{nullptr};
+  } ctl;
+
+  RGWUserMetadataHandler *umhandler;
+  RGWSI_MetaBackend_Handler *be_handler{nullptr};
+
+public:
+  RGWUserCtl(RGWSI_Zone *zone_svc,
+             RGWSI_User *user_svc,
+             RGWUserMetadataHandler *_umhandler);
+
+  void init(RGWBucketCtl *bucket_ctl) {
+    ctl.bucket = bucket_ctl;
+  }
+
+  RGWBucketCtl *get_bucket_ctl() {
+    return ctl.bucket;
+  }
+
+  struct GetParams {
+    RGWObjVersionTracker *objv_tracker{nullptr};
+    ceph::real_time *mtime{nullptr};
+    rgw_cache_entry_info *cache_info{nullptr};
+    std::map<std::string, bufferlist> *attrs{nullptr};
+
+    GetParams() {}
+
+    GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+      objv_tracker = _objv_tracker;
+      return *this;
+    }
+
+    GetParams& set_mtime(ceph::real_time *_mtime) {
+      mtime = _mtime;
+      return *this;
+    }
+
+    GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) {
+      cache_info = _cache_info;
+      return *this;
+    }
+
+    GetParams& set_attrs(std::map<std::string, bufferlist> *_attrs) {
+      attrs = _attrs;
+      return *this;
+    }
+  };
+
+  struct PutParams {
+    RGWUserInfo *old_info{nullptr};
+    RGWObjVersionTracker *objv_tracker{nullptr};
+    ceph::real_time mtime;
+    bool exclusive{false};
+    std::map<std::string, bufferlist> *attrs{nullptr};
+
+    PutParams() {}
+
+    PutParams& set_old_info(RGWUserInfo *_info) {
+      old_info = _info;
+      return *this;
+    }
+
+    PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+      objv_tracker = _objv_tracker;
+      return *this;
+    }
+
+    PutParams& set_mtime(const ceph::real_time& _mtime) {
+      mtime = _mtime;
+      return *this;
+    }
+
+    PutParams& set_exclusive(bool _exclusive) {
+      exclusive = _exclusive;
+      return *this;
+    }
+
+    PutParams& set_attrs(std::map<std::string, bufferlist> *_attrs) {
+      attrs = _attrs;
+      return
*this; + } + }; + + struct RemoveParams { + RGWObjVersionTracker *objv_tracker{nullptr}; + + RemoveParams() {} + + RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + }; + + int get_info_by_uid(const DoutPrefixProvider *dpp, + const rgw_user& uid, RGWUserInfo *info, + optional_yield y, const GetParams& params = {}); + int get_info_by_email(const DoutPrefixProvider *dpp, + const std::string& email, RGWUserInfo *info, + optional_yield y, const GetParams& params = {}); + int get_info_by_swift(const DoutPrefixProvider *dpp, + const std::string& swift_name, RGWUserInfo *info, + optional_yield y, const GetParams& params = {}); + int get_info_by_access_key(const DoutPrefixProvider *dpp, + const std::string& access_key, RGWUserInfo *info, + optional_yield y, const GetParams& params = {}); + + int get_attrs_by_uid(const DoutPrefixProvider *dpp, + const rgw_user& user_id, + std::map *attrs, + optional_yield y, + RGWObjVersionTracker *objv_tracker = nullptr); + + int store_info(const DoutPrefixProvider *dpp, + const RGWUserInfo& info, optional_yield y, + const PutParams& params = {}); + int remove_info(const DoutPrefixProvider *dpp, + const RGWUserInfo& info, optional_yield y, + const RemoveParams& params = {}); + + int list_buckets(const DoutPrefixProvider *dpp, + const rgw_user& user, + const std::string& marker, + const std::string& end_marker, + uint64_t max, + bool need_stats, + RGWUserBuckets *buckets, + bool *is_truncated, + optional_yield y, + uint64_t default_max = 1000); + + int read_stats(const DoutPrefixProvider *dpp, + const rgw_user& user, RGWStorageStats *stats, + optional_yield y, + ceph::real_time *last_stats_sync = nullptr, /* last time a full stats sync completed */ + ceph::real_time *last_stats_update = nullptr); /* last time a stats update was done */ +}; + +class RGWUserMetaHandlerAllocator { +public: + static RGWMetadataHandler *alloc(RGWSI_User *user_svc); +}; + + +#endif diff --git a/src/rgw/driver/rados/rgw_zone.cc b/src/rgw/driver/rados/rgw_zone.cc new file mode 100644 index 00000000000..d9e750541d7 --- /dev/null +++ b/src/rgw/driver/rados/rgw_zone.cc @@ -0,0 +1,1287 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_zone.h" +#include "rgw_realm_watcher.h" +#include "rgw_sal_config.h" +#include "rgw_sync.h" + +#include "services/svc_zone.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rgw + +using namespace std; +using namespace rgw_zone_defaults; + +RGWMetaSyncStatusManager::~RGWMetaSyncStatusManager(){} + +#define FIRST_EPOCH 1 + +struct RGWAccessKey; + +/// Generate a random uuid for realm/period/zonegroup/zone ids +static std::string gen_random_uuid() +{ + uuid_d uuid; + uuid.generate_random(); + return uuid.to_string(); +} + +void RGWDefaultZoneGroupInfo::dump(Formatter *f) const { + encode_json("default_zonegroup", default_zonegroup, f); +} + +void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj) { + + JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj); + /* backward compatability with region */ + if (default_zonegroup.empty()) { + JSONDecoder::decode_json("default_region", default_zonegroup, obj); + } +} + +int RGWZoneGroup::create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format) +{ + name = default_zonegroup_name; + api_name = default_zonegroup_name; + is_master = true; + + RGWZoneGroupPlacementTarget placement_target; + placement_target.name = 
"default-placement"; + placement_targets[placement_target.name] = placement_target; + default_placement.name = "default-placement"; + + RGWZoneParams zone_params(default_zone_name); + + int r = zone_params.init(dpp, cct, sysobj_svc, y, false); + if (r < 0) { + ldpp_dout(dpp, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl; + return r; + } + + r = zone_params.create_default(dpp, y); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl; + return r; + } else if (r == -EEXIST) { + ldpp_dout(dpp, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl; + zone_params.clear_id(); + r = zone_params.init(dpp, cct, sysobj_svc, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl; + return r; + } + ldpp_dout(dpp, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id() + << dendl; + } + + RGWZone& default_zone = zones[zone_params.get_id()]; + default_zone.name = zone_params.get_name(); + default_zone.id = zone_params.get_id(); + master_zone = default_zone.id; + + // enable all supported features + enabled_features.insert(rgw::zone_features::supported.begin(), + rgw::zone_features::supported.end()); + default_zone.supported_features = enabled_features; + + r = create(dpp, y); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl; + return r; + } + + if (r == -EEXIST) { + ldpp_dout(dpp, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl; + id.clear(); + r = init(dpp, cct, sysobj_svc, y); + if (r < 0) { + return r; + } + } + + if (old_format) { + name = id; + } + + post_process_params(dpp, y); + + return 0; +} + +int RGWZoneGroup::equals(const string& other_zonegroup) const +{ + if (is_master && other_zonegroup.empty()) + return true; + + return (id == other_zonegroup); +} + +int RGWZoneGroup::add_zone(const DoutPrefixProvider *dpp, + const RGWZoneParams& zone_params, bool *is_master, bool *read_only, + const list& endpoints, const string *ptier_type, + bool *psync_from_all, list& sync_from, list& sync_from_rm, + string *predirect_zone, std::optional bucket_index_max_shards, + RGWSyncModulesManager *sync_mgr, + const rgw::zone_features::set& enable_features, + const rgw::zone_features::set& disable_features, + optional_yield y) +{ + auto& zone_id = zone_params.get_id(); + auto& zone_name = zone_params.get_name(); + + // check for duplicate zone name on insert + if (!zones.count(zone_id)) { + for (const auto& zone : zones) { + if (zone.second.name == zone_name) { + ldpp_dout(dpp, 0) << "ERROR: found existing zone name " << zone_name + << " (" << zone.first << ") in zonegroup " << get_name() << dendl; + return -EEXIST; + } + } + } + + if (is_master) { + if (*is_master) { + if (!master_zone.empty() && master_zone != zone_id) { + ldpp_dout(dpp, 0) << "NOTICE: overriding master zone: " << master_zone << dendl; + } + master_zone = zone_id; + } else if (master_zone == zone_id) { + master_zone.clear(); + } + } + + RGWZone& zone = zones[zone_id]; + zone.name = zone_name; + zone.id = zone_id; + if (!endpoints.empty()) { + zone.endpoints = endpoints; + } + if (read_only) { + zone.read_only = *read_only; + } + if (ptier_type) { + zone.tier_type = *ptier_type; + if (!sync_mgr->get_module(*ptier_type, nullptr)) { + 
ldpp_dout(dpp, 0) << "ERROR: could not found sync module: " << *ptier_type + << ", valid sync modules: " + << sync_mgr->get_registered_module_names() + << dendl; + return -ENOENT; + } + } + + if (psync_from_all) { + zone.sync_from_all = *psync_from_all; + } + + if (predirect_zone) { + zone.redirect_zone = *predirect_zone; + } + + if (bucket_index_max_shards) { + zone.bucket_index_max_shards = *bucket_index_max_shards; + } + + for (auto add : sync_from) { + zone.sync_from.insert(add); + } + + for (auto rm : sync_from_rm) { + zone.sync_from.erase(rm); + } + + zone.supported_features.insert(enable_features.begin(), + enable_features.end()); + + for (const auto& feature : disable_features) { + if (enabled_features.contains(feature)) { + lderr(cct) << "ERROR: Cannot disable zone feature \"" << feature + << "\" until it's been disabled in zonegroup " << name << dendl; + return -EINVAL; + } + auto i = zone.supported_features.find(feature); + if (i == zone.supported_features.end()) { + ldout(cct, 1) << "WARNING: zone feature \"" << feature + << "\" was not enabled in zone " << zone.name << dendl; + continue; + } + zone.supported_features.erase(i); + } + + post_process_params(dpp, y); + + return update(dpp,y); +} + + +int RGWZoneGroup::rename_zone(const DoutPrefixProvider *dpp, + const RGWZoneParams& zone_params, + optional_yield y) +{ + RGWZone& zone = zones[zone_params.get_id()]; + zone.name = zone_params.get_name(); + + return update(dpp, y); +} + +void RGWZoneGroup::post_process_params(const DoutPrefixProvider *dpp, optional_yield y) +{ + bool log_data = zones.size() > 1; + + if (master_zone.empty()) { + auto iter = zones.begin(); + if (iter != zones.end()) { + master_zone = iter->first; + } + } + + for (auto& item : zones) { + RGWZone& zone = item.second; + zone.log_data = log_data; + + RGWZoneParams zone_params(zone.id, zone.name); + int ret = zone_params.init(dpp, cct, sysobj_svc, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl; + continue; + } + + for (auto& pitem : zone_params.placement_pools) { + const string& placement_name = pitem.first; + if (placement_targets.find(placement_name) == placement_targets.end()) { + RGWZoneGroupPlacementTarget placement_target; + placement_target.name = placement_name; + placement_targets[placement_name] = placement_target; + } + } + } + + if (default_placement.empty() && !placement_targets.empty()) { + default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD); + } +} + +int RGWZoneGroup::remove_zone(const DoutPrefixProvider *dpp, const std::string& zone_id, optional_yield y) +{ + auto iter = zones.find(zone_id); + if (iter == zones.end()) { + ldpp_dout(dpp, 0) << "zone id " << zone_id << " is not a part of zonegroup " + << name << dendl; + return -ENOENT; + } + + zones.erase(iter); + + post_process_params(dpp, y); + + return update(dpp, y); +} + +void RGWDefaultSystemMetaObjInfo::dump(Formatter *f) const { + encode_json("default_id", default_id, f); +} + +void RGWDefaultSystemMetaObjInfo::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("default_id", default_id, obj); +} + +int RGWSystemMetaObj::rename(const DoutPrefixProvider *dpp, const string& new_name, optional_yield y) +{ + string new_id; + int ret = read_id(dpp, new_name, new_id, y); + if (!ret) { + return -EEXIST; + } + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + string 
old_name = name;
+  name = new_name;
+  ret = update(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  ret = store_name(dpp, true, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  /* delete old name */
+  rgw_pool pool(get_pool(cct));
+  string oid = get_names_oid_prefix() + old_name;
+  rgw_raw_obj old_name_obj(pool, oid);
+  auto sysobj = sysobj_svc->get_obj(old_name_obj);
+  ret = sysobj.wop().remove(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return ret;
+}
+
+int RGWSystemMetaObj::read(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  int ret = read_id(dpp, name, id, y);
+  if (ret < 0) {
+    return ret;
+  }
+
+  return read_info(dpp, id, y);
+}
+
+int RGWZoneParams::create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format)
+{
+  name = default_zone_name;
+
+  int r = create(dpp, y);
+  if (r < 0) {
+    return r;
+  }
+
+  if (old_format) {
+    name = id;
+  }
+
+  return r;
+}
+
+const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const
+{
+  static const std::string NONE{"none"};
+  auto p = placement_pools.find(placement_rule.name);
+  if (p == placement_pools.end()) {
+    return NONE;
+  }
+  const auto& type = p->second.get_compression_type(placement_rule.get_storage_class());
+  return !type.empty() ? type : NONE;
+}
+
+// run an MD5 hash on the zone_id and return the first 32 bits
+static uint32_t gen_short_zone_id(const std::string zone_id)
+{
+  unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  MD5 hash;
+  // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
+  hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+  hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size());
+  hash.Final(md5);
+
+  uint32_t short_id;
+  memcpy((char *)&short_id, md5, sizeof(short_id));
+  return std::max(short_id, 1u);
+}
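`gen_short_zone_id()` clamps its result to at least 1 because `RGWPeriodMap::get_zone_short_id()` (below) returns 0 as its "unknown zone" sentinel; a caller-side sketch (`period_map` and `zone_id` are illustrative names):

```cpp
// Sketch: a short id of 0 means the zone is not in this period's map.
uint32_t short_id = period_map.get_zone_short_id(zone_id);
if (short_id == 0) {
  // zone_id has no recorded short id; treat as missing from the period
}
```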
+
+int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct)
+{
+  if (zonegroup.is_master_zonegroup() && (!master_zonegroup.empty() && zonegroup.get_id() != master_zonegroup)) {
+    ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl;
+    ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() << dendl;
+    return -EINVAL;
+  }
+  map<string, RGWZoneGroup>::iterator iter = zonegroups.find(zonegroup.get_id());
+  if (iter != zonegroups.end()) {
+    RGWZoneGroup& old_zonegroup = iter->second;
+    if (!old_zonegroup.api_name.empty()) {
+      zonegroups_by_api.erase(old_zonegroup.api_name);
+    }
+  }
+  zonegroups[zonegroup.get_id()] = zonegroup;
+
+  if (!zonegroup.api_name.empty()) {
+    zonegroups_by_api[zonegroup.api_name] = zonegroup;
+  }
+
+  if (zonegroup.is_master_zonegroup()) {
+    master_zonegroup = zonegroup.get_id();
+  } else if (master_zonegroup == zonegroup.get_id()) {
+    master_zonegroup = "";
+  }
+
+  for (auto& i : zonegroup.zones) {
+    auto& zone = i.second;
+    if (short_zone_ids.find(zone.id) != short_zone_ids.end()) {
+      continue;
+    }
+    // calculate the zone's short id
+    uint32_t short_id = gen_short_zone_id(zone.id);
+
+    // search for an existing zone with the same short id
+    for (auto& s : short_zone_ids) {
+      if (s.second == short_id) {
+        ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id
+                      << ") generates the same short_zone_id " << short_id
+                      << " as existing zone id " << s.first << dendl;
+        return -EEXIST;
+      }
+    }
+
+    short_zone_ids[zone.id] = short_id;
+  }
+
+  return 0;
+}
+
+uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const
+{
+  auto i = short_zone_ids.find(zone_id);
+  if (i == short_zone_ids.end()) {
+    return 0;
+  }
+  return i->second;
+}
+
+bool RGWPeriodMap::find_zone_by_name(const string& zone_name,
+                                     RGWZoneGroup *zonegroup,
+                                     RGWZone *zone) const
+{
+  for (auto& iter : zonegroups) {
+    auto& zg = iter.second;
+    for (auto& ziter : zg.zones) {
+      auto& z = ziter.second;
+
+      if (z.name == zone_name) {
+        *zonegroup = zg;
+        *zone = z;
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+namespace rgw {
+
+int read_realm(const DoutPrefixProvider* dpp, optional_yield y,
+               sal::ConfigStore* cfgstore,
+               std::string_view realm_id,
+               std::string_view realm_name,
+               RGWRealm& info,
+               std::unique_ptr<sal::RealmWriter>* writer)
+{
+  if (!realm_id.empty()) {
+    return cfgstore->read_realm_by_id(dpp, y, realm_id, info, writer);
+  }
+  if (!realm_name.empty()) {
+    return cfgstore->read_realm_by_name(dpp, y, realm_name, info, writer);
+  }
+  return cfgstore->read_default_realm(dpp, y, info, writer);
+}
+
+int create_realm(const DoutPrefixProvider* dpp, optional_yield y,
+                 sal::ConfigStore* cfgstore, bool exclusive,
+                 RGWRealm& info,
+                 std::unique_ptr<sal::RealmWriter>* writer_out)
+{
+  if (info.name.empty()) {
+    ldpp_dout(dpp, -1) << __func__ << " requires a realm name" << dendl;
+    return -EINVAL;
+  }
+  if (info.id.empty()) {
+    info.id = gen_random_uuid();
+  }
+
+  // if the realm already has a current_period, just make sure it exists
+  std::optional<RGWPeriod> period;
+  if (!info.current_period.empty()) {
+    period.emplace();
+    int r = cfgstore->read_period(dpp, y, info.current_period,
+                                  std::nullopt, *period);
+    if (r < 0) {
+      ldpp_dout(dpp, -1) << __func__ << " failed to read realm's current_period="
+                         << info.current_period << " with " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  // create the realm
+  std::unique_ptr<sal::RealmWriter> writer;
+  int r = cfgstore->create_realm(dpp, y, exclusive, info, &writer);
+  if (r < 0) {
+    return r;
+  }
+
+  if (!period) {
+    // initialize and exclusive-create the initial period
+    period.emplace();
+    period->id = gen_random_uuid();
+    period->period_map.id = period->id;
+    period->epoch = FIRST_EPOCH;
+    period->realm_id = info.id;
+    period->realm_name = info.name;
+
+    r = cfgstore->create_period(dpp, y, true, *period);
+    if (r < 0) {
+      ldpp_dout(dpp, -1) << __func__ << " failed to create the initial period id="
+                         << period->id << " for realm " << info.name
+                         << " with " << cpp_strerror(r) << dendl;
+      return r;
+    }
+  }
+
+  // update the realm's current_period
+  r = realm_set_current_period(dpp, y, cfgstore, *writer, info, *period);
+  if (r < 0) {
+    return r;
+  }
+
+  // try to set as default.
may race with another create, so pass exclusive=true + // so we don't override an existing default + r = set_default_realm(dpp, y, cfgstore, info, true); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, 0) << "WARNING: failed to set realm as default: " + << cpp_strerror(r) << dendl; + } + + if (writer_out) { + *writer_out = std::move(writer); + } + return 0; +} + +int set_default_realm(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, const RGWRealm& info, + bool exclusive) +{ + return cfgstore->write_default_realm_id(dpp, y, exclusive, info.id); +} + +int realm_set_current_period(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, + sal::RealmWriter& writer, RGWRealm& realm, + const RGWPeriod& period) +{ + // update realm epoch to match the period's + if (realm.epoch > period.realm_epoch) { + ldpp_dout(dpp, -1) << __func__ << " with old realm epoch " + << period.realm_epoch << ", current epoch=" << realm.epoch << dendl; + return -EINVAL; + } + if (realm.epoch == period.realm_epoch && realm.current_period != period.id) { + ldpp_dout(dpp, -1) << __func__ << " with same realm epoch " + << period.realm_epoch << ", but different period id " + << period.id << " != " << realm.current_period << dendl; + return -EINVAL; + } + + realm.epoch = period.realm_epoch; + realm.current_period = period.id; + + // update the realm object + int r = writer.write(dpp, y, realm); + if (r < 0) { + ldpp_dout(dpp, -1) << __func__ << " failed to overwrite realm " + << realm.name << " with " << cpp_strerror(r) << dendl; + return r; + } + + // reflect the zonegroup and period config + (void) reflect_period(dpp, y, cfgstore, period); + return 0; +} + +int reflect_period(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, const RGWPeriod& info) +{ + // overwrite the local period config and zonegroup objects + constexpr bool exclusive = false; + + int r = cfgstore->write_period_config(dpp, y, exclusive, info.realm_id, + info.period_config); + if (r < 0) { + ldpp_dout(dpp, -1) << __func__ << " failed to store period config for realm id=" + << info.realm_id << " with " << cpp_strerror(r) << dendl; + return r; + } + + for (auto& [zonegroup_id, zonegroup] : info.period_map.zonegroups) { + r = cfgstore->create_zonegroup(dpp, y, exclusive, zonegroup, nullptr); + if (r < 0) { + ldpp_dout(dpp, -1) << __func__ << " failed to store zonegroup id=" + << zonegroup_id << " with " << cpp_strerror(r) << dendl; + return r; + } + if (zonegroup.is_master) { + // set master as default if no default exists + constexpr bool exclusive = true; + r = set_default_zonegroup(dpp, y, cfgstore, zonegroup, exclusive); + if (r == 0) { + ldpp_dout(dpp, 1) << "Set the period's master zonegroup " + << zonegroup.name << " as the default" << dendl; + } + } + } + return 0; +} + +std::string get_staging_period_id(std::string_view realm_id) +{ + return string_cat_reserve(realm_id, ":staging"); +} + +void fork_period(const DoutPrefixProvider* dpp, RGWPeriod& info) +{ + ldpp_dout(dpp, 20) << __func__ << " realm id=" << info.realm_id + << " period id=" << info.id << dendl; + + info.predecessor_uuid = std::move(info.id); + info.id = get_staging_period_id(info.realm_id); + info.period_map.reset(); + info.realm_epoch++; +} + +int update_period(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, RGWPeriod& info) +{ + // clear zone short ids of removed zones. 
period_map.update() will add the
+  // remaining zones back
+  info.period_map.short_zone_ids.clear();
+
+  // list all zonegroups in the realm
+  rgw::sal::ListResult<std::string> listing;
+  std::array<std::string, 1000> zonegroup_names; // list in pages of 1000
+  do {
+    int ret = cfgstore->list_zonegroup_names(dpp, y, listing.next,
+                                             zonegroup_names, listing);
+    if (ret < 0) {
+      std::cerr << "failed to list zonegroups: " << cpp_strerror(-ret) << std::endl;
+      return -ret;
+    }
+    for (const auto& name : listing.entries) {
+      RGWZoneGroup zg;
+      ret = cfgstore->read_zonegroup_by_name(dpp, y, name, zg, nullptr);
+      if (ret < 0) {
+        ldpp_dout(dpp, 0) << "WARNING: failed to read zonegroup "
+                          << name << ": " << cpp_strerror(-ret) << dendl;
+        continue;
+      }
+
+      if (zg.realm_id != info.realm_id) {
+        ldpp_dout(dpp, 20) << "skipping zonegroup " << zg.get_name()
+                           << " with realm id " << zg.realm_id
+                           << ", not on our realm " << info.realm_id << dendl;
+        continue;
+      }
+
+      if (zg.master_zone.empty()) {
+        ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
+        return -EINVAL;
+      }
+
+      if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
+        ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name()
+                          << " has a non existent master zone "<< dendl;
+        return -EINVAL;
+      }
+
+      if (zg.is_master_zonegroup()) {
+        info.master_zonegroup = zg.get_id();
+        info.master_zone = zg.master_zone;
+      }
+
+      ret = info.period_map.update(zg, dpp->get_cct());
+      if (ret < 0) {
+        return ret;
+      }
+    } // foreach name in listing.entries
+  } while (!listing.next.empty());
+
+  // read the realm's current period config
+  int ret = cfgstore->read_period_config(dpp, y, info.realm_id,
+                                         info.period_config);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to read period config: "
+                      << cpp_strerror(ret) << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+int commit_period(const DoutPrefixProvider* dpp, optional_yield y,
+                  sal::ConfigStore* cfgstore, sal::Driver* driver,
+                  RGWRealm& realm, sal::RealmWriter& realm_writer,
+                  const RGWPeriod& current_period,
+                  RGWPeriod& info, std::ostream& error_stream,
+                  bool force_if_stale)
+{
+  auto zone_svc = static_cast<rgw::sal::RadosStore*>(driver)->svc()->zone; // XXX
+
+  ldpp_dout(dpp, 20) << __func__ << " realm " << realm.id
+                     << " period " << current_period.id << dendl;
+  // gateway must be in the master zone to commit
+  if (info.master_zone != zone_svc->get_zone_params().id) {
+    error_stream << "Cannot commit period on zone "
+                 << zone_svc->get_zone_params().id << ", it must be sent to "
+        "the period's master zone " << info.master_zone << '.' << std::endl;
+    return -EINVAL;
+  }
+  // period predecessor must match current period
+  if (info.predecessor_uuid != current_period.id) {
+    error_stream << "Period predecessor " << info.predecessor_uuid
+                 << " does not match current period " << current_period.id
+                 << ". Use 'period pull' to get the latest period from the master, "
+        "reapply your changes, and try again." << std::endl;
+    return -EINVAL;
+  }
+  // realm epoch must be 1 greater than current period
+  if (info.realm_epoch != current_period.realm_epoch + 1) {
+    error_stream << "Period's realm epoch " << info.realm_epoch
+                 << " does not come directly after current realm epoch "
+                 << current_period.realm_epoch << ". Use 'realm pull' to get the "
+        "latest realm and period from the master zone, reapply your changes, "
+        "and try again." << std::endl;
+    return -EINVAL;
+  }
+  // did the master zone change?
+ if (info.master_zone != current_period.master_zone) { + // store the current metadata sync status in the period + int r = info.update_sync_status(dpp, driver, current_period, + error_stream, force_if_stale); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to update metadata sync status: " + << cpp_strerror(-r) << dendl; + return r; + } + // create an object with a new period id + info.period_map.id = info.id = gen_random_uuid(); + info.epoch = FIRST_EPOCH; + + constexpr bool exclusive = true; + r = cfgstore->create_period(dpp, y, exclusive, info); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl; + return r; + } + // set as current period + r = realm_set_current_period(dpp, y, cfgstore, realm_writer, realm, info); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to update realm's current period: " + << cpp_strerror(-r) << dendl; + return r; + } + ldpp_dout(dpp, 4) << "Promoted to master zone and committed new period " + << info.id << dendl; + (void) cfgstore->realm_notify_new_period(dpp, y, info); + return 0; + } + // period must be based on current epoch + if (info.epoch != current_period.epoch) { + error_stream << "Period epoch " << info.epoch << " does not match " + "predecessor epoch " << current_period.epoch << ". Use " + "'period pull' to get the latest epoch from the master zone, " + "reapply your changes, and try again." << std::endl; + return -EINVAL; + } + // set period as next epoch + info.id = current_period.id; + info.epoch = current_period.epoch + 1; + info.predecessor_uuid = current_period.predecessor_uuid; + info.realm_epoch = current_period.realm_epoch; + // write the period + constexpr bool exclusive = true; + int r = cfgstore->create_period(dpp, y, exclusive, info); + if (r == -EEXIST) { + // already have this epoch (or a more recent one) + return 0; + } + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to store period: " << cpp_strerror(r) << dendl; + return r; + } + r = reflect_period(dpp, y, cfgstore, info); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to update local objects: " << cpp_strerror(r) << dendl; + return r; + } + ldpp_dout(dpp, 4) << "Committed new epoch " << info.epoch + << " for period " << info.id << dendl; + (void) cfgstore->realm_notify_new_period(dpp, y, info); + return 0; +} + + +int read_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, + std::string_view zonegroup_id, + std::string_view zonegroup_name, + RGWZoneGroup& info, + std::unique_ptr* writer) +{ + if (!zonegroup_id.empty()) { + return cfgstore->read_zonegroup_by_id(dpp, y, zonegroup_id, info, writer); + } + if (!zonegroup_name.empty()) { + return cfgstore->read_zonegroup_by_name(dpp, y, zonegroup_name, info, writer); + } + + std::string realm_id; + int r = cfgstore->read_default_realm_id(dpp, y, realm_id); + if (r == -ENOENT) { + return cfgstore->read_zonegroup_by_name(dpp, y, default_zonegroup_name, + info, writer); + } + if (r < 0) { + return r; + } + return cfgstore->read_default_zonegroup(dpp, y, realm_id, info, writer); +} + +int create_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, bool exclusive, + RGWZoneGroup& info) +{ + if (info.name.empty()) { + ldpp_dout(dpp, -1) << __func__ << " requires a zonegroup name" << dendl; + return -EINVAL; + } + if (info.id.empty()) { + info.id = gen_random_uuid(); + } + + // insert the default placement target if it doesn't exist + constexpr std::string_view default_placement_name = "default-placement"; + + 
RGWZoneGroupPlacementTarget placement_target;
+  placement_target.name = default_placement_name;
+
+  info.placement_targets.emplace(default_placement_name, placement_target);
+  if (info.default_placement.name.empty()) {
+    info.default_placement.name = default_placement_name;
+  }
+
+  int r = cfgstore->create_zonegroup(dpp, y, exclusive, info, nullptr);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "failed to create zonegroup with "
+                      << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  // try to set as default. may race with another create, so pass exclusive=true
+  // so we don't override an existing default
+  r = set_default_zonegroup(dpp, y, cfgstore, info, true);
+  if (r < 0 && r != -EEXIST) {
+    ldpp_dout(dpp, 0) << "WARNING: failed to set zonegroup as default: "
+                      << cpp_strerror(r) << dendl;
+  }
+
+  return 0;
+}
+
+int set_default_zonegroup(const DoutPrefixProvider* dpp, optional_yield y,
+                          sal::ConfigStore* cfgstore, const RGWZoneGroup& info,
+                          bool exclusive)
+{
+  return cfgstore->write_default_zonegroup_id(
+      dpp, y, exclusive, info.realm_id, info.id);
+}
+
+int remove_zone_from_group(const DoutPrefixProvider* dpp,
+                           RGWZoneGroup& zonegroup,
+                           const rgw_zone_id& zone_id)
+{
+  auto z = zonegroup.zones.find(zone_id);
+  if (z == zonegroup.zones.end()) {
+    return -ENOENT;
+  }
+  zonegroup.zones.erase(z);
+
+  if (zonegroup.master_zone == zone_id) {
+    // choose a new master zone
+    auto m = zonegroup.zones.begin();
+    if (m != zonegroup.zones.end()) {
+      zonegroup.master_zone = m->first;
+      ldpp_dout(dpp, 0) << "NOTICE: promoted " << m->second.name
+                        << " as new master_zone of zonegroup " << zonegroup.name << dendl;
+    } else {
+      zonegroup.master_zone.clear();
+      ldpp_dout(dpp, 0) << "NOTICE: cleared master_zone of zonegroup "
+                        << zonegroup.name << dendl;
+    }
+  }
+
+  const bool log_data = zonegroup.zones.size() > 1;
+  for (auto& [id, zone] : zonegroup.zones) {
+    zone.log_data = log_data;
+  }
+
+  return 0;
+}
+
+// try to remove the given zone id from every zonegroup in the cluster
+static int remove_zone_from_groups(const DoutPrefixProvider* dpp,
+                                   optional_yield y,
+                                   sal::ConfigStore* cfgstore,
+                                   const rgw_zone_id& zone_id)
+{
+  std::array<std::string, 128> zonegroup_names;
+  sal::ListResult<std::string> listing;
+  do {
+    int r = cfgstore->list_zonegroup_names(dpp, y, listing.next,
+                                           zonegroup_names, listing);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "failed to list zonegroups with "
+                        << cpp_strerror(r) << dendl;
+      return r;
+    }
+
+    for (const auto& name : listing.entries) {
+      RGWZoneGroup zonegroup;
+      std::unique_ptr<sal::ZoneGroupWriter> writer;
+      r = cfgstore->read_zonegroup_by_name(dpp, y, name, zonegroup, &writer);
+      if (r < 0) {
+        ldpp_dout(dpp, 0) << "WARNING: failed to load zonegroup " << name
+                          << " with " << cpp_strerror(r) << dendl;
+        continue;
+      }
+
+      r = remove_zone_from_group(dpp, zonegroup, zone_id);
+      if (r < 0) {
+        continue;
+      }
+
+      // write the updated zonegroup
+      r = writer->write(dpp, y, zonegroup);
+      if (r < 0) {
+        ldpp_dout(dpp, 0) << "WARNING: failed to write zonegroup " << name
+                          << " with " << cpp_strerror(r) << dendl;
+        continue;
+      }
+      ldpp_dout(dpp, 0) << "Removed zone from zonegroup " << name << dendl;
+    }
+  } while (!listing.next.empty());
+
+  return 0;
+}
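Both `update_period()` and `remove_zone_from_groups()` page through zonegroup names with the same `ListResult` loop; the general pattern, as a sketch (page size varies by caller):

```cpp
// Sketch: drain a paged name listing from the config store.
sal::ListResult<std::string> listing;
std::array<std::string, 128> names;  // one page of results
do {
  int r = cfgstore->list_zonegroup_names(dpp, y, listing.next, names, listing);
  if (r < 0) {
    return r;                        // propagate listing errors
  }
  for (const auto& name : listing.entries) {
    // process one zonegroup name
  }
} while (!listing.next.empty());     // empty continuation token ends the scan
```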
zone_name, info, writer); + } + + std::string realm_id; + int r = cfgstore->read_default_realm_id(dpp, y, realm_id); + if (r == -ENOENT) { + return cfgstore->read_zone_by_name(dpp, y, default_zone_name, info, writer); + } + if (r < 0) { + return r; + } + return cfgstore->read_default_zone(dpp, y, realm_id, info, writer); +} + +extern int get_zones_pool_set(const DoutPrefixProvider *dpp, optional_yield y, + rgw::sal::ConfigStore* cfgstore, + std::string_view my_zone_id, + std::set& pools); + +int create_zone(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, bool exclusive, + RGWZoneParams& info, std::unique_ptr* writer) +{ + if (info.name.empty()) { + ldpp_dout(dpp, -1) << __func__ << " requires a zone name" << dendl; + return -EINVAL; + } + if (info.id.empty()) { + info.id = gen_random_uuid(); + } + + // add default placement with empty pool name + rgw_pool pool; + auto& placement = info.placement_pools["default-placement"]; + placement.storage_classes.set_storage_class( + RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); + + // build a set of all pool names used by other zones + std::set pools; + int r = get_zones_pool_set(dpp, y, cfgstore, info.id, pools); + if (r < 0) { + return r; + } + + // initialize pool names with the zone name prefix + r = init_zone_pool_names(dpp, y, pools, info); + if (r < 0) { + return r; + } + + r = cfgstore->create_zone(dpp, y, exclusive, info, nullptr); + if (r < 0) { + ldpp_dout(dpp, 0) << "failed to create zone with " + << cpp_strerror(r) << dendl; + return r; + } + + // try to set as default. may race with another create, so pass exclusive=true + // so we don't override an existing default + r = set_default_zone(dpp, y, cfgstore, info, true); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, 0) << "WARNING: failed to set zone as default: " + << cpp_strerror(r) << dendl; + } + + return 0; + +} + +int set_default_zone(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, const RGWZoneParams& info, + bool exclusive) +{ + return cfgstore->write_default_zone_id( + dpp, y, exclusive, info.realm_id, info.id); +} + +int delete_zone(const DoutPrefixProvider* dpp, optional_yield y, + sal::ConfigStore* cfgstore, const RGWZoneParams& info, + sal::ZoneWriter& writer) +{ + // remove this zone from any zonegroups that contain it + int r = remove_zone_from_groups(dpp, y, cfgstore, info.id); + if (r < 0) { + return r; + } + + return writer.remove(dpp, y); +} + +} // namespace rgw + +static inline int conf_to_uint64(const JSONFormattable& config, const string& key, uint64_t *pval) +{ + string sval; + if (config.find(key, &sval)) { + string err; + uint64_t val = strict_strtoll(sval.c_str(), 10, &err); + if (!err.empty()) { + return -EINVAL; + } + *pval = val; + } + return 0; +} + +int RGWZoneGroupPlacementTier::update_params(const JSONFormattable& config) +{ + int r = -1; + + if (config.exists("retain_head_object")) { + string s = config["retain_head_object"]; + if (s == "true") { + retain_head_object = true; + } else { + retain_head_object = false; + } + } + + if (tier_type == "cloud-s3") { + r = t.s3.update_params(config); + } + + return r; +} + +int RGWZoneGroupPlacementTier::clear_params(const JSONFormattable& config) +{ + if (config.exists("retain_head_object")) { + retain_head_object = false; + } + + if (tier_type == "cloud-s3") { + t.s3.clear_params(config); + } + + return 0; +} + +int RGWZoneGroupPlacementTierS3::update_params(const JSONFormattable& config) +{ + int r = -1; + + if (config.exists("endpoint")) { + 
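+    // Editorial note (not part of the original change): the keys matched in
+    // this function are the cloud-s3 tier-config settings -- endpoint,
+    // access_key, secret, region, host_style, target_path,
+    // target_storage_class, multipart_sync_threshold,
+    // multipart_min_part_size and acls. They typically arrive from
+    // radosgw-admin, along the lines of (command shape per the cloud
+    // transition docs, shown here only for context):
+    //   radosgw-admin zonegroup placement modify --rgw-zonegroup default \
+    //     --placement-id default-placement --storage-class CLOUDTIER \
+    //     --tier-config=endpoint=http://s3.example.com,access_key=<key>
+    // Each key is checked individually, so partial updates are possible.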
endpoint = config["endpoint"]; + } + if (config.exists("target_path")) { + target_path = config["target_path"]; + } + if (config.exists("region")) { + region = config["region"]; + } + if (config.exists("host_style")) { + string s; + s = config["host_style"]; + if (s != "virtual") { + host_style = PathStyle; + } else { + host_style = VirtualStyle; + } + } + if (config.exists("target_storage_class")) { + target_storage_class = config["target_storage_class"]; + } + if (config.exists("access_key")) { + key.id = config["access_key"]; + } + if (config.exists("secret")) { + key.key = config["secret"]; + } + if (config.exists("multipart_sync_threshold")) { + r = conf_to_uint64(config, "multipart_sync_threshold", &multipart_sync_threshold); + if (r < 0) { + multipart_sync_threshold = DEFAULT_MULTIPART_SYNC_PART_SIZE; + } + } + + if (config.exists("multipart_min_part_size")) { + r = conf_to_uint64(config, "multipart_min_part_size", &multipart_min_part_size); + if (r < 0) { + multipart_min_part_size = DEFAULT_MULTIPART_SYNC_PART_SIZE; + } + } + + if (config.exists("acls")) { + const JSONFormattable& cc = config["acls"]; + if (cc.is_array()) { + for (auto& c : cc.array()) { + RGWTierACLMapping m; + m.init(c); + if (!m.source_id.empty()) { + acl_mappings[m.source_id] = m; + } + } + } else { + RGWTierACLMapping m; + m.init(cc); + if (!m.source_id.empty()) { + acl_mappings[m.source_id] = m; + } + } + } + return 0; +} + +int RGWZoneGroupPlacementTierS3::clear_params(const JSONFormattable& config) +{ + if (config.exists("endpoint")) { + endpoint.clear(); + } + if (config.exists("target_path")) { + target_path.clear(); + } + if (config.exists("region")) { + region.clear(); + } + if (config.exists("host_style")) { + /* default */ + host_style = PathStyle; + } + if (config.exists("target_storage_class")) { + target_storage_class.clear(); + } + if (config.exists("access_key")) { + key.id.clear(); + } + if (config.exists("secret")) { + key.key.clear(); + } + if (config.exists("multipart_sync_threshold")) { + multipart_sync_threshold = DEFAULT_MULTIPART_SYNC_PART_SIZE; + } + if (config.exists("multipart_min_part_size")) { + multipart_min_part_size = DEFAULT_MULTIPART_SYNC_PART_SIZE; + } + if (config.exists("acls")) { + const JSONFormattable& cc = config["acls"]; + if (cc.is_array()) { + for (auto& c : cc.array()) { + RGWTierACLMapping m; + m.init(c); + acl_mappings.erase(m.source_id); + } + } else { + RGWTierACLMapping m; + m.init(cc); + acl_mappings.erase(m.source_id); + } + } + return 0; +} + +void rgw_meta_sync_info::generate_test_instances(list& o) +{ + auto info = new rgw_meta_sync_info; + info->state = rgw_meta_sync_info::StateBuildingFullSyncMaps; + info->period = "periodid"; + info->realm_epoch = 5; + o.push_back(info); + o.push_back(new rgw_meta_sync_info); +} + +void rgw_meta_sync_marker::generate_test_instances(list& o) +{ + auto marker = new rgw_meta_sync_marker; + marker->state = rgw_meta_sync_marker::IncrementalSync; + marker->marker = "01234"; + marker->realm_epoch = 5; + o.push_back(marker); + o.push_back(new rgw_meta_sync_marker); +} + +void rgw_meta_sync_status::generate_test_instances(list& o) +{ + o.push_back(new rgw_meta_sync_status); +} + +void RGWZoneParams::generate_test_instances(list &o) +{ + o.push_back(new RGWZoneParams); + o.push_back(new RGWZoneParams); +} + +void RGWPeriodLatestEpochInfo::generate_test_instances(list &o) +{ + RGWPeriodLatestEpochInfo *z = new RGWPeriodLatestEpochInfo; + o.push_back(z); + o.push_back(new RGWPeriodLatestEpochInfo); +} + +void 
RGWZoneGroup::generate_test_instances(list& o) +{ + RGWZoneGroup *r = new RGWZoneGroup; + o.push_back(r); + o.push_back(new RGWZoneGroup); +} + +void RGWPeriodLatestEpochInfo::dump(Formatter *f) const { + encode_json("latest_epoch", epoch, f); +} + +void RGWPeriodLatestEpochInfo::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("latest_epoch", epoch, obj); +} + +void RGWNameToId::dump(Formatter *f) const { + encode_json("obj_id", obj_id, f); +} + +void RGWNameToId::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("obj_id", obj_id, obj); +} + diff --git a/src/rgw/driver/rados/rgw_zone.h b/src/rgw/driver/rados/rgw_zone.h new file mode 100644 index 00000000000..e1792a40cce --- /dev/null +++ b/src/rgw/driver/rados/rgw_zone.h @@ -0,0 +1,1525 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#ifndef CEPH_RGW_ZONE_H +#define CEPH_RGW_ZONE_H + +#include +#include "rgw_common.h" +#include "rgw_sal_fwd.h" +#include "rgw_sync_policy.h" +#include "rgw_zone_features.h" + +namespace rgw_zone_defaults { + +extern std::string zone_names_oid_prefix; +extern std::string region_info_oid_prefix; +extern std::string realm_names_oid_prefix; +extern std::string zone_group_info_oid_prefix; +extern std::string realm_info_oid_prefix; +extern std::string default_region_info_oid; +extern std::string default_zone_group_info_oid; +extern std::string region_map_oid; +extern std::string default_realm_info_oid; +extern std::string default_zonegroup_name; +extern std::string default_zone_name; +extern std::string zonegroup_names_oid_prefix; +extern std::string RGW_DEFAULT_ZONE_ROOT_POOL; +extern std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL; +extern std::string RGW_DEFAULT_REALM_ROOT_POOL; +extern std::string RGW_DEFAULT_PERIOD_ROOT_POOL; +extern std::string avail_pools; +extern std::string default_storage_pool_suffix; + +} + +class JSONObj; +class RGWSyncModulesManager; + + +struct RGWNameToId { + std::string obj_id; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(obj_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(obj_id, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWNameToId) + +struct RGWDefaultSystemMetaObjInfo { + std::string default_id; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(default_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(default_id, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo) + +class RGWSI_SysObj; +class RGWSI_Zone; + +class RGWSystemMetaObj { +public: + std::string id; + std::string name; + + CephContext *cct{nullptr}; + RGWSI_SysObj *sysobj_svc{nullptr}; + RGWSI_Zone *zone_svc{nullptr}; + + int store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); + int store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); + int read_info(const DoutPrefixProvider *dpp, const std::string& obj_id, optional_yield y, bool old_format = false); + int read_id(const DoutPrefixProvider *dpp, const std::string& obj_name, std::string& obj_id, optional_yield y); + int read_default(const DoutPrefixProvider *dpp, + RGWDefaultSystemMetaObjInfo& default_info, + const std::string& oid, + optional_yield y); + /* read and use default id */ + 
int use_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); + +public: + RGWSystemMetaObj() {} + RGWSystemMetaObj(const std::string& _name): name(_name) {} + RGWSystemMetaObj(const std::string& _id, const std::string& _name) : id(_id), name(_name) {} + RGWSystemMetaObj(CephContext *_cct, RGWSI_SysObj *_sysobj_svc) { + reinit_instance(_cct, _sysobj_svc); + } + RGWSystemMetaObj(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): name(_name) { + reinit_instance(_cct, _sysobj_svc); + } + + const std::string& get_name() const { return name; } + const std::string& get_id() const { return id; } + + void set_name(const std::string& _name) { name = _name;} + void set_id(const std::string& _id) { id = _id;} + void clear_id() { id.clear(); } + + virtual ~RGWSystemMetaObj() {} + + virtual void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(id, bl); + encode(name, bl); + ENCODE_FINISH(bl); + } + + virtual void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(id, bl); + decode(name, bl); + DECODE_FINISH(bl); + } + + void reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc); + int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, + optional_yield y, + bool setup_obj = true, bool old_format = false); + virtual int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, + bool old_format = false); + virtual int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false); + int delete_default(); + virtual int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true); + int delete_obj(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); + int rename(const DoutPrefixProvider *dpp, const std::string& new_name, optional_yield y); + int update(const DoutPrefixProvider *dpp, optional_yield y) { return store_info(dpp, false, y);} + int update_name(const DoutPrefixProvider *dpp, optional_yield y) { return store_name(dpp, false, y);} + int read(const DoutPrefixProvider *dpp, optional_yield y); + int write(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); + + virtual rgw_pool get_pool(CephContext *cct) const = 0; + virtual const std::string get_default_oid(bool old_format = false) const = 0; + virtual const std::string& get_names_oid_prefix() const = 0; + virtual const std::string& get_info_oid_prefix(bool old_format = false) const = 0; + virtual std::string get_predefined_id(CephContext *cct) const = 0; + virtual const std::string& get_predefined_name(CephContext *cct) const = 0; + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWSystemMetaObj) + +struct RGWZoneStorageClass { + boost::optional data_pool; + boost::optional compression_type; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(data_pool, bl); + encode(compression_type, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(data_pool, bl); + decode(compression_type, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWZoneStorageClass) + + +class RGWZoneStorageClasses { + std::map m; + + /* in memory only */ + RGWZoneStorageClass *standard_class; + +public: + RGWZoneStorageClasses() { + standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; + } + RGWZoneStorageClasses(const RGWZoneStorageClasses& rhs) { + m = 
rhs.m;
+    standard_class = &m[RGW_STORAGE_CLASS_STANDARD];
+  }
+  RGWZoneStorageClasses& operator=(const RGWZoneStorageClasses& rhs) {
+    m = rhs.m;
+    standard_class = &m[RGW_STORAGE_CLASS_STANDARD];
+    return *this;
+  }
+
+  const RGWZoneStorageClass& get_standard() const {
+    return *standard_class;
+  }
+
+  bool find(const std::string& sc, const RGWZoneStorageClass **pstorage_class) const {
+    auto iter = m.find(sc);
+    if (iter == m.end()) {
+      return false;
+    }
+    *pstorage_class = &iter->second;
+    return true;
+  }
+
+  bool exists(const std::string& sc) const {
+    if (sc.empty()) {
+      return true;
+    }
+    auto iter = m.find(sc);
+    return (iter != m.end());
+  }
+
+  const std::map<std::string, RGWZoneStorageClass>& get_all() const {
+    return m;
+  }
+
+  std::map<std::string, RGWZoneStorageClass>& get_all() {
+    return m;
+  }
+
+  void set_storage_class(const std::string& sc, const rgw_pool *data_pool, const std::string *compression_type) {
+    const std::string *psc = &sc;
+    if (sc.empty()) {
+      psc = &RGW_STORAGE_CLASS_STANDARD;
+    }
+    RGWZoneStorageClass& storage_class = m[*psc];
+    if (data_pool) {
+      storage_class.data_pool = *data_pool;
+    }
+    if (compression_type) {
+      storage_class.compression_type = *compression_type;
+    }
+  }
+
+  void remove_storage_class(const std::string& sc) {
+    if (!sc.empty()) {
+      m.erase(sc);
+    }
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(m, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(m, bl);
+    standard_class = &m[RGW_STORAGE_CLASS_STANDARD];
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWZoneStorageClasses)
+
+struct RGWZonePlacementInfo {
+  rgw_pool index_pool;
+  rgw_pool data_extra_pool; /* if not set we should use data_pool */
+  RGWZoneStorageClasses storage_classes;
+  rgw::BucketIndexType index_type;
+
+  RGWZonePlacementInfo() : index_type(rgw::BucketIndexType::Normal) {}
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(7, 1, bl);
+    encode(index_pool.to_str(), bl);
+    rgw_pool standard_data_pool = get_data_pool(RGW_STORAGE_CLASS_STANDARD);
+    encode(standard_data_pool.to_str(), bl);
+    encode(data_extra_pool.to_str(), bl);
+    encode((uint32_t)index_type, bl);
+    std::string standard_compression_type = get_compression_type(RGW_STORAGE_CLASS_STANDARD);
+    encode(standard_compression_type, bl);
+    encode(storage_classes, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(7, bl);
+    std::string index_pool_str;
+    std::string data_pool_str;
+    decode(index_pool_str, bl);
+    index_pool = rgw_pool(index_pool_str);
+    decode(data_pool_str, bl);
+    rgw_pool standard_data_pool(data_pool_str);
+    if (struct_v >= 4) {
+      std::string data_extra_pool_str;
+      decode(data_extra_pool_str, bl);
+      data_extra_pool = rgw_pool(data_extra_pool_str);
+    }
+    if (struct_v >= 5) {
+      uint32_t it;
+      decode(it, bl);
+      index_type = (rgw::BucketIndexType)it;
+    }
+    std::string standard_compression_type;
+    if (struct_v >= 6) {
+      decode(standard_compression_type, bl);
+    }
+    if (struct_v >= 7) {
+      decode(storage_classes, bl);
+    } else {
+      storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &standard_data_pool,
+                                        (!standard_compression_type.empty() ?
&standard_compression_type : nullptr)); + } + DECODE_FINISH(bl); + } + const rgw_pool& get_data_extra_pool() const { + static rgw_pool no_pool; + if (data_extra_pool.empty()) { + return storage_classes.get_standard().data_pool.get_value_or(no_pool); + } + return data_extra_pool; + } + const rgw_pool& get_data_pool(const std::string& sc) const { + const RGWZoneStorageClass *storage_class; + static rgw_pool no_pool; + + if (!storage_classes.find(sc, &storage_class)) { + return storage_classes.get_standard().data_pool.get_value_or(no_pool); + } + + return storage_class->data_pool.get_value_or(no_pool); + } + const rgw_pool& get_standard_data_pool() const { + return get_data_pool(RGW_STORAGE_CLASS_STANDARD); + } + + const std::string& get_compression_type(const std::string& sc) const { + const RGWZoneStorageClass *storage_class; + static std::string no_compression; + + if (!storage_classes.find(sc, &storage_class)) { + return no_compression; + } + return storage_class->compression_type.get_value_or(no_compression); + } + + bool storage_class_exists(const std::string& sc) const { + return storage_classes.exists(sc); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + +}; +WRITE_CLASS_ENCODER(RGWZonePlacementInfo) + +struct RGWZoneParams : RGWSystemMetaObj { + rgw_pool domain_root; + rgw_pool control_pool; + rgw_pool gc_pool; + rgw_pool lc_pool; + rgw_pool log_pool; + rgw_pool intent_log_pool; + rgw_pool usage_log_pool; + rgw_pool user_keys_pool; + rgw_pool user_email_pool; + rgw_pool user_swift_pool; + rgw_pool user_uid_pool; + rgw_pool roles_pool; + rgw_pool reshard_pool; + rgw_pool otp_pool; + rgw_pool oidc_pool; + rgw_pool notif_pool; + + RGWAccessKey system_key; + + std::map placement_pools; + + std::string realm_id; + + JSONFormattable tier_config; + + RGWZoneParams() : RGWSystemMetaObj() {} + explicit RGWZoneParams(const std::string& name) : RGWSystemMetaObj(name){} + RGWZoneParams(const rgw_zone_id& id, const std::string& name) : RGWSystemMetaObj(id.id, name) {} + RGWZoneParams(const rgw_zone_id& id, const std::string& name, const std::string& _realm_id) + : RGWSystemMetaObj(id.id, name), realm_id(_realm_id) {} + virtual ~RGWZoneParams(); + + rgw_pool get_pool(CephContext *cct) const override; + const std::string get_default_oid(bool old_format = false) const override; + const std::string& get_names_oid_prefix() const override; + const std::string& get_info_oid_prefix(bool old_format = false) const override; + std::string get_predefined_id(CephContext *cct) const override; + const std::string& get_predefined_name(CephContext *cct) const override; + + int init(const DoutPrefixProvider *dpp, + CephContext *_cct, RGWSI_SysObj *_sysobj_svc, optional_yield y, + bool setup_obj = true, bool old_format = false); + using RGWSystemMetaObj::init; + int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, bool old_format = false) override; + int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false) override; + int create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); + int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true) override; + int fix_pool_names(const DoutPrefixProvider *dpp, optional_yield y); + + const std::string& get_compression_type(const rgw_placement_rule& placement_rule) const; + + void encode(bufferlist& bl) const override { + ENCODE_START(14, 1, bl); + encode(domain_root, bl); + encode(control_pool, bl); + encode(gc_pool, bl); 
+ encode(log_pool, bl); + encode(intent_log_pool, bl); + encode(usage_log_pool, bl); + encode(user_keys_pool, bl); + encode(user_email_pool, bl); + encode(user_swift_pool, bl); + encode(user_uid_pool, bl); + RGWSystemMetaObj::encode(bl); + encode(system_key, bl); + encode(placement_pools, bl); + rgw_pool unused_metadata_heap; + encode(unused_metadata_heap, bl); + encode(realm_id, bl); + encode(lc_pool, bl); + std::map old_tier_config; + encode(old_tier_config, bl); + encode(roles_pool, bl); + encode(reshard_pool, bl); + encode(otp_pool, bl); + encode(tier_config, bl); + encode(oidc_pool, bl); + encode(notif_pool, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) override { + DECODE_START(14, bl); + decode(domain_root, bl); + decode(control_pool, bl); + decode(gc_pool, bl); + decode(log_pool, bl); + decode(intent_log_pool, bl); + decode(usage_log_pool, bl); + decode(user_keys_pool, bl); + decode(user_email_pool, bl); + decode(user_swift_pool, bl); + decode(user_uid_pool, bl); + if (struct_v >= 6) { + RGWSystemMetaObj::decode(bl); + } else if (struct_v >= 2) { + decode(name, bl); + id = name; + } + if (struct_v >= 3) + decode(system_key, bl); + if (struct_v >= 4) + decode(placement_pools, bl); + if (struct_v >= 5) { + rgw_pool unused_metadata_heap; + decode(unused_metadata_heap, bl); + } + if (struct_v >= 6) { + decode(realm_id, bl); + } + if (struct_v >= 7) { + decode(lc_pool, bl); + } else { + lc_pool = log_pool.name + ":lc"; + } + std::map old_tier_config; + if (struct_v >= 8) { + decode(old_tier_config, bl); + } + if (struct_v >= 9) { + decode(roles_pool, bl); + } else { + roles_pool = name + ".rgw.meta:roles"; + } + if (struct_v >= 10) { + decode(reshard_pool, bl); + } else { + reshard_pool = log_pool.name + ":reshard"; + } + if (struct_v >= 11) { + ::decode(otp_pool, bl); + } else { + otp_pool = name + ".rgw.otp"; + } + if (struct_v >= 12) { + ::decode(tier_config, bl); + } else { + for (auto& kv : old_tier_config) { + tier_config.set(kv.first, kv.second); + } + } + if (struct_v >= 13) { + ::decode(oidc_pool, bl); + } else { + oidc_pool = name + ".rgw.meta:oidc"; + } + if (struct_v >= 14) { + decode(notif_pool, bl); + } else { + notif_pool = log_pool.name + ":notif"; + } + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list& o); + + bool get_placement(const std::string& placement_id, RGWZonePlacementInfo *placement) const { + auto iter = placement_pools.find(placement_id); + if (iter == placement_pools.end()) { + return false; + } + *placement = iter->second; + return true; + } + + /* + * return data pool of the head object + */ + bool get_head_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) const { + const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement; + if (!explicit_placement.data_pool.empty()) { + if (!obj.in_extra_data) { + *pool = explicit_placement.data_pool; + } else { + *pool = explicit_placement.get_data_extra_pool(); + } + return true; + } + if (placement_rule.empty()) { + return false; + } + auto iter = placement_pools.find(placement_rule.name); + if (iter == placement_pools.end()) { + return false; + } + if (!obj.in_extra_data) { + *pool = iter->second.get_data_pool(placement_rule.storage_class); + } else { + *pool = iter->second.get_data_extra_pool(); + } + return true; + } + + bool valid_placement(const rgw_placement_rule& rule) const { + auto iter = placement_pools.find(rule.name); + if 
(iter == placement_pools.end()) {
+      return false;
+    }
+    return iter->second.storage_class_exists(rule.storage_class);
+  }
+};
+WRITE_CLASS_ENCODER(RGWZoneParams)
+
+
+struct RGWZone {
+  std::string id;
+  std::string name;
+  std::list<std::string> endpoints;
+  bool log_meta;
+  bool log_data;
+  bool read_only;
+  std::string tier_type;
+
+  std::string redirect_zone;
+
+/**
+ * Represents the number of shards for the bucket index object; a value of zero
+ * indicates there is no sharding. Without sharding, the name of the object is
+ * '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}',
+ * where sharding_id is a zero-based value. Setting a very large value (e.g., a
+ * thousand) is not recommended, as it increases the cost of bucket listing.
+ */
+  uint32_t bucket_index_max_shards;
+
+  // pre-shard buckets on creation to enable some write-parallelism by default,
+  // delay the need to reshard as the bucket grows, and (in multisite) get some
+  // bucket index sharding where dynamic resharding is not supported
+  static constexpr uint32_t default_bucket_index_max_shards = 11;
+
+  bool sync_from_all;
+  std::set<std::string> sync_from; /* list of zones to sync from */
+
+  rgw::zone_features::set supported_features;
+
+  RGWZone()
+    : log_meta(false), log_data(false), read_only(false),
+      bucket_index_max_shards(default_bucket_index_max_shards),
+      sync_from_all(true) {}
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(8, 1, bl);
+    encode(name, bl);
+    encode(endpoints, bl);
+    encode(log_meta, bl);
+    encode(log_data, bl);
+    encode(bucket_index_max_shards, bl);
+    encode(id, bl);
+    encode(read_only, bl);
+    encode(tier_type, bl);
+    encode(sync_from_all, bl);
+    encode(sync_from, bl);
+    encode(redirect_zone, bl);
+    encode(supported_features, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(8, bl);
+    decode(name, bl);
+    if (struct_v < 4) {
+      id = name;
+    }
+    decode(endpoints, bl);
+    if (struct_v >= 2) {
+      decode(log_meta, bl);
+      decode(log_data, bl);
+    }
+    if (struct_v >= 3) {
+      decode(bucket_index_max_shards, bl);
+    }
+    if (struct_v >= 4) {
+      decode(id, bl);
+      decode(read_only, bl);
+    }
+    if (struct_v >= 5) {
+      decode(tier_type, bl);
+    }
+    if (struct_v >= 6) {
+      decode(sync_from_all, bl);
+      decode(sync_from, bl);
+    }
+    if (struct_v >= 7) {
+      decode(redirect_zone, bl);
+    }
+    if (struct_v >= 8) {
+      decode(supported_features, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  static void generate_test_instances(std::list<RGWZone*>& o);
+
+  bool is_read_only() const { return read_only; }
+
+  bool syncs_from(const std::string& zone_name) const {
+    return (sync_from_all || sync_from.find(zone_name) != sync_from.end());
+  }
+
+  bool supports(std::string_view feature) const {
+    return supported_features.contains(feature);
+  }
+};
+WRITE_CLASS_ENCODER(RGWZone)
+
+struct RGWDefaultZoneGroupInfo {
+  std::string default_zonegroup;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(default_zonegroup, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(default_zonegroup, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  // TODO: implement ceph-dencoder
+};
+WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
+
+struct RGWTierACLMapping {
+  ACLGranteeTypeEnum type{ACL_TYPE_CANON_USER};
+  std::string source_id;
+  std::string dest_id;
+
+  RGWTierACLMapping() = default;
+
+  RGWTierACLMapping(ACLGranteeTypeEnum t,
const std::string& s, + const std::string& d) : type(t), + source_id(s), + dest_id(d) {} + + void init(const JSONFormattable& config) { + const std::string& t = config["type"]; + + if (t == "email") { + type = ACL_TYPE_EMAIL_USER; + } else if (t == "uri") { + type = ACL_TYPE_GROUP; + } else { + type = ACL_TYPE_CANON_USER; + } + + source_id = config["source_id"]; + dest_id = config["dest_id"]; + } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode((uint32_t)type, bl); + encode(source_id, bl); + encode(dest_id, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + uint32_t it; + decode(it, bl); + type = (ACLGranteeTypeEnum)it; + decode(source_id, bl); + decode(dest_id, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWTierACLMapping) + +struct RGWZoneGroupPlacementTierS3 { +#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) + std::string endpoint; + RGWAccessKey key; + std::string region; + HostStyle host_style{PathStyle}; + std::string target_storage_class; + + /* Should below be bucket/zone specific?? */ + std::string target_path; + std::map acl_mappings; + + uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE}; + uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE}; + + int update_params(const JSONFormattable& config); + int clear_params(const JSONFormattable& config); + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(endpoint, bl); + encode(key, bl); + encode(region, bl); + encode((uint32_t)host_style, bl); + encode(target_storage_class, bl); + encode(target_path, bl); + encode(acl_mappings, bl); + encode(multipart_sync_threshold, bl); + encode(multipart_min_part_size, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(endpoint, bl); + decode(key, bl); + decode(region, bl); + + uint32_t it; + decode(it, bl); + host_style = (HostStyle)it; + + decode(target_storage_class, bl); + decode(target_path, bl); + decode(acl_mappings, bl); + decode(multipart_sync_threshold, bl); + decode(multipart_min_part_size, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTierS3) + +struct RGWZoneGroupPlacementTier { + std::string tier_type; + std::string storage_class; + bool retain_head_object = false; + + struct _tier { + RGWZoneGroupPlacementTierS3 s3; + } t; + + int update_params(const JSONFormattable& config); + int clear_params(const JSONFormattable& config); + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(tier_type, bl); + encode(storage_class, bl); + encode(retain_head_object, bl); + if (tier_type == "cloud-s3") { + encode(t.s3, bl); + } + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(tier_type, bl); + decode(storage_class, bl); + decode(retain_head_object, bl); + if (tier_type == "cloud-s3") { + decode(t.s3, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTier) + +struct RGWZoneGroupPlacementTarget { + std::string name; + std::set tags; + std::set storage_classes; + std::map tier_targets; + + bool user_permitted(const std::list& user_tags) const { + if (tags.empty()) { + return true; + } + for (auto& rule : user_tags) { + if (tags.find(rule) != tags.end()) 
{ + return true; + } + } + return false; + } + + void encode(bufferlist& bl) const { + ENCODE_START(3, 1, bl); + encode(name, bl); + encode(tags, bl); + encode(storage_classes, bl); + encode(tier_targets, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(3, bl); + decode(name, bl); + decode(tags, bl); + if (struct_v >= 2) { + decode(storage_classes, bl); + } + if (storage_classes.empty()) { + storage_classes.insert(RGW_STORAGE_CLASS_STANDARD); + } + if (struct_v >= 3) { + decode(tier_targets, bl); + } + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; +WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget) + +struct RGWZoneGroup : public RGWSystemMetaObj { + std::string api_name; + std::list endpoints; + bool is_master = false; + + rgw_zone_id master_zone; + std::map zones; + + std::map placement_targets; + rgw_placement_rule default_placement; + + std::list hostnames; + std::list hostnames_s3website; + // TODO: Maybe convert hostnames to a map> for + // endpoint_type->hostnames +/* +20:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; }; +20:05 < _robbat21irssi> but that's a later compatability migration planning bit +20:06 < yehudasa> more like if (!hostnames.empty()) { +20:06 < yehudasa> for (std::list::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) { +20:06 < yehudasa> hostname_map["s3"].append(iter->second); +20:07 < yehudasa> hostname_map["s3website"].append(iter->second); +20:07 < yehudasa> s/append/push_back/g +20:08 < _robbat21irssi> inner loop over APIs +20:08 < yehudasa> yeah, probably +20:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website +*/ + std::map > api_hostname_map; + std::map > api_endpoints_map; + + std::string realm_id; + + rgw_sync_policy_info sync_policy; + rgw::zone_features::set enabled_features; + + RGWZoneGroup(): is_master(false){} + RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {} + explicit RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {} + RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWSI_SysObj* sysobj_svc, + const std::string& _realm_id, const std::list& _endpoints) + : RGWSystemMetaObj(_name, cct , sysobj_svc), endpoints(_endpoints), is_master(_is_master), + realm_id(_realm_id) {} + virtual ~RGWZoneGroup(); + + bool is_master_zonegroup() const { return is_master;} + void update_master(const DoutPrefixProvider *dpp, bool _is_master, optional_yield y) { + is_master = _is_master; + post_process_params(dpp, y); + } + void post_process_params(const DoutPrefixProvider *dpp, optional_yield y); + + void encode(bufferlist& bl) const override { + ENCODE_START(6, 1, bl); + encode(name, bl); + encode(api_name, bl); + encode(is_master, bl); + encode(endpoints, bl); + encode(master_zone, bl); + encode(zones, bl); + encode(placement_targets, bl); + encode(default_placement, bl); + encode(hostnames, bl); + encode(hostnames_s3website, bl); + RGWSystemMetaObj::encode(bl); + encode(realm_id, bl); + encode(sync_policy, bl); + encode(enabled_features, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) override { + DECODE_START(6, bl); + decode(name, bl); + decode(api_name, bl); + decode(is_master, bl); + decode(endpoints, bl); + decode(master_zone, bl); + decode(zones, bl); + decode(placement_targets, bl); + decode(default_placement, bl); + if (struct_v >= 2) { + 
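+      // Editorial note: this and the following struct_v checks keep decoding
+      // compatible with zonegroups written by older releases; fields absent
+      // from an old encoding simply retain their in-memory defaults (e.g.
+      // pre-v2 encodings carried no hostnames).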
decode(hostnames, bl); + } + if (struct_v >= 3) { + decode(hostnames_s3website, bl); + } + if (struct_v >= 4) { + RGWSystemMetaObj::decode(bl); + decode(realm_id, bl); + } else { + id = name; + } + if (struct_v >= 5) { + decode(sync_policy, bl); + } + if (struct_v >= 6) { + decode(enabled_features, bl); + } + DECODE_FINISH(bl); + } + + int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, bool old_format = false) override; + int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false) override; + int create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); + int equals(const std::string& other_zonegroup) const; + int add_zone(const DoutPrefixProvider *dpp, + const RGWZoneParams& zone_params, bool *is_master, bool *read_only, + const std::list& endpoints, const std::string *ptier_type, + bool *psync_from_all, std::list& sync_from, + std::list& sync_from_rm, std::string *predirect_zone, + std::optional bucket_index_max_shards, RGWSyncModulesManager *sync_mgr, + const rgw::zone_features::set& enable_features, + const rgw::zone_features::set& disable_features, + optional_yield y); + int remove_zone(const DoutPrefixProvider *dpp, const std::string& zone_id, optional_yield y); + int rename_zone(const DoutPrefixProvider *dpp, const RGWZoneParams& zone_params, optional_yield y); + rgw_pool get_pool(CephContext *cct) const override; + const std::string get_default_oid(bool old_region_format = false) const override; + const std::string& get_info_oid_prefix(bool old_region_format = false) const override; + const std::string& get_names_oid_prefix() const override; + std::string get_predefined_id(CephContext *cct) const override; + const std::string& get_predefined_name(CephContext *cct) const override; + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list& o); + + bool supports(std::string_view feature) const { + return enabled_features.contains(feature); + } +}; +WRITE_CLASS_ENCODER(RGWZoneGroup) + +struct RGWPeriodMap +{ + std::string id; + std::map zonegroups; + std::map zonegroups_by_api; + std::map short_zone_ids; + + std::string master_zonegroup; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& bl); + + int update(const RGWZoneGroup& zonegroup, CephContext *cct); + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + + void reset() { + zonegroups.clear(); + zonegroups_by_api.clear(); + master_zonegroup.clear(); + } + + uint32_t get_zone_short_id(const std::string& zone_id) const; + + bool find_zone_by_id(const rgw_zone_id& zone_id, + RGWZoneGroup *zonegroup, + RGWZone *zone) const; + bool find_zone_by_name(const std::string& zone_id, + RGWZoneGroup *zonegroup, + RGWZone *zone) const; +}; +WRITE_CLASS_ENCODER(RGWPeriodMap) + +struct RGWPeriodConfig +{ + RGWQuota quota; + RGWRateLimitInfo user_ratelimit; + RGWRateLimitInfo bucket_ratelimit; + // rate limit unauthenticated user + RGWRateLimitInfo anon_ratelimit; + + void encode(bufferlist& bl) const { + ENCODE_START(2, 1, bl); + encode(quota.bucket_quota, bl); + encode(quota.user_quota, bl); + encode(bucket_ratelimit, bl); + encode(user_ratelimit, bl); + encode(anon_ratelimit, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(2, bl); + decode(quota.bucket_quota, bl); + decode(quota.user_quota, bl); + if (struct_v >= 2) { + decode(bucket_ratelimit, bl); + decode(user_ratelimit, bl); + 
decode(anon_ratelimit, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + + // the period config must be stored in a local object outside of the period, + // so that it can be used in a default configuration where no realm/period + // exists + int read(const DoutPrefixProvider *dpp, RGWSI_SysObj *sysobj_svc, const std::string& realm_id, optional_yield y); + int write(const DoutPrefixProvider *dpp, RGWSI_SysObj *sysobj_svc, const std::string& realm_id, optional_yield y); + + static std::string get_oid(const std::string& realm_id); + static rgw_pool get_pool(CephContext *cct); +}; +WRITE_CLASS_ENCODER(RGWPeriodConfig) + +class RGWRealm; +class RGWPeriod; + +class RGWRealm : public RGWSystemMetaObj +{ +public: + std::string current_period; + epoch_t epoch{0}; //< realm epoch, incremented for each new period + + int create_control(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); + int delete_control(const DoutPrefixProvider *dpp, optional_yield y); +public: + RGWRealm() {} + RGWRealm(const std::string& _id, const std::string& _name = "") : RGWSystemMetaObj(_id, _name) {} + RGWRealm(CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_cct, _sysobj_svc) {} + RGWRealm(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_name, _cct, _sysobj_svc){} + virtual ~RGWRealm() override; + + void encode(bufferlist& bl) const override { + ENCODE_START(1, 1, bl); + RGWSystemMetaObj::encode(bl); + encode(current_period, bl); + encode(epoch, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) override { + DECODE_START(1, bl); + RGWSystemMetaObj::decode(bl); + decode(current_period, bl); + decode(epoch, bl); + DECODE_FINISH(bl); + } + + int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true) override; + int delete_obj(const DoutPrefixProvider *dpp, optional_yield y); + rgw_pool get_pool(CephContext *cct) const override; + const std::string get_default_oid(bool old_format = false) const override; + const std::string& get_names_oid_prefix() const override; + const std::string& get_info_oid_prefix(bool old_format = false) const override; + std::string get_predefined_id(CephContext *cct) const override; + const std::string& get_predefined_name(CephContext *cct) const override; + + using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list& o); + + const std::string& get_current_period() const { + return current_period; + } + int set_current_period(const DoutPrefixProvider *dpp, RGWPeriod& period, optional_yield y); + void clear_current_period_and_epoch() { + current_period.clear(); + epoch = 0; + } + epoch_t get_epoch() const { return epoch; } + + std::string get_control_oid() const; + /// send a notify on the realm control object + int notify_zone(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y); + /// notify the zone of a new period + int notify_new_period(const DoutPrefixProvider *dpp, const RGWPeriod& period, optional_yield y); + + int find_zone(const DoutPrefixProvider *dpp, + const rgw_zone_id& zid, + RGWPeriod *pperiod, + RGWZoneGroup *pzonegroup, + bool *pfound, + optional_yield y) const; +}; +WRITE_CLASS_ENCODER(RGWRealm) + +struct RGWPeriodLatestEpochInfo { + epoch_t epoch = 0; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(epoch, bl); + ENCODE_FINISH(bl); + } + + 
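+  // Editorial note: this small object records the highest epoch stored for a
+  // period, so readers can locate the newest epoch without listing the epoch
+  // objects; RGWPeriod::update_latest_epoch() (declared further below) only
+  // ever advances it and returns -EEXIST otherwise.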
void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(epoch, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  static void generate_test_instances(std::list<RGWPeriodLatestEpochInfo*>& o);
+};
+WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
+
+
+/*
+ * The RGWPeriod object contains the entire configuration of a
+ * RGWRealm, including its RGWZoneGroups and RGWZones. Consistency of
+ * this configuration is maintained across all zones by passing around
+ * the RGWPeriod object in its JSON representation.
+ *
+ * If a new configuration changes which zone is the metadata master
+ * zone (i.e., master zone of the master zonegroup), then a new
+ * RGWPeriod::id (a uuid) is generated, its RGWPeriod::realm_epoch is
+ * incremented, and the RGWRealm object is updated to reflect that new
+ * current_period id and epoch. If the configuration changes BUT which
+ * zone is the metadata master does NOT change, then only the
+ * RGWPeriod::epoch is incremented (and the RGWPeriod::id remains the
+ * same).
+ *
+ * When a new RGWPeriod is created with a new RGWPeriod::id (uuid), it
+ * is linked back to its predecessor RGWPeriod through the
+ * RGWPeriod::predecessor_uuid field, thus creating a "linked
+ * list"-like structure of RGWPeriods back to the cluster's creation.
+ * (A standalone sketch of this id/epoch rule follows below.)
+ */
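To make the id/epoch rule above concrete, a small standalone sketch (editorial; `PeriodIdentity` and `on_commit` are invented names, while `FIRST_EPOCH` and the fresh uuid mirror the helpers used by `rgw::commit_period()` earlier in this change):

```cpp
#include <cstdint>
#include <string>

// Invented illustration types; this mirrors the behavior described in the
// comment above and implemented by rgw::commit_period().
struct PeriodIdentity {
  std::string id;        // RGWPeriod::id (a uuid)
  uint32_t epoch;        // RGWPeriod::epoch
  uint32_t realm_epoch;  // RGWPeriod::realm_epoch
};

PeriodIdentity on_commit(const PeriodIdentity& current,
                         bool master_zone_changed,
                         const std::string& fresh_uuid) {
  if (master_zone_changed) {
    // a new period object, linked to its predecessor; the realm epoch advances
    return {fresh_uuid, 1 /* FIRST_EPOCH */, current.realm_epoch + 1};
  }
  // same period id, next epoch
  return {current.id, current.epoch + 1, current.realm_epoch};
}
```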
+class RGWPeriod
+{
+public:
+  std::string id; //< a uuid
+  epoch_t epoch{0};
+  std::string predecessor_uuid;
+  std::vector<std::string> sync_status;
+  RGWPeriodMap period_map;
+  RGWPeriodConfig period_config;
+  std::string master_zonegroup;
+  rgw_zone_id master_zone;
+
+  std::string realm_id;
+  std::string realm_name;
+  epoch_t realm_epoch{1}; //< realm epoch when period was made current
+
+  CephContext *cct{nullptr};
+  RGWSI_SysObj *sysobj_svc{nullptr};
+
+  int read_info(const DoutPrefixProvider *dpp, optional_yield y);
+  int read_latest_epoch(const DoutPrefixProvider *dpp,
+                        RGWPeriodLatestEpochInfo& epoch_info,
+                        optional_yield y,
+                        RGWObjVersionTracker *objv = nullptr);
+  int use_latest_epoch(const DoutPrefixProvider *dpp, optional_yield y);
+  int use_current_period();
+
+  const std::string get_period_oid() const;
+  const std::string get_period_oid_prefix() const;
+
+  // gather the metadata sync status for each shard; only for use on master zone
+  int update_sync_status(const DoutPrefixProvider *dpp,
+                         rgw::sal::Driver* driver,
+                         const RGWPeriod& current_period,
+                         std::ostream& error_stream, bool force_if_stale);
+
+public:
+  RGWPeriod() {}
+
+  explicit RGWPeriod(const std::string& period_id, epoch_t _epoch = 0)
+    : id(period_id), epoch(_epoch) {}
+
+  const std::string& get_id() const { return id; }
+  epoch_t get_epoch() const { return epoch; }
+  epoch_t get_realm_epoch() const { return realm_epoch; }
+  const std::string& get_predecessor() const { return predecessor_uuid; }
+  const rgw_zone_id& get_master_zone() const { return master_zone; }
+  const std::string& get_master_zonegroup() const { return master_zonegroup; }
+  const std::string& get_realm() const { return realm_id; }
+  const std::string& get_realm_name() const { return realm_name; }
+  const RGWPeriodMap& get_map() const { return period_map; }
+  RGWPeriodConfig& get_config() { return period_config; }
+  const RGWPeriodConfig& get_config() const { return period_config; }
+  const std::vector<std::string>& get_sync_status() const { return sync_status; }
+  rgw_pool get_pool(CephContext *cct) const;
+  const std::string& get_latest_epoch_oid() const;
+  const std::string& get_info_oid_prefix() const;
+
+  void set_user_quota(RGWQuotaInfo& user_quota) {
+    period_config.quota.user_quota = user_quota;
+  }
+
+  void set_bucket_quota(RGWQuotaInfo& bucket_quota) {
+    period_config.quota.bucket_quota = bucket_quota;
+  }
+
+  void set_id(const std::string& _id) {
+    this->id = _id;
+    period_map.id = _id;
+  }
+  void set_epoch(epoch_t epoch) { this->epoch = epoch; }
+  void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
+
+  void set_predecessor(const std::string& predecessor)
+  {
+    predecessor_uuid = predecessor;
+  }
+
+  void set_realm_id(const std::string& _realm_id) {
+    realm_id = _realm_id;
+  }
+
+  int reflect(const DoutPrefixProvider *dpp, optional_yield y);
+
+  int get_zonegroup(RGWZoneGroup& zonegroup,
+                    const std::string& zonegroup_id) const;
+
+  bool is_single_zonegroup() const
+  {
+    return (period_map.zonegroups.size() <= 1);
+  }
+
+  /*
+    returns true if there are several zonegroups with at least one zone
+  */
+  bool is_multi_zonegroups_with_zones() const
+  {
+    int count = 0;
+    for (const auto& zg : period_map.zonegroups) {
+      if (zg.second.zones.size() > 0) {
+        if (count++ > 0) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  bool find_zone(const DoutPrefixProvider *dpp,
+                 const rgw_zone_id& zid,
+                 RGWZoneGroup *pzonegroup,
+                 optional_yield y) const;
+
+  int get_latest_epoch(const DoutPrefixProvider *dpp, epoch_t& epoch, optional_yield y);
+  int set_latest_epoch(const DoutPrefixProvider *dpp, optional_yield y,
+                       epoch_t epoch, bool exclusive = false,
+                       RGWObjVersionTracker *objv = nullptr);
+  // update latest_epoch if the given epoch is higher, else return -EEXIST
+  int update_latest_epoch(const DoutPrefixProvider *dpp, epoch_t epoch, optional_yield y);
+
+  int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const std::string& period_realm_id, optional_yield y,
+           const std::string& period_realm_name = "", bool setup_obj = true);
+  int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, optional_yield y, bool setup_obj = true);
+
+  int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true);
+  int delete_obj(const DoutPrefixProvider *dpp, optional_yield y);
+  int store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y);
+  int add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y);
+
+  void fork();
+  int update(const DoutPrefixProvider *dpp, optional_yield y);
+
+  // commit a staging period; only for use on master zone
+  int commit(const DoutPrefixProvider *dpp,
+             rgw::sal::Driver* driver,
+             RGWRealm& realm, const RGWPeriod& current_period,
+             std::ostream& error_stream, optional_yield y,
+             bool force_if_stale = false);
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(id, bl);
+    encode(epoch, bl);
+    encode(realm_epoch, bl);
+    encode(predecessor_uuid, bl);
+    encode(sync_status, bl);
+    encode(period_map, bl);
+    encode(master_zone, bl);
+    encode(master_zonegroup, bl);
+    encode(period_config, bl);
+    encode(realm_id, bl);
+    encode(realm_name, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(id, bl);
+    decode(epoch, bl);
+    decode(realm_epoch, bl);
+    decode(predecessor_uuid, bl);
+    decode(sync_status, bl);
+    decode(period_map, bl);
+    decode(master_zone, bl);
+    decode(master_zonegroup, bl);
+    decode(period_config, bl);
+    decode(realm_id, bl);
+    decode(realm_name, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  static void generate_test_instances(std::list<RGWPeriod*>& o);
+
+  static std::string get_staging_id(const std::string& realm_id) {
+    return realm_id + ":staging";
+  }
+};
+WRITE_CLASS_ENCODER(RGWPeriod)
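As a usage note (editorial), the staging workflow that radosgw-admin drives with the free functions declared below reduces to a fork/update/commit sequence. A sketch, assuming a caller's existing `dpp`, `y`, `cfgstore`, `driver`, `realm`, `realm_writer`, and `current_period`, with error handling elided:

```cpp
// Fork the running period into "<realm_id>:staging", fold in every
// zonegroup, then try to commit it as the realm's new current period.
RGWPeriod staging = current_period;
rgw::fork_period(dpp, staging);                 // id becomes the staging id
rgw::update_period(dpp, y, cfgstore, staging);  // read in all zonegroups
rgw::commit_period(dpp, y, cfgstore, driver, realm, realm_writer,
                   current_period, staging, std::cerr,
                   /*force_if_stale=*/false);
```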
+
+namespace rgw {
+
+/// Look up a realm by its id. If no id is given, look it up by name.
+/// If no name is given, fall back to the cluster's default realm.
+int read_realm(const DoutPrefixProvider* dpp, optional_yield y,
+               sal::ConfigStore* cfgstore,
+               std::string_view realm_id,
+               std::string_view realm_name,
+               RGWRealm& info,
+               std::unique_ptr<sal::RealmWriter>* writer = nullptr);
+
+/// Create a realm and its initial period. If the info.id is empty, a
+/// random uuid will be generated.
+int create_realm(const DoutPrefixProvider* dpp, optional_yield y,
+                 sal::ConfigStore* cfgstore, bool exclusive,
+                 RGWRealm& info,
+                 std::unique_ptr<sal::RealmWriter>* writer = nullptr);
+
+/// Set the given realm as the cluster's default realm.
+int set_default_realm(const DoutPrefixProvider* dpp, optional_yield y,
+                      sal::ConfigStore* cfgstore, const RGWRealm& info,
+                      bool exclusive = false);
+
+/// Update the current_period of an existing realm.
+int realm_set_current_period(const DoutPrefixProvider* dpp, optional_yield y,
+                             sal::ConfigStore* cfgstore,
+                             sal::RealmWriter& writer, RGWRealm& realm,
+                             const RGWPeriod& period);
+
+/// Overwrite the local zonegroup and period config objects with the new
+/// configuration contained in the given period.
+int reflect_period(const DoutPrefixProvider* dpp, optional_yield y,
+                   sal::ConfigStore* cfgstore, const RGWPeriod& info);
+
+/// Return the staging period id for the given realm.
+std::string get_staging_period_id(std::string_view realm_id);
+
+/// Convert the given period into a separate staging period, where
+/// radosgw-admin can make changes to it without affecting the running
+/// configuration.
+void fork_period(const DoutPrefixProvider* dpp, RGWPeriod& info);
+
+/// Read all zonegroups in the period's realm and add them to the period.
+int update_period(const DoutPrefixProvider* dpp, optional_yield y,
+                  sal::ConfigStore* cfgstore, RGWPeriod& info);
+
+/// Validates the given 'staging' period and tries to commit it as the
+/// realm's new current period.
+int commit_period(const DoutPrefixProvider* dpp, optional_yield y,
+                  sal::ConfigStore* cfgstore, sal::Driver* driver,
+                  RGWRealm& realm, sal::RealmWriter& realm_writer,
+                  const RGWPeriod& current_period,
+                  RGWPeriod& info, std::ostream& error_stream,
+                  bool force_if_stale);
+
+
+/// Look up a zonegroup by its id. If no id is given, look it up by name.
+/// If no name is given, fall back to the cluster's default zonegroup.
+int read_zonegroup(const DoutPrefixProvider* dpp, optional_yield y,
+                   sal::ConfigStore* cfgstore,
+                   std::string_view zonegroup_id,
+                   std::string_view zonegroup_name,
+                   RGWZoneGroup& info,
+                   std::unique_ptr<sal::ZoneGroupWriter>* writer = nullptr);
+
+/// Initialize and create the given zonegroup. If the given info.id is empty,
+/// a random uuid will be generated. May fail with -EEXIST.
+int create_zonegroup(const DoutPrefixProvider* dpp, optional_yield y,
+                     sal::ConfigStore* cfgstore, bool exclusive,
+                     RGWZoneGroup& info);
+
+/// Set the given zonegroup as its realm's default zonegroup.
+int set_default_zonegroup(const DoutPrefixProvider* dpp, optional_yield y,
+                          sal::ConfigStore* cfgstore, const RGWZoneGroup& info,
+                          bool exclusive = false);
+
+/// Add a zone to the zonegroup, or update an existing zone entry.
+int add_zone_to_group(const DoutPrefixProvider* dpp,
+                      RGWZoneGroup& zonegroup,
+                      const RGWZoneParams& zone_params,
+                      const bool *pis_master, const bool *pread_only,
+                      const std::list<std::string>& endpoints,
+                      const std::string *ptier_type,
+                      const bool *psync_from_all,
+                      const std::list<std::string>& sync_from,
+                      const std::list<std::string>& sync_from_rm,
+                      const std::string *predirect_zone,
+                      std::optional<uint32_t> bucket_index_max_shards,
+                      const rgw::zone_features::set& enable_features,
+                      const rgw::zone_features::set& disable_features);
+
+/// Remove a zone by id from its zonegroup, promoting a new master zone if
+/// necessary.
+int remove_zone_from_group(const DoutPrefixProvider* dpp,
+                           RGWZoneGroup& info,
+                           const rgw_zone_id& zone_id);
+
+
+/// Look up a zone by its id. If no id is given, look it up by name. If no name
+/// is given, fall back to the realm's default zone.
+int read_zone(const DoutPrefixProvider* dpp, optional_yield y,
+              sal::ConfigStore* cfgstore,
+              std::string_view zone_id,
+              std::string_view zone_name,
+              RGWZoneParams& info,
+              std::unique_ptr<sal::ZoneWriter>* writer = nullptr);
+
+/// Initialize and create a new zone. If the given info.id is empty, a random
+/// uuid will be generated. Pool names are initialized with the zone name as a
+/// prefix. If any pool names conflict with existing zones, a random suffix is
+/// added.
+int create_zone(const DoutPrefixProvider* dpp, optional_yield y,
+                sal::ConfigStore* cfgstore, bool exclusive,
+                RGWZoneParams& info,
+                std::unique_ptr<sal::ZoneWriter>* writer = nullptr);
+
+/// Initialize the zone's pool names using the zone name as a prefix. If a pool
+/// name conflicts with an existing zone's pool, add a unique suffix.
+int init_zone_pool_names(const DoutPrefixProvider *dpp, optional_yield y,
+                         const std::set<rgw_pool>& pools, RGWZoneParams& info);
+
+/// Set the given zone as its realm's default zone.
+int set_default_zone(const DoutPrefixProvider* dpp, optional_yield y,
+                     sal::ConfigStore* cfgstore, const RGWZoneParams& info,
+                     bool exclusive = false);
+
+/// Delete an existing zone and remove it from any zonegroups that contain it.
+/// Delete an existing zone and remove it from any zonegroups that contain it.
+int delete_zone(const DoutPrefixProvider* dpp, optional_yield y,
+                sal::ConfigStore* cfgstore, const RGWZoneParams& info,
+                sal::ZoneWriter& writer);
+
+} // namespace rgw
+
+#endif
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index e3234751f4a..4f2820c6cb6 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -3021,7 +3021,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp,
   string req_id;
   if (!s) {
     // fake req_id
-    req_id = store->svc.zone_utils->unique_id(store->store->get_new_req_id());
+    req_id = store->svc.zone_utils->unique_id(store->driver->get_new_req_id());
   } else {
     req_id = s->req_id;
   }
diff --git a/src/rgw/rgw_sal.cc b/src/rgw/rgw_sal.cc
index 0eee88fd674..8a793f75314 100644
--- a/src/rgw/rgw_sal.cc
+++ b/src/rgw/rgw_sal.cc
@@ -23,13 +23,13 @@
 #include "rgw_sal.h"
 #include "rgw_sal_rados.h"
-#include "store/rados/config/store.h"
-#include "store/json_config/store.h"
+#include "driver/rados/config/store.h"
+#include "driver/json_config/store.h"
 #include "rgw_d3n_datacache.h"
 
 #ifdef WITH_RADOSGW_DBSTORE
 #include "rgw_sal_dbstore.h"
-#include "store/dbstore/config/store.h"
+#include "driver/dbstore/config/store.h"
 #endif
 
 #ifdef WITH_RADOSGW_MOTR
diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h
index c46fb6f842e..a7f496191c7 100644
--- a/src/rgw/rgw_sal_dbstore.h
+++ b/src/rgw/rgw_sal_dbstore.h
@@ -21,8 +21,8 @@
 #include "rgw_lc.h"
 #include "rgw_multi.h"
 
-#include "store/dbstore/common/dbstore.h"
-#include "store/dbstore/dbstore_mgr.h"
+#include "driver/dbstore/common/dbstore.h"
+#include "driver/dbstore/dbstore_mgr.h"
 
 namespace rgw { namespace sal {
diff --git a/src/rgw/store/daos/README.md b/src/rgw/store/daos/README.md
deleted file mode 100644
index de6d215a016..00000000000
--- a/src/rgw/store/daos/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# DAOS
-
-Standalone RADOS Gateway (RGW) on [DAOS](http://daos.io/) (Experimental)
-
-## CMake Option
-
-Add below cmake option
-
-```bash
-    -DWITH_RADOSGW_DAOS=ON
-```
-
-## Build
-
-```bash
-    cd build
-    ninja [vstart]
-```
-
-## Running Test cluster
-
-Edit ceph.conf to add below option
-
-```conf
-    [client]
-    rgw backend store = daos
-```
-
-Restart vstart cluster or just RGW server
-
-```bash
-    [..] RGW=1 ../src/vstart.sh -d
-```
-
-The above configuration brings up an RGW server on DAOS.
-
-## Creating a test user
-
- To create a `testid` user to be used for s3 operations, use the following command:
-
- ```bash
-local akey='0555b35654ad1656d804'
-local skey='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=='
- radosgw-admin user create --uid testid \
-   --access-key $akey --secret $skey \
-   --display-name 'M. Tester' --email tester@ceph.com --no-mon-config
- ```
diff --git a/src/rgw/store/dbstore/CMakeLists.txt b/src/rgw/store/dbstore/CMakeLists.txt
deleted file mode 100644
index 0d34d32970b..00000000000
--- a/src/rgw/store/dbstore/CMakeLists.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-#need to update cmake version here
-cmake_minimum_required(VERSION 3.14.0)
-project(dbstore)
-
-option(USE_SQLITE "Enable SQLITE DB" ON)
-
-set (CMAKE_INCLUDE_DIR ${CMAKE_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/common")
-
-set(dbstore_srcs
-  common/dbstore_log.h
-  common/dbstore.h
-  common/dbstore.cc
-  config/store.cc)
-IF(USE_SQLITE)
-  list(APPEND dbstore_srcs
-    config/sqlite.cc
-    sqlite/connection.cc
-    sqlite/error.cc
-    sqlite/statement.cc)
-endif()
-
-set(dbstore_mgr_srcs
-  dbstore_mgr.h
-  dbstore_mgr.cc
-  )
-
-add_library(dbstore_lib ${dbstore_srcs})
-target_include_directories(dbstore_lib
-  PUBLIC "${CMAKE_SOURCE_DIR}/src/fmt/include"
-  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
-  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/store/rados"
-  PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
-set(link_targets spawn)
-if(WITH_JAEGER)
-  list(APPEND link_targets jaeger_base)
-endif()
-list(APPEND link_targets rgw_common)
-target_link_libraries(dbstore_lib PUBLIC ${link_targets})
-
-set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} dbstore_lib)
-
-IF(USE_SQLITE)
-  add_subdirectory(sqlite)
-  set(CMAKE_INCLUDE_DIR ${CMAKE_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/sqlite")
-  add_compile_definitions(SQLITE_ENABLED=1)
-  set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} rgw_common)
-  set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} sqlite_db)
-  add_dependencies(sqlite_db dbstore_lib)
-ENDIF()
-
-# add pthread library
-set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} pthread)
-
-find_package(gtest QUIET)
-if(WITH_TESTS)
-  add_subdirectory(tests)
-else()
-  message(WARNING "Gtest not enabled")
-endif()
-
-include_directories(${CMAKE_INCLUDE_DIR})
-add_library(dbstore STATIC ${dbstore_mgr_srcs})
-target_link_libraries(dbstore ${CMAKE_LINK_LIBRARIES})
-
-# testing purpose
-set(dbstore_main_srcs
-  dbstore_main.cc)
-
-set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} dbstore)
-add_executable(dbstore-bin ${dbstore_main_srcs})
-add_dependencies(dbstore-bin dbstore)
-target_link_libraries(dbstore-bin ${CMAKE_LINK_LIBRARIES})
diff --git a/src/rgw/store/dbstore/README.md b/src/rgw/store/dbstore/README.md
deleted file mode 100644
index 659bc205276..00000000000
--- a/src/rgw/store/dbstore/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# DBStore
-Standalone Rados Gateway (RGW) on DBStore (Experimental)
-
-
-## CMake Option
-Add below cmake option (enabled by default)
-
-    -DWITH_RADOSGW_DBSTORE=ON
-
-
-## Build
-
-    cd build
-    ninja [vstart]
-
-
-## Running Test cluster
-Edit ceph.conf to add below option
-
-    [client]
-        rgw backend store = dbstore
-
-Start vstart cluster
-
-    [..] RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -n -d
-
-The above vstart command brings up RGW server on dbstore and creates few default users (eg., testid) to be used for s3 operations.
-
-`radosgw-admin` can be used to create and remove other users.
-
-
-By default, dbstore creates .db file *'/var/lib/ceph/radosgw/dbstore-default_ns.db'* to store the data. This can be configured using below options in ceph.conf
-
-    [client]
-        dbstore db dir = <path>
-        dbstore db name prefix = <prefix>
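[Editor's note, not part of the patch] As an illustration with made-up values, the override below would place the database at */data/rgw/mystore-default_ns.db*, following the `<dir>/<prefix>-<namespace>.db` pattern implied by the default path above.

    [client]
        dbstore db dir = /data/rgw
        dbstore db name prefix = mystore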
-
-
-## DBStore Unit Tests
-To execute DBStore unit test cases (using Gtest framework), from build directory
-
-    ninja unittest_dbstore_tests
-    ./bin/unittest_dbstore_tests [logfile] [loglevel]
-    (default logfile: rgw_dbstore_tests.log, loglevel: 20)
-    ninja unittest_dbstore_mgr_tests
-    ./bin/unittest_dbstore_mgr_tests
-
-To execute Sample test file
-
-    ninja src/rgw/store/dbstore/install
-    ./bin/dbstore-bin [logfile] [loglevel]
-    (default logfile: rgw_dbstore_bin.log, loglevel: 20)
-
diff --git a/src/rgw/store/dbstore/common/connection_pool.h b/src/rgw/store/dbstore/common/connection_pool.h
deleted file mode 100644
index 07f3c81c3df..00000000000
--- a/src/rgw/store/dbstore/common/connection_pool.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include <concepts>
-#include <condition_variable>
-#include <memory>
-#include <mutex>
-#include <boost/circular_buffer.hpp>
-#include "common/dout.h"
-
-namespace rgw::dbstore {
-
-template <typename Connection>
-class ConnectionHandle;
-
-/// A thread-safe base class that manages a fixed-size pool of generic database
-/// connections and supports the reclamation of ConnectionHandles. This class
-/// is the subset of ConnectionPool which doesn't depend on the Factory type.
-template <typename Connection>
-class ConnectionPoolBase {
- public:
-  ConnectionPoolBase(std::size_t max_connections)
-      : connections(max_connections)
-  {}
- private:
-  friend class ConnectionHandle<Connection>;
-
-  // TODO: the caller may detect a connection error that prevents the connection
-  // from being reused. allow them to indicate these errors here
-  void put(std::unique_ptr<Connection> connection)
-  {
-    auto lock = std::scoped_lock{mutex};
-    connections.push_back(std::move(connection));
-
-    if (connections.size() == 1) { // was empty
-      cond.notify_one();
-    }
-  }
- protected:
-  std::mutex mutex;
-  std::condition_variable cond;
-  boost::circular_buffer<std::unique_ptr<Connection>> connections;
-};
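[Editor's note, not part of the patch] A minimal sketch of how this header's pieces fit together once `ConnectionHandle` and `ConnectionPool` are defined just below: `get()` lends out a handle, and the handle's destructor returns the connection. `SqliteConn` and `use_pool` are hypothetical stand-ins.

```cpp
#include <memory>
#include "common/connection_pool.h"

struct SqliteConn { /* stands in for a real connection wrapper */ };

void use_pool(const DoutPrefixProvider* dpp)
{
  // The factory satisfies factory_of<SqliteConn>: it returns a unique_ptr.
  auto factory = [](const DoutPrefixProvider*) {
    return std::make_unique<SqliteConn>();
  };
  rgw::dbstore::ConnectionPool<SqliteConn, decltype(factory)> pool{factory, 4};

  auto conn = pool.get(dpp); // reuses, creates, or waits at the limit
  // ... run queries through conn-> ...
}                            // handle destructs; connection returns to pool
```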
-
-/// Handle to a database connection borrowed from the pool. Automatically
-/// returns the connection to its pool on the handle's destruction.
-template <typename Connection>
-class ConnectionHandle {
-  ConnectionPoolBase<Connection>* pool = nullptr;
-  std::unique_ptr<Connection> conn;
- public:
-  ConnectionHandle() noexcept = default;
-  ConnectionHandle(ConnectionPoolBase<Connection>* pool,
-                   std::unique_ptr<Connection> conn) noexcept
-      : pool(pool), conn(std::move(conn)) {}
-
-  ~ConnectionHandle() {
-    if (conn) {
-      pool->put(std::move(conn));
-    }
-  }
-
-  ConnectionHandle(ConnectionHandle&&) = default;
-  ConnectionHandle& operator=(ConnectionHandle&& o) noexcept {
-    if (conn) {
-      pool->put(std::move(conn));
-    }
-    conn = std::move(o.conn);
-    pool = o.pool;
-    return *this;
-  }
-
-  explicit operator bool() const noexcept { return static_cast<bool>(conn); }
-  Connection& operator*() const noexcept { return *conn; }
-  Connection* operator->() const noexcept { return conn.get(); }
-  Connection* get() const noexcept { return conn.get(); }
-};
-
-
-// factory_of concept requires the function signature:
-//   F(const DoutPrefixProvider*) -> std::unique_ptr<T>
-template <typename F, typename T>
-concept factory_of = requires (F factory, const DoutPrefixProvider* dpp) {
-  { factory(dpp) } -> std::same_as<std::unique_ptr<T>>;
-  requires std::move_constructible<F>;
-};
-
-
-/// Generic database connection pool that enforces a limit on open connections.
-template <typename Connection, factory_of<Connection> Factory>
-class ConnectionPool : public ConnectionPoolBase<Connection> {
- public:
-  ConnectionPool(Factory factory, std::size_t max_connections)
-      : ConnectionPoolBase<Connection>(max_connections),
-        factory(std::move(factory))
-  {}
-
-  /// Borrow a connection from the pool. If all existing connections are in use,
-  /// use the connection factory to create another one. If we've reached the
-  /// limit on open connections, wait on a condition variable for the next one
-  /// returned to the pool.
-  auto get(const DoutPrefixProvider* dpp)
-      -> ConnectionHandle<Connection>
-  {
-    auto lock = std::unique_lock{this->mutex};
-    std::unique_ptr<Connection> conn;
-
-    if (!this->connections.empty()) {
-      // take an existing connection
-      conn = std::move(this->connections.front());
-      this->connections.pop_front();
-    } else if (total < this->connections.capacity()) {
-      // add another connection to the pool
-      conn = factory(dpp);
-      ++total;
-    } else {
-      // wait for the next put()
-      // TODO: support optional_yield
-      ldpp_dout(dpp, 4) << "ConnectionPool waiting on a connection" << dendl;
-      this->cond.wait(lock, [&] { return !this->connections.empty(); });
-      ldpp_dout(dpp, 4) << "ConnectionPool done waiting" << dendl;
-      conn = std::move(this->connections.front());
-      this->connections.pop_front();
-    }
-
-    return {this, std::move(conn)};
-  }
- private:
-  Factory factory;
-  std::size_t total = 0;
-};
-
-} // namespace rgw::dbstore
diff --git a/src/rgw/store/dbstore/common/dbstore.cc b/src/rgw/store/dbstore/common/dbstore.cc
deleted file mode 100644
index 3936368e6f7..00000000000
--- a/src/rgw/store/dbstore/common/dbstore.cc
+++ /dev/null
@@ -1,2245 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "dbstore.h"
-
-using namespace std;
-
-namespace rgw { namespace store {
-
-map<string, class ObjectOp*> DB::objectmap = {};
-
-map<string, class ObjectOp*> DB::getObjectMap() {
-  return DB::objectmap;
-}
-
-int DB::Initialize(string logfile, int loglevel)
-{
-  int ret = -1;
-  const DoutPrefixProvider *dpp = get_def_dpp();
-
-  if (!cct) {
-    cout << "Failed to Initialize.
No ceph Context \n"; - return -1; - } - - if (loglevel > 0) { - cct->_conf->subsys.set_log_level(ceph_subsys_rgw, loglevel); - } - if (!logfile.empty()) { - cct->_log->set_log_file(logfile); - cct->_log->reopen_log_file(); - } - - - db = openDB(dpp); - - if (!db) { - ldpp_dout(dpp, 0) <<"Failed to open database " << dendl; - return ret; - } - - ret = InitializeDBOps(dpp); - - if (ret) { - ldpp_dout(dpp, 0) <<"InitializeDBOps failed " << dendl; - closeDB(dpp); - db = NULL; - return ret; - } - - ldpp_dout(dpp, 0) << "DB successfully initialized - name:" \ - << db_name << "" << dendl; - - return ret; -} - -int DB::createGC(const DoutPrefixProvider *dpp) { - int ret = 0; - /* create gc thread */ - - gc_worker = std::make_unique(dpp, this); - gc_worker->create("db_gc"); - - return ret; -} - -int DB::stopGC() { - if (gc_worker) { - gc_worker->signal_stop(); - gc_worker->join(); - } - return 0; -} - -int DB::Destroy(const DoutPrefixProvider *dpp) -{ - if (!db) - return 0; - - stopGC(); - - closeDB(dpp); - - - ldpp_dout(dpp, 20)<<"DB successfully destroyed - name:" \ - < DB::getDBOp(const DoutPrefixProvider *dpp, std::string_view Op, - const DBOpParams *params) -{ - if (!Op.compare("InsertUser")) - return dbops.InsertUser; - if (!Op.compare("RemoveUser")) - return dbops.RemoveUser; - if (!Op.compare("GetUser")) - return dbops.GetUser; - if (!Op.compare("InsertBucket")) - return dbops.InsertBucket; - if (!Op.compare("UpdateBucket")) - return dbops.UpdateBucket; - if (!Op.compare("RemoveBucket")) - return dbops.RemoveBucket; - if (!Op.compare("GetBucket")) - return dbops.GetBucket; - if (!Op.compare("ListUserBuckets")) - return dbops.ListUserBuckets; - if (!Op.compare("InsertLCEntry")) - return dbops.InsertLCEntry; - if (!Op.compare("RemoveLCEntry")) - return dbops.RemoveLCEntry; - if (!Op.compare("GetLCEntry")) - return dbops.GetLCEntry; - if (!Op.compare("ListLCEntries")) - return dbops.ListLCEntries; - if (!Op.compare("InsertLCHead")) - return dbops.InsertLCHead; - if (!Op.compare("RemoveLCHead")) - return dbops.RemoveLCHead; - if (!Op.compare("GetLCHead")) - return dbops.GetLCHead; - - /* Object Operations */ - map::iterator iter; - class ObjectOp* Ob; - - { - const std::lock_guard lk(mtx); - iter = DB::objectmap.find(params->op.bucket.info.bucket.name); - } - - if (iter == DB::objectmap.end()) { - ldpp_dout(dpp, 30)<<"No objectmap found for bucket: " \ - <op.bucket.info.bucket.name << dendl; - /* not found */ - return nullptr; - } - - Ob = iter->second; - - if (!Op.compare("PutObject")) - return Ob->PutObject; - if (!Op.compare("DeleteObject")) - return Ob->DeleteObject; - if (!Op.compare("GetObject")) - return Ob->GetObject; - if (!Op.compare("UpdateObject")) - return Ob->UpdateObject; - if (!Op.compare("ListBucketObjects")) - return Ob->ListBucketObjects; - if (!Op.compare("ListVersionedObjects")) - return Ob->ListVersionedObjects; - if (!Op.compare("PutObjectData")) - return Ob->PutObjectData; - if (!Op.compare("UpdateObjectData")) - return Ob->UpdateObjectData; - if (!Op.compare("GetObjectData")) - return Ob->GetObjectData; - if (!Op.compare("DeleteObjectData")) - return Ob->DeleteObjectData; - if (!Op.compare("DeleteStaleObjectData")) - return Ob->DeleteStaleObjectData; - - return nullptr; -} - -int DB::objectmapInsert(const DoutPrefixProvider *dpp, string bucket, class ObjectOp* ptr) -{ - map::iterator iter; - class ObjectOp *Ob; - - const std::lock_guard lk(mtx); - iter = DB::objectmap.find(bucket); - - if (iter != DB::objectmap.end()) { - // entry already exists - // return success or 
replace it or - // return error ? - // - // return success for now & delete the newly allocated ptr - ldpp_dout(dpp, 30)<<"Objectmap entry already exists for bucket("\ - <InitializeObjectOps(getDBname(), dpp); - - DB::objectmap.insert(pair(bucket, Ob)); - - return 0; -} - -int DB::objectmapDelete(const DoutPrefixProvider *dpp, string bucket) -{ - map::iterator iter; - - const std::lock_guard lk(mtx); - iter = DB::objectmap.find(bucket); - - if (iter == DB::objectmap.end()) { - // entry doesn't exist - // return success or return error ? - // return success for now - ldpp_dout(dpp, 20)<<"Objectmap entry for bucket("<cct = cct; - - //reset params here - params->user_table = user_table; - params->bucket_table = bucket_table; - params->quota_table = quota_table; - params->lc_entry_table = lc_entry_table; - params->lc_head_table = lc_head_table; - - ret = 0; -out: - return ret; -} - -int DB::ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params) { - int ret = -1; - shared_ptr db_op; - - db_op = getDBOp(dpp, Op, params); - - if (!db_op) { - ldpp_dout(dpp, 0)<<"No db_op found for Op("<Execute(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0)<<"In Process op Execute failed for fop(" << Op << ")" << dendl; - } else { - ldpp_dout(dpp, 20)<<"Successfully processed fop(" << Op << ")" << dendl; - } - - return ret; -} - -int DB::get_user(const DoutPrefixProvider *dpp, - const std::string& query_str, const std::string& query_str_val, - RGWUserInfo& uinfo, map *pattrs, - RGWObjVersionTracker *pobjv_tracker) { - int ret = 0; - - if (query_str.empty() || query_str_val.empty()) { - ldpp_dout(dpp, 0)<<"In GetUser - Invalid query(" << query_str <<"), query_str_val(" << query_str_val <<")" << dendl; - return -1; - } - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.query_str = query_str; - - // validate query_str with UserTable entries names - if (query_str == "username") { - params.op.user.uinfo.display_name = query_str_val; - } else if (query_str == "email") { - params.op.user.uinfo.user_email = query_str_val; - } else if (query_str == "access_key") { - RGWAccessKey k(query_str_val, ""); - map keys; - keys[query_str_val] = k; - params.op.user.uinfo.access_keys = keys; - } else if (query_str == "user_id") { - params.op.user.uinfo.user_id = uinfo.user_id; - } else { - ldpp_dout(dpp, 0)<<"In GetUser Invalid query string :" <read_version = params.op.user.user_version; - } - -out: - return ret; -} - -int DB::store_user(const DoutPrefixProvider *dpp, - RGWUserInfo& uinfo, bool exclusive, map *pattrs, - RGWObjVersionTracker *pobjv, RGWUserInfo* pold_info) -{ - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - int ret = 0; - - /* Check if the user already exists and return the old info, caller will have a use for it */ - RGWUserInfo orig_info; - RGWObjVersionTracker objv_tracker = {}; - obj_version& obj_ver = objv_tracker.read_version; - - orig_info.user_id = uinfo.user_id; - ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker); - - if (!ret && obj_ver.ver) { - /* already exists. */ - - if (pold_info) { - *pold_info = orig_info; - } - - if (pobjv && (pobjv->read_version.ver != obj_ver.ver)) { - /* Object version mismatch.. 
return ECANCELED */ - ret = -ECANCELED; - ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <read_version = obj_ver; - pobjv->write_version = obj_ver; - } - -out: - return ret; -} - -int DB::remove_user(const DoutPrefixProvider *dpp, - RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv) -{ - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - int ret = 0; - - RGWUserInfo orig_info; - RGWObjVersionTracker objv_tracker = {}; - - orig_info.user_id = uinfo.user_id; - ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker); - - if (ret) { - return ret; - } - - if (!ret && objv_tracker.read_version.ver) { - /* already exists. */ - - if (pobjv && (pobjv->read_version.ver != objv_tracker.read_version.ver)) { - /* Object version mismatch.. return ECANCELED */ - ret = -ECANCELED; - ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <& attrs, - RGWBucketInfo& info, - obj_version *pobjv, - obj_version *pep_objv, - real_time creation_time, - rgw_bucket *pmaster_bucket, - uint32_t *pmaster_num_shards, - optional_yield y, - bool exclusive) -{ - /* - * XXX: Simple creation for now. - * - * Referring to RGWRados::create_bucket(), - * Check if bucket already exists, select_bucket_placement, - * is explicit put/remove instance info needed? - should not be ideally - */ - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - int ret = 0; - - /* Check if the bucket already exists and return the old info, caller will have a use for it */ - RGWBucketInfo orig_info; - orig_info.bucket.name = bucket.name; - ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr, nullptr); - - if (!ret && !orig_info.owner.id.empty() && exclusive) { - /* already exists. Return the old info */ - - info = std::move(orig_info); - return ret; - } - - RGWObjVersionTracker& objv_tracker = info.objv_tracker; - - objv_tracker.read_version.clear(); - - if (pobjv) { - objv_tracker.write_version = *pobjv; - } else { - objv_tracker.generate_new_write_ver(cct); - } - params.op.bucket.bucket_version = objv_tracker.write_version; - objv_tracker.read_version = params.op.bucket.bucket_version; - - uint64_t bid = next_bucket_id(); - string s = getDBname() + "." + std::to_string(bid); - bucket.marker = bucket.bucket_id = s; - - info.bucket = bucket; - info.owner = owner.user_id; - info.zonegroup = zonegroup_id; - info.placement_rule = placement_rule; - info.swift_ver_location = swift_ver_location; - info.swift_versioning = (!swift_ver_location.empty()); - - info.requester_pays = false; - if (real_clock::is_zero(creation_time)) { - info.creation_time = ceph::real_clock::now(); - } else { - info.creation_time = creation_time; - } - if (pquota_info) { - info.quota = *pquota_info; - } - - params.op.bucket.info = info; - params.op.bucket.bucket_attrs = attrs; - params.op.bucket.mtime = ceph::real_time(); - params.op.user.uinfo.user_id.id = owner.user_id.id; - - ret = ProcessOp(dpp, "InsertBucket", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"create_bucket failed with err:(" <add(std::move(entry)); - } - - if (query_str == "all") { - // userID/OwnerID may have changed. Update it. 
- user.id = params.op.bucket.info.owner.id; - } - -out: - return ret; -} - -int DB::update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str, - RGWBucketInfo& info, - bool exclusive, - const rgw_user* powner_id, - map* pattrs, - ceph::real_time* pmtime, - RGWObjVersionTracker* pobjv) -{ - int ret = 0; - DBOpParams params = {}; - obj_version bucket_version; - RGWBucketInfo orig_info; - - /* Check if the bucket already exists and return the old info, caller will have a use for it */ - orig_info.bucket.name = info.bucket.name; - params.op.bucket.info.bucket.name = info.bucket.name; - ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr, - &bucket_version); - - if (ret) { - ldpp_dout(dpp, 0)<<"Failed to read bucket info err:(" <read_version.ver != bucket_version.ver) { - ldpp_dout(dpp, 0)<<"Read version mismatch err:(" <id; - } else { - params.op.user.uinfo.user_id.id = orig_info.owner.id; - } - - /* Update version & mtime */ - params.op.bucket.bucket_version.ver = ++(bucket_version.ver); - - if (pmtime) { - params.op.bucket.mtime = *pmtime;; - } else { - params.op.bucket.mtime = ceph::real_time(); - } - - if (query_str == "attrs") { - params.op.query_str = "attrs"; - params.op.bucket.bucket_attrs = *pattrs; - } else if (query_str == "owner") { - /* Update only owner i.e, chown. - * Update creation_time too */ - params.op.query_str = "owner"; - params.op.bucket.info.creation_time = params.op.bucket.mtime; - } else if (query_str == "info") { - params.op.query_str = "info"; - params.op.bucket.info = info; - } else { - ret = -1; - ldpp_dout(dpp, 0)<<"In UpdateBucket Invalid query_str : " << query_str << dendl; - goto out; - } - - ret = ProcessOp(dpp, "UpdateBucket", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In UpdateBucket failed err:(" <read_version = params.op.bucket.bucket_version; - pobjv->write_version = params.op.bucket.bucket_version; - } - -out: - return ret; -} - -/** - * Get ordered listing of the objects in a bucket. - * - * max_p: maximum number of results to return - * bucket: bucket to list contents of - * prefix: only return results that match this prefix - * delim: do not include results that match this string. - * Any skipped results will have the matching portion of their name - * inserted in common_prefixes with a "true" mark. - * marker: if filled in, begin the listing with this object. - * end_marker: if filled in, end the listing with this object. - * result: the objects are put in here. - * common_prefixes: if delim is filled in, any matching prefixes are - * placed here. - * is_truncated: if number of objects in the bucket is bigger than - * max, then truncated. - */ -int DB::Bucket::List::list_objects(const DoutPrefixProvider *dpp, int64_t max, - vector *result, - map *common_prefixes, bool *is_truncated) -{ - int ret = 0; - DB *store = target->get_store(); - int64_t count = 0; - std::string prev_obj; - - DBOpParams db_params = {}; - store->InitializeParams(dpp, &db_params); - - db_params.op.bucket.info = target->get_bucket_info(); - /* XXX: Handle whole marker? key -> name, instance, ns? 
*/ - db_params.op.obj.min_marker = params.marker.name; - db_params.op.obj.max_marker = params.end_marker.name; - db_params.op.obj.prefix = params.prefix + "%"; - db_params.op.list_max_count = max + 1; /* +1 for next_marker */ - - ret = store->ProcessOp(dpp, "ListBucketObjects", &db_params); - - if (ret) { - ldpp_dout(dpp, 0)<<"In ListBucketObjects failed err:(" <= max) { - *is_truncated = true; - next_marker.name = entry.key.name; - next_marker.instance = entry.key.instance; - break; - } - - if (!params.delim.empty()) { - const std::string& objname = entry.key.name; - const int delim_pos = objname.find(params.delim, params.prefix.size()); - if (delim_pos >= 0) { - /* extract key -with trailing delimiter- for CommonPrefix */ - const std::string& prefix_key = - objname.substr(0, delim_pos + params.delim.length()); - - if (common_prefixes && - common_prefixes->find(prefix_key) == common_prefixes->end()) { - next_marker = prefix_key; - (*common_prefixes)[prefix_key] = true; - count++; - } - continue; - } - } - - if (!params.end_marker.name.empty() && - params.end_marker.name.compare(entry.key.name) <= 0) { - // should not include end_marker - *is_truncated = false; - break; - } - count++; - result->push_back(std::move(entry)); - } -out: - return ret; -} - -int DB::raw_obj::InitializeParamsfromRawObj(const DoutPrefixProvider *dpp, - DBOpParams* params) { - int ret = 0; - - if (!params) - return -1; - - params->op.bucket.info.bucket.name = bucket_name; - params->op.obj.state.obj.key.name = obj_name; - params->op.obj.state.obj.key.instance = obj_instance; - params->op.obj.state.obj.key.ns = obj_ns; - params->op.obj.obj_id = obj_id; - - if (multipart_part_str != "0.0") { - params->op.obj.is_multipart = true; - } else { - params->op.obj.is_multipart = false; - } - - params->op.obj_data.multipart_part_str = multipart_part_str; - params->op.obj_data.part_num = part_num; - - return ret; -} - -int DB::Object::InitializeParamsfromObject(const DoutPrefixProvider *dpp, - DBOpParams* params) { - int ret = 0; - string bucket = bucket_info.bucket.name; - - if (!params) - return -1; - - params->op.bucket.info.bucket.name = bucket; - params->op.obj.state.obj = obj; - params->op.obj.obj_id = obj_id; - - return ret; -} - -int DB::Object::get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params) { - int ret = 0; - - if (params.op.obj.state.obj.key.name.empty()) { - /* Initialize */ - store->InitializeParams(dpp, ¶ms); - InitializeParamsfromObject(dpp, ¶ms); - } - - ret = store->ProcessOp(dpp, "GetObject", ¶ms); - - /* pick one field check if object exists */ - if (!ret && !params.op.obj.state.exists) { - ldpp_dout(dpp, 0)<<"Object(bucket:" << bucket_info.bucket.name << ", Object:"<< obj.key.name << ") doesn't exist" << dendl; - ret = -ENOENT; - } - - return ret; -} - -int DB::Object::obj_omap_set_val_by_key(const DoutPrefixProvider *dpp, - const std::string& key, bufferlist& val, - bool must_exist) { - int ret = 0; - - DBOpParams params = {}; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <& keys, - std::map* vals) -{ - int ret = 0; - DBOpParams params = {}; - std::map omap; - - if (!vals) - return -1; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <& info) -{ - int ret = 0; - 
DBOpParams params = {}; - std::map omap; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <set_instance(buf); -} - -int DB::Object::obj_omap_get_all(const DoutPrefixProvider *dpp, - std::map *m) -{ - int ret = 0; - DBOpParams params = {}; - std::map omap; - - if (!m) - return -1; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" < *m, bool* pmore) -{ - int ret = 0; - DBOpParams params = {}; - std::map omap; - map::iterator iter; - uint64_t count = 0; - - if (!m) - return -1; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <first < marker) - continue; - - if ((++count) > max_count) { - *pmore = true; - break; - } - - (*m)[iter->first] = iter->second; - } - -out: - return ret; -} - -int DB::Object::set_attrs(const DoutPrefixProvider *dpp, - map& setattrs, - map* rmattrs) -{ - int ret = 0; - - DBOpParams params = {}; - rgw::sal::Attrs *attrs; - map::iterator iter; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <begin(); iter != rmattrs->end(); ++iter) { - (*attrs).erase(iter->first); - } - } - for (iter = setattrs.begin(); iter != setattrs.end(); ++iter) { - (*attrs)[iter->first] = iter->second; - } - - params.op.query_str = "attrs"; - params.op.obj.state.mtime = real_clock::now(); - - ret = store->ProcessOp(dpp, "UpdateObject", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" < *attrset; - - store->InitializeParams(dpp, ¶ms); - InitializeParamsfromObject(dpp, ¶ms); - - ret = store->ProcessOp(dpp, "GetObject", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0) <<"In GetObject failed err:(" <ProcessOp(dpp, "UpdateObject", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <InitializeParams(dpp, ¶ms); - InitializeParamsfromRawObj(dpp, ¶ms); - - ret = db->ProcessOp(dpp, "GetObjectData", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In GetObjectData failed err:(" <InitializeParams(dpp, ¶ms); - InitializeParamsfromRawObj(dpp, ¶ms); - - /* XXX: Check for chunk_size ?? 
*/ - params.op.obj_data.offset = ofs; - unsigned write_len = std::min((uint64_t)bl.length() - write_ofs, len); - bl.begin(write_ofs).copy(write_len, params.op.obj_data.data); - params.op.obj_data.size = params.op.obj_data.data.length(); - params.op.obj.state.mtime = real_clock::now(); - - ret = db->ProcessOp(dpp, "PutObjectData", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In PutObjectData failed err:(" <& list_entries) { - int ret = 0; - store = get_store(); - DBOpParams db_params = {}; - - store->InitializeParams(dpp, &db_params); - InitializeParamsfromObject(dpp, &db_params); - - db_params.op.list_max_count = MAX_VERSIONED_OBJECTS; - - ret = store->ProcessOp(dpp, "ListVersionedObjects", &db_params); - - if (ret) { - ldpp_dout(dpp, 0)<<"In ListVersionedObjects failed err:(" <InitializeParams(dpp, ¶ms); - InitializeParamsfromObject(dpp, ¶ms); - params.op.obj.state.obj.key = ent.key; - - ret = get_object_impl(dpp, params); - - if (ret) { - ldpp_dout(dpp, 0) <<"get_object_impl of versioned object failed err:(" <shadow_obj to store ObjectID string */ - s->shadow_obj = params.op.obj.obj_id; - - *state = &obj_state; - **state = *s; - -out: - return ret; - -} - -int DB::Object::get_state(const DoutPrefixProvider *dpp, RGWObjState** pstate, bool follow_olh) -{ - return get_obj_state(dpp, bucket_info, obj, follow_olh, pstate); -} - -int DB::Object::Read::get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest) -{ - RGWObjState* state; - int r = source->get_state(dpp, &state, true); - if (r < 0) - return r; - if (!state->exists) - return -ENOENT; - if (!state->get_attr(name, dest)) - return -ENODATA; - - return 0; -} - -int DB::Object::Read::prepare(const DoutPrefixProvider *dpp) -{ - DB *store = source->get_store(); - CephContext *cct = store->ctx(); - - bufferlist etag; - - map::iterator iter; - - RGWObjState* astate; - - int r = source->get_state(dpp, &astate, true); - if (r < 0) - return r; - - if (!astate->exists) { - return -ENOENT; - } - - state.obj = astate->obj; - source->obj_id = astate->shadow_obj; - - if (params.target_obj) { - *params.target_obj = state.obj; - } - if (params.attrs) { - *params.attrs = astate->attrset; - if (cct->_conf->subsys.should_gather()) { - for (iter = params.attrs->begin(); iter != params.attrs->end(); ++iter) { - ldpp_dout(dpp, 20) << "Read xattr rgw_rados: " << iter->first << dendl; - } - } - } - - if (conds.if_match || conds.if_nomatch) { - r = get_attr(dpp, RGW_ATTR_ETAG, etag); - if (r < 0) - return r; - - if (conds.if_match) { - string if_match_str = rgw_string_unquote(conds.if_match); - ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-Match: " << if_match_str << dendl; - if (if_match_str.compare(0, etag.length(), etag.c_str(), etag.length()) != 0) { - return -ERR_PRECONDITION_FAILED; - } - } - - if (conds.if_nomatch) { - string if_nomatch_str = rgw_string_unquote(conds.if_nomatch); - ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-NoMatch: " << if_nomatch_str << dendl; - if (if_nomatch_str.compare(0, etag.length(), etag.c_str(), etag.length()) == 0) { - return -ERR_NOT_MODIFIED; - } - } - } - - if (params.obj_size) - *params.obj_size = astate->size; - if (params.lastmod) - *params.lastmod = astate->mtime; - - return 0; -} - -int DB::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end) -{ - if (ofs < 0) { - ofs += obj_size; - if (ofs < 0) - ofs = 0; - end = obj_size - 1; - } else if (end < 0) { - end = obj_size - 1; - } - - if (obj_size > 0) { - 
if (ofs >= (off_t)obj_size) { - return -ERANGE; - } - if (end >= (off_t)obj_size) { - end = obj_size - 1; - } - } - return 0; -} - -int DB::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp) -{ - DB *store = source->get_store(); - - uint64_t read_ofs = ofs; - uint64_t len, read_len; - - bufferlist read_bl; - uint64_t max_chunk_size = store->get_max_chunk_size(); - - RGWObjState* astate; - int r = source->get_state(dpp, &astate, true); - if (r < 0) - return r; - - if (!astate->exists) { - return -ENOENT; - } - - if (astate->size == 0) { - end = 0; - } else if (end >= (int64_t)astate->size) { - end = astate->size - 1; - } - - if (end < 0) - len = 0; - else - len = end - ofs + 1; - - - if (len > max_chunk_size) { - len = max_chunk_size; - } - - int head_data_size = astate->data.length(); - bool reading_from_head = (ofs < head_data_size); - - if (reading_from_head) { - if (astate) { // && astate->prefetch_data)? - if (!ofs && astate->data.length() >= len) { - bl = astate->data; - return bl.length(); - } - - if (ofs < astate->data.length()) { - unsigned copy_len = std::min((uint64_t)head_data_size - ofs, len); - astate->data.begin(ofs).copy(copy_len, bl); - return bl.length(); - } - } - } - - /* tail object */ - int part_num = (ofs / max_chunk_size); - /* XXX: Handle multipart_str */ - raw_obj read_obj(store, source->get_bucket_info().bucket.name, astate->obj.key.name, - astate->obj.key.instance, astate->obj.key.ns, source->obj_id, "0.0", part_num); - - read_len = len; - - ldpp_dout(dpp, 20) << "dbstore->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl; - - // read from non head object - r = read_obj.read(dpp, read_ofs, read_len, bl); - - if (r < 0) { - return r; - } - - return bl.length(); -} - -static int _get_obj_iterate_cb(const DoutPrefixProvider *dpp, - const DB::raw_obj& read_obj, off_t obj_ofs, - off_t len, bool is_head_obj, - RGWObjState* astate, void *arg) -{ - struct db_get_obj_data* d = static_cast(arg); - return d->store->get_obj_iterate_cb(dpp, read_obj, obj_ofs, len, - is_head_obj, astate, arg); -} - -int DB::get_obj_iterate_cb(const DoutPrefixProvider *dpp, - const raw_obj& read_obj, off_t obj_ofs, - off_t len, bool is_head_obj, - RGWObjState* astate, void *arg) -{ - struct db_get_obj_data* d = static_cast(arg); - bufferlist bl; - int r = 0; - - if (is_head_obj) { - bl = astate->data; - } else { - // read from non head object - raw_obj robj = read_obj; - /* read entire data. 
So pass offset as '0' & len as '-1' */ - r = robj.read(dpp, 0, -1, bl); - - if (r <= 0) { - return r; - } - } - - unsigned read_ofs = 0, read_len = 0; - while (read_ofs < bl.length()) { - unsigned chunk_len = std::min((uint64_t)bl.length() - read_ofs, (uint64_t)len); - r = d->client_cb->handle_data(bl, read_ofs, chunk_len); - if (r < 0) - return r; - read_ofs += chunk_len; - read_len += chunk_len; - ldpp_dout(dpp, 20) << "dbstore->get_obj_iterate_cb obj-ofs=" << obj_ofs << " len=" << len << " chunk_len = " << chunk_len << " read_len = " << read_len << dendl; - } - - - d->offset += read_len; - - return read_len; -} - -int DB::Object::Read::iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb) -{ - DB *store = source->get_store(); - const uint64_t chunk_size = store->get_max_chunk_size(); - - db_get_obj_data data(store, cb, ofs); - - int r = source->iterate_obj(dpp, source->get_bucket_info(), state.obj, - ofs, end, chunk_size, _get_obj_iterate_cb, &data); - if (r < 0) { - ldpp_dout(dpp, 0) << "iterate_obj() failed with " << r << dendl; - return r; - } - - return 0; -} - -int DB::Object::iterate_obj(const DoutPrefixProvider *dpp, - const RGWBucketInfo& bucket_info, const rgw_obj& obj, - off_t ofs, off_t end, uint64_t max_chunk_size, - iterate_obj_cb cb, void *arg) -{ - DB *store = get_store(); - uint64_t len; - RGWObjState* astate; - - int r = get_state(dpp, &astate, true); - if (r < 0) { - return r; - } - - if (!astate->exists) { - return -ENOENT; - } - - if (end < 0) - len = 0; - else - len = end - ofs + 1; - - /* XXX: Will it really help to store all parts info in astate like manifest in Rados? */ - int part_num = 0; - int head_data_size = astate->data.length(); - - while (ofs <= end && (uint64_t)ofs < astate->size) { - part_num = (ofs / max_chunk_size); - uint64_t read_len = std::min(len, max_chunk_size); - - /* XXX: Handle multipart_str */ - raw_obj read_obj(store, get_bucket_info().bucket.name, astate->obj.key.name, - astate->obj.key.instance, astate->obj.key.ns, obj_id, "0.0", part_num); - bool reading_from_head = (ofs < head_data_size); - - r = cb(dpp, read_obj, ofs, read_len, reading_from_head, astate, arg); - if (r <= 0) { - return r; - } - /* r refers to chunk_len (no. of bytes) handled in cb */ - len -= r; - ofs += r; - } - - return 0; -} - -int DB::Object::Write::prepare(const DoutPrefixProvider* dpp) -{ - DB *store = target->get_store(); - - int ret = -1; - - /* XXX: handle assume_noent */ - - obj_state.obj = target->obj; - - if (target->obj_id.empty()) { - if (!target->obj.key.instance.empty() && (target->obj.key.instance != "null")) { - /* versioned object. Set obj_id same as versionID/instance */ - target->obj_id = target->obj.key.instance; - } else { - // generate obj_id - char buf[33]; - gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); - target->obj_id = buf; - } - } - - ret = 0; - return ret; -} - -/* writes tail objects */ -int DB::Object::Write::write_data(const DoutPrefixProvider* dpp, - bufferlist& data, uint64_t ofs) { - DB *store = target->get_store(); - /* tail objects */ - /* XXX: Split into parts each of max_chunk_size. 
But later make tail - * object chunk size limit to sqlite blob limit */ - int part_num = 0; - - uint64_t max_chunk_size = store->get_max_chunk_size(); - - /* tail_obj ofs should be greater than max_head_size */ - if (mp_part_str == "0.0") { // ensure not multipart meta object - if (ofs < store->get_max_head_size()) { - return -1; - } - } - - uint64_t end = data.length(); - uint64_t write_ofs = 0; - /* as we are writing max_chunk_size at a time in sal_dbstore DBAtomicWriter::process(), - * maybe this while loop is not needed - */ - while (write_ofs < end) { - part_num = (ofs / max_chunk_size); - uint64_t len = std::min(end, max_chunk_size); - - /* XXX: Handle multipart_str */ - raw_obj write_obj(store, target->get_bucket_info().bucket.name, obj_state.obj.key.name, - obj_state.obj.key.instance, obj_state.obj.key.ns, target->obj_id, mp_part_str, part_num); - - - ldpp_dout(dpp, 20) << "dbstore->write obj-ofs=" << ofs << " write_len=" << len << dendl; - - // write into non head object - int r = write_obj.write(dpp, ofs, write_ofs, len, data); - if (r < 0) { - return r; - } - /* r refers to chunk_len (no. of bytes) handled in raw_obj::write */ - len -= r; - ofs += r; - write_ofs += r; - } - - return 0; -} - -/* Write metadata & head object data */ -int DB::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, - uint64_t size, uint64_t accounted_size, - map& attrs, - bool assume_noent, bool modify_tail) -{ - DB *store = target->get_store(); - - RGWObjState* state = &obj_state; - map *attrset; - DBOpParams params = {}; - int ret = 0; - string etag; - string content_type; - bufferlist acl_bl; - string storage_class; - - map::iterator iter; - - store->InitializeParams(dpp, ¶ms); - target->InitializeParamsfromObject(dpp, ¶ms); - - obj_state = params.op.obj.state; - - if (real_clock::is_zero(meta.set_mtime)) { - meta.set_mtime = real_clock::now(); - } - - attrset = &state->attrset; - if (target->bucket_info.obj_lock_enabled() && target->bucket_info.obj_lock.has_rule()) { - // && meta.flags == PUT_OBJ_CREATE) { - auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); - if (iter == attrs.end()) { - real_time lock_until_date = target->bucket_info.obj_lock.get_lock_until_date(meta.set_mtime); - string mode = target->bucket_info.obj_lock.get_mode(); - RGWObjectRetention obj_retention(mode, lock_until_date); - bufferlist bl; - obj_retention.encode(bl); - (*attrset)[RGW_ATTR_OBJECT_RETENTION] = bl; - } - } - - state->mtime = meta.set_mtime; - - if (meta.data) { - /* if we want to overwrite the data, we also want to overwrite the - xattrs, so just remove the object */ - params.op.obj.head_data = *meta.data; - } - - if (meta.rmattrs) { - for (iter = meta.rmattrs->begin(); iter != meta.rmattrs->end(); ++iter) { - const string& name = iter->first; - (*attrset).erase(name.c_str()); - } - } - - if (meta.manifest) { - storage_class = meta.manifest->get_tail_placement().placement_rule.storage_class; - - /* remove existing manifest attr */ - iter = attrs.find(RGW_ATTR_MANIFEST); - if (iter != attrs.end()) - attrs.erase(iter); - - bufferlist bl; - encode(*meta.manifest, bl); - (*attrset)[RGW_ATTR_MANIFEST] = bl; - } - - for (iter = attrs.begin(); iter != attrs.end(); ++iter) { - const string& name = iter->first; - bufferlist& bl = iter->second; - - if (!bl.length()) - continue; - - (*attrset)[name.c_str()] = bl; - - if (name.compare(RGW_ATTR_ETAG) == 0) { - etag = rgw_bl_str(bl); - params.op.obj.etag = etag; - } else if (name.compare(RGW_ATTR_CONTENT_TYPE) == 0) { - content_type = rgw_bl_str(bl); - } else if 
(name.compare(RGW_ATTR_ACL) == 0) { - acl_bl = bl; - } - } - - if (!storage_class.empty()) { - bufferlist bl; - bl.append(storage_class); - (*attrset)[RGW_ATTR_STORAGE_CLASS] = bl; - } - - params.op.obj.state = *state ; - params.op.obj.state.exists = true; - params.op.obj.state.size = size; - params.op.obj.state.accounted_size = accounted_size; - params.op.obj.owner = target->get_bucket_info().owner.id; - params.op.obj.category = meta.category; - - if (meta.mtime) { - *meta.mtime = meta.set_mtime; - } - - params.op.query_str = "meta"; - params.op.obj.obj_id = target->obj_id; - - /* Check if versioned */ - bool is_versioned = !target->obj.key.instance.empty() && (target->obj.key.instance != "null"); - params.op.obj.is_versioned = is_versioned; - - if (is_versioned && (params.op.obj.category == RGWObjCategory::Main)) { - /* versioned object */ - params.op.obj.flags |= rgw_bucket_dir_entry::FLAG_VER; - } - ret = store->ProcessOp(dpp, "PutObject", ¶ms); - if (ret) { - ldpp_dout(dpp, 0)<<"In PutObject failed err:(" <list_versioned_objects(dpp, del_params.op.obj.list_entries); - if (ret) { - ldpp_dout(dpp, 0)<<"ListVersionedObjects failed err:(" <get_store(); - - ret = store->ProcessOp(dpp, "DeleteObject", &del_params); - if (ret) { - ldpp_dout(dpp, 0) << "In DeleteObject failed err:(" <ProcessOp(dpp, "UpdateObjectData", &update_params); - - if (ret) { - ldpp_dout(dpp, 0) << "Updating tail objects mtime failed err:(" <get_store(); - bool versioning_suspended = ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == BUCKET_VERSIONS_SUSPENDED); - int ret = -1; - DBOpParams olh_params = {}; - std::string version_id; - DBOpParams next_params = del_params; - - version_id = del_params.op.obj.state.obj.key.instance; - - DBOpParams dm_params = del_params; - - // create delete marker - - store->InitializeParams(dpp, &dm_params); - target->InitializeParamsfromObject(dpp, &dm_params); - dm_params.op.obj.category = RGWObjCategory::None; - - if (versioning_suspended) { - dm_params.op.obj.state.obj.key.instance = "null"; - } else { - store->gen_rand_obj_instance_name(&dm_params.op.obj.state.obj.key); - dm_params.op.obj.obj_id = dm_params.op.obj.state.obj.key.instance; - } - - dm_params.op.obj.flags |= (rgw_bucket_dir_entry::FLAG_DELETE_MARKER); - - ret = store->ProcessOp(dpp, "PutObject", &dm_params); - - if (ret) { - ldpp_dout(dpp, 0) << "delete_olh: failed to create delete marker - err:(" <* entry) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_entry.index = oid; - params.op.lc_entry.entry.set_bucket(marker); - - params.op.query_str = "get_entry"; - ret = ProcessOp(dpp, "GetLCEntry", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <reset(e); - } - -out: - return ret; -} - -int DB::get_next_entry(const std::string& oid, const std::string& marker, - std::unique_ptr* entry) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_entry.index = oid; - params.op.lc_entry.entry.set_bucket(marker); - - params.op.query_str = "get_next_entry"; - ret = ProcessOp(dpp, "GetLCEntry", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <reset(e); - } - -out: - return ret; -} - -int DB::set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - 
params.op.lc_entry.index = oid; - params.op.lc_entry.entry = entry; - - ret = ProcessOp(dpp, "InsertLCEntry", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In InsertLCEntry failed err:(" <>& entries) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - entries.clear(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_entry.index = oid; - params.op.lc_entry.min_marker = marker; - params.op.list_max_count = max_entries; - - ret = ProcessOp(dpp, "ListLCEntries", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In ListLCEntries failed err:(" <(std::move(entry))); - } - -out: - return ret; -} - -int DB::rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_entry.index = oid; - params.op.lc_entry.entry = entry; - - ret = ProcessOp(dpp, "RemoveLCEntry", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In RemoveLCEntry failed err:(" <* head) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_head.index = oid; - - ret = ProcessOp(dpp, "GetLCHead", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In GetLCHead failed err:(" <(params.op.lc_head.head); - -out: - return ret; -} - -int DB::put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head) -{ - int ret = 0; - const DoutPrefixProvider *dpp = get_def_dpp(); - - DBOpParams params = {}; - InitializeParams(dpp, ¶ms); - - params.op.lc_head.index = oid; - params.op.lc_head.head = head; - - ret = ProcessOp(dpp, "InsertLCHead", ¶ms); - - if (ret) { - ldpp_dout(dpp, 0)<<"In InsertLCHead failed err:(" < lk(mtx); - - ldpp_dout(dpp, 2) << " DB GC started " << dendl; - int max = 100; - RGWUserBuckets buckets; - bool is_truncated = false; - - do { - std::string& marker = bucket_marker; - rgw_user user; - user.id = user_marker; - buckets.clear(); - is_truncated = false; - - int r = db->list_buckets(dpp, "all", user, marker, string(), - max, false, &buckets, &is_truncated); - - if (r < 0) { //do nothing? retry later ? - break; - } - - for (const auto& ent : buckets.get_buckets()) { - const std::string &bname = ent.first; - - r = db->delete_stale_objs(dpp, bname, gc_obj_min_wait); - - if (r < 0) { //do nothing? skip to next entry? - ldpp_dout(dpp, 2) << " delete_stale_objs failed for bucket( " << bname <<")" << dendl; - } - bucket_marker = bname; - user_marker = user.id; - - /* XXX: If using locks, unlock here and reacquire in the next iteration */ - cv.wait_for(lk, std::chrono::milliseconds(100)); - if (stop_signalled) { - goto done; - } - } - } while(is_truncated); - - bucket_marker.clear(); - cv.wait_for(lk, std::chrono::milliseconds(gc_interval*10)); - } while(! 
stop_signalled); - -done: - return nullptr; -} - -} } // namespace rgw::store - diff --git a/src/rgw/store/dbstore/common/dbstore.h b/src/rgw/store/dbstore/common/dbstore.h deleted file mode 100644 index 12ab3f0600d..00000000000 --- a/src/rgw/store/dbstore/common/dbstore.h +++ /dev/null @@ -1,2024 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#ifndef DB_STORE_H -#define DB_STORE_H - -#include -#include -#include -#include -#include -#include -#include -// this seems safe to use, at least for now--arguably, we should -// prefer header-only fmt, in general -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include "fmt/format.h" -#include -#include "rgw_sal_store.h" -#include "rgw_common.h" -#include "rgw_bucket.h" -#include "global/global_context.h" -#include "global/global_init.h" -#include "common/ceph_context.h" -#include "rgw_obj_manifest.h" -#include "rgw_multi.h" - -namespace rgw { namespace store { - -class DB; - -struct DBOpUserInfo { - RGWUserInfo uinfo = {}; - obj_version user_version; - rgw::sal::Attrs user_attrs; -}; - -struct DBOpBucketInfo { - RGWBucketEnt ent; // maybe not needed. not used in create/get_bucket - RGWBucketInfo info; - RGWUser* owner = nullptr; - rgw::sal::Attrs bucket_attrs; - obj_version bucket_version; - ceph::real_time mtime; - // used for list query - std::string min_marker; - std::string max_marker; - std::list list_entries; -}; - -struct DBOpObjectInfo { - RGWAccessControlPolicy acls; - RGWObjState state = {}; - - /* Below are taken from rgw_bucket_dir_entry */ - RGWObjCategory category; - std::string etag; - std::string owner; - std::string owner_display_name; - std::string content_type; - std::string storage_class; - bool appendable; - uint64_t index_ver; - std::string tag; - uint16_t flags; - uint64_t versioned_epoch; - - /* from state.manifest (RGWObjManifest) */ - std::map objs; - uint64_t head_size{0}; - rgw_placement_rule head_placement_rule; - uint64_t max_head_size{0}; - std::string obj_id; - rgw_bucket_placement tail_placement; /* might be different than the original bucket, - as object might have been copied across pools */ - std::map rules; - std::string tail_instance; /* tail object's instance */ - - - /* Obj's omap store */ - std::map omap; - - /* Extra fields */ - bool is_multipart; - std::list mp_parts; - - bufferlist head_data; - std::string min_marker; - std::string max_marker; - std::string prefix; - std::list list_entries; - /* XXX: Maybe use std::vector instead of std::list */ - - /* for versioned objects */ - bool is_versioned; - uint64_t version_num = 0; -}; - -struct DBOpObjectDataInfo { - RGWObjState state; - uint64_t part_num; - std::string multipart_part_str; - uint64_t offset; - uint64_t size; - bufferlist data{}; -}; - -struct DBOpLCHeadInfo { - std::string index; - rgw::sal::StoreLifecycle::StoreLCHead head; -}; - -struct DBOpLCEntryInfo { - std::string index; - rgw::sal::StoreLifecycle::StoreLCEntry entry; - // used for list query - std::string min_marker; - std::list list_entries; -}; - -struct DBOpInfo { - std::string name; // Op name - /* Support only single access_key for now. So store - * it separately as primary access_key_id & secret to - * be able to query easily. 
- * - * XXX: Swift keys and subuser not supported for now */ - DBOpUserInfo user; - std::string query_str; - DBOpBucketInfo bucket; - DBOpObjectInfo obj; - DBOpObjectDataInfo obj_data; - DBOpLCHeadInfo lc_head; - DBOpLCEntryInfo lc_entry; - uint64_t list_max_count; -}; - -struct DBOpParams { - CephContext *cct; - - /* Tables */ - std::string user_table; - std::string bucket_table; - std::string object_table; - - /* Ops*/ - DBOpInfo op; - - std::string objectdata_table; - std::string object_trigger; - std::string object_view; - std::string quota_table; - std::string lc_head_table; - std::string lc_entry_table; - std::string obj; -}; - -/* Used for prepared schemas. - * Difference with above structure is that all - * the fields are strings here to accommodate any - * style identifiers used by backend db. By default - * initialized with sqlitedb style, can be overriden - * using InitPrepareParams() - * - * These identifiers are used in prepare and bind statements - * to get the right index of each param. - */ -struct DBOpUserPrepareInfo { - static constexpr const char* user_id = ":user_id"; - static constexpr const char* tenant = ":tenant"; - static constexpr const char* ns = ":ns"; - static constexpr const char* display_name = ":display_name"; - static constexpr const char* user_email = ":user_email"; - /* Support only single access_key for now. So store - * it separately as primary access_key_id & secret to - * be able to query easily. - * - * In future, when need to support & query from multiple - * access keys, better to maintain them in a separate table. - */ - static constexpr const char* access_keys_id = ":access_keys_id"; - static constexpr const char* access_keys_secret = ":access_keys_secret"; - static constexpr const char* access_keys = ":access_keys"; - static constexpr const char* swift_keys = ":swift_keys"; - static constexpr const char* subusers = ":subusers"; - static constexpr const char* suspended = ":suspended"; - static constexpr const char* max_buckets = ":max_buckets"; - static constexpr const char* op_mask = ":op_mask"; - static constexpr const char* user_caps = ":user_caps"; - static constexpr const char* admin = ":admin"; - static constexpr const char* system = ":system"; - static constexpr const char* placement_name = ":placement_name"; - static constexpr const char* placement_storage_class = ":placement_storage_class"; - static constexpr const char* placement_tags = ":placement_tags"; - static constexpr const char* bucket_quota = ":bucket_quota"; - static constexpr const char* temp_url_keys = ":temp_url_keys"; - static constexpr const char* user_quota = ":user_quota"; - static constexpr const char* type = ":type"; - static constexpr const char* mfa_ids = ":mfa_ids"; - static constexpr const char* assumed_role_arn = ":assumed_role_arn"; - static constexpr const char* user_attrs = ":user_attrs"; - static constexpr const char* user_ver = ":user_vers"; - static constexpr const char* user_ver_tag = ":user_ver_tag"; -}; - -struct DBOpBucketPrepareInfo { - static constexpr const char* bucket_name = ":bucket_name"; - static constexpr const char* tenant = ":tenant"; - static constexpr const char* marker = ":marker"; - static constexpr const char* bucket_id = ":bucket_id"; - static constexpr const char* size = ":size"; - static constexpr const char* size_rounded = ":size_rounded"; - static constexpr const char* creation_time = ":creation_time"; - static constexpr const char* count = ":count"; - static constexpr const char* placement_name = ":placement_name"; - static 
constexpr const char* placement_storage_class = ":placement_storage_class"; - /* ownerid - maps to DBOpUserPrepareInfo */ - static constexpr const char* flags = ":flags"; - static constexpr const char* zonegroup = ":zonegroup"; - static constexpr const char* has_instance_obj = ":has_instance_obj"; - static constexpr const char* quota = ":quota"; - static constexpr const char* requester_pays = ":requester_pays"; - static constexpr const char* has_website = ":has_website"; - static constexpr const char* website_conf = ":website_conf"; - static constexpr const char* swift_versioning = ":swift_versioning"; - static constexpr const char* swift_ver_location = ":swift_ver_location"; - static constexpr const char* mdsearch_config = ":mdsearch_config"; - static constexpr const char* new_bucket_instance_id = ":new_bucket_instance_id"; - static constexpr const char* obj_lock = ":obj_lock"; - static constexpr const char* sync_policy_info_groups = ":sync_policy_info_groups"; - static constexpr const char* bucket_attrs = ":bucket_attrs"; - static constexpr const char* bucket_ver = ":bucket_vers"; - static constexpr const char* bucket_ver_tag = ":bucket_ver_tag"; - static constexpr const char* mtime = ":mtime"; - static constexpr const char* min_marker = ":min_marker"; - static constexpr const char* max_marker = ":max_marker"; -}; - -struct DBOpObjectPrepareInfo { - static constexpr const char* obj_name = ":obj_name"; - static constexpr const char* obj_instance = ":obj_instance"; - static constexpr const char* obj_ns = ":obj_ns"; - static constexpr const char* acls = ":acls"; - static constexpr const char* index_ver = ":index_ver"; - static constexpr const char* tag = ":tag"; - static constexpr const char* flags = ":flags"; - static constexpr const char* versioned_epoch = ":versioned_epoch"; - static constexpr const char* obj_category = ":obj_category"; - static constexpr const char* etag = ":etag"; - static constexpr const char* owner = ":owner"; - static constexpr const char* owner_display_name = ":owner_display_name"; - static constexpr const char* storage_class = ":storage_class"; - static constexpr const char* appendable = ":appendable"; - static constexpr const char* content_type = ":content_type"; - static constexpr const char* index_hash_source = ":index_hash_source"; - static constexpr const char* obj_size = ":obj_size"; - static constexpr const char* accounted_size = ":accounted_size"; - static constexpr const char* mtime = ":mtime"; - static constexpr const char* epoch = ":epoch"; - static constexpr const char* obj_tag = ":obj_tag"; - static constexpr const char* tail_tag = ":tail_tag"; - static constexpr const char* write_tag = ":write_tag"; - static constexpr const char* fake_tag = ":fake_tag"; - static constexpr const char* shadow_obj = ":shadow_obj"; - static constexpr const char* has_data = ":has_data"; - static constexpr const char* is_versioned = ":is_versioned"; - static constexpr const char* version_num = ":version_num"; - static constexpr const char* pg_ver = ":pg_ver"; - static constexpr const char* zone_short_id = ":zone_short_id"; - static constexpr const char* obj_version = ":obj_version"; - static constexpr const char* obj_version_tag = ":obj_version_tag"; - static constexpr const char* obj_attrs = ":obj_attrs"; - static constexpr const char* head_size = ":head_size"; - static constexpr const char* max_head_size = ":max_head_size"; - static constexpr const char* obj_id = ":obj_id"; - static constexpr const char* tail_instance = ":tail_instance"; - static constexpr const char* 
head_placement_rule_name = ":head_placement_rule_name";
-    static constexpr const char* head_placement_storage_class = ":head_placement_storage_class";
-    static constexpr const char* tail_placement_rule_name = ":tail_placement_rule_name";
-    static constexpr const char* tail_placement_storage_class = ":tail_placement_storage_class";
-    static constexpr const char* manifest_part_objs = ":manifest_part_objs";
-    static constexpr const char* manifest_part_rules = ":manifest_part_rules";
-    static constexpr const char* omap = ":omap";
-    static constexpr const char* is_multipart = ":is_multipart";
-    static constexpr const char* mp_parts = ":mp_parts";
-    static constexpr const char* head_data = ":head_data";
-    static constexpr const char* min_marker = ":min_marker";
-    static constexpr const char* max_marker = ":max_marker";
-    static constexpr const char* prefix = ":prefix";
-    /* Below used to update mp_parts obj name
-     * from meta object to src object on completion */
-    static constexpr const char* new_obj_name = ":new_obj_name";
-    static constexpr const char* new_obj_instance = ":new_obj_instance";
-    static constexpr const char* new_obj_ns = ":new_obj_ns";
-};
-
-struct DBOpObjectDataPrepareInfo {
-    static constexpr const char* part_num = ":part_num";
-    static constexpr const char* offset = ":offset";
-    static constexpr const char* data = ":data";
-    static constexpr const char* size = ":size";
-    static constexpr const char* multipart_part_str = ":multipart_part_str";
-};
-
-struct DBOpLCEntryPrepareInfo {
-    static constexpr const char* index = ":index";
-    static constexpr const char* bucket_name = ":bucket_name";
-    static constexpr const char* start_time = ":start_time";
-    static constexpr const char* status = ":status";
-    static constexpr const char* min_marker = ":min_marker";
-};
-
-struct DBOpLCHeadPrepareInfo {
-    static constexpr const char* index = ":index";
-    static constexpr const char* start_date = ":start_date";
-    static constexpr const char* marker = ":marker";
-};
-
-struct DBOpPrepareInfo {
-  DBOpUserPrepareInfo user;
-  std::string_view query_str; // view into DBOpInfo::query_str
-  DBOpBucketPrepareInfo bucket;
-  DBOpObjectPrepareInfo obj;
-  DBOpObjectDataPrepareInfo obj_data;
-  DBOpLCHeadPrepareInfo lc_head;
-  DBOpLCEntryPrepareInfo lc_entry;
-  static constexpr const char* list_max_count = ":list_max_count";
-};
-
-struct DBOpPrepareParams {
-  /* Tables */
-  std::string user_table;
-  std::string bucket_table;
-  std::string object_table;
-
-  /* Ops */
-  DBOpPrepareInfo op;
-
-  std::string objectdata_table;
-  std::string object_trigger;
-  std::string object_view;
-  std::string quota_table;
-  std::string lc_head_table;
-  std::string lc_entry_table;
-};
-
-struct DBOps {
-  std::shared_ptr<class InsertUserOp> InsertUser;
-  std::shared_ptr<class RemoveUserOp> RemoveUser;
-  std::shared_ptr<class GetUserOp> GetUser;
-  std::shared_ptr<class InsertBucketOp> InsertBucket;
-  std::shared_ptr<class UpdateBucketOp> UpdateBucket;
-  std::shared_ptr<class RemoveBucketOp> RemoveBucket;
-  std::shared_ptr<class GetBucketOp> GetBucket;
-  std::shared_ptr<class ListUserBucketsOp> ListUserBuckets;
-  std::shared_ptr<class InsertLCEntryOp> InsertLCEntry;
-  std::shared_ptr<class RemoveLCEntryOp> RemoveLCEntry;
-  std::shared_ptr<class GetLCEntryOp> GetLCEntry;
-  std::shared_ptr<class ListLCEntriesOp> ListLCEntries;
-  std::shared_ptr<class InsertLCHeadOp> InsertLCHead;
-  std::shared_ptr<class RemoveLCHeadOp> RemoveLCHead;
-  std::shared_ptr<class GetLCHeadOp> GetLCHead;
-};
-
-class ObjectOp {
-  public:
-    ObjectOp() {};
-
-    virtual ~ObjectOp() {}
-
-    std::shared_ptr<class PutObjectOp> PutObject;
-    std::shared_ptr<class DeleteObjectOp> DeleteObject;
-    std::shared_ptr<class GetObjectOp> GetObject;
-    std::shared_ptr<class UpdateObjectOp> UpdateObject;
-    std::shared_ptr<class ListBucketObjectsOp> ListBucketObjects;
-    std::shared_ptr<class ListVersionedObjectsOp> ListVersionedObjects;
-    std::shared_ptr<class PutObjectDataOp> PutObjectData;
-    std::shared_ptr<class UpdateObjectDataOp> UpdateObjectData;
-    std::shared_ptr<class GetObjectDataOp> GetObjectData;
-    std::shared_ptr<class DeleteObjectDataOp> DeleteObjectData;
-    std::shared_ptr<class DeleteStaleObjectDataOp> DeleteStaleObjectData;
-
-    virtual int InitializeObjectOps(std::string db_name, const DoutPrefixProvider *dpp) { return 0; }
-};
-
-class DBOp {
-  private:
-    static constexpr std::string_view CreateUserTableQ =
-      /* Corresponds to rgw::sal::User
-       *
-       * For now only UserID is made Primary key.
-       * If multiple tenants are stored in single .db handle, should
-       * make both (UserID, Tenant) as Primary Key.
-       *
-       * XXX:
-       * - AccessKeys, SwiftKeys, Subusers (map<>) are stored as blob.
-       *   To enable easy query, first accesskey is stored in separate fields
-       *   AccessKeysID, AccessKeysSecret.
-       *   In future, maybe have a separate table to store these keys and
-       *   query on that table.
-       * - Quota stored as blob .. should be linked to quota table.
-       */
-      "CREATE TABLE IF NOT EXISTS '{}' ( \
-      UserID TEXT NOT NULL UNIQUE, \
-      Tenant TEXT , \
-      NS TEXT , \
-      DisplayName TEXT , \
-      UserEmail TEXT , \
-      AccessKeysID TEXT , \
-      AccessKeysSecret TEXT , \
-      AccessKeys BLOB , \
-      SwiftKeys BLOB , \
-      SubUsers BLOB , \
-      Suspended INTEGER , \
-      MaxBuckets INTEGER , \
-      OpMask INTEGER , \
-      UserCaps BLOB , \
-      Admin INTEGER , \
-      System INTEGER , \
-      PlacementName TEXT , \
-      PlacementStorageClass TEXT , \
-      PlacementTags BLOB , \
-      BucketQuota BLOB , \
-      TempURLKeys BLOB , \
-      UserQuota BLOB , \
-      TYPE INTEGER , \
-      MfaIDs BLOB , \
-      AssumedRoleARN TEXT , \
-      UserAttrs BLOB, \
-      UserVersion INTEGER, \
-      UserVersionTag TEXT, \
-      PRIMARY KEY (UserID) \n);";
-
-    static constexpr std::string_view CreateBucketTableQ =
-      /* Corresponds to rgw::sal::Bucket
-       *
-       * For now only BucketName is made Primary key. Since buckets should
-       * be unique across users in rgw, OwnerID is not made part of primary key.
-       * However it is still referenced as foreign key
-       *
-       * If multiple tenants are stored in single .db handle, should
-       * make both (BucketName, Tenant) as Primary Key. Also should
-       * reference (UserID, Tenant) as Foreign key.
-       *
-       * leaving below RADOS specific fields
-       *   - rgw_data_placement_target explicit_placement (struct rgw_bucket)
-       *   - rgw::BucketLayout layout (struct RGWBucketInfo)
-       *   - const static uint32_t NUM_SHARDS_BLIND_BUCKET (struct RGWBucketInfo),
-       *     should be '0' indicating no sharding.
-       *   - cls_rgw_reshard_status reshard_status (struct RGWBucketInfo)
-       *
-       * XXX:
-       *   - Quota stored as blob .. should be linked to quota table.
-       *   - WebsiteConf stored as BLOB.. if required, should be split
-       *   - Storing bucket_version (struct RGWBucket), objv_tracker
-       *     (struct RGWBucketInfo) separately. Are they same?
- * - */ - "CREATE TABLE IF NOT EXISTS '{}' ( \ - BucketName TEXT NOT NULL UNIQUE , \ - Tenant TEXT, \ - Marker TEXT, \ - BucketID TEXT, \ - Size INTEGER, \ - SizeRounded INTEGER,\ - CreationTime BLOB, \ - Count INTEGER, \ - PlacementName TEXT , \ - PlacementStorageClass TEXT , \ - OwnerID TEXT NOT NULL, \ - Flags INTEGER, \ - Zonegroup TEXT, \ - HasInstanceObj BOOLEAN, \ - Quota BLOB, \ - RequesterPays BOOLEAN, \ - HasWebsite BOOLEAN, \ - WebsiteConf BLOB, \ - SwiftVersioning BOOLEAN, \ - SwiftVerLocation TEXT, \ - MdsearchConfig BLOB, \ - NewBucketInstanceID TEXT,\ - ObjectLock BLOB, \ - SyncPolicyInfoGroups BLOB, \ - BucketAttrs BLOB, \ - BucketVersion INTEGER, \ - BucketVersionTag TEXT, \ - Mtime BLOB, \ - PRIMARY KEY (BucketName) \ - FOREIGN KEY (OwnerID) \ - REFERENCES '{}' (UserID) ON DELETE CASCADE ON UPDATE CASCADE \n);"; - - static constexpr std::string_view CreateObjectTableTriggerQ = - "CREATE TRIGGER IF NOT EXISTS '{}' \ - AFTER INSERT ON '{}' \ - BEGIN \ - UPDATE '{}' \ - SET VersionNum = (SELECT COALESCE(max(VersionNum), 0) from '{}' where ObjName = new.ObjName) + 1 \ - where ObjName = new.ObjName and ObjInstance = new.ObjInstance; \ - END;"; - - static constexpr std::string_view CreateObjectTableQ = - /* Corresponds to rgw::sal::Object - * - * For now only BucketName, ObjName is made Primary key. - * If multiple tenants are stored in single .db handle, should - * include Tenant too in the Primary Key. Also should - * reference (BucketID, Tenant) as Foreign key. - * - * referring to - * - rgw_bucket_dir_entry - following are added for now - * flags, - * versioned_epoch - * tag - * index_ver - * meta.category - * meta.etag - * meta.storageclass - * meta.appendable - * meta.content_type - * meta.owner - * meta.owner_display_name - * - * - RGWObjState. Below are omitted from that struct - * as they seem in-memory variables - * * is_atomic, has_atts, exists, prefetch_data, keep_tail, - * - RGWObjManifest - * - * Extra field added "IsMultipart" to flag multipart uploads, - * HeadData to store first chunk data. - */ - "CREATE TABLE IF NOT EXISTS '{}' ( \ - ObjName TEXT NOT NULL , \ - ObjInstance TEXT, \ - ObjNS TEXT, \ - BucketName TEXT NOT NULL , \ - ACLs BLOB, \ - IndexVer INTEGER, \ - Tag TEXT, \ - Flags INTEGER, \ - VersionedEpoch INTEGER, \ - ObjCategory INTEGER, \ - Etag TEXT, \ - Owner TEXT, \ - OwnerDisplayName TEXT, \ - StorageClass TEXT, \ - Appendable BOOL, \ - ContentType TEXT, \ - IndexHashSource TEXT, \ - ObjSize INTEGER, \ - AccountedSize INTEGER, \ - Mtime BLOB, \ - Epoch INTEGER, \ - ObjTag BLOB, \ - TailTag BLOB, \ - WriteTag TEXT, \ - FakeTag BOOL, \ - ShadowObj TEXT, \ - HasData BOOL, \ - IsVersioned BOOL, \ - VersionNum INTEGER, \ - PGVer INTEGER, \ - ZoneShortID INTEGER, \ - ObjVersion INTEGER, \ - ObjVersionTag TEXT, \ - ObjAttrs BLOB, \ - HeadSize INTEGER, \ - MaxHeadSize INTEGER, \ - ObjID TEXT NOT NULL, \ - TailInstance TEXT, \ - HeadPlacementRuleName TEXT, \ - HeadPlacementRuleStorageClass TEXT, \ - TailPlacementRuleName TEXT, \ - TailPlacementStorageClass TEXT, \ - ManifestPartObjs BLOB, \ - ManifestPartRules BLOB, \ - Omap BLOB, \ - IsMultipart BOOL, \ - MPPartsList BLOB, \ - HeadData BLOB, \ - PRIMARY KEY (ObjName, ObjInstance, BucketName), \ - FOREIGN KEY (BucketName) \ - REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);"; - - static constexpr std::string_view CreateObjectDataTableQ = - /* Extra field 'MultipartPartStr' added which signifies multipart - * . 
For regular object, it is '0.0' - * - * - part: a collection of stripes that make a contiguous part of an - object. A regular object will only have one part (although might have - many stripes), a multipart object might have many parts. Each part - has a fixed stripe size (ObjChunkSize), although the last stripe of a - part might be smaller than that. - */ - "CREATE TABLE IF NOT EXISTS '{}' ( \ - ObjName TEXT NOT NULL , \ - ObjInstance TEXT, \ - ObjNS TEXT, \ - BucketName TEXT NOT NULL , \ - ObjID TEXT NOT NULL , \ - MultipartPartStr TEXT, \ - PartNum INTEGER NOT NULL, \ - Offset INTEGER, \ - Size INTEGER, \ - Mtime BLOB, \ - Data BLOB, \ - PRIMARY KEY (ObjName, BucketName, ObjInstance, ObjID, MultipartPartStr, PartNum), \ - FOREIGN KEY (BucketName) \ - REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);"; - - static constexpr std::string_view CreateObjectViewQ = - /* This query creats temporary view with entries from ObjectData table which have - * corresponding head object (i.e, with same ObjName, ObjInstance, ObjNS, ObjID) - * in the Object table. - * - * GC thread can use this view to delete stale entries from the ObjectData table which - * do not exist in this view. - * - * XXX: This view is throwing ForeignKey mismatch error, mostly may be because all the keys - * of objectdata table are not referenced here. So this view is not used atm. - */ - "CREATE TEMP VIEW IF NOT EXISTS '{}' AS \ - SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING \ - (ObjName, BucketName, ObjInstance, ObjID);"; - - - static constexpr std::string_view CreateQuotaTableQ = - "CREATE TABLE IF NOT EXISTS '{}' ( \ - QuotaID INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE , \ - MaxSizeSoftThreshold INTEGER , \ - MaxObjsSoftThreshold INTEGER , \ - MaxSize INTEGER , \ - MaxObjects INTEGER , \ - Enabled Boolean , \ - CheckOnRaw Boolean \n);"; - - static constexpr std::string_view CreateLCEntryTableQ = - "CREATE TABLE IF NOT EXISTS '{}' ( \ - LCIndex TEXT NOT NULL , \ - BucketName TEXT NOT NULL , \ - StartTime INTEGER , \ - Status INTEGER , \ - PRIMARY KEY (LCIndex, BucketName) \n);"; - - static constexpr std::string_view CreateLCHeadTableQ = - "CREATE TABLE IF NOT EXISTS '{}' ( \ - LCIndex TEXT NOT NULL , \ - Marker TEXT , \ - StartDate INTEGER , \ - PRIMARY KEY (LCIndex) \n);"; - - static constexpr std::string_view DropQ = "DROP TABLE IF EXISTS '{}'"; - static constexpr std::string_view ListAllQ = "SELECT * from '{}'"; - - public: - DBOp() {} - virtual ~DBOp() {} - std::mutex mtx; // to protect prepared stmt - - static std::string CreateTableSchema(std::string_view type, - const DBOpParams *params) { - if (!type.compare("User")) - return fmt::format(CreateUserTableQ, - params->user_table); - if (!type.compare("Bucket")) - return fmt::format(CreateBucketTableQ, - params->bucket_table, - params->user_table); - if (!type.compare("Object")) - return fmt::format(CreateObjectTableQ, - params->object_table, - params->bucket_table); - if (!type.compare("ObjectTrigger")) - return fmt::format(CreateObjectTableTriggerQ, - params->object_trigger, - params->object_table, - params->object_table, - params->object_table); - if (!type.compare("ObjectData")) - return fmt::format(CreateObjectDataTableQ, - params->objectdata_table, - params->bucket_table); - if (!type.compare("ObjectView")) - return fmt::format(CreateObjectTableQ, - params->object_view, - params->objectdata_table, - params->object_table); - if (!type.compare("Quota")) - return fmt::format(CreateQuotaTableQ, - params->quota_table); - 
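As context for how the *Q templates above turn into executable SQL: CreateTableSchema() splices the per-DB table names (and, for object tables, per-bucket names) into the '{}' slots with fmt::format. A minimal, self-contained sketch of that expansion, using a made-up quota-table name standing in for params->quota_table (the remaining LCHead/LCEntry branches of CreateTableSchema continue below):

```cpp
#include <fmt/format.h>
#include <iostream>
#include <string>

int main() {
  // Hypothetical table name; in dbstore it would come from
  // DBOpParams::quota_table (e.g. "<db_name>_quota_table").
  std::string quota_table = "default_db_quota_table";
  std::string sql = fmt::format(
      "CREATE TABLE IF NOT EXISTS '{}' ( \
       QuotaID INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE , \
       MaxSize INTEGER , \
       MaxObjects INTEGER , \
       Enabled Boolean );",
      quota_table);
  std::cout << sql << '\n';  // table name spliced into the '{}' slot
  return 0;
}
```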
if (!type.compare("LCHead")) - return fmt::format(CreateLCHeadTableQ, - params->lc_head_table); - if (!type.compare("LCEntry")) - return fmt::format(CreateLCEntryTableQ, - params->lc_entry_table, - params->bucket_table); - - ceph_abort_msgf("incorrect table type %.*s", type.size(), type.data()); - } - - static std::string DeleteTableSchema(std::string_view table) { - return fmt::format(DropQ, table); - } - static std::string ListTableSchema(std::string_view table) { - return fmt::format(ListAllQ, table); - } - - virtual int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; } - virtual int Bind(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; } - virtual int Execute(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; } -}; - -class InsertUserOp : virtual public DBOp { - private: - /* For existing entires, - - * (1) INSERT or REPLACE - it will delete previous entry and then - * inserts new one. Since it deletes previos enties, it will - * trigger all foriegn key cascade deletes or other triggers. - * (2) INSERT or UPDATE - this will set NULL values to unassigned - * fields. - * more info: https://code-examples.net/en/q/377728 - * - * For now using INSERT or REPLACE. If required of updating existing - * record, will use another query. - */ - static constexpr std::string_view Query = "INSERT OR REPLACE INTO '{}' \ - (UserID, Tenant, NS, DisplayName, UserEmail, \ - AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ - SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ - System, PlacementName, PlacementStorageClass, PlacementTags, \ - BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ - UserAttrs, UserVersion, UserVersionTag) \ - VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \ - {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});"; - - public: - virtual ~InsertUserOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.user_table, - params.op.user.user_id, params.op.user.tenant, params.op.user.ns, - params.op.user.display_name, params.op.user.user_email, - params.op.user.access_keys_id, params.op.user.access_keys_secret, - params.op.user.access_keys, params.op.user.swift_keys, - params.op.user.subusers, params.op.user.suspended, - params.op.user.max_buckets, params.op.user.op_mask, - params.op.user.user_caps, params.op.user.admin, params.op.user.system, - params.op.user.placement_name, params.op.user.placement_storage_class, - params.op.user.placement_tags, params.op.user.bucket_quota, - params.op.user.temp_url_keys, params.op.user.user_quota, - params.op.user.type, params.op.user.mfa_ids, - params.op.user.assumed_role_arn, params.op.user.user_attrs, - params.op.user.user_ver, params.op.user.user_ver_tag); - } - -}; - -class RemoveUserOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where UserID = {}"; - - public: - virtual ~RemoveUserOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.user_table, - params.op.user.user_id); - } -}; - -class GetUserOp: virtual public DBOp { - private: - /* If below query columns are updated, make sure to update the indexes - * in list_user() cbk in sqliteDB.cc */ - static constexpr std::string_view Query = "SELECT \ - UserID, Tenant, NS, DisplayName, UserEmail, \ - AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ - SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ - System, PlacementName, PlacementStorageClass, 
PlacementTags, \ - BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ - UserAttrs, UserVersion, UserVersionTag from '{}' where UserID = {}"; - - static constexpr std::string_view QueryByEmail = "SELECT \ - UserID, Tenant, NS, DisplayName, UserEmail, \ - AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ - SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ - System, PlacementName, PlacementStorageClass, PlacementTags, \ - BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ - UserAttrs, UserVersion, UserVersionTag from '{}' where UserEmail = {}"; - - static constexpr std::string_view QueryByAccessKeys = "SELECT \ - UserID, Tenant, NS, DisplayName, UserEmail, \ - AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ - SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ - System, PlacementName, PlacementStorageClass, PlacementTags, \ - BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ - UserAttrs, UserVersion, UserVersionTag from '{}' where AccessKeysID = {}"; - - static constexpr std::string_view QueryByUserID = "SELECT \ - UserID, Tenant, NS, DisplayName, UserEmail, \ - AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\ - SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \ - System, PlacementName, PlacementStorageClass, PlacementTags, \ - BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \ - UserAttrs, UserVersion, UserVersionTag \ - from '{}' where UserID = {}"; - - public: - virtual ~GetUserOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - if (params.op.query_str == "email") { - return fmt::format(QueryByEmail, params.user_table, - params.op.user.user_email); - } else if (params.op.query_str == "access_key") { - return fmt::format(QueryByAccessKeys, - params.user_table, - params.op.user.access_keys_id); - } else if (params.op.query_str == "user_id") { - return fmt::format(QueryByUserID, - params.user_table, - params.op.user.user_id); - } else { - return fmt::format(Query, params.user_table, - params.op.user.user_id); - } - } -}; - -class InsertBucketOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "INSERT OR REPLACE INTO '{}' \ - (BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ - Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ - HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ - SwiftVersioning, SwiftVerLocation, \ - MdsearchConfig, NewBucketInstanceID, ObjectLock, \ - SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime) \ - VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, \ - {}, {}, {}, {}, {}, {}, {}, {}, {}, \ - {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; - - public: - virtual ~InsertBucketOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.bucket_table, - params.op.bucket.bucket_name, params.op.bucket.tenant, - params.op.bucket.marker, params.op.bucket.bucket_id, - params.op.bucket.size, params.op.bucket.size_rounded, - params.op.bucket.creation_time, params.op.bucket.count, - params.op.bucket.placement_name, params.op.bucket.placement_storage_class, - params.op.user.user_id, - params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj, - params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website, - params.op.bucket.website_conf, params.op.bucket.swift_versioning, - params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config, - 
params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock, - params.op.bucket.sync_policy_info_groups, params.op.bucket.bucket_attrs, - params.op.bucket.bucket_ver, params.op.bucket.bucket_ver_tag, - params.op.bucket.mtime); - } -}; - -class UpdateBucketOp: virtual public DBOp { - private: - // Updates Info, Mtime, Version - static constexpr std::string_view InfoQuery = - "UPDATE '{}' SET Tenant = {}, Marker = {}, BucketID = {}, CreationTime = {}, \ - Count = {}, PlacementName = {}, PlacementStorageClass = {}, OwnerID = {}, Flags = {}, \ - Zonegroup = {}, HasInstanceObj = {}, Quota = {}, RequesterPays = {}, HasWebsite = {}, \ - WebsiteConf = {}, SwiftVersioning = {}, SwiftVerLocation = {}, MdsearchConfig = {}, \ - NewBucketInstanceID = {}, ObjectLock = {}, SyncPolicyInfoGroups = {}, \ - BucketVersion = {}, Mtime = {} WHERE BucketName = {}"; - // Updates Attrs, OwnerID, Mtime, Version - static constexpr std::string_view AttrsQuery = - "UPDATE '{}' SET OwnerID = {}, BucketAttrs = {}, Mtime = {}, BucketVersion = {} \ - WHERE BucketName = {}"; - // Updates OwnerID, CreationTime, Mtime, Version - static constexpr std::string_view OwnerQuery = - "UPDATE '{}' SET OwnerID = {}, CreationTime = {}, Mtime = {}, BucketVersion = {} WHERE BucketName = {}"; - - public: - virtual ~UpdateBucketOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - if (params.op.query_str == "info") { - return fmt::format(InfoQuery, params.bucket_table, - params.op.bucket.tenant, params.op.bucket.marker, params.op.bucket.bucket_id, - params.op.bucket.creation_time, params.op.bucket.count, - params.op.bucket.placement_name, params.op.bucket.placement_storage_class, - params.op.user.user_id, - params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj, - params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website, - params.op.bucket.website_conf, params.op.bucket.swift_versioning, - params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config, - params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock, - params.op.bucket.sync_policy_info_groups, - params.op.bucket.bucket_ver, params.op.bucket.mtime, - params.op.bucket.bucket_name); - } - if (params.op.query_str == "attrs") { - return fmt::format(AttrsQuery, params.bucket_table, - params.op.user.user_id, params.op.bucket.bucket_attrs, - params.op.bucket.mtime, - params.op.bucket.bucket_ver, params.op.bucket.bucket_name); - } - if (params.op.query_str == "owner") { - return fmt::format(OwnerQuery, params.bucket_table, - params.op.user.user_id, params.op.bucket.creation_time, - params.op.bucket.mtime, - params.op.bucket.bucket_ver, params.op.bucket.bucket_name); - } - return ""; - } -}; - -class RemoveBucketOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where BucketName = {}"; - - public: - virtual ~RemoveBucketOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.bucket_table, - params.op.bucket.bucket_name); - } -}; - -class GetBucketOp: virtual public DBOp { - private: - static constexpr std::string_view Query = "SELECT \ - BucketName, BucketTable.Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ - Count, BucketTable.PlacementName, BucketTable.PlacementStorageClass, OwnerID, Flags, Zonegroup, \ - HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ - SwiftVersioning, SwiftVerLocation, \ - MdsearchConfig, NewBucketInstanceID, ObjectLock, \ - SyncPolicyInfoGroups, 
BucketAttrs, BucketVersion, BucketVersionTag, Mtime, NS \ - from '{}' as BucketTable INNER JOIN '{}' ON OwnerID = UserID where BucketName = {}"; - - public: - virtual ~GetBucketOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - //return fmt::format(Query, params.op.bucket.bucket_name, - // params.bucket_table, params.user_table); - return fmt::format(Query, - params.bucket_table, params.user_table, - params.op.bucket.bucket_name); - } -}; - -class ListUserBucketsOp: virtual public DBOp { - private: - // once we have stats also stored, may have to update this query to join - // these two tables. - static constexpr std::string_view Query = "SELECT \ - BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ - Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ - HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ - SwiftVersioning, SwiftVerLocation, \ - MdsearchConfig, NewBucketInstanceID, ObjectLock, \ - SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \ - FROM '{}' WHERE OwnerID = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}"; - - /* BucketNames are unique across users. Hence userid/OwnerID is not used as - * marker or for ordering here in the below query - */ - static constexpr std::string_view AllQuery = "SELECT \ - BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \ - Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \ - HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \ - SwiftVersioning, SwiftVerLocation, \ - MdsearchConfig, NewBucketInstanceID, ObjectLock, \ - SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \ - FROM '{}' WHERE BucketName > {} ORDER BY BucketName ASC LIMIT {}"; - - public: - virtual ~ListUserBucketsOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - if (params.op.query_str == "all") { - return fmt::format(AllQuery, params.bucket_table, - params.op.bucket.min_marker, - params.op.list_max_count); - } else { - return fmt::format(Query, params.bucket_table, - params.op.user.user_id, params.op.bucket.min_marker, - params.op.list_max_count); - } - } -}; - -class PutObjectOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "INSERT OR REPLACE INTO '{}' \ - (ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ - Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ - StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ - AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ - ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ - ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ - ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ - TailPlacementRuleName, TailPlacementStorageClass, \ - ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ - HeadData) \ - VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \ - {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \ - {}, {}, {}, \ - {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; - - public: - virtual ~PutObjectOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.object_table, params.op.obj.obj_name, - params.op.obj.obj_instance, params.op.obj.obj_ns, - params.op.bucket.bucket_name, params.op.obj.acls, params.op.obj.index_ver, - params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch, - 
params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner, - params.op.obj.owner_display_name, params.op.obj.storage_class, - params.op.obj.appendable, params.op.obj.content_type, - params.op.obj.index_hash_source, params.op.obj.obj_size, - params.op.obj.accounted_size, params.op.obj.mtime, - params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag, - params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj, - params.op.obj.has_data, params.op.obj.is_versioned, - params.op.obj.version_num, - params.op.obj.pg_ver, params.op.obj.zone_short_id, - params.op.obj.obj_version, params.op.obj.obj_version_tag, - params.op.obj.obj_attrs, params.op.obj.head_size, - params.op.obj.max_head_size, params.op.obj.obj_id, - params.op.obj.tail_instance, - params.op.obj.head_placement_rule_name, - params.op.obj.head_placement_storage_class, - params.op.obj.tail_placement_rule_name, - params.op.obj.tail_placement_storage_class, - params.op.obj.manifest_part_objs, - params.op.obj.manifest_part_rules, params.op.obj.omap, - params.op.obj.is_multipart, params.op.obj.mp_parts, - params.op.obj.head_data); - } -}; - -class DeleteObjectOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {}"; - - public: - virtual ~DeleteObjectOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.object_table, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance); - } -}; - -class GetObjectOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "SELECT \ - ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ - Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ - StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ - AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ - ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ - ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ - ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ - TailPlacementRuleName, TailPlacementStorageClass, \ - ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ - HeadData from '{}' \ - where BucketName = {} and ObjName = {} and ObjInstance = {}"; - - public: - virtual ~GetObjectOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.object_table, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance); - } -}; - -class ListBucketObjectsOp: virtual public DBOp { - private: - // once we have stats also stored, may have to update this query to join - // these two tables. 
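The Query string itself follows below. As context for how such marker-based listings are consumed: the caller binds :min_marker to the last ObjName of the previous page, :prefix to the user-supplied prefix with a trailing '%' (it feeds an SQL LIKE), and :list_max_count to the page size. A sketch of that loop, with a hypothetical fetch_page() standing in for preparing and executing the bound statement:

```cpp
#include <string>
#include <vector>

// Hypothetical helper: runs the ListBucketObjectsOp SELECT with
// :min_marker, :prefix and :list_max_count bound, returning one page
// of ObjName values. Illustrative only, not the dbstore API.
static std::vector<std::string> fetch_page(const std::string& marker,
                                           const std::string& like_prefix,
                                           size_t max) {
  (void)marker; (void)like_prefix; (void)max;
  return {};  // a real implementation would step the prepared statement
}

// Keyset pagination: resume each page from the last ObjName seen.
// Note the query uses 'ObjName >= :min_marker', so each subsequent page
// re-reads the marker row; the caller must skip that duplicate.
void list_all(const std::string& prefix) {
  const std::string like = prefix + "%";  // ObjName LIKE :prefix
  std::string marker;                     // "" sorts before any name
  for (;;) {
    std::vector<std::string> page = fetch_page(marker, like, 1000);
    if (page.empty())
      break;
    marker = page.back();
    if (page.size() < 1000)
      break;  // short page: listing exhausted
  }
}
```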
- static constexpr std::string_view Query = - "SELECT \ - ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ - Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ - StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ - AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ - ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ - ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ - ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ - TailPlacementRuleName, TailPlacementStorageClass, \ - ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, HeadData from '{}' \ - where BucketName = {} and ObjName >= {} and ObjName LIKE {} ORDER BY ObjName ASC, VersionNum DESC LIMIT {}"; - public: - virtual ~ListBucketObjectsOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - /* XXX: Include obj_id, delim */ - return fmt::format(Query, - params.object_table, - params.op.bucket.bucket_name, - params.op.obj.min_marker, - params.op.obj.prefix, - params.op.list_max_count); - } -}; - -#define MAX_VERSIONED_OBJECTS 20 -class ListVersionedObjectsOp: virtual public DBOp { - private: - // once we have stats also stored, may have to update this query to join - // these two tables. - static constexpr std::string_view Query = - "SELECT \ - ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \ - Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \ - StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \ - AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \ - ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \ - ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \ - ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \ - TailPlacementRuleName, TailPlacementStorageClass, \ - ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \ - HeadData from '{}' \ - where BucketName = {} and ObjName = {} ORDER BY VersionNum DESC LIMIT {}"; - public: - virtual ~ListVersionedObjectsOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - /* XXX: Include obj_id, delim */ - return fmt::format(Query, - params.object_table, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.list_max_count); - } -}; - -class UpdateObjectOp: virtual public DBOp { - private: - // Updates Omap - static constexpr std::string_view OmapQuery = - "UPDATE '{}' SET Omap = {}, Mtime = {} \ - where BucketName = {} and ObjName = {} and ObjInstance = {}"; - static constexpr std::string_view AttrsQuery = - "UPDATE '{}' SET ObjAttrs = {}, Mtime = {} \ - where BucketName = {} and ObjName = {} and ObjInstance = {}"; - static constexpr std::string_view MPQuery = - "UPDATE '{}' SET MPPartsList = {}, Mtime = {} \ - where BucketName = {} and ObjName = {} and ObjInstance = {}"; - static constexpr std::string_view MetaQuery = - "UPDATE '{}' SET \ - ObjNS = {}, ACLs = {}, IndexVer = {}, Tag = {}, Flags = {}, VersionedEpoch = {}, \ - ObjCategory = {}, Etag = {}, Owner = {}, OwnerDisplayName = {}, \ - StorageClass = {}, Appendable = {}, ContentType = {}, \ - IndexHashSource = {}, ObjSize = {}, AccountedSize = {}, Mtime = {}, \ - Epoch = {}, ObjTag = {}, TailTag = {}, WriteTag = {}, FakeTag = {}, \ - ShadowObj = {}, HasData = {}, IsVersioned = {}, VersionNum = {}, PGVer = {}, \ - ZoneShortID = {}, ObjVersion = {}, ObjVersionTag = {}, ObjAttrs = {}, \ - HeadSize = {}, MaxHeadSize = {}, ObjID = {}, TailInstance = 
{}, \ - HeadPlacementRuleName = {}, HeadPlacementRuleStorageClass = {}, \ - TailPlacementRuleName = {}, TailPlacementStorageClass = {}, \ - ManifestPartObjs = {}, ManifestPartRules = {}, Omap = {}, \ - IsMultipart = {}, MPPartsList = {}, HeadData = {} \ - WHERE ObjName = {} and ObjInstance = {} and BucketName = {}"; - - public: - virtual ~UpdateObjectOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - if (params.op.query_str == "omap") { - return fmt::format(OmapQuery, - params.object_table, params.op.obj.omap, - params.op.obj.mtime, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance); - } - if (params.op.query_str == "attrs") { - return fmt::format(AttrsQuery, - params.object_table, params.op.obj.obj_attrs, - params.op.obj.mtime, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance); - } - if (params.op.query_str == "mp") { - return fmt::format(MPQuery, - params.object_table, params.op.obj.mp_parts, - params.op.obj.mtime, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance); - } - if (params.op.query_str == "meta") { - return fmt::format(MetaQuery, - params.object_table, - params.op.obj.obj_ns, params.op.obj.acls, params.op.obj.index_ver, - params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch, - params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner, - params.op.obj.owner_display_name, params.op.obj.storage_class, - params.op.obj.appendable, params.op.obj.content_type, - params.op.obj.index_hash_source, params.op.obj.obj_size, - params.op.obj.accounted_size, params.op.obj.mtime, - params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag, - params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj, - params.op.obj.has_data, params.op.obj.is_versioned, params.op.obj.version_num, - params.op.obj.pg_ver, params.op.obj.zone_short_id, - params.op.obj.obj_version, params.op.obj.obj_version_tag, - params.op.obj.obj_attrs, params.op.obj.head_size, - params.op.obj.max_head_size, params.op.obj.obj_id, - params.op.obj.tail_instance, - params.op.obj.head_placement_rule_name, - params.op.obj.head_placement_storage_class, - params.op.obj.tail_placement_rule_name, - params.op.obj.tail_placement_storage_class, - params.op.obj.manifest_part_objs, - params.op.obj.manifest_part_rules, params.op.obj.omap, - params.op.obj.is_multipart, params.op.obj.mp_parts, - params.op.obj.head_data, - params.op.obj.obj_name, params.op.obj.obj_instance, - params.op.bucket.bucket_name); - } - return ""; - } -}; - -class PutObjectDataOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "INSERT OR REPLACE INTO '{}' \ - (ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data) \ - VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})"; - - public: - virtual ~PutObjectDataOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.objectdata_table, - params.op.obj.obj_name, params.op.obj.obj_instance, - params.op.obj.obj_ns, - params.op.bucket.bucket_name, - params.op.obj.obj_id, - params.op.obj_data.multipart_part_str, - params.op.obj_data.part_num, - params.op.obj_data.offset, - params.op.obj_data.size, - params.op.obj.mtime, - params.op.obj_data.data); - } -}; - -/* XXX: Recheck if this is really needed */ -class UpdateObjectDataOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "UPDATE '{}' \ - SET Mtime = {} 
WHERE ObjName = {} and ObjInstance = {} and \ - BucketName = {} and ObjID = {}"; - - public: - virtual ~UpdateObjectDataOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.objectdata_table, - params.op.obj.mtime, - params.op.obj.obj_name, params.op.obj.obj_instance, - params.op.bucket.bucket_name, - params.op.obj.obj_id); - } -}; - -class GetObjectDataOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "SELECT \ - ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data \ - from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {} ORDER BY MultipartPartStr, PartNum"; - - public: - virtual ~GetObjectDataOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.objectdata_table, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance, - params.op.obj.obj_id); - } -}; - -class DeleteObjectDataOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {}"; - - public: - virtual ~DeleteObjectDataOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.objectdata_table, - params.op.bucket.bucket_name, - params.op.obj.obj_name, - params.op.obj.obj_instance, - params.op.obj.obj_id); - } -}; - -class DeleteStaleObjectDataOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' WHERE (ObjName, ObjInstance, ObjID) NOT IN (SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING (ObjName, BucketName, ObjInstance, ObjID)) and Mtime < {}"; - - public: - virtual ~DeleteStaleObjectDataOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, - params.objectdata_table, - params.objectdata_table, - params.object_table, - params.op.obj.mtime); - } -}; - -class InsertLCEntryOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "INSERT OR REPLACE INTO '{}' \ - (LCIndex, BucketName, StartTime, Status) \ - VALUES ({}, {}, {}, {})"; - - public: - virtual ~InsertLCEntryOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_entry_table, - params.op.lc_entry.index, params.op.lc_entry.bucket_name, - params.op.lc_entry.start_time, params.op.lc_entry.status); - } -}; - -class RemoveLCEntryOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where LCIndex = {} and BucketName = {}"; - - public: - virtual ~RemoveLCEntryOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_entry_table, - params.op.lc_entry.index, params.op.lc_entry.bucket_name); - } -}; - -class GetLCEntryOp: virtual public DBOp { - private: - static constexpr std::string_view Query = "SELECT \ - LCIndex, BucketName, StartTime, Status \ - from '{}' where LCIndex = {} and BucketName = {}"; - static constexpr std::string_view NextQuery = "SELECT \ - LCIndex, BucketName, StartTime, Status \ - from '{}' where LCIndex = {} and BucketName > {} ORDER BY BucketName ASC"; - - public: - virtual ~GetLCEntryOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - if (params.op.query_str == "get_next_entry") { - return fmt::format(NextQuery, params.lc_entry_table, - params.op.lc_entry.index, params.op.lc_entry.bucket_name); - 
} - // default - return fmt::format(Query, params.lc_entry_table, - params.op.lc_entry.index, params.op.lc_entry.bucket_name); - } -}; - -class ListLCEntriesOp: virtual public DBOp { - private: - static constexpr std::string_view Query = "SELECT \ - LCIndex, BucketName, StartTime, Status \ - FROM '{}' WHERE LCIndex = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}"; - - public: - virtual ~ListLCEntriesOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_entry_table, - params.op.lc_entry.index, params.op.lc_entry.min_marker, - params.op.list_max_count); - } -}; - -class InsertLCHeadOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "INSERT OR REPLACE INTO '{}' \ - (LCIndex, Marker, StartDate) \ - VALUES ({}, {}, {})"; - - public: - virtual ~InsertLCHeadOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_head_table, - params.op.lc_head.index, params.op.lc_head.marker, - params.op.lc_head.start_date); - } -}; - -class RemoveLCHeadOp: virtual public DBOp { - private: - static constexpr std::string_view Query = - "DELETE from '{}' where LCIndex = {}"; - - public: - virtual ~RemoveLCHeadOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_head_table, - params.op.lc_head.index); - } -}; - -class GetLCHeadOp: virtual public DBOp { - private: - static constexpr std::string_view Query = "SELECT \ - LCIndex, Marker, StartDate \ - from '{}' where LCIndex = {}"; - - public: - virtual ~GetLCHeadOp() {} - - static std::string Schema(DBOpPrepareParams ¶ms) { - return fmt::format(Query, params.lc_head_table, - params.op.lc_head.index); - } -}; - -/* taken from rgw_rados.h::RGWOLHInfo */ -struct DBOLHInfo { - rgw_obj target; - bool removed; - DBOLHInfo() : removed(false) {} - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(target, bl); - encode(removed, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(target, bl); - decode(removed, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(DBOLHInfo) - -class DB { - private: - const std::string db_name; - rgw::sal::Driver* driver; - const std::string user_table; - const std::string bucket_table; - const std::string quota_table; - const std::string lc_head_table; - const std::string lc_entry_table; - static std::map objectmap; - - protected: - void *db; - CephContext *cct; - const DoutPrefix dp; - uint64_t max_bucket_id = 0; - // XXX: default ObjStripeSize or ObjChunk size - 4M, make them configurable? - uint64_t ObjHeadSize = 1024; /* 1K - default head data size */ - uint64_t ObjChunkSize = (get_blob_limit() - 1000); /* 1000 to accommodate other fields */ - // Below mutex is to protect objectmap and other shared - // objects if any. 
- std::mutex mtx; - - public: - DB(std::string db_name, CephContext *_cct) : db_name(db_name), - user_table(db_name+"_user_table"), - bucket_table(db_name+"_bucket_table"), - quota_table(db_name+"_quota_table"), - lc_head_table(db_name+"_lc_head_table"), - lc_entry_table(db_name+"_lc_entry_table"), - cct(_cct), - dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ") - {} - /* DB() {}*/ - - DB(CephContext *_cct) : db_name("default_db"), - user_table(db_name+"_user_table"), - bucket_table(db_name+"_bucket_table"), - quota_table(db_name+"_quota_table"), - lc_head_table(db_name+"_lc_head_table"), - lc_entry_table(db_name+"_lc_entry_table"), - cct(_cct), - dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ") - {} - virtual ~DB() {} - - const std::string getDBname() { return db_name; } - const std::string getDBfile() { return db_name + ".db"; } - const std::string getUserTable() { return user_table; } - const std::string getBucketTable() { return bucket_table; } - const std::string getQuotaTable() { return quota_table; } - const std::string getLCHeadTable() { return lc_head_table; } - const std::string getLCEntryTable() { return lc_entry_table; } - const std::string getObjectTable(std::string bucket) { - return db_name+"_"+bucket+"_object_table"; } - const std::string getObjectDataTable(std::string bucket) { - return db_name+"_"+bucket+"_objectdata_table"; } - const std::string getObjectView(std::string bucket) { - return db_name+"_"+bucket+"_object_view"; } - const std::string getObjectTrigger(std::string bucket) { - return db_name+"_"+bucket+"_object_trigger"; } - - std::map getObjectMap(); - - struct DBOps dbops; // DB operations, make it private? - - void set_driver(rgw::sal::Driver* _driver) { - driver = _driver; - } - - void set_context(CephContext *_cct) { - cct = _cct; - } - - CephContext *ctx() { return cct; } - const DoutPrefixProvider *get_def_dpp() { return &dp; } - - int Initialize(std::string logfile, int loglevel); - int Destroy(const DoutPrefixProvider *dpp); - int LockInit(const DoutPrefixProvider *dpp); - int LockDestroy(const DoutPrefixProvider *dpp); - int Lock(const DoutPrefixProvider *dpp); - int Unlock(const DoutPrefixProvider *dpp); - - int InitializeParams(const DoutPrefixProvider *dpp, DBOpParams *params); - int ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params); - std::shared_ptr getDBOp(const DoutPrefixProvider *dpp, std::string_view Op, const DBOpParams *params); - int objectmapInsert(const DoutPrefixProvider *dpp, std::string bucket, class ObjectOp* ptr); - int objectmapDelete(const DoutPrefixProvider *dpp, std::string bucket); - - virtual uint64_t get_blob_limit() { return 0; }; - virtual void *openDB(const DoutPrefixProvider *dpp) { return NULL; } - virtual int closeDB(const DoutPrefixProvider *dpp) { return 0; } - virtual int createTables(const DoutPrefixProvider *dpp) { return 0; } - virtual int InitializeDBOps(const DoutPrefixProvider *dpp) { return 0; } - virtual int InitPrepareParams(const DoutPrefixProvider *dpp, - DBOpPrepareParams &p_params, - DBOpParams* params) = 0; - virtual int createLCTables(const DoutPrefixProvider *dpp) = 0; - - virtual int ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; - virtual int ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; - virtual int ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) = 0; - - int get_user(const DoutPrefixProvider *dpp, - const std::string& query_str, const std::string& query_str_val, - RGWUserInfo& uinfo, std::map 
<std::string, bufferlist> *pattrs,
-      RGWObjVersionTracker *pobjv_tracker);
-  int store_user(const DoutPrefixProvider *dpp,
-      RGWUserInfo& uinfo, bool exclusive, std::map<std::string, bufferlist> *pattrs,
-      RGWObjVersionTracker *pobjv_tracker, RGWUserInfo* pold_info);
-  int remove_user(const DoutPrefixProvider *dpp,
-      RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv_tracker);
-  int get_bucket_info(const DoutPrefixProvider *dpp, const std::string& query_str,
-      const std::string& query_str_val,
-      RGWBucketInfo& info, rgw::sal::Attrs* pattrs, ceph::real_time* pmtime,
-      obj_version* pbucket_version);
-  int create_bucket(const DoutPrefixProvider *dpp,
-      const RGWUserInfo& owner, rgw_bucket& bucket,
-      const std::string& zonegroup_id,
-      const rgw_placement_rule& placement_rule,
-      const std::string& swift_ver_location,
-      const RGWQuotaInfo * pquota_info,
-      std::map<std::string, bufferlist>& attrs,
-      RGWBucketInfo& info,
-      obj_version *pobjv,
-      obj_version *pep_objv,
-      real_time creation_time,
-      rgw_bucket *pmaster_bucket,
-      uint32_t *pmaster_num_shards,
-      optional_yield y,
-      bool exclusive);
-
-  int next_bucket_id() { return ++max_bucket_id; };
-
-  int remove_bucket(const DoutPrefixProvider *dpp, const RGWBucketInfo info);
-  int list_buckets(const DoutPrefixProvider *dpp, const std::string& query_str,
-      rgw_user& user,
-      const std::string& marker,
-      const std::string& end_marker,
-      uint64_t max,
-      bool need_stats,
-      RGWUserBuckets *buckets,
-      bool *is_truncated);
-  int update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str,
-      RGWBucketInfo& info, bool exclusive,
-      const rgw_user* powner_id, std::map<std::string, bufferlist>* pattrs,
-      ceph::real_time* pmtime, RGWObjVersionTracker* pobjv);
-
-  uint64_t get_max_head_size() { return ObjHeadSize; }
-  uint64_t get_max_chunk_size() { return ObjChunkSize; }
-  void gen_rand_obj_instance_name(rgw_obj_key *target_key);
-
-  // db raw obj string is of format -
-  // "<bucket_name>_<obj_name>_<obj_instance>_<obj_id>_<multipart_part_str>_<part_num>"
-  static constexpr std::string_view raw_obj_oid = "{0}_{1}_{2}_{3}_{4}_{5}";
-
-  std::string to_oid(std::string_view bucket, std::string_view obj_name,
-                     std::string_view obj_instance, std::string_view obj_id,
-                     std::string_view mp_str, uint64_t partnum) {
-    return fmt::format(raw_obj_oid, bucket, obj_name, obj_instance, obj_id, mp_str, partnum);
-  }
-  int from_oid(const std::string& oid, std::string& bucket, std::string& obj_name, std::string& obj_id,
-               std::string& obj_instance,
-               std::string& mp_str, uint64_t& partnum) {
-    // TODO: use ceph::split() from common/split.h
-    // XXX: doesn't this break if obj_name has underscores in it?
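The XXX above is a real hazard. A quick, self-contained illustration (not part of the patch) of how a plain split on '_' mis-parses an oid whose obj_name itself contains underscores; from_oid()'s actual body follows:

```cpp
#include <cassert>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>

int main() {
  // obj_name here is "my_obj": the oid now contains seven '_'-separated
  // tokens instead of the six from_oid() expects, so every field after
  // the bucket shifts by one.
  std::string oid = "bkt_my_obj_inst_id1_0.0_2";
  std::vector<std::string> parts;
  boost::split(parts, oid, boost::is_any_of("_"));
  assert(parts.size() == 7);  // from_oid() indexes result[0]..result[5]
  // parts[1] == "my", parts[2] == "obj": name/instance/id all mis-assigned.
  // A split bounded to a fixed field count, or a separator that cannot
  // appear in object names, avoids this.
  return 0;
}
```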
-    std::vector<std::string> result;
-    boost::split(result, oid, boost::is_any_of("_"));
-
-    bucket = result[0];
-    obj_name = result[1];
-    obj_instance = result[2];
-    obj_id = result[3];
-    mp_str = result[4];
-    partnum = stoi(result[5]);
-
-    return 0;
-  }
-
-  struct raw_obj {
-    DB* db;
-
-    std::string bucket_name;
-    std::string obj_name;
-    std::string obj_instance;
-    std::string obj_ns;
-    std::string obj_id;
-    std::string multipart_part_str;
-    uint64_t part_num;
-
-    std::string obj_table;
-    std::string obj_data_table;
-
-    raw_obj(DB* _db) {
-      db = _db;
-    }
-
-    raw_obj(DB* _db, std::string& _bname, std::string& _obj_name, std::string& _obj_instance,
-            std::string& _obj_ns, std::string& _obj_id, std::string _mp_part_str, int _part_num) {
-      db = _db;
-      bucket_name = _bname;
-      obj_name = _obj_name;
-      obj_instance = _obj_instance;
-      obj_ns = _obj_ns;
-      obj_id = _obj_id;
-      multipart_part_str = _mp_part_str;
-      part_num = _part_num;
-
-      obj_table = bucket_name+".object.table";
-      obj_data_table = bucket_name+".objectdata.table";
-    }
-
-    raw_obj(DB* _db, std::string& oid) {
-      int r;
-
-      db = _db;
-      r = db->from_oid(oid, bucket_name, obj_name, obj_instance, obj_id, multipart_part_str,
-                       part_num);
-      if (r < 0) {
-        multipart_part_str = "0.0";
-        part_num = 0;
-      }
-
-      obj_table = db->getObjectTable(bucket_name);
-      obj_data_table = db->getObjectDataTable(bucket_name);
-    }
-
-    int InitializeParamsfromRawObj(const DoutPrefixProvider *dpp, DBOpParams* params);
-
-    int read(const DoutPrefixProvider *dpp, int64_t ofs, uint64_t end, bufferlist& bl);
-    int write(const DoutPrefixProvider *dpp, int64_t ofs, int64_t write_ofs, uint64_t len, bufferlist& bl);
-  };
-
-  class GC : public Thread {
-    const DoutPrefixProvider *dpp;
-    DB *db;
-    /* Default time interval for GC
-     * XXX: Make below options configurable
-     *
-     * gc_interval: The time between successive gc thread runs
-     * gc_obj_min_wait: Min. time to wait before deleting any data post its creation.
- * - */ - std::mutex mtx; - std::condition_variable cv; - bool stop_signalled = false; - uint32_t gc_interval = 24*60*60; //sec ; default: 24*60*60 - uint32_t gc_obj_min_wait = 60*60; //60*60sec default - std::string bucket_marker; - std::string user_marker; - - public: - GC(const DoutPrefixProvider *_dpp, DB* _db) : - dpp(_dpp), db(_db) {} - - void *entry() override; - - void signal_stop() { - std::lock_guard lk_guard(mtx); - stop_signalled = true; - cv.notify_one(); - } - - friend class DB; - }; - std::unique_ptr gc_worker; - - class Bucket { - friend class DB; - DB* store; - - RGWBucketInfo bucket_info; - - public: - Bucket(DB *_store, const RGWBucketInfo& _binfo) : store(_store), bucket_info(_binfo) {} - DB *get_store() { return store; } - rgw_bucket& get_bucket() { return bucket_info.bucket; } - RGWBucketInfo& get_bucket_info() { return bucket_info; } - - class List { - protected: - // absolute maximum number of objects that - // list_objects_(un)ordered can return - static constexpr int64_t bucket_list_objects_absolute_max = 25000; - - DB::Bucket *target; - rgw_obj_key next_marker; - - public: - - struct Params { - std::string prefix; - std::string delim; - rgw_obj_key marker; - rgw_obj_key end_marker; - std::string ns; - bool enforce_ns; - RGWAccessListFilter* access_list_filter; - RGWBucketListNameFilter force_check_filter; - bool list_versions; - bool allow_unordered; - - Params() : - enforce_ns(true), - access_list_filter(nullptr), - list_versions(false), - allow_unordered(false) - {} - } params; - - explicit List(DB::Bucket *_target) : target(_target) {} - - /* XXX: Handle ordered and unordered separately. - * For now returning only ordered entries */ - int list_objects(const DoutPrefixProvider *dpp, int64_t max, - std::vector *result, - std::map *common_prefixes, bool *is_truncated); - rgw_obj_key& get_next_marker() { - return next_marker; - } - }; - }; - - class Object { - friend class DB; - DB* store; - - RGWBucketInfo bucket_info; - rgw_obj obj; - - RGWObjState obj_state; - std::string obj_id; - - bool versioning_disabled; - - bool bs_initialized; - - public: - Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info), - obj(_obj), - versioning_disabled(false), - bs_initialized(false) {} - - Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj, const std::string& _obj_id) : store(_store), bucket_info(_bucket_info), obj(_obj), obj_id(_obj_id) {} - - struct Read { - DB::Object *source; - - struct GetObjState { - rgw_obj obj; - } state; - - struct ConditionParams { - const ceph::real_time *mod_ptr; - const ceph::real_time *unmod_ptr; - bool high_precision_time; - uint32_t mod_zone_id; - uint64_t mod_pg_ver; - const char *if_match; - const char *if_nomatch; - - ConditionParams() : - mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0), - if_match(NULL), if_nomatch(NULL) {} - } conds; - - struct Params { - ceph::real_time *lastmod; - uint64_t *obj_size; - std::map *attrs; - rgw_obj *target_obj; - - Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr), - target_obj(nullptr) {} - } params; - - explicit Read(DB::Object *_source) : source(_source) {} - - int prepare(const DoutPrefixProvider *dpp); - static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end); - int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp); - int iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb); - int 
get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest); - }; - - struct Write { - DB::Object *target; - RGWObjState obj_state; - std::string mp_part_str = "0.0"; // multipart num - - struct MetaParams { - ceph::real_time *mtime; - std::map* rmattrs; - const bufferlist *data; - RGWObjManifest *manifest; - const std::string *ptag; - std::list *remove_objs; - ceph::real_time set_mtime; - rgw_user owner; - RGWObjCategory category; - int flags; - const char *if_match; - const char *if_nomatch; - std::optional olh_epoch; - ceph::real_time delete_at; - bool canceled; - const std::string *user_data; - rgw_zone_set *zones_trace; - bool modify_tail; - bool completeMultipart; - bool appendable; - - MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL), - remove_objs(NULL), category(RGWObjCategory::Main), flags(0), - if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr), - modify_tail(false), completeMultipart(false), appendable(false) {} - } meta; - - explicit Write(DB::Object *_target) : target(_target) {} - - void set_mp_part_str(std::string _mp_part_str) { mp_part_str = _mp_part_str;} - int prepare(const DoutPrefixProvider* dpp); - int write_data(const DoutPrefixProvider* dpp, - bufferlist& data, uint64_t ofs); - int _do_write_meta(const DoutPrefixProvider *dpp, - uint64_t size, uint64_t accounted_size, - std::map& attrs, - bool assume_noent, bool modify_tail); - int write_meta(const DoutPrefixProvider *dpp, uint64_t size, - uint64_t accounted_size, std::map& attrs); - }; - - struct Delete { - DB::Object *target; - - struct DeleteParams { - rgw_user bucket_owner; - int versioning_status; - ACLOwner obj_owner; /* needed for creation of deletion marker */ - uint64_t olh_epoch; - std::string marker_version_id; - uint32_t bilog_flags; - std::list *remove_objs; - ceph::real_time expiration_time; - ceph::real_time unmod_since; - ceph::real_time mtime; /* for setting delete marker mtime */ - bool high_precision_time; - rgw_zone_set *zones_trace; - bool abortmp; - uint64_t parts_accounted_size; - - DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {} - } params; - - struct DeleteResult { - bool delete_marker; - std::string version_id; - - DeleteResult() : delete_marker(false) {} - } result; - - explicit Delete(DB::Object *_target) : target(_target) {} - - int delete_obj(const DoutPrefixProvider *dpp); - int delete_obj_impl(const DoutPrefixProvider *dpp, DBOpParams& del_params); - int create_dm(const DoutPrefixProvider *dpp, DBOpParams& del_params); - }; - - /* XXX: the parameters may be subject to change. 
All we need is bucket name - * & obj name,instance - keys */ - int get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params); - int get_obj_state(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - const rgw_obj& obj, - bool follow_olh, RGWObjState **state); - int get_state(const DoutPrefixProvider *dpp, RGWObjState **pstate, bool follow_olh); - int list_versioned_objects(const DoutPrefixProvider *dpp, - std::list& list_entries); - - DB *get_store() { return store; } - rgw_obj& get_obj() { return obj; } - RGWBucketInfo& get_bucket_info() { return bucket_info; } - - int InitializeParamsfromObject(const DoutPrefixProvider *dpp, DBOpParams* params); - int set_attrs(const DoutPrefixProvider *dpp, std::map& setattrs, - std::map* rmattrs); - int transition(const DoutPrefixProvider *dpp, - const rgw_placement_rule& rule, const real_time& mtime, - uint64_t olh_epoch); - int obj_omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, bool must_exist); - int obj_omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, - const std::set& keys, - std::map* vals); - int obj_omap_get_all(const DoutPrefixProvider *dpp, std::map *m); - int obj_omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count, - std::map *m, bool* pmore); - using iterate_obj_cb = int (*)(const DoutPrefixProvider*, const raw_obj&, off_t, off_t, - bool, RGWObjState*, void*); - int add_mp_part(const DoutPrefixProvider *dpp, RGWUploadPartInfo info); - int get_mp_parts_list(const DoutPrefixProvider *dpp, std::list& info); - - int iterate_obj(const DoutPrefixProvider *dpp, - const RGWBucketInfo& bucket_info, const rgw_obj& obj, - off_t ofs, off_t end, uint64_t max_chunk_size, - iterate_obj_cb cb, void *arg); - }; - int get_obj_iterate_cb(const DoutPrefixProvider *dpp, - const raw_obj& read_obj, off_t obj_ofs, - off_t len, bool is_head_obj, - RGWObjState *astate, void *arg); - - int get_entry(const std::string& oid, const std::string& marker, - std::unique_ptr* entry); - int get_next_entry(const std::string& oid, const std::string& marker, - std::unique_ptr* entry); - int set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry); - int list_entries(const std::string& oid, const std::string& marker, - uint32_t max_entries, std::vector>& entries); - int rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry); - int get_head(const std::string& oid, std::unique_ptr* head); - int put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head); - int delete_stale_objs(const DoutPrefixProvider *dpp, const std::string& bucket, - uint32_t min_wait); - int createGC(const DoutPrefixProvider *_dpp); - int stopGC(); -}; - -struct db_get_obj_data { - DB* store; - RGWGetDataCB* client_cb = nullptr; - uint64_t offset; // next offset to write to client - - db_get_obj_data(DB* db, RGWGetDataCB* cb, uint64_t offset) : - store(db), client_cb(cb), offset(offset) {} - ~db_get_obj_data() {} -}; - -} } // namespace rgw::store - -#endif diff --git a/src/rgw/store/dbstore/common/dbstore_log.h b/src/rgw/store/dbstore/common/dbstore_log.h deleted file mode 100644 index 8d981d5adc4..00000000000 --- a/src/rgw/store/dbstore/common/dbstore_log.h +++ /dev/null @@ -1,18 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#ifndef DB_STORE_LOG_H -#define DB_STORE_LOG_H - -#include -#include -#include -#include -#include -#include -#include "common/dout.h" - -#undef dout_prefix 
-#define dout_prefix *_dout << "rgw dbstore: " - -#endif diff --git a/src/rgw/store/dbstore/config/sqlite.cc b/src/rgw/store/dbstore/config/sqlite.cc deleted file mode 100644 index 051dc34e921..00000000000 --- a/src/rgw/store/dbstore/config/sqlite.cc +++ /dev/null @@ -1,2072 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include - -#include "include/buffer.h" -#include "include/encoding.h" -#include "common/dout.h" -#include "common/random_string.h" -#include "rgw_zone.h" - -#include "common/connection_pool.h" -#include "sqlite/connection.h" -#include "sqlite/error.h" -#include "sqlite/statement.h" -#include "sqlite_schema.h" -#include "sqlite.h" - -#define dout_subsys ceph_subsys_rgw_dbstore - -namespace rgw::dbstore::config { - -struct Prefix : DoutPrefixPipe { - std::string_view prefix; - Prefix(const DoutPrefixProvider& dpp, std::string_view prefix) - : DoutPrefixPipe(dpp), prefix(prefix) {} - unsigned get_subsys() const override { return dout_subsys; } - void add_prefix(std::ostream& out) const override { - out << prefix; - } -}; - -namespace { - -// parameter names for prepared statement bindings -static constexpr const char* P1 = ":1"; -static constexpr const char* P2 = ":2"; -static constexpr const char* P3 = ":3"; -static constexpr const char* P4 = ":4"; -static constexpr const char* P5 = ":5"; -static constexpr const char* P6 = ":6"; - - -void read_text_rows(const DoutPrefixProvider* dpp, - const sqlite::stmt_execution& stmt, - std::span entries, - sal::ListResult& result) -{ - result.entries = sqlite::read_text_rows(dpp, stmt, entries); - if (result.entries.size() < entries.size()) { // end of listing - result.next.clear(); - } else { - result.next = result.entries.back(); - } -} - -struct RealmRow { - RGWRealm info; - int ver; - std::string tag; -}; - -void read_realm_row(const sqlite::stmt_execution& stmt, RealmRow& row) -{ - row.info.id = sqlite::column_text(stmt, 0); - row.info.name = sqlite::column_text(stmt, 1); - row.info.current_period = sqlite::column_text(stmt, 2); - row.info.epoch = sqlite::column_int(stmt, 3); - row.ver = sqlite::column_int(stmt, 4); - row.tag = sqlite::column_text(stmt, 5); -} - -void read_period_row(const sqlite::stmt_execution& stmt, RGWPeriod& row) -{ - // just read the Data column and decode everything else from that - std::string data = sqlite::column_text(stmt, 3); - - bufferlist bl = bufferlist::static_from_string(data); - auto p = bl.cbegin(); - decode(row, p); -} - -struct ZoneGroupRow { - RGWZoneGroup info; - int ver; - std::string tag; -}; - -void read_zonegroup_row(const sqlite::stmt_execution& stmt, ZoneGroupRow& row) -{ - std::string data = sqlite::column_text(stmt, 3); - row.ver = sqlite::column_int(stmt, 4); - row.tag = sqlite::column_text(stmt, 5); - - bufferlist bl = bufferlist::static_from_string(data); - auto p = bl.cbegin(); - decode(row.info, p); -} - -struct ZoneRow { - RGWZoneParams info; - int ver; - std::string tag; -}; - -void read_zone_row(const sqlite::stmt_execution& stmt, ZoneRow& row) -{ - std::string data = sqlite::column_text(stmt, 3); - 
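/* Note: read_realm_row(), read_period_row(), read_zonegroup_row() and
 * read_zone_row() all share one convention: the key columns (ID, Name, ...)
 * are real SQL columns, while the rest of the struct travels as an opaque
 * blob in the Data column, serialized with Ceph's encode()/decode(). A
 * minimal sketch of that round trip, assuming the bufferlist stays alive
 * for the duration of the decode:
 *
 *   // write side (see create_period/create_zonegroup/create_zone below)
 *   bufferlist bl;
 *   encode(info, bl);
 *   const auto data = std::string_view{bl.c_str(), bl.length()};
 *   // ... bind `data` to the Data parameter ...
 *
 *   // read side
 *   std::string s = sqlite::column_text(stmt, 3);
 *   bufferlist in = bufferlist::static_from_string(s);
 *   auto p = in.cbegin();
 *   decode(info, p);   // throws buffer::error; callers map it to -EIO
 */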
row.ver = sqlite::column_int(stmt, 4); - row.tag = sqlite::column_text(stmt, 5); - - bufferlist bl = bufferlist::static_from_string(data); - auto p = bl.cbegin(); - decode(row.info, p); -} - -std::string generate_version_tag(CephContext* cct) -{ - static constexpr auto TAG_LEN = 24; - return gen_rand_alphanumeric(cct, TAG_LEN); -} - -using SQLiteConnectionHandle = ConnectionHandle; - -using SQLiteConnectionPool = ConnectionPool< - sqlite::Connection, sqlite::ConnectionFactory>; - -} // anonymous namespace - -class SQLiteImpl : public SQLiteConnectionPool { - public: - using SQLiteConnectionPool::SQLiteConnectionPool; -}; - - -SQLiteConfigStore::SQLiteConfigStore(std::unique_ptr impl) - : impl(std::move(impl)) -{ -} - -SQLiteConfigStore::~SQLiteConfigStore() = default; - - -// Realm - -class SQLiteRealmWriter : public sal::RealmWriter { - SQLiteImpl* impl; - int ver; - std::string tag; - std::string realm_id; - std::string realm_name; - public: - SQLiteRealmWriter(SQLiteImpl* impl, int ver, std::string tag, - std::string_view realm_id, std::string_view realm_name) - : impl(impl), ver(ver), tag(std::move(tag)), - realm_id(realm_id), realm_name(realm_name) - {} - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWRealm& info) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:realm_write "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after a conflict or delete - } - if (realm_id != info.id || realm_name != info.name) { - return -EINVAL; // can't modify realm id or name directly - } - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_upd"]; - if (!stmt) { - const std::string sql = fmt::format(schema::realm_update5, - P1, P2, P3, P4, P5); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.current_period); - sqlite::bind_int(dpp, binding, P3, info.epoch); - sqlite::bind_int(dpp, binding, P4, ver); - sqlite::bind_text(dpp, binding, P5, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - // our version is no longer consistent, so later writes would fail too - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm update failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent CurrentPeriod - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - ++ver; - return 0; - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWRealm& info, std::string_view new_name) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:realm_rename "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - if (realm_id != info.id || realm_name != info.name) { - return -EINVAL; // can't modify realm id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_rename"]; - if (!stmt) { - const std::string sql = fmt::format(schema::realm_rename4, - P1, P2, P3, P4); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, 
realm_id); - sqlite::bind_text(dpp, binding, P2, new_name); - sqlite::bind_int(dpp, binding, P3, ver); - sqlite::bind_text(dpp, binding, P4, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm rename failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - info.name = std::string{new_name}; - ++ver; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:realm_remove "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::realm_delete3, P1, P2, P3); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - sqlite::bind_int(dpp, binding, P2, ver); - sqlite::bind_text(dpp, binding, P3, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - impl = nullptr; // prevent any further writes after delete - if (!::sqlite3_changes(conn->db.get())) { - return -ECANCELED; // VersionNumber/Tag mismatch - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; - } -}; // SQLiteRealmWriter - - -int SQLiteConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:write_default_realm_id "}; dpp = &prefix; - - if (realm_id.empty()) { - ldpp_dout(dpp, 0) << "requires a realm id" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["def_realm_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_realm_insert1, P1); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["def_realm_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_realm_upsert1, P1); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default realm insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::primary_key_constraint) { - return -EEXIST; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_realm_sel"]; - if (!stmt) { - static constexpr std::string_view sql = schema::default_realm_select0; - stmt = 
sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - realm_id = sqlite::column_text(reset, 0); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) - -{ - Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_realm_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_realm_del"]; - if (!stmt) { - static constexpr std::string_view sql = schema::default_realm_delete0; - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { - return -ENOENT; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default realm delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -int SQLiteConfigStore::create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:create_realm "}; dpp = &prefix; - - if (info.id.empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.name.empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; - return -EINVAL; - } - - int ver = 1; - auto tag = generate_version_tag(dpp->get_cct()); - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["realm_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::realm_insert4, - P1, P2, P3, P4); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["realm_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::realm_upsert4, - P1, P2, P3, P4); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.name); - sqlite::bind_int(dpp, binding, P3, ver); - sqlite::bind_text(dpp, binding, P4, tag); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::primary_key_constraint) { - return -EEXIST; // ID already taken - } else if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), ver, std::move(tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_id "}; dpp = &prefix; - - if (realm_id.empty()) { - ldpp_dout(dpp, 0) << "requires a realm id" << dendl; - return -EINVAL; - } - - RealmRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_sel_id"]; - if (!stmt) { - const 
std::string sql = fmt::format(schema::realm_select_id1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_realm_row(reset, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -static void realm_select_by_name(const DoutPrefixProvider* dpp, - sqlite::Connection& conn, - std::string_view realm_name, - RealmRow& row) -{ - auto& stmt = conn.statements["realm_sel_name"]; - if (!stmt) { - const std::string sql = fmt::format(schema::realm_select_name1, P1); - stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_name); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_realm_row(reset, row); -} - -int SQLiteConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_by_name "}; dpp = &prefix; - - if (realm_name.empty()) { - ldpp_dout(dpp, 0) << "requires a realm name" << dendl; - return -EINVAL; - } - - RealmRow row; - try { - auto conn = impl->get(dpp); - realm_select_by_name(dpp, *conn, realm_name, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_default_realm "}; dpp = &prefix; - - RealmRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_sel_def"]; - if (!stmt) { - static constexpr std::string_view sql = schema::realm_select_default0; - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_realm_row(reset, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, 
info.name); - } - return 0; -} - -int SQLiteConfigStore::read_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - std::string& realm_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_realm_id "}; dpp = &prefix; - - if (realm_name.empty()) { - ldpp_dout(dpp, 0) << "requires a realm name" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - - RealmRow row; - realm_select_by_name(dpp, *conn, realm_name, row); - - realm_id = std::move(row.info.id); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "realm decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - return 0; -} - -int SQLiteConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp, - optional_yield y, - const RGWPeriod& period) -{ - return -ENOTSUP; -} - -int SQLiteConfigStore::list_realm_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:list_realm_names "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["realm_sel_names"]; - if (!stmt) { - const std::string sql = fmt::format(schema::realm_select_names2, P1, P2); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, marker); - sqlite::bind_int(dpp, binding, P2, entries.size()); - - auto reset = sqlite::stmt_execution{stmt.get()}; - read_text_rows(dpp, reset, entries, result); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "realm select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -// Period - -int SQLiteConfigStore::create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:create_period "}; dpp = &prefix; - - if (info.id.empty()) { - ldpp_dout(dpp, 0) << "period cannot have an empty id" << dendl; - return -EINVAL; - } - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["period_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::period_insert4, - P1, P2, P3, P4); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["period_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::period_upsert4, - P1, P2, P3, P4); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_int(dpp, binding, P2, info.epoch); - sqlite::bind_text(dpp, binding, P3, info.realm_id); - sqlite::bind_text(dpp, binding, P4, data); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent RealmID - } else if (e.code() == 
sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -static void period_select_epoch(const DoutPrefixProvider* dpp, - sqlite::Connection& conn, - std::string_view id, uint32_t epoch, - RGWPeriod& row) -{ - auto& stmt = conn.statements["period_sel_epoch"]; - if (!stmt) { - const std::string sql = fmt::format(schema::period_select_epoch2, P1, P2); - stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, id); - sqlite::bind_int(dpp, binding, P2, epoch); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_period_row(reset, row); -} - -static void period_select_latest(const DoutPrefixProvider* dpp, - sqlite::Connection& conn, - std::string_view id, RGWPeriod& row) -{ - auto& stmt = conn.statements["period_sel_latest"]; - if (!stmt) { - const std::string sql = fmt::format(schema::period_select_latest1, P1); - stmt = sqlite::prepare_statement(dpp, conn.db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_period_row(reset, row); -} - -int SQLiteConfigStore::read_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id, - std::optional epoch, - RGWPeriod& info) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_period "}; dpp = &prefix; - - if (period_id.empty()) { - ldpp_dout(dpp, 0) << "requires a period id" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - if (epoch) { - period_select_epoch(dpp, *conn, period_id, *epoch, info); - } else { - period_select_latest(dpp, *conn, period_id, info); - } - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "period decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::delete_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:delete_period "}; dpp = &prefix; - - if (period_id.empty()) { - ldpp_dout(dpp, 0) << "requires a period id" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["period_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::period_delete1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, period_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { - return -ENOENT; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::list_period_ids(const DoutPrefixProvider* dpp, - optional_yield y, - const std::string& marker, - std::span entries, - sal::ListResult& result) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:list_period_ids "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["period_sel_ids"]; - if (!stmt) { - const std::string sql = 
fmt::format(schema::period_select_ids2, P1, P2); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, marker); - sqlite::bind_int(dpp, binding, P2, entries.size()); - - auto reset = sqlite::stmt_execution{stmt.get()}; - read_text_rows(dpp, reset, entries, result); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -// ZoneGroup - -class SQLiteZoneGroupWriter : public sal::ZoneGroupWriter { - SQLiteImpl* impl; - int ver; - std::string tag; - std::string zonegroup_id; - std::string zonegroup_name; - public: - SQLiteZoneGroupWriter(SQLiteImpl* impl, int ver, std::string tag, - std::string_view zonegroup_id, - std::string_view zonegroup_name) - : impl(impl), ver(ver), tag(std::move(tag)), - zonegroup_id(zonegroup_id), zonegroup_name(zonegroup_name) - {} - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneGroup& info) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_write "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - if (zonegroup_id != info.id || zonegroup_name != info.name) { - return -EINVAL; // can't modify zonegroup id or name directly - } - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_upd"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_update5, - P1, P2, P3, P4, P5); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.realm_id); - sqlite::bind_text(dpp, binding, P3, data); - sqlite::bind_int(dpp, binding, P4, ver); - sqlite::bind_text(dpp, binding, P5, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup update failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent RealmID - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneGroup& info, std::string_view new_name) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_rename "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) { - return -EINVAL; // can't modify zonegroup id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_rename"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_rename4, - P1, P2, P3, P4); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, new_name); - sqlite::bind_int(dpp, 
binding, P3, ver); - sqlite::bind_text(dpp, binding, P4, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup rename failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - info.name = std::string{new_name}; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zonegroup_remove "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_delete3, - P1, P2, P3); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zonegroup_id); - sqlite::bind_int(dpp, binding, P2, ver); - sqlite::bind_text(dpp, binding, P3, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - impl = nullptr; - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; - } -}; // SQLiteZoneGroupWriter - - -int SQLiteConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zonegroup_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["def_zonegroup_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_zonegroup_insert2, - P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["def_zonegroup_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_zonegroup_upsert2, - P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - sqlite::bind_text(dpp, binding, P2, zonegroup_id); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zonegroup insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::read_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_zonegroup_sel"]; - if (!stmt) { - const std::string sql = fmt::format(schema::default_zonegroup_select1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - 
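/* Note on the exclusive flag used by all of the write_default_* methods:
 * when exclusive is true the prepared statement is a plain INSERT, so an
 * existing row fails with a constraint error (write_default_realm_id maps
 * sqlite::errc::primary_key_constraint to -EEXIST); when false, an upsert
 * overwrites the current value in place. A hedged sketch of the two SQL
 * shapes; table and column names here are illustrative, the real statement
 * text lives in sqlite_schema.h:
 *
 *   INSERT INTO DefaultZoneGroups (RealmID, ZoneGroupID) VALUES ({}, {});
 *
 *   INSERT INTO DefaultZoneGroups (RealmID, ZoneGroupID) VALUES ({}, {})
 *     ON CONFLICT(RealmID) DO UPDATE SET ZoneGroupID = excluded.ZoneGroupID;
 */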
sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - zonegroup_id = sqlite::column_text(reset, 0); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zonegroup select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zonegroup_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_zonegroup_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::default_zonegroup_delete1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { - return -ENOENT; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zonegroup delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -int SQLiteConfigStore::create_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneGroup& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:create_zonegroup "}; dpp = &prefix; - - if (info.id.empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.name.empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; - return -EINVAL; - } - - int ver = 1; - auto tag = generate_version_tag(dpp->get_cct()); - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["zonegroup_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::zonegroup_insert6, - P1, P2, P3, P4, P5, P6); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["zonegroup_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::zonegroup_upsert6, - P1, P2, P3, P4, P5, P6); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.name); - sqlite::bind_text(dpp, binding, P3, info.realm_id); - sqlite::bind_text(dpp, binding, P4, data); - sqlite::bind_int(dpp, binding, P5, ver); - sqlite::bind_text(dpp, binding, P6, tag); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent RealmID - } else if (e.code() == sqlite::errc::primary_key_constraint) { - return -EEXIST; // ID already taken - } else if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - if (writer) { - 
*writer = std::make_unique( - impl.get(), ver, std::move(tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_id "}; dpp = &prefix; - - if (zonegroup_id.empty()) { - ldpp_dout(dpp, 0) << "requires a zonegroup id" << dendl; - return -EINVAL; - } - - ZoneGroupRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_sel_id"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_select_id1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zonegroup_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_zonegroup_row(reset, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_zonegroup_by_name "}; dpp = &prefix; - - if (zonegroup_name.empty()) { - ldpp_dout(dpp, 0) << "requires a zonegroup name" << dendl; - return -EINVAL; - } - - ZoneGroupRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_sel_name"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_select_name1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zonegroup_name); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_zonegroup_row(reset, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zonegroup "}; dpp = &prefix; - - ZoneGroupRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_sel_def"]; - if (!stmt) { - static constexpr std::string_view sql = schema::zonegroup_select_default0; - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - 
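/* Note on the sqlite::eval0()/sqlite::eval1() pair used throughout this
 * file (an inference from usage; the helpers come from sqlite/statement.h):
 * eval0() steps a statement that must produce no rows (INSERT/UPDATE/DELETE),
 * while eval1() steps one that must produce exactly one row before the
 * columns are read. An empty result surfaces as sqlite::errc::done, which
 * the read paths translate to -ENOENT. */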
read_zonegroup_row(reset, row); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "zonegroup decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp, - optional_yield y, - const std::string& marker, - std::span entries, - sal::ListResult& result) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:list_zonegroup_names "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zonegroup_sel_names"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zonegroup_select_names2, P1, P2); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - auto reset = sqlite::stmt_execution{stmt.get()}; - - sqlite::bind_text(dpp, binding, P1, marker); - sqlite::bind_int(dpp, binding, P2, entries.size()); - - read_text_rows(dpp, reset, entries, result); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zonegroup select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -// Zone - -class SQLiteZoneWriter : public sal::ZoneWriter { - SQLiteImpl* impl; - int ver; - std::string tag; - std::string zone_id; - std::string zone_name; - public: - SQLiteZoneWriter(SQLiteImpl* impl, int ver, std::string tag, - std::string_view zone_id, std::string_view zone_name) - : impl(impl), ver(ver), tag(std::move(tag)), - zone_id(zone_id), zone_name(zone_name) - {} - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneParams& info) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zone_write "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - if (zone_id != info.id || zone_name != info.name) { - return -EINVAL; // can't modify zone id or name directly - } - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_upd"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_update5, - P1, P2, P3, P4, P5); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.realm_id); - sqlite::bind_text(dpp, binding, P3, data); - sqlite::bind_int(dpp, binding, P4, ver); - sqlite::bind_text(dpp, binding, P5, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone update failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent RealmID - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - ++ver; - return 0; - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneParams& 
info, std::string_view new_name) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zone_rename "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - if (zone_id != info.id || zone_name != info.name) { - return -EINVAL; // can't modify zone id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_rename"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_rename4, P1, P2, P3, P4); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, new_name); - sqlite::bind_int(dpp, binding, P3, ver); - sqlite::bind_text(dpp, binding, P4, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - impl = nullptr; - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone rename failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - info.name = std::string{new_name}; - ++ver; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - Prefix prefix{*dpp, "dbconfig:sqlite:zone_remove "}; dpp = &prefix; - - if (!impl) { - return -EINVAL; // can't write after conflict or delete - } - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_delete3, P1, P2, P3); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zone_id); - sqlite::bind_int(dpp, binding, P2, ver); - sqlite::bind_text(dpp, binding, P3, tag); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - impl = nullptr; - if (!::sqlite3_changes(conn->db.get())) { // VersionNumber/Tag mismatch - return -ECANCELED; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; - } -}; // SQLiteZoneWriter - - -int SQLiteConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zone_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:write_default_zone_id "}; dpp = &prefix; - - if (zone_id.empty()) { - ldpp_dout(dpp, 0) << "requires a zone id" << dendl; - return -EINVAL; - } - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["def_zone_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_zone_insert2, P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["def_zone_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::default_zone_upsert2, P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - sqlite::bind_text(dpp, binding, P2, 
zone_id); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zone insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zone_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_default_zone_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_zone_sel"]; - if (!stmt) { - const std::string sql = fmt::format(schema::default_zone_select1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - zone_id = sqlite::column_text(reset, 0); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zone select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:delete_default_zone_id "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["def_zone_del"]; - if (!stmt) { - const std::string sql = fmt::format(schema::default_zone_delete1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval0(dpp, reset); - - if (!::sqlite3_changes(conn->db.get())) { - return -ENOENT; - } - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "default zone delete failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -int SQLiteConfigStore::create_zone(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneParams& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:create_zone "}; dpp = &prefix; - - if (info.id.empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.name.empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl; - return -EINVAL; - } - - int ver = 1; - auto tag = generate_version_tag(dpp->get_cct()); - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["zone_ins"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::zone_insert6, - P1, P2, P3, P4, P5, P6); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["zone_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::zone_upsert6, - P1, P2, P3, P4, P5, P6); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, info.id); - sqlite::bind_text(dpp, binding, P2, info.name); - sqlite::bind_text(dpp, binding, P3, 
info.realm_id); - sqlite::bind_text(dpp, binding, P4, data); - sqlite::bind_int(dpp, binding, P5, ver); - sqlite::bind_text(dpp, binding, P6, tag); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::foreign_key_constraint) { - return -EINVAL; // refers to nonexistent RealmID - } else if (e.code() == sqlite::errc::primary_key_constraint) { - return -EEXIST; // ID already taken - } else if (e.code() == sqlite::errc::unique_constraint) { - return -EEXIST; // Name already taken - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), ver, std::move(tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_id, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_id "}; dpp = &prefix; - - if (zone_id.empty()) { - ldpp_dout(dpp, 0) << "requires a zone id" << dendl; - return -EINVAL; - } - - ZoneRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_sel_id"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_select_id1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zone_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_zone_row(reset, row); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_zone_by_name "}; dpp = &prefix; - - if (zone_name.empty()) { - ldpp_dout(dpp, 0) << "requires a zone name" << dendl; - return -EINVAL; - } - - ZoneRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_sel_name"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_select_name1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, zone_name); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_zone_row(reset, row); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::read_default_zone(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - Prefix prefix{*dpp, 
"dbconfig:sqlite:read_default_zone "}; dpp = &prefix; - - ZoneRow row; - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_sel_def"]; - if (!stmt) { - static constexpr std::string_view sql = schema::zone_select_default0; - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - read_zone_row(reset, row); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - - info = std::move(row.info); - if (writer) { - *writer = std::make_unique( - impl.get(), row.ver, std::move(row.tag), info.id, info.name); - } - return 0; -} - -int SQLiteConfigStore::list_zone_names(const DoutPrefixProvider* dpp, - optional_yield y, - const std::string& marker, - std::span entries, - sal::ListResult& result) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:list_zone_names "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["zone_sel_names"]; - if (!stmt) { - const std::string sql = fmt::format(schema::zone_select_names2, P1, P2); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, marker); - sqlite::bind_int(dpp, binding, P2, entries.size()); - - auto reset = sqlite::stmt_execution{stmt.get()}; - read_text_rows(dpp, reset, entries, result); - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "zone select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - - -// PeriodConfig - -int SQLiteConfigStore::read_period_config(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWPeriodConfig& info) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:read_period_config "}; dpp = &prefix; - - try { - auto conn = impl->get(dpp); - auto& stmt = conn->statements["period_conf_sel"]; - if (!stmt) { - const std::string sql = fmt::format(schema::period_config_select1, P1); - stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - auto binding = sqlite::stmt_binding{stmt.get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - - auto reset = sqlite::stmt_execution{stmt.get()}; - sqlite::eval1(dpp, reset); - - std::string data = sqlite::column_text(reset, 0); - bufferlist bl = bufferlist::static_from_string(data); - auto p = bl.cbegin(); - decode(info, p); - - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "period config decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period config select failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::done) { - return -ENOENT; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -int SQLiteConfigStore::write_period_config(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - const RGWPeriodConfig& info) -{ - Prefix prefix{*dpp, "dbconfig:sqlite:write_period_config "}; dpp = &prefix; - - bufferlist bl; - encode(info, bl); - const auto data = std::string_view{bl.c_str(), bl.length()}; - - try { - auto conn = impl->get(dpp); - sqlite::stmt_ptr* stmt = nullptr; - if (exclusive) { - stmt = &conn->statements["period_conf_ins"]; - if (!*stmt) { - const std::string 
sql = fmt::format(schema::period_config_insert2, P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } else { - stmt = &conn->statements["period_conf_ups"]; - if (!*stmt) { - const std::string sql = fmt::format(schema::period_config_upsert2, P1, P2); - *stmt = sqlite::prepare_statement(dpp, conn->db.get(), sql); - } - } - auto binding = sqlite::stmt_binding{stmt->get()}; - sqlite::bind_text(dpp, binding, P1, realm_id); - sqlite::bind_text(dpp, binding, P2, data); - - auto reset = sqlite::stmt_execution{stmt->get()}; - sqlite::eval0(dpp, reset); - } catch (const buffer::error& e) { - ldpp_dout(dpp, 20) << "period config decode failed: " << e.what() << dendl; - return -EIO; - } catch (const sqlite::error& e) { - ldpp_dout(dpp, 20) << "period config insert failed: " << e.what() << dendl; - if (e.code() == sqlite::errc::primary_key_constraint) { - return -EEXIST; - } else if (e.code() == sqlite::errc::busy) { - return -EBUSY; - } - return -EIO; - } - return 0; -} - -namespace { - -int version_cb(void* user, int count, char** values, char** names) -{ - if (count != 1) { - return EINVAL; - } - std::string_view name = names[0]; - if (name != "user_version") { - return EINVAL; - } - std::string_view value = values[0]; - auto result = std::from_chars(value.begin(), value.end(), - *reinterpret_cast(user)); - if (result.ec != std::errc{}) { - return static_cast(result.ec); - } - return 0; -} - -void apply_schema_migrations(const DoutPrefixProvider* dpp, sqlite3* db) -{ - sqlite::execute(dpp, db, "PRAGMA foreign_keys = ON", nullptr, nullptr); - - // initiate a transaction and read the current schema version - uint32_t version = 0; - sqlite::execute(dpp, db, "BEGIN; PRAGMA user_version", version_cb, &version); - - const uint32_t initial_version = version; - ldpp_dout(dpp, 4) << "current schema version " << version << dendl; - - // use the version as an index into schema::migrations - auto m = std::next(schema::migrations.begin(), version); - - for (; m != schema::migrations.end(); ++m, ++version) { - try { - sqlite::execute(dpp, db, m->up, nullptr, nullptr); - } catch (const sqlite::error&) { - ldpp_dout(dpp, -1) << "ERROR: schema migration failed on v" << version - << ": " << m->description << dendl; - throw; - } - } - - if (version > initial_version) { - // update the user_version and commit the transaction - const auto commit = fmt::format("PRAGMA user_version = {}; COMMIT", version); - sqlite::execute(dpp, db, commit.c_str(), nullptr, nullptr); - - ldpp_dout(dpp, 4) << "upgraded database schema to version " << version << dendl; - } else { - // nothing to commit - sqlite::execute(dpp, db, "ROLLBACK", nullptr, nullptr); - } -} - -} // anonymous namespace - - -auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri) - -> std::unique_ptr -{ - Prefix prefix{*dpp, "dbconfig:sqlite:create_sqlite_store "}; dpp = &prefix; - - // build the connection pool - int flags = SQLITE_OPEN_CREATE | SQLITE_OPEN_URI | SQLITE_OPEN_READWRITE | - SQLITE_OPEN_NOMUTEX; - auto factory = sqlite::ConnectionFactory{uri, flags}; - - // sqlite does not support concurrent writers. 
we enforce this limitation by - // using a connection pool of size=1 - static constexpr size_t max_connections = 1; - auto impl = std::make_unique(std::move(factory), max_connections); - - // open a connection to apply schema migrations - auto conn = impl->get(dpp); - apply_schema_migrations(dpp, conn->db.get()); - - return std::make_unique(std::move(impl)); -} - -} // namespace rgw::dbstore::config diff --git a/src/rgw/store/dbstore/config/sqlite.h b/src/rgw/store/dbstore/config/sqlite.h deleted file mode 100644 index d79e040728c..00000000000 --- a/src/rgw/store/dbstore/config/sqlite.h +++ /dev/null @@ -1,172 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#pragma once - -#include "rgw_sal_config.h" - -class DoutPrefixProvider; - -namespace rgw::dbstore::config { - -struct SQLiteImpl; - -class SQLiteConfigStore : public sal::ConfigStore { - public: - explicit SQLiteConfigStore(std::unique_ptr impl); - ~SQLiteConfigStore() override; - - int write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) override; - int read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) override; - int delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) override; - - int create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, - std::unique_ptr* writer) override; - int read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) override; - int read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) override; - int read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) override; - int read_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view realm_name, - std::string& realm_id) override; - int realm_notify_new_period(const DoutPrefixProvider* dpp, - optional_yield y, - const RGWPeriod& period) override; - int list_realm_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - int create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) override; - int read_period(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view period_id, - std::optional epoch, RGWPeriod& info) override; - int delete_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id) override; - int list_period_ids(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - int write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) override; - int read_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) override; 
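The migration logic above is a stock SQLite idiom worth spelling out: `PRAGMA user_version` persists the schema version in the database header, and `apply_schema_migrations()` treats the stored value as the index of the first unapplied entry in `schema::migrations`, running everything inside a single transaction. A minimal standalone sketch of the same idiom against the raw sqlite3 C API (the migration list here is illustrative, not the one from `sqlite_schema.h`):

```cpp
// Sketch of the PRAGMA user_version migration idiom used above.
// Illustrative only; errors fail fast instead of being reported.
#include <sqlite3.h>
#include <cassert>
#include <string>
#include <vector>

static const std::vector<const char*> migrations = {
  "CREATE TABLE IF NOT EXISTS Realms (ID TEXT PRIMARY KEY NOT NULL);",
  // later releases append scripts here; existing entries never change
};

static void check(int rc)
{
  assert(rc == SQLITE_OK);
  (void) rc;  // keep the call's side effect when NDEBUG strips assert()
}

void apply_migrations(sqlite3* db)
{
  // open a transaction and read the version stored in the db header
  check(sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr));
  int version = 0;
  sqlite3_stmt* stmt = nullptr;
  check(sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, nullptr));
  if (sqlite3_step(stmt) == SQLITE_ROW) {
    version = sqlite3_column_int(stmt, 0);
  }
  sqlite3_finalize(stmt);

  // the version doubles as the index of the first unapplied migration
  const int initial = version;
  for (; version < (int) migrations.size(); ++version) {
    check(sqlite3_exec(db, migrations[version], nullptr, nullptr, nullptr));
  }

  if (version > initial) {  // persist the new version and commit atomically
    const std::string commit =
        "PRAGMA user_version = " + std::to_string(version) + "; COMMIT";
    check(sqlite3_exec(db, commit.c_str(), nullptr, nullptr, nullptr));
  } else {                  // nothing ran; leave the header untouched
    check(sqlite3_exec(db, "ROLLBACK", nullptr, nullptr, nullptr));
  }
}
```

A failed `up` script aborts before `PRAGMA user_version = N; COMMIT` runs, so a crash mid-upgrade leaves the old version in place and the migration is simply retried on the next open.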
diff --git a/src/rgw/store/dbstore/config/sqlite.h b/src/rgw/store/dbstore/config/sqlite.h
deleted file mode 100644
index d79e040728c..00000000000
--- a/src/rgw/store/dbstore/config/sqlite.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include "rgw_sal_config.h"
-
-class DoutPrefixProvider;
-
-namespace rgw::dbstore::config {
-
-struct SQLiteImpl;
-
-class SQLiteConfigStore : public sal::ConfigStore {
- public:
-  explicit SQLiteConfigStore(std::unique_ptr<SQLiteImpl> impl);
-  ~SQLiteConfigStore() override;
-
-  int write_default_realm_id(const DoutPrefixProvider* dpp,
-                             optional_yield y, bool exclusive,
-                             std::string_view realm_id) override;
-  int read_default_realm_id(const DoutPrefixProvider* dpp,
-                            optional_yield y,
-                            std::string& realm_id) override;
-  int delete_default_realm_id(const DoutPrefixProvider* dpp,
-                              optional_yield y) override;
-
-  int create_realm(const DoutPrefixProvider* dpp,
-                   optional_yield y, bool exclusive,
-                   const RGWRealm& info,
-                   std::unique_ptr<sal::RealmWriter>* writer) override;
-  int read_realm_by_id(const DoutPrefixProvider* dpp,
-                       optional_yield y,
-                       std::string_view realm_id,
-                       RGWRealm& info,
-                       std::unique_ptr<sal::RealmWriter>* writer) override;
-  int read_realm_by_name(const DoutPrefixProvider* dpp,
-                         optional_yield y,
-                         std::string_view realm_name,
-                         RGWRealm& info,
-                         std::unique_ptr<sal::RealmWriter>* writer) override;
-  int read_default_realm(const DoutPrefixProvider* dpp,
-                         optional_yield y,
-                         RGWRealm& info,
-                         std::unique_ptr<sal::RealmWriter>* writer) override;
-  int read_realm_id(const DoutPrefixProvider* dpp,
-                    optional_yield y, std::string_view realm_name,
-                    std::string& realm_id) override;
-  int realm_notify_new_period(const DoutPrefixProvider* dpp,
-                              optional_yield y,
-                              const RGWPeriod& period) override;
-  int list_realm_names(const DoutPrefixProvider* dpp,
-                       optional_yield y, const std::string& marker,
-                       std::span<std::string> entries,
-                       sal::ListResult<std::string>& result) override;
-
-  int create_period(const DoutPrefixProvider* dpp,
-                    optional_yield y, bool exclusive,
-                    const RGWPeriod& info) override;
-  int read_period(const DoutPrefixProvider* dpp,
-                  optional_yield y, std::string_view period_id,
-                  std::optional<uint32_t> epoch, RGWPeriod& info) override;
-  int delete_period(const DoutPrefixProvider* dpp,
-                    optional_yield y,
-                    std::string_view period_id) override;
-  int list_period_ids(const DoutPrefixProvider* dpp,
-                      optional_yield y, const std::string& marker,
-                      std::span<std::string> entries,
-                      sal::ListResult<std::string>& result) override;
-
-  int write_default_zonegroup_id(const DoutPrefixProvider* dpp,
-                                 optional_yield y, bool exclusive,
-                                 std::string_view realm_id,
-                                 std::string_view zonegroup_id) override;
-  int read_default_zonegroup_id(const DoutPrefixProvider* dpp,
-                                optional_yield y,
-                                std::string_view realm_id,
-                                std::string& zonegroup_id) override;
-  int delete_default_zonegroup_id(const DoutPrefixProvider* dpp,
-                                  optional_yield y,
-                                  std::string_view realm_id) override;
-
-  int create_zonegroup(const DoutPrefixProvider* dpp,
-                       optional_yield y, bool exclusive,
-                       const RGWZoneGroup& info,
-                       std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
-  int read_zonegroup_by_id(const DoutPrefixProvider* dpp,
-                           optional_yield y,
-                           std::string_view zonegroup_id,
-                           RGWZoneGroup& info,
-                           std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
-  int read_zonegroup_by_name(const DoutPrefixProvider* dpp,
-                             optional_yield y,
-                             std::string_view zonegroup_name,
-                             RGWZoneGroup& info,
-                             std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
-  int read_default_zonegroup(const DoutPrefixProvider* dpp,
-                             optional_yield y,
-                             std::string_view realm_id,
-                             RGWZoneGroup& info,
-                             std::unique_ptr<sal::ZoneGroupWriter>* writer) override;
-  int list_zonegroup_names(const DoutPrefixProvider* dpp,
-                           optional_yield y, const std::string& marker,
-                           std::span<std::string> entries,
-                           sal::ListResult<std::string>& result) override;
-
-  int write_default_zone_id(const DoutPrefixProvider* dpp,
-                            optional_yield y, bool exclusive,
-                            std::string_view realm_id,
-                            std::string_view zone_id) override;
-  int read_default_zone_id(const DoutPrefixProvider* dpp,
-                           optional_yield y,
-                           std::string_view realm_id,
-                           std::string& zone_id) override;
-  int delete_default_zone_id(const DoutPrefixProvider* dpp,
-                             optional_yield y,
-                             std::string_view realm_id) override;
-
-  int create_zone(const DoutPrefixProvider* dpp,
-                  optional_yield y, bool exclusive,
-                  const RGWZoneParams& info,
-                  std::unique_ptr<sal::ZoneWriter>* writer) override;
-  int read_zone_by_id(const DoutPrefixProvider* dpp,
-                      optional_yield y,
-                      std::string_view zone_id,
-                      RGWZoneParams& info,
-                      std::unique_ptr<sal::ZoneWriter>* writer) override;
-  int read_zone_by_name(const DoutPrefixProvider* dpp,
-                        optional_yield y,
-                        std::string_view zone_name,
-                        RGWZoneParams& info,
-                        std::unique_ptr<sal::ZoneWriter>* writer) override;
-  int read_default_zone(const DoutPrefixProvider* dpp,
-                        optional_yield y,
-                        std::string_view realm_id,
-                        RGWZoneParams& info,
-                        std::unique_ptr<sal::ZoneWriter>* writer) override;
-  int list_zone_names(const DoutPrefixProvider* dpp,
-                      optional_yield y, const std::string& marker,
-                      std::span<std::string> entries,
-                      sal::ListResult<std::string>& result) override;
-
-  int read_period_config(const DoutPrefixProvider* dpp,
-                         optional_yield y,
-                         std::string_view realm_id,
-                         RGWPeriodConfig& info) override;
-  int write_period_config(const DoutPrefixProvider* dpp,
-                          optional_yield y, bool exclusive,
-                          std::string_view realm_id,
-                          const RGWPeriodConfig& info) override;
-
- private:
-  std::unique_ptr<SQLiteImpl> impl;
-}; // SQLiteConfigStore
-
-
-auto create_sqlite_store(const DoutPrefixProvider* dpp, const std::string& uri)
-    -> std::unique_ptr<sal::ConfigStore>;
-
-} // namespace rgw::dbstore::config
diff --git a/src/rgw/store/dbstore/config/sqlite_schema.h b/src/rgw/store/dbstore/config/sqlite_schema.h
deleted file mode 100644
index c8a8fce3e72..00000000000
--- a/src/rgw/store/dbstore/config/sqlite_schema.h
+++ /dev/null
@@ -1,299 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- * - */ - -#pragma once - -#include - -namespace rgw::dbstore::config::schema { - -struct Migration { - // human-readable description to help with debugging migration errors - const char* description = nullptr; - // series of sql statements to apply the schema migration - const char* up = nullptr; - // series of sql statements to undo the schema migration - const char* down = nullptr; -}; - -static constexpr std::initializer_list migrations {{ - .description = "create the initial ConfigStore tables", - .up = R"( -CREATE TABLE IF NOT EXISTS Realms ( - ID TEXT PRIMARY KEY NOT NULL, - Name TEXT UNIQUE NOT NULL, - CurrentPeriod TEXT, - Epoch INTEGER DEFAULT 0, - VersionNumber INTEGER, - VersionTag TEXT -); -CREATE TABLE IF NOT EXISTS Periods ( - ID TEXT NOT NULL, - Epoch INTEGER DEFAULT 0, - RealmID TEXT NOT NULL REFERENCES Realms (ID), - Data TEXT NOT NULL, - PRIMARY KEY (ID, Epoch) -); -CREATE TABLE IF NOT EXISTS PeriodConfigs ( - RealmID TEXT PRIMARY KEY NOT NULL REFERENCES Realms (ID), - Data TEXT NOT NULL -); -CREATE TABLE IF NOT EXISTS ZoneGroups ( - ID TEXT PRIMARY KEY NOT NULL, - Name TEXT UNIQUE NOT NULL, - RealmID TEXT NOT NULL REFERENCES Realms (ID), - Data TEXT NOT NULL, - VersionNumber INTEGER, - VersionTag TEXT -); -CREATE TABLE IF NOT EXISTS Zones ( - ID TEXT PRIMARY KEY NOT NULL, - Name TEXT UNIQUE NOT NULL, - RealmID TEXT NOT NULL REFERENCES Realms (ID), - Data TEXT NOT NULL, - VersionNumber INTEGER, - VersionTag TEXT -); -CREATE TABLE IF NOT EXISTS DefaultRealms ( - ID TEXT, - Empty TEXT PRIMARY KEY -); -CREATE TABLE IF NOT EXISTS DefaultZoneGroups ( - ID TEXT, - RealmID TEXT PRIMARY KEY REFERENCES Realms (ID) -); -CREATE TABLE IF NOT EXISTS DefaultZones ( - ID TEXT, - RealmID TEXT PRIMARY KEY REFERENCES Realms (ID) -); -)", - .down = R"( -DROP TABLE IF EXISTS Realms; -DROP TABLE IF EXISTS Periods; -DROP TABLE IF EXISTS PeriodConfigs; -DROP TABLE IF EXISTS ZoneGroups; -DROP TABLE IF EXISTS Zones; -DROP TABLE IF EXISTS DefaultRealms; -DROP TABLE IF EXISTS DefaultZoneGroups; -DROP TABLE IF EXISTS DefaultZones; -)" - } -}; - - -// DefaultRealms - -static constexpr const char* default_realm_insert1 = -"INSERT INTO DefaultRealms (ID, Empty) VALUES ({}, '')"; - -static constexpr const char* default_realm_upsert1 = -R"(INSERT INTO DefaultRealms (ID, Empty) VALUES ({0}, '') -ON CONFLICT(Empty) DO UPDATE SET ID = {0})"; - -static constexpr const char* default_realm_select0 = -"SELECT ID FROM DefaultRealms LIMIT 1"; - -static constexpr const char* default_realm_delete0 = -"DELETE FROM DefaultRealms"; - - -// Realms - -static constexpr const char* realm_update5 = -"UPDATE Realms SET CurrentPeriod = {1}, Epoch = {2}, VersionNumber = {3} + 1 \ -WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; - -static constexpr const char* realm_rename4 = -"UPDATE Realms SET Name = {1}, VersionNumber = {2} + 1 \ -WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; - -static constexpr const char* realm_delete3 = -"DELETE FROM Realms WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}"; - -static constexpr const char* realm_insert4 = -"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \ -VALUES ({}, {}, {}, {})"; - -static constexpr const char* realm_upsert4 = -"INSERT INTO Realms (ID, Name, VersionNumber, VersionTag) \ -VALUES ({0}, {1}, {2}, {3}) \ -ON CONFLICT(ID) DO UPDATE SET Name = {1}, \ -VersionNumber = {2}, VersionTag = {3}"; - -static constexpr const char* realm_select_id1 = -"SELECT * FROM Realms WHERE ID = {} LIMIT 1"; - -static constexpr const char* 
realm_select_name1 = -"SELECT * FROM Realms WHERE Name = {} LIMIT 1"; - -static constexpr const char* realm_select_default0 = -"SELECT r.* FROM Realms r \ -INNER JOIN DefaultRealms d \ -ON d.ID = r.ID LIMIT 1"; - -static constexpr const char* realm_select_names2 = -"SELECT Name FROM Realms WHERE Name > {} \ -ORDER BY Name ASC LIMIT {}"; - - -// Periods - -static constexpr const char* period_insert4 = -"INSERT INTO Periods (ID, Epoch, RealmID, Data) \ -VALUES ({}, {}, {}, {})"; - -static constexpr const char* period_upsert4 = -"INSERT INTO Periods (ID, Epoch, RealmID, Data) \ -VALUES ({0}, {1}, {2}, {3}) \ -ON CONFLICT DO UPDATE SET RealmID = {2}, Data = {3}"; - -static constexpr const char* period_select_epoch2 = -"SELECT * FROM Periods WHERE ID = {} AND Epoch = {} LIMIT 1"; - -static constexpr const char* period_select_latest1 = -"SELECT * FROM Periods WHERE ID = {} ORDER BY Epoch DESC LIMIT 1"; - -static constexpr const char* period_delete1 = -"DELETE FROM Periods WHERE ID = {}"; - -static constexpr const char* period_select_ids2 = -"SELECT ID FROM Periods WHERE ID > {} ORDER BY ID ASC LIMIT {}"; - - -// DefaultZoneGroups - -static constexpr const char* default_zonegroup_insert2 = -"INSERT INTO DefaultZoneGroups (RealmID, ID) VALUES ({}, {})"; - -static constexpr const char* default_zonegroup_upsert2 = -"INSERT INTO DefaultZoneGroups (RealmID, ID) \ -VALUES ({0}, {1}) \ -ON CONFLICT(RealmID) DO UPDATE SET ID = {1}"; - -static constexpr const char* default_zonegroup_select1 = -"SELECT ID FROM DefaultZoneGroups WHERE RealmID = {}"; - -static constexpr const char* default_zonegroup_delete1 = -"DELETE FROM DefaultZoneGroups WHERE RealmID = {}"; - - -// ZoneGroups - -static constexpr const char* zonegroup_update5 = -"UPDATE ZoneGroups SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \ -WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; - -static constexpr const char* zonegroup_rename4 = -"UPDATE ZoneGroups SET Name = {1}, VersionNumber = {2} + 1 \ -WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; - -static constexpr const char* zonegroup_delete3 = -"DELETE FROM ZoneGroups WHERE ID = {} \ -AND VersionNumber = {} AND VersionTag = {}"; - -static constexpr const char* zonegroup_insert6 = -"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ -VALUES ({}, {}, {}, {}, {}, {})"; - -static constexpr const char* zonegroup_upsert6 = -"INSERT INTO ZoneGroups (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ -VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \ -ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \ -Data = {3}, VersionNumber = {4}, VersionTag = {5}"; - -static constexpr const char* zonegroup_select_id1 = -"SELECT * FROM ZoneGroups WHERE ID = {} LIMIT 1"; - -static constexpr const char* zonegroup_select_name1 = -"SELECT * FROM ZoneGroups WHERE Name = {} LIMIT 1"; - -static constexpr const char* zonegroup_select_default0 = -"SELECT z.* FROM ZoneGroups z \ -INNER JOIN DefaultZoneGroups d \ -ON d.ID = z.ID LIMIT 1"; - -static constexpr const char* zonegroup_select_names2 = -"SELECT Name FROM ZoneGroups WHERE Name > {} \ -ORDER BY Name ASC LIMIT {}"; - - -// DefaultZones - -static constexpr const char* default_zone_insert2 = -"INSERT INTO DefaultZones (RealmID, ID) VALUES ({}, {})"; - -static constexpr const char* default_zone_upsert2 = -"INSERT INTO DefaultZones (RealmID, ID) VALUES ({0}, {1}) \ -ON CONFLICT(RealmID) DO UPDATE SET ID = {1}"; - -static constexpr const char* default_zone_select1 = -"SELECT ID FROM DefaultZones WHERE RealmID 
= {}"; - -static constexpr const char* default_zone_delete1 = -"DELETE FROM DefaultZones WHERE RealmID = {}"; - - -// Zones - -static constexpr const char* zone_update5 = -"UPDATE Zones SET RealmID = {1}, Data = {2}, VersionNumber = {3} + 1 \ -WHERE ID = {0} AND VersionNumber = {3} AND VersionTag = {4}"; - -static constexpr const char* zone_rename4 = -"UPDATE Zones SET Name = {1}, VersionNumber = {2} + 1 \ -WHERE ID = {0} AND VersionNumber = {2} AND VersionTag = {3}"; - -static constexpr const char* zone_delete3 = -"DELETE FROM Zones WHERE ID = {} AND VersionNumber = {} AND VersionTag = {}"; - -static constexpr const char* zone_insert6 = -"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ -VALUES ({}, {}, {}, {}, {}, {})"; - -static constexpr const char* zone_upsert6 = -"INSERT INTO Zones (ID, Name, RealmID, Data, VersionNumber, VersionTag) \ -VALUES ({0}, {1}, {2}, {3}, {4}, {5}) \ -ON CONFLICT (ID) DO UPDATE SET Name = {1}, RealmID = {2}, \ -Data = {3}, VersionNumber = {4}, VersionTag = {5}"; - -static constexpr const char* zone_select_id1 = -"SELECT * FROM Zones WHERE ID = {} LIMIT 1"; - -static constexpr const char* zone_select_name1 = -"SELECT * FROM Zones WHERE Name = {} LIMIT 1"; - -static constexpr const char* zone_select_default0 = -"SELECT z.* FROM Zones z \ -INNER JOIN DefaultZones d \ -ON d.ID = z.ID LIMIT 1"; - -static constexpr const char* zone_select_names2 = -"SELECT Name FROM Zones WHERE Name > {} \ -ORDER BY Name ASC LIMIT {}"; - - -// PeriodConfigs - -static constexpr const char* period_config_insert2 = -"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({}, {})"; - -static constexpr const char* period_config_upsert2 = -"INSERT INTO PeriodConfigs (RealmID, Data) VALUES ({0}, {1}) \ -ON CONFLICT (RealmID) DO UPDATE SET Data = {1}"; - -static constexpr const char* period_config_select1 = -"SELECT Data FROM PeriodConfigs WHERE RealmID = {} LIMIT 1"; - -} // namespace rgw::dbstore::config::schema diff --git a/src/rgw/store/dbstore/config/store.cc b/src/rgw/store/dbstore/config/store.cc deleted file mode 100644 index 66f7471d534..00000000000 --- a/src/rgw/store/dbstore/config/store.cc +++ /dev/null @@ -1,40 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "store.h" -#ifdef SQLITE_ENABLED -#include "sqlite.h" -#endif - -namespace rgw::dbstore { - -auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri) - -> std::unique_ptr -{ -#ifdef SQLITE_ENABLED - if (uri.starts_with("file:")) { - return config::create_sqlite_store(dpp, uri); - } -#endif - throw std::runtime_error(fmt::format("unrecognized URI {}", uri)); -} - -} // namespace rgw::dbstore diff --git a/src/rgw/store/dbstore/config/store.h b/src/rgw/store/dbstore/config/store.h deleted file mode 100644 index 553d9f70934..00000000000 --- a/src/rgw/store/dbstore/config/store.h +++ /dev/null @@ -1,27 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. 
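A note on the schema header above: the numeric suffix on each statement name (`realm_update5`, `zone_select_names2`, `period_config_insert2`, ...) records how many `{}` placeholders the template takes, and templates that reuse an argument, such as `default_realm_upsert1`, switch to positional `{0}` so a single bound value covers both occurrences. The placeholders are expanded with `fmt::format()` at prepare time with the *names* of bind parameters, not values, so the resulting SQL is still fully parameterized. A sketch of that expansion, assuming `P1`/`P2` are SQLite named-parameter tokens such as `":1"`/`":2"` (their definitions are outside this diff):

```cpp
// Illustrative expansion of a numbered statement template. Treating
// P1/P2 as ":1"/":2" is an assumption; only the mechanism (format once,
// prepare once, bind by name) matches the code above.
#include <cassert>
#include <string>

#define FMT_HEADER_ONLY 1
#include <fmt/format.h>

static constexpr const char* zone_select_names2 =
    "SELECT Name FROM Zones WHERE Name > {} ORDER BY Name ASC LIMIT {}";

int main()
{
  const char* P1 = ":1";
  const char* P2 = ":2";
  // fmt::runtime() because the pattern is a variable, not a literal
  const std::string sql =
      fmt::format(fmt::runtime(zone_select_names2), P1, P2);
  assert(sql == "SELECT Name FROM Zones WHERE Name > :1 "
                "ORDER BY Name ASC LIMIT :2");
  // the marker and page size are then bound by name at execution time,
  // e.g. via sqlite3_bind_parameter_index(stmt, ":1") + sqlite3_bind_text()
  return 0;
}
```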
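`create_config_store()` in `store.cc` above is the factory the rest of RGW calls: it dispatches on the URI scheme, handing `file:` URIs to the SQLite backend and throwing `std::runtime_error` for anything it does not recognize. A hypothetical call site (the path is invented for illustration):

```cpp
// Hypothetical caller of the deleted factory; the URI is an example.
// create_config_store() throws std::runtime_error for unknown schemes.
#include <memory>
#include <string>
// #include "store.h"  // rgw::dbstore::create_config_store

std::unique_ptr<rgw::sal::ConfigStore> open_config(const DoutPrefixProvider* dpp)
{
  const std::string uri = "file:/var/lib/ceph/radosgw/config.db";
  return rgw::dbstore::create_config_store(dpp, uri);  // SQLite-backed
}
```

Because `create_sqlite_store()` opens with `SQLITE_OPEN_URI | SQLITE_OPEN_CREATE`, the database file is created on first use and URI query parameters (for example `?mode=ro`) are honored.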
- * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#pragma once - -#include -#include "rgw_sal_config.h" - -namespace rgw::dbstore { - -// ConfigStore factory -auto create_config_store(const DoutPrefixProvider* dpp, const std::string& uri) - -> std::unique_ptr; - -} // namespace rgw::dbstore diff --git a/src/rgw/store/dbstore/dbstore_main.cc b/src/rgw/store/dbstore/dbstore_main.cc deleted file mode 100644 index 08724d8227e..00000000000 --- a/src/rgw/store/dbstore/dbstore_main.cc +++ /dev/null @@ -1,201 +0,0 @@ -#include -#include -#include -#include -#include - -#include "dbstore_mgr.h" -#include -#include - -using namespace std; -using namespace rgw::store; -using DB = rgw::store::DB; - -struct thr_args { - DB *dbs; - int thr_id; -}; - -void* process(void *arg) -{ - struct thr_args *t_args = (struct thr_args*)arg; - - DB *db = t_args->dbs; - int thr_id = t_args->thr_id; - int ret = -1; - - cout<<"Entered thread:"<get_def_dpp(); - - db->InitializeParams(dpp, ¶ms); - - params.op.user.uinfo.display_name = user1; - params.op.user.uinfo.user_id.tenant = "tenant"; - params.op.user.uinfo.user_id.id = user1; - params.op.user.uinfo.suspended = 123; - params.op.user.uinfo.max_buckets = 456; - params.op.user.uinfo.assumed_role_arn = "role"; - params.op.user.uinfo.placement_tags.push_back("tags1"); - params.op.user.uinfo.placement_tags.push_back("tags2"); - - RGWAccessKey k1("id1", "key1"); - RGWAccessKey k2("id2", "key2"); - params.op.user.uinfo.access_keys.insert(make_pair("key1", k1)); - params.op.user.uinfo.access_keys.insert(make_pair("key2", k2)); - - ret = db->ProcessOp(dpp, "InsertUser", ¶ms); - cout << "InsertUser return value: " << ret << "\n"; - - DBOpParams params2 = {}; - params.op.user.uinfo.user_id.tenant = "tenant2"; - - db->InitializeParams(dpp, ¶ms2); - params2.op.user.uinfo.display_name = user1; - ret = db->ProcessOp(dpp, "GetUser", ¶ms2); - - cout << "GetUser return value: " << ret << "\n"; - - cout << "tenant: " << params2.op.user.uinfo.user_id.tenant << "\n"; - cout << "suspended: " << (int)params2.op.user.uinfo.suspended << "\n"; - cout << "assumed_role_arn: " << params2.op.user.uinfo.assumed_role_arn << "\n"; - - list::iterator it = params2.op.user.uinfo.placement_tags.begin(); - - while (it != params2.op.user.uinfo.placement_tags.end()) { - cout << "list = " << *it << "\n"; - it++; - } - - map::iterator it2 = params2.op.user.uinfo.access_keys.begin(); - - while (it2 != params2.op.user.uinfo.access_keys.end()) { - cout << "keys = " << it2->first << "\n"; - RGWAccessKey k = it2->second; - cout << "id = " << k.id << ", keys = " << k.key << "\n"; - it2++; - } - - params.op.bucket.info.bucket.name = bucketa; - db->ProcessOp(dpp, "InsertBucket", ¶ms); - - params.op.user.uinfo.display_name = user2; - params.op.user.uinfo.user_id.id = user2; - db->ProcessOp(dpp, "InsertUser", ¶ms); - - params.op.bucket.info.bucket.name = bucketb; - db->ProcessOp(dpp, "InsertBucket", ¶ms); - - db->ProcessOp(dpp, "GetUser", ¶ms); - db->ProcessOp(dpp, "GetBucket", ¶ms); - - db->ListAllUsers(dpp, ¶ms); - db->ListAllBuckets(dpp, ¶ms); - - params.op.bucket.info.bucket.name = bucketb; - - db->ProcessOp(dpp, "RemoveBucket", ¶ms); - - params.op.user.uinfo.user_id.id = user2; - db->ProcessOp(dpp, "RemoveUser", ¶ms); - - db->ListAllUsers(dpp, ¶ms); - db->ListAllBuckets(dpp, ¶ms); - cout<<"Exiting thread:"< args; - auto cct = 
global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_DAEMON, CINIT_FLAG_NO_MON_CONFIG, 1); - dbsm = new DBStoreManager(cct.get(), logfile, loglevel); - dbs = dbsm->getDB(tenant, true); - - cout<<"No. of threads being created = "<destroyAllHandles(); - - return 0; -} diff --git a/src/rgw/store/dbstore/dbstore_mgr.cc b/src/rgw/store/dbstore/dbstore_mgr.cc deleted file mode 100644 index 6835f526bc6..00000000000 --- a/src/rgw/store/dbstore/dbstore_mgr.cc +++ /dev/null @@ -1,140 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "dbstore_mgr.h" -#include "common/dbstore_log.h" - -#include - -static constexpr auto dout_subsys = ceph_subsys_rgw; - -using namespace std; - - -/* Given a tenant, find and return the DBStore handle. - * If not found and 'create' set to true, create one - * and return - */ -DB *DBStoreManager::getDB (string tenant, bool create) -{ - map::iterator iter; - DB *dbs = nullptr; - pair::iterator,bool> ret; - - if (tenant.empty()) - return default_db; - - if (DBStoreHandles.empty()) - goto not_found; - - iter = DBStoreHandles.find(tenant); - - if (iter != DBStoreHandles.end()) - return iter->second; - -not_found: - if (!create) - return nullptr; - - dbs = createDB(tenant); - - return dbs; -} - -/* Create DBStore instance */ -DB *DBStoreManager::createDB(std::string tenant) { - DB *dbs = nullptr; - pair::iterator,bool> ret; - const auto& db_path = g_conf().get_val("dbstore_db_dir"); - const auto& db_name = g_conf().get_val("dbstore_db_name_prefix") + "-" + tenant; - - auto db_full_path = std::filesystem::path(db_path) / db_name; - ldout(cct, 0) << "DB initialization full db_path("<Initialize("", -1) < 0) { - ldout(cct, 0) << "DB initialization failed for tenant("<(tenant, dbs)); - - /* - * Its safe to check for already existing entry (just - * incase other thread raced and created the entry) - */ - if (ret.second == false) { - /* Entry already created by another thread */ - delete dbs; - - dbs = ret.first->second; - } - - return dbs; -} - -void DBStoreManager::deleteDB(string tenant) { - map::iterator iter; - DB *dbs = nullptr; - - if (tenant.empty() || DBStoreHandles.empty()) - return; - - /* XXX: Check if we need to perform this operation under a lock */ - iter = DBStoreHandles.find(tenant); - - if (iter == DBStoreHandles.end()) - return; - - dbs = iter->second; - - DBStoreHandles.erase(iter); - dbs->Destroy(dbs->get_def_dpp()); - delete dbs; - - return; -} - -void DBStoreManager::deleteDB(DB *dbs) { - if (!dbs) - return; - - (void)deleteDB(dbs->getDBname()); -} - - -void DBStoreManager::destroyAllHandles(){ - map::iterator iter; - DB *dbs = nullptr; - - if (DBStoreHandles.empty()) - return; - - for (iter = DBStoreHandles.begin(); iter != DBStoreHandles.end(); - ++iter) { - dbs = iter->second; - dbs->Destroy(dbs->get_def_dpp()); - delete dbs; - } - - DBStoreHandles.clear(); - - return; -} - - diff --git a/src/rgw/store/dbstore/dbstore_mgr.h b/src/rgw/store/dbstore/dbstore_mgr.h deleted file mode 100644 index 77fc3aaf731..00000000000 --- a/src/rgw/store/dbstore/dbstore_mgr.h +++ /dev/null @@ -1,56 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/ceph_context.h" -#include "common/dbstore.h" -#include "sqlite/sqliteDB.h" - -using namespace rgw::store; -using DB = rgw::store::DB; - -/* XXX: Should be a dbstore config option */ 
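`DBStoreManager` above keeps one open `DB` handle per tenant: `getDB(tenant, create)` returns the cached handle, falls back to the default handle when the tenant is empty, and only calls `createDB()` when asked to; `createDB()` in turn builds the database path from the `dbstore_db_dir` and `dbstore_db_name_prefix` options and tolerates a racing creator by discarding its own instance in favor of the one already inserted. A hedged usage sketch (assumes an initialized `CephContext`), against the declaration that follows:

```cpp
// Sketch of per-tenant handle management with DBStoreManager.
// Assumes an initialized CephContext*; not a drop-in test.
#include <memory>
// #include "dbstore_mgr.h"

void example(CephContext* cct)
{
  DBStoreManager mgr(cct);                // opens the default-tenant database

  DB* def = mgr.getDB();                  // default handle
  DB* t1  = mgr.getDB("tenant1", true);   // created on first use
  DB* t1b = mgr.getDB("tenant1", false);  // cached lookup: t1b == t1
  (void) def; (void) t1b;

  mgr.deleteDB("tenant1");                // Destroy()s the handle; t1 now dangles
}                                         // ~DBStoreManager() drops the rest
```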
-const static std::string default_tenant = "default_ns"; - -class DBStoreManager { -private: - std::map DBStoreHandles; - DB *default_db = nullptr; - CephContext *cct; - -public: - DBStoreManager(CephContext *_cct): DBStoreHandles() { - cct = _cct; - default_db = createDB(default_tenant); - }; - DBStoreManager(CephContext *_cct, std::string logfile, int loglevel): DBStoreHandles() { - /* No ceph context. Create one with log args provided */ - cct = _cct; - cct->_log->set_log_file(logfile); - cct->_log->reopen_log_file(); - cct->_conf->subsys.set_log_level(ceph_subsys_rgw, loglevel); - default_db = createDB(default_tenant); - }; - ~DBStoreManager() { destroyAllHandles(); }; - - /* XXX: TBD based on testing - * 1) Lock to protect DBStoreHandles map. - * 2) Refcount of each DBStore to protect from - * being deleted while using it. - */ - DB* getDB () { return default_db; }; - DB* getDB (std::string tenant, bool create); - DB* createDB (std::string tenant); - void deleteDB (std::string tenant); - void deleteDB (DB* db); - void destroyAllHandles(); -}; diff --git a/src/rgw/store/dbstore/sqlite/CMakeLists.txt b/src/rgw/store/dbstore/sqlite/CMakeLists.txt deleted file mode 100644 index 909765e3058..00000000000 --- a/src/rgw/store/dbstore/sqlite/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.14.0) -project(sqlite_db) - -find_package(SQLite3 REQUIRED) - -set(sqlite_db_srcs - sqliteDB.h - sqliteDB.cc) - -include_directories(${CMAKE_INCLUDE_DIR}) - -set(SQLITE_COMPILE_FLAGS "-DSQLITE_THREADSAFE=1") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SQLITE_COMPILE_FLAGS}") - -add_library(sqlite_db STATIC ${sqlite_db_srcs}) -target_link_libraries(sqlite_db sqlite3 dbstore_lib rgw_common) diff --git a/src/rgw/store/dbstore/sqlite/connection.cc b/src/rgw/store/dbstore/sqlite/connection.cc deleted file mode 100644 index 143a3a0d5a1..00000000000 --- a/src/rgw/store/dbstore/sqlite/connection.cc +++ /dev/null @@ -1,34 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include "common/dout.h" -#include "connection.h" -#include "error.h" - -namespace rgw::dbstore::sqlite { - -db_ptr open_database(const char* filename, int flags) -{ - sqlite3* db = nullptr; - const int result = ::sqlite3_open_v2(filename, &db, flags, nullptr); - if (result != SQLITE_OK) { - throw std::system_error(result, sqlite::error_category()); - } - // request extended result codes - (void) ::sqlite3_extended_result_codes(db, 1); - return db_ptr{db}; -} - -} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/store/dbstore/sqlite/connection.h b/src/rgw/store/dbstore/sqlite/connection.h deleted file mode 100644 index f5cd77d6e26..00000000000 --- a/src/rgw/store/dbstore/sqlite/connection.h +++ /dev/null @@ -1,66 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
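`open_database()` in `connection.cc` above wraps `sqlite3_open_v2()` so failures surface as `std::system_error` and the handle is owned by a `unique_ptr` whose deleter calls `sqlite3_close()`; it also enables extended result codes, which the error layer later in this diff relies on to tell, say, a primary-key violation apart from a generic constraint failure. A small usage sketch (the path is an example; the flags mirror `create_sqlite_store()` earlier in this diff):

```cpp
// Sketch: opening a connection with the deleted helper.
#include <sqlite3.h>
// #include "sqlite/connection.h"  // rgw::dbstore::sqlite::open_database

void example()
{
  using namespace rgw::dbstore::sqlite;
  constexpr int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
                        SQLITE_OPEN_URI | SQLITE_OPEN_NOMUTEX;
  db_ptr db = open_database("file:/tmp/example.db", flags);  // throws on error

  // prepared statements can then be cached per connection, as the
  // Connection struct below does with its statements map
  (void) db;
}  // db_ptr's deleter closes the handle via sqlite3_close()
```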
- * - */ - -#pragma once - -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "sqlite/statement.h" - -class DoutPrefixProvider; - -namespace rgw::dbstore::sqlite { - -// owning sqlite3 pointer -struct db_deleter { - void operator()(sqlite3* p) const { ::sqlite3_close(p); } -}; -using db_ptr = std::unique_ptr; - - -// open the database file or throw on error -db_ptr open_database(const char* filename, int flags); - - -struct Connection { - db_ptr db; - // map of statements, prepared on first use - std::map statements; - - explicit Connection(db_ptr db) : db(std::move(db)) {} -}; - -// sqlite connection factory for ConnectionPool -class ConnectionFactory { - std::string uri; - int flags; - public: - ConnectionFactory(std::string uri, int flags) - : uri(std::move(uri)), flags(flags) {} - - auto operator()(const DoutPrefixProvider* dpp) - -> std::unique_ptr - { - auto db = open_database(uri.c_str(), flags); - return std::make_unique(std::move(db)); - } -}; - -} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/store/dbstore/sqlite/error.cc b/src/rgw/store/dbstore/sqlite/error.cc deleted file mode 100644 index 5fe9eb0ae82..00000000000 --- a/src/rgw/store/dbstore/sqlite/error.cc +++ /dev/null @@ -1,37 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include "error.h" - -namespace rgw::dbstore::sqlite { - -const std::error_category& error_category() -{ - struct category : std::error_category { - const char* name() const noexcept override { - return "dbstore:sqlite"; - } - std::string message(int ev) const override { - return ::sqlite3_errstr(ev); - } - std::error_condition default_error_condition(int code) const noexcept override { - return {code & 0xFF, category()}; - } - }; - static category instance; - return instance; -} - -} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/store/dbstore/sqlite/error.h b/src/rgw/store/dbstore/sqlite/error.h deleted file mode 100644 index 15396d8ca2b..00000000000 --- a/src/rgw/store/dbstore/sqlite/error.h +++ /dev/null @@ -1,81 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
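`error_category()` above folds any extended result code back to its primary code in `default_error_condition()` by masking the low byte (`code & 0xFF`). The practical effect: comparing an `std::error_code` against a primary `errc` condition such as `errc::constraint` also matches every `SQLITE_CONSTRAINT_*` extended code, while comparing against an extended value still matches exactly. For example:

```cpp
// Demonstrates primary/extended result-code matching via the masked
// default_error_condition() shown above.
#include <cassert>
#include <system_error>
// #include "sqlite/error.h"

void example()
{
  using namespace rgw::dbstore::sqlite;

  // SQLITE_CONSTRAINT_PRIMARYKEY is an extended code; its low byte is
  // SQLITE_CONSTRAINT, so it satisfies the primary condition as well
  std::error_code ec = make_error_code(errc::primary_key_constraint);
  assert(ec == errc::primary_key_constraint);  // exact extended match
  assert(ec == errc::constraint);              // primary condition matches

  // this is how write_period_config() earlier turns the code into -EEXIST
  assert(ec != errc::busy);
}
```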
- * - */ - -#pragma once - -#include -#include - -namespace rgw::dbstore::sqlite { - -// error category for sqlite extended result codes: -// https://www.sqlite.org/rescode.html -const std::error_category& error_category(); - - -// sqlite exception type that carries the extended error code and message -class error : public std::runtime_error { - std::error_code ec; - public: - error(const char* errmsg, std::error_code ec) - : runtime_error(errmsg), ec(ec) {} - error(sqlite3* db, std::error_code ec) : error(::sqlite3_errmsg(db), ec) {} - error(sqlite3* db, int result) : error(db, {result, error_category()}) {} - error(sqlite3* db) : error(db, ::sqlite3_extended_errcode(db)) {} - std::error_code code() const { return ec; } -}; - - -// sqlite error conditions for primary and extended result codes -// -// 'primary' error_conditions will match 'primary' error_codes as well as any -// 'extended' error_codes whose lowest 8 bits match that primary code. for -// example, the error_condition for SQLITE_CONSTRAINT will match the error_codes -// SQLITE_CONSTRAINT and SQLITE_CONSTRAINT_* -enum class errc { - // primary result codes - ok = SQLITE_OK, - busy = SQLITE_BUSY, - constraint = SQLITE_CONSTRAINT, - row = SQLITE_ROW, - done = SQLITE_DONE, - - // extended result codes - primary_key_constraint = SQLITE_CONSTRAINT_PRIMARYKEY, - foreign_key_constraint = SQLITE_CONSTRAINT_FOREIGNKEY, - unique_constraint = SQLITE_CONSTRAINT_UNIQUE, - - // ..add conditions as needed -}; - -inline std::error_code make_error_code(errc e) -{ - return {static_cast(e), error_category()}; -} - -inline std::error_condition make_error_condition(errc e) -{ - return {static_cast(e), error_category()}; -} - -} // namespace rgw::dbstore::sqlite - -namespace std { - -// enable implicit conversions from sqlite::errc to std::error_condition -template<> struct is_error_condition_enum< - rgw::dbstore::sqlite::errc> : public true_type {}; - -} // namespace std diff --git a/src/rgw/store/dbstore/sqlite/sqliteDB.cc b/src/rgw/store/dbstore/sqlite/sqliteDB.cc deleted file mode 100644 index b0ced456a3c..00000000000 --- a/src/rgw/store/dbstore/sqlite/sqliteDB.cc +++ /dev/null @@ -1,3001 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "sqliteDB.h" - -using namespace std; - -#define SQL_PREPARE(dpp, params, sdb, stmt, ret, Op) \ - do { \ - string schema; \ - schema = Schema(params); \ - sqlite3_prepare_v2 (*sdb, schema.c_str(), \ - -1, &stmt , NULL); \ - if (!stmt) { \ - ldpp_dout(dpp, 0) <<"failed to prepare statement " \ - <<"for Op("<(blob), blob_len); \ - \ - decode(param, b); \ - }while(0); - -#define SQL_EXECUTE(dpp, params, stmt, cbk, args...) 
\ - do{ \ - const std::lock_guard lk(((DBOp*)(this))->mtx); \ - if (!stmt) { \ - ret = Prepare(dpp, params); \ - } \ - \ - if (!stmt) { \ - ldpp_dout(dpp, 0) <<"No prepared statement "<< dendl; \ - goto out; \ - } \ - \ - ret = Bind(dpp, params); \ - if (ret) { \ - ldpp_dout(dpp, 0) <<"Bind parameters failed for stmt(" <op, stmt, cbk); \ - \ - Reset(dpp, stmt); \ - \ - if (ret) { \ - ldpp_dout(dpp, 0) <<"Execution failed for stmt(" <user_table.empty()) { - params->user_table = getUserTable(); - } - if (params->user_table.empty()) { - params->user_table = getUserTable(); - } - if (params->bucket_table.empty()) { - params->bucket_table = getBucketTable(); - } - if (params->quota_table.empty()) { - params->quota_table = getQuotaTable(); - } - if (params->lc_entry_table.empty()) { - params->lc_entry_table = getLCEntryTable(); - } - if (params->lc_head_table.empty()) { - params->lc_head_table = getLCHeadTable(); - } - - p_params.user_table = params->user_table; - p_params.bucket_table = params->bucket_table; - p_params.quota_table = params->quota_table; - p_params.lc_entry_table = params->lc_entry_table; - p_params.lc_head_table = params->lc_head_table; - - p_params.op.query_str = params->op.query_str; - - bucket = params->op.bucket.info.bucket.name; - - if (!bucket.empty()) { - if (params->object_table.empty()) { - params->object_table = getObjectTable(bucket); - } - if (params->objectdata_table.empty()) { - params->objectdata_table = getObjectDataTable(bucket); - } - if (params->object_view.empty()) { - params->object_view = getObjectView(bucket); - } - if (params->object_trigger.empty()) { - params->object_trigger = getObjectTrigger(bucket); - } - p_params.object_table = params->object_table; - p_params.objectdata_table = params->objectdata_table; - p_params.object_view = params->object_view; - } - - return 0; -} - -static int list_callback(void *None, int argc, char **argv, char **aname) -{ - int i; - for(i=0; i < argc; i++) { - string arg = argv[i] ? 
argv[i] : "NULL"; - cout<(&this->db, this->getDBname(), cct); - dbops.RemoveUser = make_shared(&this->db, this->getDBname(), cct); - dbops.GetUser = make_shared(&this->db, this->getDBname(), cct); - dbops.InsertBucket = make_shared(&this->db, this->getDBname(), cct); - dbops.UpdateBucket = make_shared(&this->db, this->getDBname(), cct); - dbops.RemoveBucket = make_shared(&this->db, this->getDBname(), cct); - dbops.GetBucket = make_shared(&this->db, this->getDBname(), cct); - dbops.ListUserBuckets = make_shared(&this->db, this->getDBname(), cct); - dbops.InsertLCEntry = make_shared(&this->db, this->getDBname(), cct); - dbops.RemoveLCEntry = make_shared(&this->db, this->getDBname(), cct); - dbops.GetLCEntry = make_shared(&this->db, this->getDBname(), cct); - dbops.ListLCEntries = make_shared(&this->db, this->getDBname(), cct); - dbops.InsertLCHead = make_shared(&this->db, this->getDBname(), cct); - dbops.RemoveLCHead = make_shared(&this->db, this->getDBname(), cct); - dbops.GetLCHead = make_shared(&this->db, this->getDBname(), cct); - - return 0; -} - -void *SQLiteDB::openDB(const DoutPrefixProvider *dpp) -{ - string dbname; - int rc = 0; - - dbname = getDBfile(); - if (dbname.empty()) { - ldpp_dout(dpp, 0)<<"dbname is NULL" << dendl; - goto out; - } - - rc = sqlite3_open_v2(dbname.c_str(), (sqlite3**)&db, - SQLITE_OPEN_READWRITE | - SQLITE_OPEN_CREATE | - SQLITE_OPEN_FULLMUTEX, - NULL); - - if (rc) { - ldpp_dout(dpp, 0) <<"Cant open "<user_table); - - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteUserTable failed " << dendl; - - ldpp_dout(dpp, 20)<<"DeleteUserTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::DeleteBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->bucket_table); - - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeletebucketTable failed " << dendl; - - ldpp_dout(dpp, 20)<<"DeletebucketTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::DeleteObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->object_table); - - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteObjectTable failed " << dendl; - - ldpp_dout(dpp, 20)<<"DeleteObjectTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::DeleteObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->objectdata_table); - - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteObjectDataTable failed " << dendl; - - ldpp_dout(dpp, 20)<<"DeleteObjectDataTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::DeleteQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->quota_table); - - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteQuotaTable failed " << dendl; - - ldpp_dout(dpp, 20)<<"DeleteQuotaTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::DeleteLCEntryTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->lc_entry_table); - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteLCEntryTable failed " << dendl; - ldpp_dout(dpp, 20)<<"DeleteLCEntryTable suceeded " << dendl; - - return ret; -} - -int 
SQLiteDB::DeleteLCHeadTable(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = DeleteTableSchema(params->lc_head_table); - ret = exec(dpp, schema.c_str(), NULL); - if (ret) - ldpp_dout(dpp, 0)<<"DeleteLCHeadTable failed " << dendl; - ldpp_dout(dpp, 20)<<"DeleteLCHeadTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = ListTableSchema(params->user_table); - ret = exec(dpp, schema.c_str(), &list_callback); - if (ret) - ldpp_dout(dpp, 0)<<"GetUsertable failed " << dendl; - - ldpp_dout(dpp, 20)<<"GetUserTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - - schema = ListTableSchema(params->bucket_table); - - ret = exec(dpp, schema.c_str(), &list_callback); - if (ret) - ldpp_dout(dpp, 0)<<"Listbuckettable failed " << dendl; - - ldpp_dout(dpp, 20)<<"ListbucketTable suceeded " << dendl; - - return ret; -} - -int SQLiteDB::ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) -{ - int ret = -1; - string schema; - map::iterator iter; - map objectmap; - string bucket; - - objectmap = getObjectMap(); - - if (objectmap.empty()) - ldpp_dout(dpp, 20)<<"objectmap empty " << dendl; - - for (iter = objectmap.begin(); iter != objectmap.end(); ++iter) { - bucket = iter->first; - params->object_table = getObjectTable(bucket); - schema = ListTableSchema(params->object_table); - - ret = exec(dpp, schema.c_str(), &list_callback); - if (ret) - ldpp_dout(dpp, 0)<<"ListObjecttable failed " << dendl; - - ldpp_dout(dpp, 20)<<"ListObjectTable suceeded " << dendl; - } - - return ret; -} - -int SQLObjectOp::InitializeObjectOps(string db_name, const DoutPrefixProvider *dpp) -{ - PutObject = make_shared(sdb, db_name, cct); - DeleteObject = make_shared(sdb, db_name, cct); - GetObject = make_shared(sdb, db_name, cct); - UpdateObject = make_shared(sdb, db_name, cct); - ListBucketObjects = make_shared(sdb, db_name, cct); - ListVersionedObjects = make_shared(sdb, db_name, cct); - PutObjectData = make_shared(sdb, db_name, cct); - UpdateObjectData = make_shared(sdb, db_name, cct); - GetObjectData = make_shared(sdb, db_name, cct); - DeleteObjectData = make_shared(sdb, db_name, cct); - DeleteStaleObjectData = make_shared(sdb, db_name, cct); - - return 0; -} - -int SQLInsertUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - struct DBOpPrepareParams p_params = PrepareParams; - - if (!*sdb) { - ldpp_dout(dpp, 0)<<"In SQLInsertUser - no db" << dendl; - goto out; - } - - InitPrepareParams(dpp, p_params, params); - - SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertUser"); -out: - return ret; -} - -int SQLInsertUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int index = -1; - int rc = 0; - struct DBOpPrepareParams p_params = PrepareParams; - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.tenant, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.tenant.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.ns, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.ns.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.display_name, sdb); - 
SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.display_name.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_email, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_email.c_str(), sdb); - - if (!params->op.user.uinfo.access_keys.empty()) { - string access_key; - string key; - map::const_iterator it = - params->op.user.uinfo.access_keys.begin(); - const RGWAccessKey& k = it->second; - access_key = k.id; - key = k.key; - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, access_key.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys_secret, sdb); - SQL_BIND_TEXT(dpp, stmt, index, key.c_str(), sdb); - - } - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.access_keys, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.access_keys, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.swift_keys, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.swift_keys, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.subusers, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.subusers, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.suspended, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.suspended, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.max_buckets, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.max_buckets, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.op_mask, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.op_mask, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_caps, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.caps, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.admin, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.admin, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.system, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.system, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_name, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.default_placement.name.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_storage_class, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.default_placement.storage_class.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.placement_tags, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.placement_tags, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.bucket_quota, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.quota.bucket_quota, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.temp_url_keys, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.temp_url_keys, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_quota, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.quota.user_quota, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.type, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.uinfo.type, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.mfa_ids, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.uinfo.mfa_ids, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.assumed_role_arn, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.assumed_role_arn.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, 
p_params.op.user.user_attrs, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.user.user_attrs, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_ver, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.user.user_version.ver, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_ver_tag, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.user_version.tag.c_str(), sdb); - -out: - return rc; -} - -int SQLInsertUser::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - - SQL_EXECUTE(dpp, params, stmt, NULL); -out: - return ret; -} - -int SQLRemoveUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - struct DBOpPrepareParams p_params = PrepareParams; - - if (!*sdb) { - ldpp_dout(dpp, 0)<<"In SQLRemoveUser - no db" << dendl; - goto out; - } - - InitPrepareParams(dpp, p_params, params); - - SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveUser"); -out: - return ret; -} - -int SQLRemoveUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int index = -1; - int rc = 0; - struct DBOpPrepareParams p_params = PrepareParams; - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); - -out: - return rc; -} - -int SQLRemoveUser::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - - SQL_EXECUTE(dpp, params, stmt, NULL); -out: - return ret; -} - -int SQLGetUser::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - struct DBOpPrepareParams p_params = PrepareParams; - - if (!*sdb) { - ldpp_dout(dpp, 0)<<"In SQLGetUser - no db" << dendl; - goto out; - } - - InitPrepareParams(dpp, p_params, params); - - if (params->op.query_str == "email") { - SQL_PREPARE(dpp, p_params, sdb, email_stmt, ret, "PrepareGetUser"); - } else if (params->op.query_str == "access_key") { - SQL_PREPARE(dpp, p_params, sdb, ak_stmt, ret, "PrepareGetUser"); - } else if (params->op.query_str == "user_id") { - SQL_PREPARE(dpp, p_params, sdb, userid_stmt, ret, "PrepareGetUser"); - } else { // by default by userid - SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetUser"); - } -out: - return ret; -} - -int SQLGetUser::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int index = -1; - int rc = 0; - struct DBOpPrepareParams p_params = PrepareParams; - - if (params->op.query_str == "email") { - SQL_BIND_INDEX(dpp, email_stmt, index, p_params.op.user.user_email, sdb); - SQL_BIND_TEXT(dpp, email_stmt, index, params->op.user.uinfo.user_email.c_str(), sdb); - } else if (params->op.query_str == "access_key") { - if (!params->op.user.uinfo.access_keys.empty()) { - string access_key; - map::const_iterator it = - params->op.user.uinfo.access_keys.begin(); - const RGWAccessKey& k = it->second; - access_key = k.id; - - SQL_BIND_INDEX(dpp, ak_stmt, index, p_params.op.user.access_keys_id, sdb); - SQL_BIND_TEXT(dpp, ak_stmt, index, access_key.c_str(), sdb); - } - } else if (params->op.query_str == "user_id") { - SQL_BIND_INDEX(dpp, userid_stmt, index, p_params.op.user.user_id, sdb); - SQL_BIND_TEXT(dpp, userid_stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); - } else { // by default by userid - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); - } - -out: - return rc; -} - -int SQLGetUser::Execute(const DoutPrefixProvider *dpp, 
struct DBOpParams *params) -{ - int ret = -1; - - if (params->op.query_str == "email") { - SQL_EXECUTE(dpp, params, email_stmt, list_user); - } else if (params->op.query_str == "access_key") { - SQL_EXECUTE(dpp, params, ak_stmt, list_user); - } else if (params->op.query_str == "user_id") { - SQL_EXECUTE(dpp, params, userid_stmt, list_user); - } else { // by default by userid - SQL_EXECUTE(dpp, params, stmt, list_user); - } - -out: - return ret; -} - -int SQLInsertBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - struct DBOpPrepareParams p_params = PrepareParams; - - if (!*sdb) { - ldpp_dout(dpp, 0)<<"In SQLInsertBucket - no db" << dendl; - goto out; - } - - InitPrepareParams(dpp, p_params, params); - - SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertBucket"); - -out: - return ret; -} - -int SQLInsertBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int index = -1; - int rc = 0; - struct DBOpPrepareParams p_params = PrepareParams; - - // user_id here is copied as OwnerID in the bucket table. - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.user.user_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.tenant, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.tenant.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.marker, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.marker.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.bucket_id.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.size, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.size, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.size_rounded, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.size_rounded, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.creation_time, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.creation_time, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.count, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.ent.count, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.placement_name, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.placement_rule.name.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.placement_storage_class, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.placement_rule.storage_class.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.flags, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.flags, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.zonegroup, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.zonegroup.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.has_instance_obj, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.has_instance_obj, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.quota, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.quota, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.requester_pays, sdb); - SQL_BIND_INT(dpp, stmt, index, 
params->op.bucket.info.requester_pays, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.has_website, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.has_website, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.website_conf, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.website_conf, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.swift_versioning, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.info.swift_versioning, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.swift_ver_location, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.swift_ver_location.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.mdsearch_config, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.mdsearch_config, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.new_bucket_instance_id, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.new_bucket_instance_id.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.obj_lock, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.obj_lock, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.sync_policy_info_groups, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.info.sync_policy, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_attrs, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.bucket_attrs, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_ver, sdb); - SQL_BIND_INT(dpp, stmt, index, params->op.bucket.bucket_version.ver, sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_ver_tag, sdb); - SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.bucket_version.tag.c_str(), sdb); - - SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.mtime, sdb); - SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.bucket.mtime, sdb); - -out: - return rc; -} - -int SQLInsertBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - class SQLObjectOp *ObPtr = NULL; - string bucket_name = params->op.bucket.info.bucket.name; - struct DBOpPrepareParams p_params = PrepareParams; - - ObPtr = new SQLObjectOp(sdb, ctx()); - - objectmapInsert(dpp, bucket_name, ObPtr); - - SQL_EXECUTE(dpp, params, stmt, NULL); - - /* Once Bucket is inserted created corresponding object(&data) tables - */ - InitPrepareParams(dpp, p_params, params); - - (void)createObjectTable(dpp, params); - (void)createObjectDataTable(dpp, params); - (void)createObjectTableTrigger(dpp, params); -out: - return ret; -} - -int SQLUpdateBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params) -{ - int ret = -1; - struct DBOpPrepareParams p_params = PrepareParams; - - if (!*sdb) { - ldpp_dout(dpp, 0)<<"In SQLUpdateBucket - no db" << dendl; - goto out; - } - - InitPrepareParams(dpp, p_params, params); - - if (params->op.query_str == "attrs") { - SQL_PREPARE(dpp, p_params, sdb, attrs_stmt, ret, "PrepareUpdateBucket"); - } else if (params->op.query_str == "owner") { - SQL_PREPARE(dpp, p_params, sdb, owner_stmt, ret, "PrepareUpdateBucket"); - } else if (params->op.query_str == "info") { - SQL_PREPARE(dpp, p_params, sdb, info_stmt, ret, "PrepareUpdateBucket"); - } else { - ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" << - params->op.query_str << "" << dendl; - goto out; - } - -out: - return ret; -} - -int SQLUpdateBucket::Bind(const DoutPrefixProvider *dpp, struct 
-
-int SQLUpdateBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-  sqlite3_stmt** stmt = NULL; // Prepared statement
-
-  /* All below fields for attrs */
-  if (params->op.query_str == "attrs") {
-    stmt = &attrs_stmt;
-  } else if (params->op.query_str == "owner") {
-    stmt = &owner_stmt;
-  } else if (params->op.query_str == "info") {
-    stmt = &info_stmt;
-  } else {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" <<
-      params->op.query_str << "" << dendl;
-    goto out;
-  }
-
-  if (params->op.query_str == "attrs") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_attrs, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.bucket_attrs, sdb);
-  } else if (params->op.query_str == "owner") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.creation_time, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.creation_time, sdb);
-  } else if (params->op.query_str == "info") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.tenant, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.tenant.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.marker, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.marker.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_id, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.bucket_id.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.creation_time, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.creation_time, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.count, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.ent.count, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.placement_name, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.placement_rule.name.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.placement_storage_class, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.placement_rule.storage_class.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.flags, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.flags, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.zonegroup, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.zonegroup.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.has_instance_obj, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.has_instance_obj, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.quota, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.quota, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.requester_pays, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.requester_pays, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.has_website, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.has_website, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.website_conf, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.website_conf, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.swift_versioning, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.info.swift_versioning, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.swift_ver_location, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.swift_ver_location.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.mdsearch_config, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.mdsearch_config, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.new_bucket_instance_id, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.new_bucket_instance_id.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.obj_lock, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.obj_lock, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.sync_policy_info_groups, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.info.sync_policy, sdb);
-  }
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.user.user_id, sdb);
-  SQL_BIND_TEXT(dpp, *stmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_ver, sdb);
-  SQL_BIND_INT(dpp, *stmt, index, params->op.bucket.bucket_version.ver, sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.bucket.mtime, sdb);
-
-out:
-  return rc;
-}
-
-int SQLUpdateBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  sqlite3_stmt** stmt = NULL; // Prepared statement
-
-  if (params->op.query_str == "attrs") {
-    stmt = &attrs_stmt;
-  } else if (params->op.query_str == "owner") {
-    stmt = &owner_stmt;
-  } else if (params->op.query_str == "info") {
-    stmt = &info_stmt;
-  } else {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateBucket invalid query_str:" <<
-      params->op.query_str << "" << dendl;
-    goto out;
-  }
-
-  SQL_EXECUTE(dpp, params, *stmt, NULL);
-out:
-  return ret;
-}
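SQLUpdateBucket keeps one prepared statement per query variant ("attrs", "owner", "info") and dispatches on query_str in Prepare, Bind, and Execute alike. The sketch below shows the same caching idea in isolation; the SQL text, class, and member names are illustrative assumptions, not the dbstore schema.

```cpp
// Sketch of per-variant statement caching as used by SQLUpdateBucket above:
// one compiled statement per query variant, prepared lazily and reused.
#include <sqlite3.h>
#include <map>
#include <string>

class UpdateBucket {
  sqlite3 *db;
  std::map<std::string, sqlite3_stmt*> stmts;  // query_str -> compiled stmt

 public:
  explicit UpdateBucket(sqlite3 *d) : db(d) {}
  ~UpdateBucket() {
    for (auto& [name, s] : stmts) sqlite3_finalize(s);  // release all variants
  }

  sqlite3_stmt *get(const std::string& query_str) {
    auto it = stmts.find(query_str);
    if (it != stmts.end()) return it->second;  // already prepared

    const char *sql = nullptr;  // illustrative SQL, not the dbstore statements
    if (query_str == "attrs")
      sql = "UPDATE buckets SET attrs = :attrs WHERE bucket_name = :bucket";
    else if (query_str == "owner")
      sql = "UPDATE buckets SET owner_id = :owner WHERE bucket_name = :bucket";
    else
      return nullptr;  // unknown variant, mirrors the 'invalid query_str' branch

    sqlite3_stmt *s = nullptr;
    if (sqlite3_prepare_v2(db, sql, -1, &s, nullptr) != SQLITE_OK) return nullptr;
    stmts.emplace(query_str, s);
    return s;
  }
};
```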
-
-int SQLRemoveBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLRemoveBucket - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveBucket");
-
-out:
-  return ret;
-}
-
-int SQLRemoveBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLRemoveBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  objectmapDelete(dpp, params->op.bucket.info.bucket.name);
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLGetBucket::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLGetBucket - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetBucket");
-
-out:
-  return ret;
-}
-
-int SQLGetBucket::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLGetBucket::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  class SQLObjectOp *ObPtr = NULL;
-
-  params->op.name = "GetBucket";
-
-  ObPtr = new SQLObjectOp(sdb, ctx());
-
-  /* For the case when the server restarts, need to reinsert objectmap*/
-  objectmapInsert(dpp, params->op.bucket.info.bucket.name, ObPtr);
-  SQL_EXECUTE(dpp, params, stmt, list_bucket);
-out:
-  return ret;
-}
-
-int SQLListUserBuckets::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLListUserBuckets - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  if (params->op.query_str == "all") {
-    SQL_PREPARE(dpp, p_params, sdb, all_stmt, ret, "PrepareListUserBuckets");
-  }else {
-    SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListUserBuckets");
-  }
-
-out:
-  return ret;
-}
-
-int SQLListUserBuckets::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-  sqlite3_stmt** pstmt = NULL; // Prepared statement
-
-  if (params->op.query_str == "all") {
-    pstmt = &all_stmt;
-  } else {
-    pstmt = &stmt;
-  }
-
-  if (params->op.query_str != "all") {
-    SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.user.user_id, sdb);
-    SQL_BIND_TEXT(dpp, *pstmt, index, params->op.user.uinfo.user_id.id.c_str(), sdb);
-  }
-
-  SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.bucket.min_marker, sdb);
-  SQL_BIND_TEXT(dpp, *pstmt, index, params->op.bucket.min_marker.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.list_max_count, sdb);
-  SQL_BIND_INT(dpp, *pstmt, index, params->op.list_max_count, sdb);
-
-out:
-  return rc;
-}
-
-int SQLListUserBuckets::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  if (params->op.query_str == "all") {
-    SQL_EXECUTE(dpp, params, all_stmt, list_bucket);
-  } else {
-    SQL_EXECUTE(dpp, params, stmt, list_bucket);
-  }
-out:
-  return ret;
-}
-
-int SQLPutObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLPutObject - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PreparePutObject");
-
-out:
-  return ret;
-}
-
-int SQLPutObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  int VersionNum = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_ns, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.acls, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.acls, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.index_ver, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.index_ver, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tag, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tag.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.flags, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.flags, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.versioned_epoch, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.versioned_epoch, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_category, sdb);
-  SQL_BIND_INT(dpp, stmt, index, (uint8_t)(params->op.obj.category), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.etag, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.etag.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.owner, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.owner.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.owner_display_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.owner_display_name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.storage_class, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.storage_class.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.appendable, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.appendable, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.content_type, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.content_type.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.index_hash_source, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.index_hash_source.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_size, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.size, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.accounted_size, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.accounted_size, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.epoch, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.epoch, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_tag, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.obj_tag, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_tag, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.tail_tag, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.write_tag, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.write_tag.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.fake_tag, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.fake_tag, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.shadow_obj, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.shadow_obj.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.has_data, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.has_data, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.is_versioned, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.is_versioned, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.version_num, sdb);
-  SQL_BIND_INT(dpp, stmt, index, VersionNum, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.pg_ver, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.pg_ver, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.zone_short_id, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.zone_short_id, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_version, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.state.objv_tracker.read_version.ver, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_version_tag, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.objv_tracker.read_version.tag.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_attrs, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.attrset, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_size, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.head_size, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.max_head_size, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.max_head_size, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_placement_rule_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.head_placement_rule.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_placement_storage_class, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.head_placement_rule.storage_class.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_placement_rule_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_placement.placement_rule.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.tail_placement_storage_class, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.tail_placement.placement_rule.storage_class.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.manifest_part_objs, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.objs, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.manifest_part_rules, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.rules, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.omap, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.omap, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.is_multipart, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj.is_multipart, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mp_parts, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.mp_parts, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.head_data, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.head_data, sdb);
-
-out:
-  return rc;
-}
-
-int SQLPutObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLDeleteObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLDeleteObject - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteObject");
-
-out:
-  return ret;
-}
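Complex fields above (ACLs, quota, attrsets, manifests) are bound through SQL_ENCODE_BLOB_PARAM, i.e. serialized to bytes and stored as a single blob column. At the SQLite level that reduces to sqlite3_bind_blob, sketched below; in dbstore the payload comes from Ceph's encode() into a bufferlist, so the std::string standing in for the serialized bytes here is an assumption for self-containedness.

```cpp
// Sketch of blob binding, the SQLite side of SQL_ENCODE_BLOB_PARAM.
// 'encoded' stands in for bytes produced by Ceph's encode(); that is an
// assumption made to keep the example self-contained.
#include <sqlite3.h>
#include <string>

int bind_blob_param(sqlite3_stmt *stmt, const char *param,
                    const std::string& encoded) {
  int idx = sqlite3_bind_parameter_index(stmt, param);
  if (idx == 0) return SQLITE_ERROR;  // no such named parameter
  // SQLITE_TRANSIENT makes SQLite copy the bytes before returning, so the
  // buffer does not need to outlive the statement.
  return sqlite3_bind_blob(stmt, idx, encoded.data(),
                           static_cast<int>(encoded.size()), SQLITE_TRANSIENT);
}
```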
-
-int SQLDeleteObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-out:
-  return rc;
-}
-
-int SQLDeleteObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLGetObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLGetObject - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetObject");
-
-out:
-  return ret;
-}
-
-int SQLGetObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLGetObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, list_object);
-out:
-  return ret;
-}
-
-int SQLUpdateObject::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-  struct DBOpParams copy = *params;
-  string bucket_name;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateObject - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  if (params->op.query_str == "omap") {
-    SQL_PREPARE(dpp, p_params, sdb, omap_stmt, ret, "PrepareUpdateObject");
-  } else if (params->op.query_str == "attrs") {
-    SQL_PREPARE(dpp, p_params, sdb, attrs_stmt, ret, "PrepareUpdateObject");
-  } else if (params->op.query_str == "meta") {
-    SQL_PREPARE(dpp, p_params, sdb, meta_stmt, ret, "PrepareUpdateObject");
-  } else if (params->op.query_str == "mp") {
-    SQL_PREPARE(dpp, p_params, sdb, mp_stmt, ret, "PrepareUpdateObject");
-  } else {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" <<
-      params->op.query_str << dendl;
-    goto out;
-  }
-
-out:
-  return ret;
-}
-
-int SQLUpdateObject::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-  sqlite3_stmt** stmt = NULL; // Prepared statement
-
-  /* All below fields for attrs */
-  if (params->op.query_str == "omap") {
-    stmt = &omap_stmt;
-  } else if (params->op.query_str == "attrs") {
-    stmt = &attrs_stmt;
-  } else if (params->op.query_str == "meta") {
-    stmt = &meta_stmt;
-  } else if (params->op.query_str == "mp") {
-    stmt = &mp_stmt;
-  } else {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" <<
-      params->op.query_str << dendl;
-    goto out;
-  }
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, *stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.mtime, sdb);
-
-  if (params->op.query_str == "omap") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.omap, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.omap, sdb);
-  }
-  if (params->op.query_str == "attrs") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_attrs, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.attrset, sdb);
-  }
-  if (params->op.query_str == "mp") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mp_parts, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.mp_parts, sdb);
-  }
-  if (params->op.query_str == "meta") {
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_ns, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.acls, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.acls, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.index_ver, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.index_ver, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tag, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tag.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.flags, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.flags, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.versioned_epoch, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.versioned_epoch, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_category, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, (uint8_t)(params->op.obj.category), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.etag, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.etag.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.owner, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.owner.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.owner_display_name, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.owner_display_name.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.storage_class, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.storage_class.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.appendable, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.appendable, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.content_type, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.content_type.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.index_hash_source, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.obj.index_hash_source.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_size, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.size, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.accounted_size, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.accounted_size, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.epoch, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.epoch, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_tag, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.obj_tag, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_tag, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.tail_tag, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.write_tag, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.write_tag.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.fake_tag, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.fake_tag, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.shadow_obj, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.shadow_obj.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.has_data, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.has_data, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.is_versioned, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.is_versioned, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.version_num, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.version_num, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.pg_ver, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.pg_ver, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.zone_short_id, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.zone_short_id, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_version, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.state.objv_tracker.read_version.ver, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_version_tag, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.state.objv_tracker.read_version.tag.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_attrs, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.state.attrset, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_size, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.head_size, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.max_head_size, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.max_head_size, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.obj_id, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_instance, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_instance.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_placement_rule_name, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.head_placement_rule.name.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_placement_storage_class, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.head_placement_rule.storage_class.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_placement_rule_name, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_placement.placement_rule.name.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.tail_placement_storage_class, sdb);
-    SQL_BIND_TEXT(dpp, *stmt, index, params->op.obj.tail_placement.placement_rule.storage_class.c_str(), sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.manifest_part_objs, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.objs, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.manifest_part_rules, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.rules, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.omap, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.omap, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.is_multipart, sdb);
-    SQL_BIND_INT(dpp, *stmt, index, params->op.obj.is_multipart, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.mp_parts, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.mp_parts, sdb);
-
-    SQL_BIND_INDEX(dpp, *stmt, index, p_params.op.obj.head_data, sdb);
-    SQL_ENCODE_BLOB_PARAM(dpp, *stmt, index, params->op.obj.head_data, sdb);
-  }
-
-out:
-  return rc;
-}
-
-int SQLUpdateObject::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  sqlite3_stmt** stmt = NULL; // Prepared statement
-
-  if (params->op.query_str == "omap") {
-    stmt = &omap_stmt;
-  } else if (params->op.query_str == "attrs") {
-    stmt = &attrs_stmt;
-  } else if (params->op.query_str == "meta") {
-    stmt = &meta_stmt;
-  } else if (params->op.query_str == "mp") {
-    stmt = &mp_stmt;
-  } else {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateObject invalid query_str:" <<
-      params->op.query_str << dendl;
-    goto out;
-  }
-
-  SQL_EXECUTE(dpp, params, *stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLListBucketObjects::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLListBucketObjects - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListBucketObjects");
-
-out:
-  return ret;
-}
-
-int SQLListBucketObjects::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.min_marker, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.min_marker.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.prefix, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.prefix.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb);
-
-out:
-  return rc;
-}
-
-int SQLListBucketObjects::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, list_object);
-out:
-  return ret;
-}
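The min_marker/prefix/list_max_count binds in SQLListBucketObjects implement marker-based (keyset) pagination: each page resumes strictly after the last key returned, filtered by prefix and capped by the max count. The statement below sketches the shape such a query takes; the table and column names are assumptions, not the dbstore schema, whose statements are defined elsewhere.

```cpp
// Illustrative shape of the marker-based listing statement those binds drive.
// Table and column names are assumptions, not the dbstore schema.
static const char *kListObjectsSql = R"sql(
  SELECT ObjName, ObjInstance, Size
    FROM objects
   WHERE BucketName = :bucket
     AND ObjName > :min_marker         -- resume strictly after the last page
     AND ObjName LIKE :prefix || '%'   -- optional prefix filter
   ORDER BY ObjName
   LIMIT :list_max_count
)sql";
```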
-
-int SQLListVersionedObjects::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLListVersionedObjects - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListVersionedObjects");
-
-out:
-  return ret;
-}
-
-int SQLListVersionedObjects::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb);
-
-out:
-  return rc;
-}
-
-int SQLListVersionedObjects::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, list_object);
-out:
-  return ret;
-}
-
-int SQLPutObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLPutObjectData - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PreparePutObjectData");
-
-out:
-  return ret;
-}
-
-int SQLPutObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_ns, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.ns.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.part_num, sdb);
-
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.part_num, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.offset, sdb);
-
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.offset, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.data, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj_data.data, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.size, sdb);
-
-  SQL_BIND_INT(dpp, stmt, index, params->op.obj_data.size, sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj_data.multipart_part_str, sdb);
-
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj_data.multipart_part_str.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb);
-
-out:
-  return rc;
-}
-
-int SQLPutObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLUpdateObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLUpdateObjectData - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareUpdateObjectData");
-
-out:
-  return ret;
-}
-
-int SQLUpdateObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb);
-
-out:
-  return rc;
-}
-
-int SQLUpdateObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLGetObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLGetObjectData - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetObjectData");
-
-out:
-  return ret;
-}
-
-int SQLGetObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLGetObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, get_objectdata);
-out:
-  return ret;
-}
-
-int SQLDeleteObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLDeleteObjectData - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteObjectData");
-
-out:
-  return ret;
-}
-
-int SQLDeleteObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (params->op.obj.state.obj.key.instance.empty()) {
-    params->op.obj.state.obj.key.instance = "null";
-  }
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.bucket.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.bucket.info.bucket.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.name.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_instance, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.state.obj.key.instance.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.obj_id, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.obj.obj_id.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLDeleteObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLDeleteStaleObjectData::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLDeleteStaleObjectData - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareDeleteStaleObjectData");
-
-out:
-  return ret;
-}
-
-int SQLDeleteStaleObjectData::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.obj.mtime, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, params->op.obj.state.mtime, sdb);
-
-out:
-  return rc;
-}
-
-int SQLDeleteStaleObjectData::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLInsertLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLInsertLCEntry - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertLCEntry");
-
-out:
-  return ret;
-}
-
-int SQLInsertLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.status, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.lc_entry.entry.get_status(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.start_time, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.lc_entry.entry.get_start_time(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLInsertLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLRemoveLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLRemoveLCEntry - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveLCEntry");
-
-out:
-  return ret;
-}
-
-int SQLRemoveLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLRemoveLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLGetLCEntry::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  sqlite3_stmt** pstmt = NULL; // Prepared statement
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLGetLCEntry - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  if (params->op.query_str == "get_next_entry") {
-    pstmt = &next_stmt;
-  } else {
-    pstmt = &stmt;
-  }
-  SQL_PREPARE(dpp, p_params, sdb, *pstmt, ret, "PrepareGetLCEntry");
-
-out:
-  return ret;
-}
-
-int SQLGetLCEntry::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-  sqlite3_stmt** pstmt = NULL; // Prepared statement
-
-  if (params->op.query_str == "get_next_entry") {
-    pstmt = &next_stmt;
-  } else {
-    pstmt = &stmt;
-  }
-  SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.lc_entry.index, sdb);
-  SQL_BIND_TEXT(dpp, *pstmt, index, params->op.lc_entry.index.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, *pstmt, index, p_params.op.lc_entry.bucket_name, sdb);
-  SQL_BIND_TEXT(dpp, *pstmt, index, params->op.lc_entry.entry.get_bucket().c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLGetLCEntry::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  sqlite3_stmt** pstmt = NULL; // Prepared statement
-
-  if (params->op.query_str == "get_next_entry") {
-    pstmt = &next_stmt;
-  } else {
-    pstmt = &stmt;
-  }
-
-  SQL_EXECUTE(dpp, params, *pstmt, list_lc_entry);
-out:
-  return ret;
-}
-
-int SQLListLCEntries::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLListLCEntries - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareListLCEntries");
-
-out:
-  return ret;
-}
-
-int SQLListLCEntries::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.index.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_entry.min_marker, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_entry.min_marker.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.list_max_count, sdb);
-  SQL_BIND_INT(dpp, stmt, index, params->op.list_max_count, sdb);
-
-out:
-  return rc;
-}
-
-int SQLListLCEntries::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, list_lc_entry);
-out:
-  return ret;
-}
-
-int SQLInsertLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLInsertLCHead - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareInsertLCHead");
-
-out:
-  return ret;
-}
-
-int SQLInsertLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.marker, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.head.get_marker().c_str(), sdb);
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.start_date, sdb);
-  SQL_ENCODE_BLOB_PARAM(dpp, stmt, index, static_cast<ceph::real_time>(params->op.lc_head.head.start_date), sdb);
-
-out:
-  return rc;
-}
-
-int SQLInsertLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLRemoveLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLRemoveLCHead - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareRemoveLCHead");
-
-out:
-  return ret;
-}
-
-int SQLRemoveLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLRemoveLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  SQL_EXECUTE(dpp, params, stmt, NULL);
-out:
-  return ret;
-}
-
-int SQLGetLCHead::Prepare(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  if (!*sdb) {
-    ldpp_dout(dpp, 0)<<"In SQLGetLCHead - no db" << dendl;
-    goto out;
-  }
-
-  InitPrepareParams(dpp, p_params, params);
-
-  SQL_PREPARE(dpp, p_params, sdb, stmt, ret, "PrepareGetLCHead");
-
-out:
-  return ret;
-}
-
-int SQLGetLCHead::Bind(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int index = -1;
-  int rc = 0;
-  struct DBOpPrepareParams p_params = PrepareParams;
-
-  SQL_BIND_INDEX(dpp, stmt, index, p_params.op.lc_head.index, sdb);
-  SQL_BIND_TEXT(dpp, stmt, index, params->op.lc_head.index.c_str(), sdb);
-
-out:
-  return rc;
-}
-
-int SQLGetLCHead::Execute(const DoutPrefixProvider *dpp, struct DBOpParams *params)
-{
-  int ret = -1;
-
-  // clear the params before fetching the entry
-  params->op.lc_head.head = {};
-  SQL_EXECUTE(dpp, params, stmt, list_lc_head);
-out:
-  return ret;
-}
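Every Bind() above is a straight-line run of SQL_BIND_* calls with a single shared `out:` label, which only works if each macro jumps to `out` on failure. The sketch below shows the plausible shape of such a macro; it is assumed from the call sites, not copied from the dbstore headers where the real SQL_BIND_*/SQL_EXECUTE definitions live.

```cpp
// Plausible shape of an SQL_BIND_*-style macro: perform the bind, report
// failure, and jump to the enclosing function's shared out: label. This is
// a sketch inferred from the call sites above, not the dbstore definition.
#include <sqlite3.h>
#include <cstdio>

#define SQL_BIND_INT_SKETCH(stmt, index, value, rc)          \
  do {                                                       \
    rc = sqlite3_bind_int(stmt, index, value);               \
    if (rc != SQLITE_OK) {                                   \
      fprintf(stderr, "bind failed at index %d\n", index);   \
      goto out;  /* relies on an out: label in the caller */ \
    }                                                        \
  } while (0)
```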
diff --git a/src/rgw/store/dbstore/sqlite/sqliteDB.h b/src/rgw/store/dbstore/sqlite/sqliteDB.h
deleted file mode 100644
index 038b24fe5b3..00000000000
--- a/src/rgw/store/dbstore/sqlite/sqliteDB.h
+++ /dev/null
@@ -1,554 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#ifndef SQLITE_DB_H
-#define SQLITE_DB_H
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string>
-#include <sqlite3.h>
-#include "rgw/store/dbstore/common/dbstore.h"
-
-using namespace rgw::store;
-
-class SQLiteDB : public DB, virtual public DBOp {
-  private:
-    sqlite3_mutex *mutex = NULL;
-
-  protected:
-    CephContext *cct;
-
-  public:
-    sqlite3_stmt *stmt = NULL;
-    DBOpPrepareParams PrepareParams;
-
-    SQLiteDB(sqlite3 *dbi, std::string db_name, CephContext *_cct) : DB(db_name, _cct), cct(_cct) {
-      db = (void*)dbi;
-    }
-    SQLiteDB(std::string db_name, CephContext *_cct) : DB(db_name, _cct), cct(_cct) {
-    }
-    ~SQLiteDB() {}
-
-    uint64_t get_blob_limit() override { return SQLITE_LIMIT_LENGTH; }
-    void *openDB(const DoutPrefixProvider *dpp) override;
-    int closeDB(const DoutPrefixProvider *dpp) override;
-    int InitializeDBOps(const DoutPrefixProvider *dpp) override;
-
-    int InitPrepareParams(const DoutPrefixProvider *dpp, DBOpPrepareParams &p_params,
-                          DBOpParams* params) override;
-
-    int exec(const DoutPrefixProvider *dpp, const char *schema,
-             int (*callback)(void*,int,char**,char**));
-    int Step(const DoutPrefixProvider *dpp, DBOpInfo &op, sqlite3_stmt *stmt,
-             int (*cbk)(const DoutPrefixProvider *dpp, DBOpInfo &op, sqlite3_stmt *stmt));
-    int Reset(const DoutPrefixProvider *dpp, sqlite3_stmt *stmt);
-    /* default value matches with sqliteDB style */
-
-    int createTables(const DoutPrefixProvider *dpp) override;
-    int createBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createUserTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createObjectView(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createObjectTableTrigger(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int createQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    void populate_object_params(const DoutPrefixProvider *dpp,
-                                struct DBOpPrepareParams& p_params,
-                                struct DBOpParams* params, bool data);
-
-    int createLCTables(const DoutPrefixProvider *dpp) override;
-
-    int DeleteBucketTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteUserTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteObjectTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteObjectDataTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteQuotaTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteLCEntryTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int DeleteLCHeadTable(const DoutPrefixProvider *dpp, DBOpParams *params);
-
-    int ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) override;
-    int ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) override;
-    int ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) override;
-};
-
-class SQLObjectOp : public ObjectOp {
-  private:
-    sqlite3 **sdb = NULL;
-    CephContext *cct;
-
-  public:
-    SQLObjectOp(sqlite3 **sdbi, CephContext *_cct) : sdb(sdbi), cct(_cct) {};
-    ~SQLObjectOp() {}
-
-    int InitializeObjectOps(std::string db_name, const DoutPrefixProvider *dpp);
-};
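The Step() helper declared above hands each result row to a callback (the list_user, list_bucket, list_object functions passed as the fourth argument of SQL_EXECUTE in the .cc file). The sketch below shows the underlying row loop in isolation, with a simplified callback signature relative to the declaration above.

```cpp
// Sketch of the row loop behind a Step()-style helper: step until done,
// handing each SQLITE_ROW to a callback that copies columns out. The
// callback signature is simplified relative to the declaration above.
#include <sqlite3.h>

int step_all(sqlite3_stmt *stmt, int (*row_cb)(sqlite3_stmt *)) {
  int rc;
  while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
    if (int cb_rc = row_cb(stmt); cb_rc != 0)
      return cb_rc;                   // callback asked to stop early
  }
  return rc == SQLITE_DONE ? 0 : rc;  // SQLITE_DONE means all rows consumed
}
```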
-
-class SQLInsertUser : public SQLiteDB, public InsertUserOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLInsertUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLInsertUser() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLRemoveUser : public SQLiteDB, public RemoveUserOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLRemoveUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLRemoveUser() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLGetUser : public SQLiteDB, public GetUserOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-    sqlite3_stmt *email_stmt = NULL; // Prepared statement to query by useremail
-    sqlite3_stmt *ak_stmt = NULL; // Prepared statement to query by access_key_id
-    sqlite3_stmt *userid_stmt = NULL; // Prepared statement to query by user_id
-
-  public:
-    SQLGetUser(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLGetUser() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-      if (email_stmt)
-        sqlite3_finalize(email_stmt);
-      if (ak_stmt)
-        sqlite3_finalize(ak_stmt);
-      if (userid_stmt)
-        sqlite3_finalize(userid_stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLInsertBucket : public SQLiteDB, public InsertBucketOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLInsertBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLInsertBucket() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLUpdateBucket : public SQLiteDB, public UpdateBucketOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *info_stmt = NULL; // Prepared statement
-    sqlite3_stmt *attrs_stmt = NULL; // Prepared statement
-    sqlite3_stmt *owner_stmt = NULL; // Prepared statement
-
-  public:
-    SQLUpdateBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLUpdateBucket() {
-      if (info_stmt)
-        sqlite3_finalize(info_stmt);
-      if (attrs_stmt)
-        sqlite3_finalize(attrs_stmt);
-      if (owner_stmt)
-        sqlite3_finalize(owner_stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLRemoveBucket : public SQLiteDB, public RemoveBucketOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLRemoveBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLRemoveBucket() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLGetBucket : public SQLiteDB, public GetBucketOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLGetBucket(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLGetBucket() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLListUserBuckets : public SQLiteDB, public ListUserBucketsOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-    sqlite3_stmt *all_stmt = NULL; // Prepared statement
-
-  public:
-    SQLListUserBuckets(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    ~SQLListUserBuckets() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-      if (all_stmt)
-        sqlite3_finalize(all_stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLPutObject : public SQLiteDB, public PutObjectOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLPutObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLPutObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLPutObject() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLDeleteObject : public SQLiteDB, public DeleteObjectOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLDeleteObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLDeleteObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLDeleteObject() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLGetObject : public SQLiteDB, public GetObjectOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLGetObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLGetObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLGetObject() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLUpdateObject : public SQLiteDB, public UpdateObjectOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *omap_stmt = NULL; // Prepared statement
-    sqlite3_stmt *attrs_stmt = NULL; // Prepared statement
-    sqlite3_stmt *meta_stmt = NULL; // Prepared statement
-    sqlite3_stmt *mp_stmt = NULL; // Prepared statement
-
-  public:
-    SQLUpdateObject(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLUpdateObject(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLUpdateObject() {
-      if (omap_stmt)
-        sqlite3_finalize(omap_stmt);
-      if (attrs_stmt)
-        sqlite3_finalize(attrs_stmt);
-      if (meta_stmt)
-        sqlite3_finalize(meta_stmt);
-    }
-
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLListBucketObjects : public SQLiteDB, public ListBucketObjectsOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLListBucketObjects(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLListBucketObjects(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLListBucketObjects() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
-
-class SQLListVersionedObjects : public SQLiteDB, public ListVersionedObjectsOp {
-  private:
-    sqlite3 **sdb = NULL;
-    sqlite3_stmt *stmt = NULL; // Prepared statement
-
-  public:
-    SQLListVersionedObjects(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {}
-    SQLListVersionedObjects(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {}
-
-    ~SQLListVersionedObjects() {
-      if (stmt)
-        sqlite3_finalize(stmt);
-    }
-    int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Execute(const DoutPrefixProvider *dpp, DBOpParams *params);
-    int Bind(const DoutPrefixProvider *dpp, DBOpParams *params);
-};
CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} - - ~SQLUpdateObjectData() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLGetObjectData : public SQLiteDB, public GetObjectDataOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLGetObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - SQLGetObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} - - ~SQLGetObjectData() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLDeleteObjectData : public SQLiteDB, public DeleteObjectDataOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLDeleteObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - SQLDeleteObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} - - ~SQLDeleteObjectData() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLDeleteStaleObjectData : public SQLiteDB, public DeleteStaleObjectDataOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLDeleteStaleObjectData(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - SQLDeleteStaleObjectData(sqlite3 **sdbi, std::string db_name, CephContext *cct) : SQLiteDB(*sdbi, db_name, cct), sdb(sdbi) {} - - ~SQLDeleteStaleObjectData() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLInsertLCEntry : public SQLiteDB, public InsertLCEntryOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLInsertLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLInsertLCEntry() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLRemoveLCEntry : public SQLiteDB, public RemoveLCEntryOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLRemoveLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLRemoveLCEntry() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const 
DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLGetLCEntry : public SQLiteDB, public GetLCEntryOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - sqlite3_stmt *next_stmt = NULL; // Prepared statement - - public: - SQLGetLCEntry(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLGetLCEntry() { - if (stmt) - sqlite3_finalize(stmt); - if (next_stmt) - sqlite3_finalize(next_stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLListLCEntries : public SQLiteDB, public ListLCEntriesOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLListLCEntries(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLListLCEntries() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLInsertLCHead : public SQLiteDB, public InsertLCHeadOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLInsertLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLInsertLCHead() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLRemoveLCHead : public SQLiteDB, public RemoveLCHeadOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLRemoveLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLRemoveLCHead() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -class SQLGetLCHead : public SQLiteDB, public GetLCHeadOp { - private: - sqlite3 **sdb = NULL; - sqlite3_stmt *stmt = NULL; // Prepared statement - - public: - SQLGetLCHead(void **db, std::string db_name, CephContext *cct) : SQLiteDB((sqlite3 *)(*db), db_name, cct), sdb((sqlite3 **)db) {} - ~SQLGetLCHead() { - if (stmt) - sqlite3_finalize(stmt); - } - int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params); - int Execute(const DoutPrefixProvider *dpp, DBOpParams *params); - int Bind(const DoutPrefixProvider *dpp, DBOpParams *params); -}; - -#endif diff --git a/src/rgw/store/dbstore/sqlite/statement.cc b/src/rgw/store/dbstore/sqlite/statement.cc deleted file mode 100644 index dcf7dba9c50..00000000000 --- a/src/rgw/store/dbstore/sqlite/statement.cc +++ /dev/null @@ -1,196 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. 
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "common/dout.h"
-#include "error.h"
-#include "statement.h"
-
-#define dout_subsys ceph_subsys_rgw_dbstore
-
-namespace rgw::dbstore::sqlite {
-
-// owning pointer to arbitrary memory allocated and returned by sqlite3
-struct sqlite_deleter {
-  template <typename T>
-  void operator()(T* p) { ::sqlite3_free(p); }
-};
-template <typename T>
-using sqlite_ptr = std::unique_ptr<T, sqlite_deleter>;
-
-
-stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
-                           sqlite3* db, std::string_view sql)
-{
-  sqlite3_stmt* stmt = nullptr;
-  int result = ::sqlite3_prepare_v2(db, sql.data(), sql.size(), &stmt, nullptr);
-  auto ec = std::error_code{result, sqlite::error_category()};
-  if (ec != sqlite::errc::ok) {
-    const char* errmsg = ::sqlite3_errmsg(db);
-    ldpp_dout(dpp, 1) << "preparation failed: " << errmsg
-        << " (" << ec << ")\nstatement: " << sql << dendl;
-    throw sqlite::error(errmsg, ec);
-  }
-  return stmt_ptr{stmt};
-}
-
-static int bind_index(const DoutPrefixProvider* dpp,
-                      const stmt_binding& stmt, const char* name)
-{
-  const int index = ::sqlite3_bind_parameter_index(stmt.get(), name);
-  if (index <= 0) {
-    ldpp_dout(dpp, 1) << "binding failed on parameter name="
-        << name << dendl;
-    sqlite3* db = ::sqlite3_db_handle(stmt.get());
-    throw sqlite::error(db);
-  }
-  return index;
-}
-
-void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
-               const char* name, std::string_view value)
-{
-  const int index = bind_index(dpp, stmt, name);
-
-  int result = ::sqlite3_bind_text(stmt.get(), index, value.data(),
-                                   value.size(), SQLITE_STATIC);
-  auto ec = std::error_code{result, sqlite::error_category()};
-  if (ec != sqlite::errc::ok) {
-    ldpp_dout(dpp, 1) << "binding failed on parameter name="
-        << name << " value=" << value << dendl;
-    sqlite3* db = ::sqlite3_db_handle(stmt.get());
-    throw sqlite::error(db, ec);
-  }
-}
-
-void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
-              const char* name, int value)
-{
-  const int index = bind_index(dpp, stmt, name);
-
-  int result = ::sqlite3_bind_int(stmt.get(), index, value);
-  auto ec = std::error_code{result, sqlite::error_category()};
-  if (ec != sqlite::errc::ok) {
-    ldpp_dout(dpp, 1) << "binding failed on parameter name="
-        << name << " value=" << value << dendl;
-    sqlite3* db = ::sqlite3_db_handle(stmt.get());
-    throw sqlite::error(db, ec);
-  }
-}
-
-void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
-{
-  sqlite_ptr<char> sql;
-  if (dpp->get_cct()->_conf->subsys.should_gather<ceph_subsys_rgw_dbstore, 20>()) {
-    sql.reset(::sqlite3_expanded_sql(stmt.get()));
-  }
-
-  const int result = ::sqlite3_step(stmt.get());
-  auto ec = std::error_code{result, sqlite::error_category()};
-  sqlite3* db = ::sqlite3_db_handle(stmt.get());
-
-  if (ec != sqlite::errc::done) {
-    const char* errmsg = ::sqlite3_errmsg(db);
-    ldpp_dout(dpp, 20) << "evaluation failed: " << errmsg
-        << " (" << ec << ")\nstatement: " << sql.get() << dendl;
-    throw sqlite::error(errmsg, ec);
-  }
-  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql.get() << dendl;
-}
-
-void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt)
-{
-  sqlite_ptr<char> sql;
-  if (dpp->get_cct()->_conf->subsys.should_gather<ceph_subsys_rgw_dbstore, 20>()) {
-    sql.reset(::sqlite3_expanded_sql(stmt.get()));
-  }
-
-  const int result = ::sqlite3_step(stmt.get());
-  auto ec = std::error_code{result, sqlite::error_category()};
-  if (ec != sqlite::errc::row) {
-    sqlite3* db = ::sqlite3_db_handle(stmt.get());
-    const char* errmsg = ::sqlite3_errmsg(db);
-    ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
-        << ")\nstatement: " << sql.get() << dendl;
-    throw sqlite::error(errmsg, ec);
-  }
-  ldpp_dout(dpp, 20) << "evaluation succeeded: " << sql.get() << dendl;
-}
-
-int column_int(const stmt_execution& stmt, int column)
-{
-  return ::sqlite3_column_int(stmt.get(), column);
-}
-
-std::string column_text(const stmt_execution& stmt, int column)
-{
-  const unsigned char* text = ::sqlite3_column_text(stmt.get(), column);
-  // may be NULL
-  if (text) {
-    const std::size_t size = ::sqlite3_column_bytes(stmt.get(), column);
-    return {reinterpret_cast<const char*>(text), size};
-  } else {
-    return {};
-  }
-}
-
-auto read_text_rows(const DoutPrefixProvider* dpp,
-                    const stmt_execution& stmt,
-                    std::span<std::string> entries)
-  -> std::span<std::string>
-{
-  sqlite_ptr<char> sql;
-  if (dpp->get_cct()->_conf->subsys.should_gather<ceph_subsys_rgw_dbstore, 20>()) {
-    sql.reset(::sqlite3_expanded_sql(stmt.get()));
-  }
-
-  std::size_t count = 0;
-  while (count < entries.size()) {
-    const int result = ::sqlite3_step(stmt.get());
-    auto ec = std::error_code{result, sqlite::error_category()};
-    if (ec == sqlite::errc::done) {
-      break;
-    }
-    if (ec != sqlite::errc::row) {
-      sqlite3* db = ::sqlite3_db_handle(stmt.get());
-      const char* errmsg = ::sqlite3_errmsg(db);
-      ldpp_dout(dpp, 1) << "evaluation failed: " << errmsg << " (" << ec
-          << ")\nstatement: " << sql.get() << dendl;
-      throw sqlite::error(errmsg, ec);
-    }
-    entries[count] = column_text(stmt, 0);
-    ++count;
-  }
-  ldpp_dout(dpp, 20) << "statement evaluation produced " << count
-      << " results: " << sql.get() << dendl;
-
-  return entries.first(count);
-}
-
-void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query,
-             sqlite3_callback callback, void* arg)
-{
-  char* errmsg = nullptr;
-  const int result = ::sqlite3_exec(db, query, callback, arg, &errmsg);
-  auto ec = std::error_code{result, sqlite::error_category()};
-  auto ptr = sqlite_ptr<char>{errmsg}; // free on destruction
-  if (ec != sqlite::errc::ok) {
-    ldpp_dout(dpp, 1) << "query execution failed: " << errmsg << " (" << ec
-        << ")\nquery: " << query << dendl;
-    throw sqlite::error(errmsg, ec);
-  }
-  ldpp_dout(dpp, 20) << "query execution succeeded: " << query << dendl;
-}
-
-} // namespace rgw::dbstore::sqlite
diff --git a/src/rgw/store/dbstore/sqlite/statement.h b/src/rgw/store/dbstore/sqlite/statement.h
deleted file mode 100644
index 98b4acfea23..00000000000
--- a/src/rgw/store/dbstore/sqlite/statement.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include <memory>
-#include <span>
-#include <string>
-
-#include <sqlite3.h>
-
-class DoutPrefixProvider;
-
-namespace rgw::dbstore::sqlite {
-
-// owning sqlite3_stmt pointer
-struct stmt_deleter {
-  void operator()(sqlite3_stmt* p) const { ::sqlite3_finalize(p); }
-};
-using stmt_ptr = std::unique_ptr<sqlite3_stmt, stmt_deleter>;
-
-// non-owning sqlite3_stmt pointer that clears binding state on destruction
-struct stmt_binding_deleter {
-  void operator()(sqlite3_stmt* p) const { ::sqlite3_clear_bindings(p); }
-};
-using stmt_binding = std::unique_ptr<sqlite3_stmt, stmt_binding_deleter>;
-
-// non-owning sqlite3_stmt pointer that clears execution state on destruction
-struct stmt_execution_deleter {
-  void operator()(sqlite3_stmt* p) const { ::sqlite3_reset(p); }
-};
-using stmt_execution = std::unique_ptr<sqlite3_stmt, stmt_execution_deleter>;
-
-
-// prepare the sql statement or throw on error
-stmt_ptr prepare_statement(const DoutPrefixProvider* dpp,
-                           sqlite3* db, std::string_view sql);
-
-// bind an input string for the given parameter name
-void bind_text(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
-               const char* name, std::string_view value);
-
-// bind an input integer for the given parameter name
-void bind_int(const DoutPrefixProvider* dpp, const stmt_binding& stmt,
-              const char* name, int value);
-
-// evaluate a prepared statement, expecting no result rows
-void eval0(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
-
-// evaluate a prepared statement, expecting a single result row
-void eval1(const DoutPrefixProvider* dpp, const stmt_execution& stmt);
-
-// return the given column as an integer
-int column_int(const stmt_execution& stmt, int column);
-
-// return the given column as text, or an empty string on NULL
-std::string column_text(const stmt_execution& stmt, int column);
-
-// read the text column from each result row into the given entries, and return
-// the sub-span of entries that contain results
-auto read_text_rows(const DoutPrefixProvider* dpp,
-                    const stmt_execution& stmt,
-                    std::span<std::string> entries)
-  -> std::span<std::string>;
-
-// execute a raw query without preparing a statement.
the optional callback -// can be used to read results -void execute(const DoutPrefixProvider* dpp, sqlite3* db, const char* query, - sqlite3_callback callback, void* arg); - -} // namespace rgw::dbstore::sqlite diff --git a/src/rgw/store/dbstore/tests/CMakeLists.txt b/src/rgw/store/dbstore/tests/CMakeLists.txt deleted file mode 100644 index 4e60dcf5ee2..00000000000 --- a/src/rgw/store/dbstore/tests/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -cmake_minimum_required(VERSION 3.14.0) -project(dbstore-tests) - -set (CMAKE_LINK_LIBRARIES ${CMAKE_LINK_LIBRARIES} gtest) - -set(dbstore_tests_srcs - dbstore_tests.cc) - -include_directories(${CMAKE_INCLUDE_DIR}) - -add_executable(unittest_dbstore_tests ${dbstore_tests_srcs}) -target_link_libraries(unittest_dbstore_tests ${CMAKE_LINK_LIBRARIES}) -add_ceph_unittest(unittest_dbstore_tests) - -add_executable(unittest_dbstore_mgr_tests dbstore_mgr_tests.cc) -target_link_libraries(unittest_dbstore_mgr_tests dbstore gtest_main) -add_ceph_unittest(unittest_dbstore_mgr_tests) diff --git a/src/rgw/store/dbstore/tests/dbstore_mgr_tests.cc b/src/rgw/store/dbstore/tests/dbstore_mgr_tests.cc deleted file mode 100644 index 4f58f476465..00000000000 --- a/src/rgw/store/dbstore/tests/dbstore_mgr_tests.cc +++ /dev/null @@ -1,157 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "common/ceph_context.h" -#include "rgw/store/dbstore/dbstore_mgr.h" - -#include -#include -#include - -using namespace rgw; -namespace fs = std::filesystem; -const static std::string TEST_DIR = "rgw_dbstore_tests"; - -bool endsWith(const std::string &mainStr, const std::string &toMatch) -{ - if(mainStr.size() >= toMatch.size() && - mainStr.compare(mainStr.size() - toMatch.size(), toMatch.size(), toMatch) == 0) - return true; - else - return false; -} - -class TestDBStoreManager : public ::testing::Test { -protected: - void SetUp() override { - ctx_ = std::make_shared(CEPH_ENTITY_TYPE_CLIENT); - g_ceph_context = ctx_.get(); - fs::current_path(fs::temp_directory_path()); - fs::create_directory(TEST_DIR); - } - - void TearDown() override { - fs::current_path(fs::temp_directory_path()); - fs::remove_all(TEST_DIR); - } - - std::string getTestDir() const { - auto test_dir = fs::temp_directory_path() / TEST_DIR; - return test_dir.string(); - } - - fs::path getDBFullPath(const std::string & base_dir, - const std::string & tenant) const { - auto db_path = ctx_->_conf.get_val("dbstore_db_dir"); - const auto& db_name = ctx_->_conf.get_val("dbstore_db_name_prefix") + "-" + tenant + ".db"; - - auto db_full_path = std::filesystem::path(db_path) / db_name; - auto db_full_path_test = fs::path(base_dir) / db_full_path; - return db_full_path_test; - } - - std::string getDBTenant(const std::string & base_dir, - const std::string & tenant) const { - auto db_name = ctx_->_conf.get_val("dbstore_db_name_prefix"); - db_name += "-" + tenant; - auto db_full_path = fs::path(base_dir) / db_name; - return db_full_path.string(); - } - - std::string getDBTenant(const std::string & tenant = default_tenant) const { - return getDBTenant(getTestDir(), tenant); - } - - fs::path getDBFullPath(const std::string & tenant) const { - return getDBFullPath(getTestDir(), tenant); - } - - fs::path getLogFilePath(const std::string & log_file) { - return fs::temp_directory_path() / log_file; - } - - std::shared_ptr getContext() const { - return ctx_; - } - - private: - std::shared_ptr ctx_; -}; - -TEST_F(TestDBStoreManager, BasicInstantiateUsingDBDir) { - 
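For orientation, this is how the statement.h helpers deleted above compose in practice: prepare once, then bind and step through the RAII wrappers so that bindings and execution state are cleared even when an error is thrown. A minimal sketch (the `Examples` table and its SQL text are hypothetical, not from this diff):

```cpp
// Sketch only: exercises prepare_statement/bind_text/bind_int/eval0 from
// statement.h above. The "Examples" table and its SQL are hypothetical.
#include "statement.h"

using namespace rgw::dbstore::sqlite;

void insert_example_row(const DoutPrefixProvider* dpp, sqlite3* db)
{
  // prepared once; stmt_ptr finalizes the statement when it goes out of scope
  stmt_ptr stmt = prepare_statement(dpp, db,
      "INSERT INTO Examples (Name, Value) VALUES (:name, :value)");

  // stmt_binding clears parameter bindings on destruction
  stmt_binding binding{stmt.get()};
  bind_text(dpp, binding, ":name", "example");
  bind_int(dpp, binding, ":value", 42);

  // stmt_execution resets execution state on destruction;
  // eval0() expects the statement to produce no result rows
  stmt_execution exec{stmt.get()};
  eval0(dpp, exec);
}
```

All of the helpers throw sqlite::error on failure, so callers only handle the success path inline.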
getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); - auto dbstore_mgr = std::make_shared(getContext().get()); - EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); -} - -TEST_F(TestDBStoreManager, DBNamePrefix) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - std::string prefix = "testprefix"; - getContext()->_conf.set_val("dbstore_db_name_prefix", prefix); - - EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); - auto dbstore_mgr = std::make_shared(getContext().get()); - EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); - - // check that the database name contains the given prefix - std::string expected_db_name = prefix + "-" + default_tenant + ".db"; - EXPECT_TRUE(endsWith(getDBFullPath(default_tenant), expected_db_name)); -} - -TEST_F(TestDBStoreManager, BasicInstantiateSecondConstructor) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - EXPECT_FALSE(fs::exists(getDBFullPath(default_tenant))); - auto dbstore_mgr = std::make_shared(getContext().get(), getLogFilePath("test.log").string(), 10); - EXPECT_TRUE(fs::exists(getDBFullPath(default_tenant))); -} - -TEST_F(TestDBStoreManager, TestDBName) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - auto dbstore_mgr = std::make_shared(getContext().get()); - auto db = dbstore_mgr->getDB(default_tenant, false); - ASSERT_NE(nullptr, db); - EXPECT_EQ(getDBTenant(), db->getDBname()); -} - - -TEST_F(TestDBStoreManager, TestDBNameDefaultDB) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - auto dbstore_mgr = std::make_shared(getContext().get()); - // passing an empty tenant should return the default_db - auto db = dbstore_mgr->getDB("", false); - ASSERT_NE(nullptr, db); - EXPECT_EQ(getDBTenant(), db->getDBname()); -} - -TEST_F(TestDBStoreManager, TestDBBadTenant) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - auto dbstore_mgr = std::make_shared(getContext().get()); - auto db = dbstore_mgr->getDB("does-not-exist", false); - ASSERT_EQ(nullptr, db); -} - -TEST_F(TestDBStoreManager, TestGetNewDB) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - auto dbstore_mgr = std::make_shared(getContext().get()); - - auto new_tenant_path = "new_tenant"; - auto db = dbstore_mgr->getDB(new_tenant_path, true); - ASSERT_NE(nullptr, db); - EXPECT_EQ(getDBTenant(new_tenant_path), db->getDBname()); -} - -TEST_F(TestDBStoreManager, TestDelete) { - getContext()->_conf.set_val("dbstore_db_dir", getTestDir()); - - auto dbstore_mgr = std::make_shared(getContext().get()); - dbstore_mgr->deleteDB(default_tenant); - auto db = dbstore_mgr->getDB(default_tenant, false); - ASSERT_EQ(nullptr, db); -} diff --git a/src/rgw/store/dbstore/tests/dbstore_tests.cc b/src/rgw/store/dbstore/tests/dbstore_tests.cc deleted file mode 100644 index e87002f61b5..00000000000 --- a/src/rgw/store/dbstore/tests/dbstore_tests.cc +++ /dev/null @@ -1,1424 +0,0 @@ -#include "gtest/gtest.h" -#include -#include -#include -#include -#include -#include -#include -#include "rgw_common.h" - -using namespace std; -using DB = rgw::store::DB; - -vector args; - -namespace gtest { - class Environment* env; - - class Environment : public ::testing::Environment { - public: - Environment(): tenant("default_ns"), db(nullptr), - db_type("SQLite"), ret(-1) {} - - Environment(string tenantname, string db_typename): - tenant(tenantname), db(nullptr), - db_type(db_typename), ret(-1) {} - - virtual ~Environment() {} - - void 
SetUp() override {
-      cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
-                        CODE_ENVIRONMENT_DAEMON,
-                        CINIT_FLAG_NO_DEFAULT_CONFIG_FILE | CINIT_FLAG_NO_MON_CONFIG | CINIT_FLAG_NO_DAEMON_ACTIONS);
-      if (!db_type.compare("SQLite")) {
-        db = new SQLiteDB(tenant, cct.get());
-        ASSERT_TRUE(db != nullptr);
-        ret = db->Initialize(logfile, loglevel);
-        ASSERT_GE(ret, 0);
-      }
-    }
-
-    void TearDown() override {
-      if (!db)
-        return;
-      db->Destroy(db->get_def_dpp());
-      delete db;
-    }
-
-    string tenant;
-    DB *db;
-    string db_type;
-    int ret;
-    string logfile = "rgw_dbstore_tests.log";
-    int loglevel = 30;
-    boost::intrusive_ptr<CephContext> cct;
-  };
-}
-
-ceph::real_time bucket_mtime = real_clock::now();
-string marker1;
-
-class DBGetDataCB : public RGWGetDataCB {
-  public:
-    bufferlist data_bl;
-    off_t data_ofs, data_len;
-
-    int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
-      data_bl = bl;
-      data_ofs = bl_ofs;
-      data_len = bl_len;
-      return 0;
-    }
-};
-
-namespace {
-
-  class DBStoreTest : public ::testing::Test {
-    protected:
-      int ret;
-      DB *db = nullptr;
-      string user1 = "user1";
-      string user_id1 = "user_id1";
-      string bucket1 = "bucket1";
-      string object1 = "object1";
-      string data = "Hello World";
-      DBOpParams GlobalParams = {};
-      const DoutPrefixProvider *dpp;
-
-      DBStoreTest() {}
-      void SetUp() {
-        db = gtest::env->db;
-        ASSERT_TRUE(db != nullptr);
-        dpp = db->get_def_dpp();
-        ASSERT_TRUE(dpp != nullptr);
-
-        GlobalParams.op.user.uinfo.display_name = user1;
-        GlobalParams.op.user.uinfo.user_id.id = user_id1;
-        GlobalParams.op.bucket.info.bucket.name = bucket1;
-        GlobalParams.op.obj.state.obj.bucket = GlobalParams.op.bucket.info.bucket;
-        GlobalParams.op.obj.state.obj.key.name = object1;
-        GlobalParams.op.obj.state.obj.key.instance = "inst1";
-        GlobalParams.op.obj.obj_id = "obj_id1";
-        GlobalParams.op.obj_data.part_num = 0;
-
-        /* As of now InitializeParams doesn't do anything
-         * special based on fop. Hence it's okay to do
-         * global initialization once.
- */ - ret = db->InitializeParams(dpp, &GlobalParams); - ASSERT_EQ(ret, 0); - } - - void TearDown() { - } - - int write_object(const DoutPrefixProvider *dpp, DBOpParams params) { - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - DB::Object::Write write_op(&op_target); - map setattrs; - ret = write_op.prepare(dpp); - if (ret) - return ret; - - write_op.meta.mtime = &bucket_mtime; - write_op.meta.category = RGWObjCategory::Main; - write_op.meta.owner = params.op.user.uinfo.user_id; - - bufferlist b1 = params.op.obj.head_data; - write_op.meta.data = &b1; - - bufferlist b2; - encode("ACL", b2); - setattrs[RGW_ATTR_ACL] = b2; - - ret = write_op.write_meta(0, params.op.obj.state.size, b1.length()+1, setattrs); - return ret; - } - }; -} - -TEST_F(DBStoreTest, InsertUser) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.user.uinfo.user_id.tenant = "tenant"; - params.op.user.uinfo.user_email = "user1@dbstore.com"; - params.op.user.uinfo.suspended = 123; - params.op.user.uinfo.max_buckets = 456; - params.op.user.uinfo.assumed_role_arn = "role"; - params.op.user.uinfo.placement_tags.push_back("tags"); - RGWAccessKey k1("id1", "key1"); - RGWAccessKey k2("id2", "key2"); - params.op.user.uinfo.access_keys["id1"] = k1; - params.op.user.uinfo.access_keys["id2"] = k2; - params.op.user.user_version.ver = 1; - params.op.user.user_version.tag = "UserTAG"; - - ret = db->ProcessOp(dpp, "InsertUser", ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, GetUser) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ProcessOp(dpp, "GetUser", ¶ms); - ASSERT_EQ(ret, 0); - ASSERT_EQ(params.op.user.uinfo.user_id.tenant, "tenant"); - ASSERT_EQ(params.op.user.uinfo.user_email, "user1@dbstore.com"); - ASSERT_EQ(params.op.user.uinfo.user_id.id, "user_id1"); - ASSERT_EQ(params.op.user.uinfo.suspended, 123); - ASSERT_EQ(params.op.user.uinfo.max_buckets, 456); - ASSERT_EQ(params.op.user.uinfo.assumed_role_arn, "role"); - ASSERT_EQ(params.op.user.uinfo.placement_tags.back(), "tags"); - RGWAccessKey k; - map::iterator it2 = params.op.user.uinfo.access_keys.begin(); - k = it2->second; - ASSERT_EQ(k.id, "id1"); - ASSERT_EQ(k.key, "key1"); - it2++; - k = it2->second; - ASSERT_EQ(k.id, "id2"); - ASSERT_EQ(k.key, "key2"); - -} - -TEST_F(DBStoreTest, GetUserQuery) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.query_str = "email"; - params.op.user.uinfo.user_email = "user1@dbstore.com"; - - ret = db->ProcessOp(dpp, "GetUser", ¶ms); - ASSERT_EQ(ret, 0); - ASSERT_EQ(params.op.user.uinfo.user_id.tenant, "tenant"); - ASSERT_EQ(params.op.user.uinfo.user_email, "user1@dbstore.com"); - ASSERT_EQ(params.op.user.uinfo.user_id.id, "user_id1"); - ASSERT_EQ(params.op.user.uinfo.suspended, 123); - ASSERT_EQ(params.op.user.uinfo.max_buckets, 456); - ASSERT_EQ(params.op.user.uinfo.assumed_role_arn, "role"); - ASSERT_EQ(params.op.user.uinfo.placement_tags.back(), "tags"); - RGWAccessKey k; - map::iterator it2 = params.op.user.uinfo.access_keys.begin(); - k = it2->second; - ASSERT_EQ(k.id, "id1"); - ASSERT_EQ(k.key, "key1"); - it2++; - k = it2->second; - ASSERT_EQ(k.id, "id2"); - ASSERT_EQ(k.key, "key2"); - -} - -TEST_F(DBStoreTest, GetUserQueryByEmail) { - int ret = -1; - RGWUserInfo uinfo; - string email = "user1@dbstore.com"; - map attrs; - RGWObjVersionTracker objv; - - ret = db->get_user(dpp, "email", email, uinfo, &attrs, &objv); - ASSERT_EQ(ret, 0); - ASSERT_EQ(uinfo.user_id.tenant, "tenant"); - ASSERT_EQ(uinfo.user_email, 
"user1@dbstore.com"); - ASSERT_EQ(uinfo.user_id.id, "user_id1"); - ASSERT_EQ(uinfo.suspended, 123); - ASSERT_EQ(uinfo.max_buckets, 456); - ASSERT_EQ(uinfo.assumed_role_arn, "role"); - ASSERT_EQ(uinfo.placement_tags.back(), "tags"); - RGWAccessKey k; - map::iterator it2 = uinfo.access_keys.begin(); - k = it2->second; - ASSERT_EQ(k.id, "id1"); - ASSERT_EQ(k.key, "key1"); - it2++; - k = it2->second; - ASSERT_EQ(k.id, "id2"); - ASSERT_EQ(k.key, "key2"); - ASSERT_EQ(objv.read_version.ver, 1); -} - -TEST_F(DBStoreTest, GetUserQueryByAccessKey) { - int ret = -1; - RGWUserInfo uinfo; - string key = "id1"; - - ret = db->get_user(dpp, "access_key", key, uinfo, nullptr, nullptr); - ASSERT_EQ(ret, 0); - ASSERT_EQ(uinfo.user_id.tenant, "tenant"); - ASSERT_EQ(uinfo.user_email, "user1@dbstore.com"); - ASSERT_EQ(uinfo.user_id.id, "user_id1"); - ASSERT_EQ(uinfo.suspended, 123); - ASSERT_EQ(uinfo.max_buckets, 456); - ASSERT_EQ(uinfo.assumed_role_arn, "role"); - ASSERT_EQ(uinfo.placement_tags.back(), "tags"); - RGWAccessKey k; - map::iterator it2 = uinfo.access_keys.begin(); - k = it2->second; - ASSERT_EQ(k.id, "id1"); - ASSERT_EQ(k.key, "key1"); - it2++; - k = it2->second; - ASSERT_EQ(k.id, "id2"); - ASSERT_EQ(k.key, "key2"); -} - -TEST_F(DBStoreTest, StoreUser) { - struct DBOpParams params = GlobalParams; - int ret = -1; - RGWUserInfo uinfo, old_uinfo; - map attrs; - RGWObjVersionTracker objv_tracker; - - bufferlist attr1, attr2; - encode("attrs1", attr1); - attrs["attr1"] = attr1; - encode("attrs2", attr2); - attrs["attr2"] = attr2; - - uinfo.user_id.id = "user_id2"; - uinfo.user_id.tenant = "tenant"; - uinfo.user_email = "user2@dbstore.com"; - uinfo.suspended = 123; - uinfo.max_buckets = 456; - uinfo.assumed_role_arn = "role"; - uinfo.placement_tags.push_back("tags"); - RGWAccessKey k1("id1", "key1"); - RGWAccessKey k2("id2", "key2"); - uinfo.access_keys["id1"] = k1; - uinfo.access_keys["id2"] = k2; - - /* non exclusive create..should create new one */ - ret = db->store_user(dpp, uinfo, false, &attrs, &objv_tracker, &old_uinfo); - ASSERT_EQ(ret, 0); - ASSERT_EQ(old_uinfo.user_email, ""); - ASSERT_EQ(objv_tracker.read_version.ver, 1); - ASSERT_EQ(objv_tracker.read_version.tag, "UserTAG"); - - /* invalid version number */ - objv_tracker.read_version.ver = 4; - ret = db->store_user(dpp, uinfo, true, &attrs, &objv_tracker, &old_uinfo); - ASSERT_EQ(ret, -125); /* returns ECANCELED */ - ASSERT_EQ(old_uinfo.user_id.id, uinfo.user_id.id); - ASSERT_EQ(old_uinfo.user_email, uinfo.user_email); - - /* exclusive create..should not create new one */ - uinfo.user_email = "user2_new@dbstore.com"; - objv_tracker.read_version.ver = 1; - ret = db->store_user(dpp, uinfo, true, &attrs, &objv_tracker, &old_uinfo); - ASSERT_EQ(ret, 0); - ASSERT_EQ(old_uinfo.user_email, "user2@dbstore.com"); - ASSERT_EQ(objv_tracker.read_version.ver, 1); - - ret = db->store_user(dpp, uinfo, false, &attrs, &objv_tracker, &old_uinfo); - ASSERT_EQ(ret, 0); - ASSERT_EQ(old_uinfo.user_email, "user2@dbstore.com"); - ASSERT_EQ(objv_tracker.read_version.ver, 2); - ASSERT_EQ(objv_tracker.read_version.tag, "UserTAG"); -} - -TEST_F(DBStoreTest, GetUserQueryByUserID) { - int ret = -1; - RGWUserInfo uinfo; - map attrs; - RGWObjVersionTracker objv; - - uinfo.user_id.tenant = "tenant"; - uinfo.user_id.id = "user_id2"; - - ret = db->get_user(dpp, "user_id", "user_id2", uinfo, &attrs, &objv); - ASSERT_EQ(ret, 0); - ASSERT_EQ(uinfo.user_id.tenant, "tenant"); - ASSERT_EQ(uinfo.user_email, "user2_new@dbstore.com"); - ASSERT_EQ(uinfo.user_id.id, "user_id2"); - 
ASSERT_EQ(uinfo.suspended, 123); - ASSERT_EQ(uinfo.max_buckets, 456); - ASSERT_EQ(uinfo.assumed_role_arn, "role"); - ASSERT_EQ(uinfo.placement_tags.back(), "tags"); - RGWAccessKey k; - map::iterator it = uinfo.access_keys.begin(); - k = it->second; - ASSERT_EQ(k.id, "id1"); - ASSERT_EQ(k.key, "key1"); - it++; - k = it->second; - ASSERT_EQ(k.id, "id2"); - ASSERT_EQ(k.key, "key2"); - - ASSERT_EQ(objv.read_version.ver, 2); - - bufferlist k1, k2; - string attr; - map::iterator it2 = attrs.begin(); - k1 = it2->second; - decode(attr, k1); - ASSERT_EQ(attr, "attrs1"); - it2++; - k2 = it2->second; - decode(attr, k2); - ASSERT_EQ(attr, "attrs2"); -} - -TEST_F(DBStoreTest, ListAllUsers) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ListAllUsers(dpp, ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, InsertBucket) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.bucket.info.bucket.name = "bucket1"; - params.op.bucket.info.bucket.tenant = "tenant"; - params.op.bucket.info.bucket.marker = "marker1"; - - params.op.bucket.ent.size = 1024; - - params.op.bucket.info.has_instance_obj = false; - params.op.bucket.bucket_version.ver = 1; - params.op.bucket.bucket_version.tag = "read_tag"; - - params.op.bucket.mtime = bucket_mtime; - - ret = db->ProcessOp(dpp, "InsertBucket", ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, UpdateBucketAttrs) { - int ret = -1; - RGWBucketInfo info; - map attrs; - RGWObjVersionTracker objv; - - bufferlist aclbl, aclbl2; - encode("attrs1", aclbl); - attrs["attr1"] = aclbl; - encode("attrs2", aclbl2); - attrs["attr2"] = aclbl2; - - info.bucket.name = "bucket1"; - - /* invalid version number */ - objv.read_version.ver = 4; - ret = db->update_bucket(dpp, "attrs", info, false, nullptr, &attrs, &bucket_mtime, &objv); - ASSERT_EQ(ret, -125); /* returns ECANCELED */ - - /* right version number */ - objv.read_version.ver = 1; - ret = db->update_bucket(dpp, "attrs", info, false, nullptr, &attrs, &bucket_mtime, &objv); - ASSERT_EQ(ret, 0); - ASSERT_EQ(objv.read_version.ver, 2); -} - -TEST_F(DBStoreTest, UpdateBucketInfo) { - struct DBOpParams params = GlobalParams; - int ret = -1; - RGWBucketInfo info; - - params.op.bucket.info.bucket.name = "bucket1"; - - ret = db->ProcessOp(dpp, "GetBucket", ¶ms); - ASSERT_EQ(ret, 0); - - info = params.op.bucket.info; - - info.bucket.marker = "marker2"; - ret = db->update_bucket(dpp, "info", info, false, nullptr, nullptr, &bucket_mtime, nullptr); - ASSERT_EQ(ret, 0); - ASSERT_EQ(info.objv_tracker.read_version.ver, 3); -} - -TEST_F(DBStoreTest, GetBucket) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.bucket.info.bucket.name = "bucket1"; - ret = db->ProcessOp(dpp, "GetBucket", ¶ms); - ASSERT_EQ(ret, 0); - ASSERT_EQ(params.op.bucket.info.bucket.name, "bucket1"); - ASSERT_EQ(params.op.bucket.info.bucket.tenant, "tenant"); - ASSERT_EQ(params.op.bucket.info.bucket.marker, "marker2"); - ASSERT_EQ(params.op.bucket.ent.size, 1024); - ASSERT_EQ(params.op.bucket.ent.bucket.name, "bucket1"); - ASSERT_EQ(params.op.bucket.ent.bucket.tenant, "tenant"); - ASSERT_EQ(params.op.bucket.info.has_instance_obj, false); - ASSERT_EQ(params.op.bucket.info.objv_tracker.read_version.ver, 3); - ASSERT_EQ(params.op.bucket.info.objv_tracker.read_version.tag, "read_tag"); - ASSERT_EQ(params.op.bucket.mtime, bucket_mtime); - ASSERT_EQ(params.op.bucket.info.owner.id, "user_id1"); - bufferlist k, k2; - string acl; - map::iterator it2 = params.op.bucket.bucket_attrs.begin(); - k = it2->second; - 
decode(acl, k); - ASSERT_EQ(acl, "attrs1"); - it2++; - k2 = it2->second; - decode(acl, k2); - ASSERT_EQ(acl, "attrs2"); -} - -TEST_F(DBStoreTest, CreateBucket) { - struct DBOpParams params = GlobalParams; - int ret = -1; - RGWBucketInfo info; - RGWUserInfo owner; - rgw_bucket bucket; - obj_version objv; - rgw_placement_rule rule; - map attrs; - - owner.user_id.id = "user_id1"; - bucket.name = "bucket1"; - bucket.tenant = "tenant"; - - objv.ver = 2; - objv.tag = "write_tag"; - - rule.name = "rule1"; - rule.storage_class = "sc1"; - - ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, - attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, - null_yield, false); - ASSERT_EQ(ret, 0); - bucket.name = "bucket2"; - ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, - attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, - null_yield, false); - ASSERT_EQ(ret, 0); - bucket.name = "bucket3"; - ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, - attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, - null_yield, false); - ASSERT_EQ(ret, 0); - bucket.name = "bucket4"; - ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, - attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, - null_yield, false); - ASSERT_EQ(ret, 0); - bucket.name = "bucket5"; - ret = db->create_bucket(dpp, owner, bucket, "zid", rule, "swift_ver", NULL, - attrs, info, &objv, NULL, bucket_mtime, NULL, NULL, - null_yield, false); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, GetBucketQueryByName) { - int ret = -1; - RGWBucketInfo binfo; - binfo.bucket.name = "bucket2"; - rgw::sal::Attrs attrs; - ceph::real_time mtime; - obj_version objv; - - ret = db->get_bucket_info(dpp, "name", "", binfo, &attrs, &mtime, &objv); - ASSERT_EQ(ret, 0); - ASSERT_EQ(binfo.bucket.name, "bucket2"); - ASSERT_EQ(binfo.bucket.tenant, "tenant"); - ASSERT_EQ(binfo.owner.id, "user_id1"); - ASSERT_EQ(binfo.objv_tracker.read_version.ver, 2); - ASSERT_EQ(binfo.objv_tracker.read_version.tag, "write_tag"); - ASSERT_EQ(binfo.zonegroup, "zid"); - ASSERT_EQ(binfo.creation_time, bucket_mtime); - ASSERT_EQ(binfo.placement_rule.name, "rule1"); - ASSERT_EQ(binfo.placement_rule.storage_class, "sc1"); - ASSERT_EQ(objv.ver, 2); - ASSERT_EQ(objv.tag, "write_tag"); - - marker1 = binfo.bucket.marker; -} - -TEST_F(DBStoreTest, ListUserBuckets) { - struct DBOpParams params = GlobalParams; - int ret = -1; - rgw_user owner; - int max = 2; - bool need_stats = true; - bool is_truncated = false; - RGWUserBuckets ulist; - - owner.id = "user_id1"; - - marker1 = ""; - do { - is_truncated = false; - ret = db->list_buckets(dpp, "", owner, marker1, "", max, need_stats, &ulist, - &is_truncated); - ASSERT_EQ(ret, 0); - - cout << "marker1 :" << marker1 << "\n"; - - cout << "is_truncated :" << is_truncated << "\n"; - - for (const auto& ent: ulist.get_buckets()) { - RGWBucketEnt e = ent.second; - cout << "###################### \n"; - cout << "ent.bucket.id : " << e.bucket.name << "\n"; - cout << "ent.bucket.marker : " << e.bucket.marker << "\n"; - cout << "ent.bucket.bucket_id : " << e.bucket.bucket_id << "\n"; - cout << "ent.size : " << e.size << "\n"; - cout << "ent.rule.name : " << e.placement_rule.name << "\n"; - - marker1 = e.bucket.name; - } - ulist.clear(); - } while(is_truncated); -} - -TEST_F(DBStoreTest, BucketChown) { - int ret = -1; - RGWBucketInfo info; - rgw_user user; - user.id = "user_id2"; - - info.bucket.name = "bucket5"; - - ret = db->update_bucket(dpp, "owner", info, false, &user, 
nullptr, &bucket_mtime, nullptr); - ASSERT_EQ(ret, 0); - ASSERT_EQ(info.objv_tracker.read_version.ver, 3); -} - -TEST_F(DBStoreTest, ListAllBuckets) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ListAllBuckets(dpp, ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, ListAllBuckets2) { - struct DBOpParams params = GlobalParams; - int ret = -1; - rgw_user owner; - int max = 2; - bool need_stats = true; - bool is_truncated = false; - RGWUserBuckets ulist; - - marker1 = ""; - do { - is_truncated = false; - ret = db->list_buckets(dpp, "all", owner, marker1, "", max, need_stats, &ulist, - &is_truncated); - ASSERT_EQ(ret, 0); - - cout << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"; - cout << "ownerID : " << owner.id << "\n"; - cout << "marker1 :" << marker1 << "\n"; - - cout << "is_truncated :" << is_truncated << "\n"; - - for (const auto& ent: ulist.get_buckets()) { - RGWBucketEnt e = ent.second; - cout << "###################### \n"; - cout << "ent.bucket.id : " << e.bucket.name << "\n"; - cout << "ent.bucket.marker : " << e.bucket.marker << "\n"; - cout << "ent.bucket.bucket_id : " << e.bucket.bucket_id << "\n"; - cout << "ent.size : " << e.size << "\n"; - cout << "ent.rule.name : " << e.placement_rule.name << "\n"; - - marker1 = e.bucket.name; - } - ulist.clear(); - } while(is_truncated); -} - -TEST_F(DBStoreTest, RemoveBucketAPI) { - int ret = -1; - RGWBucketInfo info; - - info.bucket.name = "bucket5"; - - ret = db->remove_bucket(dpp, info); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, RemoveUserAPI) { - int ret = -1; - RGWUserInfo uinfo; - RGWObjVersionTracker objv; - - uinfo.user_id.tenant = "tenant"; - uinfo.user_id.id = "user_id2"; - - /* invalid version number...should fail */ - objv.read_version.ver = 4; - ret = db->remove_user(dpp, uinfo, &objv); - ASSERT_EQ(ret, -125); - - objv.read_version.ver = 2; - ret = db->remove_user(dpp, uinfo, &objv); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, PutObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.obj.category = RGWObjCategory::Main; - params.op.obj.storage_class = "STANDARD"; - bufferlist b1; - encode("HELLO WORLD", b1); - cout<<"XXXXXXXXX Insert b1.length " << b1.length() << "\n"; - params.op.obj.head_data = b1; - params.op.obj.state.size = 12; - params.op.obj.state.is_olh = false; - ret = db->ProcessOp(dpp, "PutObject", ¶ms); - ASSERT_EQ(ret, 0); - - /* Insert another objects */ - params.op.obj.state.obj.key.name = "object2"; - params.op.obj.state.obj.key.instance = "inst2"; - ret = db->ProcessOp(dpp, "PutObject", ¶ms); - ASSERT_EQ(ret, 0); - - params.op.obj.state.obj.key.name = "object3"; - params.op.obj.state.obj.key.instance = "inst3"; - ret = db->ProcessOp(dpp, "PutObject", ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, ListAllObjects) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ListAllObjects(dpp, ¶ms); - ASSERT_GE(ret, 0); -} - -TEST_F(DBStoreTest, GetObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ProcessOp(dpp, "GetObject", ¶ms); - ASSERT_EQ(ret, 0); - ASSERT_EQ(params.op.obj.category, RGWObjCategory::Main); - ASSERT_EQ(params.op.obj.storage_class, "STANDARD"); - string data; - decode(data, params.op.obj.head_data); - ASSERT_EQ(data, "HELLO WORLD"); - ASSERT_EQ(params.op.obj.state.size, 12); - cout << "versionNum :" << params.op.obj.version_num << "\n"; -} - -TEST_F(DBStoreTest, GetObjectState) { - struct DBOpParams params = GlobalParams; - int ret = -1; - RGWObjState* s; - - 
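A note on the -125 (ECANCELED) assertions in the user and bucket tests above: write paths such as store_user(), update_bucket() and remove_user() compare the caller's RGWObjVersionTracker against the stored version and fail when another writer has bumped it. A hedged sketch of the read-and-retry loop a caller would build on top of this (the helper itself is hypothetical; it assumes the includes already used by these tests):

```cpp
#include <cerrno>

// Hypothetical caller-side retry: re-read the user (refreshing the version
// tracker) and retry the removal whenever another writer wins the race.
int remove_user_with_retry(const DoutPrefixProvider* dpp, DB* db,
                           RGWUserInfo& uinfo, int max_tries = 3)
{
  for (int i = 0; i < max_tries; ++i) {
    RGWObjVersionTracker objv;
    std::map<std::string, bufferlist> attrs;
    int ret = db->get_user(dpp, "user_id", uinfo.user_id.id, uinfo,
                           &attrs, &objv);
    if (ret < 0)
      return ret;
    ret = db->remove_user(dpp, uinfo, &objv);
    if (ret != -ECANCELED)
      return ret;          // done, or a hard error
    // lost the race: re-read and try again
  }
  return -ECANCELED;
}
```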
params.op.obj.state.obj.key.name = "object2"; - params.op.obj.state.obj.key.instance = "inst2"; - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - ret = op_target.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, - false, &s); - ASSERT_EQ(ret, 0); - ASSERT_EQ(s->size, 12); - ASSERT_EQ(s->is_olh, false); - cout << "versionNum :" << params.op.obj.version_num << "\n"; - - /* Recheck with get_state API */ - ret = op_target.get_state(dpp, &s, false); - ASSERT_EQ(ret, 0); - ASSERT_EQ(s->size, 12); - ASSERT_EQ(s->is_olh, false); - cout << "versionNum :" << params.op.obj.version_num << "\n"; -} - -TEST_F(DBStoreTest, ObjAttrs) { - struct DBOpParams params = GlobalParams; - int ret = -1; - map setattrs; - map rmattrs; - map readattrs; - - bufferlist b1, b2, b3; - encode("ACL", b1); - setattrs[RGW_ATTR_ACL] = b1; - encode("LC", b2); - setattrs[RGW_ATTR_LC] = b2; - encode("ETAG", b3); - setattrs[RGW_ATTR_ETAG] = b3; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - /* Set some attrs */ - ret = op_target.set_attrs(dpp, setattrs, nullptr); - ASSERT_EQ(ret, 0); - - /* read those attrs */ - DB::Object::Read read_op(&op_target); - read_op.params.attrs = &readattrs; - ret = read_op.prepare(dpp); - ASSERT_EQ(ret, 0); - - string val; - decode(val, readattrs[RGW_ATTR_ACL]); - ASSERT_EQ(val, "ACL"); - decode(val, readattrs[RGW_ATTR_LC]); - ASSERT_EQ(val, "LC"); - decode(val, readattrs[RGW_ATTR_ETAG]); - ASSERT_EQ(val, "ETAG"); - - /* Remove some attrs */ - rmattrs[RGW_ATTR_ACL] = b1; - map empty; - ret = op_target.set_attrs(dpp, empty, &rmattrs); - ASSERT_EQ(ret, 0); - - /* read those attrs */ - ret = read_op.prepare(dpp); - ASSERT_EQ(ret, 0); - - ASSERT_EQ(readattrs.count(RGW_ATTR_ACL), 0); - decode(val, readattrs[RGW_ATTR_LC]); - ASSERT_EQ(val, "LC"); - decode(val, readattrs[RGW_ATTR_ETAG]); - ASSERT_EQ(val, "ETAG"); -} - -TEST_F(DBStoreTest, WriteObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - params.op.obj.state.obj.key.name = "object3"; - params.op.obj.state.obj.key.instance = "inst3"; - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - bufferlist b1; - encode("HELLO WORLD - Object3", b1); - params.op.obj.head_data = b1; - params.op.obj.state.size = 22; - - ret = write_object(dpp, params); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, ReadObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - map readattrs; - params.op.obj.state.obj.key.name = "object3"; - params.op.obj.state.obj.key.instance = "inst3"; - uint64_t obj_size; - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - DB::Object::Read read_op(&op_target); - read_op.params.attrs = &readattrs; - read_op.params.obj_size = &obj_size; - ret = read_op.prepare(dpp); - ASSERT_EQ(ret, 0); - - bufferlist bl; - ret = read_op.read(0, 25, bl, dpp); - cout<<"XXXXXXXXX Insert bl.length " << bl.length() << "\n"; - ASSERT_EQ(ret, 25); - - string data; - decode(data, bl); - ASSERT_EQ(data, "HELLO WORLD - Object3"); - ASSERT_EQ(obj_size, 22); -} - -TEST_F(DBStoreTest, IterateObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - map readattrs; - uint64_t obj_size; - DBGetDataCB cb; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - DB::Object::Read read_op(&op_target); - read_op.params.attrs = &readattrs; - read_op.params.obj_size = &obj_size; - ret = read_op.prepare(dpp); - ASSERT_EQ(ret, 0); - - bufferlist bl; - ret = read_op.iterate(dpp, 0, 
15, &cb); - ASSERT_EQ(ret, 0); - string data; - decode(data, cb.data_bl); - cout << "XXXXXXXXXX iterate data is " << data << ", bl_ofs = " << cb.data_ofs << ", bl_len = " << cb.data_len << "\n"; - ASSERT_EQ(data, "HELLO WORLD"); - ASSERT_EQ(cb.data_ofs, 0); - ASSERT_EQ(cb.data_len, 15); -} - -TEST_F(DBStoreTest, ListBucketObjects) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - int max = 2; - bool is_truncated = false; - rgw_obj_key marker1; - DB::Bucket target(db, params.op.bucket.info); - DB::Bucket::List list_op(&target); - - vector dir_list; - - marker1.name = ""; - do { - is_truncated = false; - list_op.params.marker = marker1; - ret = list_op.list_objects(dpp, max, &dir_list, nullptr, &is_truncated); - ASSERT_EQ(ret, 0); - - cout << "marker1 :" << marker1.name << "\n"; - - cout << "is_truncated :" << is_truncated << "\n"; - - for (const auto& ent: dir_list) { - cls_rgw_obj_key key = ent.key; - cout << "###################### \n"; - cout << "key.name : " << key.name << "\n"; - cout << "key.instance : " << key.instance << "\n"; - - marker1 = list_op.get_next_marker(); - } - dir_list.clear(); - } while(is_truncated); -} - -TEST_F(DBStoreTest, DeleteObj) { - struct DBOpParams params = GlobalParams; - int ret = -1; - RGWObjState *s; - - /* delete object2 */ - params.op.obj.state.obj.key.name = "object2"; - params.op.obj.state.obj.key.instance = "inst2"; - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - DB::Object::Delete delete_op(&op_target); - ret = delete_op.delete_obj(dpp); - ASSERT_EQ(ret, 0); - - /* Should return ENOENT */ - ret = op_target.get_state(dpp, &s, false); - ASSERT_EQ(ret, -2); -} - -TEST_F(DBStoreTest, WriteVersionedObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::string instances[] = {"inst1", "inst2", "inst3"}; - bufferlist b1; - - params.op.obj.flags |= rgw_bucket_dir_entry::FLAG_CURRENT; - params.op.obj.state.obj.key.name = "object1"; - - /* Write versioned objects */ - DB::Object op_target(db, params.op.bucket.info, params.op.obj.state.obj); - DB::Object::Write write_op(&op_target); - - /* Version1 */ - params.op.obj.state.obj.key.instance = instances[0]; - encode("HELLO WORLD", b1); - params.op.obj.head_data = b1; - params.op.obj.state.size = 12; - ret = write_object(dpp, params); - ASSERT_EQ(ret, 0); - - /* Version2 */ - params.op.obj.state.obj.key.instance = instances[1]; - b1.clear(); - encode("HELLO WORLD ABC", b1); - params.op.obj.head_data = b1; - params.op.obj.state.size = 16; - ret = write_object(dpp, params); - ASSERT_EQ(ret, 0); - - /* Version3 */ - params.op.obj.state.obj.key.instance = instances[2]; - b1.clear(); - encode("HELLO WORLD A", b1); - params.op.obj.head_data = b1; - params.op.obj.state.size = 14; - ret = write_object(dpp, params); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, ListVersionedObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::string instances[] = {"inst1", "inst2", "inst3"}; - int i = 0; - - /* list versioned objects */ - params.op.obj.state.obj.key.instance.clear(); - params.op.list_max_count = MAX_VERSIONED_OBJECTS; - ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); - ASSERT_EQ(ret, 0); - - i = 2; - for (auto ent: params.op.obj.list_entries) { - - - ASSERT_EQ(ent.key.instance, instances[i]); - i--; - } -} - -TEST_F(DBStoreTest, ReadVersionedObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::string instances[] = {"inst1", "inst2", "inst3"}; - std::string data; - - /* read object.. 
should fetch latest version */ - RGWObjState* s; - params = GlobalParams; - params.op.obj.state.obj.key.instance.clear(); - DB::Object op_target2(db, params.op.bucket.info, params.op.obj.state.obj); - ret = op_target2.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, - true, &s); - ASSERT_EQ(ret, 0); - ASSERT_EQ(s->obj.key.instance, instances[2]); - decode(data, s->data); - ASSERT_EQ(data, "HELLO WORLD A"); - ASSERT_EQ(s->size, 14); - - /* read a particular non-current version */ - params.op.obj.state.obj.key.instance = instances[1]; - DB::Object op_target3(db, params.op.bucket.info, params.op.obj.state.obj); - ret = op_target3.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, - true, &s); - ASSERT_EQ(ret, 0); - decode(data, s->data); - ASSERT_EQ(data, "HELLO WORLD ABC"); - ASSERT_EQ(s->size, 16); -} - -TEST_F(DBStoreTest, DeleteVersionedObject) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::string instances[] = {"inst1", "inst2", "inst3"}; - std::string data; - std::string dm_instance; - int i = 0; - - /* Delete object..should create delete marker */ - params.op.obj.state.obj.key.instance.clear(); - DB::Object op_target(db, params.op.bucket.info, params.op.obj.state.obj); - DB::Object::Delete delete_op(&op_target); - delete_op.params.versioning_status |= BUCKET_VERSIONED; - - ret = delete_op.delete_obj(dpp); - ASSERT_EQ(ret, 0); - - /* list versioned objects */ - params = GlobalParams; - params.op.obj.state.obj.key.instance.clear(); - params.op.list_max_count = MAX_VERSIONED_OBJECTS; - ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); - - i = 3; - for (auto ent: params.op.obj.list_entries) { - string is_delete_marker = (ent.flags & rgw_bucket_dir_entry::FLAG_DELETE_MARKER)? "true" : "false"; - cout << "ent.name: " << ent.key.name << ". ent.instance: " << ent.key.instance << " is_delete_marker = " << is_delete_marker << "\n"; - - if (i == 3) { - ASSERT_EQ(is_delete_marker, "true"); - dm_instance = ent.key.instance; - } else { - ASSERT_EQ(is_delete_marker, "false"); - ASSERT_EQ(ent.key.instance, instances[i]); - } - - i--; - } - - /* read object.. should return -ENOENT */ - RGWObjState* s; - params = GlobalParams; - params.op.obj.state.obj.key.instance.clear(); - DB::Object op_target2(db, params.op.bucket.info, params.op.obj.state.obj); - ret = op_target2.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, - true, &s); - ASSERT_EQ(ret, -ENOENT); - - /* Delete delete marker..should be able to read object now */ - params.op.obj.state.obj.key.instance = dm_instance; - DB::Object op_target3(db, params.op.bucket.info, params.op.obj.state.obj); - DB::Object::Delete delete_op2(&op_target3); - delete_op2.params.versioning_status |= BUCKET_VERSIONED; - - ret = delete_op2.delete_obj(dpp); - ASSERT_EQ(ret, 0); - - /* read object.. should fetch latest version */ - params = GlobalParams; - params.op.obj.state.obj.key.instance.clear(); - DB::Object op_target4(db, params.op.bucket.info, params.op.obj.state.obj); - ret = op_target4.get_obj_state(dpp, params.op.bucket.info, params.op.obj.state.obj, - true, &s); - ASSERT_EQ(s->obj.key.instance, instances[2]); - decode(data, s->data); - ASSERT_EQ(data, "HELLO WORLD A"); - ASSERT_EQ(s->size, 14); - - /* delete latest version using version-id. 
Next version should get promoted */ - params.op.obj.state.obj.key.instance = instances[2]; - DB::Object op_target5(db, params.op.bucket.info, params.op.obj.state.obj); - DB::Object::Delete delete_op3(&op_target5); - delete_op3.params.versioning_status |= BUCKET_VERSIONED; - - ret = delete_op3.delete_obj(dpp); - ASSERT_EQ(ret, 0); - - /* list versioned objects..only two versions should be present - * with second version marked as CURRENT */ - params = GlobalParams; - params.op.obj.state.obj.key.instance.clear(); - params.op.list_max_count = MAX_VERSIONED_OBJECTS; - ret = db->ProcessOp(dpp, "ListVersionedObjects", ¶ms); - - i = 1; - for (auto ent: params.op.obj.list_entries) { - - if (i == 1) { - dm_instance = ent.key.instance; - } else { - ASSERT_EQ(ent.key.instance, instances[i]); - } - - i--; - } - -} - -TEST_F(DBStoreTest, ObjectOmapSetVal) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - string val = "part1_val"; - bufferlist bl; - encode(val, bl); - ret = op_target.obj_omap_set_val_by_key(dpp, "part1", bl, false); - ASSERT_EQ(ret, 0); - - val = "part2_val"; - bl.clear(); - encode(val, bl); - ret = op_target.obj_omap_set_val_by_key(dpp, "part2", bl, false); - ASSERT_EQ(ret, 0); - - val = "part3_val"; - bl.clear(); - encode(val, bl); - ret = op_target.obj_omap_set_val_by_key(dpp, "part3", bl, false); - ASSERT_EQ(ret, 0); - - val = "part4_val"; - bl.clear(); - encode(val, bl); - ret = op_target.obj_omap_set_val_by_key(dpp, "part4", bl, false); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, ObjectOmapGetValsByKeys) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::set keys; - std::map vals; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - keys.insert("part2"); - keys.insert("part4"); - - ret = op_target.obj_omap_get_vals_by_keys(dpp, "", keys, &vals); - ASSERT_EQ(ret, 0); - ASSERT_EQ(vals.size(), 2); - - string val; - decode(val, vals["part2"]); - ASSERT_EQ(val, "part2_val"); - decode(val, vals["part4"]); - ASSERT_EQ(val, "part4_val"); -} - -TEST_F(DBStoreTest, ObjectOmapGetAll) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::map vals; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - ret = op_target.obj_omap_get_all(dpp, &vals); - ASSERT_EQ(ret, 0); - ASSERT_EQ(vals.size(), 4); - - string val; - decode(val, vals["part1"]); - ASSERT_EQ(val, "part1_val"); - decode(val, vals["part2"]); - ASSERT_EQ(val, "part2_val"); - decode(val, vals["part3"]); - ASSERT_EQ(val, "part3_val"); - decode(val, vals["part4"]); - ASSERT_EQ(val, "part4_val"); -} - -TEST_F(DBStoreTest, ObjectOmapGetVals) { - struct DBOpParams params = GlobalParams; - int ret = -1; - std::set keys; - std::map vals; - bool pmore; - - DB::Object op_target(db, params.op.bucket.info, - params.op.obj.state.obj); - - ret = op_target.obj_omap_get_vals(dpp, "part3", 10, &vals, &pmore); - ASSERT_EQ(ret, 0); - ASSERT_EQ(vals.size(), 2); - - string val; - decode(val, vals["part3"]); - ASSERT_EQ(val, "part3_val"); - decode(val, vals["part4"]); - ASSERT_EQ(val, "part4_val"); -} - -TEST_F(DBStoreTest, PutObjectData) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.obj_data.part_num = 1; - params.op.obj_data.offset = 10; - params.op.obj_data.multipart_part_str = "2"; - bufferlist b1; - encode("HELLO WORLD", b1); - params.op.obj_data.data = b1; - params.op.obj_data.size = 12; - params.op.obj.state.mtime = real_clock::now(); 
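The raw-op tests above and below all funnel through the same entry point: db->ProcessOp(dpp, &lt;op name&gt;, &amp;params) looks up the named SQL*Op ("PutObjectData", "GetObjectData", ...) and runs its Prepare/Bind/Execute sequence, with DBOpParams carrying both inputs and outputs. For illustration, the setup in this test could be wrapped as the following hypothetical helper; the test then issues exactly this ProcessOp call below:

```cpp
// Hypothetical wrapper around the "PutObjectData" raw op shown in this test:
// stores one data chunk of an object at the given part number and offset.
int put_chunk(const DoutPrefixProvider* dpp, DB* db, DBOpParams params,
              int part_num, uint64_t offset, bufferlist data)
{
  params.op.obj_data.part_num = part_num;
  params.op.obj_data.offset = offset;
  params.op.obj_data.size = data.length();
  params.op.obj_data.data = std::move(data);
  params.op.obj.state.mtime = real_clock::now();
  return db->ProcessOp(dpp, "PutObjectData", &params);
}
```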
-  ret = db->ProcessOp(dpp, "PutObjectData", &params);
-  ASSERT_EQ(ret, 0);
-}
-
-TEST_F(DBStoreTest, UpdateObjectData) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-
-  params.op.obj.state.mtime = bucket_mtime;
-  ret = db->ProcessOp(dpp, "UpdateObjectData", &params);
-  ASSERT_EQ(ret, 0);
-}
-
-TEST_F(DBStoreTest, GetObjectData) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-
-  params.op.obj.state.obj.key.instance = "inst1";
-  params.op.obj.state.obj.key.name = "object1";
-  ret = db->ProcessOp(dpp, "GetObjectData", &params);
-  ASSERT_EQ(ret, 0);
-  ASSERT_EQ(params.op.obj_data.part_num, 1);
-  ASSERT_EQ(params.op.obj_data.offset, 10);
-  ASSERT_EQ(params.op.obj_data.multipart_part_str, "2");
-  ASSERT_EQ(params.op.obj.state.obj.key.instance, "inst1");
-  ASSERT_EQ(params.op.obj.state.obj.key.name, "object1");
-  ASSERT_EQ(params.op.obj.state.mtime, bucket_mtime);
-  string data;
-  decode(data, params.op.obj_data.data);
-  ASSERT_EQ(data, "HELLO WORLD");
-}
-
-TEST_F(DBStoreTest, DeleteObjectData) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-
-  ret = db->ProcessOp(dpp, "DeleteObjectData", &params);
-  ASSERT_EQ(ret, 0);
-}
-
-TEST_F(DBStoreTest, DeleteObject) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-
-  ret = db->ProcessOp(dpp, "DeleteObject", &params);
-  ASSERT_EQ(ret, 0);
-}
-
-TEST_F(DBStoreTest, LCTables) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-
-  ret = db->createLCTables(dpp);
-  ASSERT_GE(ret, 0);
-}
-
-TEST_F(DBStoreTest, LCHead) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-  std::string index1 = "bucket1";
-  std::string index2 = "bucket2";
-  time_t lc_time = ceph_clock_now();
-  std::unique_ptr<rgw::sal::Lifecycle::LCHead> head;
-  std::string ents[] = {"entry1", "entry2", "entry3"};
-  rgw::sal::StoreLifecycle::StoreLCHead head1(lc_time, 0, ents[0]);
-  rgw::sal::StoreLifecycle::StoreLCHead head2(lc_time, 0, ents[1]);
-  rgw::sal::StoreLifecycle::StoreLCHead head3(lc_time, 0, ents[2]);
-
-  ret = db->put_head(index1, head1);
-  ASSERT_EQ(ret, 0);
-  ret = db->put_head(index2, head2);
-  ASSERT_EQ(ret, 0);
-
-  ret = db->get_head(index1, &head);
-  ASSERT_EQ(ret, 0);
-  ASSERT_EQ(head->get_marker(), "entry1");
-
-  ret = db->get_head(index2, &head);
-  ASSERT_EQ(ret, 0);
-  ASSERT_EQ(head->get_marker(), "entry2");
-
-  // update index1
-  ret = db->put_head(index1, head3);
-  ASSERT_EQ(ret, 0);
-  ret = db->get_head(index1, &head);
-  ASSERT_EQ(ret, 0);
-  ASSERT_EQ(head->get_marker(), "entry3");
-
-}
-TEST_F(DBStoreTest, LCEntry) {
-  struct DBOpParams params = GlobalParams;
-  int ret = -1;
-  uint64_t lc_time = ceph_clock_now();
-  std::string index1 = "lcindex1";
-  std::string index2 = "lcindex2";
-  typedef enum {lc_uninitial = 1, lc_complete} status;
-  std::string ents[] = {"bucket1", "bucket2", "bucket3", "bucket4"};
-  std::unique_ptr<rgw::sal::Lifecycle::LCEntry> entry;
-  rgw::sal::StoreLifecycle::StoreLCEntry entry1(ents[0], lc_time, lc_uninitial);
-  rgw::sal::StoreLifecycle::StoreLCEntry entry2(ents[1], lc_time, lc_uninitial);
-  rgw::sal::StoreLifecycle::StoreLCEntry entry3(ents[2], lc_time, lc_uninitial);
-  rgw::sal::StoreLifecycle::StoreLCEntry entry4(ents[3], lc_time, lc_uninitial);
-
-  vector<std::unique_ptr<rgw::sal::Lifecycle::LCEntry>> lc_entries;
-
-  ret = db->set_entry(index1, entry1);
-  ASSERT_EQ(ret, 0);
-  ret = db->set_entry(index1, entry2);
-  ASSERT_EQ(ret, 0);
-  ret = db->set_entry(index1, entry3);
-  ASSERT_EQ(ret, 0);
-  ret = db->set_entry(index2, entry4);
-  ASSERT_EQ(ret, 0);
-
-  // get entry index1, entry1
-  ret = db->get_entry(index1, ents[0], &entry);
-  ASSERT_EQ(ret, 0);
-  ASSERT_EQ(entry->get_status(),
lc_uninitial); - ASSERT_EQ(entry->get_start_time(), lc_time); - - // get next entry index1, entry2 - ret = db->get_next_entry(index1, ents[1], &entry); - ASSERT_EQ(ret, 0); - ASSERT_EQ(entry->get_bucket(), ents[2]); - ASSERT_EQ(entry->get_status(), lc_uninitial); - ASSERT_EQ(entry->get_start_time(), lc_time); - - // update entry4 to entry5 - entry4.status = lc_complete; - ret = db->set_entry(index2, entry4); - ASSERT_EQ(ret, 0); - ret = db->get_entry(index2, ents[3], &entry); - ASSERT_EQ(ret, 0); - ASSERT_EQ(entry->get_status(), lc_complete); - - // list entries - ret = db->list_entries(index1, "", 5, lc_entries); - ASSERT_EQ(ret, 0); - for (const auto& ent: lc_entries) { - cout << "###################### \n"; - cout << "lc entry.bucket : " << ent->get_bucket() << "\n"; - cout << "lc entry.status : " << ent->get_status() << "\n"; - } - - // remove index1, entry3 - ret = db->rm_entry(index1, entry3); - ASSERT_EQ(ret, 0); - - // get next entry index1, entry2.. should be null - entry.release(); - ret = db->get_next_entry(index1, ents[1], &entry); - ASSERT_EQ(ret, 0); - ASSERT_EQ(entry.get(), nullptr); -} - -TEST_F(DBStoreTest, RemoveBucket) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ProcessOp(dpp, "RemoveBucket", ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, RemoveUser) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - ret = db->ProcessOp(dpp, "RemoveUser", ¶ms); - ASSERT_EQ(ret, 0); -} - -TEST_F(DBStoreTest, InsertTestIDUser) { - struct DBOpParams params = GlobalParams; - int ret = -1; - - params.op.user.uinfo.user_id.id = "testid"; - params.op.user.uinfo.display_name = "M. Tester"; - params.op.user.uinfo.user_id.tenant = "tenant"; - params.op.user.uinfo.user_email = "tester@ceph.com"; - RGWAccessKey k1("0555b35654ad1656d804", "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="); - params.op.user.uinfo.access_keys["0555b35654ad1656d804"] = k1; - params.op.user.user_version.ver = 1; - params.op.user.user_version.tag = "UserTAG"; - - ret = db->ProcessOp(dpp, "InsertUser", ¶ms); - ASSERT_EQ(ret, 0); -} - -int main(int argc, char **argv) -{ - int ret = -1; - string c_logfile = "rgw_dbstore_tests.log"; - int c_loglevel = 20; - - // format: ./dbstore-tests logfile loglevel - if (argc == 3) { - c_logfile = argv[1]; - c_loglevel = (atoi)(argv[2]); - cout << "logfile:" << c_logfile << ", loglevel set to " << c_loglevel << "\n"; - } - - ::testing::InitGoogleTest(&argc, argv); - - gtest::env = new gtest::Environment(); - gtest::env->logfile = c_logfile; - gtest::env->loglevel = c_loglevel; - ::testing::AddGlobalTestEnvironment(gtest::env); - - ret = RUN_ALL_TESTS(); - - return ret; -} diff --git a/src/rgw/store/immutable_config/store.cc b/src/rgw/store/immutable_config/store.cc deleted file mode 100644 index 8d3e0765faa..00000000000 --- a/src/rgw/store/immutable_config/store.cc +++ /dev/null @@ -1,422 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include "rgw_zone.h" -#include "store.h" - -namespace rgw::sal { - -ImmutableConfigStore::ImmutableConfigStore(const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone, - const RGWPeriodConfig& period_config) - : zonegroup(zonegroup), zone(zone), period_config(period_config) -{ -} - -// Realm -int ImmutableConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) -{ - return -ENOENT; -} - -int ImmutableConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) -{ - return -EROFS; -} - - -int ImmutableConfigStore::create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, - std::unique_ptr* writer) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) -{ - return -ENOENT; -} - -int ImmutableConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) -{ - return -ENOENT; -} - -int ImmutableConfigStore::read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) -{ - return -ENOENT; -} - -int ImmutableConfigStore::read_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view realm_name, - std::string& realm_id) -{ - return -ENOENT; -} - -int ImmutableConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp, - optional_yield y, - const RGWPeriod& period) -{ - return -ENOTSUP; -} - -int ImmutableConfigStore::list_realm_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) -{ - result.next.clear(); - result.entries = entries.first(0); - return 0; -} - - -// Period -int ImmutableConfigStore::create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_period(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view period_id, - std::optional epoch, RGWPeriod& info) -{ - return -ENOENT; -} - -int ImmutableConfigStore::delete_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id) -{ - return -EROFS; -} - -int ImmutableConfigStore::list_period_ids(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) -{ - result.next.clear(); - result.entries = entries.first(0); - return 0; -} - - -// ZoneGroup - -class ImmutableZoneGroupWriter : public ZoneGroupWriter { - public: - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneGroup& info) override - { - return -EROFS; - } - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneGroup& info, std::string_view new_name) override - { - return -EROFS; - } - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - return -EROFS; - } -}; - -int ImmutableConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_default_zonegroup_id(const 
DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) -{ - if (!realm_id.empty()) { - return -ENOENT; - } - zonegroup_id = zonegroup.id; - return 0; -} - -int ImmutableConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - return -EROFS; -} - - -int ImmutableConfigStore::create_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneGroup& info, - std::unique_ptr* writer) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - if (zonegroup_id != zonegroup.id) { - return -ENOENT; - } - - info = zonegroup; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} -int ImmutableConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - if (zonegroup_name != zonegroup.name) { - return -ENOENT; - } - - info = zonegroup; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} - -int ImmutableConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - info = zonegroup; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} - -int ImmutableConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) -{ - if (marker < zonegroup.name) { - entries[0] = zonegroup.name; - result.next = zonegroup.name; - result.entries = entries.first(1); - } else { - result.next.clear(); - result.entries = entries.first(0); - } - return 0; -} - -// Zone - -class ImmutableZoneWriter : public ZoneWriter { - public: - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneParams& info) override - { - return -EROFS; - } - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneParams& info, std::string_view new_name) override - { - return -EROFS; - } - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - return -EROFS; - } -}; - -int ImmutableConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zone_id) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zone_id) -{ - if (realm_id.empty()) { - return -ENOENT; - } - zone_id = zone.id; - return 0; -} - -int ImmutableConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - return -EROFS; -} - - -int ImmutableConfigStore::create_zone(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneParams& info, - std::unique_ptr* writer) -{ - return -EROFS; -} - -int ImmutableConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_id, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - if (zone_id != zone.id) { - return -ENOENT; - } - - info = zone; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} - -int ImmutableConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - 
std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - if (zone_name != zone.name) { - return -ENOENT; - } - - info = zone; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} - -int ImmutableConfigStore::read_default_zone(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - if (!realm_id.empty()) { - return -ENOENT; - } - - info = zone; - - if (writer) { - *writer = std::make_unique(); - } - return 0; -} - -int ImmutableConfigStore::list_zone_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) -{ - if (marker < zone.name) { - entries[0] = zone.name; - result.next = zone.name; - result.entries = entries.first(1); - } else { - result.next.clear(); - result.entries = entries.first(0); - } - return 0; -} - - -// PeriodConfig -int ImmutableConfigStore::read_period_config(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWPeriodConfig& info) -{ - if (!realm_id.empty()) { - return -ENOENT; - } - - info = period_config; - return 0; -} - -int ImmutableConfigStore::write_period_config(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - const RGWPeriodConfig& info) -{ - return -EROFS; -} - - -/// ImmutableConfigStore factory function -auto create_immutable_config_store(const DoutPrefixProvider* dpp, - const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone, - const RGWPeriodConfig& period_config) - -> std::unique_ptr -{ - return std::make_unique(zonegroup, zone, period_config); -} - -} // namespace rgw::sal diff --git a/src/rgw/store/immutable_config/store.h b/src/rgw/store/immutable_config/store.h deleted file mode 100644 index 9a1ac5f1443..00000000000 --- a/src/rgw/store/immutable_config/store.h +++ /dev/null @@ -1,180 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#pragma once - -#include "rgw_sal_config.h" - -namespace rgw::sal { - -/// A read-only ConfigStore that serves the given default zonegroup and zone. 
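-/// Writes fail with -EROFS (realm period notification with -ENOTSUP);
-/// lookups for any id or name other than the configured zonegroup/zone
-/// return -ENOENT; list operations yield at most the one configured entry.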
-class ImmutableConfigStore : public ConfigStore { - public: - explicit ImmutableConfigStore(const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone, - const RGWPeriodConfig& period_config); - - // Realm - virtual int write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) override; - virtual int read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) override; - virtual int delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) override; - - virtual int create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view realm_name, - std::string& realm_id) override; - virtual int realm_notify_new_period(const DoutPrefixProvider* dpp, - optional_yield y, - const RGWPeriod& period) override; - virtual int list_realm_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) override; - - // Period - virtual int create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) override; - virtual int read_period(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view period_id, - std::optional epoch, RGWPeriod& info) override; - virtual int delete_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id) override; - virtual int list_period_ids(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) override; - - // ZoneGroup - virtual int write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) override; - virtual int read_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) override; - virtual int delete_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) override; - - virtual int create_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_zonegroup_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_id, - RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_zonegroup_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_default_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int list_zonegroup_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& 
result) override; - - // Zone - virtual int write_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zone_id) override; - virtual int read_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zone_id) override; - virtual int delete_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) override; - - virtual int create_zone(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_zone_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_id, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_zone_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_default_zone(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int list_zone_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - ListResult& result) override; - - // PeriodConfig - virtual int read_period_config(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWPeriodConfig& info) override; - virtual int write_period_config(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - const RGWPeriodConfig& info) override; - - private: - const RGWZoneGroup zonegroup; - const RGWZoneParams zone; - const RGWPeriodConfig period_config; -}; // ImmutableConfigStore - - -/// ImmutableConfigStore factory function -auto create_immutable_config_store(const DoutPrefixProvider* dpp, - const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone, - const RGWPeriodConfig& period_config) - -> std::unique_ptr; - -} // namespace rgw::sal diff --git a/src/rgw/store/json_config/store.cc b/src/rgw/store/json_config/store.cc deleted file mode 100644 index 49837a8eacd..00000000000 --- a/src/rgw/store/json_config/store.cc +++ /dev/null @@ -1,176 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include -#include "include/buffer.h" -#include "common/errno.h" -#include "common/ceph_json.h" -#include "rgw_zone.h" -#include "store/immutable_config/store.h" -#include "store.h" - -namespace rgw::sal { - -namespace { - -struct DecodedConfig { - RGWZoneGroup zonegroup; - RGWZoneParams zone; - RGWPeriodConfig period_config; - - void decode_json(JSONObj *obj) - { - JSONDecoder::decode_json("zonegroup", zonegroup, obj); - JSONDecoder::decode_json("zone", zone, obj); - JSONDecoder::decode_json("period_config", period_config, obj); - } -}; - -static void parse_config(const DoutPrefixProvider* dpp, const char* filename) -{ - bufferlist bl; - std::string errmsg; - int r = bl.read_file(filename, &errmsg); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to read json config file '" << filename - << "': " << errmsg << dendl; - throw std::system_error(-r, std::system_category()); - } - - JSONParser p; - if (!p.parse(bl.c_str(), bl.length())) { - ldpp_dout(dpp, 0) << "failed to parse json config file" << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } - - DecodedConfig config; - try { - decode_json_obj(config, &p); - } catch (const JSONDecoder::err& e) { - ldpp_dout(dpp, 0) << "failed to decode JSON input: " << e.what() << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } -} - -void sanity_check_config(const DoutPrefixProvider* dpp, DecodedConfig& config) -{ - if (config.zonegroup.id.empty()) { - config.zonegroup.id = "default"; - } - if (config.zonegroup.name.empty()) { - config.zonegroup.name = "default"; - } - if (config.zonegroup.api_name.empty()) { - config.zonegroup.api_name = config.zonegroup.name; - } - - if (config.zone.id.empty()) { - config.zone.id = "default"; - } - if (config.zone.name.empty()) { - config.zone.name = "default"; - } - - // add default placement if it doesn't exist - rgw_pool pool; - RGWZonePlacementInfo placement; - placement.storage_classes.set_storage_class( - RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); - config.zone.placement_pools.emplace("default-placement", - std::move(placement)); - - std::set pools; - int r = rgw::init_zone_pool_names(dpp, null_yield, pools, config.zone); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to set default zone pool names" << dendl; - throw std::system_error(-r, std::system_category()); - } - - // verify that config.zonegroup only contains config.zone - if (config.zonegroup.zones.size() > 1) { - ldpp_dout(dpp, 0) << "zonegroup cannot contain multiple zones" << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } - - if (config.zonegroup.zones.size() == 1) { - auto z = config.zonegroup.zones.begin(); - if (z->first != config.zone.id) { - ldpp_dout(dpp, 0) << "zonegroup contains unknown zone id=" - << z->first << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } - if (z->second.id != config.zone.id) { - ldpp_dout(dpp, 0) << "zonegroup contains unknown zone id=" - << z->second.id << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } - if (z->second.name != config.zone.name) { - ldpp_dout(dpp, 0) << "zonegroup contains unknown zone name=" - << z->second.name << dendl; - throw std::system_error(make_error_code(std::errc::invalid_argument)); - } - if (config.zonegroup.master_zone != config.zone.id) { - ldpp_dout(dpp, 0) << "zonegroup contains unknown master_zone=" - << config.zonegroup.master_zone << dendl; - throw 
std::system_error(make_error_code(std::errc::invalid_argument)); - } - } else { - // add the zone to the group - const bool is_master = true; - const bool read_only = false; - std::list endpoints; - std::list sync_from; - std::list sync_from_rm; - rgw::zone_features::set enable_features; - rgw::zone_features::set disable_features; - - enable_features.insert(rgw::zone_features::supported.begin(), - rgw::zone_features::supported.end()); - - int r = rgw::add_zone_to_group(dpp, config.zonegroup, config.zone, - &is_master, &read_only, endpoints, - nullptr, nullptr, sync_from, sync_from_rm, - nullptr, std::nullopt, - enable_features, disable_features); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to add zone to zonegroup: " - << cpp_strerror(r) << dendl; - throw std::system_error(-r, std::system_category()); - } - - config.zonegroup.enabled_features = std::move(enable_features); - } - - // insert the default placement target if it doesn't exist - auto target = RGWZoneGroupPlacementTarget{.name = "default-placement"}; - config.zonegroup.placement_targets.emplace(target.name, target); - if (config.zonegroup.default_placement.name.empty()) { - config.zonegroup.default_placement.name = target.name; - } -} - -} // anonymous namespace - -auto create_json_config_store(const DoutPrefixProvider* dpp, - const std::string& filename) - -> std::unique_ptr -{ - DecodedConfig config; - parse_config(dpp, filename.c_str()); - sanity_check_config(dpp, config); - return create_immutable_config_store(dpp, config.zonegroup, config.zone, - config.period_config); -} - -} // namespace rgw::sal diff --git a/src/rgw/store/json_config/store.h b/src/rgw/store/json_config/store.h deleted file mode 100644 index 63ddf6fdcca..00000000000 --- a/src/rgw/store/json_config/store.h +++ /dev/null @@ -1,27 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#pragma once - -#include "store/immutable_config/store.h" - -namespace rgw::sal { - -/// Create an immutable ConfigStore by parsing the zonegroup and zone from the -/// given json filename. -auto create_json_config_store(const DoutPrefixProvider* dpp, - const std::string& filename) - -> std::unique_ptr; - -} // namespace rgw::sal diff --git a/src/rgw/store/rados/cls_fifo_legacy.cc b/src/rgw/store/rados/cls_fifo_legacy.cc deleted file mode 100644 index 23b39b9fa5f..00000000000 --- a/src/rgw/store/rados/cls_fifo_legacy.cc +++ /dev/null @@ -1,2484 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2020 Red Hat - * Author: Adam C. Emerson - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include -#include -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "include/rados/librados.hpp" - -#include "include/buffer.h" - -#include "common/async/yield_context.h" -#include "common/random_string.h" - -#include "cls/fifo/cls_fifo_types.h" -#include "cls/fifo/cls_fifo_ops.h" - -#include "cls_fifo_legacy.h" - -namespace rgw::cls::fifo { -namespace cb = ceph::buffer; -namespace fifo = rados::cls::fifo; - -using ceph::from_error_code; - -inline constexpr auto MAX_RACE_RETRIES = 10; - -void create_meta(lr::ObjectWriteOperation* op, - std::string_view id, - std::optional objv, - std::optional oid_prefix, - bool exclusive, - std::uint64_t max_part_size, - std::uint64_t max_entry_size) -{ - fifo::op::create_meta cm; - - cm.id = id; - cm.version = objv; - cm.oid_prefix = oid_prefix; - cm.max_part_size = max_part_size; - cm.max_entry_size = max_entry_size; - cm.exclusive = exclusive; - - cb::list in; - encode(cm, in); - op->exec(fifo::op::CLASS, fifo::op::CREATE_META, in); -} - -int get_meta(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, - std::optional objv, fifo::info* info, - std::uint32_t* part_header_size, - std::uint32_t* part_entry_overhead, - uint64_t tid, optional_yield y, - bool probe) -{ - lr::ObjectReadOperation op; - fifo::op::get_meta gm; - gm.version = objv; - cb::list in; - encode(gm, in); - cb::list bl; - - op.exec(fifo::op::CLASS, fifo::op::GET_META, in, - &bl, nullptr); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); - if (r >= 0) try { - fifo::op::get_meta_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - if (info) *info = std::move(reply.info); - if (part_header_size) *part_header_size = reply.part_header_size; - if (part_entry_overhead) - *part_entry_overhead = reply.part_entry_overhead; - } catch (const cb::error& err) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " decode failed: " << err.what() - << " tid=" << tid << dendl; - r = from_error_code(err.code()); - } else if (!(probe && (r == -ENOENT || r == -ENODATA))) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::GET_META failed r=" << r << " tid=" << tid - << dendl; - } - return r; -}; - -namespace { -void update_meta(lr::ObjectWriteOperation* op, const fifo::objv& objv, - const fifo::update& update) -{ - fifo::op::update_meta um; - - um.version = objv; - um.tail_part_num = update.tail_part_num(); - um.head_part_num = update.head_part_num(); - um.min_push_part_num = update.min_push_part_num(); - um.max_push_part_num = update.max_push_part_num(); - um.journal_entries_add = std::move(update).journal_entries_add(); - um.journal_entries_rm = std::move(update).journal_entries_rm(); - - cb::list in; - encode(um, in); - op->exec(fifo::op::CLASS, fifo::op::UPDATE_META, in); -} - -void part_init(lr::ObjectWriteOperation* op, std::string_view tag, - fifo::data_params params) -{ - fifo::op::init_part ip; - - ip.tag = tag; - ip.params = params; - - cb::list in; - encode(ip, in); - op->exec(fifo::op::CLASS, fifo::op::INIT_PART, in); -} - -int push_part(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, std::string_view tag, - std::deque data_bufs, std::uint64_t tid, - optional_yield y) -{ - lr::ObjectWriteOperation op; - fifo::op::push_part pp; - - pp.tag = tag; - pp.data_bufs = data_bufs; - pp.total_len = 0; - - for (const auto& bl : data_bufs) - pp.total_len += bl.length(); - - cb::list in; - encode(pp, in); - auto retval = 0; - 
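-  /* OPERATION_RETURNVEC below makes librados hand the per-op return code
-   * back in 'retval', so a negative cls PUSH_PART result (such as -ERANGE
-   * when the part is full) can be told apart from an error on the rados
-   * operation itself. */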
op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in, nullptr, &retval); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y, lr::OPERATION_RETURNVEC); - if (r < 0) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::PUSH_PART failed r=" << r - << " tid=" << tid << dendl; - return r; - } - if (retval < 0) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " error handling response retval=" << retval - << " tid=" << tid << dendl; - } - return retval; -} - -void push_part(lr::IoCtx& ioctx, const std::string& oid, std::string_view tag, - std::deque data_bufs, std::uint64_t tid, - lr::AioCompletion* c) -{ - lr::ObjectWriteOperation op; - fifo::op::push_part pp; - - pp.tag = tag; - pp.data_bufs = data_bufs; - pp.total_len = 0; - - for (const auto& bl : data_bufs) - pp.total_len += bl.length(); - - cb::list in; - encode(pp, in); - op.exec(fifo::op::CLASS, fifo::op::PUSH_PART, in); - auto r = ioctx.aio_operate(oid, c, &op, lr::OPERATION_RETURNVEC); - ceph_assert(r >= 0); -} - -void trim_part(lr::ObjectWriteOperation* op, - std::optional tag, - std::uint64_t ofs, bool exclusive) -{ - fifo::op::trim_part tp; - - tp.tag = tag; - tp.ofs = ofs; - tp.exclusive = exclusive; - - cb::list in; - encode(tp, in); - op->exec(fifo::op::CLASS, fifo::op::TRIM_PART, in); -} - -int list_part(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, - std::optional tag, std::uint64_t ofs, - std::uint64_t max_entries, - std::vector* entries, - bool* more, bool* full_part, std::string* ptag, - std::uint64_t tid, optional_yield y) -{ - lr::ObjectReadOperation op; - fifo::op::list_part lp; - - lp.tag = tag; - lp.ofs = ofs; - lp.max_entries = max_entries; - - cb::list in; - encode(lp, in); - cb::list bl; - op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in, &bl, nullptr); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); - if (r >= 0) try { - fifo::op::list_part_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - if (entries) *entries = std::move(reply.entries); - if (more) *more = reply.more; - if (full_part) *full_part = reply.full_part; - if (ptag) *ptag = reply.tag; - } catch (const cb::error& err) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " decode failed: " << err.what() - << " tid=" << tid << dendl; - r = from_error_code(err.code()); - } else if (r != -ENOENT) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid - << dendl; - } - return r; -} - -struct list_entry_completion : public lr::ObjectOperationCompletion { - CephContext* cct; - int* r_out; - std::vector* entries; - bool* more; - bool* full_part; - std::string* ptag; - std::uint64_t tid; - - list_entry_completion(CephContext* cct, int* r_out, std::vector* entries, - bool* more, bool* full_part, std::string* ptag, - std::uint64_t tid) - : cct(cct), r_out(r_out), entries(entries), more(more), - full_part(full_part), ptag(ptag), tid(tid) {} - virtual ~list_entry_completion() = default; - void handle_completion(int r, bufferlist& bl) override { - if (r >= 0) try { - fifo::op::list_part_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - if (entries) *entries = std::move(reply.entries); - if (more) *more = reply.more; - if (full_part) *full_part = reply.full_part; - if (ptag) *ptag = reply.tag; - } catch (const cb::error& err) { - lderr(cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " decode failed: " << err.what() - << " tid=" << tid << dendl; - r = 
from_error_code(err.code()); - } else if (r < 0) { - lderr(cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::LIST_PART failed r=" << r << " tid=" << tid - << dendl; - } - if (r_out) *r_out = r; - } -}; - -lr::ObjectReadOperation list_part(CephContext* cct, - std::optional tag, - std::uint64_t ofs, - std::uint64_t max_entries, - int* r_out, - std::vector* entries, - bool* more, bool* full_part, - std::string* ptag, std::uint64_t tid) -{ - lr::ObjectReadOperation op; - fifo::op::list_part lp; - - lp.tag = tag; - lp.ofs = ofs; - lp.max_entries = max_entries; - - cb::list in; - encode(lp, in); - op.exec(fifo::op::CLASS, fifo::op::LIST_PART, in, - new list_entry_completion(cct, r_out, entries, more, full_part, - ptag, tid)); - return op; -} - -int get_part_info(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, - fifo::part_header* header, - std::uint64_t tid, optional_yield y) -{ - lr::ObjectReadOperation op; - fifo::op::get_part_info gpi; - - cb::list in; - cb::list bl; - encode(gpi, in); - op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in, &bl, nullptr); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); - if (r >= 0) try { - fifo::op::get_part_info_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - if (header) *header = std::move(reply.header); - } catch (const cb::error& err) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " decode failed: " << err.what() - << " tid=" << tid << dendl; - r = from_error_code(err.code()); - } else { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid - << dendl; - } - return r; -} - -struct partinfo_completion : public lr::ObjectOperationCompletion { - CephContext* cct; - int* rp; - fifo::part_header* h; - std::uint64_t tid; - partinfo_completion(CephContext* cct, int* rp, fifo::part_header* h, - std::uint64_t tid) : - cct(cct), rp(rp), h(h), tid(tid) { - } - virtual ~partinfo_completion() = default; - void handle_completion(int r, bufferlist& bl) override { - if (r >= 0) try { - fifo::op::get_part_info_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - if (h) *h = std::move(reply.header); - } catch (const cb::error& err) { - r = from_error_code(err.code()); - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " decode failed: " << err.what() - << " tid=" << tid << dendl; - } else { - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " fifo::op::GET_PART_INFO failed r=" << r << " tid=" << tid - << dendl; - } - if (rp) { - *rp = r; - } - } -}; - -lr::ObjectReadOperation get_part_info(CephContext* cct, - fifo::part_header* header, - std::uint64_t tid, int* r = 0) -{ - lr::ObjectReadOperation op; - fifo::op::get_part_info gpi; - - cb::list in; - cb::list bl; - encode(gpi, in); - op.exec(fifo::op::CLASS, fifo::op::GET_PART_INFO, in, - new partinfo_completion(cct, r, header, tid)); - return op; -} -} - -std::optional FIFO::to_marker(std::string_view s) -{ - marker m; - if (s.empty()) { - m.num = info.tail_part_num; - m.ofs = 0; - return m; - } - - auto pos = s.find(':'); - if (pos == s.npos) { - return std::nullopt; - } - - auto num = s.substr(0, pos); - auto ofs = s.substr(pos + 1); - - auto n = ceph::parse(num); - if (!n) { - return std::nullopt; - } - m.num = *n; - auto o = ceph::parse(ofs); - if (!o) { - return std::nullopt; - } - m.ofs = *o; - return m; -} - -std::string FIFO::generate_tag() const -{ - static constexpr auto HEADER_TAG_SIZE = 16; - return 
gen_rand_alphanumeric_plain(static_cast(ioctx.cct()), - HEADER_TAG_SIZE); -} - - -int FIFO::apply_update(const DoutPrefixProvider *dpp, - fifo::info* info, - const fifo::objv& objv, - const fifo::update& update, - std::uint64_t tid) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::unique_lock l(m); - if (objv != info->version) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " version mismatch, canceling: tid=" << tid << dendl; - return -ECANCELED; - } - auto err = info->apply_update(update); - if (err) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " error applying update: " << *err << " tid=" << tid << dendl; - return -ECANCELED; - } - - ++info->version.ver; - - return {}; -} - -int FIFO::_update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, - fifo::objv version, bool* pcanceled, - std::uint64_t tid, optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - lr::ObjectWriteOperation op; - bool canceled = false; - update_meta(&op, info.version, update); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r >= 0 || r == -ECANCELED) { - canceled = (r == -ECANCELED); - if (!canceled) { - r = apply_update(dpp, &info, version, update, tid); - if (r < 0) canceled = true; - } - if (canceled) { - r = read_meta(dpp, tid, y); - canceled = r < 0 ? false : true; - } - } - if (pcanceled) *pcanceled = canceled; - if (canceled) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled: tid=" << tid << dendl; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " returning error: r=" << r << " tid=" << tid << dendl; - } - return r; -} - -struct Updater : public Completion { - FIFO* fifo; - fifo::update update; - fifo::objv version; - bool reread = false; - bool* pcanceled = nullptr; - std::uint64_t tid; - Updater(const DoutPrefixProvider *dpp, FIFO* fifo, lr::AioCompletion* super, - const fifo::update& update, fifo::objv version, - bool* pcanceled, std::uint64_t tid) - : Completion(dpp, super), fifo(fifo), update(update), version(version), - pcanceled(pcanceled) {} - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - if (reread) - handle_reread(dpp, std::move(p), r); - else - handle_update(dpp, std::move(p), r); - } - - void handle_update(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " handling async update_meta: tid=" - << tid << dendl; - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " update failed: r=" << r << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - bool canceled = (r == -ECANCELED); - if (!canceled) { - int r = fifo->apply_update(dpp, &fifo->info, version, update, tid); - if (r < 0) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " update failed, marking canceled: r=" << r - << " tid=" << tid << dendl; - canceled = true; - } - } - if (canceled) { - reread = true; - fifo->read_meta(dpp, tid, call(std::move(p))); - return; - } - if (pcanceled) - *pcanceled = false; - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " completing: tid=" << tid << dendl; - complete(std::move(p), 0); - } - - void handle_reread(const DoutPrefixProvider *dpp, Ptr&& 
p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " handling async read_meta: tid=" - << tid << dendl; - if (r < 0 && pcanceled) { - *pcanceled = false; - } else if (r >= 0 && pcanceled) { - *pcanceled = true; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " failed dispatching read_meta: r=" << r << " tid=" - << tid << dendl; - } else { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " completing: tid=" << tid << dendl; - } - complete(std::move(p), r); - } -}; - -void FIFO::_update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, - fifo::objv version, bool* pcanceled, - std::uint64_t tid, lr::AioCompletion* c) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - lr::ObjectWriteOperation op; - update_meta(&op, info.version, update); - auto updater = std::make_unique(dpp, this, c, update, version, pcanceled, - tid); - auto r = ioctx.aio_operate(oid, Updater::call(std::move(updater)), &op); - assert(r >= 0); -} - -int FIFO::create_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, - optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - lr::ObjectWriteOperation op; - op.create(false); /* We don't need exclusivity, part_init ensures - we're creating from the same journal entry. */ - std::unique_lock l(m); - part_init(&op, tag, info.params); - auto oid = info.part_oid(part_num); - l.unlock(); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " part_init failed: r=" << r << " tid=" - << tid << dendl; - } - return r; -} - -int FIFO::remove_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, - optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - lr::ObjectWriteOperation op; - op.remove(); - std::unique_lock l(m); - auto oid = info.part_oid(part_num); - l.unlock(); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " remove failed: r=" << r << " tid=" - << tid << dendl; - } - return r; -} - -int FIFO::process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::vector processed; - - std::unique_lock l(m); - auto tmpjournal = info.journal; - auto new_tail = info.tail_part_num; - auto new_head = info.head_part_num; - auto new_max = info.max_push_part_num; - l.unlock(); - - int r = 0; - for (auto& [n, entry] : tmpjournal) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " processing entry: entry=" << entry << " tid=" << tid - << dendl; - switch (entry.op) { - case fifo::journal_entry::Op::create: - r = create_part(dpp, entry.part_num, entry.part_tag, tid, y); - if (entry.part_num > new_max) { - new_max = entry.part_num; - } - break; - case fifo::journal_entry::Op::set_head: - r = 0; - if (entry.part_num > new_head) { - new_head = entry.part_num; - } - break; - case fifo::journal_entry::Op::remove: - r = remove_part(dpp, entry.part_num, entry.part_tag, tid, y); - if (r == -ENOENT) r = 0; - if (entry.part_num >= new_tail) { - new_tail = entry.part_num + 1; - } - break; - default: - 
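-      /* create, set_head and remove are the only ops this replay
-       * understands; anything else indicates a corrupt journal entry,
-       * so fail with -EIO rather than guessing. */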
ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " unknown journaled op: entry=" << entry << " tid=" - << tid << dendl; - return -EIO; - } - - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " processing entry failed: entry=" << entry - << " r=" << r << " tid=" << tid << dendl; - return -r; - } - - processed.push_back(std::move(entry)); - } - - // Postprocess - bool canceled = true; - - for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " postprocessing: i=" << i << " tid=" << tid << dendl; - - std::optional tail_part_num; - std::optional head_part_num; - std::optional max_part_num; - - std::unique_lock l(m); - auto objv = info.version; - if (new_tail > tail_part_num) tail_part_num = new_tail; - if (new_head > info.head_part_num) head_part_num = new_head; - if (new_max > info.max_push_part_num) max_part_num = new_max; - l.unlock(); - - if (processed.empty() && - !tail_part_num && - !max_part_num) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " nothing to update any more: i=" << i << " tid=" - << tid << dendl; - canceled = false; - break; - } - auto u = fifo::update().tail_part_num(tail_part_num) - .head_part_num(head_part_num).max_push_part_num(max_part_num) - .journal_entries_rm(processed); - r = _update_meta(dpp, u, objv, &canceled, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _update_meta failed: update=" << u - << " r=" << r << " tid=" << tid << dendl; - break; - } - - if (canceled) { - std::vector new_processed; - std::unique_lock l(m); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " update canceled, retrying: i=" << i << " tid=" - << tid << dendl; - for (auto& e : processed) { - auto jiter = info.journal.find(e.part_num); - /* journal entry was already processed */ - if (jiter == info.journal.end() || - !(jiter->second == e)) { - continue; - } - new_processed.push_back(e); - } - processed = std::move(new_processed); - } - } - if (r == 0 && canceled) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - r = -ECANCELED; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " failed, r=: " << r << " tid=" << tid << dendl; - } - return r; -} - -int FIFO::_prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::unique_lock l(m); - std::vector jentries = { info.next_journal_entry(generate_tag()) }; - if (info.journal.find(jentries.front().part_num) != info.journal.end()) { - l.unlock(); - ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " new part journaled, but not processed: tid=" - << tid << dendl; - auto r = process_journal(dpp, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " process_journal failed: r=" << r << " tid=" << tid << dendl; - } - return r; - } - std::int64_t new_head_part_num = info.head_part_num; - auto version = info.version; - - if (is_head) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " needs new head: tid=" << tid << dendl; - auto new_head_jentry = jentries.front(); - new_head_jentry.op = fifo::journal_entry::Op::set_head; - new_head_part_num = jentries.front().part_num; - 
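-    /* Journal the head change together with the part creation, so a
-     * writer that crashes between the two steps can be recovered by
-     * replaying the journal (see process_journal above). */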
jentries.push_back(std::move(new_head_jentry)); - } - l.unlock(); - - int r = 0; - bool canceled = true; - for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { - canceled = false; - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " updating metadata: i=" << i << " tid=" << tid << dendl; - auto u = fifo::update{}.journal_entries_add(jentries); - r = _update_meta(dpp, u, version, &canceled, tid, y); - if (r >= 0 && canceled) { - std::unique_lock l(m); - auto found = (info.journal.find(jentries.front().part_num) != - info.journal.end()); - if ((info.max_push_part_num >= jentries.front().part_num && - info.head_part_num >= new_head_part_num)) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced, but journaled and processed: i=" << i - << " tid=" << tid << dendl; - return 0; - } - if (found) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced, journaled but not processed: i=" << i - << " tid=" << tid << dendl; - canceled = false; - } - l.unlock(); - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _update_meta failed: update=" << u << " r=" << r - << " tid=" << tid << dendl; - return r; - } - } - if (canceled) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - return -ECANCELED; - } - r = process_journal(dpp, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " process_journal failed: r=" << r << " tid=" << tid << dendl; - } - return r; -} - -int FIFO::_prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::unique_lock l(m); - std::int64_t new_head_num = info.head_part_num + 1; - auto max_push_part_num = info.max_push_part_num; - auto version = info.version; - l.unlock(); - - int r = 0; - if (max_push_part_num < new_head_num) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " need new part: tid=" << tid << dendl; - r = _prepare_new_part(dpp, true, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _prepare_new_part failed: r=" << r - << " tid=" << tid << dendl; - return r; - } - std::unique_lock l(m); - if (info.max_push_part_num < new_head_num) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " inconsistency, push part less than head part: " - << " tid=" << tid << dendl; - return -EIO; - } - l.unlock(); - return 0; - } - - bool canceled = true; - for (auto i = 0; canceled && i < MAX_RACE_RETRIES; ++i) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " updating head: i=" << i << " tid=" << tid << dendl; - auto u = fifo::update{}.head_part_num(new_head_num); - r = _update_meta(dpp, u, version, &canceled, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _update_meta failed: update=" << u << " r=" << r - << " tid=" << tid << dendl; - return r; - } - std::unique_lock l(m); - auto head_part_num = info.head_part_num; - version = info.version; - l.unlock(); - if (canceled && (head_part_num >= new_head_num)) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced, but completed by the other caller: i=" << i - << " tid=" << tid << dendl; - canceled = false; - } - } - if (canceled) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled 
too many times, giving up: tid=" << tid << dendl; - return -ECANCELED; - } - return 0; -} - -struct NewPartPreparer : public Completion { - FIFO* f; - std::vector jentries; - int i = 0; - std::int64_t new_head_part_num; - bool canceled = false; - uint64_t tid; - - NewPartPreparer(const DoutPrefixProvider *dpp, FIFO* f, lr::AioCompletion* super, - std::vector jentries, - std::int64_t new_head_part_num, - std::uint64_t tid) - : Completion(dpp, super), f(f), jentries(std::move(jentries)), - new_head_part_num(new_head_part_num), tid(tid) {} - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _update_meta failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - - if (canceled) { - std::unique_lock l(f->m); - auto iter = f->info.journal.find(jentries.front().part_num); - auto max_push_part_num = f->info.max_push_part_num; - auto head_part_num = f->info.head_part_num; - auto version = f->info.version; - auto found = (iter != f->info.journal.end()); - l.unlock(); - if ((max_push_part_num >= jentries.front().part_num && - head_part_num >= new_head_part_num)) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced, but journaled and processed: i=" << i - << " tid=" << tid << dendl; - complete(std::move(p), 0); - return; - } - if (i >= MAX_RACE_RETRIES) { - complete(std::move(p), -ECANCELED); - return; - } - if (!found) { - ++i; - f->_update_meta(dpp, fifo::update{} - .journal_entries_add(jentries), - version, &canceled, tid, call(std::move(p))); - return; - } else { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced, journaled but not processed: i=" << i - << " tid=" << tid << dendl; - canceled = false; - } - // Fall through. We still need to process the journal. 
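-      // process_journal() below creates the journaled parts, advances the
-      // head/tail pointers, and then strips the processed entries from the
-      // FIFO metadata.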
- } - f->process_journal(dpp, tid, super()); - return; - } -}; - -void FIFO::_prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, - lr::AioCompletion* c) -{ - std::unique_lock l(m); - std::vector jentries = { info.next_journal_entry(generate_tag()) }; - if (info.journal.find(jentries.front().part_num) != info.journal.end()) { - l.unlock(); - ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " new part journaled, but not processed: tid=" - << tid << dendl; - process_journal(dpp, tid, c); - return; - } - std::int64_t new_head_part_num = info.head_part_num; - auto version = info.version; - - if (is_head) { - auto new_head_jentry = jentries.front(); - new_head_jentry.op = fifo::journal_entry::Op::set_head; - new_head_part_num = jentries.front().part_num; - jentries.push_back(std::move(new_head_jentry)); - } - l.unlock(); - - auto n = std::make_unique(dpp, this, c, jentries, - new_head_part_num, tid); - auto np = n.get(); - _update_meta(dpp, fifo::update{}.journal_entries_add(jentries), version, - &np->canceled, tid, NewPartPreparer::call(std::move(n))); -} - -struct NewHeadPreparer : public Completion { - FIFO* f; - int i = 0; - bool newpart; - std::int64_t new_head_num; - bool canceled = false; - std::uint64_t tid; - - NewHeadPreparer(const DoutPrefixProvider *dpp, FIFO* f, lr::AioCompletion* super, - bool newpart, std::int64_t new_head_num, std::uint64_t tid) - : Completion(dpp, super), f(f), newpart(newpart), new_head_num(new_head_num), - tid(tid) {} - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (newpart) - handle_newpart(std::move(p), r); - else - handle_update(dpp, std::move(p), r); - } - - void handle_newpart(Ptr&& p, int r) { - if (r < 0) { - lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _prepare_new_part failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - std::unique_lock l(f->m); - if (f->info.max_push_part_num < new_head_num) { - l.unlock(); - lderr(f->cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _prepare_new_part failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), -EIO); - } else { - l.unlock(); - complete(std::move(p), 0); - } - } - - void handle_update(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - std::unique_lock l(f->m); - auto head_part_num = f->info.head_part_num; - auto version = f->info.version; - l.unlock(); - - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _update_meta failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - if (canceled) { - if (i >= MAX_RACE_RETRIES) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - complete(std::move(p), -ECANCELED); - return; - } - - // Raced, but there's still work to do! 
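-      // The metadata was re-read above; if another writer already advanced
-      // the head past new_head_num the race is benign and we are done,
-      // otherwise retry the update (bounded by MAX_RACE_RETRIES above).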
-      if (head_part_num < new_head_num) {
-        canceled = false;
-        ++i;
-        ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                           << " updating head: i=" << i << " tid=" << tid << dendl;
-        f->_update_meta(dpp, fifo::update{}.head_part_num(new_head_num),
-                        version, &this->canceled, tid, call(std::move(p)));
-        return;
-      }
-    }
-    ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " succeeded : i=" << i << " tid=" << tid << dendl;
-    complete(std::move(p), 0);
-    return;
-  }
-};
-
-void FIFO::_prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c)
-{
-  ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                     << " entering: tid=" << tid << dendl;
-  std::unique_lock l(m);
-  int64_t new_head_num = info.head_part_num + 1;
-  auto max_push_part_num = info.max_push_part_num;
-  auto version = info.version;
-  l.unlock();
-
-  if (max_push_part_num < new_head_num) {
-    ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " need new part: tid=" << tid << dendl;
-    auto n = std::make_unique<NewHeadPreparer>(dpp, this, c, true, new_head_num,
-                                               tid);
-    _prepare_new_part(dpp, true, tid, NewHeadPreparer::call(std::move(n)));
-  } else {
-    ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " updating head: tid=" << tid << dendl;
-    auto n = std::make_unique<NewHeadPreparer>(dpp, this, c, false, new_head_num,
-                                               tid);
-    auto np = n.get();
-    _update_meta(dpp, fifo::update{}.head_part_num(new_head_num), version,
-                 &np->canceled, tid, NewHeadPreparer::call(std::move(n)));
-  }
-}
-
-int FIFO::push_entries(const DoutPrefixProvider *dpp, const std::deque<cb::list>& data_bufs,
-                       std::uint64_t tid, optional_yield y)
-{
-  ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                     << " entering: tid=" << tid << dendl;
-  std::unique_lock l(m);
-  auto head_part_num = info.head_part_num;
-  auto tag = info.head_tag;
-  const auto part_oid = info.part_oid(head_part_num);
-  l.unlock();
-
-  auto r = push_part(dpp, ioctx, part_oid, tag, data_bufs, tid, y);
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " push_part failed: r=" << r << " tid=" << tid << dendl;
-  }
-  return r;
-}
-
-void FIFO::push_entries(const std::deque<cb::list>& data_bufs,
-                        std::uint64_t tid, lr::AioCompletion* c)
-{
-  std::unique_lock l(m);
-  auto head_part_num = info.head_part_num;
-  auto tag = info.head_tag;
-  const auto part_oid = info.part_oid(head_part_num);
-  l.unlock();
-
-  push_part(ioctx, part_oid, tag, data_bufs, tid, c);
-}
-
-int FIFO::trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs,
-                    std::optional<std::string_view> tag,
-                    bool exclusive, std::uint64_t tid,
-                    optional_yield y)
-{
-  ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                     << " entering: tid=" << tid << dendl;
-  lr::ObjectWriteOperation op;
-  std::unique_lock l(m);
-  const auto part_oid = info.part_oid(part_num);
-  l.unlock();
-  rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive);
-  auto r = rgw_rados_operate(dpp, ioctx, part_oid, &op, y);
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " trim_part failed: r=" << r << " tid=" << tid << dendl;
-    return r;
-  }
-  return 0;
-}
-
-void FIFO::trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs,
-                     std::optional<std::string_view> tag,
-                     bool exclusive, std::uint64_t tid,
-                     lr::AioCompletion* c)
-{
-  ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                     << " entering: tid=" << tid << dendl;
-  lr::ObjectWriteOperation op;
-  std::unique_lock l(m);
-  const auto part_oid = info.part_oid(part_num);
-  l.unlock();
-
rgw::cls::fifo::trim_part(&op, tag, ofs, exclusive); - auto r = ioctx.aio_operate(part_oid, c, &op); - ceph_assert(r >= 0); -} - -int FIFO::open(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, std::string oid, std::unique_ptr* fifo, - optional_yield y, std::optional objv, - bool probe) -{ - ldpp_dout(dpp, 20) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering" << dendl; - fifo::info info; - std::uint32_t size; - std::uint32_t over; - int r = get_meta(dpp, ioctx, std::move(oid), objv, &info, &size, &over, 0, y, - probe); - if (r < 0) { - if (!(probe && (r == -ENOENT || r == -ENODATA))) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " get_meta failed: r=" << r << dendl; - } - return r; - } - std::unique_ptr f(new FIFO(std::move(ioctx), oid)); - f->info = info; - f->part_header_size = size; - f->part_entry_overhead = over; - // If there are journal entries, process them, in case - // someone crashed mid-transaction. - if (!info.journal.empty()) { - ldpp_dout(dpp, 20) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " processing leftover journal" << dendl; - r = f->process_journal(dpp, 0, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " process_journal failed: r=" << r << dendl; - return r; - } - } - *fifo = std::move(f); - return 0; -} - -int FIFO::create(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, std::string oid, std::unique_ptr* fifo, - optional_yield y, std::optional objv, - std::optional oid_prefix, - bool exclusive, std::uint64_t max_part_size, - std::uint64_t max_entry_size) -{ - ldpp_dout(dpp, 20) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering" << dendl; - lr::ObjectWriteOperation op; - create_meta(&op, oid, objv, oid_prefix, exclusive, max_part_size, - max_entry_size); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " create_meta failed: r=" << r << dendl; - return r; - } - r = open(dpp, std::move(ioctx), std::move(oid), fifo, y, objv); - return r; -} - -int FIFO::read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - fifo::info _info; - std::uint32_t _phs; - std::uint32_t _peo; - - auto r = get_meta(dpp, ioctx, oid, std::nullopt, &_info, &_phs, &_peo, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " get_meta failed: r=" << r << " tid=" << tid << dendl; - return r; - } - std::unique_lock l(m); - // We have a newer version already! 
- if (_info.version.same_or_later(this->info.version)) { - info = std::move(_info); - part_header_size = _phs; - part_entry_overhead = _peo; - } - return 0; -} - -int FIFO::read_meta(const DoutPrefixProvider *dpp, optional_yield y) { - std::unique_lock l(m); - auto tid = ++next_tid; - l.unlock(); - return read_meta(dpp, tid, y); -} - -struct Reader : public Completion { - FIFO* fifo; - cb::list bl; - std::uint64_t tid; - Reader(const DoutPrefixProvider *dpp, FIFO* fifo, lr::AioCompletion* super, std::uint64_t tid) - : Completion(dpp, super), fifo(fifo), tid(tid) {} - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - if (r >= 0) try { - fifo::op::get_meta_reply reply; - auto iter = bl.cbegin(); - decode(reply, iter); - std::unique_lock l(fifo->m); - if (reply.info.version.same_or_later(fifo->info.version)) { - fifo->info = std::move(reply.info); - fifo->part_header_size = reply.part_header_size; - fifo->part_entry_overhead = reply.part_entry_overhead; - } - } catch (const cb::error& err) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " failed to decode response err=" << err.what() - << " tid=" << tid << dendl; - r = from_error_code(err.code()); - } else { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " read_meta failed r=" << r - << " tid=" << tid << dendl; - } - complete(std::move(p), r); - } -}; - -void FIFO::read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - lr::ObjectReadOperation op; - fifo::op::get_meta gm; - cb::list in; - encode(gm, in); - auto reader = std::make_unique(dpp, this, c, tid); - auto rp = reader.get(); - auto r = ioctx.aio_exec(oid, Reader::call(std::move(reader)), fifo::op::CLASS, - fifo::op::GET_META, in, &rp->bl); - assert(r >= 0); -} - -const fifo::info& FIFO::meta() const { - return info; -} - -std::pair FIFO::get_part_layout_info() const { - return {part_header_size, part_entry_overhead}; -} - -int FIFO::push(const DoutPrefixProvider *dpp, const cb::list& bl, optional_yield y) { - return push(dpp, std::vector{ bl }, y); -} - -void FIFO::push(const DoutPrefixProvider *dpp, const cb::list& bl, lr::AioCompletion* c) { - push(dpp, std::vector{ bl }, c); -} - -int FIFO::push(const DoutPrefixProvider *dpp, const std::vector& data_bufs, optional_yield y) -{ - std::unique_lock l(m); - auto tid = ++next_tid; - auto max_entry_size = info.params.max_entry_size; - auto need_new_head = info.need_new_head(); - l.unlock(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - if (data_bufs.empty()) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " empty push, returning success tid=" << tid << dendl; - return 0; - } - - // Validate sizes - for (const auto& bl : data_bufs) { - if (bl.length() > max_entry_size) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entry bigger than max_entry_size tid=" << tid << dendl; - return -E2BIG; - } - } - - int r = 0; - if (need_new_head) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " need new head tid=" << tid << dendl; - r = _prepare_new_head(dpp, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " _prepare_new_head failed: r=" << r - << " tid=" << tid << dendl; - return r; - } - } - - 
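Both `read_meta` variants above install freshly fetched metadata only when its version is `same_or_later` than the local copy, so a slow or reordered reply can never roll local state backwards. The shape of that guard with simplified stand-in types (the real ones are `fifo::info` and `fifo::objv`):

```cpp
#include <cstdint>
#include <mutex>
#include <utility>

// Illustrative stand-ins for fifo::objv and fifo::info.
struct Version {
  std::uint64_t v = 0;
  bool same_or_later(const Version& o) const { return v >= o.v; } // simplified
};
struct Meta { Version version; /* part sizes, journal, ... */ };

// Install fetched metadata only if it is at least as new as what we hold.
void maybe_adopt(Meta& local, Meta&& fetched, std::mutex& m) {
  std::lock_guard<std::mutex> l(m);
  if (fetched.version.same_or_later(local.version))
    local = std::move(fetched);
}
```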
std::deque remaining(data_bufs.begin(), data_bufs.end()); - std::deque batch; - - uint64_t batch_len = 0; - auto retries = 0; - bool canceled = true; - while ((!remaining.empty() || !batch.empty()) && - (retries <= MAX_RACE_RETRIES)) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " preparing push: remaining=" << remaining.size() - << " batch=" << batch.size() << " retries=" << retries - << " tid=" << tid << dendl; - std::unique_lock l(m); - auto max_part_size = info.params.max_part_size; - auto overhead = part_entry_overhead; - l.unlock(); - - while (!remaining.empty() && - (remaining.front().length() + batch_len <= max_part_size)) { - /* We can send entries with data_len up to max_entry_size, - however, we want to also account the overhead when - dealing with multiple entries. Previous check doesn't - account for overhead on purpose. */ - batch_len += remaining.front().length() + overhead; - batch.push_back(std::move(remaining.front())); - remaining.pop_front(); - } - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " prepared push: remaining=" << remaining.size() - << " batch=" << batch.size() << " retries=" << retries - << " batch_len=" << batch_len - << " tid=" << tid << dendl; - - auto r = push_entries(dpp, batch, tid, y); - if (r == -ERANGE) { - canceled = true; - ++retries; - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " need new head tid=" << tid << dendl; - r = _prepare_new_head(dpp, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " prepare_new_head failed: r=" << r - << " tid=" << tid << dendl; - return r; - } - r = 0; - continue; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " push_entries failed: r=" << r - << " tid=" << tid << dendl; - return r; - } - // Made forward progress! - canceled = false; - retries = 0; - batch_len = 0; - if (r == ssize(batch)) { - batch.clear(); - } else { - batch.erase(batch.begin(), batch.begin() + r); - for (const auto& b : batch) { - batch_len += b.length() + part_entry_overhead; - } - } - } - if (canceled) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - return -ECANCELED; - } - return 0; -} - -struct Pusher : public Completion { - FIFO* f; - std::deque remaining; - std::deque batch; - int i = 0; - std::uint64_t tid; - bool new_heading = false; - - void prep_then_push(const DoutPrefixProvider *dpp, Ptr&& p, const unsigned successes) { - std::unique_lock l(f->m); - auto max_part_size = f->info.params.max_part_size; - auto part_entry_overhead = f->part_entry_overhead; - l.unlock(); - - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " preparing push: remaining=" << remaining.size() - << " batch=" << batch.size() << " i=" << i - << " tid=" << tid << dendl; - - uint64_t batch_len = 0; - if (successes > 0) { - if (successes == batch.size()) { - batch.clear(); - } else { - batch.erase(batch.begin(), batch.begin() + successes); - for (const auto& b : batch) { - batch_len += b.length() + part_entry_overhead; - } - } - } - - if (batch.empty() && remaining.empty()) { - complete(std::move(p), 0); - return; - } - - while (!remaining.empty() && - (remaining.front().length() + batch_len <= max_part_size)) { - - /* We can send entries with data_len up to max_entry_size, - however, we want to also account the overhead when - dealing with multiple entries. Previous check doesn't - account for overhead on purpose. 
*/ - batch_len += remaining.front().length() + part_entry_overhead; - batch.push_back(std::move(remaining.front())); - remaining.pop_front(); - } - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " prepared push: remaining=" << remaining.size() - << " batch=" << batch.size() << " i=" << i - << " batch_len=" << batch_len - << " tid=" << tid << dendl; - push(std::move(p)); - } - - void push(Ptr&& p) { - f->push_entries(batch, tid, call(std::move(p))); - } - - void new_head(const DoutPrefixProvider *dpp, Ptr&& p) { - new_heading = true; - f->_prepare_new_head(dpp, tid, call(std::move(p))); - } - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (!new_heading) { - if (r == -ERANGE) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " need new head tid=" << tid << dendl; - new_head(dpp, std::move(p)); - return; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " push_entries failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - i = 0; // We've made forward progress, so reset the race counter! - prep_then_push(dpp, std::move(p), r); - } else { - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " prepare_new_head failed: r=" << r - << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - new_heading = false; - handle_new_head(dpp, std::move(p), r); - } - } - - void handle_new_head(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (r == -ECANCELED) { - if (p->i == MAX_RACE_RETRIES) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - complete(std::move(p), -ECANCELED); - return; - } - ++p->i; - } else if (r) { - complete(std::move(p), r); - return; - } - - if (p->batch.empty()) { - prep_then_push(dpp, std::move(p), 0); - return; - } else { - push(std::move(p)); - return; - } - } - - Pusher(const DoutPrefixProvider *dpp, FIFO* f, std::deque&& remaining, - std::uint64_t tid, lr::AioCompletion* super) - : Completion(dpp, super), f(f), remaining(std::move(remaining)), - tid(tid) {} -}; - -void FIFO::push(const DoutPrefixProvider *dpp, const std::vector& data_bufs, - lr::AioCompletion* c) -{ - std::unique_lock l(m); - auto tid = ++next_tid; - auto max_entry_size = info.params.max_entry_size; - auto need_new_head = info.need_new_head(); - l.unlock(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - auto p = std::make_unique(dpp, this, std::deque(data_bufs.begin(), data_bufs.end()), - tid, c); - // Validate sizes - for (const auto& bl : data_bufs) { - if (bl.length() > max_entry_size) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entry bigger than max_entry_size tid=" << tid << dendl; - Pusher::complete(std::move(p), -E2BIG); - return; - } - } - - if (data_bufs.empty() ) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " empty push, returning success tid=" << tid << dendl; - Pusher::complete(std::move(p), 0); - return; - } - - if (need_new_head) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " need new head tid=" << tid << dendl; - p->new_head(dpp, std::move(p)); - } else { - p->prep_then_push(dpp, std::move(p), 0); - } -} - -int FIFO::list(const DoutPrefixProvider *dpp, int max_entries, - std::optional markstr, - std::vector* presult, bool* pmore, - optional_yield y) -{ - std::unique_lock l(m); - auto tid = ++next_tid; - 
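The synchronous push loop above and `Pusher::prep_then_push` share two pieces of accounting worth isolating: entries are admitted to a batch by payload length alone (the comparison deliberately ignores overhead, as the comment notes), while the running total does charge `part_entry_overhead` per entry; and after a partial push, the accepted prefix is dropped and the byte total rebuilt. A standalone sketch, with `std::string` standing in for `ceph::buffer::list`:

```cpp
#include <cstdint>
#include <deque>
#include <string>

// Pack a batch: admit by payload size, but account overhead in the total,
// mirroring the comment in the code above.
std::deque<std::string> take_batch(std::deque<std::string>& remaining,
                                   std::uint64_t max_part_size,
                                   std::uint64_t per_entry_overhead) {
  std::deque<std::string> batch;
  std::uint64_t batch_len = 0;
  while (!remaining.empty() &&
         remaining.front().size() + batch_len <= max_part_size) {
    batch_len += remaining.front().size() + per_entry_overhead;
    batch.push_back(std::move(remaining.front()));
    remaining.pop_front();
  }
  return batch;
}

// After push_entries() accepts r entries, drop them, rebuild the byte count,
// and reset the race counter, since forward progress was made.
void account_success(std::deque<std::string>& batch, std::size_t r,
                     std::uint64_t per_entry_overhead,
                     std::uint64_t& batch_len, int& retries) {
  retries = 0;
  batch_len = 0;
  batch.erase(batch.begin(), batch.begin() + r);
  for (const auto& b : batch)
    batch_len += b.size() + per_entry_overhead;
}
```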
std::int64_t part_num = info.tail_part_num; - l.unlock(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::uint64_t ofs = 0; - if (markstr) { - auto marker = to_marker(*markstr); - if (!marker) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " invalid marker string: " << markstr - << " tid= "<< tid << dendl; - return -EINVAL; - } - part_num = marker->num; - ofs = marker->ofs; - } - - std::vector result; - result.reserve(max_entries); - bool more = false; - - std::vector entries; - int r = 0; - while (max_entries > 0) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " max_entries=" << max_entries << " tid=" << tid << dendl; - bool part_more = false; - bool part_full = false; - - std::unique_lock l(m); - auto part_oid = info.part_oid(part_num); - l.unlock(); - - r = list_part(dpp, ioctx, part_oid, {}, ofs, max_entries, &entries, - &part_more, &part_full, nullptr, tid, y); - if (r == -ENOENT) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " missing part, rereading metadata" - << " tid= "<< tid << dendl; - r = read_meta(dpp, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " read_meta failed: r=" << r - << " tid= "<< tid << dendl; - return r; - } - if (part_num < info.tail_part_num) { - /* raced with trim? restart */ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " raced with trim, restarting: tid=" << tid << dendl; - max_entries += result.size(); - result.clear(); - std::unique_lock l(m); - part_num = info.tail_part_num; - l.unlock(); - ofs = 0; - continue; - } - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " assuming part was not written yet, so end of data: " - << "tid=" << tid << dendl; - more = false; - r = 0; - break; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " list_entries failed: r=" << r - << " tid= "<< tid << dendl; - return r; - } - more = part_full || part_more; - for (auto& entry : entries) { - list_entry e; - e.data = std::move(entry.data); - e.marker = marker{part_num, entry.ofs}.to_string(); - e.mtime = entry.mtime; - result.push_back(std::move(e)); - --max_entries; - if (max_entries == 0) - break; - } - entries.clear(); - if (max_entries > 0 && - part_more) { - } - - if (!part_full) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " head part is not full, so we can assume we're done: " - << "tid=" << tid << dendl; - break; - } - if (!part_more) { - ++part_num; - ofs = 0; - } - } - if (presult) - *presult = std::move(result); - if (pmore) - *pmore = more; - return 0; -} - -int FIFO::trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, optional_yield y) -{ - bool overshoot = false; - auto marker = to_marker(markstr); - if (!marker) { - return -EINVAL; - } - auto part_num = marker->num; - auto ofs = marker->ofs; - std::unique_lock l(m); - auto tid = ++next_tid; - auto hn = info.head_part_num; - const auto max_part_size = info.params.max_part_size; - if (part_num > hn) { - l.unlock(); - auto r = read_meta(dpp, tid, y); - if (r < 0) { - return r; - } - l.lock(); - auto hn = info.head_part_num; - if (part_num > hn) { - overshoot = true; - part_num = hn; - ofs = max_part_size; - } - } - if (part_num < info.tail_part_num) { - return -ENODATA; - } - auto pn = info.tail_part_num; - l.unlock(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << 
dendl;
-
-  int r = 0;
-  while (pn < part_num) {
-    ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " pn=" << pn << " tid=" << tid << dendl;
-    std::unique_lock l(m);
-    l.unlock();
-    r = trim_part(dpp, pn, max_part_size, std::nullopt, false, tid, y);
-    if (r < 0 && r != -ENOENT) {
-      ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                         << " trim_part failed: r=" << r
-                         << " tid= "<< tid << dendl;
-      return r;
-    }
-    ++pn;
-  }
-  r = trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid, y);
-  if (r < 0 && r != -ENOENT) {
-    ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " trim_part failed: r=" << r
-                       << " tid= "<< tid << dendl;
-    return r;
-  }
-
-  l.lock();
-  auto tail_part_num = info.tail_part_num;
-  auto objv = info.version;
-  l.unlock();
-  bool canceled = tail_part_num < part_num;
-  int retries = 0;
-  while ((tail_part_num < part_num) &&
-         canceled &&
-         (retries <= MAX_RACE_RETRIES)) {
-    r = _update_meta(dpp, fifo::update{}.tail_part_num(part_num), objv, &canceled,
-                     tid, y);
-    if (r < 0) {
-      ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                         << " _update_meta failed: r=" << r
-                         << " tid= "<< tid << dendl;
-      return r;
-    }
-    if (canceled) {
-      ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                         << " canceled: retries=" << retries
-                         << " tid=" << tid << dendl;
-      l.lock();
-      tail_part_num = info.tail_part_num;
-      objv = info.version;
-      l.unlock();
-      ++retries;
-    }
-  }
-  if (canceled) {
-    ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " canceled too many times, giving up: tid=" << tid << dendl;
-    return -EIO;
-  }
-  return overshoot ? -ENODATA : 0;
-}
-
-struct Trimmer : public Completion<Trimmer> {
-  FIFO* fifo;
-  std::int64_t part_num;
-  std::uint64_t ofs;
-  std::int64_t pn;
-  bool exclusive;
-  std::uint64_t tid;
-  bool update = false;
-  bool reread = false;
-  bool canceled = false;
-  bool overshoot = false;
-  int retries = 0;
-
-  Trimmer(const DoutPrefixProvider *dpp, FIFO* fifo, std::int64_t part_num, std::uint64_t ofs, std::int64_t pn,
-          bool exclusive, lr::AioCompletion* super, std::uint64_t tid)
-    : Completion(dpp, super), fifo(fifo), part_num(part_num), ofs(ofs), pn(pn),
-      exclusive(exclusive), tid(tid) {}
-
-  void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) {
-    ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                       << " entering: tid=" << tid << dendl;
-
-    if (reread) {
-      reread = false;
-      if (r < 0) {
-        ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                           << " read_meta failed: r="
-                           << r << " tid=" << tid << dendl;
-        complete(std::move(p), r);
-        return;
-      }
-      std::unique_lock l(fifo->m);
-      auto hn = fifo->info.head_part_num;
-      const auto max_part_size = fifo->info.params.max_part_size;
-      const auto tail_part_num = fifo->info.tail_part_num;
-      l.unlock();
-      if (part_num > hn) {
-        part_num = hn;
-        ofs = max_part_size;
-        overshoot = true;
-      }
-      if (part_num < tail_part_num) {
-        complete(std::move(p), -ENODATA);
-        return;
-      }
-      pn = tail_part_num;
-      if (pn < part_num) {
-        ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                           << " pn=" << pn << " tid=" << tid << dendl;
-        fifo->trim_part(dpp, pn++, max_part_size, std::nullopt,
-                        false, tid, call(std::move(p)));
-      } else {
-        update = true;
-        canceled = tail_part_num < part_num;
-        fifo->trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid,
-                        call(std::move(p)));
-      }
-      return;
-    }
-
-    if (r == -ENOENT) {
-      r = 0;
-    }
-
-    if (r < 0) {
-      ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__
-                         << (update ?
" update_meta " : " trim ") << "failed: r=" - << r << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } - - if (!update) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " handling preceding trim callback: tid=" << tid << dendl; - retries = 0; - if (pn < part_num) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " pn=" << pn << " tid=" << tid << dendl; - std::unique_lock l(fifo->m); - const auto max_part_size = fifo->info.params.max_part_size; - l.unlock(); - fifo->trim_part(dpp, pn++, max_part_size, std::nullopt, - false, tid, call(std::move(p))); - return; - } - - std::unique_lock l(fifo->m); - const auto tail_part_num = fifo->info.tail_part_num; - l.unlock(); - update = true; - canceled = tail_part_num < part_num; - fifo->trim_part(dpp, part_num, ofs, std::nullopt, exclusive, tid, - call(std::move(p))); - return; - } - - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " handling update-needed callback: tid=" << tid << dendl; - std::unique_lock l(fifo->m); - auto tail_part_num = fifo->info.tail_part_num; - auto objv = fifo->info.version; - l.unlock(); - if ((tail_part_num < part_num) && - canceled) { - if (retries > MAX_RACE_RETRIES) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" << tid << dendl; - complete(std::move(p), -EIO); - return; - } - ++retries; - fifo->_update_meta(dpp, fifo::update{} - .tail_part_num(part_num), objv, &canceled, - tid, call(std::move(p))); - } else { - complete(std::move(p), overshoot ? -ENODATA : 0); - } - } -}; - -void FIFO::trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, - lr::AioCompletion* c) { - auto marker = to_marker(markstr); - auto realmark = marker.value_or(::rgw::cls::fifo::marker{}); - std::unique_lock l(m); - const auto hn = info.head_part_num; - const auto max_part_size = info.params.max_part_size; - const auto pn = info.tail_part_num; - const auto part_oid = info.part_oid(pn); - auto tid = ++next_tid; - l.unlock(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - auto trimmer = std::make_unique(dpp, this, realmark.num, realmark.ofs, - pn, exclusive, c, tid); - if (!marker) { - Trimmer::complete(std::move(trimmer), -EINVAL); - return; - } - ++trimmer->pn; - auto ofs = marker->ofs; - if (marker->num > hn) { - trimmer->reread = true; - read_meta(dpp, tid, Trimmer::call(std::move(trimmer))); - return; - } - if (pn < marker->num) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " pn=" << pn << " tid=" << tid << dendl; - ofs = max_part_size; - } else { - trimmer->update = true; - } - trim_part(dpp, pn, ofs, std::nullopt, exclusive, - tid, Trimmer::call(std::move(trimmer))); -} - -int FIFO::get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, - fifo::part_header* header, - optional_yield y) -{ - std::unique_lock l(m); - const auto part_oid = info.part_oid(part_num); - auto tid = ++next_tid; - l.unlock(); - auto r = rgw::cls::fifo::get_part_info(dpp, ioctx, part_oid, header, tid, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " get_part_info failed: r=" - << r << " tid=" << tid << dendl; - } - return r; -} - -void FIFO::get_part_info(int64_t part_num, - fifo::part_header* header, - lr::AioCompletion* c) -{ - std::unique_lock l(m); - const auto part_oid = info.part_oid(part_num); - auto tid = ++next_tid; - l.unlock(); - auto op = 
rgw::cls::fifo::get_part_info(cct, header, tid); - auto r = ioctx.aio_operate(part_oid, c, &op, nullptr); - ceph_assert(r >= 0); -} - -struct InfoGetter : Completion { - FIFO* fifo; - fifo::part_header header; - fu2::function f; - std::uint64_t tid; - bool headerread = false; - - InfoGetter(const DoutPrefixProvider *dpp, FIFO* fifo, fu2::function f, - std::uint64_t tid, lr::AioCompletion* super) - : Completion(dpp, super), fifo(fifo), f(std::move(f)), tid(tid) {} - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (!headerread) { - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " read_meta failed: r=" - << r << " tid=" << tid << dendl; - if (f) - f(r, {}); - complete(std::move(p), r); - return; - } - - auto info = fifo->meta(); - auto hpn = info.head_part_num; - if (hpn < 0) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " no head, returning empty partinfo r=" - << r << " tid=" << tid << dendl; - if (f) - f(0, {}); - complete(std::move(p), r); - return; - } - headerread = true; - auto op = rgw::cls::fifo::get_part_info(fifo->cct, &header, tid); - std::unique_lock l(fifo->m); - auto oid = fifo->info.part_oid(hpn); - l.unlock(); - r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op, - nullptr); - ceph_assert(r >= 0); - return; - } - - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " get_part_info failed: r=" - << r << " tid=" << tid << dendl; - } - - if (f) - f(r, std::move(header)); - complete(std::move(p), r); - return; - } -}; - -void FIFO::get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function f, - lr::AioCompletion* c) -{ - std::unique_lock l(m); - auto tid = ++next_tid; - l.unlock(); - auto ig = std::make_unique(dpp, this, std::move(f), tid, c); - read_meta(dpp, tid, InfoGetter::call(std::move(ig))); -} - -struct JournalProcessor : public Completion { -private: - FIFO* const fifo; - - std::vector processed; - std::multimap journal; - std::multimap::iterator iter; - std::int64_t new_tail; - std::int64_t new_head; - std::int64_t new_max; - int race_retries = 0; - bool first_pp = true; - bool canceled = false; - std::uint64_t tid; - - enum { - entry_callback, - pp_callback, - } state; - - void create_part(const DoutPrefixProvider *dpp, Ptr&& p, int64_t part_num, - std::string_view tag) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - state = entry_callback; - lr::ObjectWriteOperation op; - op.create(false); /* We don't need exclusivity, part_init ensures - we're creating from the same journal entry. 
*/ - std::unique_lock l(fifo->m); - part_init(&op, tag, fifo->info.params); - auto oid = fifo->info.part_oid(part_num); - l.unlock(); - auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op); - ceph_assert(r >= 0); - return; - } - - void remove_part(const DoutPrefixProvider *dpp, Ptr&& p, int64_t part_num, - std::string_view tag) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - state = entry_callback; - lr::ObjectWriteOperation op; - op.remove(); - std::unique_lock l(fifo->m); - auto oid = fifo->info.part_oid(part_num); - l.unlock(); - auto r = fifo->ioctx.aio_operate(oid, call(std::move(p)), &op); - ceph_assert(r >= 0); - return; - } - - void finish_je(const DoutPrefixProvider *dpp, Ptr&& p, int r, - const fifo::journal_entry& entry) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " finishing entry: entry=" << entry - << " tid=" << tid << dendl; - - if (entry.op == fifo::journal_entry::Op::remove && r == -ENOENT) - r = 0; - - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " processing entry failed: entry=" << entry - << " r=" << r << " tid=" << tid << dendl; - complete(std::move(p), r); - return; - } else { - switch (entry.op) { - case fifo::journal_entry::Op::unknown: - case fifo::journal_entry::Op::set_head: - // Can't happen. Filtered out in process. - complete(std::move(p), -EIO); - return; - - case fifo::journal_entry::Op::create: - if (entry.part_num > new_max) { - new_max = entry.part_num; - } - break; - case fifo::journal_entry::Op::remove: - if (entry.part_num >= new_tail) { - new_tail = entry.part_num + 1; - } - break; - } - processed.push_back(entry); - } - ++iter; - process(dpp, std::move(p)); - } - - void postprocess(const DoutPrefixProvider *dpp, Ptr&& p) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - if (processed.empty()) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " nothing to update any more: race_retries=" - << race_retries << " tid=" << tid << dendl; - complete(std::move(p), 0); - return; - } - pp_run(dpp, std::move(p), 0, false); - } - -public: - - JournalProcessor(const DoutPrefixProvider *dpp, FIFO* fifo, std::uint64_t tid, lr::AioCompletion* super) - : Completion(dpp, super), fifo(fifo), tid(tid) { - std::unique_lock l(fifo->m); - journal = fifo->info.journal; - iter = journal.begin(); - new_tail = fifo->info.tail_part_num; - new_head = fifo->info.head_part_num; - new_max = fifo->info.max_push_part_num; - } - - void pp_run(const DoutPrefixProvider *dpp, Ptr&& p, int r, bool canceled) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - std::optional tail_part_num; - std::optional head_part_num; - std::optional max_part_num; - - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " failed, r=: " << r << " tid=" << tid << dendl; - complete(std::move(p), r); - } - - - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " postprocessing: race_retries=" - << race_retries << " tid=" << tid << dendl; - - if (!first_pp && r == 0 && !canceled) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " nothing to update any more: race_retries=" - << race_retries << " tid=" << tid << dendl; - complete(std::move(p), 0); - return; - } - - first_pp = false; - - if 
(canceled) { - if (race_retries >= MAX_RACE_RETRIES) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " canceled too many times, giving up: tid=" - << tid << dendl; - complete(std::move(p), -ECANCELED); - return; - } - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " update canceled, retrying: race_retries=" - << race_retries << " tid=" << tid << dendl; - - ++race_retries; - - std::vector new_processed; - std::unique_lock l(fifo->m); - for (auto& e : processed) { - auto jiter = fifo->info.journal.find(e.part_num); - /* journal entry was already processed */ - if (jiter == fifo->info.journal.end() || - !(jiter->second == e)) { - continue; - } - new_processed.push_back(e); - } - processed = std::move(new_processed); - } - - std::unique_lock l(fifo->m); - auto objv = fifo->info.version; - if (new_tail > fifo->info.tail_part_num) { - tail_part_num = new_tail; - } - - if (new_head > fifo->info.head_part_num) { - head_part_num = new_head; - } - - if (new_max > fifo->info.max_push_part_num) { - max_part_num = new_max; - } - l.unlock(); - - if (processed.empty() && - !tail_part_num && - !max_part_num) { - /* nothing to update anymore */ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " nothing to update any more: race_retries=" - << race_retries << " tid=" << tid << dendl; - complete(std::move(p), 0); - return; - } - state = pp_callback; - fifo->_update_meta(dpp, fifo::update{} - .tail_part_num(tail_part_num) - .head_part_num(head_part_num) - .max_push_part_num(max_part_num) - .journal_entries_rm(processed), - objv, &this->canceled, tid, call(std::move(p))); - return; - } - - JournalProcessor(const JournalProcessor&) = delete; - JournalProcessor& operator =(const JournalProcessor&) = delete; - JournalProcessor(JournalProcessor&&) = delete; - JournalProcessor& operator =(JournalProcessor&&) = delete; - - void process(const DoutPrefixProvider *dpp, Ptr&& p) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - while (iter != journal.end()) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " processing entry: entry=" << *iter - << " tid=" << tid << dendl; - const auto entry = iter->second; - switch (entry.op) { - case fifo::journal_entry::Op::create: - create_part(dpp, std::move(p), entry.part_num, entry.part_tag); - return; - case fifo::journal_entry::Op::set_head: - if (entry.part_num > new_head) { - new_head = entry.part_num; - } - processed.push_back(entry); - ++iter; - continue; - case fifo::journal_entry::Op::remove: - remove_part(dpp, std::move(p), entry.part_num, entry.part_tag); - return; - default: - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " unknown journaled op: entry=" << entry << " tid=" - << tid << dendl; - complete(std::move(p), -EIO); - return; - } - } - postprocess(dpp, std::move(p)); - return; - } - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " entering: tid=" << tid << dendl; - switch (state) { - case entry_callback: - finish_je(dpp, std::move(p), r, iter->second); - return; - case pp_callback: - auto c = canceled; - canceled = false; - pp_run(dpp, std::move(p), r, c); - return; - } - - abort(); - } - -}; - -void FIFO::process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c) { - auto p = std::make_unique(dpp, this, tid, c); - p->process(dpp, std::move(p)); -} - -struct Lister : Completion { - FIFO* f; - 
std::vector result; - bool more = false; - std::int64_t part_num; - std::uint64_t ofs; - int max_entries; - int r_out = 0; - std::vector entries; - bool part_more = false; - bool part_full = false; - std::vector* entries_out; - bool* more_out; - std::uint64_t tid; - - bool read = false; - - void complete(Ptr&& p, int r) { - if (r >= 0) { - if (more_out) *more_out = more; - if (entries_out) *entries_out = std::move(result); - } - Completion::complete(std::move(p), r); - } - -public: - Lister(const DoutPrefixProvider *dpp, FIFO* f, std::int64_t part_num, std::uint64_t ofs, int max_entries, - std::vector* entries_out, bool* more_out, - std::uint64_t tid, lr::AioCompletion* super) - : Completion(dpp, super), f(f), part_num(part_num), ofs(ofs), max_entries(max_entries), - entries_out(entries_out), more_out(more_out), tid(tid) { - result.reserve(max_entries); - } - - Lister(const Lister&) = delete; - Lister& operator =(const Lister&) = delete; - Lister(Lister&&) = delete; - Lister& operator =(Lister&&) = delete; - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (read) - handle_read(std::move(p), r); - else - handle_list(dpp, std::move(p), r); - } - - void list(Ptr&& p) { - if (max_entries > 0) { - part_more = false; - part_full = false; - entries.clear(); - - std::unique_lock l(f->m); - auto part_oid = f->info.part_oid(part_num); - l.unlock(); - - read = false; - auto op = list_part(f->cct, {}, ofs, max_entries, &r_out, - &entries, &part_more, &part_full, - nullptr, tid); - f->ioctx.aio_operate(part_oid, call(std::move(p)), &op, nullptr); - } else { - complete(std::move(p), 0); - } - } - - void handle_read(Ptr&& p, int r) { - read = false; - if (r >= 0) r = r_out; - r_out = 0; - - if (r < 0) { - complete(std::move(p), r); - return; - } - - if (part_num < f->info.tail_part_num) { - /* raced with trim? 
restart */ - max_entries += result.size(); - result.clear(); - part_num = f->info.tail_part_num; - ofs = 0; - list(std::move(p)); - return; - } - /* assuming part was not written yet, so end of data */ - more = false; - complete(std::move(p), 0); - return; - } - - void handle_list(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - if (r >= 0) r = r_out; - r_out = 0; - std::unique_lock l(f->m); - auto part_oid = f->info.part_oid(part_num); - l.unlock(); - if (r == -ENOENT) { - read = true; - f->read_meta(dpp, tid, call(std::move(p))); - return; - } - if (r < 0) { - complete(std::move(p), r); - return; - } - - more = part_full || part_more; - for (auto& entry : entries) { - list_entry e; - e.data = std::move(entry.data); - e.marker = marker{part_num, entry.ofs}.to_string(); - e.mtime = entry.mtime; - result.push_back(std::move(e)); - } - max_entries -= entries.size(); - entries.clear(); - if (max_entries > 0 && part_more) { - list(std::move(p)); - return; - } - - if (!part_full) { /* head part is not full */ - complete(std::move(p), 0); - return; - } - ++part_num; - ofs = 0; - list(std::move(p)); - } -}; - -void FIFO::list(const DoutPrefixProvider *dpp, int max_entries, - std::optional markstr, - std::vector* out, - bool* more, - lr::AioCompletion* c) { - std::unique_lock l(m); - auto tid = ++next_tid; - std::int64_t part_num = info.tail_part_num; - l.unlock(); - std::uint64_t ofs = 0; - std::optional<::rgw::cls::fifo::marker> marker; - - if (markstr) { - marker = to_marker(*markstr); - if (marker) { - part_num = marker->num; - ofs = marker->ofs; - } - } - - auto ls = std::make_unique(dpp, this, part_num, ofs, max_entries, out, - more, tid, c); - if (markstr && !marker) { - auto l = ls.get(); - l->complete(std::move(ls), -EINVAL); - } else { - ls->list(std::move(ls)); - } -} -} diff --git a/src/rgw/store/rados/cls_fifo_legacy.h b/src/rgw/store/rados/cls_fifo_legacy.h deleted file mode 100644 index 9a35e4dd251..00000000000 --- a/src/rgw/store/rados/cls_fifo_legacy.h +++ /dev/null @@ -1,342 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2020 Red Hat - * Author: Adam C. Emerson - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#ifndef CEPH_RGW_CLS_FIFO_LEGACY_H -#define CEPH_RGW_CLS_FIFO_LEGACY_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "include/rados/librados.hpp" -#include "include/buffer.h" -#include "include/function2.hpp" - -#include "common/async/yield_context.h" - -#include "cls/fifo/cls_fifo_types.h" -#include "cls/fifo/cls_fifo_ops.h" - -#include "librados/AioCompletionImpl.h" - -#include "rgw_tools.h" - -namespace rgw::cls::fifo { -namespace cb = ceph::buffer; -namespace fifo = rados::cls::fifo; -namespace lr = librados; - -inline constexpr std::uint64_t default_max_part_size = 4 * 1024 * 1024; -inline constexpr std::uint64_t default_max_entry_size = 32 * 1024; - -void create_meta(lr::ObjectWriteOperation* op, std::string_view id, - std::optional objv, - std::optional oid_prefix, - bool exclusive = false, - std::uint64_t max_part_size = default_max_part_size, - std::uint64_t max_entry_size = default_max_entry_size); -int get_meta(const DoutPrefixProvider *dpp, lr::IoCtx& ioctx, const std::string& oid, - std::optional objv, fifo::info* info, - std::uint32_t* part_header_size, - std::uint32_t* part_entry_overhead, - std::uint64_t tid, optional_yield y, - bool probe = false); -struct marker { - std::int64_t num = 0; - std::uint64_t ofs = 0; - - marker() = default; - marker(std::int64_t num, std::uint64_t ofs) : num(num), ofs(ofs) {} - static marker max() { - return { std::numeric_limits::max(), - std::numeric_limits::max() }; - } - - std::string to_string() { - return fmt::format("{:0>20}:{:0>20}", num, ofs); - } -}; - -struct list_entry { - cb::list data; - std::string marker; - ceph::real_time mtime; -}; - -using part_info = fifo::part_header; - -/// This is an implementation of FIFO using librados to facilitate -/// backports. Please see /src/neorados/cls/fifo.h for full -/// information. -/// -/// This library uses optional_yield. Please see -/// /src/common/async/yield_context.h. In summary, optional_yield -/// contains either a spawn::yield_context (in which case the current -/// coroutine is suspended until completion) or null_yield (in which -/// case the current thread is blocked until completion.) -/// -/// Please see the librados documentation for information on -/// AioCompletion and IoCtx. 
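`marker::to_string()` fixes the listing-marker wire format: two zero-padded 20-digit decimal fields, part number and then offset, joined by a colon. The inverse, `to_marker`, is private to `FIFO` and not shown in this hunk; a plausible round-trip parser looks like the following (an assumption-level sketch, not the source implementation):

```cpp
#include <charconv>
#include <cstdint>
#include <optional>
#include <string_view>
#include <system_error>

struct marker { std::int64_t num = 0; std::uint64_t ofs = 0; };

// Parse "<20-digit num>:<20-digit ofs>" back into a marker.
std::optional<marker> parse_marker(std::string_view s) {
  auto colon = s.find(':');
  if (colon == std::string_view::npos) return std::nullopt;
  marker m;
  auto a = s.substr(0, colon), b = s.substr(colon + 1);
  auto [p1, e1] = std::from_chars(a.data(), a.data() + a.size(), m.num);
  auto [p2, e2] = std::from_chars(b.data(), b.data() + b.size(), m.ofs);
  if (e1 != std::errc{} || e2 != std::errc{} ||
      p1 != a.data() + a.size() || p2 != b.data() + b.size())
    return std::nullopt;  // reject trailing junk or non-numeric fields
  return m;
}
// e.g. parse_marker("00000000000000000003:00000000000000001024")
//      yields {num = 3, ofs = 1024}.
```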
- -class FIFO { - friend struct Reader; - friend struct Updater; - friend struct Trimmer; - friend struct InfoGetter; - friend struct Pusher; - friend struct NewPartPreparer; - friend struct NewHeadPreparer; - friend struct JournalProcessor; - friend struct Lister; - - mutable lr::IoCtx ioctx; - CephContext* cct = static_cast(ioctx.cct()); - const std::string oid; - std::mutex m; - std::uint64_t next_tid = 0; - - fifo::info info; - - std::uint32_t part_header_size = 0xdeadbeef; - std::uint32_t part_entry_overhead = 0xdeadbeef; - - std::optional to_marker(std::string_view s); - - FIFO(lr::IoCtx&& ioc, - std::string oid) - : ioctx(std::move(ioc)), oid(oid) {} - - std::string generate_tag() const; - - int apply_update(const DoutPrefixProvider *dpp, - fifo::info* info, - const fifo::objv& objv, - const fifo::update& update, - std::uint64_t tid); - int _update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, - fifo::objv version, bool* pcanceled, - std::uint64_t tid, optional_yield y); - void _update_meta(const DoutPrefixProvider *dpp, const fifo::update& update, - fifo::objv version, bool* pcanceled, - std::uint64_t tid, lr::AioCompletion* c); - int create_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, - optional_yield y); - int remove_part(const DoutPrefixProvider *dpp, int64_t part_num, std::string_view tag, std::uint64_t tid, - optional_yield y); - int process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y); - void process_journal(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c); - int _prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, optional_yield y); - void _prepare_new_part(const DoutPrefixProvider *dpp, bool is_head, std::uint64_t tid, lr::AioCompletion* c); - int _prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y); - void _prepare_new_head(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c); - int push_entries(const DoutPrefixProvider *dpp, const std::deque& data_bufs, - std::uint64_t tid, optional_yield y); - void push_entries(const std::deque& data_bufs, - std::uint64_t tid, lr::AioCompletion* c); - int trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs, - std::optional tag, bool exclusive, - std::uint64_t tid, optional_yield y); - void trim_part(const DoutPrefixProvider *dpp, int64_t part_num, uint64_t ofs, - std::optional tag, bool exclusive, - std::uint64_t tid, lr::AioCompletion* c); - - /// Force refresh of metadata, yielding/blocking style - int read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, optional_yield y); - /// Force refresh of metadata, with a librados Completion - void read_meta(const DoutPrefixProvider *dpp, std::uint64_t tid, lr::AioCompletion* c); - -public: - - FIFO(const FIFO&) = delete; - FIFO& operator =(const FIFO&) = delete; - FIFO(FIFO&&) = delete; - FIFO& operator =(FIFO&&) = delete; - - /// Open an existing FIFO. - static int open(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, //< IO Context - std::string oid, //< OID for metadata object - std::unique_ptr* fifo, //< OUT: Pointer to FIFO object - optional_yield y, //< Optional yield context - /// Operation will fail if FIFO is not at this version - std::optional objv = std::nullopt, - /// Probing for existence, don't print errors if we - /// can't find it. - bool probe = false); - /// Create a new or open an existing FIFO. 
- static int create(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, //< IO Context - std::string oid, //< OID for metadata object - std::unique_ptr* fifo, //< OUT: Pointer to FIFO object - optional_yield y, //< Optional yield context - /// Operation will fail if the FIFO exists and is - /// not of this version. - std::optional objv = std::nullopt, - /// Prefix for all objects - std::optional oid_prefix = std::nullopt, - /// Fail if the FIFO already exists - bool exclusive = false, - /// Maximum allowed size of parts - std::uint64_t max_part_size = default_max_part_size, - /// Maximum allowed size of entries - std::uint64_t max_entry_size = default_max_entry_size); - - /// Force refresh of metadata, yielding/blocking style - int read_meta(const DoutPrefixProvider *dpp, optional_yield y); - /// Get currently known metadata - const fifo::info& meta() const; - /// Get partition header and entry overhead size - std::pair get_part_layout_info() const; - /// Push an entry to the FIFO - int push(const DoutPrefixProvider *dpp, - const cb::list& bl, //< Entry to push - optional_yield y //< Optional yield - ); - /// Push an entry to the FIFO - void push(const DoutPrefixProvider *dpp, const cb::list& bl, //< Entry to push - lr::AioCompletion* c //< Async Completion - ); - /// Push entries to the FIFO - int push(const DoutPrefixProvider *dpp, - const std::vector& data_bufs, //< Entries to push - optional_yield y //< Optional yield - ); - /// Push entries to the FIFO - void push(const DoutPrefixProvider *dpp, const std::vector& data_bufs, //< Entries to push - lr::AioCompletion* c //< Async Completion - ); - /// List entries - int list(const DoutPrefixProvider *dpp, - int max_entries, //< Maximum entries to list - /// Point after which to begin listing. Start at tail if null - std::optional markstr, - std::vector* out, //< OUT: entries - /// OUT: True if more entries in FIFO beyond the last returned - bool* more, - optional_yield y //< Optional yield - ); - void list(const DoutPrefixProvider *dpp, - int max_entries, //< Maximum entries to list - /// Point after which to begin listing. Start at tail if null - std::optional markstr, - std::vector* out, //< OUT: entries - /// OUT: True if more entries in FIFO beyond the last returned - bool* more, - lr::AioCompletion* c //< Async Completion - ); - /// Trim entries, coroutine/block style - int trim(const DoutPrefixProvider *dpp, - std::string_view markstr, //< Position to which to trim, inclusive - bool exclusive, //< If true, do not trim the target entry - //< itself, just all those before it. - optional_yield y //< Optional yield - ); - /// Trim entries, librados AioCompletion style - void trim(const DoutPrefixProvider *dpp, - std::string_view markstr, //< Position to which to trim, inclusive - bool exclusive, //< If true, do not trim the target entry - //< itself, just all those before it. - lr::AioCompletion* c //< librados AIO Completion - ); - /// Get part info - int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, /// Part number - fifo::part_header* header, //< OUT: Information - optional_yield y //< Optional yield - ); - /// Get part info - void get_part_info(int64_t part_num, //< Part number - fifo::part_header* header, //< OUT: Information - lr::AioCompletion* c //< AIO Completion - ); - /// A convenience method to fetch the part information for the FIFO - /// head, using librados::AioCompletion, since - /// libradio::AioCompletions compose lousily. 
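Taken together, the declarations above support an open-or-create, push, list, trim cycle. A sketch of a blocking caller follows; it assumes a valid `dpp` and `ioctx` are available, and uses `null_yield`, which blocks the calling thread (a coroutine would pass its yield context instead):

```cpp
#include <memory>
#include <optional>
#include <vector>
#include "cls_fifo_legacy.h"  // this header

int demo_cycle(const DoutPrefixProvider* dpp, librados::IoCtx ioctx) {
  namespace rcf = rgw::cls::fifo;
  std::unique_ptr<rcf::FIFO> f;
  // Probe quietly; fall back to create() if the FIFO does not exist yet.
  int r = rcf::FIFO::open(dpp, ioctx, "my_fifo", &f, null_yield,
                          std::nullopt, true /* probe */);
  if (r == -ENOENT || r == -ENODATA)
    r = rcf::FIFO::create(dpp, ioctx, "my_fifo", &f, null_yield);
  if (r < 0) return r;

  ceph::buffer::list bl;
  bl.append("payload");
  if ((r = f->push(dpp, bl, null_yield)) < 0)  // -E2BIG if over max_entry_size
    return r;

  std::vector<rcf::list_entry> entries;
  bool more = false;
  if ((r = f->list(dpp, 100, std::nullopt, &entries, &more, null_yield)) < 0)
    return r;
  if (!entries.empty())  // trim through the last entry we consumed
    r = f->trim(dpp, entries.back().marker, false /* inclusive */, null_yield);
  return r;
}
```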
- void get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function< //< Function to receive info - void(int r, fifo::part_header&&)>, - lr::AioCompletion* c //< AIO Completion - ); -}; - -template -struct Completion { -private: - const DoutPrefixProvider *_dpp; - lr::AioCompletion* _cur = nullptr; - lr::AioCompletion* _super; -public: - - using Ptr = std::unique_ptr; - - lr::AioCompletion* cur() const { - return _cur; - } - lr::AioCompletion* super() const { - return _super; - } - - Completion(const DoutPrefixProvider *dpp, lr::AioCompletion* super) : _dpp(dpp), _super(super) { - super->pc->get(); - } - - ~Completion() { - if (_super) { - _super->pc->put(); - } - if (_cur) - _cur->release(); - _super = nullptr; - _cur = nullptr; - } - - // The only times that aio_operate can return an error are: - // 1. The completion contains a null pointer. This should just - // crash, and in our case it does. - // 2. An attempt is made to write to a snapshot. RGW doesn't use - // snapshots, so we don't care. - // - // So we will just assert that initiating an Aio operation succeeds - // and not worry about recovering. - static lr::AioCompletion* call(Ptr&& p) { - p->_cur = lr::Rados::aio_create_completion(static_cast(p.get()), - &cb); - auto c = p->_cur; - p.release(); - return c; - } - static void complete(Ptr&& p, int r) { - auto c = p->_super; - p->_super = nullptr; - rgw_complete_aio_completion(c, r); - } - - static void cb(lr::completion_t, void* arg) { - auto t = static_cast(arg); - auto r = t->_cur->get_return_value(); - t->_cur->release(); - t->_cur = nullptr; - t->handle(t->_dpp, Ptr(t), r); - } -}; - -} - -#endif // CEPH_RGW_CLS_FIFO_LEGACY_H diff --git a/src/rgw/store/rados/config/impl.cc b/src/rgw/store/rados/config/impl.cc deleted file mode 100644 index f1b2befadcd..00000000000 --- a/src/rgw/store/rados/config/impl.cc +++ /dev/null @@ -1,129 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
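The `Completion` template just shown is the entire bridge between librados' C-style completions and these stateful handlers: the heap object owns itself while the AIO is in flight, and the C callback re-wraps it, harvests the return code, and frees the librados completion before dispatching. Its skeleton, minus the superior-completion refcounting (a simplified sketch, not a drop-in replacement):

```cpp
#include <memory>
#include "include/rados/librados.hpp"

// Skeleton of the self-owning callback pattern used by Completion<T>.
struct Handler {
  librados::AioCompletion* cur = nullptr;
  void handle(int r);  // user logic; the real code may chain another call()

  static librados::AioCompletion* call(std::unique_ptr<Handler> p) {
    p->cur = librados::Rados::aio_create_completion(p.get(), &cb);
    auto c = p->cur;
    p.release();       // the in-flight AIO now owns the handler
    return c;          // hand this to aio_operate()/aio_exec()
  }

  static void cb(librados::completion_t, void* arg) {
    std::unique_ptr<Handler> p{static_cast<Handler*>(arg)}; // re-take ownership
    int r = p->cur->get_return_value();
    p->cur->release(); // free the librados completion before dispatching
    p->cur = nullptr;
    p->handle(r);
  }
};
```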
- * - */ - -#include "impl.h" - -#include "common/async/yield_context.h" -#include "common/errno.h" -#include "rgw_string.h" -#include "rgw_zone.h" - -namespace rgw::rados { - -// default pool names -constexpr std::string_view default_zone_root_pool = "rgw.root"; -constexpr std::string_view default_zonegroup_root_pool = "rgw.root"; -constexpr std::string_view default_realm_root_pool = "rgw.root"; -constexpr std::string_view default_period_root_pool = "rgw.root"; - -static rgw_pool default_pool(std::string_view name, - std::string_view default_name) -{ - return std::string{name_or_default(name, default_name)}; -} - -ConfigImpl::ConfigImpl(const ceph::common::ConfigProxy& conf) - : realm_pool(default_pool(conf->rgw_realm_root_pool, - default_realm_root_pool)), - period_pool(default_pool(conf->rgw_period_root_pool, - default_period_root_pool)), - zonegroup_pool(default_pool(conf->rgw_zonegroup_root_pool, - default_zonegroup_root_pool)), - zone_pool(default_pool(conf->rgw_zone_root_pool, - default_zone_root_pool)) -{ -} - -int ConfigImpl::read(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - bufferlist& bl, RGWObjVersionTracker* objv) -{ - librados::IoCtx ioctx; - int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false); - if (r < 0) { - return r; - } - librados::ObjectReadOperation op; - if (objv) { - objv->prepare_op_for_read(&op); - } - op.read(0, 0, &bl, nullptr); - return rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); -} - -int ConfigImpl::write(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - Create create, const bufferlist& bl, - RGWObjVersionTracker* objv) -{ - librados::IoCtx ioctx; - int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false); - if (r < 0) { - return r; - } - - librados::ObjectWriteOperation op; - switch (create) { - case Create::MustNotExist: op.create(true); break; - case Create::MayExist: op.create(false); break; - case Create::MustExist: op.assert_exists(); break; - } - if (objv) { - objv->prepare_op_for_write(&op); - } - op.write_full(bl); - - r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r >= 0 && objv) { - objv->apply_write(); - } - return r; -} - -int ConfigImpl::remove(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - RGWObjVersionTracker* objv) -{ - librados::IoCtx ioctx; - int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false); - if (r < 0) { - return r; - } - - librados::ObjectWriteOperation op; - if (objv) { - objv->prepare_op_for_write(&op); - } - op.remove(); - - r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r >= 0 && objv) { - objv->apply_write(); - } - return r; -} - -int ConfigImpl::notify(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - bufferlist& bl, uint64_t timeout_ms) -{ - librados::IoCtx ioctx; - int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false); - if (r < 0) { - return r; - } - return rgw_rados_notify(dpp, ioctx, oid, bl, timeout_ms, nullptr, y); -} - -} // namespace rgw::rados diff --git a/src/rgw/store/rados/config/impl.h b/src/rgw/store/rados/config/impl.h deleted file mode 100644 index 3aed451f996..00000000000 --- a/src/rgw/store/rados/config/impl.h +++ /dev/null @@ -1,139 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. 
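`ConfigImpl::write` above maps the three `Create` policies onto one librados write-op guard each; the same mapping condensed into a helper for reference (a restatement of the switch shown, not new behavior):

```cpp
#include "include/rados/librados.hpp"

enum class Create { MustNotExist, MayExist, MustExist };

// Map a creation policy onto the matching librados write-op guard.
void apply_create_guard(librados::ObjectWriteOperation& op, Create c) {
  switch (c) {
  case Create::MustNotExist: op.create(true);    break; // -EEXIST if present
  case Create::MayExist:     op.create(false);   break; // create or overwrite
  case Create::MustExist:    op.assert_exists(); break; // -ENOENT if absent
  }
}
```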
- * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#pragma once - -#include "include/rados/librados.hpp" -#include "common/dout.h" -#include "rgw_basic_types.h" -#include "rgw_tools.h" -#include "rgw_sal_config.h" - -namespace rgw::rados { - -// write options that control object creation -enum class Create { - MustNotExist, // fail with EEXIST if the object already exists - MayExist, // create if the object didn't exist, overwrite if it did - MustExist, // fail with ENOENT if the object doesn't exist -}; - -struct ConfigImpl { - librados::Rados rados; - - const rgw_pool realm_pool; - const rgw_pool period_pool; - const rgw_pool zonegroup_pool; - const rgw_pool zone_pool; - - ConfigImpl(const ceph::common::ConfigProxy& conf); - - int read(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - bufferlist& bl, RGWObjVersionTracker* objv); - - template - int read(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - T& data, RGWObjVersionTracker* objv) - { - bufferlist bl; - int r = read(dpp, y, pool, oid, bl, objv); - if (r < 0) { - return r; - } - try { - auto p = bl.cbegin(); - decode(data, p); - } catch (const buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode obj from " - << pool << ":" << oid << dendl; - return -EIO; - } - return 0; - } - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, Create create, - const bufferlist& bl, RGWObjVersionTracker* objv); - - template - int write(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, Create create, - const T& data, RGWObjVersionTracker* objv) - { - bufferlist bl; - encode(data, bl); - - return write(dpp, y, pool, oid, create, bl, objv); - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - RGWObjVersionTracker* objv); - - int list(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& marker, - std::regular_invocable auto filter, - std::span entries, - sal::ListResult& result) - { - librados::IoCtx ioctx; - int r = rgw_init_ioctx(dpp, &rados, pool, ioctx, true, false); - if (r < 0) { - return r; - } - librados::ObjectCursor oc; - if (!oc.from_str(marker)) { - ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl; - return -EINVAL; - } - std::size_t count = 0; - try { - auto iter = ioctx.nobjects_begin(oc); - const auto end = ioctx.nobjects_end(); - for (; count < entries.size() && iter != end; ++iter) { - std::string entry = filter(iter->get_oid()); - if (!entry.empty()) { - entries[count++] = std::move(entry); - } - } - if (iter == end) { - result.next.clear(); - } else { - result.next = iter.get_cursor().to_str(); - } - } catch (const std::exception& e) { - ldpp_dout(dpp, 10) << "NObjectIterator exception " << e.what() << dendl; - return -EIO; - } - result.entries = entries.first(count); - return 0; - } - - int notify(const DoutPrefixProvider* dpp, optional_yield y, - const rgw_pool& pool, const std::string& oid, - bufferlist& bl, uint64_t timeout_ms); -}; - -inline std::string_view name_or_default(std::string_view name, - std::string_view default_name) -{ - if (!name.empty()) { - return name; - } - return default_name; -} - -} // namespace 
rgw::rados diff --git a/src/rgw/store/rados/config/period.cc b/src/rgw/store/rados/config/period.cc deleted file mode 100644 index 04650cebfaf..00000000000 --- a/src/rgw/store/rados/config/period.cc +++ /dev/null @@ -1,230 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include "common/dout.h" -#include "common/errno.h" -#include "rgw_zone.h" -#include "store/rados/config/store.h" - -#include "impl.h" - -namespace rgw::rados { - -// period oids -constexpr std::string_view period_info_oid_prefix = "periods."; -constexpr std::string_view period_latest_epoch_info_oid = ".latest_epoch"; -constexpr std::string_view period_staging_suffix = ":staging"; - -static std::string period_oid(std::string_view period_id, uint32_t epoch) -{ - // omit the epoch for the staging period - if (period_id.ends_with(period_staging_suffix)) { - return string_cat_reserve(period_info_oid_prefix, period_id); - } - return fmt::format("{}{}.{}", period_info_oid_prefix, period_id, epoch); -} - -static std::string latest_epoch_oid(const ceph::common::ConfigProxy& conf, - std::string_view period_id) -{ - return string_cat_reserve( - period_info_oid_prefix, period_id, - name_or_default(conf->rgw_period_latest_epoch_info_oid, - period_latest_epoch_info_oid)); -} - -static int read_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y, - ConfigImpl* impl, std::string_view period_id, - uint32_t& epoch, RGWObjVersionTracker* objv) -{ - const auto& pool = impl->period_pool; - const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id); - RGWPeriodLatestEpochInfo latest; - int r = impl->read(dpp, y, pool, latest_oid, latest, objv); - if (r >= 0) { - epoch = latest.epoch; - } - return r; -} - -static int write_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y, - ConfigImpl* impl, bool exclusive, - std::string_view period_id, uint32_t epoch, - RGWObjVersionTracker* objv) -{ - const auto& pool = impl->period_pool; - const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id); - const auto create = exclusive ? 
Create::MustNotExist : Create::MayExist; - RGWPeriodLatestEpochInfo latest{epoch}; - return impl->write(dpp, y, pool, latest_oid, create, latest, objv); -} - -static int delete_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y, - ConfigImpl* impl, std::string_view period_id, - RGWObjVersionTracker* objv) -{ - const auto& pool = impl->period_pool; - const auto latest_oid = latest_epoch_oid(dpp->get_cct()->_conf, period_id); - return impl->remove(dpp, y, pool, latest_oid, objv); -} - -static int update_latest_epoch(const DoutPrefixProvider* dpp, optional_yield y, - ConfigImpl* impl, std::string_view period_id, - uint32_t epoch) -{ - static constexpr int MAX_RETRIES = 20; - - for (int i = 0; i < MAX_RETRIES; i++) { - uint32_t existing_epoch = 0; - RGWObjVersionTracker objv; - bool exclusive = false; - - // read existing epoch - int r = read_latest_epoch(dpp, y, impl, period_id, existing_epoch, &objv); - if (r == -ENOENT) { - // use an exclusive create to set the epoch atomically - exclusive = true; - objv.generate_new_write_ver(dpp->get_cct()); - ldpp_dout(dpp, 20) << "creating initial latest_epoch=" << epoch - << " for period=" << period_id << dendl; - } else if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read latest_epoch" << dendl; - return r; - } else if (epoch <= existing_epoch) { - r = -EEXIST; // fail with EEXIST if epoch is not newer - ldpp_dout(dpp, 10) << "found existing latest_epoch " << existing_epoch - << " >= given epoch " << epoch << ", returning r=" << r << dendl; - return r; - } else { - ldpp_dout(dpp, 20) << "updating latest_epoch from " << existing_epoch - << " -> " << epoch << " on period=" << period_id << dendl; - } - - r = write_latest_epoch(dpp, y, impl, exclusive, period_id, epoch, &objv); - if (r == -EEXIST) { - continue; // exclusive create raced with another update, retry - } else if (r == -ECANCELED) { - continue; // write raced with a conflicting version, retry - } - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to write latest_epoch" << dendl; - return r; - } - return 0; // return success - } - - return -ECANCELED; // fail after max retries -} - -int RadosConfigStore::create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) -{ - if (info.get_id().empty()) { - ldpp_dout(dpp, 0) << "period cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.get_epoch() == 0) { - ldpp_dout(dpp, 0) << "period cannot have an empty epoch" << dendl; - return -EINVAL; - } - const auto& pool = impl->period_pool; - const auto info_oid = period_oid(info.get_id(), info.get_epoch()); - const auto create = exclusive ? 
Create::MustNotExist : Create::MayExist;
-  RGWObjVersionTracker objv;
-  objv.generate_new_write_ver(dpp->get_cct());
-  int r = impl->write(dpp, y, pool, info_oid, create, info, &objv);
-  if (r < 0) {
-    return r;
-  }
-
-  (void) update_latest_epoch(dpp, y, impl.get(), info.get_id(), info.get_epoch());
-  return 0;
-}
-
-int RadosConfigStore::read_period(const DoutPrefixProvider* dpp,
-                                  optional_yield y,
-                                  std::string_view period_id,
-                                  std::optional<uint32_t> epoch,
-                                  RGWPeriod& info)
-{
-  int r = 0;
-  if (!epoch) {
-    epoch = 0;
-    r = read_latest_epoch(dpp, y, impl.get(), period_id, *epoch, nullptr);
-    if (r < 0) {
-      return r;
-    }
-  }
-
-  const auto& pool = impl->period_pool;
-  const auto info_oid = period_oid(period_id, *epoch);
-  return impl->read(dpp, y, pool, info_oid, info, nullptr);
-}
-
-int RadosConfigStore::delete_period(const DoutPrefixProvider* dpp,
-                                    optional_yield y,
-                                    std::string_view period_id)
-{
-  const auto& pool = impl->period_pool;
-
-  // read the latest_epoch
-  uint32_t latest_epoch = 0;
-  RGWObjVersionTracker latest_objv;
-  int r = read_latest_epoch(dpp, y, impl.get(), period_id,
-                            latest_epoch, &latest_objv);
-  if (r < 0 && r != -ENOENT) { // just delete epoch=0 on ENOENT
-    ldpp_dout(dpp, 0) << "failed to read latest epoch for period "
-        << period_id << ": " << cpp_strerror(r) << dendl;
-    return r;
-  }
-
-  for (uint32_t epoch = 0; epoch <= latest_epoch; epoch++) {
-    const auto info_oid = period_oid(period_id, epoch);
-    r = impl->remove(dpp, y, pool, info_oid, nullptr);
-    if (r < 0 && r != -ENOENT) { // ignore ENOENT
-      ldpp_dout(dpp, 0) << "failed to delete period " << info_oid
-          << ": " << cpp_strerror(r) << dendl;
-      return r;
-    }
-  }
-
-  return delete_latest_epoch(dpp, y, impl.get(), period_id, &latest_objv);
-}
-
-int RadosConfigStore::list_period_ids(const DoutPrefixProvider* dpp,
-                                      optional_yield y,
-                                      const std::string& marker,
-                                      std::span<std::string> entries,
-                                      sal::ListResult<std::string>& result)
-{
-  const auto& pool = impl->period_pool;
-  constexpr auto prefix = [] (std::string oid) -> std::string {
-    if (!oid.starts_with(period_info_oid_prefix)) {
-      return {};
-    }
-    if (!oid.ends_with(period_latest_epoch_info_oid)) {
-      return {};
-    }
-    // trim the prefix and suffix
-    const std::size_t count = oid.size() -
-        period_info_oid_prefix.size() -
-        period_latest_epoch_info_oid.size();
-    return oid.substr(period_info_oid_prefix.size(), count);
-  };
-
-  return impl->list(dpp, y, pool, marker, prefix, entries, result);
-}
-
-} // namespace rgw::rados
diff --git a/src/rgw/store/rados/config/period_config.cc b/src/rgw/store/rados/config/period_config.cc
deleted file mode 100644
index b17a48aaa60..00000000000
--- a/src/rgw/store/rados/config/period_config.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
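`update_latest_epoch()` above is a bounded optimistic read-modify-write: read the current epoch under a version tracker, give up with `-EEXIST` if the new epoch is not newer, attempt the version-conditioned write, and retry when it loses a race. A standalone sketch of the same control flow, with an atomic compare-exchange standing in for the `RGWObjVersionTracker` condition (all names hypothetical):

```cpp
#include <atomic>
#include <cstdint>
#include <iostream>

// stand-in object: high 32 bits = write version, low 32 bits = epoch
static std::atomic<uint64_t> stored{0};

bool update_latest_epoch(uint32_t epoch, int max_retries = 20) {
  for (int i = 0; i < max_retries; ++i) {
    uint64_t cur = stored.load();
    const uint32_t cur_ver = static_cast<uint32_t>(cur >> 32);
    const uint32_t cur_epoch = static_cast<uint32_t>(cur);
    if (cur_ver != 0 && epoch <= cur_epoch) {
      return false;                 // -EEXIST: stored epoch is not older
    }
    const uint64_t next = (static_cast<uint64_t>(cur_ver + 1) << 32) | epoch;
    if (stored.compare_exchange_strong(cur, next)) {
      return true;                  // version-conditioned write succeeded
    }
    // another writer bumped the version first (-ECANCELED): retry
  }
  return false;                     // -ECANCELED after max retries
}

int main() {
  std::cout << update_latest_epoch(1) << update_latest_epoch(1)
            << update_latest_epoch(2) << "\n";   // prints 101
}
```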
- * - */ - -#include "rgw_zone.h" -#include "store/rados/config/store.h" - -#include "impl.h" - -namespace rgw::rados { - -// period config oids -constexpr std::string_view period_config_prefix = "period_config."; -constexpr std::string_view period_config_realm_default = "default"; - -std::string period_config_oid(std::string_view realm_id) -{ - if (realm_id.empty()) { - realm_id = period_config_realm_default; - } - return string_cat_reserve(period_config_prefix, realm_id); -} - -int RadosConfigStore::read_period_config(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWPeriodConfig& info) -{ - const auto& pool = impl->period_pool; - const auto oid = period_config_oid(realm_id); - return impl->read(dpp, y, pool, oid, info, nullptr); -} - -int RadosConfigStore::write_period_config(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - const RGWPeriodConfig& info) -{ - const auto& pool = impl->period_pool; - const auto oid = period_config_oid(realm_id); - const auto create = exclusive ? Create::MustNotExist : Create::MayExist; - return impl->write(dpp, y, pool, oid, create, info, nullptr); -} - -} // namespace rgw::rados diff --git a/src/rgw/store/rados/config/realm.cc b/src/rgw/store/rados/config/realm.cc deleted file mode 100644 index f62cb7a2879..00000000000 --- a/src/rgw/store/rados/config/realm.cc +++ /dev/null @@ -1,364 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include "common/dout.h" -#include "common/errno.h" -#include "rgw_realm_watcher.h" -#include "rgw_zone.h" -#include "store/rados/config/store.h" - -#include "impl.h" - -namespace rgw::rados { - -// realm oids -constexpr std::string_view realm_names_oid_prefix = "realms_names."; -constexpr std::string_view realm_info_oid_prefix = "realms."; -constexpr std::string_view realm_control_oid_suffix = ".control"; -constexpr std::string_view default_realm_info_oid = "default.realm"; - -static std::string realm_info_oid(std::string_view realm_id) -{ - return string_cat_reserve(realm_info_oid_prefix, realm_id); -} -static std::string realm_name_oid(std::string_view realm_id) -{ - return string_cat_reserve(realm_names_oid_prefix, realm_id); -} -static std::string realm_control_oid(std::string_view realm_id) -{ - return string_cat_reserve(realm_info_oid_prefix, realm_id, - realm_control_oid_suffix); -} -static std::string default_realm_oid(const ceph::common::ConfigProxy& conf) -{ - return std::string{name_or_default(conf->rgw_default_realm_info_oid, - default_realm_info_oid)}; -} - - -int RadosConfigStore::write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) -{ - const auto& pool = impl->realm_pool; - const auto oid = default_realm_oid(dpp->get_cct()->_conf); - const auto create = exclusive ? 
Create::MustNotExist : Create::MayExist; - - RGWDefaultSystemMetaObjInfo default_info; - default_info.default_id = realm_id; - - return impl->write(dpp, y, pool, oid, create, default_info, nullptr); -} - -int RadosConfigStore::read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) -{ - const auto& pool = impl->realm_pool; - const auto oid = default_realm_oid(dpp->get_cct()->_conf); - - RGWDefaultSystemMetaObjInfo default_info; - int r = impl->read(dpp, y, pool, oid, default_info, nullptr); - if (r >= 0) { - realm_id = default_info.default_id; - } - return r; -} - -int RadosConfigStore::delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) -{ - const auto& pool = impl->realm_pool; - const auto oid = default_realm_oid(dpp->get_cct()->_conf); - - return impl->remove(dpp, y, pool, oid, nullptr); -} - - -class RadosRealmWriter : public sal::RealmWriter { - ConfigImpl* impl; - RGWObjVersionTracker objv; - std::string realm_id; - std::string realm_name; - public: - RadosRealmWriter(ConfigImpl* impl, RGWObjVersionTracker objv, - std::string_view realm_id, std::string_view realm_name) - : impl(impl), objv(std::move(objv)), - realm_id(realm_id), realm_name(realm_name) - { - } - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWRealm& info) override - { - if (realm_id != info.get_id() || realm_name != info.get_name()) { - return -EINVAL; // can't modify realm id or name directly - } - - const auto& pool = impl->realm_pool; - const auto info_oid = realm_info_oid(info.get_id()); - return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWRealm& info, std::string_view new_name) override - { - if (realm_id != info.get_id() || realm_name != info.get_name()) { - return -EINVAL; // can't modify realm id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->realm_pool; - const auto name = RGWNameToId{info.get_id()}; - const auto info_oid = realm_info_oid(info.get_id()); - const auto old_oid = realm_name_oid(info.get_name()); - const auto new_oid = realm_name_oid(new_name); - - // link the new name - RGWObjVersionTracker new_objv; - new_objv.generate_new_write_ver(dpp->get_cct()); - int r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist, - name, &new_objv); - if (r < 0) { - return r; - } - - // write the info with updated name - info.set_name(std::string{new_name}); - r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - if (r < 0) { - // on failure, unlink the new name - (void) impl->remove(dpp, y, pool, new_oid, &new_objv); - return r; - } - - // unlink the old name - (void) impl->remove(dpp, y, pool, old_oid, nullptr); - - realm_name = new_name; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - const auto& pool = impl->realm_pool; - const auto info_oid = realm_info_oid(realm_id); - int r = impl->remove(dpp, y, pool, info_oid, &objv); - if (r < 0) { - return r; - } - const auto name_oid = realm_name_oid(realm_name); - (void) impl->remove(dpp, y, pool, name_oid, nullptr); - const auto control_oid = realm_control_oid(realm_id); - (void) impl->remove(dpp, y, pool, control_oid, nullptr); - return 0; - } -}; // RadosRealmWriter - - -int RadosConfigStore::create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, 
- std::unique_ptr* writer) -{ - if (info.get_id().empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.get_name().empty()) { - ldpp_dout(dpp, 0) << "realm cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->realm_pool; - const auto create = exclusive ? Create::MustNotExist : Create::MayExist; - - // write the realm info - const auto info_oid = realm_info_oid(info.get_id()); - RGWObjVersionTracker objv; - objv.generate_new_write_ver(dpp->get_cct()); - - int r = impl->write(dpp, y, pool, info_oid, create, info, &objv); - if (r < 0) { - return r; - } - - // write the realm name - const auto name_oid = realm_name_oid(info.get_name()); - const auto name = RGWNameToId{info.get_id()}; - RGWObjVersionTracker name_objv; - name_objv.generate_new_write_ver(dpp->get_cct()); - - r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv); - if (r < 0) { - (void) impl->remove(dpp, y, pool, info_oid, &objv); - return r; - } - - // create control object for watch/notify - const auto control_oid = realm_control_oid(info.get_id()); - bufferlist empty_bl; - r = impl->write(dpp, y, pool, control_oid, Create::MayExist, - empty_bl, nullptr); - if (r < 0) { - (void) impl->remove(dpp, y, pool, name_oid, &name_objv); - (void) impl->remove(dpp, y, pool, info_oid, &objv); - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->realm_pool; - const auto info_oid = realm_info_oid(realm_id); - RGWObjVersionTracker objv; - int r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->realm_pool; - - // look up realm id by name - RGWNameToId name; - const auto name_oid = realm_name_oid(realm_name); - int r = impl->read(dpp, y, pool, name_oid, name, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = realm_info_oid(name.obj_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->realm_pool; - - // read default realm id - RGWDefaultSystemMetaObjInfo default_info; - const auto default_oid = default_realm_oid(dpp->get_cct()->_conf); - int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = realm_info_oid(default_info.default_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_realm_id(const DoutPrefixProvider* dpp, - 
                                    optional_yield y,
-                                    std::string_view realm_name,
-                                    std::string& realm_id)
-{
-  const auto& pool = impl->realm_pool;
-  RGWNameToId name;
-
-  // look up realm id by name
-  const auto name_oid = realm_name_oid(realm_name);
-  int r = impl->read(dpp, y, pool, name_oid, name, nullptr);
-  if (r < 0) {
-    return r;
-  }
-  realm_id = std::move(name.obj_id);
-  return 0;
-}
-
-int RadosConfigStore::realm_notify_new_period(const DoutPrefixProvider* dpp,
-                                              optional_yield y,
-                                              const RGWPeriod& period)
-{
-  const auto& pool = impl->realm_pool;
-  const auto control_oid = realm_control_oid(period.get_realm());
-
-  bufferlist bl;
-  using ceph::encode;
-  // push the period to dependent zonegroups/zones
-  encode(RGWRealmNotify::ZonesNeedPeriod, bl);
-  encode(period, bl);
-  // reload the gateway with the new period
-  encode(RGWRealmNotify::Reload, bl);
-
-  constexpr uint64_t timeout_ms = 0;
-  return impl->notify(dpp, y, pool, control_oid, bl, timeout_ms);
-}
-
-int RadosConfigStore::list_realm_names(const DoutPrefixProvider* dpp,
-                                       optional_yield y,
-                                       const std::string& marker,
-                                       std::span<std::string> entries,
-                                       sal::ListResult<std::string>& result)
-{
-  const auto& pool = impl->realm_pool;
-  constexpr auto prefix = [] (std::string oid) -> std::string {
-    if (!oid.starts_with(realm_names_oid_prefix)) {
-      return {};
-    }
-    return oid.substr(realm_names_oid_prefix.size());
-  };
-  return impl->list(dpp, y, pool, marker, prefix, entries, result);
-}
-
-} // namespace rgw::rados
diff --git a/src/rgw/store/rados/config/store.cc b/src/rgw/store/rados/config/store.cc
deleted file mode 100644
index ec2b034a8e0..00000000000
--- a/src/rgw/store/rados/config/store.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "include/rados/librados.hpp"
-#include "common/errno.h"
-#include "impl.h"
-#include "store.h"
-
-namespace rgw::rados {
-
-RadosConfigStore::RadosConfigStore(std::unique_ptr<ConfigImpl> impl)
-  : impl(std::move(impl))
-{
-}
-
-RadosConfigStore::~RadosConfigStore() = default;
-
-
-auto create_config_store(const DoutPrefixProvider* dpp)
-  -> std::unique_ptr<RadosConfigStore>
-{
-  auto impl = std::make_unique<ConfigImpl>(dpp->get_cct()->_conf);
-
-  // initialize a Rados client
-  int r = impl->rados.init_with_context(dpp->get_cct());
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << "Rados client initialization failed with "
-        << cpp_strerror(-r) << dendl;
-    return nullptr;
-  }
-  r = impl->rados.connect();
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << "Rados client connection failed with "
-        << cpp_strerror(-r) << dendl;
-    return nullptr;
-  }
-
-  return std::make_unique<RadosConfigStore>(std::move(impl));
-}
-
-} // namespace rgw::rados
diff --git a/src/rgw/store/rados/config/store.h b/src/rgw/store/rados/config/store.h
deleted file mode 100644
index 1b93a803db3..00000000000
--- a/src/rgw/store/rados/config/store.h
+++ /dev/null
@@ -1,182 +0,0 @@
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2022 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
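`RadosRealmWriter::rename()` above (and its zone/zonegroup counterparts later in this diff) orders its three object updates so the name index never points at a missing realm: exclusively link the new name, rewrite the info object, then best-effort unlink the old name, rolling back the new link if the info write fails. A standalone model with a `std::map` standing in for the pool (ids and names made up):

```cpp
#include <map>
#include <string>

static std::map<std::string, std::string> pool;   // oid -> payload stand-in

// returns false if the new name is already taken or the info write fails
bool rename_realm(const std::string& id, std::string& name,
                  const std::string& new_name) {
  // 1. exclusive create of the new name link (Create::MustNotExist)
  if (!pool.emplace("realms_names." + new_name, id).second) {
    return false;
  }
  // 2. rewrite the info object under its unchanged id (Create::MustExist)
  auto info = pool.find("realms." + id);
  if (info == pool.end()) {
    pool.erase("realms_names." + new_name);       // roll back the new link
    return false;
  }
  info->second = new_name;                        // info carries the new name
  // 3. best-effort unlink of the old name; failures are ignored
  pool.erase("realms_names." + name);
  name = new_name;
  return true;
}

int main() {
  pool["realms.1a2b"] = "gold";                   // info object
  pool["realms_names.gold"] = "1a2b";             // name link
  std::string name = "gold";
  return rename_realm("1a2b", name, "platinum") ? 0 : 1;
}
```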
- * - */ - -#pragma once - -#include -#include -#include -#include "rgw_common.h" -#include "rgw_sal_config.h" - -class DoutPrefixProvider; -class optional_yield; - -namespace rgw::rados { - -struct ConfigImpl; - -class RadosConfigStore : public sal::ConfigStore { - public: - explicit RadosConfigStore(std::unique_ptr impl); - virtual ~RadosConfigStore() override; - - // Realm - virtual int write_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id) override; - virtual int read_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string& realm_id) override; - virtual int delete_default_realm_id(const DoutPrefixProvider* dpp, - optional_yield y) override; - - virtual int create_realm(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_default_realm(const DoutPrefixProvider* dpp, - optional_yield y, - RGWRealm& info, - std::unique_ptr* writer) override; - virtual int read_realm_id(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view realm_name, - std::string& realm_id) override; - virtual int realm_notify_new_period(const DoutPrefixProvider* dpp, - optional_yield y, - const RGWPeriod& period) override; - virtual int list_realm_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - // Period - virtual int create_period(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWPeriod& info) override; - virtual int read_period(const DoutPrefixProvider* dpp, - optional_yield y, std::string_view period_id, - std::optional epoch, RGWPeriod& info) override; - virtual int delete_period(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view period_id) override; - virtual int list_period_ids(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - // ZoneGroup - virtual int write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) override; - virtual int read_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) override; - virtual int delete_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) override; - - virtual int create_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_zonegroup_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_id, - RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_zonegroup_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) override; - virtual int read_default_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneGroup& info, - 
std::unique_ptr* writer) override; - virtual int list_zonegroup_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - // Zone - virtual int write_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - std::string_view zone_id) override; - virtual int read_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zone_id) override; - virtual int delete_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) override; - - virtual int create_zone(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_zone_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_id, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_zone_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int read_default_zone(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneParams& info, - std::unique_ptr* writer) override; - virtual int list_zone_names(const DoutPrefixProvider* dpp, - optional_yield y, const std::string& marker, - std::span entries, - sal::ListResult& result) override; - - // PeriodConfig - virtual int read_period_config(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWPeriodConfig& info) override; - virtual int write_period_config(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - std::string_view realm_id, - const RGWPeriodConfig& info) override; - - private: - std::unique_ptr impl; -}; // RadosConfigStore - - -/// RadosConfigStore factory function -auto create_config_store(const DoutPrefixProvider* dpp) - -> std::unique_ptr; - -} // namespace rgw::rados diff --git a/src/rgw/store/rados/config/zone.cc b/src/rgw/store/rados/config/zone.cc deleted file mode 100644 index c1151f57510..00000000000 --- a/src/rgw/store/rados/config/zone.cc +++ /dev/null @@ -1,312 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
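`RadosConfigStore` above overrides `rgw::sal::ConfigStore` method-for-method and is reached only through the `create_config_store()` factory, which returns `nullptr` when the client fails to initialize or connect. A sketch of a typical consumer call (in-tree only, since it uses RGW headers; the caller's `DoutPrefixProvider` and `optional_yield` come from its own context, and the helper name is hypothetical):

```cpp
#include <cerrno>
#include "rgw_zone.h"
#include "store/rados/config/store.h"

// hypothetical helper: fetch the default realm through the config store
int load_default_realm(const DoutPrefixProvider* dpp, optional_yield y,
                       RGWRealm& realm)
{
  auto store = rgw::rados::create_config_store(dpp);
  if (!store) {
    return -EIO;  // init/connect failed; the factory already logged it
  }
  // pass nullptr for the writer when no follow-up update is intended
  std::unique_ptr<rgw::sal::RealmWriter> writer;
  return store->read_default_realm(dpp, y, realm, &writer);
}
```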
- * - */ - -#include "common/dout.h" -#include "common/errno.h" -#include "rgw_zone.h" -#include "store/rados/config/store.h" - -#include "impl.h" - -namespace rgw::rados { - -// zone oids -constexpr std::string_view zone_info_oid_prefix = "zone_info."; -constexpr std::string_view zone_names_oid_prefix = "zone_names."; - -std::string zone_info_oid(std::string_view zone_id) -{ - return string_cat_reserve(zone_info_oid_prefix, zone_id); -} -std::string zone_name_oid(std::string_view zone_id) -{ - return string_cat_reserve(zone_names_oid_prefix, zone_id); -} -std::string default_zone_oid(const ceph::common::ConfigProxy& conf, - std::string_view realm_id) -{ - return fmt::format("{}.{}", conf->rgw_default_zone_info_oid, realm_id); -} - - -int RadosConfigStore::write_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - bool exclusive, - std::string_view realm_id, - std::string_view zone_id) -{ - const auto& pool = impl->zone_pool; - const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id); - const auto create = exclusive ? Create::MustNotExist : Create::MayExist; - - RGWDefaultSystemMetaObjInfo default_info; - default_info.default_id = zone_id; - - return impl->write(dpp, y, pool, default_oid, create, default_info, nullptr); -} - -int RadosConfigStore::read_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zone_id) -{ - const auto& pool = impl->zone_pool; - const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id); - - RGWDefaultSystemMetaObjInfo default_info; - int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr); - if (r >= 0) { - zone_id = default_info.default_id; - } - return r; -} - -int RadosConfigStore::delete_default_zone_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - const auto& pool = impl->zone_pool; - const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id); - - return impl->remove(dpp, y, pool, default_oid, nullptr); -} - - -class RadosZoneWriter : public sal::ZoneWriter { - ConfigImpl* impl; - RGWObjVersionTracker objv; - std::string zone_id; - std::string zone_name; - public: - RadosZoneWriter(ConfigImpl* impl, RGWObjVersionTracker objv, - std::string_view zone_id, std::string_view zone_name) - : impl(impl), objv(std::move(objv)), - zone_id(zone_id), zone_name(zone_name) - { - } - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneParams& info) override - { - if (zone_id != info.get_id() || zone_name != info.get_name()) { - return -EINVAL; // can't modify zone id or name directly - } - - const auto& pool = impl->zone_pool; - const auto info_oid = zone_info_oid(info.get_id()); - return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneParams& info, std::string_view new_name) override - { - if (zone_id != info.get_id() || zone_name != info.get_name()) { - return -EINVAL; // can't modify zone id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->zone_pool; - const auto name = RGWNameToId{info.get_id()}; - const auto info_oid = zone_info_oid(info.get_id()); - const auto old_oid = zone_name_oid(info.get_name()); - const auto new_oid = zone_name_oid(new_name); - - // link the new name - RGWObjVersionTracker new_objv; - new_objv.generate_new_write_ver(dpp->get_cct()); - int 
r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist, - name, &new_objv); - if (r < 0) { - return r; - } - - // write the info with updated name - info.set_name(std::string{new_name}); - r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - if (r < 0) { - // on failure, unlink the new name - (void) impl->remove(dpp, y, pool, new_oid, &new_objv); - return r; - } - - // unlink the old name - (void) impl->remove(dpp, y, pool, old_oid, nullptr); - - zone_name = new_name; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - const auto& pool = impl->zone_pool; - const auto info_oid = zone_info_oid(zone_id); - int r = impl->remove(dpp, y, pool, info_oid, &objv); - if (r < 0) { - return r; - } - const auto name_oid = zone_name_oid(zone_name); - (void) impl->remove(dpp, y, pool, name_oid, nullptr); - return 0; - } -}; // RadosZoneWriter - - -int RadosConfigStore::create_zone(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneParams& info, - std::unique_ptr* writer) -{ - if (info.get_id().empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.get_name().empty()) { - ldpp_dout(dpp, 0) << "zone cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->zone_pool; - const auto create = exclusive ? Create::MustNotExist : Create::MayExist; - - // write the zone info - const auto info_oid = zone_info_oid(info.get_id()); - RGWObjVersionTracker objv; - objv.generate_new_write_ver(dpp->get_cct()); - - int r = impl->write(dpp, y, pool, info_oid, create, info, &objv); - if (r < 0) { - return r; - } - - // write the zone name - const auto name_oid = zone_name_oid(info.get_name()); - const auto name = RGWNameToId{info.get_id()}; - RGWObjVersionTracker name_objv; - name_objv.generate_new_write_ver(dpp->get_cct()); - - r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv); - if (r < 0) { - (void) impl->remove(dpp, y, pool, info_oid, &objv); - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_zone_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_id, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->zone_pool; - const auto info_oid = zone_info_oid(zone_id); - RGWObjVersionTracker objv; - - int r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_zone_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->zone_pool; - - // look up zone id by name - const auto name_oid = zone_name_oid(zone_name); - RGWNameToId name; - int r = impl->read(dpp, y, pool, name_oid, name, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = zone_info_oid(name.obj_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_default_zone(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneParams& info, 
- std::unique_ptr* writer) -{ - const auto& pool = impl->zone_pool; - - // read default zone id - const auto default_oid = default_zone_oid(dpp->get_cct()->_conf, realm_id); - RGWDefaultSystemMetaObjInfo default_info; - int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = zone_info_oid(default_info.default_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::list_zone_names(const DoutPrefixProvider* dpp, - optional_yield y, - const std::string& marker, - std::span entries, - sal::ListResult& result) -{ - const auto& pool = impl->zone_pool; - constexpr auto prefix = [] (std::string oid) -> std::string { - if (!oid.starts_with(zone_names_oid_prefix)) { - return {}; - } - return oid.substr(zone_names_oid_prefix.size()); - }; - return impl->list(dpp, y, pool, marker, prefix, entries, result); -} - -} // namespace rgw::rados diff --git a/src/rgw/store/rados/config/zonegroup.cc b/src/rgw/store/rados/config/zonegroup.cc deleted file mode 100644 index 984fda17407..00000000000 --- a/src/rgw/store/rados/config/zonegroup.cc +++ /dev/null @@ -1,315 +0,0 @@ -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2022 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include "common/dout.h" -#include "common/errno.h" -#include "rgw_zone.h" -#include "store/rados/config/store.h" - -#include "impl.h" - -namespace rgw::rados { - -// zonegroup oids -constexpr std::string_view zonegroup_names_oid_prefix = "zonegroups_names."; -constexpr std::string_view zonegroup_info_oid_prefix = "zonegroup_info."; -constexpr std::string_view default_zonegroup_info_oid = "default.zonegroup"; - -static std::string zonegroup_info_oid(std::string_view zonegroup_id) -{ - return string_cat_reserve(zonegroup_info_oid_prefix, zonegroup_id); -} -static std::string zonegroup_name_oid(std::string_view zonegroup_id) -{ - return string_cat_reserve(zonegroup_names_oid_prefix, zonegroup_id); -} -static std::string default_zonegroup_oid(const ceph::common::ConfigProxy& conf, - std::string_view realm_id) -{ - const auto prefix = name_or_default(conf->rgw_default_zonegroup_info_oid, - default_zonegroup_info_oid); - return fmt::format("{}.{}", prefix, realm_id); -} - - -int RadosConfigStore::write_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - bool exclusive, - std::string_view realm_id, - std::string_view zonegroup_id) -{ - const auto& pool = impl->zonegroup_pool; - const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id); - const auto create = exclusive ? 
Create::MustNotExist : Create::MayExist; - - RGWDefaultSystemMetaObjInfo default_info; - default_info.default_id = zonegroup_id; - - return impl->write(dpp, y, pool, oid, create, default_info, nullptr); -} - -int RadosConfigStore::read_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - std::string& zonegroup_id) -{ - const auto& pool = impl->zonegroup_pool; - const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id); - - RGWDefaultSystemMetaObjInfo default_info; - int r = impl->read(dpp, y, pool, oid, default_info, nullptr); - if (r >= 0) { - zonegroup_id = default_info.default_id; - } - return r; -} - -int RadosConfigStore::delete_default_zonegroup_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id) -{ - const auto& pool = impl->zonegroup_pool; - const auto oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id); - return impl->remove(dpp, y, pool, oid, nullptr); -} - - -class RadosZoneGroupWriter : public sal::ZoneGroupWriter { - ConfigImpl* impl; - RGWObjVersionTracker objv; - std::string zonegroup_id; - std::string zonegroup_name; - public: - RadosZoneGroupWriter(ConfigImpl* impl, RGWObjVersionTracker objv, - std::string_view zonegroup_id, - std::string_view zonegroup_name) - : impl(impl), objv(std::move(objv)), - zonegroup_id(zonegroup_id), zonegroup_name(zonegroup_name) - { - } - - int write(const DoutPrefixProvider* dpp, optional_yield y, - const RGWZoneGroup& info) override - { - if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) { - return -EINVAL; // can't modify zonegroup id or name directly - } - - const auto& pool = impl->zonegroup_pool; - const auto info_oid = zonegroup_info_oid(info.get_id()); - return impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - } - - int rename(const DoutPrefixProvider* dpp, optional_yield y, - RGWZoneGroup& info, std::string_view new_name) override - { - if (zonegroup_id != info.get_id() || zonegroup_name != info.get_name()) { - return -EINVAL; // can't modify zonegroup id or name directly - } - if (new_name.empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->zonegroup_pool; - const auto name = RGWNameToId{info.get_id()}; - const auto info_oid = zonegroup_info_oid(info.get_id()); - const auto old_oid = zonegroup_name_oid(info.get_name()); - const auto new_oid = zonegroup_name_oid(new_name); - - // link the new name - RGWObjVersionTracker new_objv; - new_objv.generate_new_write_ver(dpp->get_cct()); - int r = impl->write(dpp, y, pool, new_oid, Create::MustNotExist, - name, &new_objv); - if (r < 0) { - return r; - } - - // write the info with updated name - info.set_name(std::string{new_name}); - r = impl->write(dpp, y, pool, info_oid, Create::MustExist, info, &objv); - if (r < 0) { - // on failure, unlink the new name - (void) impl->remove(dpp, y, pool, new_oid, &new_objv); - return r; - } - - // unlink the old name - (void) impl->remove(dpp, y, pool, old_oid, nullptr); - - zonegroup_name = new_name; - return 0; - } - - int remove(const DoutPrefixProvider* dpp, optional_yield y) override - { - const auto& pool = impl->zonegroup_pool; - const auto info_oid = zonegroup_info_oid(zonegroup_id); - int r = impl->remove(dpp, y, pool, info_oid, &objv); - if (r < 0) { - return r; - } - const auto name_oid = zonegroup_name_oid(zonegroup_name); - (void) impl->remove(dpp, y, pool, name_oid, nullptr); - return 0; - } -}; // RadosZoneGroupWriter 
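Every resource in this store is double-indexed, as the oid constants above show: `zonegroup_info.<id>` holds the payload and `zonegroup_names.<name>` a tiny name-to-id record, which is why each read-by-name costs two reads. A standalone model of the lookup (ids and payloads made up):

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

static std::map<std::string, std::string> pool;   // oid -> value stand-in

std::optional<std::string> read_by_name(const std::string& name) {
  auto n = pool.find("zonegroup_names." + name);  // 1st read: name -> id
  if (n == pool.end()) return std::nullopt;
  auto i = pool.find("zonegroup_info." + n->second);  // 2nd read: id -> info
  if (i == pool.end()) return std::nullopt;
  return i->second;
}

int main() {
  pool["zonegroup_names.us"] = "9f3e";
  pool["zonegroup_info.9f3e"] = "{...zonegroup json...}";
  if (auto v = read_by_name("us")) std::cout << *v << "\n";
}
```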
- - -int RadosConfigStore::create_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, bool exclusive, - const RGWZoneGroup& info, - std::unique_ptr* writer) -{ - if (info.get_id().empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty id" << dendl; - return -EINVAL; - } - if (info.get_name().empty()) { - ldpp_dout(dpp, 0) << "zonegroup cannot have an empty name" << dendl; - return -EINVAL; - } - - const auto& pool = impl->zonegroup_pool; - const auto create = exclusive ? Create::MustNotExist : Create::MayExist; - - // write the zonegroup info - const auto info_oid = zonegroup_info_oid(info.get_id()); - RGWObjVersionTracker objv; - objv.generate_new_write_ver(dpp->get_cct()); - - int r = impl->write(dpp, y, pool, info_oid, create, info, &objv); - if (r < 0) { - return r; - } - - // write the zonegroup name - const auto name_oid = zonegroup_name_oid(info.get_name()); - const auto name = RGWNameToId{info.get_id()}; - RGWObjVersionTracker name_objv; - name_objv.generate_new_write_ver(dpp->get_cct()); - - r = impl->write(dpp, y, pool, name_oid, create, name, &name_objv); - if (r < 0) { - (void) impl->remove(dpp, y, pool, info_oid, &objv); - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_zonegroup_by_id(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->zonegroup_pool; - const auto info_oid = zonegroup_info_oid(zonegroup_id); - RGWObjVersionTracker objv; - - int r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_zonegroup_by_name(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->zonegroup_pool; - - // look up zonegroup id by name - RGWNameToId name; - const auto name_oid = zonegroup_name_oid(zonegroup_name); - int r = impl->read(dpp, y, pool, name_oid, name, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = zonegroup_info_oid(name.obj_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::read_default_zonegroup(const DoutPrefixProvider* dpp, - optional_yield y, - std::string_view realm_id, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - const auto& pool = impl->zonegroup_pool; - - // read default zonegroup id - RGWDefaultSystemMetaObjInfo default_info; - const auto default_oid = default_zonegroup_oid(dpp->get_cct()->_conf, realm_id); - int r = impl->read(dpp, y, pool, default_oid, default_info, nullptr); - if (r < 0) { - return r; - } - - const auto info_oid = zonegroup_info_oid(default_info.default_id); - RGWObjVersionTracker objv; - r = impl->read(dpp, y, pool, info_oid, info, &objv); - if (r < 0) { - return r; - } - - if (writer) { - *writer = std::make_unique( - impl.get(), std::move(objv), info.get_id(), info.get_name()); - } - return 0; -} - -int RadosConfigStore::list_zonegroup_names(const DoutPrefixProvider* dpp, - optional_yield y, - const std::string& marker, - std::span entries, - 
sal::ListResult& result) -{ - const auto& pool = impl->zonegroup_pool; - constexpr auto prefix = [] (std::string oid) -> std::string { - if (!oid.starts_with(zonegroup_names_oid_prefix)) { - return {}; - } - return oid.substr(zonegroup_names_oid_prefix.size()); - }; - return impl->list(dpp, y, pool, marker, prefix, entries, result); -} - -} // namespace rgw::rados diff --git a/src/rgw/store/rados/rgw_bucket.cc b/src/rgw/store/rados/rgw_bucket.cc deleted file mode 100644 index 7f600fe457e..00000000000 --- a/src/rgw/store/rados/rgw_bucket.cc +++ /dev/null @@ -1,2971 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_acl_s3.h" -#include "rgw_tag_s3.h" - -#include "rgw_bucket.h" -#include "rgw_op.h" -#include "rgw_bucket_sync.h" - -#include "services/svc_zone.h" -#include "services/svc_bucket.h" -#include "services/svc_user.h" - -#include "rgw_reshard.h" - -// stolen from src/cls/version/cls_version.cc -#define VERSION_ATTR "ceph.objclass.version" - -#include "cls/user/cls_user_types.h" - -#include "rgw_sal_rados.h" - -#define dout_subsys ceph_subsys_rgw - -// seconds for timeout during RGWBucket::check_object_index -constexpr uint64_t BUCKET_TAG_QUICK_TIMEOUT = 30; - -using namespace std; - -// default number of entries to list with each bucket listing call -// (use marker to bridge between calls) -static constexpr size_t listing_max_entries = 1000; - -/* - * The tenant_name is always returned on purpose. May be empty, of course. - */ -static void parse_bucket(const string& bucket, - string *tenant_name, - string *bucket_name, - string *bucket_instance = nullptr /* optional */) -{ - /* - * expected format: [tenant/]bucket:bucket_instance - */ - int pos = bucket.find('/'); - if (pos >= 0) { - *tenant_name = bucket.substr(0, pos); - } else { - tenant_name->clear(); - } - string bn = bucket.substr(pos + 1); - pos = bn.find (':'); - if (pos < 0) { - *bucket_name = std::move(bn); - return; - } - *bucket_name = bn.substr(0, pos); - if (bucket_instance) { - *bucket_instance = bn.substr(pos + 1); - } - - /* - * deal with the possible tenant:bucket:bucket_instance case - */ - if (tenant_name->empty()) { - pos = bucket_instance->find(':'); - if (pos >= 0) { - *tenant_name = *bucket_name; - *bucket_name = bucket_instance->substr(0, pos); - *bucket_instance = bucket_instance->substr(pos + 1); - } - } -} - -static void dump_mulipart_index_results(list& objs_to_unlink, - Formatter *f) -{ - for (const auto& o : objs_to_unlink) { - f->dump_string("object", o.name); - } -} - -void check_bad_user_bucket_mapping(rgw::sal::Driver* driver, rgw::sal::User* user, - bool fix, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - rgw::sal::BucketList user_buckets; - string marker; - - CephContext *cct = driver->ctx(); - - size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk; - - do { - int ret = user->list_buckets(dpp, marker, string(), max_entries, false, user_buckets, y); - if (ret < 0) { - ldout(driver->ctx(), 0) << "failed to read user buckets: " - << cpp_strerror(-ret) << dendl; - return; - } - - map>& buckets = user_buckets.get_buckets(); - for (auto i = buckets.begin(); - i != buckets.end(); - ++i) { - marker = i->first; - - auto& bucket = i->second; - - std::unique_ptr actual_bucket; - int r = driver->get_bucket(dpp, user, user->get_tenant(), bucket->get_name(), &actual_bucket, null_yield); - if (r < 0) { - ldout(driver->ctx(), 0) << "could not get bucket info for bucket=" << bucket << dendl; - continue; - } - - if 
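`parse_bucket()` at the top of this file accepts `[tenant/]bucket[:instance]` as well as the legacy `tenant:bucket:instance` spelling. The same splitting rules, reimplemented standalone for illustration only:

```cpp
#include <iostream>
#include <string>

struct Parsed { std::string tenant, bucket, instance; };

Parsed parse(const std::string& s) {
  Parsed p;
  auto slash = s.find('/');
  std::string rest = s;
  if (slash != std::string::npos) {
    p.tenant = s.substr(0, slash);
    rest = s.substr(slash + 1);
  }
  auto colon = rest.find(':');
  if (colon == std::string::npos) { p.bucket = rest; return p; }
  p.bucket = rest.substr(0, colon);
  p.instance = rest.substr(colon + 1);
  // tenant:bucket:instance with no '/': shift the fields left once
  if (p.tenant.empty()) {
    auto c2 = p.instance.find(':');
    if (c2 != std::string::npos) {
      p.tenant = p.bucket;
      p.bucket = p.instance.substr(0, c2);
      p.instance = p.instance.substr(c2 + 1);
    }
  }
  return p;
}

int main() {
  for (std::string s : {"b1", "t/b1:inst", "t:b1:inst"}) {
    auto p = parse(s);
    std::cout << s << " -> tenant=" << p.tenant << " bucket=" << p.bucket
              << " instance=" << p.instance << "\n";
  }
}
```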
(actual_bucket->get_name().compare(bucket->get_name()) != 0 || - actual_bucket->get_tenant().compare(bucket->get_tenant()) != 0 || - actual_bucket->get_marker().compare(bucket->get_marker()) != 0 || - actual_bucket->get_bucket_id().compare(bucket->get_bucket_id()) != 0) { - cout << "bucket info mismatch: expected " << actual_bucket << " got " << bucket << std::endl; - if (fix) { - cout << "fixing" << std::endl; - r = actual_bucket->chown(dpp, user, nullptr, null_yield); - if (r < 0) { - cerr << "failed to fix bucket: " << cpp_strerror(-r) << std::endl; - } - } - } - } - } while (user_buckets.is_truncated()); -} - -// returns true if entry is in the empty namespace. note: function -// type conforms to type RGWBucketListNameFilter -bool rgw_bucket_object_check_filter(const std::string& oid) -{ - const static std::string empty_ns; - rgw_obj_key key; // thrown away but needed for parsing - return rgw_obj_key::oid_to_key_in_ns(oid, &key, empty_ns); -} - -int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, rgw_obj_key& key) -{ - if (key.instance.empty()) { - key.instance = "null"; - } - - std::unique_ptr object = bucket->get_object(key); - - return object->delete_object(dpp, null_yield); -} - -static void set_err_msg(std::string *sink, std::string msg) -{ - if (sink && !msg.empty()) - *sink = msg; -} - -int RGWBucket::init(rgw::sal::Driver* _driver, RGWBucketAdminOpState& op_state, - optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg) -{ - if (!_driver) { - set_err_msg(err_msg, "no storage!"); - return -EINVAL; - } - - driver = _driver; - - std::string bucket_name = op_state.get_bucket_name(); - - if (bucket_name.empty() && op_state.get_user_id().empty()) - return -EINVAL; - - user = driver->get_user(op_state.get_user_id()); - std::string tenant = user->get_tenant(); - - // split possible tenant/name - auto pos = bucket_name.find('/'); - if (pos != string::npos) { - tenant = bucket_name.substr(0, pos); - bucket_name = bucket_name.substr(pos + 1); - } - - int r = driver->get_bucket(dpp, user.get(), tenant, bucket_name, &bucket, y); - if (r < 0) { - set_err_msg(err_msg, "failed to fetch bucket info for bucket=" + bucket_name); - return r; - } - - op_state.set_bucket(bucket->clone()); - - if (!rgw::sal::User::empty(user.get())) { - r = user->load_user(dpp, y); - if (r < 0) { - set_err_msg(err_msg, "failed to fetch user info"); - return r; - } - } - - op_state.display_name = user->get_display_name(); - - clear_failure(); - return 0; -} - -bool rgw_find_bucket_by_id(const DoutPrefixProvider *dpp, CephContext *cct, rgw::sal::Driver* driver, - const string& marker, const string& bucket_id, rgw_bucket* bucket_out) -{ - void *handle = NULL; - bool truncated = false; - string s; - - int ret = driver->meta_list_keys_init(dpp, "bucket.instance", marker, &handle); - if (ret < 0) { - cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl; - driver->meta_list_keys_complete(handle); - return -ret; - } - do { - list keys; - ret = driver->meta_list_keys_next(dpp, handle, 1000, keys, &truncated); - if (ret < 0) { - cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl; - driver->meta_list_keys_complete(handle); - return -ret; - } - for (list::iterator iter = keys.begin(); iter != keys.end(); ++iter) { - s = *iter; - ret = rgw_bucket_parse_bucket_key(cct, s, bucket_out, nullptr); - if (ret < 0) { - continue; - } - if (bucket_id == bucket_out->bucket_id) { - driver->meta_list_keys_complete(handle); - return true; - 
} - } - } while (truncated); - driver->meta_list_keys_complete(handle); - return false; -} - -int RGWBucket::chown(RGWBucketAdminOpState& op_state, const string& marker, - optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg) -{ - int ret = bucket->chown(dpp, user.get(), user.get(), y, &marker); - if (ret < 0) { - set_err_msg(err_msg, "Failed to change object ownership: " + cpp_strerror(-ret)); - } - - return ret; -} - -int RGWBucket::set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg) -{ - bucket = op_state.get_bucket()->clone(); - - bucket->get_info().quota = op_state.quota; - int r = bucket->put_info(dpp, false, real_time()); - if (r < 0) { - set_err_msg(err_msg, "ERROR: failed writing bucket instance info: " + cpp_strerror(-r)); - return r; - } - return r; -} - -int RGWBucket::remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, std::string *err_msg) -{ - std::string object_name = op_state.get_object_name(); - - rgw_obj_key key(object_name); - - bucket = op_state.get_bucket()->clone(); - - int ret = rgw_remove_object(dpp, driver, bucket.get(), key); - if (ret < 0) { - set_err_msg(err_msg, "unable to remove object" + cpp_strerror(-ret)); - return ret; - } - - return 0; -} - -static void dump_bucket_index(const vector& objs, Formatter *f) -{ - for (auto iter = objs.begin(); iter != objs.end(); ++iter) { - f->dump_string("object", iter->key.name); - } -} - -static void dump_bucket_usage(map& stats, Formatter *formatter) -{ - map::iterator iter; - - formatter->open_object_section("usage"); - for (iter = stats.begin(); iter != stats.end(); ++iter) { - RGWStorageStats& s = iter->second; - formatter->open_object_section(to_string(iter->first)); - s.dump(formatter); - formatter->close_section(); - } - formatter->close_section(); -} - -static void dump_index_check(map existing_stats, - map calculated_stats, - Formatter *formatter) -{ - formatter->open_object_section("check_result"); - formatter->open_object_section("existing_header"); - dump_bucket_usage(existing_stats, formatter); - formatter->close_section(); - formatter->open_object_section("calculated_header"); - dump_bucket_usage(calculated_stats, formatter); - formatter->close_section(); - formatter->close_section(); -} - -int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, - const DoutPrefixProvider *dpp, - std::string *err_msg) -{ - const bool fix_index = op_state.will_fix_index(); - - bucket = op_state.get_bucket()->clone(); - - rgw::sal::Bucket::ListParams params; - params.list_versions = true; - params.ns = RGW_OBJ_NS_MULTIPART; - - std::map meta_objs; - std::map all_objs; - bool is_truncated; - do { - rgw::sal::Bucket::ListResults results; - int r = bucket->list(dpp, params, listing_max_entries, results, null_yield); - if (r < 0) { - set_err_msg(err_msg, "failed to list objects in bucket=" + bucket->get_name() + - " err=" + cpp_strerror(-r)); - - return r; - } - is_truncated = results.is_truncated; - - for (const auto& o : results.objs) { - rgw_obj_index_key key = o.key; - rgw_obj obj(bucket->get_key(), key); - std::string oid = obj.get_oid(); - - int pos = oid.find_last_of('.'); - if (pos < 0) { - /* obj has no suffix */ - all_objs[key] = oid; - } else { - /* obj has suffix */ - std::string name = oid.substr(0, pos); - std::string suffix = oid.substr(pos + 1); - - if (suffix.compare("meta") == 0) { - meta_objs[name] = true; - } else { - all_objs[key] = name; - } - } - } - } while 
(is_truncated); - - std::list objs_to_unlink; - Formatter *f = flusher.get_formatter(); - - f->open_array_section("invalid_multipart_entries"); - - for (const auto& o : all_objs) { - const std::string& name = o.second; - if (meta_objs.find(name) == meta_objs.end()) { - objs_to_unlink.push_back(o.first); - } - - if (objs_to_unlink.size() > listing_max_entries) { - if (fix_index) { - // note: under rados this removes directly from rados index objects - int r = bucket->remove_objs_from_index(dpp, objs_to_unlink); - if (r < 0) { - set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + - cpp_strerror(-r)); - return r; - } - } - - dump_mulipart_index_results(objs_to_unlink, f); - flusher.flush(); - objs_to_unlink.clear(); - } - } - - if (fix_index) { - // note: under rados this removes directly from rados index objects - int r = bucket->remove_objs_from_index(dpp, objs_to_unlink); - if (r < 0) { - set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " + - cpp_strerror(-r)); - - return r; - } - } - - dump_mulipart_index_results(objs_to_unlink, f); - f->close_section(); - flusher.flush(); - - return 0; -} - -int RGWBucket::check_object_index(const DoutPrefixProvider *dpp, - RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y, - std::string *err_msg) -{ - - bool fix_index = op_state.will_fix_index(); - - if (!fix_index) { - set_err_msg(err_msg, "check-objects flag requires fix index enabled"); - return -EINVAL; - } - - // use a quicker/shorter tag timeout during this process - bucket->set_tag_timeout(dpp, BUCKET_TAG_QUICK_TIMEOUT); - - rgw::sal::Bucket::ListResults results; - results.is_truncated = true; - - Formatter *formatter = flusher.get_formatter(); - formatter->open_object_section("objects"); - - while (results.is_truncated) { - rgw::sal::Bucket::ListParams params; - params.marker = results.next_marker; - params.force_check_filter = rgw_bucket_object_check_filter; - - int r = bucket->list(dpp, params, listing_max_entries, results, y); - - if (r == -ENOENT) { - break; - } else if (r < 0) { - set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r)); - } - - dump_bucket_index(results.objs, formatter); - flusher.flush(); - } - - formatter->close_section(); - - // restore normal tag timeout for bucket - bucket->set_tag_timeout(dpp, 0); - - return 0; -} - - -int RGWBucket::check_index(const DoutPrefixProvider *dpp, - RGWBucketAdminOpState& op_state, - map& existing_stats, - map& calculated_stats, - std::string *err_msg) -{ - bool fix_index = op_state.will_fix_index(); - - int r = bucket->check_index(dpp, existing_stats, calculated_stats); - if (r < 0) { - set_err_msg(err_msg, "failed to check index error=" + cpp_strerror(-r)); - return r; - } - - if (fix_index) { - r = bucket->rebuild_index(dpp); - if (r < 0) { - set_err_msg(err_msg, "failed to rebuild index err=" + cpp_strerror(-r)); - return r; - } - } - - return 0; -} - -int RGWBucket::sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg) -{ - if (!driver->is_meta_master()) { - set_err_msg(err_msg, "ERROR: failed to update bucket sync: only allowed on meta master zone"); - return -EINVAL; - } - bool sync = op_state.will_sync_bucket(); - if (sync) { - bucket->get_info().flags &= ~BUCKET_DATASYNC_DISABLED; - } else { - bucket->get_info().flags |= BUCKET_DATASYNC_DISABLED; - } - - // when writing this metadata, RGWSI_BucketIndex_RADOS::handle_overwrite() - // will write the corresponding datalog and bilog entries - int r = 
-int RGWBucket::sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg)
-{
-  if (!driver->is_meta_master()) {
-    set_err_msg(err_msg, "ERROR: failed to update bucket sync: only allowed on meta master zone");
-    return -EINVAL;
-  }
-  bool sync = op_state.will_sync_bucket();
-  if (sync) {
-    bucket->get_info().flags &= ~BUCKET_DATASYNC_DISABLED;
-  } else {
-    bucket->get_info().flags |= BUCKET_DATASYNC_DISABLED;
-  }
-
-  // when writing this metadata, RGWSI_BucketIndex_RADOS::handle_overwrite()
-  // will write the corresponding datalog and bilog entries
-  int r = bucket->put_info(dpp, false, real_time());
-  if (r < 0) {
-    set_err_msg(err_msg, "ERROR: failed writing bucket instance info: " + cpp_strerror(-r));
-    return r;
-  }
-
-  return 0;
-}
-
-
-int RGWBucket::policy_bl_to_stream(bufferlist& bl, ostream& o)
-{
-  RGWAccessControlPolicy_S3 policy(g_ceph_context);
-  int ret = decode_bl(bl, policy);
-  if (ret < 0) {
-    ldout(driver->ctx(), 0) << "failed to decode RGWAccessControlPolicy" << dendl;
-  }
-  policy.to_xml(o);
-  return 0;
-}
-
-int rgw_object_get_attr(const DoutPrefixProvider *dpp,
-                        rgw::sal::Driver* driver, rgw::sal::Object* obj,
-                        const char* attr_name, bufferlist& out_bl, optional_yield y)
-{
-  std::unique_ptr<rgw::sal::Object::ReadOp> rop = obj->get_read_op();
-
-  return rop->get_attr(dpp, attr_name, out_bl, y);
-}
-
-int RGWBucket::get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp)
-{
-  int ret;
-  std::string object_name = op_state.get_object_name();
-
-  bucket = op_state.get_bucket()->clone();
-
-  if (!object_name.empty()) {
-    bufferlist bl;
-    std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(rgw_obj_key(object_name));
-
-    ret = rgw_object_get_attr(dpp, driver, obj.get(), RGW_ATTR_ACL, bl, y);
-    if (ret < 0) {
-      return ret;
-    }
-
-    ret = decode_bl(bl, policy);
-    if (ret < 0) {
-      ldout(driver->ctx(), 0) << "failed to decode RGWAccessControlPolicy" << dendl;
-    }
-    return ret;
-  }
-
-  map<string, bufferlist>::iterator aiter = bucket->get_attrs().find(RGW_ATTR_ACL);
-  if (aiter == bucket->get_attrs().end()) {
-    return -ENOENT;
-  }
-
-  ret = decode_bl(aiter->second, policy);
-  if (ret < 0) {
-    ldout(driver->ctx(), 0) << "failed to decode RGWAccessControlPolicy" << dendl;
-  }
-
-  return ret;
-}
-
-
-int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                 RGWAccessControlPolicy& policy, const DoutPrefixProvider *dpp)
-{
-  RGWBucket bucket;
-
-  int ret = bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-
-  ret = bucket.get_policy(op_state, policy, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-
-  return 0;
-}
-
-/* Wrappers to facilitate RESTful interface */
-
-
-int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                 RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp)
-{
-  RGWAccessControlPolicy policy(driver->ctx());
-
-  int ret = get_policy(driver, op_state, policy, dpp);
-  if (ret < 0)
-    return ret;
-
-  Formatter *formatter = flusher.get_formatter();
-
-  flusher.start(0);
-
-  formatter->open_object_section("policy");
-  policy.dump(formatter);
-  formatter->close_section();
-
-  flusher.flush();
-
-  return 0;
-}
-
-int RGWBucketAdminOp::dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                     ostream& os, const DoutPrefixProvider *dpp)
-{
-  RGWAccessControlPolicy_S3 policy(driver->ctx());
-
-  int ret = get_policy(driver, op_state, policy, dpp);
-  if (ret < 0)
-    return ret;
-
-  policy.to_xml(os);
-
-  return 0;
-}
-
-int RGWBucketAdminOp::unlink(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp)
-{
-  RGWBucket bucket;
-
-  int ret = bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-
-  return static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(op_state.get_user_id(), op_state.get_bucket()->get_info().bucket, null_yield, dpp, true);
-}
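// Usage sketch for rgw_object_get_attr() above, assuming a valid driver and
// bucket and a null_yield context (object name is illustrative only):
//
//   bufferlist acl_bl;
//   std::unique_ptr<rgw::sal::Object> obj =
//       bucket->get_object(rgw_obj_key("photo.jpg"));
//   int r = rgw_object_get_attr(dpp, driver, obj.get(), RGW_ATTR_ACL,
//                               acl_bl, null_yield);
//   if (r >= 0) {
//     RGWAccessControlPolicy policy;
//     decode_bl(acl_bl, policy);  // same decode path RGWBucket::get_policy() takes
//   }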
-int RGWBucketAdminOp::link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, string *err)
-{
-  if (!op_state.is_user_op()) {
-    set_err_msg(err, "empty user id");
-    return -EINVAL;
-  }
-
-  RGWBucket bucket;
-  int ret = bucket.init(driver, op_state, null_yield, dpp, err);
-  if (ret < 0)
-    return ret;
-
-  string bucket_id = op_state.get_bucket_id();
-  std::string display_name = op_state.get_user_display_name();
-  std::unique_ptr<rgw::sal::Bucket> loc_bucket;
-  std::unique_ptr<rgw::sal::Bucket> old_bucket;
-
-  loc_bucket = op_state.get_bucket()->clone();
-
-  if (!bucket_id.empty() && bucket_id != loc_bucket->get_bucket_id()) {
-    set_err_msg(err,
-                "specified bucket id does not match " + loc_bucket->get_bucket_id());
-    return -EINVAL;
-  }
-
-  old_bucket = loc_bucket->clone();
-
-  loc_bucket->get_key().tenant = op_state.get_user_id().tenant;
-
-  if (!op_state.new_bucket_name.empty()) {
-    auto pos = op_state.new_bucket_name.find('/');
-    if (pos != string::npos) {
-      loc_bucket->get_key().tenant = op_state.new_bucket_name.substr(0, pos);
-      loc_bucket->get_key().name = op_state.new_bucket_name.substr(pos + 1);
-    } else {
-      loc_bucket->get_key().name = op_state.new_bucket_name;
-    }
-  }
-
-  RGWObjVersionTracker objv_tracker;
-  RGWObjVersionTracker old_version = loc_bucket->get_info().objv_tracker;
-
-  map<string, bufferlist>::iterator aiter = loc_bucket->get_attrs().find(RGW_ATTR_ACL);
-  if (aiter == loc_bucket->get_attrs().end()) {
-    // should never happen; only pre-argonaut buckets lacked this.
-    ldpp_dout(dpp, 0) << "WARNING: can't bucket link because no acl on bucket=" << old_bucket << dendl;
-    set_err_msg(err,
-                "While crossing the Anavros you have displeased the goddess Hera."
-                " You must sacrifice your ancient bucket " + loc_bucket->get_bucket_id());
-    return -EINVAL;
-  }
-  bufferlist& aclbl = aiter->second;
-  RGWAccessControlPolicy policy;
-  ACLOwner owner;
-  try {
-    auto iter = aclbl.cbegin();
-    decode(policy, iter);
-    owner = policy.get_owner();
-  } catch (buffer::error& e) {
-    set_err_msg(err, "couldn't decode policy");
-    return -EIO;
-  }
-
-  int r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(owner.get_id(), old_bucket->get_info().bucket, null_yield, dpp, false);
-  if (r < 0) {
-    set_err_msg(err, "could not unlink policy from user " + owner.get_id().to_str());
-    return r;
-  }
-
-  // now update the user for the bucket...
-  if (display_name.empty()) {
-    ldpp_dout(dpp, 0) << "WARNING: user " << op_state.get_user_id() << " has no display name set" << dendl;
-  }
-
-  RGWAccessControlPolicy policy_instance;
-  policy_instance.create_default(op_state.get_user_id(), display_name);
-  owner = policy_instance.get_owner();
-
-  aclbl.clear();
-  policy_instance.encode(aclbl);
-
-  bool exclusive = false;
-  loc_bucket->get_info().owner = op_state.get_user_id();
-  if (*loc_bucket != *old_bucket) {
-    loc_bucket->get_info().bucket = loc_bucket->get_key();
-    loc_bucket->get_info().objv_tracker.version_for_read()->ver = 0;
-    exclusive = true;
-  }
-
-  r = loc_bucket->put_info(dpp, exclusive, ceph::real_time());
-  if (r < 0) {
-    set_err_msg(err, "ERROR: failed writing bucket instance info: " + cpp_strerror(-r));
-    return r;
-  }
-
-  /* link to user */
-  RGWBucketEntryPoint ep;
-  ep.bucket = loc_bucket->get_info().bucket;
-  ep.owner = op_state.get_user_id();
-  ep.creation_time = loc_bucket->get_info().creation_time;
-  ep.linked = true;
-  rgw::sal::Attrs ep_attrs;
-  rgw_ep_info ep_data{ep, ep_attrs};
-
-  r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->link_bucket(op_state.get_user_id(), loc_bucket->get_info().bucket, loc_bucket->get_info().creation_time, null_yield, dpp, true, &ep_data);
-  if (r < 0) {
-    set_err_msg(err, "failed to relink bucket");
-    return r;
-  }
-
-  if (*loc_bucket != *old_bucket) {
-    // like RGWRados::delete_bucket -- excepting no bucket_index work.
-    r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->remove_bucket_entrypoint_info(
-        old_bucket->get_key(), null_yield, dpp,
-        RGWBucketCtl::Bucket::RemoveParams()
-        .set_objv_tracker(&ep_data.ep_objv));
-    if (r < 0) {
-      set_err_msg(err, "failed to unlink old bucket " + old_bucket->get_tenant() + "/" + old_bucket->get_name());
-      return r;
-    }
-    r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->remove_bucket_instance_info(
-        old_bucket->get_key(), old_bucket->get_info(),
-        null_yield, dpp,
-        RGWBucketCtl::BucketInstance::RemoveParams()
-        .set_objv_tracker(&ep_data.ep_objv));
-    if (r < 0) {
-      set_err_msg(err, "failed to unlink old bucket " + old_bucket->get_tenant() + "/" + old_bucket->get_name());
-      return r;
-    }
-  }
-
-  return 0;
-}
-
-int RGWBucketAdminOp::chown(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const string& marker, const DoutPrefixProvider *dpp, string *err)
-{
-  RGWBucket bucket;
-
-  int ret = bucket.init(driver, op_state, null_yield, dpp, err);
-  if (ret < 0)
-    return ret;
-
-  return bucket.chown(op_state, marker, null_yield, dpp, err);
-
-}
-
-int RGWBucketAdminOp::check_index(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                  RGWFormatterFlusher& flusher, optional_yield y, const DoutPrefixProvider *dpp)
-{
-  int ret;
-  map<RGWObjCategory, RGWStorageStats> existing_stats;
-  map<RGWObjCategory, RGWStorageStats> calculated_stats;
-
-
-  RGWBucket bucket;
-
-  ret = bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-
-  Formatter *formatter = flusher.get_formatter();
-  flusher.start(0);
-
-  ret = bucket.check_bad_index_multipart(op_state, flusher, dpp);
-  if (ret < 0)
-    return ret;
-
-  ret = bucket.check_object_index(dpp, op_state, flusher, y);
-  if (ret < 0)
-    return ret;
-
-  ret = bucket.check_index(dpp, op_state, existing_stats, calculated_stats);
-  if (ret < 0)
-    return ret;
-
-  dump_index_check(existing_stats, calculated_stats, formatter);
-  flusher.flush();
-
-  return 0;
-}
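// The admin-level check above runs three passes over one bucket: multipart
// cleanup, a per-object index listing, then a header check that can rebuild
// the index. Minimal caller sketch (op-state setter names assumed from the
// accessors used above; illustrative only):
//
//   RGWBucketAdminOpState st;
//   st.set_bucket_name("mybucket");
//   st.set_fix_index(true);  // also rebuilds the index if stats diverge
//   int r = RGWBucketAdminOp::check_index(driver, st, flusher, null_yield, dpp);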
-int RGWBucketAdminOp::remove_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                    optional_yield y, const DoutPrefixProvider *dpp,
-                                    bool bypass_gc, bool keep_index_consistent)
-{
-  std::unique_ptr<rgw::sal::Bucket> bucket;
-  std::unique_ptr<rgw::sal::User> user = driver->get_user(op_state.get_user_id());
-
-  int ret = driver->get_bucket(dpp, user.get(), user->get_tenant(), op_state.get_bucket_name(),
-                               &bucket, y);
-  if (ret < 0)
-    return ret;
-
-  if (bypass_gc)
-    ret = bucket->remove_bucket_bypass_gc(op_state.get_max_aio(), keep_index_consistent, y, dpp);
-  else
-    ret = bucket->remove_bucket(dpp, op_state.will_delete_children(),
-                                false, nullptr, y);
-
-  return ret;
-}
-
-int RGWBucketAdminOp::remove_object(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp)
-{
-  RGWBucket bucket;
-
-  int ret = bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-
-  return bucket.remove_object(dpp, op_state);
-}
-
-int RGWBucketAdminOp::sync_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, string *err_msg)
-{
-  RGWBucket bucket;
-  int ret = bucket.init(driver, op_state, null_yield, dpp, err_msg);
-  if (ret < 0)
-  {
-    return ret;
-  }
-  return bucket.sync(op_state, dpp, err_msg);
-}
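// Caller's choice in remove_bucket() above: bypass_gc deletes object data
// inline instead of scheduling it for garbage collection, trading a slower
// admin call for no GC backlog. Sketch using the signature defined above
// (argument values illustrative):
//
//   int r = RGWBucketAdminOp::remove_bucket(driver, op_state, null_yield, dpp,
//                                           true /* bypass_gc */,
//                                           true /* keep_index_consistent */);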
-static int bucket_stats(rgw::sal::Driver* driver,
-                        const std::string& tenant_name,
-                        const std::string& bucket_name,
-                        Formatter *formatter,
-                        const DoutPrefixProvider *dpp)
-{
-  std::unique_ptr<rgw::sal::Bucket> bucket;
-  map<RGWObjCategory, RGWStorageStats> stats;
-
-  real_time mtime;
-  int ret = driver->get_bucket(dpp, nullptr, tenant_name, bucket_name, &bucket, null_yield);
-  if (ret < 0) {
-    return ret;
-  }
-
-  const auto& index = bucket->get_info().get_current_index();
-  if (is_layout_indexless(index)) {
-    cerr << "error, indexless buckets do not maintain stats; bucket=" <<
-      bucket->get_name() << std::endl;
-    return -EINVAL;
-  }
-
-  std::string bucket_ver, master_ver;
-  std::string max_marker;
-  ret = bucket->read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, &max_marker);
-  if (ret < 0) {
-    cerr << "error getting bucket stats bucket=" << bucket->get_name() << " ret=" << ret << std::endl;
-    return ret;
-  }
-
-  utime_t ut(mtime);
-  utime_t ctime_ut(bucket->get_creation_time());
-
-  formatter->open_object_section("stats");
-  formatter->dump_string("bucket", bucket->get_name());
-  formatter->dump_int("num_shards",
-                      bucket->get_info().layout.current_index.layout.normal.num_shards);
-  formatter->dump_string("tenant", bucket->get_tenant());
-  formatter->dump_string("zonegroup", bucket->get_info().zonegroup);
-  formatter->dump_string("placement_rule", bucket->get_info().placement_rule.to_str());
-  ::encode_json("explicit_placement", bucket->get_key().explicit_placement, formatter);
-  formatter->dump_string("id", bucket->get_bucket_id());
-  formatter->dump_string("marker", bucket->get_marker());
-  formatter->dump_stream("index_type") << bucket->get_info().layout.current_index.layout.type;
-  ::encode_json("owner", bucket->get_info().owner, formatter);
-  formatter->dump_string("ver", bucket_ver);
-  formatter->dump_string("master_ver", master_ver);
-  ut.gmtime(formatter->dump_stream("mtime"));
-  ctime_ut.gmtime(formatter->dump_stream("creation_time"));
-  formatter->dump_string("max_marker", max_marker);
-  dump_bucket_usage(stats, formatter);
-  encode_json("bucket_quota", bucket->get_info().quota, formatter);
-
-  // bucket tags
-  auto iter = bucket->get_attrs().find(RGW_ATTR_TAGS);
-  if (iter != bucket->get_attrs().end()) {
-    RGWObjTagSet_S3 tagset;
-    bufferlist::const_iterator piter{&iter->second};
-    try {
-      tagset.decode(piter);
-      tagset.dump(formatter);
-    } catch (buffer::error& err) {
-      cerr << "ERROR: caught buffer:error, couldn't decode TagSet" << std::endl;
-    }
-  }
-
-  // TODO: bucket CORS
-  // TODO: bucket LC
-  formatter->close_section();
-
-  return 0;
-}
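// bucket_stats() above emits, per bucket, roughly this JSON shape; the keys
// follow the dump calls in the function, while the values below are
// placeholders only:
//
//   {
//     "bucket": "mybucket", "num_shards": 11, "tenant": "",
//     "zonegroup": "...", "placement_rule": "default-placement",
//     "id": "...", "marker": "...", "index_type": "Normal",
//     "ver": "...", "master_ver": "...", "mtime": "...", "max_marker": "...",
//     "usage": { "rgw.main": { ... } },
//     "bucket_quota": { ... }
//   }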
-int RGWBucketAdminOp::limit_check(rgw::sal::Driver* driver,
-                                  RGWBucketAdminOpState& op_state,
-                                  const std::list<std::string>& user_ids,
-                                  RGWFormatterFlusher& flusher, optional_yield y,
-                                  const DoutPrefixProvider *dpp,
-                                  bool warnings_only)
-{
-  int ret = 0;
-  const size_t max_entries =
-    driver->ctx()->_conf->rgw_list_buckets_max_chunk;
-
-  const size_t safe_max_objs_per_shard =
-    driver->ctx()->_conf->rgw_safe_max_objects_per_shard;
-
-  uint16_t shard_warn_pct =
-    driver->ctx()->_conf->rgw_shard_warning_threshold;
-  if (shard_warn_pct > 100)
-    shard_warn_pct = 90;
-
-  Formatter *formatter = flusher.get_formatter();
-  flusher.start(0);
-
-  formatter->open_array_section("users");
-
-  for (const auto& user_id : user_ids) {
-
-    formatter->open_object_section("user");
-    formatter->dump_string("user_id", user_id);
-    formatter->open_array_section("buckets");
-
-    string marker;
-    rgw::sal::BucketList buckets;
-    do {
-      std::unique_ptr<rgw::sal::User> user = driver->get_user(rgw_user(user_id));
-
-      ret = user->list_buckets(dpp, marker, string(), max_entries, false, buckets, y);
-
-      if (ret < 0)
-        return ret;
-
-      map<string, std::unique_ptr<rgw::sal::Bucket>>& m_buckets = buckets.get_buckets();
-
-      for (const auto& iter : m_buckets) {
-        auto& bucket = iter.second;
-        uint64_t num_objects = 0;
-
-        marker = bucket->get_name(); /* Casey's location for marker update,
-                                      * as we may now not reach the end of
-                                      * the loop body */
-
-        ret = bucket->load_bucket(dpp, null_yield);
-        if (ret < 0)
-          continue;
-
-        const auto& index = bucket->get_info().get_current_index();
-        if (is_layout_indexless(index)) {
-          continue; // indexless buckets don't have stats
-        }
-
-        /* need stats for num_entries */
-        string bucket_ver, master_ver;
-        std::map<RGWObjCategory, RGWStorageStats> stats;
-        ret = bucket->read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, nullptr);
-
-        if (ret < 0)
-          continue;
-
-        for (const auto& s : stats) {
-          num_objects += s.second.num_objects;
-        }
-
-        const uint32_t num_shards = rgw::num_shards(index.layout.normal);
-        uint64_t objs_per_shard =
-          (num_shards) ? num_objects/num_shards : num_objects;
-        {
-          bool warn;
-          stringstream ss;
-          uint64_t fill_pct = objs_per_shard * 100 / safe_max_objs_per_shard;
-          if (fill_pct > 100) {
-            ss << "OVER " << fill_pct << "%";
-            warn = true;
-          } else if (fill_pct >= shard_warn_pct) {
-            ss << "WARN " << fill_pct << "%";
-            warn = true;
-          } else {
-            ss << "OK";
-            warn = false;
-          }
-
-          if (warn || !warnings_only) {
-            formatter->open_object_section("bucket");
-            formatter->dump_string("bucket", bucket->get_name());
-            formatter->dump_string("tenant", bucket->get_tenant());
-            formatter->dump_int("num_objects", num_objects);
-            formatter->dump_int("num_shards", num_shards);
-            formatter->dump_int("objects_per_shard", objs_per_shard);
-            formatter->dump_string("fill_status", ss.str());
-            formatter->close_section();
-          }
-        }
-      }
-      formatter->flush(cout);
-    } while (buckets.is_truncated()); /* foreach: bucket */
-
-    formatter->close_section();
-    formatter->close_section();
-    formatter->flush(cout);
-
-  } /* foreach: user_id */
-
-  formatter->close_section();
-  formatter->flush(cout);
-
-  return ret;
-} /* RGWBucketAdminOp::limit_check */
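// The fill computation above, restated in one place: integer percent of the
// configured per-shard safety limit, with 90% as the fallback when the
// warning-threshold option is out of range. (Self-contained restatement of
// the logic above; constants come from the conf options named there.)
//
//   uint64_t objs_per_shard = num_shards ? num_objects / num_shards : num_objects;
//   uint64_t fill_pct = objs_per_shard * 100 / safe_max_objs_per_shard;
//   // fill_pct > 100             -> "OVER"
//   // fill_pct >= shard_warn_pct -> "WARN"  (threshold clamps to 90 if > 100)
//   // otherwise                  -> "OK"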
-int RGWBucketAdminOp::info(rgw::sal::Driver* driver,
-                           RGWBucketAdminOpState& op_state,
-                           RGWFormatterFlusher& flusher,
-                           optional_yield y,
-                           const DoutPrefixProvider *dpp)
-{
-  RGWBucket bucket;
-  int ret = 0;
-  const std::string& bucket_name = op_state.get_bucket_name();
-  if (!bucket_name.empty()) {
-    ret = bucket.init(driver, op_state, null_yield, dpp);
-    if (-ENOENT == ret)
-      return -ERR_NO_SUCH_BUCKET;
-    else if (ret < 0)
-      return ret;
-  }
-
-  Formatter *formatter = flusher.get_formatter();
-  flusher.start(0);
-
-  CephContext *cct = driver->ctx();
-
-  const size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk;
-
-  const bool show_stats = op_state.will_fetch_stats();
-  const rgw_user& user_id = op_state.get_user_id();
-  if (op_state.is_user_op()) {
-    formatter->open_array_section("buckets");
-
-    rgw::sal::BucketList buckets;
-    std::unique_ptr<rgw::sal::User> user = driver->get_user(op_state.get_user_id());
-    std::string marker;
-    const std::string empty_end_marker;
-    constexpr bool no_need_stats = false; // set need_stats to false
-
-    do {
-      ret = user->list_buckets(dpp, marker, empty_end_marker, max_entries,
-                               no_need_stats, buckets, y);
-      if (ret < 0) {
-        return ret;
-      }
-
-      const std::string* marker_cursor = nullptr;
-      map<std::string, std::unique_ptr<rgw::sal::Bucket>>& m = buckets.get_buckets();
-
-      for (const auto& i : m) {
-        const std::string& obj_name = i.first;
-        if (!bucket_name.empty() && bucket_name != obj_name) {
-          continue;
-        }
-
-        if (show_stats) {
-          bucket_stats(driver, user_id.tenant, obj_name, formatter, dpp);
-        } else {
-          formatter->dump_string("bucket", obj_name);
-        }
-
-        marker_cursor = &obj_name;
-      } // for loop
-      if (marker_cursor) {
-        marker = *marker_cursor;
-      }
-
-      flusher.flush();
-    } while (buckets.is_truncated());
-
-    formatter->close_section();
-  } else if (!bucket_name.empty()) {
-    ret = bucket_stats(driver, user_id.tenant, bucket_name, formatter, dpp);
-    if (ret < 0) {
-      return ret;
-    }
-  } else {
-    void *handle = nullptr;
-    bool truncated = true;
-
-    formatter->open_array_section("buckets");
-    ret = driver->meta_list_keys_init(dpp, "bucket", string(), &handle);
-    while (ret == 0 && truncated) {
-      std::list<std::string> buckets;
-      constexpr int max_keys = 1000;
-      ret = driver->meta_list_keys_next(dpp, handle, max_keys, buckets,
-                                        &truncated);
-      for (auto& bucket_name : buckets) {
-        if (show_stats) {
-          bucket_stats(driver, user_id.tenant, bucket_name, formatter, dpp);
-        } else {
-          formatter->dump_string("bucket", bucket_name);
-        }
-      }
-    }
-    driver->meta_list_keys_complete(handle);
-
-    formatter->close_section();
-  }
-
-  flusher.flush();
-
-  return 0;
-}
-
-int RGWBucketAdminOp::set_quota(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp)
-{
-  RGWBucket bucket;
-
-  int ret = bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0)
-    return ret;
-  return bucket.set_quota(op_state, dpp);
-}
-
-inline auto split_tenant(const std::string& bucket_name) {
-  auto p = bucket_name.find('/');
-  if (p != std::string::npos) {
-    return std::make_pair(bucket_name.substr(0, p), bucket_name.substr(p + 1));
-  }
-  return std::make_pair(std::string(), bucket_name);
-}
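// split_tenant() above treats everything before the first '/' as the tenant;
// with no separator the tenant is empty:
//
//   auto [t1, b1] = split_tenant("acme/logs");  // t1 == "acme", b1 == "logs"
//   auto [t2, b2] = split_tenant("logs");       // t2 == "",     b2 == "logs"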
-using bucket_instance_ls = std::vector<RGWBucketInfo>;
-void get_stale_instances(rgw::sal::Driver* driver, const std::string& bucket_name,
-                         const vector<std::string>& lst,
-                         bucket_instance_ls& stale_instances,
-                         const DoutPrefixProvider *dpp)
-{
-
-  bucket_instance_ls other_instances;
-// first iterate over the entries, and pick up the done buckets; these
-// are guaranteed to be stale
-  for (const auto& bucket_instance : lst) {
-    RGWBucketInfo binfo;
-    std::unique_ptr<rgw::sal::Bucket> bucket;
-    rgw_bucket rbucket;
-    rgw_bucket_parse_bucket_key(driver->ctx(), bucket_instance, &rbucket, nullptr);
-    int r = driver->get_bucket(dpp, nullptr, rbucket, &bucket, null_yield);
-    if (r < 0) {
-      // this can only happen if someone deletes us right when we're processing
-      ldpp_dout(dpp, -1) << "Bucket instance is invalid: " << bucket_instance
-                         << cpp_strerror(-r) << dendl;
-      continue;
-    }
-    binfo = bucket->get_info();
-    if (binfo.reshard_status == cls_rgw_reshard_status::DONE)
-      stale_instances.emplace_back(std::move(binfo));
-    else {
-      other_instances.emplace_back(std::move(binfo));
-    }
-  }
-
-  // Read the cur bucket info; if the bucket doesn't exist we can simply return
-  // all the instances
-  auto [tenant, bname] = split_tenant(bucket_name);
-  RGWBucketInfo cur_bucket_info;
-  std::unique_ptr<rgw::sal::Bucket> cur_bucket;
-  int r = driver->get_bucket(dpp, nullptr, tenant, bname, &cur_bucket, null_yield);
-  if (r < 0) {
-    if (r == -ENOENT) {
-      // bucket doesn't exist, everything is stale then
-      stale_instances.insert(std::end(stale_instances),
-                             std::make_move_iterator(other_instances.begin()),
-                             std::make_move_iterator(other_instances.end()));
-    } else {
-      // all bets are off if we can't read the bucket, just return the sureshot stale instances
-      ldpp_dout(dpp, -1) << "error: reading bucket info for bucket: "
-                         << bname << cpp_strerror(-r) << dendl;
-    }
-    return;
-  }
-
-  // Don't process further in this round if bucket is resharding
-  cur_bucket_info = cur_bucket->get_info();
-  if (cur_bucket_info.reshard_status == cls_rgw_reshard_status::IN_PROGRESS)
-    return;
-
-  other_instances.erase(std::remove_if(other_instances.begin(), other_instances.end(),
-                                       [&cur_bucket_info](const RGWBucketInfo& b) {
-                                         return (b.bucket.bucket_id == cur_bucket_info.bucket.bucket_id ||
-                                                 b.bucket.bucket_id == cur_bucket_info.new_bucket_instance_id);
-                                       }),
-                        other_instances.end());
-
-  // check if there are still instances left
-  if (other_instances.empty()) {
-    return;
-  }
-
-  // Now we have a bucket with instances where the reshard status is none, this
-  // usually happens when the reshard process couldn't complete, lockdown the
-  // bucket and walk through these instances to make sure no one else interferes
-  // with these
-  {
-    RGWBucketReshardLock reshard_lock(static_cast<rgw::sal::RadosStore*>(driver), cur_bucket->get_info(), true);
-    r = reshard_lock.lock(dpp);
-    if (r < 0) {
-      // most likely bucket is under reshard, return the sureshot stale instances
-      ldpp_dout(dpp, 5) << __func__
-                        << " failed to take reshard lock; reshard underway likely" << dendl;
-      return;
-    }
-    auto sg = make_scope_guard([&reshard_lock]() { reshard_lock.unlock(); });
-    // this should be fast enough that we may not need to renew locks and check
-    // exit status?, should we read the values of the instances again?
-    stale_instances.insert(std::end(stale_instances),
-                           std::make_move_iterator(other_instances.begin()),
-                           std::make_move_iterator(other_instances.end()));
-  }
-
-  return;
-}
-
-static int process_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
-                                   RGWFormatterFlusher& flusher,
-                                   const DoutPrefixProvider *dpp,
-                                   std::function<void(const bucket_instance_ls&,
-                                                      Formatter*,
-                                                      rgw::sal::Driver*)> process_f)
-{
-  std::string marker;
-  void *handle;
-  Formatter *formatter = flusher.get_formatter();
-  static constexpr auto default_max_keys = 1000;
-
-  int ret = driver->meta_list_keys_init(dpp, "bucket.instance", marker, &handle);
-  if (ret < 0) {
-    cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
-    return ret;
-  }
-
-  bool truncated;
-
-  formatter->open_array_section("keys");
-  auto g = make_scope_guard([&driver, &handle, &formatter]() {
-    driver->meta_list_keys_complete(handle);
-    formatter->close_section(); // keys
-    formatter->flush(cout);
-  });
-
-  do {
-    list<std::string> keys;
-
-    ret = driver->meta_list_keys_next(dpp, handle, default_max_keys, keys, &truncated);
-    if (ret < 0 && ret != -ENOENT) {
-      cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
-      return ret;
-    } if (ret != -ENOENT) {
-      // partition the list of keys by bucket as the listing is unsorted,
-      // since it would minimize the reads to bucket_info
-      std::unordered_map<std::string, std::vector<std::string>> bucket_instance_map;
-      for (auto &key: keys) {
-        auto pos = key.find(':');
-        if (pos != std::string::npos)
-          bucket_instance_map[key.substr(0, pos)].emplace_back(std::move(key));
-      }
-      for (const auto& kv: bucket_instance_map) {
-        bucket_instance_ls stale_lst;
-        get_stale_instances(driver, kv.first, kv.second, stale_lst, dpp);
-        process_f(stale_lst, formatter, driver);
-      }
-    }
-  } while (truncated);
-
-  return 0;
-}
-
-int RGWBucketAdminOp::list_stale_instances(rgw::sal::Driver* driver,
-                                           RGWBucketAdminOpState& op_state,
-                                           RGWFormatterFlusher& flusher,
-                                           const DoutPrefixProvider *dpp)
-{
-  auto process_f = [](const bucket_instance_ls& lst,
-                      Formatter *formatter,
-                      rgw::sal::Driver*) {
-    for (const auto& binfo: lst)
-      formatter->dump_string("key", binfo.bucket.get_key());
-  };
-  return process_stale_instances(driver, op_state, flusher, dpp, process_f);
-}
-
-
-int RGWBucketAdminOp::clear_stale_instances(rgw::sal::Driver* driver,
-                                            RGWBucketAdminOpState& op_state,
-                                            RGWFormatterFlusher& flusher,
-                                            const DoutPrefixProvider *dpp)
-{
-  auto process_f = [dpp](const bucket_instance_ls& lst,
-                         Formatter *formatter,
-                         rgw::sal::Driver* driver) {
-    for (const auto &binfo: lst) {
-      std::unique_ptr<rgw::sal::Bucket> bucket;
-      driver->get_bucket(nullptr, binfo, &bucket);
-      int ret = bucket->purge_instance(dpp);
-      if (ret == 0) {
-        auto md_key = "bucket.instance:" + binfo.bucket.get_key();
-        ret = driver->meta_remove(dpp, md_key, null_yield);
-      }
-      formatter->open_object_section("delete_status");
-      formatter->dump_string("bucket_instance", binfo.bucket.get_key());
-      formatter->dump_int("status", -ret);
-      formatter->close_section();
-    }
-  };
-
-  return process_stale_instances(driver, op_state, flusher, dpp, process_f);
-}
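// process_stale_instances() above groups raw "bucket.instance" metadata keys
// of the form "<bucket>:<instance-id>" by their bucket prefix, so each bucket
// is probed only once. The split in isolation (key value is hypothetical):
//
//   std::string key = "mybucket:deadbeef-1234.5678.1";
//   auto pos = key.find(':');
//   std::string bucket_part =
//       (pos == std::string::npos) ? key : key.substr(0, pos);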
-static int fix_single_bucket_lc(rgw::sal::Driver* driver,
-                                const std::string& tenant_name,
-                                const std::string& bucket_name,
-                                const DoutPrefixProvider *dpp)
-{
-  std::unique_ptr<rgw::sal::Bucket> bucket;
-  int ret = driver->get_bucket(dpp, nullptr, tenant_name, bucket_name, &bucket, null_yield);
-  if (ret < 0) {
-    // TODO: Should we handle the case where the bucket could've been removed between
-    // listing and fetching?
-    return ret;
-  }
-
-  return rgw::lc::fix_lc_shard_entry(dpp, driver, driver->get_rgwlc()->get_lc(), bucket.get());
-}
-
-static void format_lc_status(Formatter* formatter,
-                             const std::string& tenant_name,
-                             const std::string& bucket_name,
-                             int status)
-{
-  formatter->open_object_section("bucket_entry");
-  std::string entry = tenant_name.empty() ? bucket_name : tenant_name + "/" + bucket_name;
-  formatter->dump_string("bucket", entry);
-  formatter->dump_int("status", status);
-  formatter->close_section(); // bucket_entry
-}
-
-static void process_single_lc_entry(rgw::sal::Driver* driver,
-                                    Formatter *formatter,
-                                    const std::string& tenant_name,
-                                    const std::string& bucket_name,
-                                    const DoutPrefixProvider *dpp)
-{
-  int ret = fix_single_bucket_lc(driver, tenant_name, bucket_name, dpp);
-  format_lc_status(formatter, tenant_name, bucket_name, -ret);
-}
-
-int RGWBucketAdminOp::fix_lc_shards(rgw::sal::Driver* driver,
-                                    RGWBucketAdminOpState& op_state,
-                                    RGWFormatterFlusher& flusher,
-                                    const DoutPrefixProvider *dpp)
-{
-  std::string marker;
-  void *handle;
-  Formatter *formatter = flusher.get_formatter();
-  static constexpr auto default_max_keys = 1000;
-
-  bool truncated;
-  if (const std::string& bucket_name = op_state.get_bucket_name();
-      ! bucket_name.empty()) {
-    const rgw_user user_id = op_state.get_user_id();
-    process_single_lc_entry(driver, formatter, user_id.tenant, bucket_name, dpp);
-    formatter->flush(cout);
-  } else {
-    int ret = driver->meta_list_keys_init(dpp, "bucket", marker, &handle);
-    if (ret < 0) {
-      std::cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
-      return ret;
-    }
-
-    {
-      formatter->open_array_section("lc_fix_status");
-      auto sg = make_scope_guard([&driver, &handle, &formatter]() {
-        driver->meta_list_keys_complete(handle);
-        formatter->close_section(); // lc_fix_status
-        formatter->flush(cout);
-      });
-      do {
-        list<std::string> keys;
-        ret = driver->meta_list_keys_next(dpp, handle, default_max_keys, keys, &truncated);
-        if (ret < 0 && ret != -ENOENT) {
-          std::cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
-          return ret;
-        } if (ret != -ENOENT) {
-          for (const auto &key: keys) {
-            auto [tenant_name, bucket_name] = split_tenant(key);
-            process_single_lc_entry(driver, formatter, tenant_name, bucket_name, dpp);
-          }
-        }
-        formatter->flush(cout); // regularly flush every 1k entries
-      } while (truncated);
-    }
-
-  }
-  return 0;
-
-}
-
-static bool has_object_expired(const DoutPrefixProvider *dpp,
-                               rgw::sal::Driver* driver,
-                               rgw::sal::Bucket* bucket,
-                               const rgw_obj_key& key, utime_t& delete_at)
-{
-  std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(key);
-  bufferlist delete_at_bl;
-
-  int ret = rgw_object_get_attr(dpp, driver, obj.get(), RGW_ATTR_DELETE_AT, delete_at_bl, null_yield);
-  if (ret < 0) {
-    return false; // no delete at attr, proceed
-  }
-
-  ret = decode_bl(delete_at_bl, delete_at);
-  if (ret < 0) {
-    return false; // failed to parse
-  }
-
-  if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) {
-    return true;
-  }
-
-  return false;
-}
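// The expiry predicate above, reduced to its comparison: an object is expired
// only when RGW_ATTR_DELETE_AT decoded to a non-zero timestamp in the past.
//
//   utime_t now = ceph_clock_now();
//   bool expired = !delete_at.is_zero() && delete_at <= now;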
-static int fix_bucket_obj_expiry(const DoutPrefixProvider *dpp,
-                                 rgw::sal::Driver* driver,
-                                 rgw::sal::Bucket* bucket,
-                                 RGWFormatterFlusher& flusher, bool dry_run)
-{
-  if (bucket->get_key().bucket_id == bucket->get_key().marker) {
-    ldpp_dout(dpp, -1) << "Not a resharded bucket, skipping" << dendl;
-    return 0; // not a resharded bucket, move along
-  }
-
-  Formatter *formatter = flusher.get_formatter();
-  formatter->open_array_section("expired_deletion_status");
-  auto sg = make_scope_guard([&formatter] {
-    formatter->close_section();
-    formatter->flush(std::cout);
-  });
-
-  rgw::sal::Bucket::ListParams params;
-  rgw::sal::Bucket::ListResults results;
-
-  params.list_versions = bucket->versioned();
-  params.allow_unordered = true;
-
-  do {
-    int ret = bucket->list(dpp, params, listing_max_entries, results, null_yield);
-    if (ret < 0) {
-      ldpp_dout(dpp, -1) << "ERROR failed to list objects in the bucket" << dendl;
-      return ret;
-    }
-    for (const auto& obj : results.objs) {
-      rgw_obj_key key(obj.key);
-      utime_t delete_at;
-      if (has_object_expired(dpp, driver, bucket, key, delete_at)) {
-        formatter->open_object_section("object_status");
-        formatter->dump_string("object", key.name);
-        formatter->dump_stream("delete_at") << delete_at;
-
-        if (!dry_run) {
-          ret = rgw_remove_object(dpp, driver, bucket, key);
-          formatter->dump_int("status", ret);
-        }
-
-        formatter->close_section(); // object_status
-      }
-    }
-    formatter->flush(cout); // regularly flush every 1k entries
-  } while (results.is_truncated);
-
-  return 0;
-}
-
-int RGWBucketAdminOp::fix_obj_expiry(rgw::sal::Driver* driver,
-                                     RGWBucketAdminOpState& op_state,
-                                     RGWFormatterFlusher& flusher,
-                                     const DoutPrefixProvider *dpp, bool dry_run)
-{
-  RGWBucket admin_bucket;
-  int ret = admin_bucket.init(driver, op_state, null_yield, dpp);
-  if (ret < 0) {
-    ldpp_dout(dpp, -1) << "failed to initialize bucket" << dendl;
-    return ret;
-  }
-  std::unique_ptr<rgw::sal::Bucket> bucket;
-  ret = driver->get_bucket(nullptr, admin_bucket.get_bucket_info(), &bucket);
-  if (ret < 0) {
-    return ret;
-  }
-
-  return fix_bucket_obj_expiry(dpp, driver, bucket.get(), flusher, dry_run);
-}
-
-void RGWBucketCompleteInfo::dump(Formatter *f) const {
-  encode_json("bucket_info", info, f);
-  encode_json("attrs", attrs, f);
-}
-
-void RGWBucketCompleteInfo::decode_json(JSONObj *obj) {
-  JSONDecoder::decode_json("bucket_info", info, obj);
-  JSONDecoder::decode_json("attrs", attrs, obj);
-}
-class RGWBucketMetadataHandler : public RGWBucketMetadataHandlerBase {
-public:
-  struct Svc {
-    RGWSI_Bucket *bucket{nullptr};
-  } svc;
-
-  struct Ctl {
-    RGWBucketCtl *bucket{nullptr};
-  } ctl;
-
-  RGWBucketMetadataHandler() {}
-
-  void init(RGWSI_Bucket *bucket_svc,
-            RGWBucketCtl *bucket_ctl) override {
-    base_init(bucket_svc->ctx(),
-              bucket_svc->get_ep_be_handler().get());
-    svc.bucket = bucket_svc;
-    ctl.bucket = bucket_ctl;
-  }
-
-  string get_type() override { return "bucket"; }
-
-  RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override {
-    RGWBucketEntryPoint be;
-
-    try {
-      decode_json_obj(be, jo);
-    } catch (JSONDecoder::err& e) {
-      return nullptr;
-    }
-
-    return new RGWBucketEntryMetadataObject(be, objv, mtime);
-  }
-
-  int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override {
-    RGWObjVersionTracker ot;
-    RGWBucketEntryPoint be;
-
-    real_time mtime;
-    map<string, bufferlist> attrs;
-
-    RGWSI_Bucket_EP_Ctx ctx(op->ctx());
-
-    int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &ot, &mtime, &attrs, y, dpp);
-    if (ret < 0)
-      return ret;
-
-    RGWBucketEntryMetadataObject *mdo = new RGWBucketEntryMetadataObject(be, ot.read_version, mtime, std::move(attrs));
-
-    *obj = mdo;
-
-    return 0;
-  }
-
-  int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry,
-             RGWMetadataObject *obj,
-             RGWObjVersionTracker& objv_tracker,
-             optional_yield y,
-             const DoutPrefixProvider *dpp,
-             RGWMDLogSyncType type, bool from_remote_zone) override;
-
-  int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker,
-                optional_yield y, const DoutPrefixProvider *dpp) override {
-    RGWBucketEntryPoint be;
-
-    real_time orig_mtime;
-
-    RGWSI_Bucket_EP_Ctx ctx(op->ctx());
-
-    int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &objv_tracker, &orig_mtime, nullptr, y, dpp);
-    if (ret < 0)
-      return ret;
-
-    /*
-     * We're unlinking the bucket but we don't want to update the entrypoint here - we're removing
-     * it immediately and don't want to invalidate our cached objv_version or the bucket obj removal
-     * will incorrectly fail.
-     */
-    ret = ctl.bucket->unlink_bucket(be.owner, be.bucket, y, dpp, false);
-    if (ret < 0) {
-      ldpp_dout(dpp, -1) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl;
-    }
-
-    ret = svc.bucket->remove_bucket_entrypoint_info(ctx, entry, &objv_tracker, y, dpp);
-    if (ret < 0) {
-      ldpp_dout(dpp, -1) << "could not delete bucket=" << entry << dendl;
-    }
-    /* idempotent */
-    return 0;
-  }
-
-  int call(std::function<int(RGWSI_Bucket_EP_Ctx& ctx)> f) {
-    return call(nullopt, f);
-  }
-
-  int call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params,
-           std::function<int(RGWSI_Bucket_EP_Ctx& ctx)> f) {
-    return be_handler->call(bectx_params, [&](RGWSI_MetaBackend_Handler::Op *op) {
-      RGWSI_Bucket_EP_Ctx ctx(op->ctx());
-      return f(ctx);
-    });
-  }
-};
-
-class RGWMetadataHandlerPut_Bucket : public RGWMetadataHandlerPut_SObj
-{
-  RGWBucketMetadataHandler *bhandler;
-  RGWBucketEntryMetadataObject *obj;
-public:
-  RGWMetadataHandlerPut_Bucket(RGWBucketMetadataHandler *_handler,
-                               RGWSI_MetaBackend_Handler::Op *op, string& entry,
-                               RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker,
-                               optional_yield y,
-                               RGWMDLogSyncType type, bool from_remote_zone)
-    : RGWMetadataHandlerPut_SObj(_handler, op, entry, obj, objv_tracker, y, type, from_remote_zone),
-      bhandler(_handler) {
-    obj = static_cast<RGWBucketEntryMetadataObject *>(_obj);
-  }
-  ~RGWMetadataHandlerPut_Bucket() {}
-
-  void encode_obj(bufferlist *bl) override {
-    obj->get_ep().encode(*bl);
-  }
-
-  int put_checked(const DoutPrefixProvider *dpp) override;
-  int put_post(const DoutPrefixProvider *dpp) override;
-};
-
-int RGWBucketMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry,
-                                     RGWMetadataObject *obj,
-                                     RGWObjVersionTracker& objv_tracker,
-                                     optional_yield y,
-                                     const DoutPrefixProvider *dpp,
-                                     RGWMDLogSyncType type, bool from_remote_zone)
-{
-  RGWMetadataHandlerPut_Bucket put_op(this, op, entry, obj, objv_tracker, y, type, from_remote_zone);
-  return do_put_operate(&put_op, dpp);
-}
-
-int RGWMetadataHandlerPut_Bucket::put_checked(const DoutPrefixProvider *dpp)
-{
-  RGWBucketEntryMetadataObject *orig_obj = static_cast<RGWBucketEntryMetadataObject *>(old_obj);
-
-  if (orig_obj) {
-    obj->set_pattrs(&orig_obj->get_attrs());
-  }
-
-  auto& be = obj->get_ep();
-  auto mtime = obj->get_mtime();
-  auto pattrs = obj->get_pattrs();
-
-  RGWSI_Bucket_EP_Ctx ctx(op->ctx());
-
-  return bhandler->svc.bucket->store_bucket_entrypoint_info(ctx, entry,
-                                                            be,
-                                                            false,
-                                                            mtime,
-                                                            pattrs,
-                                                            &objv_tracker,
-                                                            y,
-                                                            dpp);
-}
-int RGWMetadataHandlerPut_Bucket::put_post(const DoutPrefixProvider *dpp)
-{
-  auto& be = obj->get_ep();
-
-  int ret;
-
-  /* link bucket */
-  if (be.linked) {
-    ret = bhandler->ctl.bucket->link_bucket(be.owner, be.bucket, be.creation_time, y, dpp, false);
-  } else {
-    ret = bhandler->ctl.bucket->unlink_bucket(be.owner, be.bucket, y, dpp, false);
-  }
-
-  return ret;
-}
-
-static void get_md5_digest(const RGWBucketEntryPoint *be, string& md5_digest) {
-
-  char md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
-  unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
-  bufferlist bl;
-
-  Formatter *f = new JSONFormatter(false);
-  be->dump(f);
-  f->flush(bl);
-
-  MD5 hash;
-  // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
-  hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
-  hash.Update((const unsigned char *)bl.c_str(), bl.length());
-  hash.Final(m);
-
-  buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, md5);
-
-  delete f;
-
-  md5_digest = md5;
-}
-
-#define ARCHIVE_META_ATTR RGW_ATTR_PREFIX "zone.archive.info"
-
-struct archive_meta_info {
-  rgw_bucket orig_bucket;
-
-  bool from_attrs(CephContext *cct, map<string, bufferlist>& attrs) {
-    auto iter = attrs.find(ARCHIVE_META_ATTR);
-    if (iter == attrs.end()) {
-      return false;
-    }
-
-    auto bliter = iter->second.cbegin();
-    try {
-      decode(bliter);
-    } catch (buffer::error& err) {
-      ldout(cct, 0) << "ERROR: failed to decode archive meta info" << dendl;
-      return false;
-    }
-
-    return true;
-  }
-
-  void store_in_attrs(map<string, bufferlist>& attrs) const {
-    encode(attrs[ARCHIVE_META_ATTR]);
-  }
-
-  void encode(bufferlist& bl) const {
-    ENCODE_START(1, 1, bl);
-    encode(orig_bucket, bl);
-    ENCODE_FINISH(bl);
-  }
-
-  void decode(bufferlist::const_iterator& bl) {
-    DECODE_START(1, bl);
-    decode(orig_bucket, bl);
-    DECODE_FINISH(bl);
-  }
-};
-WRITE_CLASS_ENCODER(archive_meta_info)
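// On the archive zone a "removed" bucket is renamed rather than deleted; the
// new name is derived from an MD5 of the entrypoint JSON, as in do_remove()
// below. Illustrative composition only (md5_digest comes from
// get_md5_digest() above):
//
//   std::string md5_digest;
//   get_md5_digest(&new_be, md5_digest);
//   std::string new_name = ami.orig_bucket.name + "-deleted-" + md5_digest;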
-class RGWArchiveBucketMetadataHandler : public RGWBucketMetadataHandler {
-public:
-  RGWArchiveBucketMetadataHandler() {}
-
-  int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker,
-                optional_yield y, const DoutPrefixProvider *dpp) override {
-    auto cct = svc.bucket->ctx();
-
-    RGWSI_Bucket_EP_Ctx ctx(op->ctx());
-
-    ldpp_dout(dpp, 5) << "SKIP: bucket removal is not allowed on archive zone: bucket:" << entry << " ... proceeding to rename" << dendl;
-
-    string tenant_name, bucket_name;
-    parse_bucket(entry, &tenant_name, &bucket_name);
-    rgw_bucket entry_bucket;
-    entry_bucket.tenant = tenant_name;
-    entry_bucket.name = bucket_name;
-
-    real_time mtime;
-
-    /* read original entrypoint */
-
-    RGWBucketEntryPoint be;
-    map<string, bufferlist> attrs;
-    int ret = svc.bucket->read_bucket_entrypoint_info(ctx, entry, &be, &objv_tracker, &mtime, &attrs, y, dpp);
-    if (ret < 0) {
-      return ret;
-    }
-
-    string bi_meta_name = RGWSI_Bucket::get_bi_meta_key(be.bucket);
-
-    /* read original bucket instance info */
-
-    map<string, bufferlist> attrs_m;
-    ceph::real_time orig_mtime;
-    RGWBucketInfo old_bi;
-
-    ret = ctl.bucket->read_bucket_instance_info(be.bucket, &old_bi, y, dpp, RGWBucketCtl::BucketInstance::GetParams()
-                                                .set_mtime(&orig_mtime)
-                                                .set_attrs(&attrs_m));
-    if (ret < 0) {
-      return ret;
-    }
-
-    archive_meta_info ami;
-
-    if (!ami.from_attrs(svc.bucket->ctx(), attrs_m)) {
-      ami.orig_bucket = old_bi.bucket;
-      ami.store_in_attrs(attrs_m);
-    }
-
-    /* generate a new bucket instance. We could have avoided this if we could just point a new
-     * bucket entry point to the old bucket instance, however, due to limitation in the way
-     * we index buckets under the user, bucket entrypoint and bucket instance of the same
-     * bucket need to have the same name, so we need to copy the old bucket instance into
-     * a new entry with the new name
-     */
-
-    string new_bucket_name;
-
-    RGWBucketInfo new_bi = old_bi;
-    RGWBucketEntryPoint new_be = be;
-
-    string md5_digest;
-
-    get_md5_digest(&new_be, md5_digest);
-    new_bucket_name = ami.orig_bucket.name + "-deleted-" + md5_digest;
-
-    new_bi.bucket.name = new_bucket_name;
-    new_bi.objv_tracker.clear();
-
-    new_be.bucket.name = new_bucket_name;
-
-    ret = ctl.bucket->store_bucket_instance_info(be.bucket, new_bi, y, dpp, RGWBucketCtl::BucketInstance::PutParams()
-                                                 .set_exclusive(false)
-                                                 .set_mtime(orig_mtime)
-                                                 .set_attrs(&attrs_m)
-                                                 .set_orig_info(&old_bi));
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: failed to put new bucket instance info for bucket=" << new_bi.bucket << " ret=" << ret << dendl;
-      return ret;
-    }
-
-    /* store a new entrypoint */
-
-    RGWObjVersionTracker ot;
-    ot.generate_new_write_ver(cct);
-
-    ret = svc.bucket->store_bucket_entrypoint_info(ctx, RGWSI_Bucket::get_entrypoint_meta_key(new_be.bucket),
-                                                   new_be, true, mtime, &attrs, nullptr, y, dpp);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: failed to put new bucket entrypoint for bucket=" << new_be.bucket << " ret=" << ret << dendl;
-      return ret;
-    }
-
-    /* link new bucket */
-
-    ret = ctl.bucket->link_bucket(new_be.owner, new_be.bucket, new_be.creation_time, y, dpp, false);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: failed to link new bucket for bucket=" << new_be.bucket << " ret=" << ret << dendl;
-      return ret;
-    }
-
-    /* clean up old stuff */
-
-    ret = ctl.bucket->unlink_bucket(be.owner, entry_bucket, y, dpp, false);
-    if (ret < 0) {
-      ldpp_dout(dpp, -1) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl;
-    }
-
-    // if (ret == -ECANCELED) it means that there was a race here, and someone
-    // wrote to the bucket entrypoint just before we removed it. The question is
-    // whether it was a newly created bucket entrypoint ... in which case we
-    // should ignore the error and move forward, or whether it is a higher version
-    // of the same bucket instance ... in which case we should retry
-    ret = svc.bucket->remove_bucket_entrypoint_info(ctx,
-                                                    RGWSI_Bucket::get_entrypoint_meta_key(be.bucket),
-                                                    &objv_tracker,
-                                                    y,
-                                                    dpp);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: failed to remove old bucket entrypoint for bucket=" << be.bucket << " ret=" << ret << dendl;
-      return ret;
-    }
-
-    ret = ctl.bucket->remove_bucket_instance_info(be.bucket, old_bi, y, dpp);
-    if (ret < 0) {
-      ldpp_dout(dpp, -1) << "could not delete bucket=" << entry << dendl;
-    }
-
-
-    /* idempotent */
-
-    return 0;
-  }
-
-  int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry,
-             RGWMetadataObject *obj,
-             RGWObjVersionTracker& objv_tracker,
-             optional_yield y, const DoutPrefixProvider *dpp,
-             RGWMDLogSyncType type, bool from_remote_zone) override {
-    if (entry.find("-deleted-") != string::npos) {
-      RGWObjVersionTracker ot;
-      RGWMetadataObject *robj;
-      int ret = do_get(op, entry, &robj, y, dpp);
-      if (ret != -ENOENT) {
-        if (ret < 0) {
-          return ret;
-        }
-        ot.read_version = robj->get_version();
-        delete robj;
-
-        ret = do_remove(op, entry, ot, y, dpp);
-        if (ret < 0) {
-          return ret;
-        }
-      }
-    }
-
-    return RGWBucketMetadataHandler::do_put(op, entry, obj,
-                                            objv_tracker, y, dpp, type, from_remote_zone);
-  }
-
-};
-class RGWBucketInstanceMetadataHandler : public RGWBucketInstanceMetadataHandlerBase {
-  int read_bucket_instance_entry(RGWSI_Bucket_BI_Ctx& ctx,
-                                 const string& entry,
-                                 RGWBucketCompleteInfo *bi,
-                                 ceph::real_time *pmtime,
-                                 optional_yield y,
-                                 const DoutPrefixProvider *dpp) {
-    return svc.bucket->read_bucket_instance_info(ctx,
-                                                 entry,
-                                                 &bi->info,
-                                                 pmtime, &bi->attrs,
-                                                 y,
-                                                 dpp);
-  }
-
-public:
-  struct Svc {
-    RGWSI_Zone *zone{nullptr};
-    RGWSI_Bucket *bucket{nullptr};
-    RGWSI_BucketIndex *bi{nullptr};
-  } svc;
-
-  rgw::sal::Driver* driver;
-
-  RGWBucketInstanceMetadataHandler(rgw::sal::Driver* driver)
-    : driver(driver) {}
-
-  void init(RGWSI_Zone *zone_svc,
-            RGWSI_Bucket *bucket_svc,
-            RGWSI_BucketIndex *bi_svc) override {
-    base_init(bucket_svc->ctx(),
-              bucket_svc->get_bi_be_handler().get());
-    svc.zone = zone_svc;
-    svc.bucket = bucket_svc;
-    svc.bi = bi_svc;
-  }
-
-  string get_type() override { return "bucket.instance"; }
-
-  RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override {
-    RGWBucketCompleteInfo bci;
-
-    try {
-      decode_json_obj(bci, jo);
-    } catch (JSONDecoder::err& e) {
-      return nullptr;
-    }
-
-    return new RGWBucketInstanceMetadataObject(bci, objv, mtime);
-  }
-
-  int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override {
-    RGWBucketCompleteInfo bci;
-    real_time mtime;
-
-    RGWSI_Bucket_BI_Ctx ctx(op->ctx());
-
-    int ret = svc.bucket->read_bucket_instance_info(ctx, entry, &bci.info, &mtime, &bci.attrs, y, dpp);
-    if (ret < 0)
-      return ret;
-
-    RGWBucketInstanceMetadataObject *mdo = new RGWBucketInstanceMetadataObject(bci, bci.info.objv_tracker.read_version, mtime);
-
-    *obj = mdo;
-
-    return 0;
-  }
-
-  int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry,
-             RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker,
-             optional_yield y, const DoutPrefixProvider *dpp,
-             RGWMDLogSyncType sync_type, bool from_remote_zone) override;
-
-  int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker,
-                optional_yield y, const DoutPrefixProvider *dpp) override {
-    RGWBucketCompleteInfo bci;
-
-    RGWSI_Bucket_BI_Ctx ctx(op->ctx());
-
-    int ret = read_bucket_instance_entry(ctx, entry, &bci, nullptr, y, dpp);
-    if (ret < 0 && ret != -ENOENT)
-      return ret;
-
-    return svc.bucket->remove_bucket_instance_info(ctx, entry, bci.info, &bci.info.objv_tracker, y, dpp);
-  }
-
-  int call(std::function<int(RGWSI_Bucket_BI_Ctx& ctx)> f) {
-    return call(nullopt, f);
-  }
-
-  int call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params,
-           std::function<int(RGWSI_Bucket_BI_Ctx& ctx)> f) {
-    return be_handler->call(bectx_params, [&](RGWSI_MetaBackend_Handler::Op *op) {
-      RGWSI_Bucket_BI_Ctx ctx(op->ctx());
-      return f(ctx);
-    });
-  }
-};
-
-class RGWMetadataHandlerPut_BucketInstance : public RGWMetadataHandlerPut_SObj
-{
-  CephContext *cct;
-  RGWBucketInstanceMetadataHandler *bihandler;
-  RGWBucketInstanceMetadataObject *obj;
-public:
-  RGWMetadataHandlerPut_BucketInstance(CephContext *_cct,
-                                       RGWBucketInstanceMetadataHandler *_handler,
-                                       RGWSI_MetaBackend_Handler::Op *_op, string& entry,
-                                       RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker,
-                                       optional_yield y,
-                                       RGWMDLogSyncType type, bool from_remote_zone)
-    : RGWMetadataHandlerPut_SObj(_handler, _op, entry, obj, objv_tracker, y, type, from_remote_zone),
-      cct(_cct), bihandler(_handler) {
-    obj = static_cast<RGWBucketInstanceMetadataObject *>(_obj);
-
-    auto& bci = obj->get_bci();
-    obj->set_pattrs(&bci.attrs);
-  }
-
-  void encode_obj(bufferlist *bl) override {
-    obj->get_bucket_info().encode(*bl);
-  }
-
-  int put_check(const DoutPrefixProvider *dpp) override;
-  int put_checked(const DoutPrefixProvider *dpp) override;
-  int put_post(const DoutPrefixProvider *dpp) override;
-};
-
-int RGWBucketInstanceMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op,
-                                             string& entry,
-                                             RGWMetadataObject *obj,
-                                             RGWObjVersionTracker& objv_tracker,
-                                             optional_yield y,
-                                             const DoutPrefixProvider *dpp,
-                                             RGWMDLogSyncType type, bool from_remote_zone)
-{
-  RGWMetadataHandlerPut_BucketInstance put_op(svc.bucket->ctx(), this, op, entry, obj,
-                                              objv_tracker, y, type, from_remote_zone);
-  return do_put_operate(&put_op, dpp);
-}
-
-void init_default_bucket_layout(CephContext *cct, rgw::BucketLayout& layout,
-                                const RGWZone& zone,
-                                std::optional<uint32_t> shards,
-                                std::optional<rgw::BucketIndexType> type) {
-  layout.current_index.gen = 0;
-  layout.current_index.layout.normal.hash_type = rgw::BucketHashType::Mod;
-
-  layout.current_index.layout.type =
-    type.value_or(rgw::BucketIndexType::Normal);
-
-  if (shards) {
-    layout.current_index.layout.normal.num_shards = *shards;
-  } else if (cct->_conf->rgw_override_bucket_index_max_shards > 0) {
-    layout.current_index.layout.normal.num_shards =
-      cct->_conf->rgw_override_bucket_index_max_shards;
-  } else {
-    layout.current_index.layout.normal.num_shards =
-      zone.bucket_index_max_shards;
-  }
-
-  if (layout.current_index.layout.type == rgw::BucketIndexType::Normal) {
-    layout.logs.push_back(log_layout_from_index(0, layout.current_index));
-  }
-}
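// Shard-count precedence in init_default_bucket_layout() above: an explicit
// request wins, then the rgw_override_bucket_index_max_shards conf option,
// then the zone default. Condensed restatement of the branch above:
//
//   uint32_t num_shards =
//       shards ? *shards
//              : (cct->_conf->rgw_override_bucket_index_max_shards > 0
//                     ? cct->_conf->rgw_override_bucket_index_max_shards
//                     : zone.bucket_index_max_shards);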
-int RGWMetadataHandlerPut_BucketInstance::put_check(const DoutPrefixProvider *dpp)
-{
-  int ret;
-
-  RGWBucketCompleteInfo& bci = obj->get_bci();
-
-  RGWBucketInstanceMetadataObject *orig_obj = static_cast<RGWBucketInstanceMetadataObject *>(old_obj);
-
-  RGWBucketCompleteInfo *old_bci = (orig_obj ? &orig_obj->get_bci() : nullptr);
-
-  const bool exists = (!!orig_obj);
-
-  if (from_remote_zone) {
-    // don't sync bucket layout changes
-    if (!exists) {
-      // replace peer's layout with default-constructed, then apply our defaults
-      bci.info.layout = rgw::BucketLayout{};
-      init_default_bucket_layout(cct, bci.info.layout,
-                                 bihandler->svc.zone->get_zone(),
-                                 std::nullopt, std::nullopt);
-    } else {
-      bci.info.layout = old_bci->info.layout;
-    }
-  }
-
-  if (!exists || old_bci->info.bucket.bucket_id != bci.info.bucket.bucket_id) {
-    /* a new bucket, we need to select a new bucket placement for it */
-    string tenant_name;
-    string bucket_name;
-    string bucket_instance;
-    parse_bucket(entry, &tenant_name, &bucket_name, &bucket_instance);
-
-    RGWZonePlacementInfo rule_info;
-    bci.info.bucket.name = bucket_name;
-    bci.info.bucket.bucket_id = bucket_instance;
-    bci.info.bucket.tenant = tenant_name;
-    // if the sync module never writes data, don't require the zone to specify all placement targets
-    if (bihandler->svc.zone->sync_module_supports_writes()) {
-      ret = bihandler->svc.zone->select_bucket_location_by_rule(dpp, bci.info.placement_rule, &rule_info, y);
-      if (ret < 0) {
-        ldpp_dout(dpp, 0) << "ERROR: select_bucket_placement() returned " << ret << dendl;
-        return ret;
-      }
-    }
-    bci.info.layout.current_index.layout.type = rule_info.index_type;
-  } else {
-    /* existing bucket, keep its placement */
-    bci.info.bucket.explicit_placement = old_bci->info.bucket.explicit_placement;
-    bci.info.placement_rule = old_bci->info.placement_rule;
-  }
-
-  /* record the read version (if any), store the new version */
-  bci.info.objv_tracker.read_version = objv_tracker.read_version;
-  bci.info.objv_tracker.write_version = objv_tracker.write_version;
-
-  return 0;
-}
-
-int RGWMetadataHandlerPut_BucketInstance::put_checked(const DoutPrefixProvider *dpp)
-{
-  RGWBucketInstanceMetadataObject *orig_obj = static_cast<RGWBucketInstanceMetadataObject *>(old_obj);
-
-  RGWBucketInfo *orig_info = (orig_obj ? &orig_obj->get_bucket_info() : nullptr);
-
-  auto& info = obj->get_bucket_info();
-  auto mtime = obj->get_mtime();
-  auto pattrs = obj->get_pattrs();
-
-  RGWSI_Bucket_BI_Ctx ctx(op->ctx());
-
-  return bihandler->svc.bucket->store_bucket_instance_info(ctx,
-                                                           entry,
-                                                           info,
-                                                           orig_info,
-                                                           false,
-                                                           mtime,
-                                                           pattrs,
-                                                           y,
-                                                           dpp);
-}
-int RGWMetadataHandlerPut_BucketInstance::put_post(const DoutPrefixProvider *dpp)
-{
-  RGWBucketCompleteInfo& bci = obj->get_bci();
-
-  objv_tracker = bci.info.objv_tracker;
-
-  int ret = bihandler->svc.bi->init_index(dpp, bci.info, bci.info.layout.current_index);
-  if (ret < 0) {
-    return ret;
-  }
-
-  /* update lifecycle policy */
-  {
-    std::unique_ptr<rgw::sal::Bucket> bucket;
-    ret = bihandler->driver->get_bucket(nullptr, bci.info, &bucket);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << __func__ << " failed to get_bucket(...) for "
-                        << bci.info.bucket.name
-                        << dendl;
-      return ret;
-    }
-
-    auto lc = bihandler->driver->get_rgwlc();
-
-    auto lc_it = bci.attrs.find(RGW_ATTR_LC);
-    if (lc_it != bci.attrs.end()) {
-      ldpp_dout(dpp, 20) << "set lc config for " << bci.info.bucket.name << dendl;
-      ret = lc->set_bucket_config(bucket.get(), bci.attrs, nullptr);
-      if (ret < 0) {
-        ldpp_dout(dpp, 0) << __func__ << " failed to set lc config for "
-                          << bci.info.bucket.name
-                          << dendl;
-        return ret;
-      }
-
-    } else {
-      ldpp_dout(dpp, 20) << "remove lc config for " << bci.info.bucket.name << dendl;
-      ret = lc->remove_bucket_config(bucket.get(), bci.attrs, false /* cannot merge attrs */);
-      if (ret < 0) {
-        ldpp_dout(dpp, 0) << __func__ << " failed to remove lc config for "
-                          << bci.info.bucket.name
-                          << dendl;
-        return ret;
-      }
-    }
-  } /* update lc */
-
-  return STATUS_APPLIED;
-}
-
-class RGWArchiveBucketInstanceMetadataHandler : public RGWBucketInstanceMetadataHandler {
-public:
-  RGWArchiveBucketInstanceMetadataHandler(rgw::sal::Driver* driver)
-    : RGWBucketInstanceMetadataHandler(driver) {}
-
-  // N.B. replication of lifecycle policy relies on logic in RGWBucketInstanceMetadataHandler::do_put(...), override with caution
-
-  int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override {
-    ldpp_dout(dpp, 0) << "SKIP: bucket instance removal is not allowed on archive zone: bucket.instance:" << entry << dendl;
-    return 0;
-  }
-};
-
-RGWBucketCtl::RGWBucketCtl(RGWSI_Zone *zone_svc,
-                           RGWSI_Bucket *bucket_svc,
-                           RGWSI_Bucket_Sync *bucket_sync_svc,
-                           RGWSI_BucketIndex *bi_svc,
-                           RGWSI_User* user_svc)
-  : cct(zone_svc->ctx())
-{
-  svc.zone = zone_svc;
-  svc.bucket = bucket_svc;
-  svc.bucket_sync = bucket_sync_svc;
-  svc.bi = bi_svc;
-  svc.user = user_svc;
-}
-
-void RGWBucketCtl::init(RGWUserCtl *user_ctl,
-                        RGWBucketMetadataHandler *_bm_handler,
-                        RGWBucketInstanceMetadataHandler *_bmi_handler,
-                        RGWDataChangesLog *datalog,
-                        const DoutPrefixProvider *dpp)
-{
-  ctl.user = user_ctl;
-
-  bm_handler = _bm_handler;
-  bmi_handler = _bmi_handler;
-
-  bucket_be_handler = bm_handler->get_be_handler();
-  bi_be_handler = bmi_handler->get_be_handler();
-
-  datalog->set_bucket_filter(
-    [this](const rgw_bucket& bucket, optional_yield y, const DoutPrefixProvider *dpp) {
-      return bucket_exports_data(bucket, y, dpp);
-    });
-}
-
-int RGWBucketCtl::call(std::function<int(RGWSI_Bucket_X_Ctx& ctx)> f) {
-  return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ep_ctx) {
-    return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& bi_ctx) {
-      RGWSI_Bucket_X_Ctx ctx{ep_ctx, bi_ctx};
-      return f(ctx);
-    });
-  });
-}
-
-int RGWBucketCtl::read_bucket_entrypoint_info(const rgw_bucket& bucket,
-                                              RGWBucketEntryPoint *info,
-                                              optional_yield y, const DoutPrefixProvider *dpp,
-                                              const Bucket::GetParams& params)
-{
-  return bm_handler->call(params.bectx_params, [&](RGWSI_Bucket_EP_Ctx& ctx) {
-    return svc.bucket->read_bucket_entrypoint_info(ctx,
-                                                   RGWSI_Bucket::get_entrypoint_meta_key(bucket),
-                                                   info,
-                                                   params.objv_tracker,
-                                                   params.mtime,
-                                                   params.attrs,
-                                                   y,
-                                                   dpp,
-                                                   params.cache_info,
-                                                   params.refresh_version);
-  });
-}
-int RGWBucketCtl::store_bucket_entrypoint_info(const rgw_bucket& bucket,
-                                               RGWBucketEntryPoint& info,
-                                               optional_yield y,
-                                               const DoutPrefixProvider *dpp,
-                                               const Bucket::PutParams& params)
-{
-  return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) {
-    return svc.bucket->store_bucket_entrypoint_info(ctx,
-                                                    RGWSI_Bucket::get_entrypoint_meta_key(bucket),
-                                                    info,
-                                                    params.exclusive,
-                                                    params.mtime,
-                                                    params.attrs,
-                                                    params.objv_tracker,
-                                                    y,
-                                                    dpp);
-  });
-}
-
-int RGWBucketCtl::remove_bucket_entrypoint_info(const rgw_bucket& bucket,
-                                                optional_yield y,
-                                                const DoutPrefixProvider *dpp,
-                                                const Bucket::RemoveParams& params)
-{
-  return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) {
-    return svc.bucket->remove_bucket_entrypoint_info(ctx,
-                                                     RGWSI_Bucket::get_entrypoint_meta_key(bucket),
-                                                     params.objv_tracker,
-                                                     y,
-                                                     dpp);
-  });
-}
-
-int RGWBucketCtl::read_bucket_instance_info(const rgw_bucket& bucket,
-                                            RGWBucketInfo *info,
-                                            optional_yield y,
-                                            const DoutPrefixProvider *dpp,
-                                            const BucketInstance::GetParams& params)
-{
-  int ret = bmi_handler->call(params.bectx_params, [&](RGWSI_Bucket_BI_Ctx& ctx) {
-    return svc.bucket->read_bucket_instance_info(ctx,
-                                                 RGWSI_Bucket::get_bi_meta_key(bucket),
-                                                 info,
-                                                 params.mtime,
-                                                 params.attrs,
-                                                 y,
-                                                 dpp,
-                                                 params.cache_info,
-                                                 params.refresh_version);
-  });
-
-  if (ret < 0) {
-    return ret;
-  }
-
-  if (params.objv_tracker) {
-    *params.objv_tracker = info->objv_tracker;
-  }
-
-  return 0;
-}
-
-int RGWBucketCtl::read_bucket_info(const rgw_bucket& bucket,
-                                   RGWBucketInfo *info,
-                                   optional_yield y,
-                                   const DoutPrefixProvider *dpp,
-                                   const BucketInstance::GetParams& params,
-                                   RGWObjVersionTracker *ep_objv_tracker)
-{
-  const rgw_bucket *b = &bucket;
-
-  std::optional<RGWBucketEntryPoint> ep;
-
-  if (b->bucket_id.empty()) {
-    ep.emplace();
-
-    int r = read_bucket_entrypoint_info(*b, &(*ep), y, dpp, RGWBucketCtl::Bucket::GetParams()
-                                        .set_bectx_params(params.bectx_params)
-                                        .set_objv_tracker(ep_objv_tracker));
-    if (r < 0) {
-      return r;
-    }
-
-    b = &ep->bucket;
-  }
-
-  int ret = bmi_handler->call(params.bectx_params, [&](RGWSI_Bucket_BI_Ctx& ctx) {
-    return svc.bucket->read_bucket_instance_info(ctx,
-                                                 RGWSI_Bucket::get_bi_meta_key(*b),
-                                                 info,
-                                                 params.mtime,
-                                                 params.attrs,
-                                                 y, dpp,
-                                                 params.cache_info,
-                                                 params.refresh_version);
-  });
-
-  if (ret < 0) {
-    return ret;
-  }
-
-  if (params.objv_tracker) {
-    *params.objv_tracker = info->objv_tracker;
-  }
-
-  return 0;
-}
-
-int RGWBucketCtl::do_store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
-                                                const rgw_bucket& bucket,
-                                                RGWBucketInfo& info,
-                                                optional_yield y,
-                                                const DoutPrefixProvider *dpp,
-                                                const BucketInstance::PutParams& params)
-{
-  if (params.objv_tracker) {
-    info.objv_tracker = *params.objv_tracker;
-  }
-
-  return svc.bucket->store_bucket_instance_info(ctx,
-                                                RGWSI_Bucket::get_bi_meta_key(bucket),
-                                                info,
-                                                params.orig_info,
-                                                params.exclusive,
-                                                params.mtime,
-                                                params.attrs,
-                                                y,
-                                                dpp);
-}
-
-int RGWBucketCtl::store_bucket_instance_info(const rgw_bucket& bucket,
-                                             RGWBucketInfo& info,
-                                             optional_yield y,
-                                             const DoutPrefixProvider *dpp,
-                                             const BucketInstance::PutParams& params)
-{
-  return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& ctx) {
-    return do_store_bucket_instance_info(ctx, bucket, info, y, dpp, params);
-  });
-}
-
-int RGWBucketCtl::remove_bucket_instance_info(const rgw_bucket& bucket,
-                                              RGWBucketInfo& info,
-                                              optional_yield y,
-                                              const DoutPrefixProvider *dpp,
-                                              const BucketInstance::RemoveParams& params)
-{
-  if (params.objv_tracker) {
-    info.objv_tracker = *params.objv_tracker;
-  }
-
-  return bmi_handler->call([&](RGWSI_Bucket_BI_Ctx& ctx) {
-    return svc.bucket->remove_bucket_instance_info(ctx,
-                                                   RGWSI_Bucket::get_bi_meta_key(bucket),
-                                                   info,
-                                                   &info.objv_tracker,
-                                                   y,
-                                                   dpp);
-  });
-}
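// read_bucket_info() above resolves in two steps when the caller passes no
// bucket_id: first the entrypoint (name -> current instance), then the
// instance record itself. Caller-side sketch (assumes a populated
// RGWBucketCtl* ctl; bucket name illustrative):
//
//   rgw_bucket b;
//   b.name = "mybucket";        // empty bucket_id -> entrypoint lookup first
//   RGWBucketInfo info;
//   RGWObjVersionTracker ep_objv;
//   int r = ctl->read_bucket_info(b, &info, null_yield, dpp,
//                                 RGWBucketCtl::BucketInstance::GetParams(),
//                                 &ep_objv);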
*pattrs, - bool create_entry_point, - optional_yield y, const DoutPrefixProvider *dpp) -{ - bool create_head = !info.has_instance_obj || create_entry_point; - - int ret = svc.bucket->store_bucket_instance_info(ctx.bi, - RGWSI_Bucket::get_bi_meta_key(info.bucket), - info, - orig_info, - exclusive, - mtime, pattrs, - y, dpp); - if (ret < 0) { - return ret; - } - - if (!create_head) - return 0; /* done! */ - - RGWBucketEntryPoint entry_point; - entry_point.bucket = info.bucket; - entry_point.owner = info.owner; - entry_point.creation_time = info.creation_time; - entry_point.linked = true; - RGWObjVersionTracker ot; - if (pep_objv && !pep_objv->tag.empty()) { - ot.write_version = *pep_objv; - } else { - ot.generate_new_write_ver(cct); - if (pep_objv) { - *pep_objv = ot.write_version; - } - } - ret = svc.bucket->store_bucket_entrypoint_info(ctx.ep, - RGWSI_Bucket::get_entrypoint_meta_key(info.bucket), - entry_point, - exclusive, - mtime, - pattrs, - &ot, - y, - dpp); - if (ret < 0) - return ret; - - return 0; -} -int RGWBucketCtl::convert_old_bucket_info(RGWSI_Bucket_X_Ctx& ctx, - const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - RGWBucketEntryPoint entry_point; - real_time ep_mtime; - RGWObjVersionTracker ot; - map attrs; - RGWBucketInfo info; - auto cct = svc.bucket->ctx(); - - ldpp_dout(dpp, 10) << "RGWRados::convert_old_bucket_info(): bucket=" << bucket << dendl; - - int ret = svc.bucket->read_bucket_entrypoint_info(ctx.ep, - RGWSI_Bucket::get_entrypoint_meta_key(bucket), - &entry_point, &ot, &ep_mtime, &attrs, y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: get_bucket_entrypoint_info() returned " << ret << " bucket=" << bucket << dendl; - return ret; - } - - if (!entry_point.has_bucket_info) { - /* already converted! 
*/ - return 0; - } - - info = entry_point.old_bucket_info; - - ot.generate_new_write_ver(cct); - - ret = do_store_linked_bucket_info(ctx, info, nullptr, false, ep_mtime, &ot.write_version, &attrs, true, y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to put_linked_bucket_info(): " << ret << dendl; - return ret; - } - - return 0; -} - -int RGWBucketCtl::set_bucket_instance_attrs(RGWBucketInfo& bucket_info, - map& attrs, - RGWObjVersionTracker *objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - return call([&](RGWSI_Bucket_X_Ctx& ctx) { - rgw_bucket& bucket = bucket_info.bucket; - - if (!bucket_info.has_instance_obj) { - /* an old bucket object, need to convert it */ - int ret = convert_old_bucket_info(ctx, bucket, y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed converting old bucket info: " << ret << dendl; - return ret; - } - } - - return do_store_bucket_instance_info(ctx.bi, - bucket, - bucket_info, - y, - dpp, - BucketInstance::PutParams().set_attrs(&attrs) - .set_objv_tracker(objv_tracker) - .set_orig_info(&bucket_info)); - }); -} - - -int RGWBucketCtl::link_bucket(const rgw_user& user_id, - const rgw_bucket& bucket, - ceph::real_time creation_time, - optional_yield y, - const DoutPrefixProvider *dpp, - bool update_entrypoint, - rgw_ep_info *pinfo) -{ - return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { - return do_link_bucket(ctx, user_id, bucket, creation_time, - update_entrypoint, pinfo, y, dpp); - }); -} - -int RGWBucketCtl::do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, - const rgw_user& user_id, - const rgw_bucket& bucket, - ceph::real_time creation_time, - bool update_entrypoint, - rgw_ep_info *pinfo, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - int ret; - - RGWBucketEntryPoint ep; - RGWObjVersionTracker ot; - RGWObjVersionTracker& rot = (pinfo) ? 
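// set_bucket_instance_attrs() above upgrades lazily: a bucket that predates
// split entrypoint/instance metadata (!has_instance_obj) is converted on the
// first write that touches it, then stored in the new format. The shape of
// that migrate-on-write pattern, reduced to stand-ins:
struct Record { bool has_instance_obj = false; };

static int convert_old(Record& r) { r.has_instance_obj = true; return 0; } // cf. convert_old_bucket_info()
static int store_new(const Record&) { return 0; }                          // cf. do_store_bucket_instance_info()

static int set_attrs(Record& r) {
  if (!r.has_instance_obj) {   // old-format record: migrate it first
    int ret = convert_old(r);
    if (ret < 0)
      return ret;
  }
  return store_new(r);         // safe to write in the current format
}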
pinfo->ep_objv : ot; - map attrs, *pattrs = nullptr; - string meta_key; - - if (update_entrypoint) { - meta_key = RGWSI_Bucket::get_entrypoint_meta_key(bucket); - if (pinfo) { - ep = pinfo->ep; - pattrs = &pinfo->attrs; - } else { - ret = svc.bucket->read_bucket_entrypoint_info(ctx, - meta_key, - &ep, &rot, - nullptr, &attrs, - y, dpp); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: read_bucket_entrypoint_info() returned: " - << cpp_strerror(-ret) << dendl; - } - pattrs = &attrs; - } - } - - ret = svc.user->add_bucket(dpp, user_id, bucket, creation_time, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: error adding bucket to user directory:" - << " user=" << user_id - << " bucket=" << bucket - << " err=" << cpp_strerror(-ret) - << dendl; - goto done_err; - } - - if (!update_entrypoint) - return 0; - - ep.linked = true; - ep.owner = user_id; - ep.bucket = bucket; - ret = svc.bucket->store_bucket_entrypoint_info( - ctx, meta_key, ep, false, real_time(), pattrs, &rot, y, dpp); - if (ret < 0) - goto done_err; - - return 0; - -done_err: - int r = do_unlink_bucket(ctx, user_id, bucket, true, y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed unlinking bucket on error cleanup: " - << cpp_strerror(-r) << dendl; - } - return ret; -} - -int RGWBucketCtl::unlink_bucket(const rgw_user& user_id, const rgw_bucket& bucket, optional_yield y, const DoutPrefixProvider *dpp, bool update_entrypoint) -{ - return bm_handler->call([&](RGWSI_Bucket_EP_Ctx& ctx) { - return do_unlink_bucket(ctx, user_id, bucket, update_entrypoint, y, dpp); - }); -} - -int RGWBucketCtl::do_unlink_bucket(RGWSI_Bucket_EP_Ctx& ctx, - const rgw_user& user_id, - const rgw_bucket& bucket, - bool update_entrypoint, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - int ret = svc.user->remove_bucket(dpp, user_id, bucket, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: error removing bucket from directory: " - << cpp_strerror(-ret)<< dendl; - } - - if (!update_entrypoint) - return 0; - - RGWBucketEntryPoint ep; - RGWObjVersionTracker ot; - map attrs; - string meta_key = RGWSI_Bucket::get_entrypoint_meta_key(bucket); - ret = svc.bucket->read_bucket_entrypoint_info(ctx, meta_key, &ep, &ot, nullptr, &attrs, y, dpp); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - - if (!ep.linked) - return 0; - - if (ep.owner != user_id) { - ldpp_dout(dpp, 0) << "bucket entry point user mismatch, can't unlink bucket: " << ep.owner << " != " << user_id << dendl; - return -EINVAL; - } - - ep.linked = false; - return svc.bucket->store_bucket_entrypoint_info(ctx, meta_key, ep, false, real_time(), &attrs, &ot, y, dpp); -} - -// TODO: remove RGWRados dependency for bucket listing -int RGWBucketCtl::chown(rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, - const rgw_user& user_id, const std::string& display_name, - const std::string& marker, optional_yield y, const DoutPrefixProvider *dpp) -{ - map common_prefixes; - - rgw::sal::Bucket::ListParams params; - rgw::sal::Bucket::ListResults results; - - params.list_versions = true; - params.allow_unordered = true; - params.marker = marker; - - int count = 0; - int max_entries = 1000; - - //Loop through objects and update object acls to point to bucket owner - - do { - RGWObjectCtx obj_ctx(driver); - results.objs.clear(); - int ret = bucket->list(dpp, params, max_entries, results, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: list objects failed: " << cpp_strerror(-ret) << dendl; - return ret; - } - - params.marker = results.next_marker; - count += 
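// do_link_bucket() above pairs its writes with a compensating action: once
// the bucket has been added to the user's index, any later failure (such as
// storing the entrypoint) triggers a best-effort unlink so the index is not
// left pointing at a half-linked bucket. The control-flow skeleton:
static int add_to_user_index() { return 0; }   // cf. svc.user->add_bucket()
static int store_entrypoint()  { return 0; }   // cf. store_bucket_entrypoint_info()
static int unlink_from_user()  { return 0; }   // cf. do_unlink_bucket()

static int link_bucket_sketch(bool update_entrypoint) {
  int ret = add_to_user_index();
  if (ret < 0)
    goto done_err;
  if (!update_entrypoint)
    return 0;
  ret = store_entrypoint();
  if (ret < 0)
    goto done_err;
  return 0;
done_err:
  // rollback is best-effort: its own failure is logged, not propagated
  (void)unlink_from_user();
  return ret;
}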
results.objs.size(); - - for (const auto& obj : results.objs) { - std::unique_ptr r_obj = bucket->get_object(obj.key); - - ret = r_obj->get_obj_attrs(y, dpp); - if (ret < 0){ - ldpp_dout(dpp, 0) << "ERROR: failed to read object " << obj.key.name << cpp_strerror(-ret) << dendl; - continue; - } - const auto& aiter = r_obj->get_attrs().find(RGW_ATTR_ACL); - if (aiter == r_obj->get_attrs().end()) { - ldpp_dout(dpp, 0) << "ERROR: no acls found for object " << obj.key.name << " .Continuing with next object." << dendl; - continue; - } else { - bufferlist& bl = aiter->second; - RGWAccessControlPolicy policy(driver->ctx()); - ACLOwner owner; - try { - decode(policy, bl); - owner = policy.get_owner(); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: decode policy failed" << err.what() - << dendl; - return -EIO; - } - - //Get the ACL from the policy - RGWAccessControlList& acl = policy.get_acl(); - - //Remove grant that is set to old owner - acl.remove_canon_user_grant(owner.get_id()); - - //Create a grant and add grant - ACLGrant grant; - grant.set_canon(user_id, display_name, RGW_PERM_FULL_CONTROL); - acl.add_grant(&grant); - - //Update the ACL owner to the new user - owner.set_id(user_id); - owner.set_name(display_name); - policy.set_owner(owner); - - bl.clear(); - encode(policy, bl); - - r_obj->set_atomic(); - map attrs; - attrs[RGW_ATTR_ACL] = bl; - ret = r_obj->set_obj_attrs(dpp, &attrs, nullptr, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: modify attr failed " << cpp_strerror(-ret) << dendl; - return ret; - } - } - } - cerr << count << " objects processed in " << bucket - << ". Next marker " << params.marker.name << std::endl; - } while(results.is_truncated); - return 0; -} - -int RGWBucketCtl::read_bucket_stats(const rgw_bucket& bucket, - RGWBucketEnt *result, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - return call([&](RGWSI_Bucket_X_Ctx& ctx) { - return svc.bucket->read_bucket_stats(ctx, bucket, result, y, dpp); - }); -} - -int RGWBucketCtl::read_buckets_stats(map& m, - optional_yield y, const DoutPrefixProvider *dpp) -{ - return call([&](RGWSI_Bucket_X_Ctx& ctx) { - return svc.bucket->read_buckets_stats(ctx, m, y, dpp); - }); -} - -int RGWBucketCtl::sync_user_stats(const DoutPrefixProvider *dpp, - const rgw_user& user_id, - const RGWBucketInfo& bucket_info, - optional_yield y, - RGWBucketEnt* pent) -{ - RGWBucketEnt ent; - if (!pent) { - pent = &ent; - } - int r = svc.bi->read_stats(dpp, bucket_info, pent, null_yield); - if (r < 0) { - ldpp_dout(dpp, 20) << __func__ << "(): failed to read bucket stats (r=" << r << ")" << dendl; - return r; - } - - return svc.user->flush_bucket_stats(dpp, user_id, *pent, y); -} - -int RGWBucketCtl::get_sync_policy_handler(std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef *phandler, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - int r = call([&](RGWSI_Bucket_X_Ctx& ctx) { - return svc.bucket_sync->get_policy_handler(ctx, zone, bucket, phandler, y, dpp); - }); - if (r < 0) { - ldpp_dout(dpp, 20) << __func__ << "(): failed to get policy handler for bucket=" << bucket << " (r=" << r << ")" << dendl; - return r; - } - return 0; -} - -int RGWBucketCtl::bucket_exports_data(const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - - RGWBucketSyncPolicyHandlerRef handler; - - int r = get_sync_policy_handler(std::nullopt, bucket, &handler, y, dpp); - if (r < 0) { - return r; - } - - return handler->bucket_exports_data(); -} - -int RGWBucketCtl::bucket_imports_data(const 
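// The chown() loop above never materializes the whole bucket: it pages with
// a marker and an is_truncated flag, rewriting each object's ACL as it goes.
// The pagination skeleton, with a stubbed lister (names are illustrative):
#include <string>
#include <vector>

struct Page {
  std::vector<std::string> keys;
  std::string next_marker;
  bool truncated = false;
};

static Page list_page(const std::string& /*marker*/, int /*max_entries*/) {
  return {};  // stand-in for rgw::sal::Bucket::list()
}

static void for_each_object(void (*visit)(const std::string&)) {
  std::string marker;
  Page page;
  do {
    page = list_page(marker, 1000);  // max_entries = 1000, as in chown()
    for (const auto& key : page.keys)
      visit(key);                    // e.g. decode ACL, swap owner, re-encode
    marker = page.next_marker;       // resume point for the next page
  } while (page.truncated);
}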
rgw_bucket& bucket, - optional_yield y, const DoutPrefixProvider *dpp) -{ - - RGWBucketSyncPolicyHandlerRef handler; - - int r = get_sync_policy_handler(std::nullopt, bucket, &handler, y, dpp); - if (r < 0) { - return r; - } - - return handler->bucket_imports_data(); -} - -RGWBucketMetadataHandlerBase* RGWBucketMetaHandlerAllocator::alloc() -{ - return new RGWBucketMetadataHandler(); -} - -RGWBucketInstanceMetadataHandlerBase* RGWBucketInstanceMetaHandlerAllocator::alloc(rgw::sal::Driver* driver) -{ - return new RGWBucketInstanceMetadataHandler(driver); -} - -RGWBucketMetadataHandlerBase* RGWArchiveBucketMetaHandlerAllocator::alloc() -{ - return new RGWArchiveBucketMetadataHandler(); -} - -RGWBucketInstanceMetadataHandlerBase* RGWArchiveBucketInstanceMetaHandlerAllocator::alloc(rgw::sal::Driver* driver) -{ - return new RGWArchiveBucketInstanceMetadataHandler(driver); -} - - -void RGWBucketEntryPoint::generate_test_instances(list& o) -{ - RGWBucketEntryPoint *bp = new RGWBucketEntryPoint(); - init_bucket(&bp->bucket, "tenant", "bucket", "pool", ".index.pool", "marker", "10"); - bp->owner = "owner"; - bp->creation_time = ceph::real_clock::from_ceph_timespec({ceph_le32(2), ceph_le32(3)}); - - o.push_back(bp); - o.push_back(new RGWBucketEntryPoint); -} - -void RGWBucketEntryPoint::dump(Formatter *f) const -{ - encode_json("bucket", bucket, f); - encode_json("owner", owner, f); - utime_t ut(creation_time); - encode_json("creation_time", ut, f); - encode_json("linked", linked, f); - encode_json("has_bucket_info", has_bucket_info, f); - if (has_bucket_info) { - encode_json("old_bucket_info", old_bucket_info, f); - } -} - -void RGWBucketEntryPoint::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("bucket", bucket, obj); - JSONDecoder::decode_json("owner", owner, obj); - utime_t ut; - JSONDecoder::decode_json("creation_time", ut, obj); - creation_time = ut.to_real_time(); - JSONDecoder::decode_json("linked", linked, obj); - JSONDecoder::decode_json("has_bucket_info", has_bucket_info, obj); - if (has_bucket_info) { - JSONDecoder::decode_json("old_bucket_info", old_bucket_info, obj); - } -} - diff --git a/src/rgw/store/rados/rgw_bucket.h b/src/rgw/store/rados/rgw_bucket.h deleted file mode 100644 index 636a1f2f6c0..00000000000 --- a/src/rgw/store/rados/rgw_bucket.h +++ /dev/null @@ -1,765 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include -#include -#include - -#include -#include - -#include "include/types.h" -#include "rgw_common.h" -#include "rgw_tools.h" -#include "rgw_metadata.h" - -#include "rgw_string.h" -#include "rgw_sal.h" - -#include "common/Formatter.h" -#include "common/lru_map.h" -#include "common/ceph_time.h" - -#include "rgw_formats.h" - -#include "services/svc_bucket_types.h" -#include "services/svc_bucket_sync.h" - -// define as static when RGWBucket implementation completes -extern void rgw_get_buckets_obj(const rgw_user& user_id, std::string& buckets_obj_id); - -class RGWSI_Meta; -class RGWBucketMetadataHandler; -class RGWBucketInstanceMetadataHandler; -class RGWUserCtl; -class RGWBucketCtl; -class RGWZone; -struct RGWZoneParams; - -extern void init_bucket(rgw_bucket *b, const char *t, const char *n, const char *dp, const char *ip, const char *m, const char *id); -extern int rgw_bucket_parse_bucket_key(CephContext *cct, const std::string& key, - rgw_bucket* bucket, int *shard_id); - -extern std::string rgw_make_bucket_entry_name(const std::string& tenant_name, - const std::string& 
bucket_name); - -extern void rgw_parse_url_bucket(const std::string& bucket, - const std::string& auth_tenant, - std::string &tenant_name, std::string &bucket_name); - -// this is used as a filter to RGWRados::cls_bucket_list_ordered; it -// conforms to the type RGWBucketListNameFilter -extern bool rgw_bucket_object_check_filter(const std::string& oid); - -void init_default_bucket_layout(CephContext *cct, rgw::BucketLayout& layout, - const RGWZone& zone, - std::optional shards, - std::optional type); - -struct RGWBucketCompleteInfo { - RGWBucketInfo info; - std::map attrs; - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; - -class RGWBucketEntryMetadataObject : public RGWMetadataObject { - RGWBucketEntryPoint ep; - std::map attrs; -public: - RGWBucketEntryMetadataObject(RGWBucketEntryPoint& _ep, const obj_version& v, real_time m) : ep(_ep) { - objv = v; - mtime = m; - set_pattrs (&attrs); - } - RGWBucketEntryMetadataObject(RGWBucketEntryPoint& _ep, const obj_version& v, real_time m, std::map&& _attrs) : - ep(_ep), attrs(std::move(_attrs)) { - objv = v; - mtime = m; - set_pattrs (&attrs); - } - - void dump(Formatter *f) const override { - ep.dump(f); - } - - RGWBucketEntryPoint& get_ep() { - return ep; - } - - std::map& get_attrs() { - return attrs; - } -}; - -class RGWBucketInstanceMetadataObject : public RGWMetadataObject { - RGWBucketCompleteInfo info; -public: - RGWBucketInstanceMetadataObject() {} - RGWBucketInstanceMetadataObject(RGWBucketCompleteInfo& i, const obj_version& v, real_time m) : info(i) { - objv = v; - mtime = m; - } - - void dump(Formatter *f) const override { - info.dump(f); - } - - void decode_json(JSONObj *obj) { - info.decode_json(obj); - } - - RGWBucketCompleteInfo& get_bci() { - return info; - } - RGWBucketInfo& get_bucket_info() { - return info.info; - } -}; - -/** - * store a list of the user's buckets, with associated functinos. - */ -class RGWUserBuckets { - std::map buckets; - -public: - RGWUserBuckets() = default; - RGWUserBuckets(RGWUserBuckets&&) = default; - - RGWUserBuckets& operator=(const RGWUserBuckets&) = default; - - void encode(bufferlist& bl) const { - using ceph::encode; - encode(buckets, bl); - } - void decode(bufferlist::const_iterator& bl) { - using ceph::decode; - decode(buckets, bl); - } - /** - * Check if the user owns a bucket by the given name. - */ - bool owns(std::string& name) { - std::map::iterator iter; - iter = buckets.find(name); - return (iter != buckets.end()); - } - - /** - * Add a (created) bucket to the user's bucket list. - */ - void add(const RGWBucketEnt& bucket) { - buckets[bucket.bucket.name] = bucket; - } - - /** - * Remove a bucket from the user's list by name. - */ - void remove(const std::string& name) { - std::map::iterator iter; - iter = buckets.find(name); - if (iter != buckets.end()) { - buckets.erase(iter); - } - } - - /** - * Get the user's buckets as a map. 
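// RGWUserBuckets above follows the usual Ceph encoding convention: member
// encode()/decode() plus a WRITE_CLASS_ENCODER macro (applied right after
// the class) that generates matching free-function overloads. A stand-in
// sketch of that convention; Buffer here is a toy, not ceph::bufferlist:
#include <cstdint>
#include <vector>

struct Buffer { std::vector<uint8_t> bytes; };

struct TinyRecord {
  uint8_t count = 0;
  void encode(Buffer& bl) const { bl.bytes.push_back(count); }
  void decode(const Buffer& bl) { count = bl.bytes.empty() ? 0 : bl.bytes[0]; }
};

// The macro expands, in spirit, to forwarding overloads like these, so that
// generic container encoders can serialize any conforming type:
inline void encode(const TinyRecord& r, Buffer& bl) { r.encode(bl); }
inline void decode(TinyRecord& r, const Buffer& bl) { r.decode(bl); }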
- */ - std::map& get_buckets() { return buckets; } - - /** - * Cleanup data structure - */ - void clear() { buckets.clear(); } - - size_t count() { return buckets.size(); } -}; -WRITE_CLASS_ENCODER(RGWUserBuckets) - -class RGWBucketMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE { -public: - virtual ~RGWBucketMetadataHandlerBase() {} - virtual void init(RGWSI_Bucket *bucket_svc, - RGWBucketCtl *bucket_ctl) = 0; - -}; - -class RGWBucketInstanceMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE { -public: - virtual ~RGWBucketInstanceMetadataHandlerBase() {} - virtual void init(RGWSI_Zone *zone_svc, - RGWSI_Bucket *bucket_svc, - RGWSI_BucketIndex *bi_svc) = 0; -}; - -class RGWBucketMetaHandlerAllocator { -public: - static RGWBucketMetadataHandlerBase *alloc(); -}; - -class RGWBucketInstanceMetaHandlerAllocator { -public: - static RGWBucketInstanceMetadataHandlerBase *alloc(rgw::sal::Driver* driver); -}; - -class RGWArchiveBucketMetaHandlerAllocator { -public: - static RGWBucketMetadataHandlerBase *alloc(); -}; - -class RGWArchiveBucketInstanceMetaHandlerAllocator { -public: - static RGWBucketInstanceMetadataHandlerBase *alloc(rgw::sal::Driver* driver); -}; - -extern int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, rgw_obj_key& key); - -extern int rgw_object_get_attr(rgw::sal::Driver* driver, rgw::sal::Object* obj, - const char* attr_name, bufferlist& out_bl, - optional_yield y); - -extern void check_bad_user_bucket_mapping(rgw::sal::Driver* driver, rgw::sal::User* user, bool fix, optional_yield y, const DoutPrefixProvider *dpp); - -struct RGWBucketAdminOpState { - rgw_user uid; - std::string display_name; - std::string bucket_name; - std::string bucket_id; - std::string object_name; - std::string new_bucket_name; - - bool list_buckets; - bool stat_buckets; - bool check_objects; - bool fix_index; - bool delete_child_objects; - bool bucket_stored; - bool sync_bucket; - int max_aio = 0; - - std::unique_ptr bucket; - - RGWQuotaInfo quota; - RGWRateLimitInfo ratelimit_info; - - void set_fetch_stats(bool value) { stat_buckets = value; } - void set_check_objects(bool value) { check_objects = value; } - void set_fix_index(bool value) { fix_index = value; } - void set_delete_children(bool value) { delete_child_objects = value; } - - void set_max_aio(int value) { max_aio = value; } - - void set_user_id(const rgw_user& user_id) { - if (!user_id.empty()) - uid = user_id; - } - void set_tenant(const std::string& tenant_str) { - uid.tenant = tenant_str; - } - void set_bucket_name(const std::string& bucket_str) { - bucket_name = bucket_str; - } - void set_object(std::string& object_str) { - object_name = object_str; - } - void set_new_bucket_name(std::string& new_bucket_str) { - new_bucket_name = new_bucket_str; - } - void set_quota(RGWQuotaInfo& value) { - quota = value; - } - void set_bucket_ratelimit(RGWRateLimitInfo& value) { - ratelimit_info = value; - } - - - void set_sync_bucket(bool value) { sync_bucket = value; } - - rgw_user& get_user_id() { return uid; } - std::string& get_user_display_name() { return display_name; } - std::string& get_bucket_name() { return bucket_name; } - std::string& get_object_name() { return object_name; } - std::string& get_tenant() { return uid.tenant; } - - rgw::sal::Bucket* get_bucket() { return bucket.get(); } - void set_bucket(std::unique_ptr _bucket) { - bucket = std::move(_bucket); - bucket_stored = true; - } - - void set_bucket_id(const std::string& bi) { - bucket_id = bi; - } - const 
std::string& get_bucket_id() { return bucket_id; } - - bool will_fetch_stats() { return stat_buckets; } - bool will_fix_index() { return fix_index; } - bool will_delete_children() { return delete_child_objects; } - bool will_check_objects() { return check_objects; } - bool is_user_op() { return !uid.empty(); } - bool is_system_op() { return uid.empty(); } - bool has_bucket_stored() { return bucket_stored; } - int get_max_aio() { return max_aio; } - bool will_sync_bucket() { return sync_bucket; } - - RGWBucketAdminOpState() : list_buckets(false), stat_buckets(false), check_objects(false), - fix_index(false), delete_child_objects(false), - bucket_stored(false), sync_bucket(true) {} -}; - - -/* - * A simple wrapper class for administrative bucket operations - */ -class RGWBucket { - RGWUserBuckets buckets; - rgw::sal::Driver* driver; - RGWAccessHandle handle; - - std::unique_ptr bucket; - std::unique_ptr user; - - bool failure; - - RGWObjVersionTracker ep_objv; // entrypoint object version - -public: - RGWBucket() : driver(NULL), handle(NULL), failure(false) {} - int init(rgw::sal::Driver* storage, RGWBucketAdminOpState& op_state, optional_yield y, - const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - int check_bad_index_multipart(RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, - const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - int check_object_index(const DoutPrefixProvider *dpp, - RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y, - std::string *err_msg = NULL); - - int check_index(const DoutPrefixProvider *dpp, - RGWBucketAdminOpState& op_state, - std::map& existing_stats, - std::map& calculated_stats, - std::string *err_msg = NULL); - - int chown(RGWBucketAdminOpState& op_state, const std::string& marker, - optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - int set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - int remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, std::string *err_msg = NULL); - int policy_bl_to_stream(bufferlist& bl, std::ostream& o); - int get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp); - int sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - void clear_failure() { failure = false; } - - const RGWBucketInfo& get_bucket_info() const { return bucket->get_info(); } -}; - -class RGWBucketAdminOp { -public: - static int get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); - static int get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWAccessControlPolicy& policy, const DoutPrefixProvider *dpp); - static int dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - std::ostream& os, const DoutPrefixProvider *dpp); - - static int unlink(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); - static int link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - static int chown(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const std::string& marker, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - static int check_index(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& 
flusher, optional_yield y, const DoutPrefixProvider *dpp); - - static int remove_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, optional_yield y, - const DoutPrefixProvider *dpp, bool bypass_gc = false, bool keep_index_consistent = true); - static int remove_object(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); - static int info(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y, const DoutPrefixProvider *dpp); - static int limit_check(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - const std::list& user_ids, - RGWFormatterFlusher& flusher, optional_yield y, - const DoutPrefixProvider *dpp, - bool warnings_only = false); - static int set_quota(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp); - - static int list_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); - - static int clear_stale_instances(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); - static int fix_lc_shards(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp); - static int fix_obj_expiry(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, - RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp, bool dry_run = false); - - static int sync_bucket(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); -}; - -struct rgw_ep_info { - RGWBucketEntryPoint &ep; - std::map& attrs; - RGWObjVersionTracker ep_objv; - rgw_ep_info(RGWBucketEntryPoint &ep, std::map& attrs) - : ep(ep), attrs(attrs) {} -}; - -class RGWBucketCtl { - CephContext *cct; - - struct Svc { - RGWSI_Zone *zone{nullptr}; - RGWSI_Bucket *bucket{nullptr}; - RGWSI_Bucket_Sync *bucket_sync{nullptr}; - RGWSI_BucketIndex *bi{nullptr}; - RGWSI_User* user = nullptr; - } svc; - - struct Ctl { - RGWUserCtl *user{nullptr}; - } ctl; - - RGWBucketMetadataHandler *bm_handler; - RGWBucketInstanceMetadataHandler *bmi_handler; - - RGWSI_Bucket_BE_Handler bucket_be_handler; /* bucket backend handler */ - RGWSI_BucketInstance_BE_Handler bi_be_handler; /* bucket instance backend handler */ - - int call(std::function f); - -public: - RGWBucketCtl(RGWSI_Zone *zone_svc, - RGWSI_Bucket *bucket_svc, - RGWSI_Bucket_Sync *bucket_sync_svc, - RGWSI_BucketIndex *bi_svc, - RGWSI_User* user_svc); - - void init(RGWUserCtl *user_ctl, - RGWBucketMetadataHandler *_bm_handler, - RGWBucketInstanceMetadataHandler *_bmi_handler, - RGWDataChangesLog *datalog, - const DoutPrefixProvider *dpp); - - struct Bucket { - struct GetParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - real_time *mtime{nullptr}; - std::map *attrs{nullptr}; - rgw_cache_entry_info *cache_info{nullptr}; - boost::optional refresh_version; - std::optional bectx_params; - - GetParams() {} - - GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - GetParams& set_mtime(ceph::real_time *_mtime) { - mtime = _mtime; - return *this; - } - - GetParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return *this; - } - - GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { - cache_info = _cache_info; - return *this; - } - - GetParams& set_refresh_version(const obj_version& _refresh_version) { 
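// The Bucket::GetParams/PutParams structs above use the chained-setter
// ("named parameter") idiom: every set_*() returns *this, so a call site
// composes only the options it needs in one expression. Minimal sketch:
struct GetParamsSketch {
  const int* mtime = nullptr;
  bool want_refresh = false;

  GetParamsSketch& set_mtime(const int* m) { mtime = m; return *this; }
  GetParamsSketch& set_refresh(bool r) { want_refresh = r; return *this; }
};

static int read_entry(const GetParamsSketch& p = {}) {  // hypothetical consumer
  return p.want_refresh ? 1 : 0;
}

// Usage: defaults stay implicit, options read like keyword arguments.
static int demo() {
  return read_entry(GetParamsSketch().set_refresh(true));
}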
- refresh_version = _refresh_version; - return *this; - } - - GetParams& set_bectx_params(std::optional _bectx_params) { - bectx_params = _bectx_params; - return *this; - } - }; - - struct PutParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time mtime; - bool exclusive{false}; - std::map *attrs{nullptr}; - - PutParams() {} - - PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - PutParams& set_mtime(const ceph::real_time& _mtime) { - mtime = _mtime; - return *this; - } - - PutParams& set_exclusive(bool _exclusive) { - exclusive = _exclusive; - return *this; - } - - PutParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return *this; - } - }; - - struct RemoveParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - - RemoveParams() {} - - RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - }; - }; - - struct BucketInstance { - struct GetParams { - real_time *mtime{nullptr}; - std::map *attrs{nullptr}; - rgw_cache_entry_info *cache_info{nullptr}; - boost::optional refresh_version; - RGWObjVersionTracker *objv_tracker{nullptr}; - std::optional bectx_params; - - GetParams() {} - - GetParams& set_mtime(ceph::real_time *_mtime) { - mtime = _mtime; - return *this; - } - - GetParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return *this; - } - - GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { - cache_info = _cache_info; - return *this; - } - - GetParams& set_refresh_version(const obj_version& _refresh_version) { - refresh_version = _refresh_version; - return *this; - } - - GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - GetParams& set_bectx_params(std::optional _bectx_params) { - bectx_params = _bectx_params; - return *this; - } - }; - - struct PutParams { - std::optional orig_info; /* nullopt: orig_info was not fetched, - nullptr: orig_info was not found (new bucket instance */ - ceph::real_time mtime; - bool exclusive{false}; - std::map *attrs{nullptr}; - RGWObjVersionTracker *objv_tracker{nullptr}; - - PutParams() {} - - PutParams& set_orig_info(RGWBucketInfo *pinfo) { - orig_info = pinfo; - return *this; - } - - PutParams& set_mtime(const ceph::real_time& _mtime) { - mtime = _mtime; - return *this; - } - - PutParams& set_exclusive(bool _exclusive) { - exclusive = _exclusive; - return *this; - } - - PutParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return *this; - } - - PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - }; - - struct RemoveParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - - RemoveParams() {} - - RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - }; - }; - - /* bucket entrypoint */ - int read_bucket_entrypoint_info(const rgw_bucket& bucket, - RGWBucketEntryPoint *info, - optional_yield y, - const DoutPrefixProvider *dpp, - const Bucket::GetParams& params = {}); - int store_bucket_entrypoint_info(const rgw_bucket& bucket, - RGWBucketEntryPoint& info, - optional_yield y, - const DoutPrefixProvider *dpp, - const Bucket::PutParams& params = {}); - int remove_bucket_entrypoint_info(const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp, - const Bucket::RemoveParams& params = {}); - - /* bucket instance */ - int 
read_bucket_instance_info(const rgw_bucket& bucket, - RGWBucketInfo *info, - optional_yield y, - const DoutPrefixProvider *dpp, - const BucketInstance::GetParams& params = {}); - int store_bucket_instance_info(const rgw_bucket& bucket, - RGWBucketInfo& info, - optional_yield y, - const DoutPrefixProvider *dpp, - const BucketInstance::PutParams& params = {}); - int remove_bucket_instance_info(const rgw_bucket& bucket, - RGWBucketInfo& info, - optional_yield y, - const DoutPrefixProvider *dpp, - const BucketInstance::RemoveParams& params = {}); - - /* - * bucket_id may or may not be provided - * - * ep_objv_tracker might not be populated even if provided. Will only be set if entrypoint is read - * (that is: if bucket_id is empty). - */ - int read_bucket_info(const rgw_bucket& bucket, - RGWBucketInfo *info, - optional_yield y, - const DoutPrefixProvider *dpp, - const BucketInstance::GetParams& params = {}, - RGWObjVersionTracker *ep_objv_tracker = nullptr); - - - int set_bucket_instance_attrs(RGWBucketInfo& bucket_info, - std::map& attrs, - RGWObjVersionTracker *objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp); - - /* user/bucket */ - int link_bucket(const rgw_user& user_id, - const rgw_bucket& bucket, - ceph::real_time creation_time, - optional_yield y, - const DoutPrefixProvider *dpp, - bool update_entrypoint = true, - rgw_ep_info *pinfo = nullptr); - - int unlink_bucket(const rgw_user& user_id, - const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp, - bool update_entrypoint = true); - - int chown(rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, - const rgw_user& user_id, const std::string& display_name, - const std::string& marker, optional_yield y, const DoutPrefixProvider *dpp); - - int read_buckets_stats(std::map& m, - optional_yield y, - const DoutPrefixProvider *dpp); - - int read_bucket_stats(const rgw_bucket& bucket, - RGWBucketEnt *result, - optional_yield y, - const DoutPrefixProvider *dpp); - - /* quota related */ - int sync_user_stats(const DoutPrefixProvider *dpp, - const rgw_user& user_id, const RGWBucketInfo& bucket_info, - optional_yield y, - RGWBucketEnt* pent); - - /* bucket sync */ - int get_sync_policy_handler(std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef *phandler, - optional_yield y, - const DoutPrefixProvider *dpp); - int bucket_exports_data(const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp); - int bucket_imports_data(const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp); - -private: - int convert_old_bucket_info(RGWSI_Bucket_X_Ctx& ctx, - const rgw_bucket& bucket, - optional_yield y, - const DoutPrefixProvider *dpp); - - int do_store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, - const rgw_bucket& bucket, - RGWBucketInfo& info, - optional_yield y, - const DoutPrefixProvider *dpp, - const BucketInstance::PutParams& params); - - int do_store_linked_bucket_info(RGWSI_Bucket_X_Ctx& ctx, - RGWBucketInfo& info, - RGWBucketInfo *orig_info, - bool exclusive, real_time mtime, - obj_version *pep_objv, - std::map *pattrs, - bool create_entry_point, - optional_yield, - const DoutPrefixProvider *dpp); - - int do_link_bucket(RGWSI_Bucket_EP_Ctx& ctx, - const rgw_user& user, - const rgw_bucket& bucket, - ceph::real_time creation_time, - bool update_entrypoint, - rgw_ep_info *pinfo, - optional_yield y, - const DoutPrefixProvider *dpp); - - int do_unlink_bucket(RGWSI_Bucket_EP_Ctx& ctx, - const rgw_user& user_id, - const rgw_bucket& bucket, - bool 
update_entrypoint, - optional_yield y, - const DoutPrefixProvider *dpp); - -}; - -bool rgw_find_bucket_by_id(const DoutPrefixProvider *dpp, CephContext *cct, rgw::sal::Driver* driver, const std::string& marker, - const std::string& bucket_id, rgw_bucket* bucket_out); diff --git a/src/rgw/store/rados/rgw_bucket_sync.cc b/src/rgw/store/rados/rgw_bucket_sync.cc deleted file mode 100644 index 5fd81c53b1e..00000000000 --- a/src/rgw/store/rados/rgw_bucket_sync.cc +++ /dev/null @@ -1,941 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_common.h" -#include "rgw_bucket_sync.h" -#include "rgw_data_sync.h" -#include "rgw_zone.h" - -#include "services/svc_zone.h" -#include "services/svc_bucket_sync.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -ostream& operator<<(ostream& os, const rgw_sync_bucket_entity& e) { - os << "{b=" << rgw_sync_bucket_entities::bucket_key(e.bucket) << ",z=" << e.zone.value_or(rgw_zone_id()) << ",az=" << (int)e.all_zones << "}"; - return os; -} - -ostream& operator<<(ostream& os, const rgw_sync_bucket_pipe& pipe) { - os << "{s=" << pipe.source << ",d=" << pipe.dest << "}"; - return os; -} - -ostream& operator<<(ostream& os, const rgw_sync_bucket_entities& e) { - os << "{b=" << rgw_sync_bucket_entities::bucket_key(e.bucket) << ",z=" << e.zones.value_or(std::set()) << "}"; - return os; -} - -ostream& operator<<(ostream& os, const rgw_sync_bucket_pipes& pipe) { - os << "{id=" << pipe.id << ",s=" << pipe.source << ",d=" << pipe.dest << "}"; - return os; -} - -static std::vector filter_relevant_pipes(const std::vector& pipes, - const rgw_zone_id& source_zone, - const rgw_zone_id& dest_zone) -{ - std::vector relevant_pipes; - for (auto& p : pipes) { - if (p.source.match_zone(source_zone) && - p.dest.match_zone(dest_zone)) { - for (auto pipe : p.expand()) { - pipe.source.apply_zone(source_zone); - pipe.dest.apply_zone(dest_zone); - relevant_pipes.push_back(pipe); - } - } - } - - return relevant_pipes; -} - -static bool is_wildcard_bucket(const rgw_bucket& bucket) -{ - return bucket.name.empty(); -} - -void rgw_sync_group_pipe_map::dump(ceph::Formatter *f) const -{ - encode_json("zone", zone.id, f); - encode_json("buckets", rgw_sync_bucket_entities::bucket_key(bucket), f); - encode_json("sources", sources, f); - encode_json("dests", dests, f); -} - - -template -void rgw_sync_group_pipe_map::try_add_to_pipe_map(const rgw_zone_id& source_zone, - const rgw_zone_id& dest_zone, - const std::vector& pipes, - zb_pipe_map_t *pipe_map, - CB1 filter_cb, - CB2 call_filter_cb) -{ - if (!filter_cb(source_zone, nullopt, dest_zone, nullopt)) { - return; - } - auto relevant_pipes = filter_relevant_pipes(pipes, source_zone, dest_zone); - - for (auto& pipe : relevant_pipes) { - rgw_sync_bucket_entity zb; - if (!call_filter_cb(pipe, &zb)) { - continue; - } - pipe_map->insert(make_pair(zb, pipe)); - } -} - -template -void rgw_sync_group_pipe_map::try_add_source(const rgw_zone_id& source_zone, - const rgw_zone_id& dest_zone, - const std::vector& pipes, - CB filter_cb) -{ - return try_add_to_pipe_map(source_zone, dest_zone, pipes, - &sources, - filter_cb, - [&](const rgw_sync_bucket_pipe& pipe, rgw_sync_bucket_entity *zb) { - *zb = rgw_sync_bucket_entity{source_zone, pipe.source.get_bucket()}; - return filter_cb(source_zone, zb->bucket, dest_zone, pipe.dest.get_bucket()); - }); -} - -template -void rgw_sync_group_pipe_map::try_add_dest(const rgw_zone_id& source_zone, - const rgw_zone_id& dest_zone, - 
const std::vector& pipes, - CB filter_cb) -{ - return try_add_to_pipe_map(source_zone, dest_zone, pipes, - &dests, - filter_cb, - [&](const rgw_sync_bucket_pipe& pipe, rgw_sync_bucket_entity *zb) { - *zb = rgw_sync_bucket_entity{dest_zone, pipe.dest.get_bucket()}; - return filter_cb(source_zone, pipe.source.get_bucket(), dest_zone, zb->bucket); - }); -} - -using zb_pipe_map_t = rgw_sync_group_pipe_map::zb_pipe_map_t; - -pair rgw_sync_group_pipe_map::find_pipes(const zb_pipe_map_t& m, - const rgw_zone_id& zone, - std::optional b) const -{ - if (!b) { - return m.equal_range(rgw_sync_bucket_entity{zone, rgw_bucket()}); - } - - auto zb = rgw_sync_bucket_entity{zone, *b}; - - auto range = m.equal_range(zb); - if (range.first == range.second && - !is_wildcard_bucket(*b)) { - /* couldn't find the specific bucket, try to find by wildcard */ - zb.bucket = rgw_bucket(); - range = m.equal_range(zb); - } - - return range; -} - - -template -void rgw_sync_group_pipe_map::init(const DoutPrefixProvider *dpp, - CephContext *cct, - const rgw_zone_id& _zone, - std::optional _bucket, - const rgw_sync_policy_group& group, - rgw_sync_data_flow_group *_default_flow, - std::set *_pall_zones, - CB filter_cb) { - zone = _zone; - bucket = _bucket; - default_flow = _default_flow; - pall_zones = _pall_zones; - - rgw_sync_bucket_entity zb(zone, bucket); - - status = group.status; - - std::vector zone_pipes; - - string bucket_key = (bucket ? bucket->get_key() : "*"); - - /* only look at pipes that touch the specific zone and bucket */ - for (auto& pipe : group.pipes) { - if (pipe.contains_zone_bucket(zone, bucket)) { - ldpp_dout(dpp, 20) << __func__ << "(): pipe_map (zone=" << zone << " bucket=" << bucket_key << "): adding potential pipe: " << pipe << dendl; - zone_pipes.push_back(pipe); - } - } - - const rgw_sync_data_flow_group *pflow; - - if (!group.data_flow.empty()) { - pflow = &group.data_flow; - } else { - if (!default_flow) { - return; - } - pflow = default_flow; - } - - auto& flow = *pflow; - - pall_zones->insert(zone); - - /* symmetrical */ - for (auto& symmetrical_group : flow.symmetrical) { - if (symmetrical_group.zones.find(zone) != symmetrical_group.zones.end()) { - for (auto& z : symmetrical_group.zones) { - if (z != zone) { - pall_zones->insert(z); - try_add_source(z, zone, zone_pipes, filter_cb); - try_add_dest(zone, z, zone_pipes, filter_cb); - } - } - } - } - - /* directional */ - for (auto& rule : flow.directional) { - if (rule.source_zone == zone) { - pall_zones->insert(rule.dest_zone); - try_add_dest(zone, rule.dest_zone, zone_pipes, filter_cb); - } else if (rule.dest_zone == zone) { - pall_zones->insert(rule.source_zone); - try_add_source(rule.source_zone, zone, zone_pipes, filter_cb); - } - } -} - -/* - * find all relevant pipes in our zone that match {dest_bucket} <- {source_zone, source_bucket} - */ -vector rgw_sync_group_pipe_map::find_source_pipes(const rgw_zone_id& source_zone, - std::optional source_bucket, - std::optional dest_bucket) const { - vector result; - - auto range = find_pipes(sources, source_zone, source_bucket); - - for (auto iter = range.first; iter != range.second; ++iter) { - auto pipe = iter->second; - if (pipe.dest.match_bucket(dest_bucket)) { - result.push_back(pipe); - } - } - return result; -} - -/* - * find all relevant pipes in other zones that pull from a specific - * source bucket in out zone {source_bucket} -> {dest_zone, dest_bucket} - */ -vector rgw_sync_group_pipe_map::find_dest_pipes(std::optional source_bucket, - const rgw_zone_id& dest_zone, - std::optional 
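// rgw_sync_group_pipe_map::init() above expands the group's data flow into
// directed edges that touch one zone: a symmetrical group {A,B,C} seen from
// A contributes A<->B and A<->C, while each directional rule contributes a
// single edge. That expansion, reduced to plain types:
#include <set>
#include <string>
#include <utility>
#include <vector>

using Zone = std::string;
using Edge = std::pair<Zone, Zone>;  // {source, dest}

struct FlowSpec {
  std::vector<std::set<Zone>> symmetrical;
  std::vector<Edge> directional;
};

static std::vector<Edge> edges_for(const FlowSpec& flow, const Zone& me) {
  std::vector<Edge> out;
  for (const auto& group : flow.symmetrical) {
    if (!group.count(me))
      continue;
    for (const auto& z : group) {
      if (z == me)
        continue;
      out.push_back({z, me});  // cf. try_add_source(z, zone, ...)
      out.push_back({me, z});  // cf. try_add_dest(zone, z, ...)
    }
  }
  for (const auto& rule : flow.directional) {
    if (rule.first == me)
      out.push_back(rule);     // cf. try_add_dest(zone, rule.dest_zone, ...)
    else if (rule.second == me)
      out.push_back(rule);     // cf. try_add_source(rule.source_zone, zone, ...)
  }
  return out;
}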
dest_bucket) const { - vector result; - - auto range = find_pipes(dests, dest_zone, dest_bucket); - - for (auto iter = range.first; iter != range.second; ++iter) { - auto pipe = iter->second; - if (pipe.source.match_bucket(source_bucket)) { - result.push_back(pipe); - } - } - - return result; -} - -/* - * find all relevant pipes from {source_zone, source_bucket} -> {dest_zone, dest_bucket} - */ -vector rgw_sync_group_pipe_map::find_pipes(const rgw_zone_id& source_zone, - std::optional source_bucket, - const rgw_zone_id& dest_zone, - std::optional dest_bucket) const { - if (dest_zone == zone) { - return find_source_pipes(source_zone, source_bucket, dest_bucket); - } - - if (source_zone == zone) { - return find_dest_pipes(source_bucket, dest_zone, dest_bucket); - } - - return vector(); -} - -void RGWBucketSyncFlowManager::pipe_rules::insert(const rgw_sync_bucket_pipe& pipe) -{ - pipes.push_back(pipe); - - auto ppipe = &pipes.back(); - auto prefix = ppipe->params.source.filter.prefix.value_or(string()); - - prefix_refs.insert(make_pair(prefix, ppipe)); - - for (auto& t : ppipe->params.source.filter.tags) { - string tag = t.key + "=" + t.value; - auto titer = tag_refs.find(tag); - if (titer != tag_refs.end() && - ppipe->params.priority > titer->second->params.priority) { - titer->second = ppipe; - } else { - tag_refs[tag] = ppipe; - } - } -} - -bool RGWBucketSyncFlowManager::pipe_rules::find_basic_info_without_tags(const rgw_obj_key& key, - std::optional *user, - std::optional *acl_translation_owner, - std::optional *storage_class, - rgw_sync_pipe_params::Mode *mode, - bool *need_more_info) const -{ - std::optional owner; - - *need_more_info = false; - - if (prefix_refs.empty()) { - return false; - } - - auto end = prefix_refs.upper_bound(key.name); - auto iter = end; - if (iter != prefix_refs.begin()) { - --iter; - } - if (iter == prefix_refs.end()) { - return false; - } - - if (iter != prefix_refs.begin()) { - iter = prefix_refs.find(iter->first); /* prefix_refs is multimap, find first element - holding that key */ - } - - std::vector iters; - - std::optional priority; - - for (; iter != end; ++iter) { - auto& prefix = iter->first; - if (!boost::starts_with(key.name, prefix)) { - continue; - } - - auto& rule_params = iter->second->params; - auto& filter = rule_params.source.filter; - - if (rule_params.priority > priority) { - priority = rule_params.priority; - - if (!filter.has_tags()) { - iters.clear(); - } - iters.push_back(iter); - - *need_more_info = filter.has_tags(); /* if highest priority filter has tags, then - we can't be sure if it would be used. 
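// find_basic_info_without_tags() above walks a multimap keyed by object-name
// prefix: upper_bound(key) bounds the scan, each candidate is kept only if
// the key really starts with the stored prefix, and the highest-priority
// match wins. A simplified (less windowed) version of that lookup:
#include <map>
#include <optional>
#include <string>

// rules: prefix -> priority. Returns the best priority matching `key`.
static std::optional<int> best_prefix_match(
    const std::multimap<std::string, int>& rules, const std::string& key) {
  std::optional<int> best;
  auto end = rules.upper_bound(key);  // no prefix of key can sort after key
  for (auto it = rules.begin(); it != end; ++it) {
    if (key.compare(0, it->first.size(), it->first) != 0)
      continue;                       // stored prefix does not prefix the key
    if (!best || it->second > *best)
      best = it->second;
  }
  return best;
}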
- We need to first read the info from the source object */ - } - } - - if (iters.empty()) { - return false; - } - - std::optional _user; - std::optional _acl_translation; - std::optional _storage_class; - rgw_sync_pipe_params::Mode _mode{rgw_sync_pipe_params::Mode::MODE_SYSTEM}; - - // make sure all params are the same by saving the first one - // encountered and comparing all subsequent to it - bool first_iter = true; - for (auto& iter : iters) { - const rgw_sync_pipe_params& rule_params = iter->second->params; - if (first_iter) { - _user = rule_params.user; - _acl_translation = rule_params.dest.acl_translation; - _storage_class = rule_params.dest.storage_class; - _mode = rule_params.mode; - first_iter = false; - } else { - // note: three of these == operators are comparing std::optional - // against std::optional; as one would expect they are equal a) - // if both do not contain values or b) if both do and those - // contained values are the same - const bool conflict = - !(_user == rule_params.user && - _acl_translation == rule_params.dest.acl_translation && - _storage_class == rule_params.dest.storage_class && - _mode == rule_params.mode); - if (conflict) { - *need_more_info = true; - return false; - } - } - } - - *user = _user; - if (_acl_translation) { - *acl_translation_owner = _acl_translation->owner; - } - *storage_class = _storage_class; - *mode = _mode; - - return true; -} - -bool RGWBucketSyncFlowManager::pipe_rules::find_obj_params(const rgw_obj_key& key, - const RGWObjTags::tag_map_t& tags, - rgw_sync_pipe_params *params) const -{ - if (prefix_refs.empty()) { - return false; - } - - auto iter = prefix_refs.upper_bound(key.name); - if (iter != prefix_refs.begin()) { - --iter; - } - if (iter == prefix_refs.end()) { - return false; - } - - auto end = prefix_refs.upper_bound(key.name); - auto max = end; - - std::optional priority; - - for (; iter != end; ++iter) { - /* NOTE: this is not the most efficient way to do it, - * a trie data structure would be better - */ - auto& prefix = iter->first; - if (!boost::starts_with(key.name, prefix)) { - continue; - } - - auto& rule_params = iter->second->params; - auto& filter = rule_params.source.filter; - - if (!filter.check_tags(tags)) { - continue; - } - - if (rule_params.priority > priority) { - priority = rule_params.priority; - max = iter; - } - } - - if (max == end) { - return false; - } - - *params = max->second->params; - return true; -} - -/* - * return either the current prefix for s, or the next one if s is not within a prefix - */ - -RGWBucketSyncFlowManager::pipe_rules::prefix_map_t::const_iterator RGWBucketSyncFlowManager::pipe_rules::prefix_search(const std::string& s) const -{ - if (prefix_refs.empty()) { - return prefix_refs.end(); - } - auto next = prefix_refs.upper_bound(s); - auto iter = next; - if (iter != prefix_refs.begin()) { - --iter; - } - if (!boost::starts_with(s, iter->first)) { - return next; - } - - return iter; -} - -void RGWBucketSyncFlowManager::pipe_set::insert(const rgw_sync_bucket_pipe& pipe) { - pipe_map.insert(make_pair(pipe.id, pipe)); - - auto& rules_ref = rules[endpoints_pair(pipe)]; - - if (!rules_ref) { - rules_ref = make_shared(); - } - - rules_ref->insert(pipe); - - pipe_handler h(rules_ref, pipe); - - handlers.insert(h); -} - -void RGWBucketSyncFlowManager::pipe_set::dump(ceph::Formatter *f) const -{ - encode_json("pipes", pipe_map, f); -} - -bool RGWBucketSyncFlowManager::allowed_data_flow(const rgw_zone_id& source_zone, - std::optional source_bucket, - const rgw_zone_id& dest_zone, - 
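// pipe_set::insert() above groups pipes by their (source, dest) endpoints
// pair: the first pipe for a pair allocates a shared pipe_rules object, and
// every later pipe with the same endpoints joins it via shared_ptr. Sketch:
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Rules { std::vector<std::string> pipe_ids; };
using Endpoints = std::pair<std::string, std::string>;  // {source, dest}

struct PipeSet {
  std::map<Endpoints, std::shared_ptr<Rules>> rules;

  void insert(const Endpoints& ep, const std::string& pipe_id) {
    auto& ref = rules[ep];                // default-constructs an empty shared_ptr
    if (!ref)
      ref = std::make_shared<Rules>();    // first pipe for this pair
    ref->pipe_ids.push_back(pipe_id);     // later handlers share one rules object
  }
};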
std::optional dest_bucket, - bool check_activated) const -{ - bool found = false; - bool found_activated = false; - - for (auto m : flow_groups) { - auto& fm = m.second; - auto pipes = fm.find_pipes(source_zone, source_bucket, - dest_zone, dest_bucket); - - bool is_found = !pipes.empty(); - - if (is_found) { - switch (fm.status) { - case rgw_sync_policy_group::Status::FORBIDDEN: - return false; - case rgw_sync_policy_group::Status::ENABLED: - found = true; - found_activated = true; - break; - case rgw_sync_policy_group::Status::ALLOWED: - found = true; - break; - default: - break; /* unknown -- ignore */ - } - } - } - - if (check_activated && found_activated) { - return true; - } - - return found; -} - -void RGWBucketSyncFlowManager::init(const DoutPrefixProvider *dpp, const rgw_sync_policy_info& sync_policy) { - std::optional default_flow; - if (parent) { - default_flow.emplace(); - default_flow->init_default(parent->all_zones); - } - - for (auto& item : sync_policy.groups) { - auto& group = item.second; - auto& flow_group_map = flow_groups[group.id]; - - flow_group_map.init(dpp, cct, zone_id, bucket, group, - (default_flow ? &(*default_flow) : nullptr), - &all_zones, - [&](const rgw_zone_id& source_zone, - std::optional source_bucket, - const rgw_zone_id& dest_zone, - std::optional dest_bucket) { - if (!parent) { - return true; - } - return parent->allowed_data_flow(source_zone, - source_bucket, - dest_zone, - dest_bucket, - false); /* just check that it's not disabled */ - }); - } -} - -void RGWBucketSyncFlowManager::reflect(const DoutPrefixProvider *dpp, - std::optional effective_bucket, - RGWBucketSyncFlowManager::pipe_set *source_pipes, - RGWBucketSyncFlowManager::pipe_set *dest_pipes, - bool only_enabled) const - -{ - string effective_bucket_key; - if (effective_bucket) { - effective_bucket_key = effective_bucket->get_key(); - } - if (parent) { - parent->reflect(dpp, effective_bucket, source_pipes, dest_pipes, only_enabled); - } - - for (auto& item : flow_groups) { - auto& flow_group_map = item.second; - - /* only return enabled groups */ - if (flow_group_map.status != rgw_sync_policy_group::Status::ENABLED && - (only_enabled || flow_group_map.status != rgw_sync_policy_group::Status::ALLOWED)) { - continue; - } - - for (auto& entry : flow_group_map.sources) { - rgw_sync_bucket_pipe pipe = entry.second; - if (!pipe.dest.match_bucket(effective_bucket)) { - continue; - } - - pipe.source.apply_bucket(effective_bucket); - pipe.dest.apply_bucket(effective_bucket); - - ldpp_dout(dpp, 20) << __func__ << "(): flow manager (bucket=" << effective_bucket_key << "): adding source pipe: " << pipe << dendl; - source_pipes->insert(pipe); - } - - for (auto& entry : flow_group_map.dests) { - rgw_sync_bucket_pipe pipe = entry.second; - - if (!pipe.source.match_bucket(effective_bucket)) { - continue; - } - - pipe.source.apply_bucket(effective_bucket); - pipe.dest.apply_bucket(effective_bucket); - - ldpp_dout(dpp, 20) << __func__ << "(): flow manager (bucket=" << effective_bucket_key << "): adding dest pipe: " << pipe << dendl; - dest_pipes->insert(pipe); - } - } -} - - -RGWBucketSyncFlowManager::RGWBucketSyncFlowManager(CephContext *_cct, - const rgw_zone_id& _zone_id, - std::optional _bucket, - const RGWBucketSyncFlowManager *_parent) : cct(_cct), - zone_id(_zone_id), - bucket(_bucket), - parent(_parent) {} - - -void RGWSyncPolicyCompat::convert_old_sync_config(RGWSI_Zone *zone_svc, - RGWSI_SyncModules *sync_modules_svc, - rgw_sync_policy_info *ppolicy) -{ - bool found = false; - - 
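// allowed_data_flow() above folds the matching groups' statuses with a veto
// rule: any FORBIDDEN match denies immediately, ENABLED both permits and
// activates, ALLOWED merely permits. Its decision kernel in isolation:
#include <vector>

enum class GroupStatus { FORBIDDEN, ALLOWED, ENABLED };

static bool allowed(const std::vector<GroupStatus>& matches,
                    bool check_activated) {
  bool found = false;
  bool found_activated = false;
  for (auto s : matches) {
    switch (s) {
    case GroupStatus::FORBIDDEN:
      return false;                 // hard veto, regardless of other groups
    case GroupStatus::ENABLED:
      found = found_activated = true;
      break;
    case GroupStatus::ALLOWED:
      found = true;
      break;
    }
  }
  if (check_activated && found_activated)
    return true;
  return found;
}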
rgw_sync_policy_info policy; - - auto& group = policy.groups["default"]; - auto& zonegroup = zone_svc->get_zonegroup(); - - for (const auto& ziter1 : zonegroup.zones) { - auto& id1 = ziter1.first; - const RGWZone& z1 = ziter1.second; - - for (const auto& ziter2 : zonegroup.zones) { - auto& id2 = ziter2.first; - const RGWZone& z2 = ziter2.second; - - if (id1 == id2) { - continue; - } - - if (z1.syncs_from(z2.name)) { - found = true; - rgw_sync_directional_rule *rule; - group.data_flow.find_or_create_directional(id2, - id1, - &rule); - } - } - } - - if (!found) { /* nothing syncs */ - return; - } - - rgw_sync_bucket_pipes pipes; - pipes.id = "all"; - pipes.source.all_zones = true; - pipes.dest.all_zones = true; - - group.pipes.emplace_back(std::move(pipes)); - - - group.status = rgw_sync_policy_group::Status::ENABLED; - - *ppolicy = std::move(policy); -} - -RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(RGWSI_Zone *_zone_svc, - RGWSI_SyncModules *sync_modules_svc, - RGWSI_Bucket_Sync *_bucket_sync_svc, - std::optional effective_zone) : zone_svc(_zone_svc) , - bucket_sync_svc(_bucket_sync_svc) { - zone_id = effective_zone.value_or(zone_svc->zone_id()); - flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), - zone_id, - nullopt, - nullptr)); - sync_policy = zone_svc->get_zonegroup().sync_policy; - - if (sync_policy.empty()) { - RGWSyncPolicyCompat::convert_old_sync_config(zone_svc, sync_modules_svc, &sync_policy); - legacy_config = true; - } -} - -RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, - const RGWBucketInfo& _bucket_info, - map&& _bucket_attrs) : parent(_parent), - bucket_info(_bucket_info), - bucket_attrs(std::move(_bucket_attrs)) { - if (_bucket_info.sync_policy) { - sync_policy = *_bucket_info.sync_policy; - - for (auto& entry : sync_policy.groups) { - for (auto& pipe : entry.second.pipes) { - if (pipe.params.mode == rgw_sync_pipe_params::MODE_USER && - pipe.params.user.empty()) { - pipe.params.user = _bucket_info.owner; - } - } - } - } - legacy_config = parent->legacy_config; - bucket = _bucket_info.bucket; - zone_svc = parent->zone_svc; - bucket_sync_svc = parent->bucket_sync_svc; - flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), - parent->zone_id, - _bucket_info.bucket, - parent->flow_mgr.get())); -} - -RGWBucketSyncPolicyHandler::RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, - const rgw_bucket& _bucket, - std::optional _sync_policy) : parent(_parent) { - if (_sync_policy) { - sync_policy = *_sync_policy; - } - legacy_config = parent->legacy_config; - bucket = _bucket; - zone_svc = parent->zone_svc; - bucket_sync_svc = parent->bucket_sync_svc; - flow_mgr.reset(new RGWBucketSyncFlowManager(zone_svc->ctx(), - parent->zone_id, - _bucket, - parent->flow_mgr.get())); -} - -RGWBucketSyncPolicyHandler *RGWBucketSyncPolicyHandler::alloc_child(const RGWBucketInfo& bucket_info, - map&& bucket_attrs) const -{ - return new RGWBucketSyncPolicyHandler(this, bucket_info, std::move(bucket_attrs)); -} - -RGWBucketSyncPolicyHandler *RGWBucketSyncPolicyHandler::alloc_child(const rgw_bucket& bucket, - std::optional sync_policy) const -{ - return new RGWBucketSyncPolicyHandler(this, bucket, sync_policy); -} - -int RGWBucketSyncPolicyHandler::init(const DoutPrefixProvider *dpp, optional_yield y) -{ - int r = bucket_sync_svc->get_bucket_sync_hints(dpp, bucket.value_or(rgw_bucket()), - &source_hints, - &target_hints, - y); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize bucket sync 
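// convert_old_sync_config() above maps the legacy "zone A syncs_from zone B"
// relationships onto a single policy group: one directional flow per pulling
// pair, plus one wildcard pipe over all zones, enabled only if anything
// syncs at all. A schematic reduction with plain types:
#include <map>
#include <set>
#include <string>
#include <utility>

using Zone = std::string;

struct PolicySketch {
  std::set<std::pair<Zone, Zone>> flows;  // {source, dest}
  bool all_zones_pipe = false;            // cf. pipes.source/dest.all_zones
  bool enabled = false;
};

static PolicySketch convert_legacy(
    const std::map<Zone, std::set<Zone>>& syncs_from) {  // dest -> sources
  PolicySketch p;
  for (const auto& [dest, sources] : syncs_from)
    for (const auto& src : sources)
      if (src != dest)
        p.flows.insert({src, dest});
  if (!p.flows.empty()) {   // "nothing syncs" leaves the policy empty
    p.all_zones_pipe = true;
    p.enabled = true;
  }
  return p;
}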
policy handler: get_bucket_sync_hints() on bucket=" - << bucket << " returned r=" << r << dendl; - return r; - } - - flow_mgr->init(dpp, sync_policy); - - reflect(dpp, &source_pipes, - &target_pipes, - &sources, - &targets, - &source_zones, - &target_zones, - true); - - return 0; -} - -void RGWBucketSyncPolicyHandler::reflect(const DoutPrefixProvider *dpp, RGWBucketSyncFlowManager::pipe_set *psource_pipes, - RGWBucketSyncFlowManager::pipe_set *ptarget_pipes, - map *psources, - map *ptargets, - std::set *psource_zones, - std::set *ptarget_zones, - bool only_enabled) const -{ - RGWBucketSyncFlowManager::pipe_set _source_pipes; - RGWBucketSyncFlowManager::pipe_set _target_pipes; - map _sources; - map _targets; - std::set _source_zones; - std::set _target_zones; - - flow_mgr->reflect(dpp, bucket, &_source_pipes, &_target_pipes, only_enabled); - - for (auto& entry : _source_pipes.pipe_map) { - auto& pipe = entry.second; - if (!pipe.source.zone) { - continue; - } - _source_zones.insert(*pipe.source.zone); - _sources[*pipe.source.zone].insert(pipe); - } - - for (auto& entry : _target_pipes.pipe_map) { - auto& pipe = entry.second; - if (!pipe.dest.zone) { - continue; - } - _target_zones.insert(*pipe.dest.zone); - _targets[*pipe.dest.zone].insert(pipe); - } - - if (psource_pipes) { - *psource_pipes = std::move(_source_pipes); - } - if (ptarget_pipes) { - *ptarget_pipes = std::move(_target_pipes); - } - if (psources) { - *psources = std::move(_sources); - } - if (ptargets) { - *ptargets = std::move(_targets); - } - if (psource_zones) { - *psource_zones = std::move(_source_zones); - } - if (ptarget_zones) { - *ptarget_zones = std::move(_target_zones); - } -} - -multimap RGWBucketSyncPolicyHandler::get_all_sources() const -{ - multimap m; - - for (auto& source_entry : sources) { - auto& zone_id = source_entry.first; - - auto& pipes = source_entry.second.pipe_map; - - for (auto& entry : pipes) { - auto& pipe = entry.second; - m.insert(make_pair(zone_id, pipe)); - } - } - - for (auto& pipe : resolved_sources) { - if (!pipe.source.zone) { - continue; - } - - m.insert(make_pair(*pipe.source.zone, pipe)); - } - - return m; -} - -multimap RGWBucketSyncPolicyHandler::get_all_dests() const -{ - multimap m; - - for (auto& dest_entry : targets) { - auto& zone_id = dest_entry.first; - - auto& pipes = dest_entry.second.pipe_map; - - for (auto& entry : pipes) { - auto& pipe = entry.second; - m.insert(make_pair(zone_id, pipe)); - } - } - - for (auto& pipe : resolved_dests) { - if (!pipe.dest.zone) { - continue; - } - - m.insert(make_pair(*pipe.dest.zone, pipe)); - } - - return m; -} - -multimap RGWBucketSyncPolicyHandler::get_all_dests_in_zone(const rgw_zone_id& zone_id) const -{ - multimap m; - - auto iter = targets.find(zone_id); - if (iter != targets.end()) { - auto& pipes = iter->second.pipe_map; - - for (auto& entry : pipes) { - auto& pipe = entry.second; - m.insert(make_pair(zone_id, pipe)); - } - } - - for (auto& pipe : resolved_dests) { - if (!pipe.dest.zone || - *pipe.dest.zone != zone_id) { - continue; - } - - m.insert(make_pair(*pipe.dest.zone, pipe)); - } - - return m; -} - -void RGWBucketSyncPolicyHandler::get_pipes(std::set *_sources, std::set *_targets, - std::optional filter_peer) { /* return raw pipes */ - for (auto& entry : source_pipes.pipe_map) { - auto& source_pipe = entry.second; - if (!filter_peer || - source_pipe.source.match(*filter_peer)) { - _sources->insert(source_pipe); - } - } - - for (auto& entry : target_pipes.pipe_map) { - auto& target_pipe = entry.second; - if (!filter_peer || - 
target_pipe.dest.match(*filter_peer)) {
-      _targets->insert(target_pipe);
-    }
-  }
-}
-
-bool RGWBucketSyncPolicyHandler::bucket_exports_data() const
-{
-  if (!bucket) {
-    return false;
-  }
-
-  if (bucket_is_sync_source()) {
-    return true;
-  }
-
-  return (zone_svc->need_to_log_data() &&
-          bucket_info->datasync_flag_enabled());
-}
-
-bool RGWBucketSyncPolicyHandler::bucket_imports_data() const
-{
-  return bucket_is_sync_target();
-}
-
diff --git a/src/rgw/store/rados/rgw_bucket_sync.h b/src/rgw/store/rados/rgw_bucket_sync.h
deleted file mode 100644
index 76143773e8d..00000000000
--- a/src/rgw/store/rados/rgw_bucket_sync.h
+++ /dev/null
@@ -1,412 +0,0 @@
-
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2018 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include "rgw_common.h"
-#include "rgw_sync_policy.h"
-
-class RGWSI_Zone;
-class RGWSI_SyncModules;
-class RGWSI_Bucket_Sync;
-
-struct rgw_sync_group_pipe_map;
-struct rgw_sync_bucket_pipes;
-struct rgw_sync_policy_info;
-
-struct rgw_sync_group_pipe_map {
-  rgw_zone_id zone;
-  std::optional<rgw_bucket> bucket;
-
-  rgw_sync_policy_group::Status status{rgw_sync_policy_group::Status::FORBIDDEN};
-
-  using zb_pipe_map_t = std::multimap<rgw_sync_bucket_entity, rgw_sync_bucket_pipe>;
-
-  zb_pipe_map_t sources; /* all the pipes where zone is pulling from */
-  zb_pipe_map_t dests; /* all the pipes that pull from zone */
-
-  std::set<rgw_zone_id> *pall_zones{nullptr};
-  rgw_sync_data_flow_group *default_flow{nullptr}; /* flow to use if policy doesn't define it,
-                                                      used in the case of bucket sync policy, not at the
-                                                      zonegroup level */
-
-  void dump(ceph::Formatter *f) const;
-
-  template <typename CB1, typename CB2>
-  void try_add_to_pipe_map(const rgw_zone_id& source_zone,
-                           const rgw_zone_id& dest_zone,
-                           const std::vector<rgw_sync_bucket_pipes>& pipes,
-                           zb_pipe_map_t *pipe_map,
-                           CB1 filter_cb,
-                           CB2 call_filter_cb);
-
-  template <typename CB>
-  void try_add_source(const rgw_zone_id& source_zone,
-                      const rgw_zone_id& dest_zone,
-                      const std::vector<rgw_sync_bucket_pipes>& pipes,
-                      CB filter_cb);
-
-  template <typename CB>
-  void try_add_dest(const rgw_zone_id& source_zone,
-                    const rgw_zone_id& dest_zone,
-                    const std::vector<rgw_sync_bucket_pipes>& pipes,
-                    CB filter_cb);
-
-  std::pair<zb_pipe_map_t::const_iterator, zb_pipe_map_t::const_iterator> find_pipes(const zb_pipe_map_t& m,
-                                                                                     const rgw_zone_id& zone,
-                                                                                     std::optional<rgw_bucket> b) const;
-
-  template <typename CB>
-  void init(const DoutPrefixProvider *dpp, CephContext *cct,
-            const rgw_zone_id& _zone,
-            std::optional<rgw_bucket> _bucket,
-            const rgw_sync_policy_group& group,
-            rgw_sync_data_flow_group *_default_flow,
-            std::set<rgw_zone_id> *_pall_zones,
-            CB filter_cb);
-
-  /*
-   * find all relevant pipes in our zone that match {dest_bucket} <- {source_zone, source_bucket}
-   */
-  std::vector<rgw_sync_bucket_pipe> find_source_pipes(const rgw_zone_id& source_zone,
-                                                      std::optional<rgw_bucket> source_bucket,
-                                                      std::optional<rgw_bucket> dest_bucket) const;
-
-  /*
-   * find all relevant pipes in other zones that pull from a specific
-   * source bucket in our zone {source_bucket} -> {dest_zone, dest_bucket}
-   */
-  std::vector<rgw_sync_bucket_pipe> find_dest_pipes(std::optional<rgw_bucket> source_bucket,
-                                                    const rgw_zone_id& dest_zone,
-                                                    std::optional<rgw_bucket> dest_bucket) const;
-
-  /*
-   * find all relevant pipes from {source_zone, source_bucket} -> {dest_zone, dest_bucket}
-   */
-  std::vector<rgw_sync_bucket_pipe> find_pipes(const rgw_zone_id& source_zone,
-                                               std::optional<rgw_bucket> source_bucket,
-                                               const rgw_zone_id& dest_zone,
-                                               std::optional<rgw_bucket> dest_bucket) const;
-};
-
-class RGWSyncPolicyCompat {
-public:
-  static void convert_old_sync_config(RGWSI_Zone *zone_svc,
-                                      RGWSI_SyncModules *sync_modules_svc,
-                                      rgw_sync_policy_info *ppolicy);
-};
-
-class RGWBucketSyncFlowManager {
-  friend class RGWBucketSyncPolicyHandler;
-public:
-  struct endpoints_pair {
-    rgw_sync_bucket_entity source;
-    rgw_sync_bucket_entity dest;
-
-    endpoints_pair() {}
-    endpoints_pair(const rgw_sync_bucket_pipe& pipe) {
-      source = pipe.source;
-      dest = pipe.dest;
-    }
-
-    bool operator<(const endpoints_pair& e) const {
-      if (source < e.source) {
-        return true;
-      }
-      if (e.source < source) {
-        return false;
-      }
-      return (dest < e.dest);
-    }
-  };
-
-  /*
-   * pipe_rules: deal with a set of pipes that have common endpoints_pair
-   */
-  class pipe_rules {
-    std::list<rgw_sync_bucket_pipe> pipes;
-
-  public:
-    using prefix_map_t = std::multimap<std::string, rgw_sync_bucket_pipe *>;
-
-    std::map<std::string, rgw_sync_bucket_pipe *> tag_refs;
-    prefix_map_t prefix_refs;
-
-    void insert(const rgw_sync_bucket_pipe& pipe);
-
-    bool find_basic_info_without_tags(const rgw_obj_key& key,
-                                      std::optional<rgw_user> *user,
-                                      std::optional<rgw_user> *acl_translation,
-                                      std::optional<std::string> *storage_class,
-                                      rgw_sync_pipe_params::Mode *mode,
-                                      bool *need_more_info) const;
-    bool find_obj_params(const rgw_obj_key& key,
-                         const RGWObjTags::tag_map_t& tags,
-                         rgw_sync_pipe_params *params) const;
-
-    void scan_prefixes(std::vector<std::string> *prefixes) const;
-
-    prefix_map_t::const_iterator prefix_begin() const {
-      return prefix_refs.begin();
-    }
-    prefix_map_t::const_iterator prefix_search(const std::string& s) const;
-    prefix_map_t::const_iterator prefix_end() const {
-      return prefix_refs.end();
-    }
-  };
-
-  using pipe_rules_ref = std::shared_ptr<pipe_rules>;
-
-  /*
-   * pipe_handler: extends endpoints_rule to point at the corresponding rules handler
-   */
-  struct pipe_handler : public endpoints_pair {
-    pipe_rules_ref rules;
-
-    pipe_handler() {}
-    pipe_handler(pipe_rules_ref& _rules,
-                 const rgw_sync_bucket_pipe& _pipe) : endpoints_pair(_pipe),
-                                                      rules(_rules) {}
-    bool specific() const {
-      return source.specific() && dest.specific();
-    }
-
-    bool find_basic_info_without_tags(const rgw_obj_key& key,
-                                      std::optional<rgw_user> *user,
-                                      std::optional<rgw_user> *acl_translation,
-                                      std::optional<std::string> *storage_class,
-                                      rgw_sync_pipe_params::Mode *mode,
-                                      bool *need_more_info) const {
-      if (!rules) {
-        return false;
-      }
-      return rules->find_basic_info_without_tags(key, user, acl_translation, storage_class, mode, need_more_info);
-    }
-
-    bool find_obj_params(const rgw_obj_key& key,
-                         const RGWObjTags::tag_map_t& tags,
-                         rgw_sync_pipe_params *params) const {
-      if (!rules) {
-        return false;
-      }
-      return rules->find_obj_params(key, tags, params);
-    }
-  };
-
-  struct pipe_set {
-    std::map<endpoints_pair, pipe_rules_ref> rules;
-    std::multimap<std::string, rgw_sync_bucket_pipe> pipe_map;
-
-    std::set<pipe_handler> handlers;
-
-    using iterator = std::set<pipe_handler>::iterator;
-
-    void clear() {
-      rules.clear();
-      pipe_map.clear();
-      handlers.clear();
-    }
-
-    void insert(const rgw_sync_bucket_pipe& pipe);
-
-    iterator begin() const {
-      return handlers.begin();
-    }
-
-    iterator end() const {
-      return handlers.end();
-    }
-
-    void dump(ceph::Formatter *f) const;
-  };
-
-private:
-
-  CephContext *cct;
-
-  rgw_zone_id zone_id;
-  std::optional<rgw_bucket> bucket;
-
-  const RGWBucketSyncFlowManager *parent{nullptr};
-
-  std::map<std::string, rgw_sync_group_pipe_map> flow_groups;
-
-  std::set<rgw_zone_id> all_zones;
-
-  bool allowed_data_flow(const rgw_zone_id& source_zone,
-                         std::optional<rgw_bucket> source_bucket,
-                         const rgw_zone_id& dest_zone,
-                         std::optional<rgw_bucket> dest_bucket,
-                         bool check_activated) const;
-
-  /*
-   * find all the matching flows in a flow map for a specific bucket
-   */
-  void update_flow_maps(const rgw_sync_bucket_pipes& pipe);
-
-  void init(const 
DoutPrefixProvider *dpp, const rgw_sync_policy_info& sync_policy); - -public: - - RGWBucketSyncFlowManager(CephContext *_cct, - const rgw_zone_id& _zone_id, - std::optional _bucket, - const RGWBucketSyncFlowManager *_parent); - - void reflect(const DoutPrefixProvider *dpp, std::optional effective_bucket, - pipe_set *flow_by_source, - pipe_set *flow_by_dest, - bool only_enabled) const; - -}; - -static inline std::ostream& operator<<(std::ostream& os, const RGWBucketSyncFlowManager::endpoints_pair& e) { - os << e.dest << " -> " << e.source; - return os; -} - -class RGWBucketSyncPolicyHandler { - bool legacy_config{false}; - const RGWBucketSyncPolicyHandler *parent{nullptr}; - RGWSI_Zone *zone_svc; - RGWSI_Bucket_Sync *bucket_sync_svc; - rgw_zone_id zone_id; - std::optional bucket_info; - std::optional > bucket_attrs; - std::optional bucket; - std::unique_ptr flow_mgr; - rgw_sync_policy_info sync_policy; - - RGWBucketSyncFlowManager::pipe_set source_pipes; - RGWBucketSyncFlowManager::pipe_set target_pipes; - - std::map sources; /* source pipes by source zone id */ - std::map targets; /* target pipes by target zone id */ - - std::set source_zones; - std::set target_zones; - - std::set source_hints; - std::set target_hints; - std::set resolved_sources; - std::set resolved_dests; - - - bool bucket_is_sync_source() const { - return !targets.empty() || !resolved_dests.empty(); - } - - bool bucket_is_sync_target() const { - return !sources.empty() || !resolved_sources.empty(); - } - - RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, - const RGWBucketInfo& _bucket_info, - std::map&& _bucket_attrs); - - RGWBucketSyncPolicyHandler(const RGWBucketSyncPolicyHandler *_parent, - const rgw_bucket& _bucket, - std::optional _sync_policy); -public: - RGWBucketSyncPolicyHandler(RGWSI_Zone *_zone_svc, - RGWSI_SyncModules *sync_modules_svc, - RGWSI_Bucket_Sync *bucket_sync_svc, - std::optional effective_zone = std::nullopt); - - RGWBucketSyncPolicyHandler *alloc_child(const RGWBucketInfo& bucket_info, - std::map&& bucket_attrs) const; - RGWBucketSyncPolicyHandler *alloc_child(const rgw_bucket& bucket, - std::optional sync_policy) const; - - int init(const DoutPrefixProvider *dpp, optional_yield y); - - void reflect(const DoutPrefixProvider *dpp, RGWBucketSyncFlowManager::pipe_set *psource_pipes, - RGWBucketSyncFlowManager::pipe_set *ptarget_pipes, - std::map *psources, - std::map *ptargets, - std::set *psource_zones, - std::set *ptarget_zones, - bool only_enabled) const; - - void set_resolved_hints(std::set&& _resolved_sources, - std::set&& _resolved_dests) { - resolved_sources = std::move(_resolved_sources); - resolved_dests = std::move(_resolved_dests); - } - - const std::set& get_resolved_source_hints() { - return resolved_sources; - } - - const std::set& get_resolved_dest_hints() { - return resolved_dests; - } - - const std::set& get_source_zones() const { - return source_zones; - } - - const std::set& get_target_zones() const { - return target_zones; - } - - const std::map& get_sources() { - return sources; - } - - std::multimap get_all_sources() const; - std::multimap get_all_dests() const; - std::multimap get_all_dests_in_zone(const rgw_zone_id& zone_id) const; - - const std::map& get_targets() { - return targets; - } - - const std::optional& get_bucket_info() const { - return bucket_info; - } - - const std::optional >& get_bucket_attrs() const { - return bucket_attrs; - } - - void get_pipes(RGWBucketSyncFlowManager::pipe_set **_sources, RGWBucketSyncFlowManager::pipe_set **_targets) 
{ /* return raw pipes (with zone name) */ - *_sources = &source_pipes; - *_targets = &target_pipes; - } - void get_pipes(std::set *sources, std::set *targets, - std::optional filter_peer); - - const std::set& get_source_hints() const { - return source_hints; - } - - const std::set& get_target_hints() const { - return target_hints; - } - - bool bucket_exports_data() const; - bool bucket_imports_data() const; - - const rgw_sync_policy_info& get_sync_policy() const { - return sync_policy; - } - - bool is_legacy_config() const { - return legacy_config; - } -}; - diff --git a/src/rgw/store/rados/rgw_cr_rados.cc b/src/rgw/store/rados/rgw_cr_rados.cc deleted file mode 100644 index 05079723792..00000000000 --- a/src/rgw/store/rados/rgw_cr_rados.cc +++ /dev/null @@ -1,1138 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "include/compat.h" -#include "rgw_sal.h" -#include "rgw_zone.h" -#include "rgw_coroutine.h" -#include "rgw_cr_rados.h" -#include "rgw_sync_counters.h" -#include "rgw_bucket.h" -#include "rgw_datalog_notify.h" -#include "rgw_cr_rest.h" -#include "rgw_rest_conn.h" -#include "rgw_rados.h" - -#include "services/svc_zone.h" -#include "services/svc_zone_utils.h" -#include "services/svc_sys_obj.h" -#include "services/svc_cls.h" - -#include "cls/lock/cls_lock_client.h" -#include "cls/rgw/cls_rgw_client.h" - -#include -#include - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -bool RGWAsyncRadosProcessor::RGWWQ::_enqueue(RGWAsyncRadosRequest *req) { - if (processor->is_going_down()) { - return false; - } - req->get(); - processor->m_req_queue.push_back(req); - dout(20) << "enqueued request req=" << hex << req << dec << dendl; - _dump_queue(); - return true; -} - -bool RGWAsyncRadosProcessor::RGWWQ::_empty() { - return processor->m_req_queue.empty(); -} - -RGWAsyncRadosRequest *RGWAsyncRadosProcessor::RGWWQ::_dequeue() { - if (processor->m_req_queue.empty()) - return NULL; - RGWAsyncRadosRequest *req = processor->m_req_queue.front(); - processor->m_req_queue.pop_front(); - dout(20) << "dequeued request req=" << hex << req << dec << dendl; - _dump_queue(); - return req; -} - -void RGWAsyncRadosProcessor::RGWWQ::_process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) { - processor->handle_request(this, req); - processor->req_throttle.put(1); -} - -void RGWAsyncRadosProcessor::RGWWQ::_dump_queue() { - if (!g_conf()->subsys.should_gather()) { - return; - } - deque::iterator iter; - if (processor->m_req_queue.empty()) { - dout(20) << "RGWWQ: empty" << dendl; - return; - } - dout(20) << "RGWWQ:" << dendl; - for (iter = processor->m_req_queue.begin(); iter != processor->m_req_queue.end(); ++iter) { - dout(20) << "req: " << hex << *iter << dec << dendl; - } -} - -RGWAsyncRadosProcessor::RGWAsyncRadosProcessor(CephContext *_cct, int num_threads) - : cct(_cct), m_tp(cct, "RGWAsyncRadosProcessor::m_tp", "rados_async", num_threads), - req_throttle(_cct, "rgw_async_rados_ops", num_threads * 2), - req_wq(this, - ceph::make_timespan(g_conf()->rgw_op_thread_timeout), - ceph::make_timespan(g_conf()->rgw_op_thread_suicide_timeout), - &m_tp) { -} - -void RGWAsyncRadosProcessor::start() { - m_tp.start(); -} - -void RGWAsyncRadosProcessor::stop() { - going_down = true; - m_tp.drain(&req_wq); - m_tp.stop(); - for (auto iter = m_req_queue.begin(); iter != m_req_queue.end(); ++iter) { - (*iter)->put(); - } -} - -void RGWAsyncRadosProcessor::handle_request(const 
DoutPrefixProvider *dpp, RGWAsyncRadosRequest *req) { - req->send_request(dpp); - req->put(); -} - -void RGWAsyncRadosProcessor::queue(RGWAsyncRadosRequest *req) { - req_throttle.get(1); - req_wq.queue(req); -} - -int RGWAsyncGetSystemObj::_send_request(const DoutPrefixProvider *dpp) -{ - map *pattrs = want_attrs ? &attrs : nullptr; - - auto sysobj = svc_sysobj->get_obj(obj); - return sysobj.rop() - .set_objv_tracker(&objv_tracker) - .set_attrs(pattrs) - .set_raw_attrs(raw_attrs) - .read(dpp, &bl, null_yield); -} - -RGWAsyncGetSystemObj::RGWAsyncGetSystemObj(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - bool want_attrs, bool raw_attrs) - : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc_sysobj(_svc), - obj(_obj), want_attrs(want_attrs), raw_attrs(raw_attrs) -{ - if (_objv_tracker) { - objv_tracker = *_objv_tracker; - } -} - -int RGWSimpleRadosReadAttrsCR::send_request(const DoutPrefixProvider *dpp) -{ - req = new RGWAsyncGetSystemObj(dpp, this, stack->create_completion_notifier(), - svc, objv_tracker, obj, true, raw_attrs); - async_rados->queue(req); - return 0; -} - -int RGWSimpleRadosReadAttrsCR::request_complete() -{ - if (pattrs) { - *pattrs = std::move(req->attrs); - } - if (objv_tracker) { - *objv_tracker = req->objv_tracker; - } - return req->get_ret_status(); -} - -int RGWAsyncPutSystemObj::_send_request(const DoutPrefixProvider *dpp) -{ - auto sysobj = svc->get_obj(obj); - return sysobj.wop() - .set_objv_tracker(&objv_tracker) - .set_exclusive(exclusive) - .write_data(dpp, bl, null_yield); -} - -RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(const DoutPrefixProvider *_dpp, - RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, - RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - bool _exclusive, bufferlist _bl) - : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc(_svc), - obj(_obj), exclusive(_exclusive), bl(std::move(_bl)) -{ - if (_objv_tracker) { - objv_tracker = *_objv_tracker; - } -} - -int RGWAsyncPutSystemObjAttrs::_send_request(const DoutPrefixProvider *dpp) -{ - auto sysobj = svc->get_obj(obj); - return sysobj.wop() - .set_objv_tracker(&objv_tracker) - .set_exclusive(exclusive) - .set_attrs(attrs) - .write_attrs(dpp, null_yield); -} - -RGWAsyncPutSystemObjAttrs::RGWAsyncPutSystemObjAttrs(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, - RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - map _attrs, bool exclusive) - : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), svc(_svc), - obj(_obj), attrs(std::move(_attrs)), exclusive(exclusive) -{ - if (_objv_tracker) { - objv_tracker = *_objv_tracker; - } -} - - -RGWOmapAppend::RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, - uint64_t _window_size) - : RGWConsumerCR(_store->ctx()), async_rados(_async_rados), - store(_store), obj(_obj), going_down(false), num_pending_entries(0), window_size(_window_size), total_entries(0) -{ -} - -int RGWAsyncLockSystemObj::_send_request(const DoutPrefixProvider *dpp) -{ - rgw_rados_ref ref; - int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; - return r; - } - - rados::cls::lock::Lock l(lock_name); - utime_t duration(duration_secs, 0); - l.set_duration(duration); - l.set_cookie(cookie); - 
l.set_may_renew(true);
-
-  return l.lock_exclusive(&ref.pool.ioctx(), ref.obj.oid);
-}
-
-RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
-                                             RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
-                                             const string& _name, const string& _cookie, uint32_t _duration_secs) : RGWAsyncRadosRequest(caller, cn), store(_store),
-                                                                                                                    obj(_obj),
-                                                                                                                    lock_name(_name),
-                                                                                                                    cookie(_cookie),
-                                                                                                                    duration_secs(_duration_secs)
-{
-}
-
-int RGWAsyncUnlockSystemObj::_send_request(const DoutPrefixProvider *dpp)
-{
-  rgw_rados_ref ref;
-  int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref);
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
-    return r;
-  }
-
-  rados::cls::lock::Lock l(lock_name);
-
-  l.set_cookie(cookie);
-
-  return l.unlock(&ref.pool.ioctx(), ref.obj.oid);
-}
-
-RGWAsyncUnlockSystemObj::RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
-                                                 RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
-                                                 const string& _name, const string& _cookie) : RGWAsyncRadosRequest(caller, cn), store(_store),
-                                                                                               obj(_obj),
-                                                                                               lock_name(_name), cookie(_cookie)
-{
-}
-
-RGWRadosSetOmapKeysCR::RGWRadosSetOmapKeysCR(rgw::sal::RadosStore* _store,
-                                             const rgw_raw_obj& _obj,
-                                             map<string, bufferlist>& _entries) : RGWSimpleCoroutine(_store->ctx()),
-                                                                                  store(_store),
-                                                                                  entries(_entries),
-                                                                                  obj(_obj), cn(NULL)
-{
-  stringstream& s = set_description();
-  s << "set omap keys dest=" << obj << " keys=[";
-  for (auto i = entries.begin(); i != entries.end(); ++i) {
-    if (i != entries.begin()) {
-      s << ", ";
-    }
-    s << i->first;
-  }
-  s << "]";
-}
-
-int RGWRadosSetOmapKeysCR::send_request(const DoutPrefixProvider *dpp)
-{
-  int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref);
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
-    return r;
-  }
-
-  set_status() << "sending request";
-
-  librados::ObjectWriteOperation op;
-  op.omap_set(entries);
-
-  cn = stack->create_completion_notifier();
-  return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op);
-}
-
-int RGWRadosSetOmapKeysCR::request_complete()
-{
-  int r = cn->completion()->get_return_value();
-
-  set_status() << "request complete; ret=" << r;
-
-  return r;
-}
-
-RGWRadosGetOmapKeysCR::RGWRadosGetOmapKeysCR(rgw::sal::RadosStore* _store,
-                                             const rgw_raw_obj& _obj,
-                                             const string& _marker,
-                                             int _max_entries,
-                                             ResultPtr _result)
-  : RGWSimpleCoroutine(_store->ctx()), store(_store), obj(_obj),
-    marker(_marker), max_entries(_max_entries),
-    result(std::move(_result))
-{
-  ceph_assert(result); // must be allocated
-  set_description() << "get omap keys dest=" << obj << " marker=" << marker;
-}
-
-int RGWRadosGetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) {
-  int r = store->getRados()->get_raw_obj_ref(dpp, obj, &result->ref);
-  if (r < 0) {
-    ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
-    return r;
-  }
-
-  set_status() << "send request";
-
-  librados::ObjectReadOperation op;
-  op.omap_get_keys2(marker, max_entries, &result->entries, &result->more, nullptr);
-
-  cn = stack->create_completion_notifier(result);
-  return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
-}
-
-int RGWRadosGetOmapKeysCR::request_complete()
-{
-  int r = cn->completion()->get_return_value();
-
-  set_status() << "request complete; ret=" << r;
-
return r; -} - -RGWRadosGetOmapValsCR::RGWRadosGetOmapValsCR(rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const string& _marker, - int _max_entries, - ResultPtr _result) - : RGWSimpleCoroutine(_store->ctx()), store(_store), obj(_obj), - marker(_marker), max_entries(_max_entries), - result(std::move(_result)) -{ - ceph_assert(result); // must be allocated - set_description() << "get omap keys dest=" << obj << " marker=" << marker; -} - -int RGWRadosGetOmapValsCR::send_request(const DoutPrefixProvider *dpp) { - int r = store->getRados()->get_raw_obj_ref(dpp, obj, &result->ref); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; - return r; - } - - set_status() << "send request"; - - librados::ObjectReadOperation op; - op.omap_get_vals2(marker, max_entries, &result->entries, &result->more, nullptr); - - cn = stack->create_completion_notifier(result); - return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); -} - -int RGWRadosGetOmapValsCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - -RGWRadosRemoveOmapKeysCR::RGWRadosRemoveOmapKeysCR(rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const set& _keys) : RGWSimpleCoroutine(_store->ctx()), - store(_store), - keys(_keys), - obj(_obj), cn(NULL) -{ - set_description() << "remove omap keys dest=" << obj << " keys=" << keys; -} - -int RGWRadosRemoveOmapKeysCR::send_request(const DoutPrefixProvider *dpp) { - int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; - return r; - } - - set_status() << "send request"; - - librados::ObjectWriteOperation op; - op.omap_rm_keys(keys); - - cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); -} - -int RGWRadosRemoveOmapKeysCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - -RGWRadosRemoveCR::RGWRadosRemoveCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, - RGWObjVersionTracker* objv_tracker) - : RGWSimpleCoroutine(store->ctx()), - store(store), obj(obj), objv_tracker(objv_tracker) -{ - set_description() << "remove dest=" << obj; -} - -int RGWRadosRemoveCR::send_request(const DoutPrefixProvider *dpp) -{ - auto rados = store->getRados()->get_rados_handle(); - int r = rados->ioctx_create(obj.pool.name.c_str(), ioctx); - if (r < 0) { - lderr(cct) << "ERROR: failed to open pool (" << obj.pool.name << ") ret=" << r << dendl; - return r; - } - ioctx.locator_set_key(obj.loc); - - set_status() << "send request"; - - librados::ObjectWriteOperation op; - if (objv_tracker) { - objv_tracker->prepare_op_for_write(&op); - } - op.remove(); - - cn = stack->create_completion_notifier(); - return ioctx.aio_operate(obj.oid, cn->completion(), &op); -} - -int RGWRadosRemoveCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - -RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - librados::IoCtx&& ioctx, - std::string_view oid, - RGWObjVersionTracker* objv_tracker) - : RGWSimpleCoroutine(store->ctx()), ioctx(std::move(ioctx)), - oid(std::string(oid)), objv_tracker(objv_tracker) -{ - set_description() << "remove dest=" << oid; -} - 
-RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj& obj, - RGWObjVersionTracker* objv_tracker) - : RGWSimpleCoroutine(store->ctx()), - ioctx(librados::IoCtx(obj.get_ref().pool.ioctx())), - oid(obj.get_ref().obj.oid), - objv_tracker(objv_tracker) -{ - set_description() << "remove dest=" << oid; -} - -RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj&& obj, - RGWObjVersionTracker* objv_tracker) - : RGWSimpleCoroutine(store->ctx()), - ioctx(std::move(obj.get_ref().pool.ioctx())), - oid(std::move(obj.get_ref().obj.oid)), - objv_tracker(objv_tracker) -{ - set_description() << "remove dest=" << oid; -} - -int RGWRadosRemoveOidCR::send_request(const DoutPrefixProvider *dpp) -{ - librados::ObjectWriteOperation op; - if (objv_tracker) { - objv_tracker->prepare_op_for_write(&op); - } - op.remove(); - - cn = stack->create_completion_notifier(); - return ioctx.aio_operate(oid, cn->completion(), &op); -} - -int RGWRadosRemoveOidCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - -RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const string& _lock_name, - const string& _cookie, - uint32_t _duration) : RGWSimpleCoroutine(_store->ctx()), - async_rados(_async_rados), - store(_store), - lock_name(_lock_name), - cookie(_cookie), - duration(_duration), - obj(_obj), - req(NULL) -{ - set_description() << "rados lock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie << " duration=" << duration; -} - -void RGWSimpleRadosLockCR::request_cleanup() -{ - if (req) { - req->finish(); - req = NULL; - } -} - -int RGWSimpleRadosLockCR::send_request(const DoutPrefixProvider *dpp) -{ - set_status() << "sending request"; - req = new RGWAsyncLockSystemObj(this, stack->create_completion_notifier(), - store, NULL, obj, lock_name, cookie, duration); - async_rados->queue(req); - return 0; -} - -int RGWSimpleRadosLockCR::request_complete() -{ - set_status() << "request complete; ret=" << req->get_ret_status(); - return req->get_ret_status(); -} - -RGWSimpleRadosUnlockCR::RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const string& _lock_name, - const string& _cookie) : RGWSimpleCoroutine(_store->ctx()), - async_rados(_async_rados), - store(_store), - lock_name(_lock_name), - cookie(_cookie), - obj(_obj), - req(NULL) -{ - set_description() << "rados unlock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie; -} - -void RGWSimpleRadosUnlockCR::request_cleanup() -{ - if (req) { - req->finish(); - req = NULL; - } -} - -int RGWSimpleRadosUnlockCR::send_request(const DoutPrefixProvider *dpp) -{ - set_status() << "sending request"; - - req = new RGWAsyncUnlockSystemObj(this, stack->create_completion_notifier(), - store, NULL, obj, lock_name, cookie); - async_rados->queue(req); - return 0; -} - -int RGWSimpleRadosUnlockCR::request_complete() -{ - set_status() << "request complete; ret=" << req->get_ret_status(); - return req->get_ret_status(); -} - -int RGWOmapAppend::operate(const DoutPrefixProvider *dpp) { - reenter(this) { - for (;;) { - if (!has_product() && going_down) { - set_status() << "going down"; - break; - } - set_status() << "waiting for product"; - yield wait_for_product(); - yield { - string entry; - while (consume(&entry)) { - set_status() << "adding entry: " << entry; - 
entries[entry] = bufferlist(); - if (entries.size() >= window_size) { - break; - } - } - if (entries.size() >= window_size || going_down) { - set_status() << "flushing to omap"; - call(new RGWRadosSetOmapKeysCR(store, obj, entries)); - entries.clear(); - } - } - if (get_ret_status() < 0) { - ldout(cct, 0) << "ERROR: failed to store entries in omap" << dendl; - return set_state(RGWCoroutine_Error); - } - } - /* done with coroutine */ - return set_state(RGWCoroutine_Done); - } - return 0; -} - -void RGWOmapAppend::flush_pending() { - receive(pending_entries); - num_pending_entries = 0; -} - -bool RGWOmapAppend::append(const string& s) { - if (is_done()) { - return false; - } - ++total_entries; - pending_entries.push_back(s); - if (++num_pending_entries >= (int)window_size) { - flush_pending(); - } - return true; -} - -bool RGWOmapAppend::finish() { - going_down = true; - flush_pending(); - set_sleeping(false); - return (!is_done()); -} - -int RGWAsyncGetBucketInstanceInfo::_send_request(const DoutPrefixProvider *dpp) -{ - int r; - if (!bucket.bucket_id.empty()) { - r = store->getRados()->get_bucket_instance_info(bucket, bucket_info, nullptr, &attrs, null_yield, dpp); - } else { - r = store->ctl()->bucket->read_bucket_info(bucket, &bucket_info, null_yield, dpp, - RGWBucketCtl::BucketInstance::GetParams().set_attrs(&attrs)); - } - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to get bucket instance info for " - << bucket << dendl; - return r; - } - - return 0; -} - -int RGWAsyncPutBucketInstanceInfo::_send_request(const DoutPrefixProvider *dpp) -{ - auto r = store->getRados()->put_bucket_instance_info(bucket_info, exclusive, - mtime, attrs, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to put bucket instance info for " - << bucket_info.bucket << dendl; - return r; - } - - return 0; -} - -RGWRadosBILogTrimCR::RGWRadosBILogTrimCR( - const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - const RGWBucketInfo& bucket_info, - int shard_id, - const rgw::bucket_index_layout_generation& generation, - const std::string& start_marker, - const std::string& end_marker) - : RGWSimpleCoroutine(store->ctx()), bucket_info(bucket_info), - shard_id(shard_id), generation(generation), bs(store->getRados()), - start_marker(BucketIndexShardsManager::get_shard_marker(start_marker)), - end_marker(BucketIndexShardsManager::get_shard_marker(end_marker)) -{ -} - -int RGWRadosBILogTrimCR::send_request(const DoutPrefixProvider *dpp) -{ - int r = bs.init(dpp, bucket_info, generation, shard_id); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: bucket shard init failed ret=" << r << dendl; - return r; - } - - bufferlist in; - cls_rgw_bi_log_trim_op call; - call.start_marker = std::move(start_marker); - call.end_marker = std::move(end_marker); - encode(call, in); - - librados::ObjectWriteOperation op; - op.exec(RGW_CLASS, RGW_BI_LOG_TRIM, in); - - cn = stack->create_completion_notifier(); - return bs.bucket_obj.aio_operate(cn->completion(), &op); -} - -int RGWRadosBILogTrimCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - set_status() << "request complete; ret=" << r; - return r; -} - -int RGWAsyncFetchRemoteObj::_send_request(const DoutPrefixProvider *dpp) -{ - RGWObjectCtx obj_ctx(store); - - char buf[16]; - snprintf(buf, sizeof(buf), ".%lld", (long long)store->getRados()->instance_id()); - rgw::sal::Attrs attrs; - - rgw::sal::RadosBucket bucket(store, src_bucket); - rgw::sal::RadosObject src_obj(store, key, &bucket); - rgw::sal::RadosBucket dest_bucket(store, 
dest_bucket_info);
-  rgw::sal::RadosObject dest_obj(store, dest_key.value_or(key), &dest_bucket);
-
-  std::string etag;
-
-  std::optional<uint64_t> bytes_transferred;
-  int r = store->getRados()->fetch_remote_obj(obj_ctx,
-                                              user_id.value_or(rgw_user()),
-                                              NULL, /* req_info */
-                                              source_zone,
-                                              &dest_obj,
-                                              &src_obj,
-                                              &dest_bucket, /* dest */
-                                              nullptr, /* source */
-                                              dest_placement_rule,
-                                              nullptr, /* real_time* src_mtime, */
-                                              NULL, /* real_time* mtime, */
-                                              NULL, /* const real_time* mod_ptr, */
-                                              NULL, /* const real_time* unmod_ptr, */
-                                              false, /* high precision time */
-                                              NULL, /* const char *if_match, */
-                                              NULL, /* const char *if_nomatch, */
-                                              RGWRados::ATTRSMOD_NONE,
-                                              copy_if_newer,
-                                              attrs,
-                                              RGWObjCategory::Main,
-                                              versioned_epoch,
-                                              real_time(), /* delete_at */
-                                              NULL, /* string *ptag, */
-                                              &etag, /* string *petag, */
-                                              NULL, /* void (*progress_cb)(off_t, void *), */
-                                              NULL, /* void *progress_data*); */
-                                              dpp,
-                                              filter.get(),
-                                              &zones_trace,
-                                              &bytes_transferred);
-
-  if (r < 0) {
-    ldpp_dout(dpp, 0) << "store->fetch_remote_obj() returned r=" << r << dendl;
-    if (counters) {
-      counters->inc(sync_counters::l_fetch_err, 1);
-    }
-  } else {
-    // r >= 0
-    if (bytes_transferred) {
-      // send notification that object was successfully synced
-      std::string user_id = "rgw sync";
-      std::string req_id = "0";
-
-      RGWObjTags obj_tags;
-      auto iter = attrs.find(RGW_ATTR_TAGS);
-      if (iter != attrs.end()) {
-        try {
-          auto it = iter->second.cbegin();
-          obj_tags.decode(it);
-        } catch (buffer::error &err) {
-          ldpp_dout(dpp, 1) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl;
-        }
-      }
-
-      // NOTE: we create a mutable copy of bucket.get_tenant as the get_notification function expects a std::string&, not const
-      std::string tenant(dest_bucket.get_tenant());
-
-      std::unique_ptr<rgw::sal::Notification> notify
-        = store->get_notification(dpp, &dest_obj, nullptr, rgw::notify::ObjectSyncedCreate,
-                                  &dest_bucket, user_id,
-                                  tenant,
-                                  req_id, null_yield);
-
-      auto notify_res = static_cast<rgw::sal::RadosNotification *>(notify.get())->get_reservation();
-      int ret = rgw::notify::publish_reserve(dpp, rgw::notify::ObjectSyncedCreate, notify_res, &obj_tags);
-      if (ret < 0) {
-        ldpp_dout(dpp, 1) << "ERROR: reserving notification failed, with error: " << ret << dendl;
-        // no need to return, the sync already happened
-      } else {
-        ret = rgw::notify::publish_commit(&dest_obj, dest_obj.get_obj_size(), ceph::real_clock::now(), etag, dest_obj.get_instance(), rgw::notify::ObjectSyncedCreate, notify_res, dpp);
-        if (ret < 0) {
-          ldpp_dout(dpp, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl;
-        }
-      }
-    }
-
-    if (counters) {
-      if (bytes_transferred) {
-        counters->inc(sync_counters::l_fetch, *bytes_transferred);
-      } else {
-        counters->inc(sync_counters::l_fetch_not_modified);
-      }
-    }
-  }
-  return r;
-}
-
-int RGWAsyncStatRemoteObj::_send_request(const DoutPrefixProvider *dpp)
-{
-  RGWObjectCtx obj_ctx(store);
-
-  string user_id;
-  char buf[16];
-  snprintf(buf, sizeof(buf), ".%lld", (long long)store->getRados()->instance_id());
-
-  rgw::sal::RadosBucket bucket(store, src_bucket);
-  rgw::sal::RadosObject src_obj(store, key, &bucket);
-
-  int r = store->getRados()->stat_remote_obj(dpp,
-                                             obj_ctx,
-                                             rgw_user(user_id),
-                                             nullptr, /* req_info */
-                                             source_zone,
-                                             &src_obj,
-                                             nullptr, /* source */
-                                             pmtime, /* real_time* src_mtime, */
-                                             psize, /* uint64_t * */
-                                             nullptr, /* const real_time* mod_ptr, */
-                                             nullptr, /* const real_time* unmod_ptr, */
-                                             true, /* high precision time */
-                                             nullptr, /* const char *if_match, */
-                                             nullptr, /* const 
char *if_nomatch, */ - pattrs, - pheaders, - nullptr, - nullptr, /* string *ptag, */ - petag); /* string *petag, */ - - if (r < 0) { - ldpp_dout(dpp, 0) << "store->stat_remote_obj() returned r=" << r << dendl; - } - return r; -} - - -int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) -{ - ldpp_dout(dpp, 0) << __func__ << "(): deleting obj=" << obj << dendl; - - obj->set_atomic(); - - RGWObjState *state; - - int ret = obj->get_obj_state(dpp, &state, null_yield); - if (ret < 0) { - ldpp_dout(dpp, 20) << __func__ << "(): get_obj_state() obj=" << obj << " returned ret=" << ret << dendl; - return ret; - } - - /* has there been any racing object write? */ - if (del_if_older && (state->mtime > timestamp)) { - ldpp_dout(dpp, 20) << __func__ << "(): skipping object removal obj=" << obj << " (obj mtime=" << state->mtime << ", request timestamp=" << timestamp << ")" << dendl; - return 0; - } - - RGWAccessControlPolicy policy; - - /* decode policy */ - map::iterator iter = state->attrset.find(RGW_ATTR_ACL); - if (iter != state->attrset.end()) { - auto bliter = iter->second.cbegin(); - try { - policy.decode(bliter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl; - return -EIO; - } - } - - std::unique_ptr del_op = obj->get_delete_op(); - - del_op->params.bucket_owner = bucket->get_info().owner; - del_op->params.obj_owner = policy.get_owner(); - if (del_if_older) { - del_op->params.unmod_since = timestamp; - } - if (versioned) { - del_op->params.versioning_status = BUCKET_VERSIONED; - } - del_op->params.olh_epoch = versioned_epoch; - del_op->params.marker_version_id = marker_version_id; - del_op->params.obj_owner.set_id(rgw_user(owner)); - del_op->params.obj_owner.set_name(owner_display_name); - del_op->params.mtime = timestamp; - del_op->params.high_precision_time = true; - del_op->params.zones_trace = &zones_trace; - - ret = del_op->delete_obj(dpp, null_yield); - if (ret < 0) { - ldpp_dout(dpp, 20) << __func__ << "(): delete_obj() obj=" << obj << " returned ret=" << ret << dendl; - } - return ret; -} - -int RGWContinuousLeaseCR::operate(const DoutPrefixProvider *dpp) -{ - if (aborted) { - caller->set_sleeping(false); - return set_cr_done(); - } - reenter(this) { - last_renew_try_time = ceph::coarse_mono_clock::now(); - while (!going_down) { - yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval)); - current_time = ceph::coarse_mono_clock::now(); - if (current_time - last_renew_try_time > interval_tolerance) { - // renewal should happen between 50%-90% of interval - ldout(store->ctx(), 1) << *this << ": WARNING: did not renew lock " << obj << ":" << lock_name << ": within 90\% of interval. 
" << - (current_time - last_renew_try_time) << " > " << interval_tolerance << dendl; - } - last_renew_try_time = current_time; - - caller->set_sleeping(false); /* will only be relevant when we return, that's why we can do it early */ - if (retcode < 0) { - set_locked(false); - ldout(store->ctx(), 20) << *this << ": couldn't lock " << obj << ":" << lock_name << ": retcode=" << retcode << dendl; - return set_state(RGWCoroutine_Error, retcode); - } - ldout(store->ctx(), 20) << *this << ": successfully locked " << obj << ":" << lock_name << dendl; - set_locked(true); - yield wait(utime_t(interval / 2, 0)); - } - set_locked(false); /* moot at this point anyway */ - yield call(new RGWSimpleRadosUnlockCR(async_rados, store, obj, lock_name, cookie)); - return set_state(RGWCoroutine_Done); - } - return 0; -} - -RGWRadosTimelogAddCR::RGWRadosTimelogAddCR(const DoutPrefixProvider *_dpp, rgw::sal::RadosStore* _store, const string& _oid, - const cls_log_entry& entry) : RGWSimpleCoroutine(_store->ctx()), - dpp(_dpp), - store(_store), - oid(_oid), cn(NULL) -{ - stringstream& s = set_description(); - s << "timelog add entry oid=" << oid << "entry={id=" << entry.id << ", section=" << entry.section << ", name=" << entry.name << "}"; - entries.push_back(entry); -} - -int RGWRadosTimelogAddCR::send_request(const DoutPrefixProvider *dpp) -{ - set_status() << "sending request"; - - cn = stack->create_completion_notifier(); - return store->svc()->cls->timelog.add(dpp, oid, entries, cn->completion(), true, null_yield); -} - -int RGWRadosTimelogAddCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - -RGWRadosTimelogTrimCR::RGWRadosTimelogTrimCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - const std::string& oid, - const real_time& start_time, - const real_time& end_time, - const std::string& from_marker, - const std::string& to_marker) - : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), oid(oid), - start_time(start_time), end_time(end_time), - from_marker(from_marker), to_marker(to_marker) -{ - set_description() << "timelog trim oid=" << oid - << " start_time=" << start_time << " end_time=" << end_time - << " from_marker=" << from_marker << " to_marker=" << to_marker; -} - -int RGWRadosTimelogTrimCR::send_request(const DoutPrefixProvider *dpp) -{ - set_status() << "sending request"; - - cn = stack->create_completion_notifier(); - return store->svc()->cls->timelog.trim(dpp, oid, start_time, end_time, from_marker, - to_marker, cn->completion(), - null_yield); -} - -int RGWRadosTimelogTrimCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - - -RGWSyncLogTrimCR::RGWSyncLogTrimCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, const std::string& oid, - const std::string& to_marker, - std::string *last_trim_marker) - : RGWRadosTimelogTrimCR(dpp, store, oid, real_time{}, real_time{}, - std::string{}, to_marker), - cct(store->ctx()), last_trim_marker(last_trim_marker) -{ -} - -int RGWSyncLogTrimCR::request_complete() -{ - int r = RGWRadosTimelogTrimCR::request_complete(); - if (r != -ENODATA) { - return r; - } - // nothing left to trim, update last_trim_marker - if (*last_trim_marker < to_marker && to_marker != max_marker) { - *last_trim_marker = to_marker; - } - return 0; -} - - -int RGWAsyncStatObj::_send_request(const DoutPrefixProvider *dpp) -{ - rgw_raw_obj raw_obj; - 
store->getRados()->obj_to_raw(bucket_info.placement_rule, obj, &raw_obj); - return store->getRados()->raw_obj_stat(dpp, raw_obj, psize, pmtime, pepoch, - nullptr, nullptr, objv_tracker, null_yield); -} - -RGWStatObjCR::RGWStatObjCR(const DoutPrefixProvider *dpp, - RGWAsyncRadosProcessor *async_rados, rgw::sal::RadosStore* store, - const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize, - real_time* pmtime, uint64_t *pepoch, - RGWObjVersionTracker *objv_tracker) - : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), async_rados(async_rados), - bucket_info(_bucket_info), obj(obj), psize(psize), pmtime(pmtime), pepoch(pepoch), - objv_tracker(objv_tracker) -{ -} - -void RGWStatObjCR::request_cleanup() -{ - if (req) { - req->finish(); - req = NULL; - } -} - -int RGWStatObjCR::send_request(const DoutPrefixProvider *dpp) -{ - req = new RGWAsyncStatObj(dpp, this, stack->create_completion_notifier(), - store, bucket_info, obj, psize, pmtime, pepoch, objv_tracker); - async_rados->queue(req); - return 0; -} - -int RGWStatObjCR::request_complete() -{ - return req->get_ret_status(); -} - -RGWRadosNotifyCR::RGWRadosNotifyCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, - bufferlist& request, uint64_t timeout_ms, - bufferlist *response) - : RGWSimpleCoroutine(store->ctx()), store(store), obj(obj), - request(request), timeout_ms(timeout_ms), response(response) -{ - set_description() << "notify dest=" << obj; -} - -int RGWRadosNotifyCR::send_request(const DoutPrefixProvider *dpp) -{ - int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; - return r; - } - - set_status() << "sending request"; - - cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_notify(ref.obj.oid, cn->completion(), request, - timeout_ms, response); -} - -int RGWRadosNotifyCR::request_complete() -{ - int r = cn->completion()->get_return_value(); - - set_status() << "request complete; ret=" << r; - - return r; -} - - -int RGWDataPostNotifyCR::operate(const DoutPrefixProvider* dpp) -{ - reenter(this) { - using PostNotify2 = RGWPostRESTResourceCR>, int>; - yield { - rgw_http_param_pair pairs[] = { { "type", "data" }, - { "notify2", NULL }, - { "source-zone", source_zone }, - { NULL, NULL } }; - call(new PostNotify2(store->ctx(), conn, &http_manager, "/admin/log", pairs, shards, nullptr)); - } - if (retcode == -ERR_METHOD_NOT_ALLOWED) { - using PostNotify1 = RGWPostRESTResourceCR; - yield { - rgw_http_param_pair pairs[] = { { "type", "data" }, - { "notify", NULL }, - { "source-zone", source_zone }, - { NULL, NULL } }; - auto encoder = rgw_data_notify_v1_encoder{shards}; - call(new PostNotify1(store->ctx(), conn, &http_manager, "/admin/log", pairs, encoder, nullptr)); - } - } - if (retcode < 0) { - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; -} diff --git a/src/rgw/store/rados/rgw_cr_rados.h b/src/rgw/store/rados/rgw_cr_rados.h deleted file mode 100644 index 03c5303ebf7..00000000000 --- a/src/rgw/store/rados/rgw_cr_rados.h +++ /dev/null @@ -1,1595 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_CR_RADOS_H -#define CEPH_RGW_CR_RADOS_H - -#include -#include "include/ceph_assert.h" -#include "rgw_coroutine.h" -#include "rgw_sal.h" -#include "rgw_sal_rados.h" -#include "common/WorkQueue.h" -#include "common/Throttle.h" - -#include -#include "common/ceph_time.h" - -#include 
"services/svc_sys_obj.h" -#include "services/svc_bucket.h" - -struct rgw_http_param_pair; -class RGWRESTConn; - -class RGWAsyncRadosRequest : public RefCountedObject { - RGWCoroutine *caller; - RGWAioCompletionNotifier *notifier; - - int retcode; - - ceph::mutex lock = ceph::make_mutex("RGWAsyncRadosRequest::lock"); - -protected: - virtual int _send_request(const DoutPrefixProvider *dpp) = 0; -public: - RGWAsyncRadosRequest(RGWCoroutine *_caller, RGWAioCompletionNotifier *_cn) - : caller(_caller), notifier(_cn), retcode(0) { - } - ~RGWAsyncRadosRequest() override { - if (notifier) { - notifier->put(); - } - } - - void send_request(const DoutPrefixProvider *dpp) { - get(); - retcode = _send_request(dpp); - { - std::lock_guard l{lock}; - if (notifier) { - notifier->cb(); // drops its own ref - notifier = nullptr; - } - } - put(); - } - - int get_ret_status() { return retcode; } - - void finish() { - { - std::lock_guard l{lock}; - if (notifier) { - // we won't call notifier->cb() to drop its ref, so drop it here - notifier->put(); - notifier = nullptr; - } - } - put(); - } -}; - - -class RGWAsyncRadosProcessor { - std::deque m_req_queue; - std::atomic going_down = { false }; -protected: - CephContext *cct; - ThreadPool m_tp; - Throttle req_throttle; - - struct RGWWQ : public DoutPrefixProvider, public ThreadPool::WorkQueue { - RGWAsyncRadosProcessor *processor; - RGWWQ(RGWAsyncRadosProcessor *p, - ceph::timespan timeout, ceph::timespan suicide_timeout, - ThreadPool *tp) - : ThreadPool::WorkQueue("RGWWQ", timeout, suicide_timeout, tp), processor(p) {} - - bool _enqueue(RGWAsyncRadosRequest *req) override; - void _dequeue(RGWAsyncRadosRequest *req) override { - ceph_abort(); - } - bool _empty() override; - RGWAsyncRadosRequest *_dequeue() override; - using ThreadPool::WorkQueue::_process; - void _process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) override; - void _dump_queue(); - void _clear() override { - ceph_assert(processor->m_req_queue.empty()); - } - - CephContext *get_cct() const { return processor->cct; } - unsigned get_subsys() const { return ceph_subsys_rgw; } - std::ostream& gen_prefix(std::ostream& out) const { return out << "rgw async rados processor: ";} - - } req_wq; - -public: - RGWAsyncRadosProcessor(CephContext *_cct, int num_threads); - ~RGWAsyncRadosProcessor() {} - void start(); - void stop(); - void handle_request(const DoutPrefixProvider *dpp, RGWAsyncRadosRequest *req); - void queue(RGWAsyncRadosRequest *req); - - bool is_going_down() { - return going_down; - } - -}; - -template -class RGWSimpleWriteOnlyAsyncCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - - P params; - const DoutPrefixProvider *dpp; - - class Request : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - P params; - const DoutPrefixProvider *dpp; - protected: - int _send_request(const DoutPrefixProvider *dpp) override; - public: - Request(RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, - rgw::sal::RadosStore* store, - const P& _params, - const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), - store(store), - params(_params), - dpp(dpp) {} - } *req{nullptr}; - - public: - RGWSimpleWriteOnlyAsyncCR(RGWAsyncRadosProcessor *_async_rados, - rgw::sal::RadosStore* _store, - const P& _params, - const DoutPrefixProvider *_dpp) : RGWSimpleCoroutine(_store->ctx()), - async_rados(_async_rados), - store(_store), - params(_params), - dpp(_dpp) {} - - ~RGWSimpleWriteOnlyAsyncCR() override { - request_cleanup(); - } - 
void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new Request(this, - stack->create_completion_notifier(), - store, - params, - dpp); - - async_rados->queue(req); - return 0; - } - int request_complete() override { - return req->get_ret_status(); - } -}; - - -template -class RGWSimpleAsyncCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - - P params; - std::shared_ptr result; - const DoutPrefixProvider *dpp; - - class Request : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - P params; - std::shared_ptr result; - const DoutPrefixProvider *dpp; - protected: - int _send_request(const DoutPrefixProvider *dpp) override; - public: - Request(const DoutPrefixProvider *dpp, - RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, - rgw::sal::RadosStore* _store, - const P& _params, - std::shared_ptr& _result, - const DoutPrefixProvider *_dpp) : RGWAsyncRadosRequest(caller, cn), - store(_store), - params(_params), - result(_result), - dpp(_dpp) {} - } *req{nullptr}; - - public: - RGWSimpleAsyncCR(RGWAsyncRadosProcessor *_async_rados, - rgw::sal::RadosStore* _store, - const P& _params, - std::shared_ptr& _result, - const DoutPrefixProvider *_dpp) : RGWSimpleCoroutine(_store->ctx()), - async_rados(_async_rados), - store(_store), - params(_params), - result(_result), - dpp(_dpp) {} - - ~RGWSimpleAsyncCR() override { - request_cleanup(); - } - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new Request(dpp, - this, - stack->create_completion_notifier(), - store, - params, - result, - dpp); - - async_rados->queue(req); - return 0; - } - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWGenericAsyncCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - - -public: - class Action { - public: - virtual ~Action() {} - virtual int operate() = 0; - }; - -private: - std::shared_ptr action; - - class Request : public RGWAsyncRadosRequest { - std::shared_ptr action; - protected: - int _send_request(const DoutPrefixProvider *dpp) override { - if (!action) { - return 0; - } - return action->operate(); - } - public: - Request(const DoutPrefixProvider *dpp, - RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, - std::shared_ptr& _action) : RGWAsyncRadosRequest(caller, cn), - action(_action) {} - } *req{nullptr}; - - public: - RGWGenericAsyncCR(CephContext *_cct, - RGWAsyncRadosProcessor *_async_rados, - std::shared_ptr& _action) : RGWSimpleCoroutine(_cct), - async_rados(_async_rados), - action(_action) {} - template - RGWGenericAsyncCR(CephContext *_cct, - RGWAsyncRadosProcessor *_async_rados, - std::shared_ptr& _action) : RGWSimpleCoroutine(_cct), - async_rados(_async_rados), - action(std::static_pointer_cast(_action)) {} - - ~RGWGenericAsyncCR() override { - request_cleanup(); - } - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new Request(dpp, this, - stack->create_completion_notifier(), - action); - - async_rados->queue(req); - return 0; - } - int request_complete() override { - return req->get_ret_status(); - } -}; - - -class RGWAsyncGetSystemObj : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - RGWSI_SysObj* svc_sysobj; - 
rgw_raw_obj obj; - const bool want_attrs; - const bool raw_attrs; -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncGetSystemObj(const DoutPrefixProvider *dpp, - RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - bool want_attrs, bool raw_attrs); - - bufferlist bl; - std::map attrs; - RGWObjVersionTracker objv_tracker; -}; - -class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - RGWSI_SysObj *svc; - rgw_raw_obj obj; - bool exclusive; - bufferlist bl; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncPutSystemObj(const DoutPrefixProvider *dpp, RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - bool _exclusive, bufferlist _bl); - - RGWObjVersionTracker objv_tracker; -}; - -class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - RGWSI_SysObj *svc; - rgw_raw_obj obj; - std::map attrs; - bool exclusive; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncPutSystemObjAttrs(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - std::map _attrs, bool exclusive); - - RGWObjVersionTracker objv_tracker; -}; - -class RGWAsyncLockSystemObj : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - rgw_raw_obj obj; - std::string lock_name; - std::string cookie; - uint32_t duration_secs; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - const std::string& _name, const std::string& _cookie, uint32_t _duration_secs); -}; - -class RGWAsyncUnlockSystemObj : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - rgw_raw_obj obj; - std::string lock_name; - std::string cookie; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, - const std::string& _name, const std::string& _cookie); -}; - -template -class RGWSimpleRadosReadCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - RGWAsyncRadosProcessor *async_rados; - RGWSI_SysObj *svc; - - rgw_raw_obj obj; - T *result; - /// on ENOENT, call handle_data() with an empty object instead of failing - const bool empty_on_enoent; - RGWObjVersionTracker *objv_tracker; - RGWAsyncGetSystemObj *req{nullptr}; - -public: - RGWSimpleRadosReadCR(const DoutPrefixProvider *_dpp, - RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, - const rgw_raw_obj& _obj, - T *_result, bool empty_on_enoent = true, - RGWObjVersionTracker *objv_tracker = nullptr) - : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), svc(_svc), - obj(_obj), result(_result), - empty_on_enoent(empty_on_enoent), objv_tracker(objv_tracker) {} - ~RGWSimpleRadosReadCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override; - int 
request_complete() override; - - virtual int handle_data(T& data) { - return 0; - } -}; - -template -int RGWSimpleRadosReadCR::send_request(const DoutPrefixProvider *dpp) -{ - req = new RGWAsyncGetSystemObj(dpp, this, stack->create_completion_notifier(), svc, - objv_tracker, obj, false, false); - async_rados->queue(req); - return 0; -} - -template -int RGWSimpleRadosReadCR::request_complete() -{ - int ret = req->get_ret_status(); - retcode = ret; - if (ret == -ENOENT && empty_on_enoent) { - *result = T(); - } else { - if (ret < 0) { - return ret; - } - if (objv_tracker) { // copy the updated version - *objv_tracker = req->objv_tracker; - } - try { - auto iter = req->bl.cbegin(); - if (iter.end()) { - // allow successful reads with empty buffers. ReadSyncStatus coroutines - // depend on this to be able to read without locking, because the - // cls lock from InitSyncStatus will create an empty object if it didn't - // exist - *result = T(); - } else { - decode(*result, iter); - } - } catch (buffer::error& err) { - return -EIO; - } - } - - return handle_data(*result); -} - -class RGWSimpleRadosReadAttrsCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - RGWAsyncRadosProcessor *async_rados; - RGWSI_SysObj *svc; - - rgw_raw_obj obj; - std::map *pattrs; - bool raw_attrs; - RGWObjVersionTracker* objv_tracker; - RGWAsyncGetSystemObj *req = nullptr; - -public: - RGWSimpleRadosReadAttrsCR(const DoutPrefixProvider *_dpp, RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, - const rgw_raw_obj& _obj, std::map *_pattrs, - bool _raw_attrs, RGWObjVersionTracker* objv_tracker = nullptr) - : RGWSimpleCoroutine(_svc->ctx()), - dpp(_dpp), - async_rados(_async_rados), svc(_svc), - obj(_obj), - pattrs(_pattrs), - raw_attrs(_raw_attrs), - objv_tracker(objv_tracker) - {} - ~RGWSimpleRadosReadAttrsCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -template -class RGWSimpleRadosWriteCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - RGWAsyncRadosProcessor *async_rados; - RGWSI_SysObj *svc; - bufferlist bl; - rgw_raw_obj obj; - RGWObjVersionTracker *objv_tracker; - bool exclusive; - RGWAsyncPutSystemObj *req{nullptr}; - -public: - RGWSimpleRadosWriteCR(const DoutPrefixProvider *_dpp, - RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc, - const rgw_raw_obj& _obj, const T& _data, - RGWObjVersionTracker *objv_tracker = nullptr, - bool exclusive = false) - : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), - svc(_svc), obj(_obj), objv_tracker(objv_tracker), exclusive(exclusive) { - encode(_data, bl); - } - - ~RGWSimpleRadosWriteCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncPutSystemObj(dpp, this, stack->create_completion_notifier(), - svc, objv_tracker, obj, exclusive, std::move(bl)); - async_rados->queue(req); - return 0; - } - - int request_complete() override { - if (objv_tracker) { // copy the updated version - *objv_tracker = req->objv_tracker; - } - return req->get_ret_status(); - } -}; - -class RGWSimpleRadosWriteAttrsCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - RGWAsyncRadosProcessor *async_rados; - RGWSI_SysObj *svc; - RGWObjVersionTracker *objv_tracker; - - rgw_raw_obj obj; - 
std::map attrs; - bool exclusive; - RGWAsyncPutSystemObjAttrs *req = nullptr; - -public: - RGWSimpleRadosWriteAttrsCR(const DoutPrefixProvider *_dpp, - RGWAsyncRadosProcessor *_async_rados, - RGWSI_SysObj *_svc, const rgw_raw_obj& _obj, - std::map _attrs, - RGWObjVersionTracker *objv_tracker = nullptr, - bool exclusive = false) - : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), - svc(_svc), objv_tracker(objv_tracker), obj(_obj), - attrs(std::move(_attrs)), exclusive(exclusive) { - } - ~RGWSimpleRadosWriteAttrsCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncPutSystemObjAttrs(dpp, this, stack->create_completion_notifier(), - svc, objv_tracker, obj, std::move(attrs), - exclusive); - async_rados->queue(req); - return 0; - } - - int request_complete() override { - if (objv_tracker) { // copy the updated version - *objv_tracker = req->objv_tracker; - } - return req->get_ret_status(); - } -}; - -class RGWRadosSetOmapKeysCR : public RGWSimpleCoroutine { - rgw::sal::RadosStore* store; - std::map entries; - - rgw_rados_ref ref; - - rgw_raw_obj obj; - - boost::intrusive_ptr cn; - -public: - RGWRadosSetOmapKeysCR(rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - std::map& _entries); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWRadosGetOmapKeysCR : public RGWSimpleCoroutine { - public: - struct Result { - rgw_rados_ref ref; - std::set entries; - bool more = false; - }; - using ResultPtr = std::shared_ptr; - - RGWRadosGetOmapKeysCR(rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, - const std::string& _marker, int _max_entries, - ResultPtr result); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; - - private: - rgw::sal::RadosStore* store; - rgw_raw_obj obj; - std::string marker; - int max_entries; - ResultPtr result; - boost::intrusive_ptr cn; -}; - -class RGWRadosGetOmapValsCR : public RGWSimpleCoroutine { - public: - struct Result { - rgw_rados_ref ref; - std::map entries; - bool more = false; - }; - using ResultPtr = std::shared_ptr; - - RGWRadosGetOmapValsCR(rgw::sal::RadosStore* _store, const rgw_raw_obj& _obj, - const std::string& _marker, int _max_entries, - ResultPtr result); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; - - private: - rgw::sal::RadosStore* store; - rgw_raw_obj obj; - std::string marker; - int max_entries; - ResultPtr result; - boost::intrusive_ptr cn; -}; - -class RGWRadosRemoveOmapKeysCR : public RGWSimpleCoroutine { - rgw::sal::RadosStore* store; - - rgw_rados_ref ref; - - std::set keys; - - rgw_raw_obj obj; - - boost::intrusive_ptr cn; - -public: - RGWRadosRemoveOmapKeysCR(rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const std::set& _keys); - - int send_request(const DoutPrefixProvider *dpp) override; - - int request_complete() override; -}; - -class RGWRadosRemoveCR : public RGWSimpleCoroutine { - rgw::sal::RadosStore* store; - librados::IoCtx ioctx; - const rgw_raw_obj obj; - RGWObjVersionTracker* objv_tracker; - boost::intrusive_ptr cn; - -public: - RGWRadosRemoveCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, - RGWObjVersionTracker* objv_tracker = nullptr); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWRadosRemoveOidCR 
: public RGWSimpleCoroutine { - librados::IoCtx ioctx; - const std::string oid; - RGWObjVersionTracker* objv_tracker; - boost::intrusive_ptr cn; - -public: - RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - librados::IoCtx&& ioctx, std::string_view oid, - RGWObjVersionTracker* objv_tracker = nullptr); - - RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj& obj, - RGWObjVersionTracker* objv_tracker = nullptr); - - RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj&& obj, - RGWObjVersionTracker* objv_tracker = nullptr); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWSimpleRadosLockCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - std::string lock_name; - std::string cookie; - uint32_t duration; - - rgw_raw_obj obj; - - RGWAsyncLockSystemObj *req; - -public: - RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const std::string& _lock_name, - const std::string& _cookie, - uint32_t _duration); - ~RGWSimpleRadosLockCR() override { - request_cleanup(); - } - void request_cleanup() override; - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; - - static std::string gen_random_cookie(CephContext* cct) { -#define COOKIE_LEN 16 - char buf[COOKIE_LEN + 1]; - gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); - return buf; - } -}; - -class RGWSimpleRadosUnlockCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - std::string lock_name; - std::string cookie; - - rgw_raw_obj obj; - - RGWAsyncUnlockSystemObj *req; - -public: - RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const std::string& _lock_name, - const std::string& _cookie); - ~RGWSimpleRadosUnlockCR() override { - request_cleanup(); - } - void request_cleanup() override; - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -#define OMAP_APPEND_MAX_ENTRIES_DEFAULT 100 - -class RGWOmapAppend : public RGWConsumerCR { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - - rgw_raw_obj obj; - - bool going_down; - - int num_pending_entries; - std::list pending_entries; - - std::map entries; - - uint64_t window_size; - uint64_t total_entries; -public: - RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - uint64_t _window_size = OMAP_APPEND_MAX_ENTRIES_DEFAULT); - int operate(const DoutPrefixProvider *dpp) override; - void flush_pending(); - bool append(const std::string& s); - bool finish(); - - uint64_t get_total_entries() { - return total_entries; - } - - const rgw_raw_obj& get_obj() { - return obj; - } -}; - -class RGWShardedOmapCRManager { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - RGWCoroutine *op; - - int num_shards; - - std::vector shards; -public: - RGWShardedOmapCRManager(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, RGWCoroutine *_op, int _num_shards, const rgw_pool& pool, const std::string& oid_prefix) - : async_rados(_async_rados), - store(_store), op(_op), num_shards(_num_shards) { - shards.reserve(num_shards); - for (int i = 0; i < num_shards; ++i) { - char buf[oid_prefix.size() + 16]; - snprintf(buf, sizeof(buf), "%s.%d", oid_prefix.c_str(), i); - RGWOmapAppend *shard = new 
RGWOmapAppend(async_rados, store, rgw_raw_obj(pool, buf)); - shard->get(); - shards.push_back(shard); - op->spawn(shard, false); - } - } - - ~RGWShardedOmapCRManager() { - for (auto shard : shards) { - shard->put(); - } - } - - bool append(const std::string& entry, int shard_id) { - return shards[shard_id]->append(entry); - } - bool finish() { - bool success = true; - for (auto& append_op : shards) { - success &= (append_op->finish() && (!append_op->is_error())); - } - return success; - } - - uint64_t get_total_entries(int shard_id) { - return shards[shard_id]->get_total_entries(); - } -}; - -class RGWAsyncGetBucketInstanceInfo : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - rgw_bucket bucket; - const DoutPrefixProvider *dpp; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncGetBucketInstanceInfo(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, - rgw::sal::RadosStore* _store, const rgw_bucket& bucket, - const DoutPrefixProvider *dpp) - : RGWAsyncRadosRequest(caller, cn), store(_store), bucket(bucket), dpp(dpp) {} - - RGWBucketInfo bucket_info; - std::map attrs; -}; - -class RGWAsyncPutBucketInstanceInfo : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - RGWBucketInfo& bucket_info; - bool exclusive; - real_time mtime; - std::map* attrs; - const DoutPrefixProvider *dpp; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncPutBucketInstanceInfo(RGWCoroutine* caller, - RGWAioCompletionNotifier* cn, - rgw::sal::RadosStore* store, - RGWBucketInfo& bucket_info, - bool exclusive, - real_time mtime, - std::map* attrs, - const DoutPrefixProvider* dpp) - : RGWAsyncRadosRequest(caller, cn), store(store), bucket_info(bucket_info), - exclusive(exclusive), mtime(mtime), attrs(attrs), dpp(dpp) {} -}; - -class RGWGetBucketInstanceInfoCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - rgw_bucket bucket; - RGWBucketInfo *bucket_info; - std::map *pattrs; - const DoutPrefixProvider *dpp; - - RGWAsyncGetBucketInstanceInfo *req{nullptr}; - -public: - // rgw_bucket constructor - RGWGetBucketInstanceInfoCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_bucket& _bucket, RGWBucketInfo *_bucket_info, - std::map *_pattrs, const DoutPrefixProvider *dpp) - : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store), - bucket(_bucket), bucket_info(_bucket_info), pattrs(_pattrs), dpp(dpp) {} - ~RGWGetBucketInstanceInfoCR() override { - request_cleanup(); - } - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncGetBucketInstanceInfo(this, stack->create_completion_notifier(), store, bucket, dpp); - async_rados->queue(req); - return 0; - } - int request_complete() override { - if (bucket_info) { - *bucket_info = std::move(req->bucket_info); - } - if (pattrs) { - *pattrs = std::move(req->attrs); - } - return req->get_ret_status(); - } -}; - -class RGWPutBucketInstanceInfoCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - RGWBucketInfo& bucket_info; - bool exclusive; - real_time mtime; - std::map* attrs; - const DoutPrefixProvider *dpp; - - RGWAsyncPutBucketInstanceInfo* req = nullptr; - -public: - // rgw_bucket constructor - RGWPutBucketInstanceInfoCR(RGWAsyncRadosProcessor *async_rados, - rgw::sal::RadosStore* store, - RGWBucketInfo& 
bucket_info, - bool exclusive, - real_time mtime, - std::map* attrs, - const DoutPrefixProvider *dpp) - : RGWSimpleCoroutine(store->ctx()), async_rados(async_rados), store(store), - bucket_info(bucket_info), exclusive(exclusive), - mtime(mtime), attrs(attrs), dpp(dpp) {} - ~RGWPutBucketInstanceInfoCR() override { - request_cleanup(); - } - void request_cleanup() override { - if (req) { - req->finish(); - req = nullptr; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncPutBucketInstanceInfo(this, - stack->create_completion_notifier(), - store, bucket_info, exclusive, - mtime, attrs, dpp); - async_rados->queue(req); - return 0; - } - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWRadosBILogTrimCR : public RGWSimpleCoroutine { - const RGWBucketInfo& bucket_info; - int shard_id; - const rgw::bucket_index_layout_generation generation; - RGWRados::BucketShard bs; - std::string start_marker; - std::string end_marker; - boost::intrusive_ptr cn; - public: - RGWRadosBILogTrimCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info, - int shard_id, - const rgw::bucket_index_layout_generation& generation, - const std::string& start_marker, - const std::string& end_marker); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWAsyncFetchRemoteObj : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - std::optional user_id; - - rgw_bucket src_bucket; - std::optional dest_placement_rule; - RGWBucketInfo dest_bucket_info; - - rgw_obj_key key; - std::optional dest_key; - std::optional versioned_epoch; - - real_time src_mtime; - - bool copy_if_newer; - std::shared_ptr filter; - rgw_zone_set zones_trace; - PerfCounters* counters; - const DoutPrefixProvider *dpp; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncFetchRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - std::optional& _user_id, - const rgw_bucket& _src_bucket, - std::optional _dest_placement_rule, - const RGWBucketInfo& _dest_bucket_info, - const rgw_obj_key& _key, - const std::optional& _dest_key, - std::optional _versioned_epoch, - bool _if_newer, - std::shared_ptr _filter, - rgw_zone_set *_zones_trace, - PerfCounters* counters, const DoutPrefixProvider *dpp) - : RGWAsyncRadosRequest(caller, cn), store(_store), - source_zone(_source_zone), - user_id(_user_id), - src_bucket(_src_bucket), - dest_placement_rule(_dest_placement_rule), - dest_bucket_info(_dest_bucket_info), - key(_key), - dest_key(_dest_key), - versioned_epoch(_versioned_epoch), - copy_if_newer(_if_newer), - filter(_filter), - counters(counters), - dpp(dpp) - { - if (_zones_trace) { - zones_trace = *_zones_trace; - } - } -}; - -class RGWFetchRemoteObjCR : public RGWSimpleCoroutine { - CephContext *cct; - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - std::optional user_id; - - rgw_bucket src_bucket; - std::optional dest_placement_rule; - RGWBucketInfo dest_bucket_info; - - rgw_obj_key key; - std::optional dest_key; - std::optional versioned_epoch; - - real_time src_mtime; - - bool copy_if_newer; - - std::shared_ptr filter; - - RGWAsyncFetchRemoteObj *req; - rgw_zone_set *zones_trace; - PerfCounters* counters; - const DoutPrefixProvider *dpp; - -public: - RGWFetchRemoteObjCR(RGWAsyncRadosProcessor 
*_async_rados, rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - std::optional _user_id, - const rgw_bucket& _src_bucket, - std::optional _dest_placement_rule, - const RGWBucketInfo& _dest_bucket_info, - const rgw_obj_key& _key, - const std::optional& _dest_key, - std::optional _versioned_epoch, - bool _if_newer, - std::shared_ptr _filter, - rgw_zone_set *_zones_trace, - PerfCounters* counters, const DoutPrefixProvider *dpp) - : RGWSimpleCoroutine(_store->ctx()), cct(_store->ctx()), - async_rados(_async_rados), store(_store), - source_zone(_source_zone), - user_id(_user_id), - src_bucket(_src_bucket), - dest_placement_rule(_dest_placement_rule), - dest_bucket_info(_dest_bucket_info), - key(_key), - dest_key(_dest_key), - versioned_epoch(_versioned_epoch), - copy_if_newer(_if_newer), - filter(_filter), - req(NULL), - zones_trace(_zones_trace), counters(counters), dpp(dpp) {} - - - ~RGWFetchRemoteObjCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncFetchRemoteObj(this, stack->create_completion_notifier(), store, - source_zone, user_id, src_bucket, dest_placement_rule, dest_bucket_info, - key, dest_key, versioned_epoch, copy_if_newer, filter, - zones_trace, counters, dpp); - async_rados->queue(req); - return 0; - } - - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWAsyncStatRemoteObj : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - rgw_bucket src_bucket; - rgw_obj_key key; - - ceph::real_time *pmtime; - uint64_t *psize; - std::string *petag; - std::map *pattrs; - std::map *pheaders; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncStatRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - rgw_bucket& _src_bucket, - const rgw_obj_key& _key, - ceph::real_time *_pmtime, - uint64_t *_psize, - std::string *_petag, - std::map *_pattrs, - std::map *_pheaders) : RGWAsyncRadosRequest(caller, cn), store(_store), - source_zone(_source_zone), - src_bucket(_src_bucket), - key(_key), - pmtime(_pmtime), - psize(_psize), - petag(_petag), - pattrs(_pattrs), - pheaders(_pheaders) {} -}; - -class RGWStatRemoteObjCR : public RGWSimpleCoroutine { - CephContext *cct; - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - rgw_bucket src_bucket; - rgw_obj_key key; - - ceph::real_time *pmtime; - uint64_t *psize; - std::string *petag; - std::map *pattrs; - std::map *pheaders; - - RGWAsyncStatRemoteObj *req; - -public: - RGWStatRemoteObjCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - rgw_bucket& _src_bucket, - const rgw_obj_key& _key, - ceph::real_time *_pmtime, - uint64_t *_psize, - std::string *_petag, - std::map *_pattrs, - std::map *_pheaders) : RGWSimpleCoroutine(_store->ctx()), cct(_store->ctx()), - async_rados(_async_rados), store(_store), - source_zone(_source_zone), - src_bucket(_src_bucket), - key(_key), - pmtime(_pmtime), - psize(_psize), - petag(_petag), - pattrs(_pattrs), - pheaders(_pheaders), - req(NULL) {} - - - ~RGWStatRemoteObjCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - 
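
All of the wrappers in this header (read, write, fetch, stat, remove) repeat the same send_request()/request_complete()/request_cleanup() contract against RGWAsyncRadosProcessor. A minimal compilable sketch of that contract, with hypothetical `Processor`/`AsyncOp`/`SimpleCR` stand-ins rather than the real Ceph classes:

```cpp
// The contract shared by the RGW*CR wrappers in this file: send_request()
// allocates an async op and hands it to a processor queue; request_complete()
// harvests its status; request_cleanup() must release the op exactly once.
#include <queue>

struct AsyncOp {                  // stands in for RGWAsyncRadosRequest
  int ret = 0;                    // filled in by a worker thread in real life
  void finish() { delete this; }  // the real finish() drops a refcount
};

struct Processor {                // stands in for RGWAsyncRadosProcessor
  std::queue<AsyncOp*> q;         // toy queue; a real processor drains it
  void queue(AsyncOp* op) { q.push(op); }
};

class SimpleCR {                  // stands in for an RGWSimpleCoroutine subclass
  Processor& proc;
  AsyncOp* req = nullptr;
public:
  explicit SimpleCR(Processor& p) : proc(p) {}
  ~SimpleCR() { request_cleanup(); }
  int send_request() { req = new AsyncOp; proc.queue(req); return 0; }
  int request_complete() { return req->ret; }
  void request_cleanup() {
    if (req) {
      req->finish();
      req = nullptr;              // guard against a second release
    }
  }
};

int main() {
  Processor p;
  SimpleCR cr(p);
  cr.send_request();
  return cr.request_complete();   // 0 on success, negative errno on failure
}
```

The `req = nullptr` reset mirrors the request_cleanup() bodies above: the destructor calls request_cleanup() again, so the pointer must be cleared after finish() to avoid a double release.
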
req = new RGWAsyncStatRemoteObj(this, stack->create_completion_notifier(), store, source_zone, - src_bucket, key, pmtime, psize, petag, pattrs, pheaders); - async_rados->queue(req); - return 0; - } - - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWAsyncRemoveObj : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - std::unique_ptr bucket; - std::unique_ptr obj; - - std::string owner; - std::string owner_display_name; - bool versioned; - uint64_t versioned_epoch; - std::string marker_version_id; - - bool del_if_older; - ceph::real_time timestamp; - rgw_zone_set zones_trace; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncRemoveObj(const DoutPrefixProvider *_dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, - rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - RGWBucketInfo& _bucket_info, - const rgw_obj_key& _key, - const std::string& _owner, - const std::string& _owner_display_name, - bool _versioned, - uint64_t _versioned_epoch, - bool _delete_marker, - bool _if_older, - real_time& _timestamp, - rgw_zone_set* _zones_trace) : RGWAsyncRadosRequest(caller, cn), dpp(_dpp), store(_store), - source_zone(_source_zone), - owner(_owner), - owner_display_name(_owner_display_name), - versioned(_versioned), - versioned_epoch(_versioned_epoch), - del_if_older(_if_older), - timestamp(_timestamp) { - if (_delete_marker) { - marker_version_id = _key.instance; - } - - if (_zones_trace) { - zones_trace = *_zones_trace; - } - store->get_bucket(nullptr, _bucket_info, &bucket); - obj = bucket->get_object(_key); - } -}; - -class RGWRemoveObjCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - CephContext *cct; - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - rgw_zone_id source_zone; - - RGWBucketInfo bucket_info; - - rgw_obj_key key; - bool versioned; - uint64_t versioned_epoch; - bool delete_marker; - std::string owner; - std::string owner_display_name; - - bool del_if_older; - real_time timestamp; - - RGWAsyncRemoveObj *req; - - rgw_zone_set *zones_trace; - -public: - RGWRemoveObjCR(const DoutPrefixProvider *_dpp, RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_zone_id& _source_zone, - RGWBucketInfo& _bucket_info, - const rgw_obj_key& _key, - bool _versioned, - uint64_t _versioned_epoch, - std::string *_owner, - std::string *_owner_display_name, - bool _delete_marker, - real_time *_timestamp, - rgw_zone_set *_zones_trace) : RGWSimpleCoroutine(_store->ctx()), dpp(_dpp), cct(_store->ctx()), - async_rados(_async_rados), store(_store), - source_zone(_source_zone), - bucket_info(_bucket_info), - key(_key), - versioned(_versioned), - versioned_epoch(_versioned_epoch), - delete_marker(_delete_marker), req(NULL), zones_trace(_zones_trace) { - del_if_older = (_timestamp != NULL); - if (_timestamp) { - timestamp = *_timestamp; - } - - if (_owner) { - owner = *_owner; - } - - if (_owner_display_name) { - owner_display_name = *_owner_display_name; - } - } - ~RGWRemoveObjCR() override { - request_cleanup(); - } - - void request_cleanup() override { - if (req) { - req->finish(); - req = NULL; - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncRemoveObj(dpp, this, stack->create_completion_notifier(), store, source_zone, bucket_info, - key, owner, owner_display_name, versioned, versioned_epoch, - delete_marker, del_if_older, timestamp, 
zones_trace); - async_rados->queue(req); - return 0; - } - - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWContinuousLeaseCR : public RGWCoroutine { - RGWAsyncRadosProcessor *async_rados; - rgw::sal::RadosStore* store; - - const rgw_raw_obj obj; - - const std::string lock_name; - const std::string cookie; - - int interval; - bool going_down{ false }; - bool locked{false}; - - const ceph::timespan interval_tolerance; - const ceph::timespan ts_interval; - - RGWCoroutine *caller; - - bool aborted{false}; - - ceph::coarse_mono_time last_renew_try_time; - ceph::coarse_mono_time current_time; - -public: - RGWContinuousLeaseCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, - const rgw_raw_obj& _obj, - const std::string& _lock_name, int _interval, RGWCoroutine *_caller) - : RGWCoroutine(_store->ctx()), async_rados(_async_rados), store(_store), - obj(_obj), lock_name(_lock_name), - cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), - interval(_interval), interval_tolerance(ceph::make_timespan(9*interval/10)), ts_interval(ceph::make_timespan(interval)), - caller(_caller) - {} - - virtual ~RGWContinuousLeaseCR() override; - - int operate(const DoutPrefixProvider *dpp) override; - - bool is_locked() const { - if (ceph::coarse_mono_clock::now() - last_renew_try_time > ts_interval) { - return false; - } - return locked; - } - - void set_locked(bool status) { - locked = status; - } - - void go_down() { - going_down = true; - wakeup(); - } - - void abort() { - aborted = true; - } -}; - -class RGWRadosTimelogAddCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - std::list entries; - - std::string oid; - - boost::intrusive_ptr cn; - -public: - RGWRadosTimelogAddCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* _store, const std::string& _oid, - const cls_log_entry& entry); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWRadosTimelogTrimCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - boost::intrusive_ptr cn; - protected: - std::string oid; - real_time start_time; - real_time end_time; - std::string from_marker; - std::string to_marker; - - public: - RGWRadosTimelogTrimCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, const std::string& oid, - const real_time& start_time, const real_time& end_time, - const std::string& from_marker, - const std::string& to_marker); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -// wrapper to update last_trim_marker on success -class RGWSyncLogTrimCR : public RGWRadosTimelogTrimCR { - CephContext *cct; - std::string *last_trim_marker; - public: - static constexpr const char* max_marker = "99999999"; - - RGWSyncLogTrimCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, const std::string& oid, - const std::string& to_marker, std::string *last_trim_marker); - int request_complete() override; -}; - -class RGWAsyncStatObj : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWBucketInfo bucket_info; - rgw_obj obj; - uint64_t *psize; - real_time *pmtime; - uint64_t *pepoch; - RGWObjVersionTracker *objv_tracker; -protected: - int _send_request(const DoutPrefixProvider *dpp) override; -public: - RGWAsyncStatObj(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* store, - const 
RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr, - real_time *pmtime = nullptr, uint64_t *pepoch = nullptr, - RGWObjVersionTracker *objv_tracker = nullptr) - : RGWAsyncRadosRequest(caller, cn), dpp(dpp), store(store), obj(obj), psize(psize), - pmtime(pmtime), pepoch(pepoch), objv_tracker(objv_tracker) {} -}; - -class RGWStatObjCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWAsyncRadosProcessor *async_rados; - RGWBucketInfo bucket_info; - rgw_obj obj; - uint64_t *psize; - real_time *pmtime; - uint64_t *pepoch; - RGWObjVersionTracker *objv_tracker; - RGWAsyncStatObj *req = nullptr; - public: - RGWStatObjCR(const DoutPrefixProvider *dpp, RGWAsyncRadosProcessor *async_rados, rgw::sal::RadosStore* store, - const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr, - real_time* pmtime = nullptr, uint64_t *pepoch = nullptr, - RGWObjVersionTracker *objv_tracker = nullptr); - ~RGWStatObjCR() override { - request_cleanup(); - } - void request_cleanup() override; - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -/// coroutine wrapper for IoCtx::aio_notify() -class RGWRadosNotifyCR : public RGWSimpleCoroutine { - rgw::sal::RadosStore* const store; - const rgw_raw_obj obj; - bufferlist request; - const uint64_t timeout_ms; - bufferlist *response; - rgw_rados_ref ref; - boost::intrusive_ptr cn; - -public: - RGWRadosNotifyCR(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, - bufferlist& request, uint64_t timeout_ms, - bufferlist *response); - - int send_request(const DoutPrefixProvider *dpp) override; - int request_complete() override; -}; - -class RGWDataPostNotifyCR : public RGWCoroutine { - RGWRados *store; - RGWHTTPManager& http_manager; - bc::flat_map >& shards; - const char *source_zone; - RGWRESTConn *conn; - -public: - RGWDataPostNotifyCR(RGWRados *_store, RGWHTTPManager& _http_manager, bc::flat_map >& _shards, const char *_zone, RGWRESTConn *_conn) - : RGWCoroutine(_store->ctx()), store(_store), http_manager(_http_manager), - shards(_shards), source_zone(_zone), conn(_conn) {} - - int operate(const DoutPrefixProvider* dpp) override; -}; - -#endif diff --git a/src/rgw/store/rados/rgw_cr_tools.cc b/src/rgw/store/rados/rgw_cr_tools.cc deleted file mode 100644 index 94665a35aaa..00000000000 --- a/src/rgw/store/rados/rgw_cr_tools.cc +++ /dev/null @@ -1,292 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" - -#include "rgw_cr_tools.h" -#include "rgw_bucket.h" -#include "rgw_user.h" -#include "rgw_op.h" -#include "rgw_acl_s3.h" -#include "rgw_zone.h" - -#include "services/svc_zone.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -template<> -int RGWUserCreateCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - CephContext *cct = store->ctx(); - - const int32_t default_max_buckets = - cct->_conf.get_val("rgw_user_max_buckets"); - - RGWUserAdminOpState op_state(store); - - auto& user = params.user; - - op_state.set_user_id(user); - op_state.set_display_name(params.display_name); - op_state.set_user_email(params.email); - op_state.set_caps(params.caps); - op_state.set_access_key(params.access_key); - op_state.set_secret_key(params.secret_key); - - if (!params.key_type.empty()) { - int32_t key_type = KEY_TYPE_S3; - if (params.key_type == "swift") { - key_type = KEY_TYPE_SWIFT; - } - - 
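
This function is one of several `_send_request()` specializations in rgw_cr_tools.cc: each pairs a plain params struct (declared in rgw_cr_tools.h) with one admin operation, and the key-type mapping just above, together with the set_key_type() call that follows, belongs to the user-create specialization. A toy, self-contained version of that params-struct-plus-specialization shape, assuming hypothetical `WriteOnlyAsyncCR` and `user_create_params` stand-ins for the real RGWSimpleWriteOnlyAsyncCR machinery:

```cpp
// rgw_cr_tools in miniature: one coroutine template parameterized on a plain
// params struct, with a _send_request() specialization per admin task.
// Names and values here are illustrative, not Ceph APIs.
#include <cerrno>
#include <iostream>
#include <string>

template <typename Params>
struct WriteOnlyAsyncCR {
  Params params;
  int _send_request();            // specialized once per params type
  int run() { return _send_request(); }
};

struct user_create_params {       // mirrors the shape of rgw_user_create_params
  std::string uid;
  std::string display_name;
  bool suspended = false;
};

template <>
int WriteOnlyAsyncCR<user_create_params>::_send_request() {
  if (params.uid.empty()) {
    return -EINVAL;               // the real code validates via op_state
  }
  std::cout << "would create user " << params.uid
            << " (" << params.display_name << ")\n";
  return 0;
}

int main() {
  WriteOnlyAsyncCR<user_create_params> cr{{"uid1", "Example User", false}};
  return cr.run();
}
```

The real template queues the request on the async processor instead of calling `_send_request()` inline; the sketch keeps only the dispatch shape.
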
op_state.set_key_type(key_type); - } - - op_state.set_max_buckets(params.max_buckets.value_or(default_max_buckets)); - op_state.set_suspension(params.suspended); - op_state.set_system(params.system); - op_state.set_exclusive(params.exclusive); - - if (params.generate_key) { - op_state.set_generate_key(); - } - - - if (params.apply_quota) { - RGWQuota quota; - - if (cct->_conf->rgw_bucket_default_quota_max_objects >= 0) { - quota.bucket_quota.max_objects = cct->_conf->rgw_bucket_default_quota_max_objects; - quota.bucket_quota.enabled = true; - } - - if (cct->_conf->rgw_bucket_default_quota_max_size >= 0) { - quota.bucket_quota.max_size = cct->_conf->rgw_bucket_default_quota_max_size; - quota.bucket_quota.enabled = true; - } - - if (cct->_conf->rgw_user_default_quota_max_objects >= 0) { - quota.user_quota.max_objects = cct->_conf->rgw_user_default_quota_max_objects; - quota.user_quota.enabled = true; - } - - if (cct->_conf->rgw_user_default_quota_max_size >= 0) { - quota.user_quota.max_size = cct->_conf->rgw_user_default_quota_max_size; - quota.user_quota.enabled = true; - } - - if (quota.bucket_quota.enabled) { - op_state.set_bucket_quota(quota.bucket_quota); - } - - if (quota.user_quota.enabled) { - op_state.set_user_quota(quota.user_quota); - } - } - - RGWNullFlusher flusher; - return RGWUserAdminOp_User::create(dpp, store, op_state, flusher, null_yield); -} - -template<> -int RGWGetUserInfoCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - return store->ctl()->user->get_info_by_uid(dpp, params.user, result.get(), null_yield); -} - -template<> -int RGWGetBucketInfoCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - return store->get_bucket(dpp, nullptr, params.tenant, params.bucket_name, &result->bucket, null_yield); -} - -template<> -int RGWBucketCreateLocalCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - CephContext *cct = store->ctx(); - auto& zone_svc = store->svc()->zone; - - const auto& user_info = params.user_info.get(); - const auto& user = user_info->user_id; - const auto& bucket_name = params.bucket_name; - auto& placement_rule = params.placement_rule; - - if (!placement_rule.empty() && - !zone_svc->get_zone_params().valid_placement(placement_rule)) { - ldpp_dout(dpp, 0) << "placement target (" << placement_rule << ")" - << " doesn't exist in the placement targets of zonegroup" - << " (" << zone_svc->get_zonegroup().api_name << ")" << dendl; - return -ERR_INVALID_LOCATION_CONSTRAINT; - } - - /* we need to make sure we read bucket info, it's not read before for this - * specific request */ - RGWBucketInfo bucket_info; - map bucket_attrs; - - int ret = store->getRados()->get_bucket_info(store->svc(), user.tenant, bucket_name, - bucket_info, nullptr, null_yield, dpp, &bucket_attrs); - if (ret < 0 && ret != -ENOENT) - return ret; - bool bucket_exists = (ret != -ENOENT); - - RGWAccessControlPolicy old_policy(cct); - ACLOwner bucket_owner; - bucket_owner.set_id(user); - bucket_owner.set_name(user_info->display_name); - if (bucket_exists) { - ret = rgw_op_get_bucket_policy_from_attr(dpp, cct, store, bucket_info, - bucket_attrs, &old_policy, null_yield); - if (ret >= 0) { - if (old_policy.get_owner().get_id().compare(user) != 0) { - return -EEXIST; - } - } - } - - RGWBucketInfo master_info; - rgw_bucket *pmaster_bucket = nullptr; - uint32_t *pmaster_num_shards = nullptr; - real_time creation_time; - - string zonegroup_id = zone_svc->get_zonegroup().get_id(); - - if (bucket_exists) { - rgw_placement_rule selected_placement_rule; - rgw_bucket bucket; - 
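
A few lines up, the user-create path applies default quotas only when the corresponding rgw_*_default_quota_* config value is non-negative, so a value of -1 means "no default quota". A small self-contained sketch of that rule (toy `QuotaInfo` and `default_quota` names; the real code fills RGWQuota from cct->_conf):

```cpp
// Quota-defaulting rule from the user-create path above: a quota becomes
// enabled only when at least one configured default limit is non-negative.
#include <cstdint>
#include <iostream>

struct QuotaInfo {
  int64_t max_objects = -1;
  int64_t max_size = -1;
  bool enabled = false;
};

QuotaInfo default_quota(int64_t conf_max_objects, int64_t conf_max_size) {
  QuotaInfo q;
  if (conf_max_objects >= 0) {
    q.max_objects = conf_max_objects;
    q.enabled = true;
  }
  if (conf_max_size >= 0) {
    q.max_size = conf_max_size;
    q.enabled = true;
  }
  return q;   // applied to op_state only when q.enabled is true
}

int main() {
  const QuotaInfo q = default_quota(/*objects*/ 1000, /*size*/ -1);
  std::cout << "enabled=" << q.enabled
            << " max_objects=" << q.max_objects << '\n';
  return 0;
}
```
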
bucket.tenant = user.tenant; - bucket.name = bucket_name; - ret = zone_svc->select_bucket_placement(dpp, *user_info, zonegroup_id, - placement_rule, - &selected_placement_rule, nullptr, null_yield); - if (selected_placement_rule != bucket_info.placement_rule) { - ldpp_dout(dpp, 0) << "bucket already exists on a different placement rule: " - << " selected_rule= " << selected_placement_rule - << " existing_rule= " << bucket_info.placement_rule << dendl; - return -EEXIST; - } - } - - /* Encode special metadata first as we're using std::map::emplace under - * the hood. This method will add the new items only if the map doesn't - * contain such keys yet. */ - RGWAccessControlPolicy_S3 policy(cct); - policy.create_canned(bucket_owner, bucket_owner, string()); /* default private policy */ - bufferlist aclbl; - policy.encode(aclbl); - map attrs; - attrs.emplace(std::move(RGW_ATTR_ACL), std::move(aclbl)); - - RGWQuotaInfo quota_info; - const RGWQuotaInfo * pquota_info = nullptr; - - rgw_bucket bucket; - bucket.tenant = user.tenant; - bucket.name = bucket_name; - - RGWBucketInfo info; - obj_version ep_objv; - - ret = store->getRados()->create_bucket(*user_info, bucket, zonegroup_id, - placement_rule, bucket_info.swift_ver_location, - pquota_info, attrs, - info, nullptr, &ep_objv, creation_time, - pmaster_bucket, pmaster_num_shards, null_yield, dpp, true); - - - if (ret && ret != -EEXIST) - return ret; - - bool existed = (ret == -EEXIST); - - if (existed) { - if (info.owner != user) { - ldpp_dout(dpp, 20) << "NOTICE: bucket already exists under a different user (bucket=" << bucket << " user=" << user << " bucket_owner=" << info.owner << dendl; - return -EEXIST; - } - bucket = info.bucket; - } - - ret = store->ctl()->bucket->link_bucket(user, bucket, info.creation_time, null_yield, dpp, false); - if (ret && !existed && ret != -EEXIST) { - /* if it exists (or previously existed), don't remove it! */ - int r = store->ctl()->bucket->unlink_bucket(user, bucket, null_yield, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to unlink bucket: ret=" << r << dendl; - } - } else if (ret == -EEXIST || (ret == 0 && existed)) { - ret = -ERR_BUCKET_EXISTS; - } - - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: bucket creation (bucket=" << bucket << ") return ret=" << ret << dendl; - } - - return ret; -} - -template<> -int RGWObjectSimplePutCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - RGWDataAccess::ObjectRef obj; - - CephContext *cct = store->ctx(); - - int ret = params.bucket->get_object(params.key, &obj); - if (ret < 0) { - lderr(cct) << "ERROR: failed to get object: " << cpp_strerror(-ret) << dendl; - return -ret; - } - - if (params.user_data) { - obj->set_user_data(*params.user_data); - } - - ret = obj->put(params.data, params.attrs, dpp, null_yield); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: put object returned error: " << cpp_strerror(-ret) << dendl; - } - - return 0; -} - -template<> -int RGWBucketLifecycleConfigCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - CephContext *cct = store->ctx(); - - RGWLC *lc = store->getRados()->get_lc(); - if (!lc) { - lderr(cct) << "ERROR: lifecycle object is not initialized!" 
<< dendl; - return -EIO; - } - - int ret = lc->set_bucket_config(params.bucket, - params.bucket_attrs, - &params.config); - if (ret < 0) { - lderr(cct) << "ERROR: failed to set lifecycle on bucket: " << cpp_strerror(-ret) << dendl; - return -ret; - } - - return 0; -} - -template<> -int RGWBucketGetSyncPolicyHandlerCR::Request::_send_request(const DoutPrefixProvider *dpp) -{ - int r = store->ctl()->bucket->get_sync_policy_handler(params.zone, - params.bucket, - &result->policy_handler, - null_yield, - dpp); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: " << __func__ << "(): get_sync_policy_handler() returned " << r << dendl; - return r; - } - - return 0; -} diff --git a/src/rgw/store/rados/rgw_cr_tools.h b/src/rgw/store/rados/rgw_cr_tools.h deleted file mode 100644 index ebdbfeb51b7..00000000000 --- a/src/rgw/store/rados/rgw_cr_tools.h +++ /dev/null @@ -1,87 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_CR_TOOLS_H -#define CEPH_RGW_CR_TOOLS_H - -#include "rgw_cr_rados.h" -#include "rgw_tools.h" -#include "rgw_lc.h" - -#include "services/svc_bucket_sync.h" - -struct rgw_user_create_params { - rgw_user user; - std::string display_name; - std::string email; - std::string access_key; - std::string secret_key; - std::string key_type; /* "swift" or "s3" */ - std::string caps; - - bool generate_key{true}; - bool suspended{false}; - std::optional max_buckets; - bool system{false}; - bool exclusive{false}; - bool apply_quota{true}; -}; - -using RGWUserCreateCR = RGWSimpleWriteOnlyAsyncCR; - -struct rgw_get_user_info_params { - rgw_user user; -}; - -using RGWGetUserInfoCR = RGWSimpleAsyncCR; - -struct rgw_get_bucket_info_params { - std::string tenant; - std::string bucket_name; -}; - -struct rgw_get_bucket_info_result { - std::unique_ptr bucket; -}; - -using RGWGetBucketInfoCR = RGWSimpleAsyncCR; - -struct rgw_bucket_create_local_params { - std::shared_ptr user_info; - std::string bucket_name; - rgw_placement_rule placement_rule; -}; - -using RGWBucketCreateLocalCR = RGWSimpleWriteOnlyAsyncCR; - -struct rgw_object_simple_put_params { - RGWDataAccess::BucketRef bucket; - rgw_obj_key key; - bufferlist data; - std::map attrs; - std::optional user_data; -}; - -using RGWObjectSimplePutCR = RGWSimpleWriteOnlyAsyncCR; - - -struct rgw_bucket_lifecycle_config_params { - rgw::sal::Bucket* bucket; - rgw::sal::Attrs bucket_attrs; - RGWLifecycleConfiguration config; -}; - -using RGWBucketLifecycleConfigCR = RGWSimpleWriteOnlyAsyncCR; - -struct rgw_bucket_get_sync_policy_params { - std::optional zone; - std::optional bucket; -}; - -struct rgw_bucket_get_sync_policy_result { - RGWBucketSyncPolicyHandlerRef policy_handler; -}; - -using RGWBucketGetSyncPolicyHandlerCR = RGWSimpleAsyncCR; - -#endif diff --git a/src/rgw/store/rados/rgw_d3n_datacache.cc b/src/rgw/store/rados/rgw_d3n_datacache.cc deleted file mode 100644 index ed375e2ac94..00000000000 --- a/src/rgw/store/rados/rgw_d3n_datacache.cc +++ /dev/null @@ -1,369 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_d3n_datacache.h" -#include "rgw_rest_client.h" -#include "rgw_auth_s3.h" -#include "rgw_op.h" -#include "rgw_common.h" -#include "rgw_auth_s3.h" -#include "rgw_op.h" -#include "rgw_crypt_sanitize.h" -#if defined(__linux__) -#include -#endif - -#if __has_include() -#include -namespace efs = std::filesystem; -#else -#include -namespace efs = std::experimental::filesystem; -#endif - -#define 
dout_subsys ceph_subsys_rgw - -using namespace std; - -int D3nCacheAioWriteRequest::d3n_prepare_libaio_write_op(bufferlist& bl, unsigned int len, string oid, string cache_location) -{ - std::string location = cache_location + oid; - int r = 0; - - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): Write To Cache, location=" << location << dendl; - cb = new struct aiocb; - mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; - memset(cb, 0, sizeof(struct aiocb)); - r = fd = ::open(location.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode); - if (fd < 0) { - ldout(cct, 0) << "ERROR: D3nCacheAioWriteRequest::create_io: open file failed, errno=" << errno << ", location='" << location.c_str() << "'" << dendl; - goto done; - } - if (g_conf()->rgw_d3n_l1_fadvise != POSIX_FADV_NORMAL) - posix_fadvise(fd, 0, 0, g_conf()->rgw_d3n_l1_fadvise); - cb->aio_fildes = fd; - - data = malloc(len); - if (!data) { - ldout(cct, 0) << "ERROR: D3nCacheAioWriteRequest::create_io: memory allocation failed" << dendl; - goto close_file; - } - cb->aio_buf = data; - memcpy((void*)data, bl.c_str(), len); - cb->aio_nbytes = len; - goto done; - -close_file: - ::close(fd); -done: - return r; -} - -D3nDataCache::D3nDataCache() - : cct(nullptr), io_type(_io_type::ASYNC_IO), free_data_cache_size(0), outstanding_write_size(0) -{ - lsubdout(g_ceph_context, rgw_datacache, 5) << "D3nDataCache: " << __func__ << "()" << dendl; -} - -void D3nDataCache::init(CephContext *_cct) { - cct = _cct; - free_data_cache_size = cct->_conf->rgw_d3n_l1_datacache_size; - head = nullptr; - tail = nullptr; - cache_location = cct->_conf->rgw_d3n_l1_datacache_persistent_path; - if(cache_location.back() != '/') { - cache_location += "/"; - } - try { - if (efs::exists(cache_location)) { - // d3n: evict the cache storage directory - if (g_conf()->rgw_d3n_l1_evict_cache_on_start) { - lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: init: evicting the persistent storage directory on start" << dendl; - for (auto& p : efs::directory_iterator(cache_location)) { - efs::remove_all(p.path()); - } - } - } else { - // create the cache storage directory - lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: init: creating the persistent storage directory on start" << dendl; - efs::create_directories(cache_location); - } - } catch (const efs::filesystem_error& e) { - lderr(g_ceph_context) << "D3nDataCache: init: ERROR initializing the cache storage directory '" << cache_location << - "' : " << e.what() << dendl; - } - - auto conf_eviction_policy = cct->_conf.get_val("rgw_d3n_l1_eviction_policy"); - ceph_assert(conf_eviction_policy == "lru" || conf_eviction_policy == "random"); - if (conf_eviction_policy == "lru") - eviction_policy = _eviction_policy::LRU; - if (conf_eviction_policy == "random") - eviction_policy = _eviction_policy::RANDOM; - -#if defined(HAVE_LIBAIO) && defined(__GLIBC__) - // libaio setup - struct aioinit ainit{0}; - ainit.aio_threads = cct->_conf.get_val("rgw_d3n_libaio_aio_threads"); - ainit.aio_num = cct->_conf.get_val("rgw_d3n_libaio_aio_num"); - ainit.aio_idle_time = 120; - aio_init(&ainit); -#endif -} - -int D3nDataCache::d3n_io_write(bufferlist& bl, unsigned int len, std::string oid) -{ - D3nChunkDataInfo* chunk_info = new D3nChunkDataInfo; - std::string location = cache_location + oid; - - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; - FILE *cache_file = nullptr; - int r = 0; - size_t nbytes = 0; - - cache_file = fopen(location.c_str(), 
"w+"); - if (cache_file == nullptr) { - ldout(cct, 0) << "ERROR: D3nDataCache::fopen file has return error, errno=" << errno << dendl; - return -errno; - } - - nbytes = fwrite(bl.c_str(), 1, len, cache_file); - if (nbytes != len) { - ldout(cct, 0) << "ERROR: D3nDataCache::io_write: fwrite has returned error: nbytes!=len, nbytes=" << nbytes << ", len=" << len << dendl; - return -EIO; - } - - r = fclose(cache_file); - if (r != 0) { - ldout(cct, 0) << "ERROR: D3nDataCache::fclsoe file has return error, errno=" << errno << dendl; - return -errno; - } - - { // update cahce_map entries for new chunk in cache - const std::lock_guard l(d3n_cache_lock); - chunk_info->oid = oid; - chunk_info->set_ctx(cct); - chunk_info->size = len; - d3n_cache_map.insert(pair(oid, chunk_info)); - } - - return r; -} - -void d3n_libaio_write_cb(sigval sigval) -{ - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl; - D3nCacheAioWriteRequest* c = static_cast(sigval.sival_ptr); - c->priv_data->d3n_libaio_write_completion_cb(c); -} - - -void D3nDataCache::d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c) -{ - D3nChunkDataInfo* chunk_info{nullptr}; - - ldout(cct, 5) << "D3nDataCache: " << __func__ << "(): oid=" << c->oid << dendl; - - { // update cache_map entries for new chunk in cache - const std::lock_guard l(d3n_cache_lock); - d3n_outstanding_write_list.erase(c->oid); - chunk_info = new D3nChunkDataInfo; - chunk_info->oid = c->oid; - chunk_info->set_ctx(cct); - chunk_info->size = c->cb->aio_nbytes; - d3n_cache_map.insert(pair(c->oid, chunk_info)); - } - - { // update free size - const std::lock_guard l(d3n_eviction_lock); - free_data_cache_size -= c->cb->aio_nbytes; - outstanding_write_size -= c->cb->aio_nbytes; - lru_insert_head(chunk_info); - } - delete c; - c = nullptr; -} - -int D3nDataCache::d3n_libaio_create_write_request(bufferlist& bl, unsigned int len, std::string oid) -{ - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "(): Write To Cache, oid=" << oid << ", len=" << len << dendl; - struct D3nCacheAioWriteRequest* wr = new struct D3nCacheAioWriteRequest(cct); - int r=0; - if ((r = wr->d3n_prepare_libaio_write_op(bl, len, oid, cache_location)) < 0) { - ldout(cct, 0) << "ERROR: D3nDataCache: " << __func__ << "() prepare libaio write op r=" << r << dendl; - goto done; - } - wr->cb->aio_sigevent.sigev_notify = SIGEV_THREAD; - wr->cb->aio_sigevent.sigev_notify_function = d3n_libaio_write_cb; - wr->cb->aio_sigevent.sigev_notify_attributes = nullptr; - wr->cb->aio_sigevent.sigev_value.sival_ptr = (void*)wr; - wr->oid = oid; - wr->priv_data = this; - - if ((r = ::aio_write(wr->cb)) != 0) { - ldout(cct, 0) << "ERROR: D3nDataCache: " << __func__ << "() aio_write r=" << r << dendl; - goto error; - } - return 0; - -error: - delete wr; -done: - return r; -} - -void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) -{ - size_t sr = 0; - uint64_t freed_size = 0, _free_data_cache_size = 0, _outstanding_write_size = 0; - - ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", len=" << len << dendl; - { - const std::lock_guard l(d3n_cache_lock); - std::unordered_map::iterator iter = d3n_cache_map.find(oid); - if (iter != d3n_cache_map.end()) { - ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): data already cached, no rewrite" << dendl; - return; - } - auto it = d3n_outstanding_write_list.find(oid); - if (it != d3n_outstanding_write_list.end()) { - ldout(cct, 10) << "D3nDataCache: NOTE: data put in 
cache already issued, no rewrite" << dendl; - return; - } - d3n_outstanding_write_list.insert(oid); - } - { - const std::lock_guard l(d3n_eviction_lock); - _free_data_cache_size = free_data_cache_size; - _outstanding_write_size = outstanding_write_size; - } - ldout(cct, 20) << "D3nDataCache: Before eviction _free_data_cache_size:" << _free_data_cache_size << ", _outstanding_write_size:" << _outstanding_write_size << ", freed_size:" << freed_size << dendl; - while (len > (_free_data_cache_size - _outstanding_write_size + freed_size)) { - ldout(cct, 20) << "D3nDataCache: enter eviction" << dendl; - if (eviction_policy == _eviction_policy::LRU) { - sr = lru_eviction(); - } else if (eviction_policy == _eviction_policy::RANDOM) { - sr = random_eviction(); - } else { - ldout(cct, 0) << "D3nDataCache: Warning: unknown cache eviction policy, defaulting to lru eviction" << dendl; - sr = lru_eviction(); - } - if (sr == 0) { - ldout(cct, 2) << "D3nDataCache: Warning: eviction was not able to free disk space, not writing to cache" << dendl; - d3n_outstanding_write_list.erase(oid); - return; - } - ldout(cct, 20) << "D3nDataCache: completed eviction of " << sr << " bytes" << dendl; - freed_size += sr; - } - int r = 0; - r = d3n_libaio_create_write_request(bl, len, oid); - if (r < 0) { - const std::lock_guard l(d3n_cache_lock); - d3n_outstanding_write_list.erase(oid); - ldout(cct, 1) << "D3nDataCache: create_aio_write_request fail, r=" << r << dendl; - return; - } - - const std::lock_guard l(d3n_eviction_lock); - free_data_cache_size += freed_size; - outstanding_write_size += len; -} - -bool D3nDataCache::get(const string& oid, const off_t len) -{ - const std::lock_guard l(d3n_cache_lock); - bool exist = false; - string location = cache_location + oid; - - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "(): location=" << location << dendl; - std::unordered_map::iterator iter = d3n_cache_map.find(oid); - if (!(iter == d3n_cache_map.end())) { - // check inside cache whether file exists or not!!!! 
then make exist true; - struct D3nChunkDataInfo* chdo = iter->second; - struct stat st; - int r = stat(location.c_str(), &st); - if ( r != -1 && st.st_size == len) { // file exists and contains required data range length - exist = true; - /*LRU*/ - /*get D3nChunkDataInfo*/ - const std::lock_guard l(d3n_eviction_lock); - lru_remove(chdo); - lru_insert_head(chdo); - } else { - d3n_cache_map.erase(oid); - const std::lock_guard l(d3n_eviction_lock); - lru_remove(chdo); - delete chdo; - exist = false; - } - } - return exist; -} - -size_t D3nDataCache::random_eviction() -{ - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "()" << dendl; - int n_entries = 0; - int random_index = 0; - size_t freed_size = 0; - D3nChunkDataInfo* del_entry; - string del_oid, location; - { - const std::lock_guard l(d3n_cache_lock); - n_entries = d3n_cache_map.size(); - if (n_entries <= 0) { - return -1; - } - srand (time(NULL)); - random_index = ceph::util::generate_random_number(0, n_entries-1); - std::unordered_map::iterator iter = d3n_cache_map.begin(); - std::advance(iter, random_index); - del_oid = iter->first; - del_entry = iter->second; - ldout(cct, 20) << "D3nDataCache: random_eviction: index:" << random_index << ", free size: " << del_entry->size << dendl; - freed_size = del_entry->size; - delete del_entry; - del_entry = nullptr; - d3n_cache_map.erase(del_oid); // oid - } - - location = cache_location + del_oid; - ::remove(location.c_str()); - return freed_size; -} - -size_t D3nDataCache::lru_eviction() -{ - lsubdout(g_ceph_context, rgw_datacache, 20) << "D3nDataCache: " << __func__ << "()" << dendl; - int n_entries = 0; - size_t freed_size = 0; - D3nChunkDataInfo* del_entry; - string del_oid, location; - - { - const std::lock_guard l(d3n_eviction_lock); - del_entry = tail; - if (del_entry == nullptr) { - ldout(cct, 2) << "D3nDataCache: lru_eviction: del_entry=null_ptr" << dendl; - return 0; - } - lru_remove(del_entry); - } - - { - const std::lock_guard l(d3n_cache_lock); - n_entries = d3n_cache_map.size(); - if (n_entries <= 0) { - ldout(cct, 2) << "D3nDataCache: lru_eviction: cache_map.size<=0" << dendl; - return -1; - } - del_oid = del_entry->oid; - ldout(cct, 20) << "D3nDataCache: lru_eviction: oid to remove: " << del_oid << dendl; - d3n_cache_map.erase(del_oid); // oid - } - freed_size = del_entry->size; - delete del_entry; - location = cache_location + del_oid; - ::remove(location.c_str()); - return freed_size; -} diff --git a/src/rgw/store/rados/rgw_d3n_datacache.h b/src/rgw/store/rados/rgw_d3n_datacache.h deleted file mode 100644 index 5d3537f3b14..00000000000 --- a/src/rgw/store/rados/rgw_d3n_datacache.h +++ /dev/null @@ -1,261 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGWD3NDATACACHE_H -#define CEPH_RGWD3NDATACACHE_H - -#include "rgw_rados.h" -#include - -#include "rgw_common.h" - -#include -#include -#include "include/Context.h" -#include "include/lru.h" -#include "rgw_d3n_cacherequest.h" - - -/*D3nDataCache*/ -struct D3nDataCache; - - -struct D3nChunkDataInfo : public LRUObject { - CephContext *cct; - uint64_t size; - time_t access_time; - std::string address; - std::string oid; - bool complete; - struct D3nChunkDataInfo* lru_prev; - struct D3nChunkDataInfo* lru_next; - - D3nChunkDataInfo(): size(0) {} - - void set_ctx(CephContext *_cct) { - cct = _cct; - } - - void dump(Formatter *f) const; - static void generate_test_instances(std::list& o); -}; - -struct 
D3nCacheAioWriteRequest { - std::string oid; - void *data; - int fd; - struct aiocb *cb; - D3nDataCache *priv_data; - CephContext *cct; - - D3nCacheAioWriteRequest(CephContext *_cct) : cct(_cct) {} - int d3n_prepare_libaio_write_op(bufferlist& bl, unsigned int len, std::string oid, std::string cache_location); - - ~D3nCacheAioWriteRequest() { - ::close(fd); - cb->aio_buf = nullptr; - free(data); - data = nullptr; - delete(cb); - } -}; - -struct D3nDataCache { - -private: - std::unordered_map d3n_cache_map; - std::set d3n_outstanding_write_list; - std::mutex d3n_cache_lock; - std::mutex d3n_eviction_lock; - - CephContext *cct; - enum class _io_type { - SYNC_IO = 1, - ASYNC_IO = 2, - SEND_FILE = 3 - } io_type; - enum class _eviction_policy { - LRU=0, RANDOM=1 - } eviction_policy; - - struct sigaction action; - uint64_t free_data_cache_size = 0; - uint64_t outstanding_write_size = 0; - struct D3nChunkDataInfo* head; - struct D3nChunkDataInfo* tail; - -private: - void add_io(); - -public: - D3nDataCache(); - ~D3nDataCache() { - while (lru_eviction() > 0); - } - - std::string cache_location; - - bool get(const std::string& oid, const off_t len); - void put(bufferlist& bl, unsigned int len, std::string& obj_key); - int d3n_io_write(bufferlist& bl, unsigned int len, std::string oid); - int d3n_libaio_create_write_request(bufferlist& bl, unsigned int len, std::string oid); - void d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c); - size_t random_eviction(); - size_t lru_eviction(); - - void init(CephContext *_cct); - - void lru_insert_head(struct D3nChunkDataInfo* o) { - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl; - o->lru_next = head; - o->lru_prev = nullptr; - if (head) { - head->lru_prev = o; - } else { - tail = o; - } - head = o; - } - - void lru_insert_tail(struct D3nChunkDataInfo* o) { - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl; - o->lru_next = nullptr; - o->lru_prev = tail; - if (tail) { - tail->lru_next = o; - } else { - head = o; - } - tail = o; - } - - void lru_remove(struct D3nChunkDataInfo* o) { - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "()" << dendl; - if (o->lru_next) - o->lru_next->lru_prev = o->lru_prev; - else - tail = o->lru_prev; - if (o->lru_prev) - o->lru_prev->lru_next = o->lru_next; - else - head = o->lru_next; - o->lru_next = o->lru_prev = nullptr; - } -}; - - -template -class D3nRGWDataCache : public T { - -public: - D3nRGWDataCache() {} - - int init_rados() override { - int ret; - ret = T::init_rados(); - if (ret < 0) - return ret; - - return 0; - } - - int get_obj_iterate_cb(const DoutPrefixProvider *dpp, const rgw_raw_obj& read_obj, off_t obj_ofs, - off_t read_ofs, off_t len, bool is_head_obj, - RGWObjState *astate, void *arg) override; -}; - -template -int D3nRGWDataCache::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const rgw_raw_obj& read_obj, off_t obj_ofs, - off_t read_ofs, off_t len, bool is_head_obj, - RGWObjState *astate, void *arg) { - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache::" << __func__ << "(): is head object : " << is_head_obj << dendl; - librados::ObjectReadOperation op; - struct get_obj_data* d = static_cast(arg); - std::string oid, key; - - if (is_head_obj) { - // only when reading from the head object do we need to do the atomic test - int r = T::append_atomic_test(dpp, astate, op); - if (r < 0) - return r; - - if (astate && - obj_ofs < astate->data.length()) { - unsigned 
-      chunk_len = std::min((uint64_t)astate->data.length() - obj_ofs, (uint64_t)len);
-
-      r = d->client_cb->handle_data(astate->data, obj_ofs, chunk_len);
-      if (r < 0)
-        return r;
-
-      len -= chunk_len;
-      d->offset += chunk_len;
-      read_ofs += chunk_len;
-      obj_ofs += chunk_len;
-      if (!len)
-        return 0;
-    }
-
-    auto obj = d->rgwrados->svc.rados->obj(read_obj);
-    r = obj.open(dpp);
-    if (r < 0) {
-      lsubdout(g_ceph_context, rgw, 4) << "failed to open rados context for " << read_obj << dendl;
-      return r;
-    }
-
-    ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl;
-    op.read(read_ofs, len, nullptr, nullptr);
-
-    const uint64_t cost = len;
-    const uint64_t id = obj_ofs; // use logical object offset for sorting replies
-
-    auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id);
-    return d->flush(std::move(completed));
-  } else {
-    ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << ", is_head_obj=" << is_head_obj << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl;
-    int r;
-
-    op.read(read_ofs, len, nullptr, nullptr);
-
-    const uint64_t cost = len;
-    const uint64_t id = obj_ofs; // use logical object offset for sorting replies
-    oid = read_obj.oid;
-
-    auto obj = d->rgwrados->svc.rados->obj(read_obj);
-    r = obj.open(dpp);
-    if (r < 0) {
-      lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: Error: failed to open rados context for " << read_obj << ", r=" << r << dendl;
-      return r;
-    }
-
-    const bool is_compressed = (astate->attrset.find(RGW_ATTR_COMPRESSION) != astate->attrset.end());
-    const bool is_encrypted = (astate->attrset.find(RGW_ATTR_CRYPT_MODE) != astate->attrset.end());
-    if (read_ofs != 0 || astate->size != astate->accounted_size || is_compressed || is_encrypted) {
-      d->d3n_bypass_cache_write = true;
-      lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: " << __func__ << "(): Note - bypassing datacache: oid=" << read_obj.oid << ", read_ofs!=0 = " << read_ofs << ", size=" << astate->size << " != accounted_size=" << astate->accounted_size << ", is_compressed=" << is_compressed << ", is_encrypted=" << is_encrypted << dendl;
-      auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id);
-      r = d->flush(std::move(completed));
-      return r;
-    }
-
-    if (d->rgwrados->d3n_data_cache->get(oid, len)) {
-      // Read From Cache
-      ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): READ FROM CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl;
-      auto completed = d->aio->get(obj, rgw::Aio::d3n_cache_op(dpp, d->yield, read_ofs, len, d->rgwrados->d3n_data_cache->cache_location), cost, id);
-      r = d->flush(std::move(completed));
-      if (r < 0) {
-        lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: " << __func__ << "(): Error: failed to drain/flush, r= " << r << dendl;
-      }
-      return r;
-    } else {
-      // Write To Cache
-      ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): WRITE TO CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << " len=" << len << dendl;
-      auto completed = d->aio->get(obj, rgw::Aio::librados_op(std::move(op), d->yield), cost, id);
-      return d->flush(std::move(completed));
-    }
-  }
-  lsubdout(g_ceph_context, rgw, 1) << "D3nDataCache: " << __func__ << "(): Warning: Check head object cache handling flow, oid=" << read_obj.oid << dendl;
-
-  return 0;
-}
-
-#endif
diff --git a/src/rgw/store/rados/rgw_data_sync.cc b/src/rgw/store/rados/rgw_data_sync.cc
deleted file mode 100644
index 47573b765da..00000000000
--- a/src/rgw/store/rados/rgw_data_sync.cc
+++ /dev/null
@@ -1,6460 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-#include "common/ceph_json.h"
-#include "common/RefCountedObj.h"
-#include "common/WorkQueue.h"
-#include "common/Throttle.h"
-#include "common/errno.h"
-
-#include "rgw_common.h"
-#include "rgw_zone.h"
-#include "rgw_sync.h"
-#include "rgw_data_sync.h"
-#include "rgw_rest_conn.h"
-#include "rgw_cr_rados.h"
-#include "rgw_cr_rest.h"
-#include "rgw_cr_tools.h"
-#include "rgw_http_client.h"
-#include "rgw_bucket.h"
-#include "rgw_bucket_sync.h"
-#include "rgw_bucket_sync_cache.h"
-#include "rgw_datalog.h"
-#include "rgw_metadata.h"
-#include "rgw_sync_counters.h"
-#include "rgw_sync_error_repo.h"
-#include "rgw_sync_module.h"
-#include "rgw_sal.h"
-
-#include "cls/lock/cls_lock_client.h"
-#include "cls/rgw/cls_rgw_client.h"
-
-#include "services/svc_zone.h"
-#include "services/svc_sync_modules.h"
-#include "rgw_bucket.h"
-
-#include "include/common_fwd.h"
-#include "include/random.h"
-
-#include
-#include
-
-#define dout_subsys ceph_subsys_rgw
-
-#undef dout_prefix
-#define dout_prefix (*_dout << "data sync: ")
-
-using namespace std;
-
-static const string datalog_sync_status_oid_prefix = "datalog.sync-status";
-static const string datalog_sync_status_shard_prefix = "datalog.sync-status.shard";
-static const string datalog_sync_full_sync_index_prefix = "data.full-sync.index";
-static const string bucket_full_status_oid_prefix = "bucket.full-sync-status";
-static const string bucket_status_oid_prefix = "bucket.sync-status";
-static const string object_status_oid_prefix = "bucket.sync-status";
-
-void rgw_datalog_info::decode_json(JSONObj *obj) {
-  JSONDecoder::decode_json("num_objects", num_shards, obj);
-}
-
-void rgw_datalog_entry::decode_json(JSONObj *obj) {
-  JSONDecoder::decode_json("key", key, obj);
-  utime_t ut;
-  JSONDecoder::decode_json("timestamp", ut, obj);
-  timestamp = ut.to_real_time();
-}
-
-void rgw_datalog_shard_data::decode_json(JSONObj *obj) {
-  JSONDecoder::decode_json("marker", marker, obj);
-  JSONDecoder::decode_json("truncated", truncated, obj);
-  JSONDecoder::decode_json("entries", entries, obj);
-};
-
-// print a bucket shard with [gen]
-std::string to_string(const rgw_bucket_shard& bs, std::optional gen)
-{
-  constexpr auto digits10 = std::numeric_limits::digits10;
-  constexpr auto reserve = 2 + digits10; // [value]
-  auto str = bs.get_key('/', ':', ':', reserve);
-  str.append(1, '[');
-  str.append(std::to_string(gen.value_or(0)));
-  str.append(1, ']');
-  return str;
-}
-
-class RGWReadDataSyncStatusMarkersCR : public RGWShardCollectCR {
-  static constexpr int MAX_CONCURRENT_SHARDS = 16;
-
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *env;
-  const int num_shards;
-  int shard_id{0};
-
-  map& markers;
-
-  int handle_result(int r) override {
-    if (r == -ENOENT) { // ENOENT is not a fatal error
-      return 0;
-    }
-    if (r < 0) {
-      ldout(cct, 4) << "failed to read data sync status: "
-                    << cpp_strerror(r) << dendl;
-    }
-    return r;
-  }
- public:
-  RGWReadDataSyncStatusMarkersCR(RGWDataSyncCtx *sc, int num_shards,
-                                 map& markers)
-    : RGWShardCollectCR(sc->cct, MAX_CONCURRENT_SHARDS),
-      sc(sc), env(sc->env), num_shards(num_shards), markers(markers)
-  {}
-  bool spawn_next() override;
-};
-
-bool RGWReadDataSyncStatusMarkersCR::spawn_next()
-{
-  if (shard_id >=
num_shards) { - return false; - } - using CR = RGWSimpleRadosReadCR; - spawn(new CR(env->dpp, env->async_rados, env->svc->sysobj, - rgw_raw_obj(env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id)), - &markers[shard_id]), - false); - shard_id++; - return true; -} - -class RGWReadDataSyncRecoveringShardsCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - - RGWDataSyncCtx *sc; - RGWDataSyncEnv *env; - - uint64_t max_entries; - int num_shards; - int shard_id{0}; - - string marker; - std::vector& omapkeys; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to list recovering data sync: " - << cpp_strerror(r) << dendl; - } - return r; - } - public: - RGWReadDataSyncRecoveringShardsCR(RGWDataSyncCtx *sc, uint64_t _max_entries, int _num_shards, - std::vector& omapkeys) - : RGWShardCollectCR(sc->cct, MAX_CONCURRENT_SHARDS), sc(sc), env(sc->env), - max_entries(_max_entries), num_shards(_num_shards), omapkeys(omapkeys) - {} - bool spawn_next() override; -}; - -bool RGWReadDataSyncRecoveringShardsCR::spawn_next() -{ - if (shard_id >= num_shards) - return false; - - string error_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id) + ".retry"; - auto& shard_keys = omapkeys[shard_id]; - shard_keys = std::make_shared(); - spawn(new RGWRadosGetOmapKeysCR(env->driver, rgw_raw_obj(env->svc->zone->get_zone_params().log_pool, error_oid), - marker, max_entries, shard_keys), false); - - ++shard_id; - return true; -} - -class RGWReadDataSyncStatusCoroutine : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_data_sync_status *sync_status; - -public: - RGWReadDataSyncStatusCoroutine(RGWDataSyncCtx *_sc, - rgw_data_sync_status *_status) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(sc->env), sync_status(_status) - {} - int operate(const DoutPrefixProvider *dpp) override; -}; - -int RGWReadDataSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // read sync info - using ReadInfoCR = RGWSimpleRadosReadCR; - yield { - bool empty_on_enoent = false; // fail on ENOENT - call(new ReadInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sc->source_zone)), - &sync_status->sync_info, empty_on_enoent)); - } - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to read sync status info with " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - // read shard markers - using ReadMarkersCR = RGWReadDataSyncStatusMarkersCR; - yield call(new ReadMarkersCR(sc, sync_status->sync_info.num_shards, - sync_status->sync_markers)); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to read sync status markers with " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; -} - -class RGWReadRemoteDataLogShardInfoCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - RGWRESTReadResource *http_op; - - int shard_id; - RGWDataChangesLogInfo *shard_info; - -public: - RGWReadRemoteDataLogShardInfoCR(RGWDataSyncCtx *_sc, - int _shard_id, RGWDataChangesLogInfo *_shard_info) : RGWCoroutine(_sc->cct), - sc(_sc), - sync_env(_sc->env), - http_op(NULL), - shard_id(_shard_id), - shard_info(_shard_info) { - } - - ~RGWReadRemoteDataLogShardInfoCR() override { - if (http_op) { - 
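
The status coroutines above address everything by RADOS object name: one global status object, one marker object per shard, and a per-shard `.retry` omap object that serves as the error repo. A minimal sketch of that naming layout follows; the exact `"<prefix>.<zone>.<shard>"` composition inside these helpers is an assumption inferred from the prefixes defined at the top of this file, not the verbatim `RGWDataSyncStatusManager` implementation:

```cpp
#include <iostream>
#include <string>

// Hypothetical helpers mirroring the oid layout used by the sync-status
// coroutines; the "<prefix>.<zone>.<shard>" formatting is assumed.
std::string sync_status_oid(const std::string& source_zone) {
  return "datalog.sync-status." + source_zone;
}

std::string shard_obj_name(const std::string& source_zone, int shard_id) {
  return "datalog.sync-status.shard." + source_zone + "." +
         std::to_string(shard_id);
}

std::string error_repo_oid(const std::string& source_zone, int shard_id) {
  // each shard's retry/error repo lives next to its marker object
  return shard_obj_name(source_zone, shard_id) + ".retry";
}

int main() {
  std::cout << sync_status_oid("zone-a") << "\n"
            << shard_obj_name("zone-a", 3) << "\n"
            << error_repo_oid("zone-a", 3) << "\n";
}
```
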
http_op->put(); - } - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield { - char buf[16]; - snprintf(buf, sizeof(buf), "%d", shard_id); - rgw_http_param_pair pairs[] = { { "type" , "data" }, - { "id", buf }, - { "info" , NULL }, - { NULL, NULL } }; - - string p = "/admin/log/"; - - http_op = new RGWRESTReadResource(sc->conn, p, pairs, NULL, sync_env->http_manager); - - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - return set_cr_error(ret); - } - - return io_block(0); - } - yield { - int ret = http_op->wait(shard_info, null_yield); - if (ret < 0) { - return set_cr_error(ret); - } - return set_cr_done(); - } - } - return 0; - } -}; - -struct read_remote_data_log_response { - string marker; - bool truncated; - vector entries; - - read_remote_data_log_response() : truncated(false) {} - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("truncated", truncated, obj); - JSONDecoder::decode_json("entries", entries, obj); - }; -}; - -class RGWReadRemoteDataLogShardCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - RGWRESTReadResource *http_op = nullptr; - - int shard_id; - const std::string& marker; - string *pnext_marker; - vector *entries; - bool *truncated; - - read_remote_data_log_response response; - std::optional timer; - -public: - RGWReadRemoteDataLogShardCR(RGWDataSyncCtx *_sc, int _shard_id, - const std::string& marker, string *pnext_marker, - vector *_entries, - bool *_truncated) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - shard_id(_shard_id), marker(marker), pnext_marker(pnext_marker), - entries(_entries), truncated(_truncated) { - } - ~RGWReadRemoteDataLogShardCR() override { - if (http_op) { - http_op->put(); - } - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield { - char buf[16]; - snprintf(buf, sizeof(buf), "%d", shard_id); - rgw_http_param_pair pairs[] = { { "type" , "data" }, - { "id", buf }, - { "marker", marker.c_str() }, - { "extra-info", "true" }, - { NULL, NULL } }; - - string p = "/admin/log/"; - - http_op = new RGWRESTReadResource(sc->conn, p, pairs, NULL, sync_env->http_manager); - - init_new_io(http_op); - - if (sync_env->counters) { - timer.emplace(sync_env->counters, sync_counters::l_poll); - } - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - if (sync_env->counters) { - sync_env->counters->inc(sync_counters::l_poll_err); - } - return set_cr_error(ret); - } - - return io_block(0); - } - yield { - timer.reset(); - int ret = http_op->wait(&response, null_yield); - if (ret < 0) { - if (sync_env->counters && ret != -ENOENT) { - sync_env->counters->inc(sync_counters::l_poll_err); - } - return set_cr_error(ret); - } - entries->clear(); - entries->swap(response.entries); - *pnext_marker = response.marker; - *truncated = response.truncated; - return set_cr_done(); - } - } - return 0; - } -}; - -class RGWReadRemoteDataLogInfoCR : public RGWShardCollectCR { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - int num_shards; - map *datalog_info; - - int shard_id; -#define READ_DATALOG_MAX_CONCURRENT 10 - - int handle_result(int 
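
`RGWReadRemoteDataLogShardCR` pages through one remote datalog shard using the `marker`/`truncated` pair carried in `read_remote_data_log_response`. With the coroutine and HTTP plumbing stripped away, the control flow reduces to a marker-chained loop; `fetch_page` below is a hypothetical stub standing in for the REST round trip to `/admin/log`:

```cpp
#include <iostream>
#include <string>
#include <vector>

// One page of results, mirroring read_remote_data_log_response above.
struct LogPage {
  std::string marker;                // resume point handed back by the server
  bool truncated = false;            // true if more entries remain
  std::vector<std::string> entries;
};

// Stub for the HTTP round trip to /admin/log?type=data&id=<shard>&marker=...
LogPage fetch_page(int shard, const std::string& marker) {
  (void)shard; (void)marker;
  return {};  // stub: empty page, not truncated
}

// Drain a shard by chaining markers until the server reports the end.
void drain_shard(int shard) {
  std::string marker;  // empty marker = start from the beginning
  LogPage page;
  do {
    page = fetch_page(shard, marker);
    for (const auto& e : page.entries) {
      std::cout << "entry: " << e << "\n";
    }
    marker = page.marker;  // resume where the server said to
  } while (page.truncated);
}

int main() { drain_shard(0); }
```
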
r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to fetch remote datalog info: " - << cpp_strerror(r) << dendl; - } - return r; - } -public: - RGWReadRemoteDataLogInfoCR(RGWDataSyncCtx *_sc, - int _num_shards, - map *_datalog_info) : RGWShardCollectCR(_sc->cct, READ_DATALOG_MAX_CONCURRENT), - sc(_sc), sync_env(_sc->env), num_shards(_num_shards), - datalog_info(_datalog_info), shard_id(0) {} - bool spawn_next() override; -}; - -bool RGWReadRemoteDataLogInfoCR::spawn_next() { - if (shard_id >= num_shards) { - return false; - } - spawn(new RGWReadRemoteDataLogShardInfoCR(sc, shard_id, &(*datalog_info)[shard_id]), false); - shard_id++; - return true; -} - -class RGWListRemoteDataLogShardCR : public RGWSimpleCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - RGWRESTReadResource *http_op; - - int shard_id; - string marker; - uint32_t max_entries; - rgw_datalog_shard_data *result; - -public: - RGWListRemoteDataLogShardCR(RGWDataSyncCtx *sc, int _shard_id, - const string& _marker, uint32_t _max_entries, - rgw_datalog_shard_data *_result) - : RGWSimpleCoroutine(sc->cct), sc(sc), sync_env(sc->env), http_op(NULL), - shard_id(_shard_id), marker(_marker), max_entries(_max_entries), result(_result) {} - - int send_request(const DoutPrefixProvider *dpp) override { - RGWRESTConn *conn = sc->conn; - - char buf[32]; - snprintf(buf, sizeof(buf), "%d", shard_id); - - char max_entries_buf[32]; - snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", (int)max_entries); - - const char *marker_key = (marker.empty() ? "" : "marker"); - - rgw_http_param_pair pairs[] = { { "type", "data" }, - { "id", buf }, - { "max-entries", max_entries_buf }, - { marker_key, marker.c_str() }, - { NULL, NULL } }; - - string p = "/admin/log/"; - - http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - http_op->put(); - return ret; - } - - return 0; - } - - int request_complete() override { - int ret = http_op->wait(result, null_yield); - http_op->put(); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to list remote datalog shard, ret=" << ret << dendl; - return ret; - } - return 0; - } -}; - -class RGWListRemoteDataLogCR : public RGWShardCollectCR { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - map shards; - int max_entries_per_shard; - map *result; - - map::iterator iter; -#define READ_DATALOG_MAX_CONCURRENT 10 - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to list remote datalog: " - << cpp_strerror(r) << dendl; - } - return r; - } -public: - RGWListRemoteDataLogCR(RGWDataSyncCtx *_sc, - map& _shards, - int _max_entries_per_shard, - map *_result) : RGWShardCollectCR(_sc->cct, READ_DATALOG_MAX_CONCURRENT), - sc(_sc), sync_env(_sc->env), max_entries_per_shard(_max_entries_per_shard), - result(_result) { - shards.swap(_shards); - iter = shards.begin(); - } - bool spawn_next() override; -}; - -bool RGWListRemoteDataLogCR::spawn_next() { - if (iter == shards.end()) { - return false; - } - - spawn(new RGWListRemoteDataLogShardCR(sc, iter->first, iter->second, max_entries_per_shard, &(*result)[iter->first]), false); - ++iter; - 
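
`send_request()` above assembles its query string from a null-terminated `rgw_http_param_pair` array, and omits the `marker` parameter on the first request by giving it an empty key. A self-contained sketch of that convention; `build_query` is an illustrative helper, not the actual RGW serializer (which also URL-escapes values):

```cpp
#include <iostream>
#include <string>

struct param_pair { const char* key; const char* val; };

// Join a null-terminated pair array into a query string, skipping pairs
// whose key is empty -- the same trick used above to drop "marker".
std::string build_query(const param_pair* p) {
  std::string out;
  for (; p->key; ++p) {
    if (*p->key == '\0') continue;  // empty key: parameter omitted
    out += out.empty() ? '?' : '&';
    out += p->key;
    out += '=';
    out += p->val;  // real code would URL-escape this
  }
  return out;
}

int main() {
  std::string marker;  // empty on the first call
  const char* marker_key = marker.empty() ? "" : "marker";
  param_pair pairs[] = { { "type", "data" },
                         { "id", "5" },
                         { "max-entries", "1000" },
                         { marker_key, marker.c_str() },
                         { nullptr, nullptr } };
  std::cout << "/admin/log/" << build_query(pairs) << "\n";  // no marker param
}
```
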
return true; -} - -class RGWInitDataSyncStatusCoroutine : public RGWCoroutine { - static constexpr uint32_t lock_duration = 30; - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw::sal::RadosStore* driver; // RGWDataSyncEnv also has a pointer to driver - const rgw_pool& pool; - const uint32_t num_shards; - - string sync_status_oid; - - string lock_name; - string cookie; - rgw_data_sync_status *status; - map shards_info; - - RGWSyncTraceNodeRef tn; -public: - RGWInitDataSyncStatusCoroutine(RGWDataSyncCtx *_sc, uint32_t num_shards, - uint64_t instance_id, - RGWSyncTraceNodeRef& _tn_parent, - rgw_data_sync_status *status) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), driver(sync_env->driver), - pool(sync_env->svc->zone->get_zone_params().log_pool), - num_shards(num_shards), status(status), - tn(sync_env->sync_tracer->add_node(_tn_parent, "init_data_sync_status")) { - lock_name = "sync_lock"; - - status->sync_info.instance_id = instance_id; - -#define COOKIE_LEN 16 - char buf[COOKIE_LEN + 1]; - - gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); - cookie = buf; - - sync_status_oid = RGWDataSyncStatusManager::sync_status_oid(sc->source_zone); - - } - - int operate(const DoutPrefixProvider *dpp) override { - int ret; - reenter(this) { - using LockCR = RGWSimpleRadosLockCR; - yield call(new LockCR(sync_env->async_rados, driver, - rgw_raw_obj{pool, sync_status_oid}, - lock_name, cookie, lock_duration)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid)); - return set_cr_error(retcode); - } - using WriteInfoCR = RGWSimpleRadosWriteCR; - yield call(new WriteInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - rgw_raw_obj{pool, sync_status_oid}, - status->sync_info)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode)); - return set_cr_error(retcode); - } - - /* take lock again, we just recreated the object */ - yield call(new LockCR(sync_env->async_rados, driver, - rgw_raw_obj{pool, sync_status_oid}, - lock_name, cookie, lock_duration)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid)); - return set_cr_error(retcode); - } - - tn->log(10, "took lease"); - - /* fetch current position in logs */ - yield { - RGWRESTConn *conn = sync_env->svc->zone->get_zone_conn(sc->source_zone); - if (!conn) { - tn->log(0, SSTR("ERROR: connection to zone " << sc->source_zone << " does not exist!")); - return set_cr_error(-EIO); - } - for (uint32_t i = 0; i < num_shards; i++) { - spawn(new RGWReadRemoteDataLogShardInfoCR(sc, i, &shards_info[i]), true); - } - } - while (collect(&ret, NULL)) { - if (ret < 0) { - tn->log(0, SSTR("ERROR: failed to read remote data log shards")); - return set_state(RGWCoroutine_Error); - } - yield; - } - yield { - for (uint32_t i = 0; i < num_shards; i++) { - RGWDataChangesLogInfo& info = shards_info[i]; - auto& marker = status->sync_markers[i]; - marker.next_step_marker = info.marker; - marker.timestamp = info.last_update; - const auto& oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, i); - using WriteMarkerCR = RGWSimpleRadosWriteCR; - spawn(new WriteMarkerCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - rgw_raw_obj{pool, oid}, marker), true); - } - } - while (collect(&ret, NULL)) { - if (ret < 0) { - tn->log(0, SSTR("ERROR: failed to write data sync status markers")); - return set_state(RGWCoroutine_Error); - } - yield; - } - - status->sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps; - yield 
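
`RGWInitDataSyncStatusCoroutine` guards the status object with a cooperative lock identified by `lock_name` plus a random 16-character cookie (`gen_rand_alphanumeric` with `COOKIE_LEN` 16), and has to re-take the lock after rewriting the status object because the write recreates it. A rough stand-in for the cookie generation, using `<random>` instead of Ceph's utility; this sketch is not cryptographically strong:

```cpp
#include <iostream>
#include <random>
#include <string>

// Approximate stand-in for gen_rand_alphanumeric(): a 16-character cookie
// identifying this locker when taking and releasing the shared lock.
std::string make_lock_cookie(std::size_t len = 16) {
  static const char alnum[] =
      "0123456789abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<std::size_t> pick(0, sizeof(alnum) - 2);
  std::string cookie;
  cookie.reserve(len);
  for (std::size_t i = 0; i < len; ++i) {
    cookie += alnum[pick(gen)];
  }
  return cookie;
}

int main() {
  std::cout << "sync_lock cookie: " << make_lock_cookie() << "\n";
}
```
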
call(new WriteInfoCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - rgw_raw_obj{pool, sync_status_oid}, - status->sync_info)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode)); - return set_cr_error(retcode); - } - yield call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, driver, - rgw_raw_obj{pool, sync_status_oid}, - lock_name, cookie)); - return set_cr_done(); - } - return 0; - } -}; - -RGWRemoteDataLog::RGWRemoteDataLog(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* driver, - RGWAsyncRadosProcessor *async_rados) - : RGWCoroutinesManager(driver->ctx(), driver->getRados()->get_cr_registry()), - dpp(dpp), driver(driver), - cct(driver->ctx()), cr_registry(driver->getRados()->get_cr_registry()), - async_rados(async_rados), - http_manager(driver->ctx(), completion_mgr), - data_sync_cr(NULL), - initialized(false) -{ -} - -int RGWRemoteDataLog::read_log_info(const DoutPrefixProvider *dpp, rgw_datalog_info *log_info) -{ - rgw_http_param_pair pairs[] = { { "type", "data" }, - { NULL, NULL } }; - - int ret = sc.conn->get_json_resource(dpp, "/admin/log", pairs, null_yield, *log_info); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch datalog info" << dendl; - return ret; - } - - ldpp_dout(dpp, 20) << "remote datalog, num_shards=" << log_info->num_shards << dendl; - - return 0; -} - -int RGWRemoteDataLog::read_source_log_shards_info(const DoutPrefixProvider *dpp, map *shards_info) -{ - rgw_datalog_info log_info; - int ret = read_log_info(dpp, &log_info); - if (ret < 0) { - return ret; - } - - return run(dpp, new RGWReadRemoteDataLogInfoCR(&sc, log_info.num_shards, shards_info)); -} - -int RGWRemoteDataLog::read_source_log_shards_next(const DoutPrefixProvider *dpp, map shard_markers, map *result) -{ - return run(dpp, new RGWListRemoteDataLogCR(&sc, shard_markers, 1, result)); -} - -int RGWRemoteDataLog::init(const rgw_zone_id& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger, - RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& _sync_module, - PerfCounters* counters) -{ - sync_env.init(dpp, cct, driver, driver->svc(), async_rados, &http_manager, _error_logger, - _sync_tracer, _sync_module, counters); - sc.init(&sync_env, _conn, _source_zone); - - if (initialized) { - return 0; - } - - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - - tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "data"); - - initialized = true; - - return 0; -} - -void RGWRemoteDataLog::finish() -{ - stop(); -} - -int RGWRemoteDataLog::read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status) -{ - // cannot run concurrently with run_sync(), so run in a separate manager - RGWCoroutinesManager crs(cct, cr_registry); - RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - RGWDataSyncEnv sync_env_local = sync_env; - sync_env_local.http_manager = &http_manager; - - RGWDataSyncCtx sc_local = sc; - sc_local.env = &sync_env_local; - - ret = crs.run(dpp, new RGWReadDataSyncStatusCoroutine(&sc_local, sync_status)); - http_manager.stop(); - return ret; -} - -int RGWRemoteDataLog::read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, set& recovering_shards) -{ - // cannot run concurrently with run_sync(), so run in a 
separate manager - RGWCoroutinesManager crs(cct, cr_registry); - RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - RGWDataSyncEnv sync_env_local = sync_env; - sync_env_local.http_manager = &http_manager; - - RGWDataSyncCtx sc_local = sc; - sc_local.env = &sync_env_local; - - std::vector omapkeys; - omapkeys.resize(num_shards); - uint64_t max_entries{1}; - - ret = crs.run(dpp, new RGWReadDataSyncRecoveringShardsCR(&sc_local, max_entries, num_shards, omapkeys)); - http_manager.stop(); - - if (ret == 0) { - for (int i = 0; i < num_shards; i++) { - if (omapkeys[i]->entries.size() != 0) { - recovering_shards.insert(i); - } - } - } - - return ret; -} - -int RGWRemoteDataLog::init_sync_status(const DoutPrefixProvider *dpp, int num_shards) -{ - rgw_data_sync_status sync_status; - sync_status.sync_info.num_shards = num_shards; - - RGWCoroutinesManager crs(cct, cr_registry); - RGWHTTPManager http_manager(cct, crs.get_completion_mgr()); - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - RGWDataSyncEnv sync_env_local = sync_env; - sync_env_local.http_manager = &http_manager; - auto instance_id = ceph::util::generate_random_number(); - RGWDataSyncCtx sc_local = sc; - sc_local.env = &sync_env_local; - ret = crs.run(dpp, new RGWInitDataSyncStatusCoroutine(&sc_local, num_shards, instance_id, tn, &sync_status)); - http_manager.stop(); - return ret; -} - -static string full_data_sync_index_shard_oid(const rgw_zone_id& source_zone, int shard_id) -{ - char buf[datalog_sync_full_sync_index_prefix.size() + 1 + source_zone.id.size() + 1 + 16]; - snprintf(buf, sizeof(buf), "%s.%s.%d", datalog_sync_full_sync_index_prefix.c_str(), source_zone.id.c_str(), shard_id); - return string(buf); -} - -struct read_metadata_list { - string marker; - bool truncated; - list keys; - int count; - - read_metadata_list() : truncated(false), count(0) {} - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("truncated", truncated, obj); - JSONDecoder::decode_json("keys", keys, obj); - JSONDecoder::decode_json("count", count, obj); - } -}; - -struct bucket_instance_meta_info { - string key; - obj_version ver; - utime_t mtime; - RGWBucketInstanceMetadataObject data; - - bucket_instance_meta_info() {} - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("key", key, obj); - JSONDecoder::decode_json("ver", ver, obj); - JSONDecoder::decode_json("mtime", mtime, obj); - JSONDecoder::decode_json("data", data, obj); - } -}; - -class RGWReadRemoteBucketIndexLogInfoCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - const string instance_key; - - rgw_bucket_index_marker_info *info; - -public: - RGWReadRemoteBucketIndexLogInfoCR(RGWDataSyncCtx *_sc, - const rgw_bucket& bucket, - rgw_bucket_index_marker_info *_info) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - instance_key(bucket.get_key()), info(_info) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield { - rgw_http_param_pair pairs[] = { { "type" , "bucket-index" }, - { "bucket-instance", instance_key.c_str() }, - { "info" , NULL }, - { NULL, NULL } }; - - string p = "/admin/log/"; - call(new RGWReadRESTResourceCR(sync_env->cct, sc->conn, sync_env->http_manager, p, pairs, info)); - } - if (retcode 
< 0) { - return set_cr_error(retcode); - } - - return set_cr_done(); - } - return 0; - } -}; - - -class RGWListBucketIndexesCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env = sc->env; - - rgw::sal::RadosStore* driver = sync_env->driver; - - rgw_data_sync_status *sync_status; - - int req_ret = 0; - int ret = 0; - - list::iterator iter; - - unique_ptr entries_index; - string oid_prefix = - datalog_sync_full_sync_index_prefix + "." + sc->source_zone.id; - - string path = "/admin/metadata/bucket.instance"; - bucket_instance_meta_info meta_info; - string key; - - bool failed = false; - bool truncated = false; - read_metadata_list result; - -public: - RGWListBucketIndexesCR(RGWDataSyncCtx* sc, - rgw_data_sync_status* sync_status) - : RGWCoroutine(sc->cct), sc(sc), sync_status(sync_status) {} - ~RGWListBucketIndexesCR() override { } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - entries_index = std::make_unique( - sync_env->async_rados, driver, this, - cct->_conf->rgw_data_log_num_shards, - sync_env->svc->zone->get_zone_params().log_pool, - oid_prefix); - yield; // yield so OmapAppendCRs can start - - do { - yield { - string entrypoint = "/admin/metadata/bucket.instance"s; - - rgw_http_param_pair pairs[] = {{"max-entries", "1000"}, - {"marker", result.marker.c_str()}, - {NULL, NULL}}; - - call(new RGWReadRESTResourceCR( - sync_env->cct, sc->conn, sync_env->http_manager, - entrypoint, pairs, &result)); - } - if (retcode < 0) { - ldpp_dout(dpp, 0) - << "ERROR: failed to fetch metadata for section bucket.instance" - << dendl; - return set_cr_error(retcode); - } - - for (iter = result.keys.begin(); iter != result.keys.end(); ++iter) { - ldpp_dout(dpp, 20) << "list metadata: section=bucket.instance key=" - << *iter << dendl; - key = *iter; - - yield { - rgw_http_param_pair pairs[] = {{"key", key.c_str()}, - {NULL, NULL}}; - - call(new RGWReadRESTResourceCR( - sync_env->cct, sc->conn, sync_env->http_manager, path, pairs, - &meta_info)); - } - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch metadata for key: " - << key << dendl; - return set_cr_error(retcode); - } - // Now that bucket full sync is bucket-wide instead of - // per-shard, we only need to register a single shard of - // each bucket to guarantee that sync will see everything - // that happened before data full sync starts. This also - // means we don't have to care about the bucket's current - // shard count. 
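
The `entries_index->append()` call that follows implements the comment above: only shard 0 of each bucket instance is registered in the full-sync index, whatever the bucket's actual shard count. Schematically, with a toy `full_sync_entry` helper and a made-up sample key:

```cpp
#include <iostream>
#include <string>

#include <fmt/format.h>  // fmt is what the surrounding code uses

// Schematic form of the entry written to the full-sync index: the bucket
// instance key joined with shard 0.
std::string full_sync_entry(const std::string& bucket_instance_key) {
  return fmt::format("{}:{}", bucket_instance_key, 0);
}

int main() {
  // hypothetical bucket instance key, for illustration only
  std::cout << full_sync_entry("mybucket:instance-id") << "\n";
}
```
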
-        yield entries_index->append(
-          fmt::format("{}:{}", key, 0),
-          sync_env->svc->datalog_rados->get_log_shard_id(
-            meta_info.data.get_bucket_info().bucket, 0));
-      }
-      truncated = result.truncated;
-    } while (truncated);
-
-    yield {
-      if (!entries_index->finish()) {
-        failed = true;
-      }
-    }
-    if (!failed) {
-      for (auto iter = sync_status->sync_markers.begin();
-           iter != sync_status->sync_markers.end();
-           ++iter) {
-        int shard_id = (int)iter->first;
-        rgw_data_sync_marker& marker = iter->second;
-        marker.total_entries = entries_index->get_total_entries(shard_id);
-        spawn(new RGWSimpleRadosWriteCR(
-                dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool,
-                            RGWDataSyncStatusManager::shard_obj_name(
-                              sc->source_zone, shard_id)),
-                marker),
-              true);
-      }
-    } else {
-      yield call(sync_env->error_logger->log_error_cr(
-                   dpp, sc->conn->get_remote_id(), "data.init", "",
-                   EIO, string("failed to build bucket instances map")));
-    }
-    while (collect(&ret, NULL)) {
-      if (ret < 0) {
-        yield call(sync_env->error_logger->log_error_cr(
-                     dpp, sc->conn->get_remote_id(), "data.init", "",
-                     -ret, string("failed to store sync status: ") +
-                     cpp_strerror(-ret)));
-        req_ret = ret;
-      }
-      yield;
-    }
-    drain_all();
-    if (req_ret < 0) {
-      yield return set_cr_error(req_ret);
-    }
-    yield return set_cr_done();
-  }
-  return 0;
-  }
-};
-
-#define DATA_SYNC_UPDATE_MARKER_WINDOW 1
-
-class RGWDataSyncShardMarkerTrack : public RGWSyncShardMarkerTrack {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  string marker_oid;
-  rgw_data_sync_marker sync_marker;
-  RGWSyncTraceNodeRef tn;
-
-public:
-  RGWDataSyncShardMarkerTrack(RGWDataSyncCtx *_sc,
-                              const string& _marker_oid,
-                              const rgw_data_sync_marker& _marker,
-                              RGWSyncTraceNodeRef& _tn) : RGWSyncShardMarkerTrack(DATA_SYNC_UPDATE_MARKER_WINDOW),
-                                                          sc(_sc), sync_env(_sc->env),
-                                                          marker_oid(_marker_oid),
-                                                          sync_marker(_marker),
-                                                          tn(_tn) {}
-
-  RGWCoroutine* store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
-    sync_marker.marker = new_marker;
-    sync_marker.pos = index_pos;
-    sync_marker.timestamp = timestamp;
-
-    tn->log(20, SSTR("updating marker marker_oid=" << marker_oid << " marker=" << new_marker));
-
-    return new RGWSimpleRadosWriteCR(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                                     rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, marker_oid),
-                                     sync_marker);
-  }
-
-  RGWOrderCallCR *allocate_order_control_cr() override {
-    return new RGWLastCallerWinsCR(sync_env->cct);
-  }
-};
-
-// ostream wrappers to print buckets without copying strings
-struct bucket_str {
-  const rgw_bucket& b;
-  explicit bucket_str(const rgw_bucket& b) : b(b) {}
-};
-std::ostream& operator<<(std::ostream& out, const bucket_str& rhs) {
-  auto& b = rhs.b;
-  if (!b.tenant.empty()) {
-    out << b.tenant << '/';
-  }
-  out << b.name;
-  if (!b.bucket_id.empty()) {
-    out << ':' << b.bucket_id;
-  }
-  return out;
-}
-
-struct bucket_str_noinstance {
-  const rgw_bucket& b;
-  explicit bucket_str_noinstance(const rgw_bucket& b) : b(b) {}
-};
-std::ostream& operator<<(std::ostream& out, const bucket_str_noinstance& rhs) {
-  auto& b = rhs.b;
-  if (!b.tenant.empty()) {
-    out << b.tenant << '/';
-  }
-  out << b.name;
-  return out;
-}
-
-struct bucket_shard_str {
-  const rgw_bucket_shard& bs;
-  explicit bucket_shard_str(const rgw_bucket_shard& bs) : bs(bs) {}
-};
-std::ostream& operator<<(std::ostream& out, const bucket_shard_str& rhs) {
-  auto& bs = rhs.bs;
-  out << bucket_str{bs.bucket};
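
`RGWDataSyncShardMarkerTrack` persists progress through its base class, which batches updates according to `DATA_SYNC_UPDATE_MARKER_WINDOW`. The invariant it maintains, sketched below with assumed semantics (a toy `MarkerTrack`, not the Ceph class): entries may finish out of order, but only a marker whose entire prefix has finished is safe to persist as the resume point.

```cpp
#include <iostream>
#include <map>
#include <string>

// Toy marker tracker (assumed semantics): persistable progress is the
// highest marker whose contiguous prefix of operations has completed.
class MarkerTrack {
  std::map<std::string, bool> pending_;  // marker -> finished, sorted
  std::string high_marker_;
 public:
  void start(const std::string& marker) { pending_[marker] = false; }
  void finish(const std::string& marker) {
    pending_[marker] = true;
    // advance over the contiguous finished prefix
    while (!pending_.empty() && pending_.begin()->second) {
      high_marker_ = pending_.begin()->first;
      pending_.erase(pending_.begin());
    }
  }
  const std::string& high_marker() const { return high_marker_; }
};

int main() {
  MarkerTrack t;
  t.start("001"); t.start("002"); t.start("003");
  t.finish("002");  // cannot persist yet: "001" is still in flight
  std::cout << "resume at: [" << t.high_marker() << "]\n";
  t.finish("001");  // now "001".."002" are done
  std::cout << "resume at: [" << t.high_marker() << "]\n";
}
```
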
- if (bs.shard_id >= 0) { - out << ':' << bs.shard_id; - } - return out; -} - -struct all_bucket_info { - RGWBucketInfo bucket_info; - map attrs; -}; - -struct rgw_sync_pipe_info_entity -{ -private: - RGWBucketInfo bucket_info; - map bucket_attrs; - bool _has_bucket_info{false}; - -public: - rgw_zone_id zone; - - rgw_sync_pipe_info_entity() {} - rgw_sync_pipe_info_entity(const rgw_sync_bucket_entity& e, - std::optional& binfo) { - if (e.zone) { - zone = *e.zone; - } - if (!e.bucket) { - return; - } - if (!binfo || - binfo->bucket_info.bucket != *e.bucket) { - bucket_info.bucket = *e.bucket; - } else { - set_bucket_info(*binfo); - } - } - - void update_empty_bucket_info(const std::map& buckets_info) { - if (_has_bucket_info) { - return; - } - if (bucket_info.bucket.name.empty()) { - return; - } - - auto iter = buckets_info.find(bucket_info.bucket); - if (iter == buckets_info.end()) { - return; - } - - set_bucket_info(iter->second); - } - - bool has_bucket_info() const { - return _has_bucket_info; - } - - void set_bucket_info(const all_bucket_info& all_info) { - bucket_info = all_info.bucket_info; - bucket_attrs = all_info.attrs; - _has_bucket_info = true; - } - - const RGWBucketInfo& get_bucket_info() const { - return bucket_info; - } - - const rgw_bucket& get_bucket() const { - return bucket_info.bucket; - } - - bool operator<(const rgw_sync_pipe_info_entity& e) const { - if (zone < e.zone) { - return false; - } - if (zone > e.zone) { - return true; - } - return (bucket_info.bucket < e.bucket_info.bucket); - } -}; - -std::ostream& operator<<(std::ostream& out, const rgw_sync_pipe_info_entity& e) { - auto& bucket = e.get_bucket_info().bucket; - - out << e.zone << ":" << bucket.get_key(); - return out; -} - -struct rgw_sync_pipe_handler_info { - RGWBucketSyncFlowManager::pipe_handler handler; - rgw_sync_pipe_info_entity source; - rgw_sync_pipe_info_entity target; - - rgw_sync_pipe_handler_info() {} - rgw_sync_pipe_handler_info(const RGWBucketSyncFlowManager::pipe_handler& _handler, - std::optional source_bucket_info, - std::optional target_bucket_info) : handler(_handler), - source(handler.source, source_bucket_info), - target(handler.dest, target_bucket_info) { - } - - bool operator<(const rgw_sync_pipe_handler_info& p) const { - if (source < p.source) { - return true; - } - if (p.source < source) { - return false; - } - return (target < p.target); - } - - void update_empty_bucket_info(const std::map& buckets_info) { - source.update_empty_bucket_info(buckets_info); - target.update_empty_bucket_info(buckets_info); - } -}; - -std::ostream& operator<<(std::ostream& out, const rgw_sync_pipe_handler_info& p) { - out << p.source << ">" << p.target; - return out; -} - -struct rgw_sync_pipe_info_set { - std::set handlers; - - using iterator = std::set::iterator; - - void clear() { - handlers.clear(); - } - - void insert(const RGWBucketSyncFlowManager::pipe_handler& handler, - std::optional& source_bucket_info, - std::optional& target_bucket_info) { - rgw_sync_pipe_handler_info p(handler, source_bucket_info, target_bucket_info); - handlers.insert(p); - } - - iterator begin() { - return handlers.begin(); - } - - iterator end() { - return handlers.end(); - } - - size_t size() const { - return handlers.size(); - } - - bool empty() const { - return handlers.empty(); - } - - void update_empty_bucket_info(const std::map& buckets_info) { - if (buckets_info.empty()) { - return; - } - - std::set p; - - for (auto pipe : handlers) { - pipe.update_empty_bucket_info(buckets_info); - p.insert(pipe); - } - - 
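
Hand-rolled comparison chains like the ones in these pipe-info structs are easy to get subtly wrong; note that the entity comparison above sorts zones in descending direction before comparing buckets. For new code, `std::tie` expresses a lexicographic order in one line. A generic sketch with a hypothetical `PipeEntity`:

```cpp
#include <iostream>
#include <string>
#include <tuple>

struct PipeEntity {
  std::string zone;
  std::string bucket;

  // std::tie compares element by element, so the ordering of the two
  // keys cannot drift apart the way a hand-written if-chain can.
  bool operator<(const PipeEntity& o) const {
    return std::tie(zone, bucket) < std::tie(o.zone, o.bucket);
  }
};

int main() {
  PipeEntity a{"zone-a", "logs"}, b{"zone-b", "archive"};
  std::cout << (a < b) << "\n";  // 1: "zone-a" sorts before "zone-b"
}
```
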
handlers = std::move(p); - } -}; - -class RGWRunBucketSourcesSyncCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - boost::intrusive_ptr lease_cr; - - rgw_sync_pipe_info_set pipes; - rgw_sync_pipe_info_set::iterator siter; - - rgw_bucket_sync_pair_info sync_pair; - - RGWSyncTraceNodeRef tn; - ceph::real_time* progress; - std::vector shard_progress; - std::vector::iterator cur_shard_progress; - - RGWRESTConn *conn{nullptr}; - rgw_zone_id last_zone; - - std::optional gen; - rgw_bucket_index_marker_info marker_info; - BucketIndexShardsManager marker_mgr; - -public: - RGWRunBucketSourcesSyncCR(RGWDataSyncCtx *_sc, - boost::intrusive_ptr lease_cr, - const rgw_bucket_shard& source_bs, - const RGWSyncTraceNodeRef& _tn_parent, - std::optional gen, - ceph::real_time* progress); - - int operate(const DoutPrefixProvider *dpp) override; -}; - -class RGWDataSyncSingleEntryCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw::bucket_sync::Handle state; // cached bucket-shard state - rgw_data_sync_obligation obligation; // input obligation - std::optional complete; // obligation to complete - uint32_t obligation_counter = 0; - RGWDataSyncShardMarkerTrack *marker_tracker; - rgw_raw_obj error_repo; - boost::intrusive_ptr lease_cr; - RGWSyncTraceNodeRef tn; - - ceph::real_time progress; - int sync_status = 0; -public: - RGWDataSyncSingleEntryCR(RGWDataSyncCtx *_sc, rgw::bucket_sync::Handle state, - rgw_data_sync_obligation _obligation, - RGWDataSyncShardMarkerTrack *_marker_tracker, - const rgw_raw_obj& error_repo, - boost::intrusive_ptr lease_cr, - const RGWSyncTraceNodeRef& _tn_parent) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - state(std::move(state)), obligation(std::move(_obligation)), - marker_tracker(_marker_tracker), error_repo(error_repo), - lease_cr(std::move(lease_cr)) { - set_description() << "data sync single entry (source_zone=" << sc->source_zone << ") " << obligation; - tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", to_string(obligation.bs, obligation.gen)); - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - if (state->obligation) { - // this is already syncing in another DataSyncSingleEntryCR - if (state->obligation->timestamp < obligation.timestamp) { - // cancel existing obligation and overwrite it - tn->log(10, SSTR("canceling existing obligation " << *state->obligation)); - complete = std::move(*state->obligation); - *state->obligation = std::move(obligation); - state->counter++; - } else { - // cancel new obligation - tn->log(10, SSTR("canceling new obligation " << obligation)); - complete = std::move(obligation); - } - } else { - // start syncing a new obligation - state->obligation = obligation; - obligation_counter = state->counter; - state->counter++; - - // loop until the latest obligation is satisfied, because other callers - // may update the obligation while we're syncing - while ((state->obligation->timestamp == ceph::real_time() || - state->progress_timestamp < state->obligation->timestamp) && - obligation_counter != state->counter) { - obligation_counter = state->counter; - progress = ceph::real_time{}; - - ldout(cct, 4) << "starting sync on " << bucket_shard_str{state->key.first} - << ' ' << *state->obligation << " progress timestamp " << state->progress_timestamp - << " progress " << progress << dendl; - yield call(new RGWRunBucketSourcesSyncCR(sc, lease_cr, - state->key.first, tn, - state->obligation->gen, - &progress)); - if (retcode < 0) { - break; - } - 
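
The single-entry coroutine in this region drives the error-repo lifecycle: a failure is recorded under a key that encodes the bucket shard (and generation) together with the failure timestamp, and a successful retry removes that record. An in-memory analogue of that lifecycle; the key format in `encode_key` is an assumption for illustration, while the real repo is a RADOS omap written through `rgw::error_repo::write_cr`/`remove_cr`:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <optional>
#include <string>

// In-memory stand-in for the error repo's omap: key -> failure timestamp.
using ErrorRepo = std::map<std::string, uint64_t>;

// Assumed key format, for illustration only.
std::string encode_key(const std::string& bucket, int shard,
                       std::optional<uint64_t> gen) {
  std::string key = bucket + ":" + std::to_string(shard);
  if (gen) {
    key += "[" + std::to_string(*gen) + "]";
  }
  return key;
}

void on_sync_result(ErrorRepo& repo, const std::string& key,
                    uint64_t timestamp, int result, bool was_retry) {
  if (result < 0) {
    repo.emplace(key, timestamp);  // failed: remember it for a later pass
  } else if (was_retry) {
    repo.erase(key);               // retried successfully: drop the record
  }
}

int main() {
  ErrorRepo repo;
  const auto k = encode_key("mybucket", 2, 5);
  on_sync_result(repo, k, 1670000000, -5 /* EIO */, false);
  std::cout << "pending retries: " << repo.size() << "\n";  // 1
  on_sync_result(repo, k, 1670000000, 0, true);
  std::cout << "pending retries: " << repo.size() << "\n";  // 0
}
```
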
-          state->progress_timestamp = std::max(progress, state->progress_timestamp);
-        }
-        // any new obligations will process themselves
-        complete = std::move(*state->obligation);
-        state->obligation.reset();
-
-        tn->log(10, SSTR("sync finished on " << bucket_shard_str{state->key.first}
-                         << " progress=" << progress << ' ' << complete << " r=" << retcode));
-      }
-      sync_status = retcode;
-
-      if (sync_status == -ENOENT) {
-        // this was added when 'tenant/' was added to datalog entries, because
-        // preexisting tenant buckets could never sync and would stay in the
-        // error_repo forever
-        tn->log(0, SSTR("WARNING: skipping data log entry for missing bucket " << complete->bs));
-        sync_status = 0;
-      }
-
-      if (sync_status < 0) {
-        // write actual sync failures for 'radosgw-admin sync error list'
-        if (sync_status != -EBUSY && sync_status != -EAGAIN) {
-          yield call(sync_env->error_logger->log_error_cr(dpp, sc->conn->get_remote_id(), "data",
-                                                          to_string(complete->bs, complete->gen),
-                                                          -sync_status, string("failed to sync bucket instance: ") + cpp_strerror(-sync_status)));
-          if (retcode < 0) {
-            tn->log(0, SSTR("ERROR: failed to log sync failure: retcode=" << retcode));
-          }
-        }
-        if (complete->timestamp != ceph::real_time{}) {
-          tn->log(10, SSTR("writing " << *complete << " to error repo for retry"));
-          yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
-                                               rgw::error_repo::encode_key(complete->bs, complete->gen),
-                                               complete->timestamp));
-          if (retcode < 0) {
-            tn->log(0, SSTR("ERROR: failed to log sync failure in error repo: retcode=" << retcode));
-          }
-        }
-      } else if (complete->retry) {
-        yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo,
-                                              rgw::error_repo::encode_key(complete->bs, complete->gen),
-                                              complete->timestamp));
-        if (retcode < 0) {
-          tn->log(0, SSTR("ERROR: failed to remove omap key from error repo ("
-                          << error_repo << "), retcode=" << retcode));
-        }
-      }
-      /* FIXME: what to do in case of error */
-      if (marker_tracker && !complete->marker.empty()) {
-        /* update marker */
-        yield call(marker_tracker->finish(complete->marker));
-      }
-      if (sync_status == 0) {
-        sync_status = retcode;
-      }
-      if (sync_status < 0) {
-        return set_cr_error(sync_status);
-      }
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
-
-rgw_raw_obj datalog_oid_for_error_repo(RGWDataSyncCtx *sc, rgw::sal::RadosStore* driver,
-                                       rgw_pool& pool, rgw_bucket_shard& bs) {
-  int datalog_shard = driver->svc()->datalog_rados->choose_oid(bs);
-  string oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, datalog_shard);
-  return rgw_raw_obj(pool, oid + ".retry");
-}
-
-class RGWDataIncrementalSyncFullObligationCR: public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  rgw_bucket_shard source_bs;
-  rgw_raw_obj error_repo;
-  std::string error_marker;
-  ceph::real_time timestamp;
-  RGWSyncTraceNodeRef tn;
-  rgw_bucket_index_marker_info remote_info;
-  rgw_pool pool;
-  uint32_t sid;
-  rgw_bucket_shard bs;
-  std::vector::const_iterator each;
-
-public:
-  RGWDataIncrementalSyncFullObligationCR(RGWDataSyncCtx *_sc, rgw_bucket_shard& _source_bs,
-                                         const rgw_raw_obj& error_repo, const std::string& _error_marker,
-                                         ceph::real_time& _timestamp, RGWSyncTraceNodeRef& _tn)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), source_bs(_source_bs),
-      error_repo(error_repo), error_marker(_error_marker), timestamp(_timestamp),
-      tn(sync_env->sync_tracer->add_node(_tn, "error_repo", SSTR(bucket_shard_str(source_bs))))
-  {}
-
-  int operate(const DoutPrefixProvider *dpp)
override { - reenter(this) { - yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, source_bs.bucket, &remote_info)); - if (retcode < 0) { - return set_cr_error(retcode); - } - - each = remote_info.generations.cbegin(); - for (; each != remote_info.generations.cend(); each++) { - for (sid = 0; sid < each->num_shards; sid++) { - bs.bucket = source_bs.bucket; - bs.shard_id = sid; - error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs); - tn->log(10, SSTR("writing shard_id " << sid << " of gen " << each->gen << " to error repo for retry")); - yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, - rgw::error_repo::encode_key(bs, each->gen), - timestamp), cct->_conf->rgw_data_sync_spawn_window, - [&](uint64_t stack_id, int ret) { - if (ret < 0) { - retcode = ret; - } - return 0; - }); - } - } - drain_all_cb([&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, SSTR("writing to error repo returned error: " << ret)); - } - return ret; - }); - - // once everything succeeds, remove the full sync obligation from the error repo - yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo, - error_marker, timestamp)); - return set_cr_done(); - } - return 0; - } -}; - -RGWCoroutine* data_sync_single_entry(RGWDataSyncCtx *sc, const rgw_bucket_shard& src, - std::optional gen, - const std::string marker, - ceph::real_time timestamp, - boost::intrusive_ptr lease_cr, - boost::intrusive_ptr bucket_shard_cache, - RGWDataSyncShardMarkerTrack* marker_tracker, - rgw_raw_obj error_repo, - RGWSyncTraceNodeRef& tn, - bool retry) { - auto state = bucket_shard_cache->get(src, gen); - auto obligation = rgw_data_sync_obligation{src, gen, marker, timestamp, retry}; - return new RGWDataSyncSingleEntryCR(sc, std::move(state), std::move(obligation), - &*marker_tracker, error_repo, - lease_cr.get(), tn); -} - -static ceph::real_time timestamp_for_bucket_shard(rgw::sal::RadosStore* driver, - const rgw_data_sync_status& sync_status, - const rgw_bucket_shard& bs) { - int datalog_shard = driver->svc()->datalog_rados->choose_oid(bs); - auto status = sync_status.sync_markers.find(datalog_shard); - if (status == sync_status.sync_markers.end()) { - return ceph::real_clock::zero(); - } - return status->second.timestamp; -} - -class RGWDataFullSyncSingleEntryCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_pool pool; - rgw_bucket_shard source_bs; - const std::string key; - rgw_data_sync_status sync_status; - rgw_raw_obj error_repo; - ceph::real_time timestamp; - boost::intrusive_ptr lease_cr; - boost::intrusive_ptr bucket_shard_cache; - RGWDataSyncShardMarkerTrack* marker_tracker; - RGWSyncTraceNodeRef tn; - rgw_bucket_index_marker_info remote_info; - uint32_t sid; - std::vector::iterator each; - uint64_t i{0}; - RGWCoroutine* shard_cr = nullptr; - bool first_shard = true; - bool error_inject; - -public: - RGWDataFullSyncSingleEntryCR(RGWDataSyncCtx *_sc, const rgw_pool& _pool, const rgw_bucket_shard& _source_bs, - const std::string& _key, const rgw_data_sync_status& sync_status, const rgw_raw_obj& _error_repo, - ceph::real_time _timestamp, boost::intrusive_ptr _lease_cr, - boost::intrusive_ptr _bucket_shard_cache, - RGWDataSyncShardMarkerTrack* _marker_tracker, - RGWSyncTraceNodeRef& _tn) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), pool(_pool), source_bs(_source_bs), key(_key), - error_repo(_error_repo), timestamp(_timestamp), lease_cr(std::move(_lease_cr)), - 
bucket_shard_cache(_bucket_shard_cache), marker_tracker(_marker_tracker), tn(_tn) { - error_inject = (sync_env->cct->_conf->rgw_sync_data_full_inject_err_probability > 0); - } - - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - if (error_inject && - rand() % 10000 < cct->_conf->rgw_sync_data_full_inject_err_probability * 10000.0) { - tn->log(0, SSTR("injecting read bilog info error on key=" << key)); - retcode = -ENOENT; - } else { - tn->log(0, SSTR("read bilog info key=" << key)); - yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, source_bs.bucket, &remote_info)); - } - - if (retcode < 0) { - tn->log(10, SSTR("full sync: failed to read remote bucket info. Writing " - << source_bs.shard_id << " to error repo for retry")); - yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, - rgw::error_repo::encode_key(source_bs, std::nullopt), - timestamp)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to log " << source_bs.shard_id << " in error repo: retcode=" << retcode)); - } - yield call(marker_tracker->finish(key)); - return set_cr_error(retcode); - } - - //wait to sync the first shard of the oldest generation and then sync all other shards. - //if any of the operations fail at any time, write them into error repo for later retry. - - each = remote_info.generations.begin(); - for (; each != remote_info.generations.end(); each++) { - for (sid = 0; sid < each->num_shards; sid++) { - source_bs.shard_id = sid; - // use the error repo and sync status timestamp from the datalog shard corresponding to source_bs - error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs); - timestamp = timestamp_for_bucket_shard(sync_env->driver, sync_status, source_bs); - if (retcode < 0) { - tn->log(10, SSTR("Write " << source_bs.shard_id << " to error repo for retry")); - yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, - rgw::error_repo::encode_key(source_bs, each->gen), - timestamp), cct->_conf->rgw_data_sync_spawn_window, std::nullopt); - } else { - shard_cr = data_sync_single_entry(sc, source_bs, each->gen, key, timestamp, - lease_cr, bucket_shard_cache, nullptr, error_repo, tn, false); - tn->log(10, SSTR("full sync: syncing shard_id " << sid << " of gen " << each->gen)); - if (first_shard) { - yield call(shard_cr); - first_shard = false; - } else { - yield_spawn_window(shard_cr, cct->_conf->rgw_data_sync_spawn_window, - [&](uint64_t stack_id, int ret) { - if (ret < 0) { - retcode = ret; - } - return retcode; - }); - } - } - } - drain_all_cb([&](uint64_t stack_id, int ret) { - if (ret < 0) { - retcode = ret; - } - return retcode; - }); - } - - yield call(marker_tracker->finish(key)); - - return set_cr_done(); - } - return 0; - } -}; - -class RGWDataBaseSyncShardCR : public RGWCoroutine { -protected: - RGWDataSyncCtx *const sc; - const rgw_pool& pool; - const uint32_t shard_id; - rgw_data_sync_marker& sync_marker; - RGWSyncTraceNodeRef tn; - const string& status_oid; - const rgw_raw_obj& error_repo; - boost::intrusive_ptr lease_cr; - const rgw_data_sync_status& sync_status; - boost::intrusive_ptr bucket_shard_cache; - - std::optional marker_tracker; - RGWRadosGetOmapValsCR::ResultPtr omapvals; - rgw_bucket_shard source_bs; - - int parse_bucket_key(const std::string& key, rgw_bucket_shard& bs) const { - return rgw_bucket_parse_bucket_key(sc->env->cct, key, - &bs.bucket, &bs.shard_id); - } - - RGWDataBaseSyncShardCR( - RGWDataSyncCtx *const _sc, const rgw_pool& pool, const uint32_t 
shard_id, - rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, - const string& status_oid, const rgw_raw_obj& error_repo, - boost::intrusive_ptr lease_cr, - const rgw_data_sync_status& sync_status, - const boost::intrusive_ptr& bucket_shard_cache) - : RGWCoroutine(_sc->cct), sc(_sc), pool(pool), shard_id(shard_id), - sync_marker(sync_marker), tn(tn), status_oid(status_oid), - error_repo(error_repo), lease_cr(std::move(lease_cr)), - sync_status(sync_status), bucket_shard_cache(bucket_shard_cache) {} -}; - -class RGWDataFullSyncShardCR : public RGWDataBaseSyncShardCR { - static constexpr auto OMAP_GET_MAX_ENTRIES = 100; - - string oid; - uint64_t total_entries = 0; - ceph::real_time entry_timestamp; - std::map entries; - std::map::iterator iter; - string error_marker; - -public: - - RGWDataFullSyncShardCR( - RGWDataSyncCtx *const sc, const rgw_pool& pool, const uint32_t shard_id, - rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, - const string& status_oid, const rgw_raw_obj& error_repo, - boost::intrusive_ptr lease_cr, - const rgw_data_sync_status& sync_status, - const boost::intrusive_ptr& bucket_shard_cache) - : RGWDataBaseSyncShardCR(sc, pool, shard_id, sync_marker, tn, - status_oid, error_repo, std::move(lease_cr), - sync_status, bucket_shard_cache) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - tn->log(10, "start full sync"); - oid = full_data_sync_index_shard_oid(sc->source_zone, shard_id); - marker_tracker.emplace(sc, status_oid, sync_marker, tn); - total_entries = sync_marker.pos; - entry_timestamp = sync_marker.timestamp; // time when full sync started - do { - if (!lease_cr->is_locked()) { - drain_all(); - tn->log(1, "lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - omapvals = std::make_shared(); - yield call(new RGWRadosGetOmapValsCR(sc->env->driver, - rgw_raw_obj(pool, oid), - sync_marker.marker, - OMAP_GET_MAX_ENTRIES, omapvals)); - if (retcode < 0) { - drain_all(); - return set_cr_error(retcode); - } - entries = std::move(omapvals->entries); - if (entries.size() > 0) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - } - tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync")); - iter = entries.begin(); - for (; iter != entries.end(); ++iter) { - retcode = parse_bucket_key(iter->first, source_bs); - if (retcode < 0) { - tn->log(1, SSTR("failed to parse bucket shard: " << iter->first)); - marker_tracker->try_update_high_marker(iter->first, 0, - entry_timestamp); - continue; - } - tn->log(20, SSTR("full sync: " << iter->first)); - total_entries++; - if (!marker_tracker->start(iter->first, total_entries, - entry_timestamp)) { - tn->log(0, SSTR("ERROR: cannot start syncing " << iter->first - << ". 
Duplicate entry?")); - } else { - tn->log(10, SSTR("timestamp for " << iter->first << " is :" << entry_timestamp)); - yield_spawn_window(new RGWDataFullSyncSingleEntryCR( - sc, pool, source_bs, iter->first, sync_status, - error_repo, entry_timestamp, lease_cr, - bucket_shard_cache, &*marker_tracker, tn), - cct->_conf->rgw_data_sync_spawn_window, - std::nullopt); - } - sync_marker.marker = iter->first; - } - } while (omapvals->more); - omapvals.reset(); - - drain_all(); - - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - - /* update marker to reflect we're done with full sync */ - sync_marker.state = rgw_data_sync_marker::IncrementalSync; - sync_marker.marker = sync_marker.next_step_marker; - sync_marker.next_step_marker.clear(); - yield call(new RGWSimpleRadosWriteCR( - sc->env->dpp,sc->env->async_rados, sc->env->svc->sysobj, - rgw_raw_obj(pool, status_oid), sync_marker)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to set sync marker: retcode=" << retcode)); - return set_cr_error(retcode); - } - - // clean up full sync index, ignoring errors - yield call(new RGWRadosRemoveCR(sc->env->driver, {pool, oid})); - - // transition to incremental sync - return set_cr_done(); - } - return 0; - } -}; - -class RGWDataIncSyncShardCR : public RGWDataBaseSyncShardCR { - static constexpr int max_error_entries = 10; - static constexpr uint32_t retry_backoff_secs = 60; - - ceph::mutex& inc_lock; - bc::flat_set& modified_shards; - - bc::flat_set current_modified; - decltype(current_modified)::iterator modified_iter; - - ceph::coarse_real_time error_retry_time; - string error_marker; - std::map error_entries; - decltype(error_entries)::iterator iter; - ceph::real_time entry_timestamp; - std::optional gen; - - string next_marker; - vector log_entries; - decltype(log_entries)::iterator log_iter; - bool truncated = false; - - utime_t get_idle_interval() const { - ceph::timespan interval = std::chrono::seconds(cct->_conf->rgw_data_sync_poll_interval); - if (!ceph::coarse_real_clock::is_zero(error_retry_time)) { - auto now = ceph::coarse_real_clock::now(); - if (error_retry_time > now) { - auto d = error_retry_time - now; - if (interval > d) { - interval = d; - } - } - } - // convert timespan -> time_point -> utime_t - return utime_t(ceph::coarse_real_clock::zero() + interval); - } - - -public: - - RGWDataIncSyncShardCR( - RGWDataSyncCtx *const sc, const rgw_pool& pool, const uint32_t shard_id, - rgw_data_sync_marker& sync_marker, RGWSyncTraceNodeRef tn, - const string& status_oid, const rgw_raw_obj& error_repo, - boost::intrusive_ptr lease_cr, - const rgw_data_sync_status& sync_status, - const boost::intrusive_ptr& bucket_shard_cache, - ceph::mutex& inc_lock, - bc::flat_set& modified_shards) - : RGWDataBaseSyncShardCR(sc, pool, shard_id, sync_marker, tn, - status_oid, error_repo, std::move(lease_cr), - sync_status, bucket_shard_cache), - inc_lock(inc_lock), modified_shards(modified_shards) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - tn->log(10, "start incremental sync"); - marker_tracker.emplace(sc, status_oid, sync_marker, tn); - do { - if (!lease_cr->is_locked()) { - drain_all(); - tn->log(1, "lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - { - current_modified.clear(); - std::unique_lock il(inc_lock); - current_modified.swap(modified_shards); - il.unlock(); - } - - if (current_modified.size() > 0) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - } - /* process out of band updates */ - for (modified_iter = 
current_modified.begin(); - modified_iter != current_modified.end(); - ++modified_iter) { - retcode = parse_bucket_key(modified_iter->key, source_bs); - if (retcode < 0) { - tn->log(1, SSTR("failed to parse bucket shard: " - << modified_iter->key)); - continue; - } - tn->log(20, SSTR("received async update notification: " - << modified_iter->key)); - spawn(data_sync_single_entry(sc, source_bs, modified_iter->gen, {}, - ceph::real_time{}, lease_cr, - bucket_shard_cache, &*marker_tracker, - error_repo, tn, false), false); - } - - if (error_retry_time <= ceph::coarse_real_clock::now()) { - /* process bucket shards that previously failed */ - omapvals = std::make_shared(); - yield call(new RGWRadosGetOmapValsCR(sc->env->driver, error_repo, - error_marker, max_error_entries, - omapvals)); - error_entries = std::move(omapvals->entries); - tn->log(20, SSTR("read error repo, got " << error_entries.size() - << " entries")); - iter = error_entries.begin(); - for (; iter != error_entries.end(); ++iter) { - error_marker = iter->first; - entry_timestamp = rgw::error_repo::decode_value(iter->second); - retcode = rgw::error_repo::decode_key(iter->first, source_bs, gen); - if (retcode == -EINVAL) { - // backward compatibility for string keys that don't encode a gen - retcode = parse_bucket_key(error_marker, source_bs); - } - if (retcode < 0) { - tn->log(1, SSTR("failed to parse bucket shard: " << error_marker)); - spawn(rgw::error_repo::remove_cr(sc->env->driver->svc()->rados, - error_repo, error_marker, - entry_timestamp), - false); - continue; - } - tn->log(10, SSTR("gen is " << gen)); - if (!gen) { - // write all full sync obligations for the bucket to error repo - spawn(new RGWDataIncrementalSyncFullObligationCR(sc, source_bs, - error_repo, error_marker, entry_timestamp, tn), false); - } else { - tn->log(20, SSTR("handle error entry key=" - << to_string(source_bs, gen) - << " timestamp=" << entry_timestamp)); - spawn(data_sync_single_entry(sc, source_bs, gen, "", - entry_timestamp, lease_cr, - bucket_shard_cache, &*marker_tracker, - error_repo, tn, true), false); - } - } - if (!omapvals->more) { - error_retry_time = ceph::coarse_real_clock::now() + - make_timespan(retry_backoff_secs); - error_marker.clear(); - } - } - omapvals.reset(); - - tn->log(20, SSTR("shard_id=" << shard_id << " sync_marker=" - << sync_marker.marker)); - yield call(new RGWReadRemoteDataLogShardCR(sc, shard_id, - sync_marker.marker, - &next_marker, &log_entries, - &truncated)); - if (retcode < 0 && retcode != -ENOENT) { - tn->log(0, SSTR("ERROR: failed to read remote data log info: ret=" - << retcode)); - drain_all(); - return set_cr_error(retcode); - } - - if (log_entries.size() > 0) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - } - - for (log_iter = log_entries.begin(); - log_iter != log_entries.end(); - ++log_iter) { - tn->log(20, SSTR("shard_id=" << shard_id << " log_entry: " - << log_iter->log_id << ":" << log_iter->log_timestamp - << ":" << log_iter->entry.key)); - retcode = parse_bucket_key(log_iter->entry.key, source_bs); - if (retcode < 0) { - tn->log(1, SSTR("failed to parse bucket shard: " - << log_iter->entry.key)); - marker_tracker->try_update_high_marker(log_iter->log_id, 0, - log_iter->log_timestamp); - continue; - } - if (!marker_tracker->start(log_iter->log_id, 0, - log_iter->log_timestamp)) { - tn->log(0, SSTR("ERROR: cannot start syncing " << log_iter->log_id - << ". 
Duplicate entry?")); - } else { - tn->log(1, SSTR("incremental sync on " << log_iter->entry.key - << "shard: " << shard_id << "on gen " - << log_iter->entry.gen)); - yield_spawn_window( - data_sync_single_entry(sc, source_bs,log_iter->entry.gen, - log_iter->log_id, log_iter->log_timestamp, - lease_cr,bucket_shard_cache, - &*marker_tracker, error_repo, tn, false), - cct->_conf->rgw_data_sync_spawn_window, std::nullopt); - } - } - - tn->log(20, SSTR("shard_id=" << shard_id << - " sync_marker="<< sync_marker.marker - << " next_marker=" << next_marker - << " truncated=" << truncated)); - if (!next_marker.empty()) { - sync_marker.marker = next_marker; - } else if (!log_entries.empty()) { - sync_marker.marker = log_entries.back().log_id; - } - if (!truncated) { - // we reached the end, wait a while before checking for more - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - yield wait(get_idle_interval()); - } - } while (true); - } - return 0; - } -}; - -class RGWDataSyncShardCR : public RGWCoroutine { - RGWDataSyncCtx *const sc; - const rgw_pool pool; - const uint32_t shard_id; - rgw_data_sync_marker& sync_marker; - rgw_data_sync_status sync_status; - const RGWSyncTraceNodeRef tn; - bool *reset_backoff; - - ceph::mutex inc_lock = ceph::make_mutex("RGWDataSyncShardCR::inc_lock"); - ceph::condition_variable inc_cond; - - RGWDataSyncEnv *const sync_env{ sc->env }; - - const string status_oid{ RGWDataSyncStatusManager::shard_obj_name( - sc->source_zone, shard_id) }; - const rgw_raw_obj error_repo{ pool, status_oid + ".retry" }; - - // target number of entries to cache before recycling idle ones - static constexpr size_t target_cache_size = 256; - boost::intrusive_ptr bucket_shard_cache { - rgw::bucket_sync::Cache::create(target_cache_size) }; - - boost::intrusive_ptr lease_cr; - boost::intrusive_ptr lease_stack; - - bc::flat_set modified_shards; - -public: - RGWDataSyncShardCR(RGWDataSyncCtx* const _sc, const rgw_pool& pool, - const uint32_t shard_id, rgw_data_sync_marker& marker, - const rgw_data_sync_status& sync_status, - RGWSyncTraceNodeRef& tn, bool *reset_backoff) - : RGWCoroutine(_sc->cct), sc(_sc), pool(pool), shard_id(shard_id), - sync_marker(marker), sync_status(sync_status), tn(tn), - reset_backoff(reset_backoff) { - set_description() << "data sync shard source_zone=" << sc->source_zone - << " shard_id=" << shard_id; - } - - ~RGWDataSyncShardCR() override { - if (lease_cr) { - lease_cr->abort(); - } - } - - void append_modified_shards(bc::flat_set& entries) { - std::lock_guard l{inc_lock}; - modified_shards.insert(entries.begin(), entries.end()); - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield init_lease_cr(); - while (!lease_cr->is_locked()) { - if (lease_cr->is_done()) { - tn->log(5, "failed to take lease"); - set_status("lease lock failed, early abort"); - drain_all(); - return set_cr_error(lease_cr->get_ret_status()); - } - set_sleeping(true); - yield; - } - *reset_backoff = true; - tn->log(10, "took lease"); - - while (true) { - if (sync_marker.state == rgw_data_sync_marker::FullSync) { - yield call(new RGWDataFullSyncShardCR(sc, pool, shard_id, - sync_marker, tn, - status_oid, error_repo, - lease_cr, sync_status, - bucket_shard_cache)); - if (retcode < 0) { - if (retcode != -EBUSY) { - tn->log(10, SSTR("full sync failed (retcode=" << retcode << ")")); - } - lease_cr->go_down(); - drain_all(); - return set_cr_error(retcode); - } - } else if (sync_marker.state == rgw_data_sync_marker::IncrementalSync) { - yield call(new RGWDataIncSyncShardCR(sc, pool, 
-class RGWDataSyncShardCR : public RGWCoroutine {
-  RGWDataSyncCtx *const sc;
-  const rgw_pool pool;
-  const uint32_t shard_id;
-  rgw_data_sync_marker& sync_marker;
-  rgw_data_sync_status sync_status;
-  const RGWSyncTraceNodeRef tn;
-  bool *reset_backoff;
-
-  ceph::mutex inc_lock = ceph::make_mutex("RGWDataSyncShardCR::inc_lock");
-  ceph::condition_variable inc_cond;
-
-  RGWDataSyncEnv *const sync_env{ sc->env };
-
-  const string status_oid{ RGWDataSyncStatusManager::shard_obj_name(
-      sc->source_zone, shard_id) };
-  const rgw_raw_obj error_repo{ pool, status_oid + ".retry" };
-
-  // target number of entries to cache before recycling idle ones
-  static constexpr size_t target_cache_size = 256;
-  boost::intrusive_ptr<rgw::bucket_sync::Cache> bucket_shard_cache {
-    rgw::bucket_sync::Cache::create(target_cache_size) };
-
-  boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
-  boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
-
-  bc::flat_set<rgw_data_notify_entry> modified_shards;
-
-public:
-  RGWDataSyncShardCR(RGWDataSyncCtx* const _sc, const rgw_pool& pool,
-                     const uint32_t shard_id, rgw_data_sync_marker& marker,
-                     const rgw_data_sync_status& sync_status,
-                     RGWSyncTraceNodeRef& tn, bool *reset_backoff)
-    : RGWCoroutine(_sc->cct), sc(_sc), pool(pool), shard_id(shard_id),
-      sync_marker(marker), sync_status(sync_status), tn(tn),
-      reset_backoff(reset_backoff) {
-    set_description() << "data sync shard source_zone=" << sc->source_zone
-                      << " shard_id=" << shard_id;
-  }
-
-  ~RGWDataSyncShardCR() override {
-    if (lease_cr) {
-      lease_cr->abort();
-    }
-  }
-
-  void append_modified_shards(bc::flat_set<rgw_data_notify_entry>& entries) {
-    std::lock_guard l{inc_lock};
-    modified_shards.insert(entries.begin(), entries.end());
-  }
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      yield init_lease_cr();
-      while (!lease_cr->is_locked()) {
-        if (lease_cr->is_done()) {
-          tn->log(5, "failed to take lease");
-          set_status("lease lock failed, early abort");
-          drain_all();
-          return set_cr_error(lease_cr->get_ret_status());
-        }
-        set_sleeping(true);
-        yield;
-      }
-      *reset_backoff = true;
-      tn->log(10, "took lease");
-
-      while (true) {
-        if (sync_marker.state == rgw_data_sync_marker::FullSync) {
-          yield call(new RGWDataFullSyncShardCR(sc, pool, shard_id,
-                                                sync_marker, tn,
-                                                status_oid, error_repo,
-                                                lease_cr, sync_status,
-                                                bucket_shard_cache));
-          if (retcode < 0) {
-            if (retcode != -EBUSY) {
-              tn->log(10, SSTR("full sync failed (retcode=" << retcode << ")"));
-            }
-            lease_cr->go_down();
-            drain_all();
-            return set_cr_error(retcode);
-          }
-        } else if (sync_marker.state == rgw_data_sync_marker::IncrementalSync) {
-          yield call(new RGWDataIncSyncShardCR(sc, pool, shard_id,
-                                               sync_marker, tn,
-                                               status_oid, error_repo,
-                                               lease_cr, sync_status,
-                                               bucket_shard_cache,
-                                               inc_lock, modified_shards));
-          if (retcode < 0) {
-            if (retcode != -EBUSY) {
-              tn->log(10, SSTR("incremental sync failed (retcode=" << retcode
-                               << ")"));
-            }
-            lease_cr->go_down();
-            drain_all();
-            return set_cr_error(retcode);
-          }
-        } else {
-          lease_cr->go_down();
-          drain_all();
-          return set_cr_error(-EIO);
-        }
-      }
-    }
-    return 0;
-  }
-
-  void init_lease_cr() {
-    set_status("acquiring sync lock");
-    uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
-    string lock_name = "sync_lock";
-    if (lease_cr) {
-      lease_cr->abort();
-    }
-    auto driver = sync_env->driver;
-    lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, driver,
-                                            rgw_raw_obj(pool, status_oid),
-                                            lock_name, lock_duration, this));
-    lease_stack.reset(spawn(lease_cr.get(), false));
-  }
-};
-
-class RGWDataSyncShardControlCR : public RGWBackoffControlCR {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-
-  rgw_pool pool;
-
-  uint32_t shard_id;
-  rgw_data_sync_marker sync_marker;
-  rgw_data_sync_status sync_status;
-
-  RGWSyncTraceNodeRef tn;
-public:
-  RGWDataSyncShardControlCR(RGWDataSyncCtx *_sc, const rgw_pool& _pool,
-                            uint32_t _shard_id, rgw_data_sync_marker& _marker,
-                            const rgw_data_sync_status& sync_status,
-                            RGWSyncTraceNodeRef& _tn_parent)
-    : RGWBackoffControlCR(_sc->cct, false),
-      sc(_sc), sync_env(_sc->env),
-      pool(_pool),
-      shard_id(_shard_id),
-      sync_marker(_marker) {
-    tn = sync_env->sync_tracer->add_node(_tn_parent, "shard", std::to_string(shard_id));
-  }
-
-  RGWCoroutine *alloc_cr() override {
-    return new RGWDataSyncShardCR(sc, pool, shard_id, sync_marker, sync_status, tn, backoff_ptr());
-  }
-
-  RGWCoroutine *alloc_finisher_cr() override {
-    return new RGWSimpleRadosReadCR<rgw_data_sync_marker>(
-      sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj,
-      rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool,
-                  RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id)),
-      &sync_marker);
-  }
-
-  void append_modified_shards(bc::flat_set<rgw_data_notify_entry>& keys) {
-    std::lock_guard l{cr_lock()};
-
-    RGWDataSyncShardCR *cr = static_cast<RGWDataSyncShardCR *>(get_cr());
-    if (!cr) {
-      return;
-    }
-
-    cr->append_modified_shards(keys);
-  }
-};
-
-class RGWDataSyncCR : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  uint32_t num_shards;
-
-  rgw_data_sync_status sync_status;
-
-  ceph::mutex shard_crs_lock =
-    ceph::make_mutex("RGWDataSyncCR::shard_crs_lock");
-  map<int, RGWDataSyncShardControlCR *> shard_crs;
-
-  bool *reset_backoff;
-
-  RGWSyncTraceNodeRef tn;
-
-  RGWDataSyncModule *data_sync_module{nullptr};
-public:
-  RGWDataSyncCR(RGWDataSyncCtx *_sc, uint32_t _num_shards,
-                RGWSyncTraceNodeRef& _tn, bool *_reset_backoff)
-    : RGWCoroutine(_sc->cct),
-      sc(_sc), sync_env(_sc->env),
-      num_shards(_num_shards),
-      reset_backoff(_reset_backoff), tn(_tn) {
-  }
-
-  ~RGWDataSyncCR() override {
-    for (auto iter : shard_crs) {
-      iter.second->put();
-    }
-  }
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-
-      /* read sync status */
-      yield call(new RGWReadDataSyncStatusCoroutine(sc, &sync_status));
-
-      data_sync_module = sync_env->sync_module->get_data_handler();
-
-      if (retcode < 0 && retcode != -ENOENT) {
-        tn->log(0, SSTR("ERROR: failed to fetch sync status, retcode=" << retcode));
-        return set_cr_error(retcode);
-      }
-
-      /* state: init status */
-      if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateInit) {
-        tn->log(20, SSTR("init"));
-        sync_status.sync_info.num_shards = num_shards;
-        uint64_t instance_id;
-        instance_id = ceph::util::generate_random_number<uint64_t>();
-        yield call(new RGWInitDataSyncStatusCoroutine(sc, num_shards, instance_id, tn, &sync_status));
-        if (retcode < 0) {
-          tn->log(0, SSTR("ERROR: failed to init sync, retcode=" << retcode));
-          return set_cr_error(retcode);
-        }
-        // sets state = StateBuildingFullSyncMaps
-
-        *reset_backoff = true;
-      }
-
-      data_sync_module->init(sc, sync_status.sync_info.instance_id);
-
-      if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateBuildingFullSyncMaps) {
-        tn->log(10, SSTR("building full sync maps"));
-        /* call sync module init here */
-        sync_status.sync_info.num_shards = num_shards;
-        yield call(data_sync_module->init_sync(dpp, sc));
-        if (retcode < 0) {
-          tn->log(0, SSTR("ERROR: sync module init_sync() failed, retcode=" << retcode));
-          return set_cr_error(retcode);
-        }
-        /* state: building full sync maps */
-        yield call(new RGWListBucketIndexesCR(sc, &sync_status));
-        if (retcode < 0) {
-          tn->log(0, SSTR("ERROR: failed to build full sync maps, retcode=" << retcode));
-          return set_cr_error(retcode);
-        }
-        sync_status.sync_info.state = rgw_data_sync_info::StateSync;
-
-        /* update new state */
-        yield call(set_sync_info_cr());
-        if (retcode < 0) {
-          tn->log(0, SSTR("ERROR: failed to write sync status, retcode=" << retcode));
-          return set_cr_error(retcode);
-        }
-
-        *reset_backoff = true;
-      }
-
-      yield call(data_sync_module->start_sync(dpp, sc));
-      if (retcode < 0) {
-        tn->log(0, SSTR("ERROR: failed to start sync, retcode=" << retcode));
-        return set_cr_error(retcode);
-      }
-
-      yield {
-        if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateSync) {
-          tn->log(10, SSTR("spawning " << num_shards << " shards sync"));
-          for (map<uint32_t, rgw_data_sync_marker>::iterator iter = sync_status.sync_markers.begin();
-               iter != sync_status.sync_markers.end(); ++iter) {
-            RGWDataSyncShardControlCR *cr = new RGWDataSyncShardControlCR(sc, sync_env->svc->zone->get_zone_params().log_pool,
-                                                                          iter->first, iter->second, sync_status, tn);
-            cr->get();
-            shard_crs_lock.lock();
-            shard_crs[iter->first] = cr;
-            shard_crs_lock.unlock();
-            spawn(cr, true);
-          }
-        }
-      }
-
-      return set_cr_done();
-    }
-    return 0;
-  }
-
-  RGWCoroutine *set_sync_info_cr() {
-    return new RGWSimpleRadosWriteCR<rgw_data_sync_info>(
-      sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj,
-      rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool,
-                  RGWDataSyncStatusManager::sync_status_oid(sc->source_zone)),
-      sync_status.sync_info);
-  }
-
-  void wakeup(int shard_id, bc::flat_set<rgw_data_notify_entry>& entries) {
-    std::lock_guard l{shard_crs_lock};
-    map<int, RGWDataSyncShardControlCR *>::iterator iter = shard_crs.find(shard_id);
-    if (iter == shard_crs.end()) {
-      return;
-    }
-    iter->second->append_modified_shards(entries);
-    iter->second->wakeup();
-  }
-};
-class RGWDefaultDataSyncModule : public RGWDataSyncModule {
-public:
-  RGWDefaultDataSyncModule() {}
-
-  RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override;
-  RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override;
-  RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
-                                     rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override;
-};
-
-class RGWDefaultSyncModuleInstance : public RGWSyncModuleInstance {
-  RGWDefaultDataSyncModule data_handler;
-public:
-  RGWDefaultSyncModuleInstance() {}
-  RGWDataSyncModule *get_data_handler() override {
-    return &data_handler;
-  }
-  bool supports_user_writes() override {
-    return true;
-  }
-};
-
-int RGWDefaultSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance)
-{
-  instance->reset(new RGWDefaultSyncModuleInstance());
-  return 0;
-}
-
-class RGWUserPermHandler {
-  friend struct Init;
-  friend class Bucket;
-
-  RGWDataSyncEnv *sync_env;
-  rgw_user uid;
-
-  struct _info {
-    RGWUserInfo user_info;
-    rgw::IAM::Environment env;
-    std::unique_ptr<rgw::auth::Identity> identity;
-    RGWAccessControlPolicy user_acl;
-  };
-
-  std::shared_ptr<_info> info;
-
-  struct Init;
-
-  std::shared_ptr<Init> init_action;
-
-  struct Init : public RGWGenericAsyncCR::Action {
-    RGWDataSyncEnv *sync_env;
-
-    rgw_user uid;
-    std::shared_ptr<_info> info;
-
-    int ret{0};
-
-    Init(RGWUserPermHandler *handler) : sync_env(handler->sync_env),
-                                        uid(handler->uid),
-                                        info(handler->info) {}
-    int operate() override {
-      auto user_ctl = sync_env->driver->getRados()->ctl.user;
-
-      ret = user_ctl->get_info_by_uid(sync_env->dpp, uid, &info->user_info, null_yield);
-      if (ret < 0) {
-        return ret;
-      }
-
-      info->identity = rgw::auth::transform_old_authinfo(sync_env->cct,
-                                                         uid,
-                                                         RGW_PERM_FULL_CONTROL,
-                                                         false, /* system_request? */
-                                                         TYPE_RGW);
-
-      map<string, bufferlist> uattrs;
-
-      ret = user_ctl->get_attrs_by_uid(sync_env->dpp, uid, &uattrs, null_yield);
-      if (ret == 0) {
-        ret = RGWUserPermHandler::policy_from_attrs(sync_env->cct, uattrs, &info->user_acl);
-      }
-      if (ret == -ENOENT) {
-        info->user_acl.create_default(uid, info->user_info.display_name);
-      }
-
-      return 0;
-    }
-  };
-
-public:
-  RGWUserPermHandler(RGWDataSyncEnv *_sync_env,
-                     const rgw_user& _uid) : sync_env(_sync_env),
-                                             uid(_uid) {}
-
-  RGWCoroutine *init_cr() {
-    info = make_shared<_info>();
-    init_action = make_shared<Init>(this);
-
-    return new RGWGenericAsyncCR(sync_env->cct,
-                                 sync_env->async_rados,
-                                 init_action);
-  }
-
-  class Bucket {
-    RGWDataSyncEnv *sync_env;
-    std::shared_ptr<_info> info;
-    RGWAccessControlPolicy bucket_acl;
-    std::optional<perm_state> ps;
-  public:
-    Bucket() {}
-
-    int init(RGWUserPermHandler *handler,
-             const RGWBucketInfo& bucket_info,
-             const map<string, bufferlist>& bucket_attrs);
-
-    bool verify_bucket_permission(int perm);
-    bool verify_object_permission(const map<string, bufferlist>& obj_attrs,
-                                  int perm);
-  };
-
-  static int policy_from_attrs(CephContext *cct,
-                               const map<string, bufferlist>& attrs,
-                               RGWAccessControlPolicy *acl) {
-    acl->set_ctx(cct);
-
-    auto aiter = attrs.find(RGW_ATTR_ACL);
-    if (aiter == attrs.end()) {
-      return -ENOENT;
-    }
-    auto iter = aiter->second.begin();
-    try {
-      acl->decode(iter);
-    } catch (buffer::error& err) {
-      ldout(cct, 0) << "ERROR: " << __func__ << "(): could not decode policy, caught buffer::error" << dendl;
-      return -EIO;
-    }
-
-    return 0;
-  }
-
-  int init_bucket(const RGWBucketInfo& bucket_info,
-                  const map<string, bufferlist>& bucket_attrs,
-                  Bucket *bs) {
-    return bs->init(this, bucket_info, bucket_attrs);
-  }
-};
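The `policy_from_attrs()` helper above follows a common pattern: look up a serialized ACL in an xattr map, report `-ENOENT` when absent so the caller can fall back to a default policy, and reject undecodable blobs. A simplified, self-contained analog of that flow (the `Acl` type and attr key are stand-ins, not RGW's):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

constexpr uint32_t PERM_READ = 0x1, PERM_WRITE = 0x2;

struct Acl { uint32_t owner_perms = PERM_READ | PERM_WRITE; };

// models policy_from_attrs(): distinguish "missing" from "present"
int acl_from_attrs(const std::map<std::string, Acl>& attrs, Acl* out) {
  auto it = attrs.find("user.rgw.acl");
  if (it == attrs.end()) return -2;  // stands in for -ENOENT
  *out = it->second;                 // the real code decodes a bufferlist here
  return 0;
}

int main() {
  std::map<std::string, Acl> attrs;  // no ACL attr stored yet
  Acl acl;
  if (acl_from_attrs(attrs, &acl) < 0)
    acl = Acl{};                     // models the create_default() fallback
  std::cout << "write allowed: " << bool(acl.owner_perms & PERM_WRITE) << "\n";
}
```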
-int RGWUserPermHandler::Bucket::init(RGWUserPermHandler *handler,
-                                     const RGWBucketInfo& bucket_info,
-                                     const map<string, bufferlist>& bucket_attrs)
-{
-  sync_env = handler->sync_env;
-  info = handler->info;
-
-  int r = RGWUserPermHandler::policy_from_attrs(sync_env->cct, bucket_attrs, &bucket_acl);
-  if (r < 0) {
-    return r;
-  }
-
-  ps.emplace(sync_env->cct,
-             info->env,
-             info->identity.get(),
-             bucket_info,
-             info->identity->get_perm_mask(),
-             false, /* defer to bucket acls */
-             nullptr, /* referer */
-             false); /* request_payer */
-
-  return 0;
-}
-
-bool RGWUserPermHandler::Bucket::verify_bucket_permission(int perm)
-{
-  return verify_bucket_permission_no_policy(sync_env->dpp,
-                                            &(*ps),
-                                            &info->user_acl,
-                                            &bucket_acl,
-                                            perm);
-}
-
-bool RGWUserPermHandler::Bucket::verify_object_permission(const map<string, bufferlist>& obj_attrs,
-                                                          int perm)
-{
-  RGWAccessControlPolicy obj_acl;
-
-  int r = policy_from_attrs(sync_env->cct, obj_attrs, &obj_acl);
-  if (r < 0) {
-    return r;
-  }
-
-  return verify_bucket_permission_no_policy(sync_env->dpp,
-                                            &(*ps),
-                                            &bucket_acl,
-                                            &obj_acl,
-                                            perm);
-}
-
-class RGWFetchObjFilter_Sync : public RGWFetchObjFilter_Default {
-  rgw_bucket_sync_pipe sync_pipe;
-
-  std::shared_ptr<RGWUserPermHandler::Bucket> bucket_perms;
-  std::optional<rgw_sync_pipe_dest_params> verify_dest_params;
-
-  std::optional<ceph::real_time> mtime;
-  std::optional<string> etag;
-  std::optional<uint64_t> obj_size;
-
-  std::unique_ptr<rgw::auth::Identity> identity;
-
-  std::shared_ptr<bool> need_retry;
-
-public:
-  RGWFetchObjFilter_Sync(rgw_bucket_sync_pipe& _sync_pipe,
-                         std::shared_ptr<RGWUserPermHandler::Bucket>& _bucket_perms,
-                         std::optional<rgw_sync_pipe_dest_params>&& _verify_dest_params,
-                         std::shared_ptr<bool>& _need_retry)
-    : sync_pipe(_sync_pipe),
-      bucket_perms(_bucket_perms),
-      verify_dest_params(std::move(_verify_dest_params)),
-      need_retry(_need_retry) {
-    *need_retry = false;
-  }
-
-  int filter(CephContext *cct,
-             const rgw_obj_key& source_key,
-             const RGWBucketInfo& dest_bucket_info,
-             std::optional<rgw_placement_rule> dest_placement_rule,
-             const map<string, bufferlist>& obj_attrs,
-             std::optional<rgw_user> *poverride_owner,
-             const rgw_placement_rule **prule) override;
-};
-
-int RGWFetchObjFilter_Sync::filter(CephContext *cct,
-                                   const rgw_obj_key& source_key,
-                                   const RGWBucketInfo& dest_bucket_info,
-                                   std::optional<rgw_placement_rule> dest_placement_rule,
-                                   const map<string, bufferlist>& obj_attrs,
-                                   std::optional<rgw_user> *poverride_owner,
-                                   const rgw_placement_rule **prule)
-{
-  int abort_err = -ERR_PRECONDITION_FAILED;
-
-  rgw_sync_pipe_params params;
-
-  RGWObjTags obj_tags;
-
-  auto iter = obj_attrs.find(RGW_ATTR_TAGS);
-  if (iter != obj_attrs.end()) {
-    try {
-      auto it = iter->second.cbegin();
-      obj_tags.decode(it);
-    } catch (buffer::error &err) {
-      ldout(cct, 0) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl;
-    }
-  }
-
-  if (!sync_pipe.info.handler.find_obj_params(source_key,
-                                              obj_tags.get_tags(),
-                                              &params)) {
-    return abort_err;
-  }
-
-  if (verify_dest_params &&
-      !(*verify_dest_params == params.dest)) {
-    /* raced! original dest params were different, will need to retry */
-    ldout(cct, 0) << "WARNING: " << __func__ << ": pipe dest params are different than original params, must have raced with object rewrite, retrying" << dendl;
-    *need_retry = true;
-    return -ECANCELED;
-  }
-
-  std::optional<map<string, bufferlist> > new_attrs;
-
-  if (params.dest.acl_translation) {
-    rgw_user& acl_translation_owner = params.dest.acl_translation->owner;
-    if (!acl_translation_owner.empty()) {
-      if (params.mode == rgw_sync_pipe_params::MODE_USER &&
-          acl_translation_owner != dest_bucket_info.owner) {
-        ldout(cct, 0) << "ERROR: " << __func__ << ": acl translation was requested, but user (" << acl_translation_owner
-                      << ") is not dest bucket owner (" << dest_bucket_info.owner << ")" << dendl;
-        return -EPERM;
-      }
-      *poverride_owner = acl_translation_owner;
-    }
-  }
-  if (params.mode == rgw_sync_pipe_params::MODE_USER) {
-    if (!bucket_perms->verify_object_permission(obj_attrs, RGW_PERM_READ)) {
-      ldout(cct, 0) << "ERROR: " << __func__ << ": permission check failed: user not allowed to fetch object" << dendl;
-      return -EPERM;
-    }
-  }
-
-  if (!dest_placement_rule &&
-      params.dest.storage_class) {
-    dest_rule.storage_class = *params.dest.storage_class;
-    dest_rule.inherit_from(dest_bucket_info.placement_rule);
-    dest_placement_rule = dest_rule;
-    *prule = &dest_rule;
-  }
-
-  return RGWFetchObjFilter_Default::filter(cct,
-                                           source_key,
-                                           dest_bucket_info,
-                                           dest_placement_rule,
-                                           obj_attrs,
-                                           poverride_owner,
-                                           prule);
-}
-
-class RGWObjFetchCR : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  rgw_bucket_sync_pipe& sync_pipe;
-  rgw_obj_key& key;
-  std::optional<rgw_obj_key> dest_key;
-  std::optional<uint64_t> versioned_epoch;
-  rgw_zone_set *zones_trace;
-
-  bool need_more_info{false};
-  bool check_change{false};
-
-  ceph::real_time src_mtime;
-  uint64_t src_size;
-  string src_etag;
-  map<string, bufferlist> src_attrs;
-  map<string, string> src_headers;
-
-  std::optional<rgw_user> param_user;
-  rgw_sync_pipe_params::Mode param_mode;
-
-  std::optional<RGWUserPermHandler> user_perms;
-  std::shared_ptr<RGWUserPermHandler::Bucket> source_bucket_perms;
-  RGWUserPermHandler::Bucket dest_bucket_perms;
-
-  std::optional<rgw_sync_pipe_dest_params> dest_params;
-
-  int try_num{0};
-  std::shared_ptr<bool> need_retry;
-public:
-  RGWObjFetchCR(RGWDataSyncCtx *_sc,
-                rgw_bucket_sync_pipe& _sync_pipe,
-                rgw_obj_key& _key,
-                std::optional<rgw_obj_key> _dest_key,
-                std::optional<uint64_t> _versioned_epoch,
-                rgw_zone_set *_zones_trace)
-    : RGWCoroutine(_sc->cct),
-      sc(_sc), sync_env(_sc->env),
-      sync_pipe(_sync_pipe),
-      key(_key),
-      dest_key(_dest_key),
-      versioned_epoch(_versioned_epoch),
-      zones_trace(_zones_trace) {
-  }
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-
-#define MAX_RACE_RETRIES_OBJ_FETCH 10
-      for (try_num = 0; try_num < MAX_RACE_RETRIES_OBJ_FETCH; ++try_num) {
-
-        {
-          std::optional<rgw_sync_pipe_acl_translation> param_acl_translation;
-          std::optional<string> param_storage_class;
-
-          if (!sync_pipe.info.handler.find_basic_info_without_tags(key,
-                                                                   &param_user,
-                                                                   &param_acl_translation,
-                                                                   &param_storage_class,
-                                                                   &param_mode,
-                                                                   &need_more_info)) {
-            if (!need_more_info) {
-              return set_cr_error(-ERR_PRECONDITION_FAILED);
-            }
-          }
-        }
-
-        if (need_more_info) {
-          ldout(cct, 20) << "Could not determine exact policy rule for obj=" << key << ", will read source object attributes" << dendl;
-          /*
-           * we need to fetch info about source object, so that we can determine
-           * the correct policy configuration. This can happen if there are multiple
-           * policy rules, and some depend on the object tagging */
-          yield call(new RGWStatRemoteObjCR(sync_env->async_rados,
-                                            sync_env->driver,
-                                            sc->source_zone,
-                                            sync_pipe.info.source_bs.bucket,
-                                            key,
-                                            &src_mtime,
-                                            &src_size,
-                                            &src_etag,
-                                            &src_attrs,
-                                            &src_headers));
-          if (retcode < 0) {
-            return set_cr_error(retcode);
-          }
-
-          RGWObjTags obj_tags;
-
-          auto iter = src_attrs.find(RGW_ATTR_TAGS);
-          if (iter != src_attrs.end()) {
-            try {
-              auto it = iter->second.cbegin();
-              obj_tags.decode(it);
-            } catch (buffer::error &err) {
-              ldout(cct, 0) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl;
-            }
-          }
-
-          rgw_sync_pipe_params params;
-          if (!sync_pipe.info.handler.find_obj_params(key,
-                                                      obj_tags.get_tags(),
-                                                      &params)) {
-            return set_cr_error(-ERR_PRECONDITION_FAILED);
-          }
-
-          param_user = params.user;
-          param_mode = params.mode;
-
-          dest_params = params.dest;
-        }
-
-        if (param_mode == rgw_sync_pipe_params::MODE_USER) {
-          if (!param_user) {
-            ldout(cct, 20) << "ERROR: " << __func__ << ": user level sync but user param not set" << dendl;
-            return set_cr_error(-EPERM);
-          }
-          user_perms.emplace(sync_env, *param_user);
-
-          yield call(user_perms->init_cr());
-          if (retcode < 0) {
-            ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init user perms manager for uid=" << *param_user << dendl;
-            return set_cr_error(retcode);
-          }
-
-          /* verify that user is allowed to write at the target bucket */
-          int r = user_perms->init_bucket(sync_pipe.dest_bucket_info,
-                                          sync_pipe.dest_bucket_attrs,
-                                          &dest_bucket_perms);
-          if (r < 0) {
-            ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init bucket perms manager for uid=" << *param_user << " bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << dendl;
-            return set_cr_error(retcode);
-          }
-
-          if (!dest_bucket_perms.verify_bucket_permission(RGW_PERM_WRITE)) {
-            ldout(cct, 0) << "ERROR: " << __func__ << ": permission check failed: user not allowed to write into bucket (bucket=" << sync_pipe.info.dest_bucket.get_key() << ")" << dendl;
-            return -EPERM;
-          }
-
-          /* init source bucket permission structure */
-          source_bucket_perms = make_shared<RGWUserPermHandler::Bucket>();
-          r = user_perms->init_bucket(sync_pipe.source_bucket_info,
-                                      sync_pipe.source_bucket_attrs,
-                                      source_bucket_perms.get());
-          if (r < 0) {
-            ldout(cct, 20) << "ERROR: " << __func__ << ": failed to init bucket perms manager for uid=" << *param_user << " bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << dendl;
-            return set_cr_error(retcode);
-          }
-        }
-
-        yield {
-          if (!need_retry) {
-            need_retry = make_shared<bool>();
-          }
-          auto filter = make_shared<RGWFetchObjFilter_Sync>(sync_pipe,
-                                                            source_bucket_perms,
-                                                            std::move(dest_params),
-                                                            need_retry);
-
-          call(new RGWFetchRemoteObjCR(sync_env->async_rados, sync_env->driver, sc->source_zone,
-                                       nullopt,
-                                       sync_pipe.info.source_bs.bucket,
-                                       std::nullopt, sync_pipe.dest_bucket_info,
-                                       key, dest_key, versioned_epoch,
-                                       true,
-                                       std::static_pointer_cast<RGWFetchObjFilter>(filter),
-                                       zones_trace, sync_env->counters, dpp));
-        }
-        if (retcode < 0) {
-          if (*need_retry) {
-            continue;
-          }
-          return set_cr_error(retcode);
-        }
-
-        return set_cr_done();
-      }
-
-      ldout(cct, 0) << "ERROR: " << __func__ << ": Too many retries trying to fetch object, possibly a bug: bucket=" << sync_pipe.source_bucket_info.bucket.get_key() << " key=" << key << dendl;
-
-      return set_cr_error(-EIO);
-    }
-    return 0;
-  }
-};
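The retry loop in `RGWObjFetchCR::operate()` above is worth calling out: the fetch filter sets a shared `need_retry` flag when it detects that the pipe's destination params changed mid-flight, and only that case re-enters the loop, bounded by `MAX_RACE_RETRIES_OBJ_FETCH`. A standalone model of the same shape (the names and the simulated race are made up, not RGW APIs):

```cpp
#include <iostream>
#include <memory>

constexpr int MAX_RACE_RETRIES = 10;

// returns 0 on success; on failure, *need_retry says whether it was a race
int fetch_once(int attempt, const std::shared_ptr<bool>& need_retry) {
  *need_retry = (attempt < 2);  // pretend the first two passes race
  return *need_retry ? -1 : 0;
}

int main() {
  auto need_retry = std::make_shared<bool>(false);
  for (int try_num = 0; try_num < MAX_RACE_RETRIES; ++try_num) {
    if (fetch_once(try_num, need_retry) == 0) {
      std::cout << "fetched after " << try_num + 1 << " attempt(s)\n";
      return 0;
    }
    if (!*need_retry) return -1;      // real error: give up immediately
  }
  std::cerr << "too many retries, possibly a bug\n";  // mirrors the log above
  return -1;
}
```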
-RGWCoroutine *RGWDefaultDataSyncModule::sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key,
-                                                    std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace)
-{
-  return new RGWObjFetchCR(sc, sync_pipe, key, std::nullopt, versioned_epoch, zones_trace);
-}
-
-RGWCoroutine *RGWDefaultDataSyncModule::remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key,
-                                                      real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
-{
-  auto sync_env = sc->env;
-  return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone,
-                            sync_pipe.dest_bucket_info, key, versioned, versioned_epoch,
-                            NULL, NULL, false, &mtime, zones_trace);
-}
-
-RGWCoroutine *RGWDefaultDataSyncModule::create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
-                                                             rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
-{
-  auto sync_env = sc->env;
-  return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone,
-                            sync_pipe.dest_bucket_info, key, versioned, versioned_epoch,
-                            &owner.id, &owner.display_name, true, &mtime, zones_trace);
-}
-
-class RGWArchiveDataSyncModule : public RGWDefaultDataSyncModule {
-public:
-  RGWArchiveDataSyncModule() {}
-
-  RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override;
-  RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override;
-  RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
-                                     rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override;
-};
-
-class RGWArchiveSyncModuleInstance : public RGWDefaultSyncModuleInstance {
-  RGWArchiveDataSyncModule data_handler;
-public:
-  RGWArchiveSyncModuleInstance() {}
-  RGWDataSyncModule *get_data_handler() override {
-    return &data_handler;
-  }
-  RGWMetadataHandler *alloc_bucket_meta_handler() override {
-    return RGWArchiveBucketMetaHandlerAllocator::alloc();
-  }
-  RGWBucketInstanceMetadataHandlerBase *alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver) override {
-    return RGWArchiveBucketInstanceMetaHandlerAllocator::alloc(driver);
-  }
-};
-
-int RGWArchiveSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance)
-{
-  instance->reset(new RGWArchiveSyncModuleInstance());
-  return 0;
-}
-
-RGWCoroutine *RGWArchiveDataSyncModule::sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace)
-{
-  auto sync_env = sc->env;
-  ldout(sc->cct, 5) << "SYNC_ARCHIVE: sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
-  if (!sync_pipe.dest_bucket_info.versioned() ||
-      (sync_pipe.dest_bucket_info.flags & BUCKET_VERSIONS_SUSPENDED)) {
-    ldout(sc->cct, 0) << "SYNC_ARCHIVE: sync_object: enabling object versioning for archive bucket" << dendl;
-    sync_pipe.dest_bucket_info.flags = (sync_pipe.dest_bucket_info.flags & ~BUCKET_VERSIONS_SUSPENDED) | BUCKET_VERSIONED;
-    int op_ret = sync_env->driver->getRados()->put_bucket_instance_info(sync_pipe.dest_bucket_info, false, real_time(), NULL, sync_env->dpp);
-    if (op_ret < 0) {
-      ldpp_dout(sync_env->dpp, 0) << "SYNC_ARCHIVE: sync_object: error versioning archive bucket" << dendl;
-      return NULL;
-    }
-  }
-
-  std::optional<rgw_obj_key> dest_key;
-
-  if (versioned_epoch.value_or(0) == 0) { /* force version if not set */
-    versioned_epoch = 0;
-    dest_key = key;
-    if (key.instance.empty()) {
-      sync_env->driver->getRados()->gen_rand_obj_instance_name(&(*dest_key));
-    }
-  }
-
-  return new RGWObjFetchCR(sc, sync_pipe, key, dest_key, versioned_epoch, zones_trace);
-}
-
-RGWCoroutine *RGWArchiveDataSyncModule::remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key,
-                                                      real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
-{
-  ldout(sc->cct, 0) << "SYNC_ARCHIVE: remove_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch << dendl;
-  return NULL;
-}
-
-RGWCoroutine *RGWArchiveDataSyncModule::create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
-                                                             rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
-{
-  ldout(sc->cct, 0) << "SYNC_ARCHIVE: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime
-                    << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
-  auto sync_env = sc->env;
-  return new RGWRemoveObjCR(sync_env->dpp, sync_env->async_rados, sync_env->driver, sc->source_zone,
-                            sync_pipe.dest_bucket_info, key, versioned, versioned_epoch,
-                            &owner.id, &owner.display_name, true, &mtime, zones_trace);
-}
-
-class RGWDataSyncControlCR : public RGWBackoffControlCR
-{
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  uint32_t num_shards;
-
-  RGWSyncTraceNodeRef tn;
-
-  static constexpr bool exit_on_error = false; // retry on all errors
-public:
-  RGWDataSyncControlCR(RGWDataSyncCtx *_sc, uint32_t _num_shards,
-                       RGWSyncTraceNodeRef& _tn_parent)
-    : RGWBackoffControlCR(_sc->cct, exit_on_error),
-      sc(_sc), sync_env(_sc->env), num_shards(_num_shards) {
-    tn = sync_env->sync_tracer->add_node(_tn_parent, "sync");
-  }
-
-  RGWCoroutine *alloc_cr() override {
-    return new RGWDataSyncCR(sc, num_shards, tn, backoff_ptr());
-  }
-
-  void wakeup(int shard_id, bc::flat_set<rgw_data_notify_entry>& entries) {
-    ceph::mutex& m = cr_lock();
-
-    m.lock();
-    RGWDataSyncCR *cr = static_cast<RGWDataSyncCR *>(get_cr());
-    if (!cr) {
-      m.unlock();
-      return;
-    }
-
-    cr->get();
-    m.unlock();
-
-    if (cr) {
-      cr->wakeup(shard_id, entries);
-    }
-
-    cr->put();
-  }
-};
-
-void RGWRemoteDataLog::wakeup(int shard_id, bc::flat_set<rgw_data_notify_entry>& entries) {
-  std::shared_lock rl{lock};
-  if (!data_sync_cr) {
-    return;
-  }
-  data_sync_cr->wakeup(shard_id, entries);
-}
-
-int RGWRemoteDataLog::run_sync(const DoutPrefixProvider *dpp, int num_shards)
-{
-  lock.lock();
-  data_sync_cr = new RGWDataSyncControlCR(&sc, num_shards, tn);
-  data_sync_cr->get(); // run() will drop a ref, so take another
-  lock.unlock();
-
-  int r = run(dpp, data_sync_cr);
-
-  lock.lock();
-  data_sync_cr->put();
-  data_sync_cr = NULL;
-  lock.unlock();
-
-  if (r < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: failed to run sync" << dendl;
-    return r;
-  }
-  return 0;
-}
-
-CephContext *RGWDataSyncStatusManager::get_cct() const
-{
-  return driver->ctx();
-}
-
-int RGWDataSyncStatusManager::init(const DoutPrefixProvider *dpp)
-{
-  RGWZone *zone_def;
-
-  if (!(zone_def = driver->svc()->zone->find_zone(source_zone))) {
-    ldpp_dout(this, 0) << "ERROR: failed to find zone config info for zone=" << source_zone << dendl;
-    return -EIO;
-  }
-
-  if (!driver->svc()->sync_modules->get_manager()->supports_data_export(zone_def->tier_type)) {
-    return -ENOTSUP;
-  }
-
-  const RGWZoneParams& zone_params = driver->svc()->zone->get_zone_params();
-
-  if (sync_module == nullptr) {
-    sync_module = driver->get_sync_module();
-  }
-
-  conn = driver->svc()->zone->get_zone_conn(source_zone);
-  if (!conn) {
-    ldpp_dout(this, 0) << "connection object to zone " << source_zone << " does not exist" << dendl;
-    return -EINVAL;
-  }
-
-  error_logger = new RGWSyncErrorLogger(driver, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS);
-
-  int r = source_log.init(source_zone, conn, error_logger, driver->getRados()->get_sync_tracer(),
-                          sync_module, counters);
-  if (r < 0) {
-    ldpp_dout(this, 0) << "ERROR: failed to init remote log, r=" << r << dendl;
-    finalize();
-    return r;
-  }
-
-  rgw_datalog_info datalog_info;
-  r = source_log.read_log_info(dpp, &datalog_info);
-  if (r < 0) {
-    ldpp_dout(this, 5) << "ERROR: master.read_log_info() returned r=" << r << dendl;
-    finalize();
-    return r;
-  }
-
-  num_shards = datalog_info.num_shards;
-
-  for (int i = 0; i < num_shards; i++) {
-    shard_objs[i] = rgw_raw_obj(zone_params.log_pool, shard_obj_name(source_zone, i));
-  }
-
-  return 0;
-}
-
-void RGWDataSyncStatusManager::finalize()
-{
-  delete error_logger;
-  error_logger = nullptr;
-}
-
-unsigned RGWDataSyncStatusManager::get_subsys() const
-{
-  return dout_subsys;
-}
-
-std::ostream& RGWDataSyncStatusManager::gen_prefix(std::ostream& out) const
-{
-  auto zone = std::string_view{source_zone.id};
-  return out << "data sync zone:" << zone.substr(0, 8) << ' ';
-}
-
-string RGWDataSyncStatusManager::sync_status_oid(const rgw_zone_id& source_zone)
-{
-  char buf[datalog_sync_status_oid_prefix.size() + source_zone.id.size() + 16];
-  snprintf(buf, sizeof(buf), "%s.%s", datalog_sync_status_oid_prefix.c_str(), source_zone.id.c_str());
-
-  return string(buf);
-}
-
-string RGWDataSyncStatusManager::shard_obj_name(const rgw_zone_id& source_zone, int shard_id)
-{
-  char buf[datalog_sync_status_shard_prefix.size() + source_zone.id.size() + 16];
-  snprintf(buf, sizeof(buf), "%s.%s.%d", datalog_sync_status_shard_prefix.c_str(), source_zone.id.c_str(), shard_id);
-
-  return string(buf);
-}
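The two oid helpers above compose names of the form `<prefix>.<zone>` and `<prefix>.<zone>.<shard>`. A trivially runnable model of the same scheme, with illustrative prefix and zone values rather than the actual constants:

```cpp
#include <iostream>
#include <string>

std::string shard_obj_name(const std::string& prefix,
                           const std::string& zone, int shard) {
  return prefix + "." + zone + "." + std::to_string(shard);
}

int main() {
  // e.g. datalog.sync-status.shard.zone-a.7 (prefix value is hypothetical)
  std::cout << shard_obj_name("datalog.sync-status.shard", "zone-a", 7) << "\n";
}
```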
-class RGWInitBucketShardSyncStatusCoroutine : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-
-  const rgw_bucket_sync_pair_info& sync_pair;
-  const string sync_status_oid;
-
-  rgw_bucket_shard_sync_info& status;
-  RGWObjVersionTracker& objv_tracker;
-  const BucketIndexShardsManager& marker_mgr;
-  bool exclusive;
-public:
-  RGWInitBucketShardSyncStatusCoroutine(RGWDataSyncCtx *_sc,
-                                        const rgw_bucket_sync_pair_info& _sync_pair,
-                                        rgw_bucket_shard_sync_info& _status,
-                                        uint64_t gen,
-                                        const BucketIndexShardsManager& _marker_mgr,
-                                        RGWObjVersionTracker& objv_tracker,
-                                        bool exclusive)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
-      sync_pair(_sync_pair),
-      sync_status_oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, _sync_pair, gen)),
-      status(_status), objv_tracker(objv_tracker), marker_mgr(_marker_mgr), exclusive(exclusive)
-  {}
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      yield {
-        rgw_raw_obj obj(sync_env->svc->zone->get_zone_params().log_pool, sync_status_oid);
-
-        // whether or not to do full sync, incremental sync will follow anyway
-        if (sync_env->sync_module->should_full_sync()) {
-          const auto max_marker = marker_mgr.get(sync_pair.source_bs.shard_id, "");
-          status.inc_marker.position = max_marker;
-        }
-        status.inc_marker.timestamp = ceph::real_clock::now();
-        status.state = rgw_bucket_shard_sync_info::StateIncrementalSync;
-
-        map<string, bufferlist> attrs;
-        status.encode_all_attrs(attrs);
-        call(new RGWSimpleRadosWriteAttrsCR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                                            obj, attrs, &objv_tracker, exclusive));
-      }
-
-      if (retcode < 0) {
-        ldout(cct, 20) << "ERROR: init marker position failed. error: " << retcode << dendl;
-        return set_cr_error(retcode);
-      }
-      ldout(cct, 20) << "init marker position: " << status.inc_marker.position <<
-        ". written to shard status object: " << sync_status_oid << dendl;
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
-
-#define BUCKET_SYNC_ATTR_PREFIX RGW_ATTR_PREFIX "bucket-sync."
-
-template <class T>
-static bool decode_attr(CephContext *cct, map<string, bufferlist>& attrs, const string& attr_name, T *val)
-{
-  map<string, bufferlist>::iterator iter = attrs.find(attr_name);
-  if (iter == attrs.end()) {
-    *val = T();
-    return false;
-  }
-
-  auto biter = iter->second.cbegin();
-  try {
-    decode(*val, biter);
-  } catch (buffer::error& err) {
-    ldout(cct, 0) << "ERROR: failed to decode attribute: " << attr_name << dendl;
-    return false;
-  }
-  return true;
-}
-
-void rgw_bucket_shard_sync_info::decode_from_attrs(CephContext *cct, map<string, bufferlist>& attrs)
-{
-  if (!decode_attr(cct, attrs, BUCKET_SYNC_ATTR_PREFIX "state", &state)) {
-    decode_attr(cct, attrs, "state", &state);
-  }
-  if (!decode_attr(cct, attrs, BUCKET_SYNC_ATTR_PREFIX "inc_marker", &inc_marker)) {
-    decode_attr(cct, attrs, "inc_marker", &inc_marker);
-  }
-}
-
-void rgw_bucket_shard_sync_info::encode_all_attrs(map<string, bufferlist>& attrs)
-{
-  encode_state_attr(attrs);
-  inc_marker.encode_attr(attrs);
-}
-
-void rgw_bucket_shard_sync_info::encode_state_attr(map<string, bufferlist>& attrs)
-{
-  using ceph::encode;
-  encode(state, attrs[BUCKET_SYNC_ATTR_PREFIX "state"]);
-}
-
-void rgw_bucket_shard_full_sync_marker::encode_attr(map<string, bufferlist>& attrs)
-{
-  using ceph::encode;
-  encode(*this, attrs[BUCKET_SYNC_ATTR_PREFIX "full_marker"]);
-}
-
-void rgw_bucket_shard_inc_sync_marker::encode_attr(map<string, bufferlist>& attrs)
-{
-  using ceph::encode;
-  encode(*this, attrs[BUCKET_SYNC_ATTR_PREFIX "inc_marker"]);
-}
-
-class RGWReadBucketPipeSyncStatusCoroutine : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  string oid;
-  rgw_bucket_shard_sync_info *status;
-  RGWObjVersionTracker* objv_tracker;
-  map<string, bufferlist> attrs;
-public:
-  RGWReadBucketPipeSyncStatusCoroutine(RGWDataSyncCtx *_sc,
-                                       const rgw_bucket_sync_pair_info& sync_pair,
-                                       rgw_bucket_shard_sync_info *_status,
-                                       RGWObjVersionTracker* objv_tracker,
-                                       uint64_t gen)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
-      oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, gen)),
-      status(_status), objv_tracker(objv_tracker)
-  {}
-  int operate(const DoutPrefixProvider *dpp) override;
-};
-
-int RGWReadBucketPipeSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp)
-{
-  reenter(this) {
-    yield call(new RGWSimpleRadosReadAttrsCR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                                             rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, oid),
-                                             &attrs, true, objv_tracker));
-    if (retcode == -ENOENT) {
-      *status = rgw_bucket_shard_sync_info();
-      return set_cr_done();
-    }
-    if (retcode < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: failed to call fetch bucket shard info oid=" << oid << " ret=" << retcode << dendl;
-      return set_cr_error(retcode);
-    }
-    status->decode_from_attrs(sync_env->cct, attrs);
-    return set_cr_done();
-  }
-  return 0;
-}
-
-// wrap ReadSyncStatus and set a flag if it's not in incremental
-class CheckBucketShardStatusIsIncremental : public RGWReadBucketPipeSyncStatusCoroutine {
-  bool* result;
-  rgw_bucket_shard_sync_info status;
- public:
-  CheckBucketShardStatusIsIncremental(RGWDataSyncCtx* sc,
-                                      const rgw_bucket_sync_pair_info& sync_pair,
-                                      bool* result)
-    : RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &status, nullptr, 0 /*no gen in compat mode*/),
-      result(result)
-  {}
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    int r = RGWReadBucketPipeSyncStatusCoroutine::operate(dpp);
-    if (state == RGWCoroutine_Done &&
-        status.state != rgw_bucket_shard_sync_info::StateIncrementalSync) {
-      *result = false;
-    }
-    return r;
-  }
-};
-
-class CheckAllBucketShardStatusIsIncremental : public RGWShardCollectCR {
-  // start with 1 shard, and only spawn more if we detect an existing shard.
-  // this makes the backward compatibility check far less expensive in the
-  // general case where no shards exist
-  static constexpr int initial_concurrent_shards = 1;
-  static constexpr int max_concurrent_shards = 16;
-
-  RGWDataSyncCtx* sc;
-  rgw_bucket_sync_pair_info sync_pair;
-  const int num_shards;
-  bool* result;
-  int shard = 0;
- public:
-  CheckAllBucketShardStatusIsIncremental(RGWDataSyncCtx* sc,
-                                         const rgw_bucket_sync_pair_info& sync_pair,
-                                         int num_shards, bool* result)
-    : RGWShardCollectCR(sc->cct, initial_concurrent_shards),
-      sc(sc), sync_pair(sync_pair), num_shards(num_shards), result(result)
-  {}
-
-  bool spawn_next() override {
-    // stop spawning if we saw any errors or non-incremental shards
-    if (shard >= num_shards || status < 0 || !*result) {
-      return false;
-    }
-    sync_pair.source_bs.shard_id = shard++;
-    spawn(new CheckBucketShardStatusIsIncremental(sc, sync_pair, result), false);
-    return true;
-  }
-
- private:
-  int handle_result(int r) override {
-    if (r < 0) {
-      ldout(cct, 4) << "failed to read bucket shard status: "
-                    << cpp_strerror(r) << dendl;
-    } else if (shard == 0) {
-      // enable concurrency once the first shard succeeds
-      max_concurrent = max_concurrent_shards;
-    }
-    return r;
-  }
-};
-
-// wrap InitBucketShardSyncStatus with local storage for 'status' and 'objv'
-// and a loop to retry on racing writes
-class InitBucketShardStatusCR : public RGWCoroutine {
-  RGWDataSyncCtx* sc;
-  rgw_bucket_sync_pair_info pair;
-  rgw_bucket_shard_sync_info status;
-  RGWObjVersionTracker objv;
-  const uint64_t gen;
-  const BucketIndexShardsManager& marker_mgr;
-
- public:
-  InitBucketShardStatusCR(RGWDataSyncCtx* sc,
-                          const rgw_bucket_sync_pair_info& pair,
-                          uint64_t gen,
-                          const BucketIndexShardsManager& marker_mgr)
-    : RGWCoroutine(sc->cct), sc(sc), pair(pair), gen(gen), marker_mgr(marker_mgr)
-  {}
-  int operate(const DoutPrefixProvider *dpp) {
-    reenter(this) {
-      // non exclusive create with empty status
-      objv.generate_new_write_ver(cct);
-      yield call(new RGWInitBucketShardSyncStatusCoroutine(sc, pair, status, gen, marker_mgr, objv, false));
-      if (retcode < 0) {
-        return set_cr_error(retcode);
-      }
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
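`CheckAllBucketShardStatusIsIncremental` above uses an adaptive fan-out: it probes a single shard first and only widens to `max_concurrent_shards` once that probe succeeds, so the compat check stays cheap when no per-shard objects exist. A standalone model of that scheduling decision, without the `RGWShardCollectCR` machinery (all names and numbers here are illustrative):

```cpp
#include <iostream>

int main() {
  const int num_shards = 64;
  int max_concurrent = 1;            // models initial_concurrent_shards
  int in_flight = 0, done = 0;
  for (int shard = 0; shard < num_shards;) {
    while (in_flight < max_concurrent && shard < num_shards) {
      ++in_flight;                   // "spawn" one shard status check
      ++shard;
    }
    --in_flight;                     // one check completes
    if (++done == 1)
      max_concurrent = 16;           // widen only after the first success
  }
  std::cout << "checked " << done << " shards\n";
}
```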
-class InitBucketShardStatusCollectCR : public RGWShardCollectCR {
-  static constexpr int max_concurrent_shards = 16;
-  RGWDataSyncCtx* sc;
-  rgw_bucket_sync_pair_info sync_pair;
-  const uint64_t gen;
-  const BucketIndexShardsManager& marker_mgr;
-
-  const int num_shards;
-  int shard = 0;
-
-  int handle_result(int r) override {
-    if (r < 0) {
-      ldout(cct, 4) << "failed to init bucket shard status: "
-                    << cpp_strerror(r) << dendl;
-    }
-    return r;
-  }
- public:
-  InitBucketShardStatusCollectCR(RGWDataSyncCtx* sc,
-                                 const rgw_bucket_sync_pair_info& sync_pair,
-                                 uint64_t gen,
-                                 const BucketIndexShardsManager& marker_mgr,
-                                 int num_shards)
-    : RGWShardCollectCR(sc->cct, max_concurrent_shards),
-      sc(sc), sync_pair(sync_pair), gen(gen), marker_mgr(marker_mgr), num_shards(num_shards)
-  {}
-
-  bool spawn_next() override {
-    if (shard >= num_shards || status < 0) { // stop spawning on any errors
-      return false;
-    }
-    sync_pair.source_bs.shard_id = shard++;
-    spawn(new InitBucketShardStatusCR(sc, sync_pair, gen, marker_mgr), false);
-    return true;
-  }
-};
-
-class RemoveBucketShardStatusCR : public RGWCoroutine {
-  RGWDataSyncCtx* const sc;
-  RGWDataSyncEnv* const sync_env;
-
-  rgw_bucket_sync_pair_info sync_pair;
-  rgw_raw_obj obj;
-  RGWObjVersionTracker objv;
-
-public:
-  RemoveBucketShardStatusCR(RGWDataSyncCtx* sc,
-                            const rgw_bucket_sync_pair_info& sync_pair, uint64_t gen)
-    : RGWCoroutine(sc->cct), sc(sc), sync_env(sc->env),
-      sync_pair(sync_pair),
-      obj(sync_env->svc->zone->get_zone_params().log_pool,
-          RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, gen))
-  {}
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      yield call(new RGWRadosRemoveCR(sync_env->driver, obj, &objv));
-      if (retcode < 0 && retcode != -ENOENT) {
-        ldout(cct, 20) << "ERROR: failed to remove bucket shard status for: " << sync_pair <<
-          ". with error: " << retcode << dendl;
-        return set_cr_error(retcode);
-      }
-      ldout(cct, 20) << "removed bucket shard status object: " << obj.oid << dendl;
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
-
-class RemoveBucketShardStatusCollectCR : public RGWShardCollectCR {
-  static constexpr int max_concurrent_shards = 16;
-  RGWDataSyncCtx* const sc;
-  RGWDataSyncEnv* const sync_env;
-  rgw_bucket_sync_pair_info sync_pair;
-  const uint64_t gen;
-
-  const int num_shards;
-  int shard = 0;
-
-  int handle_result(int r) override {
-    if (r < 0) {
-      ldout(cct, 4) << "failed to remove bucket shard status object: "
-                    << cpp_strerror(r) << dendl;
-    }
-    return r;
-  }
- public:
-  RemoveBucketShardStatusCollectCR(RGWDataSyncCtx* sc,
-                                   const rgw_bucket_sync_pair_info& sync_pair,
-                                   uint64_t gen,
-                                   int num_shards)
-    : RGWShardCollectCR(sc->cct, max_concurrent_shards),
-      sc(sc), sync_env(sc->env), sync_pair(sync_pair), gen(gen), num_shards(num_shards)
-  {}
-
-  bool spawn_next() override {
-    if (shard >= num_shards) {
-      return false;
-    }
-    sync_pair.source_bs.shard_id = shard++;
-    spawn(new RemoveBucketShardStatusCR(sc, sync_pair, gen), false);
-    return true;
-  }
-};
-
-class InitBucketFullSyncStatusCR : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-
-  const rgw_bucket_sync_pair_info& sync_pair;
-  const rgw_raw_obj& status_obj;
-  rgw_bucket_sync_status& status;
-  RGWObjVersionTracker& objv;
-  const RGWBucketInfo& source_info;
-  const bool check_compat;
-
-  const rgw_bucket_index_marker_info& info;
-  BucketIndexShardsManager marker_mgr;
-
-  bool all_incremental = true;
-  bool no_zero = false;
-
-public:
-  InitBucketFullSyncStatusCR(RGWDataSyncCtx* sc,
-                             const rgw_bucket_sync_pair_info& sync_pair,
-                             const rgw_raw_obj& status_obj,
-                             rgw_bucket_sync_status& status,
-                             RGWObjVersionTracker& objv,
-                             const RGWBucketInfo& source_info,
-                             bool check_compat,
-                             const rgw_bucket_index_marker_info& info)
-    : RGWCoroutine(sc->cct), sc(sc), sync_env(sc->env),
-      sync_pair(sync_pair), status_obj(status_obj),
-      status(status), objv(objv), source_info(source_info),
-      check_compat(check_compat), info(info)
-  {}
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      retcode = marker_mgr.from_string(info.max_marker, -1);
-      if (retcode < 0) {
-        lderr(cct) << "failed to parse bilog shard markers: "
-                   << cpp_strerror(retcode) << dendl;
-        return set_cr_error(retcode);
-      }
-
-      status.state = BucketSyncState::Init;
-
-      if (info.oldest_gen == 0) {
-        if (check_compat) {
-          // use shard count from our log gen=0
-          // try to convert existing per-shard incremental status for backward compatibility
-          if (source_info.layout.logs.empty() ||
-              source_info.layout.logs.front().gen > 0) {
-            ldpp_dout(dpp, 20) << "no generation zero when checking compatibility" << dendl;
-            no_zero = true;
-          } else if (auto& log = source_info.layout.logs.front();
-                     log.layout.type != rgw::BucketLogType::InIndex) {
-            ldpp_dout(dpp, 20) << "unrecognized log layout type when checking compatibility " << log.layout.type << dendl;
-            no_zero = true;
-          }
-          if (!no_zero) {
-            yield {
-              const int num_shards0 =
-                source_info.layout.logs.front().layout.in_index.layout.num_shards;
-              call(new CheckAllBucketShardStatusIsIncremental(sc, sync_pair,
-                                                              num_shards0,
-                                                              &all_incremental));
-            }
-            if (retcode < 0) {
-              return set_cr_error(retcode);
-            }
-            if (all_incremental) {
-              // we can use existing status and resume incremental sync
-              status.state = BucketSyncState::Incremental;
-            }
-          } else {
-            all_incremental = false;
-          }
-        }
-      }
-
-      if (status.state != BucketSyncState::Incremental) {
-        // initialize all shard sync status. this will populate the log marker
-        // positions where incremental sync will resume after full sync
-        yield {
-          const int num_shards = marker_mgr.get().size();
-          call(new InitBucketShardStatusCollectCR(sc, sync_pair, info.latest_gen, marker_mgr, num_shards));
-        }
-        if (retcode < 0) {
-          ldout(cct, 20) << "failed to init bucket shard status: "
-                         << cpp_strerror(retcode) << dendl;
-          return set_cr_error(retcode);
-        }
-
-        if (sync_env->sync_module->should_full_sync()) {
-          status.state = BucketSyncState::Full;
-        } else {
-          status.state = BucketSyncState::Incremental;
-        }
-      }
-
-      status.shards_done_with_gen.resize(marker_mgr.get().size());
-      status.incremental_gen = info.latest_gen;
-
-      ldout(cct, 20) << "writing bucket sync status during init. state=" << status.state << ". marker=" << status.full.position.to_str() << dendl;
-
-      // write bucket sync status
-      using CR = RGWSimpleRadosWriteCR<rgw_bucket_sync_status>;
-      yield call(new CR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                        status_obj, status, &objv, false));
-      if (retcode < 0) {
-        ldout(cct, 20) << "failed to write bucket shard status: "
-                       << cpp_strerror(retcode) << dendl;
-        return set_cr_error(retcode);
-      }
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
-
-#define OMAP_READ_MAX_ENTRIES 10
-class RGWReadRecoveringBucketShardsCoroutine : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  rgw::sal::RadosStore* driver;
-
-  const int shard_id;
-  int max_entries;
-
-  set<string>& recovering_buckets;
-  string marker;
-  string error_oid;
-
-  RGWRadosGetOmapKeysCR::ResultPtr omapkeys;
-  set<string> error_entries;
-  int max_omap_entries;
-  int count;
-
-public:
-  RGWReadRecoveringBucketShardsCoroutine(RGWDataSyncCtx *_sc, const int _shard_id,
-                                         set<string>& _recovering_buckets, const int _max_entries)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
-      driver(sync_env->driver), shard_id(_shard_id), max_entries(_max_entries),
-      recovering_buckets(_recovering_buckets), max_omap_entries(OMAP_READ_MAX_ENTRIES)
-  {
-    error_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id) + ".retry";
-  }
-
-  int operate(const DoutPrefixProvider *dpp) override;
-};
-
-int RGWReadRecoveringBucketShardsCoroutine::operate(const DoutPrefixProvider *dpp)
-{
-  reenter(this){
-    //read recovering bucket shards
-    count = 0;
-    do {
-      omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
-      yield call(new RGWRadosGetOmapKeysCR(driver, rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, error_oid),
-                                           marker, max_omap_entries, omapkeys));
-
-      if (retcode == -ENOENT) {
-        break;
-      }
-
-      if (retcode < 0) {
-        ldpp_dout(dpp, 0) << "failed to read recovering bucket shards with "
-                          << cpp_strerror(retcode) << dendl;
-        return set_cr_error(retcode);
-      }
-
-      error_entries = std::move(omapkeys->entries);
-      if (error_entries.empty()) {
-        break;
-      }
-
-      count += error_entries.size();
-      marker = *error_entries.rbegin();
-      recovering_buckets.insert(std::make_move_iterator(error_entries.begin()),
-                                std::make_move_iterator(error_entries.end()));
-    } while (omapkeys->more && count < max_entries);
-
-    return set_cr_done();
-  }
-
-  return 0;
-}
-
-class RGWReadPendingBucketShardsCoroutine : public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  rgw::sal::RadosStore* driver;
-
-  const int shard_id;
-  int max_entries;
-
-  set<string>& pending_buckets;
-  string marker;
-  string status_oid;
-
-  rgw_data_sync_marker* sync_marker;
-  int count;
-
-  std::string next_marker;
-  vector<rgw_data_change_log_entry> log_entries;
-  bool truncated;
-
-public:
-  RGWReadPendingBucketShardsCoroutine(RGWDataSyncCtx *_sc, const int _shard_id,
-                                      set<string>& _pending_buckets,
-                                      rgw_data_sync_marker* _sync_marker, const int _max_entries)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env),
-      driver(sync_env->driver), shard_id(_shard_id), max_entries(_max_entries),
-      pending_buckets(_pending_buckets), sync_marker(_sync_marker)
-  {
-    status_oid = RGWDataSyncStatusManager::shard_obj_name(sc->source_zone, shard_id);
-  }
-
-  int operate(const DoutPrefixProvider *dpp) override;
-};
-int RGWReadPendingBucketShardsCoroutine::operate(const DoutPrefixProvider *dpp)
-{
-  reenter(this){
-    //read sync status marker
-    using CR = RGWSimpleRadosReadCR<rgw_data_sync_marker>;
-    yield call(new CR(dpp, sync_env->async_rados, sync_env->svc->sysobj,
-                      rgw_raw_obj(sync_env->svc->zone->get_zone_params().log_pool, status_oid),
-                      sync_marker));
-    if (retcode < 0) {
-      ldpp_dout(dpp, 0) << "failed to read sync status marker with "
-                        << cpp_strerror(retcode) << dendl;
-      return set_cr_error(retcode);
-    }
-
-    //read pending bucket shards
-    marker = sync_marker->marker;
-    count = 0;
-    do {
-      yield call(new RGWReadRemoteDataLogShardCR(sc, shard_id, marker,
-                                                 &next_marker, &log_entries, &truncated));
-
-      if (retcode == -ENOENT) {
-        break;
-      }
-
-      if (retcode < 0) {
-        ldpp_dout(dpp, 0) << "failed to read remote data log info with "
-                          << cpp_strerror(retcode) << dendl;
-        return set_cr_error(retcode);
-      }
-
-      if (log_entries.empty()) {
-        break;
-      }
-
-      count += log_entries.size();
-      for (const auto& entry : log_entries) {
-        pending_buckets.insert(entry.entry.key);
-      }
-    } while (truncated && count < max_entries);
-
-    return set_cr_done();
-  }
-
-  return 0;
-}
-
-int RGWRemoteDataLog::read_shard_status(const DoutPrefixProvider *dpp, int shard_id, set<string>& pending_buckets, set<string>& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries)
-{
-  // cannot run concurrently with run_sync(), so run in a separate manager
-  RGWCoroutinesManager crs(driver->ctx(), driver->getRados()->get_cr_registry());
-  RGWHTTPManager http_manager(driver->ctx(), crs.get_completion_mgr());
-  int ret = http_manager.start();
-  if (ret < 0) {
-    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
-    return ret;
-  }
-  RGWDataSyncEnv sync_env_local = sync_env;
-  sync_env_local.http_manager = &http_manager;
-  RGWDataSyncCtx sc_local = sc;
-  sc_local.env = &sync_env_local;
-  list<RGWCoroutinesStack *> stacks;
-  RGWCoroutinesStack* recovering_stack = new RGWCoroutinesStack(driver->ctx(), &crs);
-  recovering_stack->call(new RGWReadRecoveringBucketShardsCoroutine(&sc_local, shard_id, recovering_buckets, max_entries));
-  stacks.push_back(recovering_stack);
-  RGWCoroutinesStack* pending_stack = new RGWCoroutinesStack(driver->ctx(), &crs);
-  pending_stack->call(new RGWReadPendingBucketShardsCoroutine(&sc_local, shard_id, pending_buckets, sync_marker, max_entries));
-  stacks.push_back(pending_stack);
-  ret = crs.run(dpp, stacks);
-  http_manager.stop();
-  return ret;
-}
-
-CephContext *RGWBucketPipeSyncStatusManager::get_cct() const
-{
-  return driver->ctx();
-}
-
-void rgw_bucket_entry_owner::decode_json(JSONObj *obj)
-{
-  JSONDecoder::decode_json("ID", id, obj);
-  JSONDecoder::decode_json("DisplayName", display_name, obj);
-}
-
-struct bucket_list_entry {
-  bool delete_marker;
-  rgw_obj_key key;
-  bool is_latest;
-  real_time mtime;
-  string etag;
-  uint64_t size;
-  string storage_class;
-  rgw_bucket_entry_owner owner;
-  uint64_t versioned_epoch;
-  string rgw_tag;
-
-  bucket_list_entry() : delete_marker(false), is_latest(false), size(0), versioned_epoch(0) {}
-
-  void decode_json(JSONObj *obj) {
-    JSONDecoder::decode_json("IsDeleteMarker", delete_marker, obj);
-    JSONDecoder::decode_json("Key", key.name, obj);
-    JSONDecoder::decode_json("VersionId", key.instance, obj);
-    JSONDecoder::decode_json("IsLatest", is_latest, obj);
-    string mtime_str;
-    JSONDecoder::decode_json("RgwxMtime", mtime_str, obj);
-
-    struct tm t;
-    uint32_t nsec;
-    if (parse_iso8601(mtime_str.c_str(), &t, &nsec)) {
-      ceph_timespec ts;
-      ts.tv_sec = (uint64_t)internal_timegm(&t);
-      ts.tv_nsec = nsec;
-      mtime = real_clock::from_ceph_timespec(ts);
-    }
-    JSONDecoder::decode_json("ETag", etag, obj);
-    JSONDecoder::decode_json("Size", size, obj);
-    JSONDecoder::decode_json("StorageClass", storage_class, obj);
-    JSONDecoder::decode_json("Owner", owner, obj);
-    JSONDecoder::decode_json("VersionedEpoch", versioned_epoch, obj);
-    JSONDecoder::decode_json("RgwxTag", rgw_tag, obj);
-    if (key.instance == "null" && !versioned_epoch) {
-      key.instance.clear();
-    }
-  }
-
-  RGWModifyOp get_modify_op() const {
-    if (delete_marker) {
-      return CLS_RGW_OP_LINK_OLH_DM;
-    } else if (!key.instance.empty() && key.instance != "null") {
-      return CLS_RGW_OP_LINK_OLH;
-    } else {
-      return CLS_RGW_OP_ADD;
-    }
-  }
-};
-
-struct bucket_list_result {
-  string name;
-  string prefix;
-  string key_marker;
-  string version_id_marker;
-  int max_keys;
-  bool is_truncated;
-  list<bucket_list_entry> entries;
-
-  bucket_list_result() : max_keys(0), is_truncated(false) {}
-
-  void decode_json(JSONObj *obj) {
-    JSONDecoder::decode_json("Name", name, obj);
-    JSONDecoder::decode_json("Prefix", prefix, obj);
-    JSONDecoder::decode_json("KeyMarker", key_marker, obj);
-    JSONDecoder::decode_json("VersionIdMarker", version_id_marker, obj);
-    JSONDecoder::decode_json("MaxKeys", max_keys, obj);
-    JSONDecoder::decode_json("IsTruncated", is_truncated, obj);
-    JSONDecoder::decode_json("Entries", entries, obj);
-  }
-};
-
-class RGWListRemoteBucketCR: public RGWCoroutine {
-  RGWDataSyncCtx *sc;
-  RGWDataSyncEnv *sync_env;
-  const rgw_bucket_shard& bs;
-  rgw_obj_key marker_position;
-
-  bucket_list_result *result;
-
-public:
-  RGWListRemoteBucketCR(RGWDataSyncCtx *_sc, const rgw_bucket_shard& bs,
-                        rgw_obj_key& _marker_position, bucket_list_result *_result)
-    : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), bs(bs),
-      marker_position(_marker_position), result(_result) {}
-
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      yield {
-        rgw_http_param_pair pairs[] = { { "versions" , NULL },
-                                        { "format" , "json" },
-                                        { "objs-container" , "true" },
-                                        { "key-marker" , marker_position.name.c_str() },
-                                        { "version-id-marker" , marker_position.instance.c_str() },
-                                        { NULL, NULL } };
-        string p = string("/") + bs.bucket.get_key(':', 0);
-        call(new RGWReadRESTResourceCR<bucket_list_result>(sync_env->cct, sc->conn, sync_env->http_manager, p, pairs, result));
-      }
-      if (retcode < 0) {
-        return set_cr_error(retcode);
-      }
-      return set_cr_done();
-    }
-    return 0;
-  }
-};
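Both the recovering/pending readers above and the remote listing coroutines rely on the same marker + truncation pagination protocol: resume each request from the last key seen, and stop once the peer reports no truncation. A minimal, self-contained model of that loop (the data set and page size are made up):

```cpp
#include <iostream>
#include <string>
#include <vector>

struct Page { std::vector<std::string> entries; bool truncated; };

// serve one page of entries strictly after `marker`
Page list_from(const std::vector<std::string>& all,
               const std::string& marker, size_t max) {
  Page p{};
  size_t i = 0;
  while (i < all.size() && !(marker < all[i])) ++i;  // skip past the marker
  for (; i < all.size() && p.entries.size() < max; ++i)
    p.entries.push_back(all[i]);
  p.truncated = i < all.size();
  return p;
}

int main() {
  std::vector<std::string> keys{"a", "b", "c", "d", "e"};
  std::string marker;            // empty marker = start from the beginning
  bool truncated = true;
  while (truncated) {
    Page p = list_from(keys, marker, 2);
    for (auto& k : p.entries) std::cout << k << "\n";
    if (!p.entries.empty()) marker = p.entries.back();
    truncated = p.truncated;
  }
}
```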
"format" , "json" }, - { "marker" , marker.c_str() }, - { "type", "bucket-index" }, - { "generation", gen_str.c_str() }, - { "format-ver", "2"}, - { NULL, NULL } }; - - call(new RGWReadRESTResourceCR(sync_env->cct, sc->conn, sync_env->http_manager, - "/admin/log", pairs, result)); - } - timer.reset(); - if (retcode < 0) { - if (sync_env->counters) { - sync_env->counters->inc(sync_counters::l_poll_err); - } - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; - } -}; - -#define BUCKET_SYNC_UPDATE_MARKER_WINDOW 10 - -class RGWBucketFullSyncMarkerTrack : public RGWSyncShardMarkerTrack { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - const rgw_raw_obj& status_obj; - rgw_bucket_sync_status& sync_status; - RGWSyncTraceNodeRef tn; - RGWObjVersionTracker& objv_tracker; - -public: - RGWBucketFullSyncMarkerTrack(RGWDataSyncCtx *_sc, - const rgw_raw_obj& status_obj, - rgw_bucket_sync_status& sync_status, - RGWSyncTraceNodeRef tn, - RGWObjVersionTracker& objv_tracker) - : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW), - sc(_sc), sync_env(_sc->env), status_obj(status_obj), - sync_status(sync_status), tn(std::move(tn)), objv_tracker(objv_tracker) - {} - - - RGWCoroutine *store_marker(const rgw_obj_key& new_marker, uint64_t index_pos, const real_time& timestamp) override { - sync_status.full.position = new_marker; - sync_status.full.count = index_pos; - - tn->log(20, SSTR("updating marker oid=" << status_obj.oid << " marker=" << new_marker)); - return new RGWSimpleRadosWriteCR( - sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, - status_obj, sync_status, &objv_tracker); - } - - RGWOrderCallCR *allocate_order_control_cr() override { - return new RGWLastCallerWinsCR(sync_env->cct); - } -}; - -// write the incremental sync status and update 'stable_timestamp' on success -class RGWWriteBucketShardIncSyncStatus : public RGWCoroutine { - RGWDataSyncEnv *sync_env; - rgw_raw_obj obj; - rgw_bucket_shard_inc_sync_marker sync_marker; - ceph::real_time* stable_timestamp; - RGWObjVersionTracker& objv_tracker; - std::map attrs; - public: - RGWWriteBucketShardIncSyncStatus(RGWDataSyncEnv *sync_env, - const rgw_raw_obj& obj, - const rgw_bucket_shard_inc_sync_marker& sync_marker, - ceph::real_time* stable_timestamp, - RGWObjVersionTracker& objv_tracker) - : RGWCoroutine(sync_env->cct), sync_env(sync_env), obj(obj), - sync_marker(sync_marker), stable_timestamp(stable_timestamp), - objv_tracker(objv_tracker) - {} - int operate(const DoutPrefixProvider *dpp) { - reenter(this) { - sync_marker.encode_attr(attrs); - - yield call(new RGWSimpleRadosWriteAttrsCR(sync_env->dpp, sync_env->async_rados, sync_env->svc->sysobj, - obj, attrs, &objv_tracker)); - if (retcode < 0) { - return set_cr_error(retcode); - } - if (stable_timestamp) { - *stable_timestamp = sync_marker.timestamp; - } - return set_cr_done(); - } - return 0; - } -}; - -class RGWBucketIncSyncShardMarkerTrack : public RGWSyncShardMarkerTrack { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - rgw_raw_obj obj; - rgw_bucket_shard_inc_sync_marker sync_marker; - - map key_to_marker; - - struct operation { - rgw_obj_key key; - bool is_olh; - }; - map marker_to_op; - std::set pending_olh; // object names with pending olh operations - - RGWSyncTraceNodeRef tn; - RGWObjVersionTracker& objv_tracker; - ceph::real_time* stable_timestamp; - - void handle_finish(const string& marker) override { - auto iter = marker_to_op.find(marker); - if (iter == marker_to_op.end()) { - return; - } - auto& op = iter->second; - 
key_to_marker.erase(op.key); - reset_need_retry(op.key); - if (op.is_olh) { - pending_olh.erase(op.key.name); - } - marker_to_op.erase(iter); - } - -public: - RGWBucketIncSyncShardMarkerTrack(RGWDataSyncCtx *_sc, - const string& _marker_oid, - const rgw_bucket_shard_inc_sync_marker& _marker, - RGWSyncTraceNodeRef tn, - RGWObjVersionTracker& objv_tracker, - ceph::real_time* stable_timestamp) - : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW), - sc(_sc), sync_env(_sc->env), - obj(sync_env->svc->zone->get_zone_params().log_pool, _marker_oid), - sync_marker(_marker), tn(std::move(tn)), objv_tracker(objv_tracker), - stable_timestamp(stable_timestamp) - {} - - const rgw_raw_obj& get_obj() const { return obj; } - - RGWCoroutine* store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override { - sync_marker.position = new_marker; - sync_marker.timestamp = timestamp; - - tn->log(20, SSTR("updating marker marker_oid=" << obj.oid << " marker=" << new_marker << " timestamp=" << timestamp)); - return new RGWWriteBucketShardIncSyncStatus(sync_env, obj, sync_marker, - stable_timestamp, objv_tracker); - } - - /* - * create an index from key -> marker, and from marker -> <key, op>; - * this is useful so that we can ensure that we only have one - * entry for any key that is used. This is needed when doing - * incremental sync of data, and we don't want to run multiple - * concurrent sync operations for the same bucket shard - * Also, we should make sure that we don't run concurrent operations on the same key with - * different ops. - */ - bool index_key_to_marker(const rgw_obj_key& key, const string& marker, bool is_olh) { - auto result = key_to_marker.emplace(key, marker); - if (!result.second) { // exists - set_need_retry(key); - return false; - } - marker_to_op[marker] = operation{key, is_olh}; - if (is_olh) { - // prevent other olh ops from starting on this object name - pending_olh.insert(key.name); - } - return true; - } - - bool can_do_op(const rgw_obj_key& key, bool is_olh) { - // serialize olh ops on the same object name - if (is_olh && pending_olh.count(key.name)) { - tn->log(20, SSTR("sync of " << key << " waiting for pending olh op")); - return false; - } - return (key_to_marker.find(key) == key_to_marker.end()); - } - - RGWOrderCallCR *allocate_order_control_cr() override { - return new RGWLastCallerWinsCR(sync_env->cct); - } -}; - -static bool ignore_sync_error(int err) { - switch (err) { - case -ENOENT: - case -EPERM: - return true; - default: - break; - } - return false; -} - -template <class T, class K> -class RGWBucketSyncSingleEntryCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - rgw_bucket_sync_pipe& sync_pipe; - rgw_bucket_shard& bs; - - rgw_obj_key key; - bool versioned; - std::optional<uint64_t> versioned_epoch; - rgw_bucket_entry_owner owner; - real_time timestamp; - RGWModifyOp op; - RGWPendingState op_state; - - T entry_marker; - RGWSyncShardMarkerTrack<T, K> *marker_tracker; - - int sync_status; - - stringstream error_ss; - - bool error_injection; - - RGWDataSyncModule *data_sync_module; - - rgw_zone_set zones_trace; - - RGWSyncTraceNodeRef tn; - std::string zone_name; - -public: - RGWBucketSyncSingleEntryCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, - const rgw_obj_key& _key, bool _versioned, - std::optional<uint64_t> _versioned_epoch, - real_time& _timestamp, - const rgw_bucket_entry_owner& _owner, - RGWModifyOp _op, RGWPendingState _op_state, - const T& _entry_marker, RGWSyncShardMarkerTrack<T, K> *_marker_tracker, rgw_zone_set& _zones_trace, -
RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - sync_pipe(_sync_pipe), bs(_sync_pipe.info.source_bs), - key(_key), versioned(_versioned), versioned_epoch(_versioned_epoch), - owner(_owner), - timestamp(_timestamp), op(_op), - op_state(_op_state), - entry_marker(_entry_marker), - marker_tracker(_marker_tracker), - sync_status(0){ - stringstream ss; - ss << bucket_shard_str{bs} << "/" << key << "[" << versioned_epoch.value_or(0) << "]"; - set_description() << "bucket sync single entry (source_zone=" << sc->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state; - set_status("init"); - - tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", SSTR(key)); - - tn->log(20, SSTR("bucket sync single entry (source_zone=" << sc->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state)); - error_injection = (sync_env->cct->_conf->rgw_sync_data_inject_err_probability > 0); - - data_sync_module = sync_env->sync_module->get_data_handler(); - - zones_trace = _zones_trace; - zones_trace.insert(sync_env->svc->zone->get_zone().id, _sync_pipe.info.dest_bucket.get_key()); - - if (sc->env->ostr) { - RGWZone* z; - if ((z = sc->env->driver->svc()->zone->find_zone(sc->source_zone))) { - zone_name = z->name; - } - } - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - /* skip entries that are not complete */ - if (op_state != CLS_RGW_STATE_COMPLETE) { - goto done; - } - tn->set_flag(RGW_SNS_FLAG_ACTIVE); - do { - yield { - marker_tracker->reset_need_retry(key); - if (key.name.empty()) { - /* shouldn't happen */ - set_status("skipping empty entry"); - tn->log(0, "entry with empty obj name, skipping"); - goto done; - } - if (error_injection && - rand() % 10000 < cct->_conf->rgw_sync_data_inject_err_probability * 10000.0) { - tn->log(0, SSTR(": injecting data sync error on key=" << key.name)); - retcode = -EIO; - } else if (op == CLS_RGW_OP_ADD || - op == CLS_RGW_OP_LINK_OLH) { - set_status("syncing obj"); - tn->log(5, SSTR("bucket sync: sync obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]")); - if (versioned_epoch) { - pretty_print(sc->env, "Syncing object s3://{}/{} version {} in sync from zone {}\n", - bs.bucket.name, key, *versioned_epoch, zone_name); - } else { - pretty_print(sc->env, "Syncing object s3://{}/{} in sync from zone {}\n", - bs.bucket.name, key, zone_name); - } - call(data_sync_module->sync_object(dpp, sc, sync_pipe, key, versioned_epoch, &zones_trace)); - } else if (op == CLS_RGW_OP_DEL || op == CLS_RGW_OP_UNLINK_INSTANCE) { - set_status("removing obj"); - if (versioned_epoch) { - pretty_print(sc->env, "Deleting object s3://{}/{} version {} in sync from zone {}\n", - bs.bucket.name, key, *versioned_epoch, zone_name); - } else { - pretty_print(sc->env, "Deleting object s3://{}/{} in sync from zone {}\n", - bs.bucket.name, key, zone_name); - } - if (op == CLS_RGW_OP_UNLINK_INSTANCE) { - versioned = true; - } - tn->log(10, SSTR("removing obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]")); - call(data_sync_module->remove_object(dpp, sc, sync_pipe, key, timestamp, versioned, versioned_epoch.value_or(0), &zones_trace)); - // our copy of the object is more recent, continue as if it succeeded - } else if (op == CLS_RGW_OP_LINK_OLH_DM) { - set_status("creating delete marker"); - tn->log(10, 
SSTR("creating delete marker: obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]")); - call(data_sync_module->create_delete_marker(dpp, sc, sync_pipe, key, timestamp, owner, versioned, versioned_epoch.value_or(0), &zones_trace)); - } - tn->set_resource_name(SSTR(bucket_str_noinstance(bs.bucket) << "/" << key)); - } - if (retcode == -ERR_PRECONDITION_FAILED) { - pretty_print(sc->env, "Skipping object s3://{}/{} in sync from zone {}\n", - bs.bucket.name, key, zone_name); - set_status("Skipping object sync: precondition failed (object contains newer change or policy doesn't allow sync)"); - tn->log(0, "Skipping object sync: precondition failed (object contains newer change or policy doesn't allow sync)"); - retcode = 0; - } - } while (marker_tracker->need_retry(key)); - { - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - if (retcode >= 0) { - tn->log(10, "success"); - } else { - tn->log(10, SSTR("failed, retcode=" << retcode << " (" << cpp_strerror(-retcode) << ")")); - } - } - - if (retcode < 0 && retcode != -ENOENT) { - set_status() << "failed to sync obj; retcode=" << retcode; - tn->log(0, SSTR("ERROR: failed to sync object: " - << bucket_shard_str{bs} << "/" << key.name)); - if (!ignore_sync_error(retcode)) { - error_ss << bucket_shard_str{bs} << "/" << key.name; - sync_status = retcode; - } - } - if (!error_ss.str().empty()) { - yield call(sync_env->error_logger->log_error_cr(dpp, sc->conn->get_remote_id(), "data", error_ss.str(), -retcode, string("failed to sync object") + cpp_strerror(-sync_status))); - } -done: - if (sync_status == 0) { - /* update marker */ - set_status() << "calling marker_tracker->finish(" << entry_marker << ")"; - yield call(marker_tracker->finish(entry_marker)); - sync_status = retcode; - } - if (sync_status < 0) { - return set_cr_error(sync_status); - } - return set_cr_done(); - } - return 0; - } -}; - -class RGWBucketFullSyncCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_bucket_sync_pipe& sync_pipe; - rgw_bucket_sync_status& sync_status; - rgw_bucket_shard& bs; - boost::intrusive_ptr lease_cr; - bucket_list_result list_result; - list::iterator entries_iter; - rgw_obj_key list_marker; - bucket_list_entry *entry{nullptr}; - - int total_entries{0}; - - int sync_result{0}; - - const rgw_raw_obj& status_obj; - RGWObjVersionTracker& objv; - - rgw_zone_set zones_trace; - - RGWSyncTraceNodeRef tn; - RGWBucketFullSyncMarkerTrack marker_tracker; - - struct _prefix_handler { - RGWBucketSyncFlowManager::pipe_rules_ref rules; - RGWBucketSyncFlowManager::pipe_rules::prefix_map_t::const_iterator iter; - std::optional cur_prefix; - - void set_rules(RGWBucketSyncFlowManager::pipe_rules_ref& _rules) { - rules = _rules; - } - - bool revalidate_marker(rgw_obj_key *marker) { - if (cur_prefix && - boost::starts_with(marker->name, *cur_prefix)) { - return true; - } - if (!rules) { - return false; - } - iter = rules->prefix_search(marker->name); - if (iter == rules->prefix_end()) { - return false; - } - cur_prefix = iter->first; - marker->name = *cur_prefix; - marker->instance.clear(); - return true; - } - - bool check_key_handled(const rgw_obj_key& key) { - if (!rules) { - return false; - } - if (cur_prefix && - boost::starts_with(key.name, *cur_prefix)) { - return true; - } - iter = rules->prefix_search(key.name); - if (iter == rules->prefix_end()) { - return false; - } - cur_prefix = iter->first; - return boost::starts_with(key.name, iter->first); - } - } prefix_handler; - -public: - 
RGWBucketFullSyncCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, - const rgw_raw_obj& status_obj, - boost::intrusive_ptr lease_cr, - rgw_bucket_sync_status& sync_status, - RGWSyncTraceNodeRef tn_parent, - RGWObjVersionTracker& objv_tracker) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - sync_pipe(_sync_pipe), sync_status(sync_status), - bs(_sync_pipe.info.source_bs), - lease_cr(std::move(lease_cr)), status_obj(status_obj), objv(objv_tracker), - tn(sync_env->sync_tracer->add_node(tn_parent, "full_sync", - SSTR(bucket_shard_str{bs}))), - marker_tracker(sc, status_obj, sync_status, tn, objv_tracker) - { - zones_trace.insert(sc->source_zone.id, sync_pipe.info.dest_bucket.get_key()); - prefix_handler.set_rules(sync_pipe.get_rules()); - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int RGWBucketFullSyncCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - list_marker = sync_status.full.position; - - total_entries = sync_status.full.count; - do { - if (lease_cr && !lease_cr->is_locked()) { - drain_all(); - tn->log(1, "no lease or lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - set_status("listing remote bucket"); - tn->log(20, "listing bucket for full sync"); - - if (!prefix_handler.revalidate_marker(&list_marker)) { - set_status() << "finished iterating over all available prefixes: last marker=" << list_marker; - tn->log(20, SSTR("finished iterating over all available prefixes: last marker=" << list_marker)); - break; - } - - yield call(new RGWListRemoteBucketCR(sc, bs, list_marker, &list_result)); - if (retcode < 0 && retcode != -ENOENT) { - set_status("failed bucket listing, going down"); - drain_all(); - return set_cr_error(retcode); - } - if (list_result.entries.size() > 0) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - } - entries_iter = list_result.entries.begin(); - for (; entries_iter != list_result.entries.end(); ++entries_iter) { - if (lease_cr && !lease_cr->is_locked()) { - drain_all(); - tn->log(1, "no lease or lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - tn->log(20, SSTR("[full sync] syncing object: " - << bucket_shard_str{bs} << "/" << entries_iter->key)); - entry = &(*entries_iter); - list_marker = entries_iter->key; - if (!prefix_handler.check_key_handled(entries_iter->key)) { - set_status() << "skipping entry due to policy rules: " << entries_iter->key; - tn->log(20, SSTR("skipping entry due to policy rules: " << entries_iter->key)); - continue; - } - total_entries++; - if (!marker_tracker.start(entry->key, total_entries, real_time())) { - tn->log(0, SSTR("ERROR: cannot start syncing " << entry->key << ". 
Duplicate entry?")); - } else { - using SyncCR = RGWBucketSyncSingleEntryCR; - yield spawn(new SyncCR(sc, sync_pipe, entry->key, - false, /* versioned, only matters for object removal */ - entry->versioned_epoch, entry->mtime, - entry->owner, entry->get_modify_op(), CLS_RGW_STATE_COMPLETE, - entry->key, &marker_tracker, zones_trace, tn), - false); - } - drain_with_cb(cct->_conf->rgw_bucket_sync_spawn_window, - [&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_result = ret; - } - return 0; - }); - } - } while (list_result.is_truncated && sync_result == 0); - set_status("done iterating over all objects"); - - /* wait for all operations to complete */ - drain_all_cb([&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_result = ret; - } - return 0; - }); - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - if (lease_cr && !lease_cr->is_locked()) { - tn->log(1, "no lease or lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - yield call(marker_tracker.flush()); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: marker_tracker.flush() returned retcode=" << retcode)); - return set_cr_error(retcode); - } - /* update sync state to incremental */ - if (sync_result == 0) { - sync_status.state = BucketSyncState::Incremental; - tn->log(5, SSTR("set bucket state=" << sync_status.state)); - yield call(new RGWSimpleRadosWriteCR( - dpp, sync_env->async_rados, sync_env->svc->sysobj, - status_obj, sync_status, &objv)); - tn->log(5, SSTR("bucket status objv=" << objv)); - } else { - tn->log(10, SSTR("backing out with sync_status=" << sync_result)); - } - if (retcode < 0 && sync_result == 0) { /* actually tried to set incremental state and failed */ - tn->log(0, SSTR("ERROR: failed to set sync state on bucket " - << bucket_shard_str{bs} << " retcode=" << retcode)); - return set_cr_error(retcode); - } - if (sync_result < 0) { - return set_cr_error(sync_result); - } - return set_cr_done(); - } - return 0; -} - -static bool has_olh_epoch(RGWModifyOp op) { - return op == CLS_RGW_OP_LINK_OLH || op == CLS_RGW_OP_UNLINK_INSTANCE; -} - -class RGWBucketShardIsDoneCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_bucket_sync_status bucket_status; - const rgw_raw_obj& bucket_status_obj; - const int shard_id; - RGWObjVersionTracker objv_tracker; - const next_bilog_result& next_log; - const uint64_t generation; - -public: - RGWBucketShardIsDoneCR(RGWDataSyncCtx *_sc, const rgw_raw_obj& _bucket_status_obj, - int _shard_id, const next_bilog_result& _next_log, const uint64_t _gen) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - bucket_status_obj(_bucket_status_obj), - shard_id(_shard_id), next_log(_next_log), generation(_gen) {} - - int operate(const DoutPrefixProvider* dpp) override - { - reenter(this) { - do { - // read bucket sync status - objv_tracker.clear(); - using ReadCR = RGWSimpleRadosReadCR; - yield call(new ReadCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - bucket_status_obj, &bucket_status, false, &objv_tracker)); - if (retcode < 0) { - ldpp_dout(dpp, 20) << "failed to read bucket shard status: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - - if (bucket_status.state != BucketSyncState::Incremental) { - // exit with success to avoid stale shard being - // retried in error repo if we lost a race - ldpp_dout(dpp, 20) << "RGWBucketShardIsDoneCR found sync state = " << bucket_status.state << dendl; - return set_cr_done(); - } - - 
if (bucket_status.incremental_gen != generation) { - // exit with success to avoid stale shard being - // retried in error repo if we lost a race - ldpp_dout(dpp, 20) << "RGWBucketShardIsDoneCR expected gen: " << generation - << ", got: " << bucket_status.incremental_gen << dendl; - return set_cr_done(); - } - - yield { - // update bucket_status after a shard is done with current gen - auto& done = bucket_status.shards_done_with_gen; - done[shard_id] = true; - - // increment gen if all shards are already done with current gen - if (std::all_of(done.begin(), done.end(), - [] (const bool done){return done; } )) { - bucket_status.incremental_gen = next_log.generation; - done.clear(); - done.resize(next_log.num_shards, false); - } - ldpp_dout(dpp, 20) << "bucket status incremental gen is " << bucket_status.incremental_gen << dendl; - using WriteCR = RGWSimpleRadosWriteCR; - call(new WriteCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - bucket_status_obj, bucket_status, &objv_tracker, false)); - } - if (retcode < 0 && retcode != -ECANCELED) { - ldpp_dout(dpp, 20) << "failed to write bucket sync status: " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } else if (retcode >= 0) { - return set_cr_done(); - } - } while (retcode == -ECANCELED); - } - return 0; - } -}; - -class RGWBucketShardIncrementalSyncCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_bucket_sync_pipe& sync_pipe; - RGWBucketSyncFlowManager::pipe_rules_ref rules; - rgw_bucket_shard& bs; - const rgw_raw_obj& bucket_status_obj; - boost::intrusive_ptr lease_cr; - bilog_list_result extended_result; - list list_result; - int next_num_shards; - uint64_t next_gen; - bool truncated; - - list::iterator entries_iter, entries_end; - map, pair > squash_map; - rgw_bucket_shard_sync_info& sync_info; - uint64_t generation; - rgw_obj_key key; - rgw_bi_log_entry *entry{nullptr}; - bool updated_status{false}; - rgw_zone_id zone_id; - string target_location_key; - - string cur_id; - - int sync_status{0}; - bool syncstopped{false}; - - RGWSyncTraceNodeRef tn; - RGWBucketIncSyncShardMarkerTrack marker_tracker; - -public: - RGWBucketShardIncrementalSyncCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, - const std::string& shard_status_oid, - const rgw_raw_obj& _bucket_status_obj, - boost::intrusive_ptr lease_cr, - rgw_bucket_shard_sync_info& sync_info, - uint64_t generation, - RGWSyncTraceNodeRef& _tn_parent, - RGWObjVersionTracker& objv_tracker, - ceph::real_time* stable_timestamp) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - sync_pipe(_sync_pipe), bs(_sync_pipe.info.source_bs), - bucket_status_obj(_bucket_status_obj), lease_cr(std::move(lease_cr)), - sync_info(sync_info), generation(generation), zone_id(sync_env->svc->zone->get_zone().id), - tn(sync_env->sync_tracer->add_node(_tn_parent, "inc_sync", - SSTR(bucket_shard_str{bs}))), - marker_tracker(sc, shard_status_oid, sync_info.inc_marker, tn, - objv_tracker, stable_timestamp) - { - set_description() << "bucket shard incremental sync bucket=" - << bucket_shard_str{bs}; - set_status("init"); - rules = sync_pipe.get_rules(); - target_location_key = sync_pipe.info.dest_bucket.get_key(); - } - - bool check_key_handled(const rgw_obj_key& key) { - if (!rules) { - return false; - } - auto iter = rules->prefix_search(key.name); - if (iter == rules->prefix_end()) { - return false; - } - return boost::starts_with(key.name, iter->first); - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int 
RGWBucketShardIncrementalSyncCR::operate(const DoutPrefixProvider *dpp) -{ - int ret; - reenter(this) { - do { - if (lease_cr && !lease_cr->is_locked()) { - drain_all(); - tn->log(1, "no lease or lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - tn->log(20, SSTR("listing bilog for incremental sync; position=" << sync_info.inc_marker.position)); - set_status() << "listing bilog; position=" << sync_info.inc_marker.position; - yield call(new RGWListBucketIndexLogCR(sc, bs, sync_info.inc_marker.position, generation, &extended_result)); - if (retcode < 0 && retcode != -ENOENT) { - /* wait for all operations to complete */ - drain_all(); - return set_cr_error(retcode); - } - list_result = std::move(extended_result.entries); - truncated = extended_result.truncated; - if (extended_result.next_log) { - next_gen = extended_result.next_log->generation; - next_num_shards = extended_result.next_log->num_shards; - } - - squash_map.clear(); - entries_iter = list_result.begin(); - entries_end = list_result.end(); - for (; entries_iter != entries_end; ++entries_iter) { - auto e = *entries_iter; - if (e.op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP) { - ldpp_dout(dpp, 20) << "syncstop at: " << e.timestamp << ". marker: " << e.id << dendl; - syncstopped = true; - entries_end = std::next(entries_iter); // stop after this entry - break; - } - if (e.op == RGWModifyOp::CLS_RGW_OP_RESYNC) { - ldpp_dout(dpp, 20) << "syncstart at: " << e.timestamp << ". marker: " << e.id << dendl; - continue; - } - if (e.op == CLS_RGW_OP_CANCEL) { - continue; - } - if (e.state != CLS_RGW_STATE_COMPLETE) { - continue; - } - if (e.zones_trace.exists(zone_id.id, target_location_key)) { - continue; - } - auto& squash_entry = squash_map[make_pair(e.object, e.instance)]; - // don't squash over olh entries - we need to apply their olh_epoch - if (has_olh_epoch(squash_entry.second) && !has_olh_epoch(e.op)) { - continue; - } - if (squash_entry.first <= e.timestamp) { - squash_entry = make_pair<>(e.timestamp, e.op); - } - } - - entries_iter = list_result.begin(); - for (; entries_iter != entries_end; ++entries_iter) { - if (lease_cr && !lease_cr->is_locked()) { - drain_all(); - tn->log(1, "no lease or lease is lost, abort"); - return set_cr_error(-ECANCELED); - } - entry = &(*entries_iter); - { - ssize_t p = entry->id.find('#'); /* entries might have explicit shard info in them, e.g., 6#00000000004.94.3 */ - if (p < 0) { - cur_id = entry->id; - } else { - cur_id = entry->id.substr(p + 1); - } - } - sync_info.inc_marker.position = cur_id; - - if (entry->op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP || entry->op == RGWModifyOp::CLS_RGW_OP_RESYNC) { - ldpp_dout(dpp, 20) << "detected syncstop or resync on " << entries_iter->timestamp << ", skipping entry" << dendl; - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - - if (!key.set(rgw_obj_index_key{entry->object, entry->instance})) { - set_status() << "parse_raw_oid() on " << entry->object << " returned false, skipping entry"; - tn->log(20, SSTR("parse_raw_oid() on " << entry->object << " returned false, skipping entry")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - - tn->log(20, SSTR("parsed entry: id=" << cur_id << " iter->object=" << entry->object << " iter->instance=" << entry->instance << " name=" << key.name << " instance=" << key.instance << " ns=" << key.ns)); - - if (!key.ns.empty()) { - set_status() << "skipping entry in namespace: " << entry->object; - tn->log(20, SSTR("skipping entry in namespace: " << 
entry->object)); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - - if (!check_key_handled(key)) { - set_status() << "skipping entry due to policy rules: " << entry->object; - tn->log(20, SSTR("skipping entry due to policy rules: " << entry->object)); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - - set_status() << "got entry.id=" << cur_id << " key=" << key << " op=" << (int)entry->op; - if (entry->op == CLS_RGW_OP_CANCEL) { - set_status() << "canceled operation, skipping"; - tn->log(20, SSTR("skipping object: " - << bucket_shard_str{bs} << "/" << key << ": canceled operation")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - if (entry->state != CLS_RGW_STATE_COMPLETE) { - set_status() << "non-complete operation, skipping"; - tn->log(20, SSTR("skipping object: " - << bucket_shard_str{bs} << "/" << key << ": non-complete operation")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - if (entry->zones_trace.exists(zone_id.id, target_location_key)) { - set_status() << "redundant operation, skipping"; - tn->log(20, SSTR("skipping object: " - << bucket_shard_str{bs} << "/" << key << ": redundant operation")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - if (make_pair<>(entry->timestamp, entry->op) != squash_map[make_pair(entry->object, entry->instance)]) { - set_status() << "squashed operation, skipping"; - tn->log(20, SSTR("skipping object: " - << bucket_shard_str{bs} << "/" << key << ": squashed operation")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - tn->set_flag(RGW_SNS_FLAG_ACTIVE); - tn->log(20, SSTR("syncing object: " - << bucket_shard_str{bs} << "/" << key)); - updated_status = false; - while (!marker_tracker.can_do_op(key, has_olh_epoch(entry->op))) { - if (!updated_status) { - set_status() << "can't do op, conflicting inflight operation"; - updated_status = true; - } - tn->log(5, SSTR("can't do op on key=" << key << " need to wait for conflicting operation to complete")); - yield wait_for_child(); - bool again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(0, SSTR("ERROR: a child operation returned error (ret=" << ret << ")")); - sync_status = ret; - /* we have reported this error */ - } - } - if (sync_status != 0) - break; - } - if (sync_status != 0) { - /* get error, stop */ - break; - } - if (!marker_tracker.index_key_to_marker(key, cur_id, has_olh_epoch(entry->op))) { - set_status() << "can't do op, sync already in progress for object"; - tn->log(20, SSTR("skipping sync of entry: " << cur_id << ":" << key << " sync already in progress for object")); - marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp); - continue; - } - // yield { - set_status() << "start object sync"; - if (!marker_tracker.start(cur_id, 0, entry->timestamp)) { - tn->log(0, SSTR("ERROR: cannot start syncing " << cur_id << ". 
Duplicate entry?")); - } else { - std::optional versioned_epoch; - rgw_bucket_entry_owner owner(entry->owner, entry->owner_display_name); - if (entry->ver.pool < 0) { - versioned_epoch = entry->ver.epoch; - } - tn->log(20, SSTR("entry->timestamp=" << entry->timestamp)); - using SyncCR = RGWBucketSyncSingleEntryCR; - spawn(new SyncCR(sc, sync_pipe, key, - entry->is_versioned(), versioned_epoch, - entry->timestamp, owner, entry->op, entry->state, - cur_id, &marker_tracker, entry->zones_trace, tn), - false); - } - // } - drain_with_cb(cct->_conf->rgw_bucket_sync_spawn_window, - [&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - } - return 0; - }); - } - - } while (!list_result.empty() && sync_status == 0 && !syncstopped); - - drain_all_cb([&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - } - return 0; - }); - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - - if (syncstopped) { - // transition to StateStopped in RGWSyncBucketShardCR. if sync is - // still disabled, we'll delete the sync status object. otherwise we'll - // restart full sync to catch any changes that happened while sync was - // disabled - sync_info.state = rgw_bucket_shard_sync_info::StateStopped; - return set_cr_done(); - } - - yield call(marker_tracker.flush()); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: marker_tracker.flush() returned retcode=" << retcode)); - return set_cr_error(retcode); - } - if (sync_status < 0) { - tn->log(10, SSTR("backing out with sync_status=" << sync_status)); - return set_cr_error(sync_status); - } - - if (!truncated && extended_result.next_log) { - yield call(new RGWBucketShardIsDoneCR(sc, bucket_status_obj, bs.shard_id, *extended_result.next_log, generation)); - if (retcode < 0) { - ldout(cct, 20) << "failed to update bucket sync status: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - yield { - // delete the shard status object - auto status_obj = sync_env->svc->rados->obj(marker_tracker.get_obj()); - retcode = status_obj.open(dpp); - if (retcode < 0) { - return set_cr_error(retcode); - } - call(new RGWRadosRemoveOidCR(sync_env->driver, std::move(status_obj))); - if (retcode < 0) { - ldpp_dout(dpp, 20) << "failed to remove shard status object: " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - } - } - - return set_cr_done(); - } - return 0; -} - -class RGWGetBucketPeersCR : public RGWCoroutine { - RGWDataSyncEnv *sync_env; - - std::optional target_bucket; - std::optional source_zone; - std::optional source_bucket; - - rgw_sync_pipe_info_set *pipes; - map buckets_info; - map::iterator siiter; - std::optional target_bucket_info; - std::optional source_bucket_info; - - rgw_sync_pipe_info_set::iterator siter; - - std::shared_ptr source_policy; - std::shared_ptr target_policy; - - RGWSyncTraceNodeRef tn; - - using pipe_const_iter = map::const_iterator; - - static pair get_pipe_iters(const map& m, std::optional zone) { - if (!zone) { - return { m.begin(), m.end() }; - } - - auto b = m.find(*zone); - if (b == m.end()) { - return { b, b }; - } - return { b, std::next(b) }; - } - - void filter_sources(std::optional source_zone, - std::optional source_bucket, - const map& all_sources, - rgw_sync_pipe_info_set *result) { - ldpp_dout(sync_env->dpp, 20) << __func__ << ": source_zone=" << source_zone.value_or(rgw_zone_id("*")).id - << " source_bucket=" << source_bucket.value_or(rgw_bucket()) - << " all_sources.size()=" 
<< all_sources.size() << dendl; - auto iters = get_pipe_iters(all_sources, source_zone); - for (auto i = iters.first; i != iters.second; ++i) { - for (auto& handler : i->second) { - if (!handler.specific()) { - ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": skipping" << dendl; - continue; - } - if (source_bucket && - !source_bucket->match(*handler.source.bucket)) { - continue; - } - ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": adding" << dendl; - result->insert(handler, source_bucket_info, target_bucket_info); - } - } - } - - void filter_targets(std::optional target_zone, - std::optional target_bucket, - const map& all_targets, - rgw_sync_pipe_info_set *result) { - ldpp_dout(sync_env->dpp, 20) << __func__ << ": target_zone=" << source_zone.value_or(rgw_zone_id("*")).id - << " target_bucket=" << source_bucket.value_or(rgw_bucket()) - << " all_targets.size()=" << all_targets.size() << dendl; - auto iters = get_pipe_iters(all_targets, target_zone); - for (auto i = iters.first; i != iters.second; ++i) { - for (auto& handler : i->second) { - if (target_bucket && - handler.dest.bucket && - !target_bucket->match(*handler.dest.bucket)) { - ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": skipping" << dendl; - continue; - } - ldpp_dout(sync_env->dpp, 20) << __func__ << ": pipe_handler=" << handler << ": adding" << dendl; - result->insert(handler, source_bucket_info, target_bucket_info); - } - } - } - - void update_from_target_bucket_policy(); - void update_from_source_bucket_policy(); - - struct GetHintTargets : public RGWGenericAsyncCR::Action { - RGWDataSyncEnv *sync_env; - rgw_bucket source_bucket; - std::set targets; - - GetHintTargets(RGWDataSyncEnv *_sync_env, - const rgw_bucket& _source_bucket) : sync_env(_sync_env), - source_bucket(_source_bucket) {} - int operate() override { - int r = sync_env->svc->bucket_sync->get_bucket_sync_hints(sync_env->dpp, - source_bucket, - nullptr, - &targets, - null_yield); - if (r < 0) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: " << __func__ << "(): failed to fetch bucket sync hints for bucket=" << source_bucket << dendl; - return r; - } - - return 0; - } - }; - - std::shared_ptr get_hint_targets_action; - std::set::iterator hiter; - -public: - RGWGetBucketPeersCR(RGWDataSyncEnv *_sync_env, - std::optional _target_bucket, - std::optional _source_zone, - std::optional _source_bucket, - rgw_sync_pipe_info_set *_pipes, - const RGWSyncTraceNodeRef& _tn_parent) - : RGWCoroutine(_sync_env->cct), - sync_env(_sync_env), - target_bucket(_target_bucket), - source_zone(_source_zone), - source_bucket(_source_bucket), - pipes(_pipes), - tn(sync_env->sync_tracer->add_node(_tn_parent, "get_bucket_peers", - SSTR( "target=" << target_bucket.value_or(rgw_bucket()) - << ":source=" << target_bucket.value_or(rgw_bucket()) - << ":source_zone=" << source_zone.value_or(rgw_zone_id("*")).id))) { - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -std::ostream& operator<<(std::ostream& out, std::optional& bs) { - if (!bs) { - out << "*"; - } else { - out << *bs; - } - return out; -} - -static RGWCoroutine* sync_bucket_shard_cr(RGWDataSyncCtx* sc, - boost::intrusive_ptr lease, - const rgw_bucket_sync_pair_info& sync_pair, - std::optional gen, - const RGWSyncTraceNodeRef& tn, - ceph::real_time* progress); - -RGWRunBucketSourcesSyncCR::RGWRunBucketSourcesSyncCR(RGWDataSyncCtx *_sc, - boost::intrusive_ptr lease_cr, - const rgw_bucket_shard& source_bs, - const 
RGWSyncTraceNodeRef& _tn_parent, - std::optional gen, - ceph::real_time* progress) - : RGWCoroutine(_sc->env->cct), sc(_sc), sync_env(_sc->env), - lease_cr(std::move(lease_cr)), - tn(sync_env->sync_tracer->add_node( - _tn_parent, "bucket_sync_sources", - SSTR( "source=" << source_bs << ":source_zone=" << sc->source_zone))), - progress(progress), - gen(gen) -{ - sync_pair.source_bs = source_bs; -} - -int RGWRunBucketSourcesSyncCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - yield call(new RGWGetBucketPeersCR(sync_env, std::nullopt, sc->source_zone, - sync_pair.source_bs.bucket, &pipes, tn)); - if (retcode < 0 && retcode != -ENOENT) { - tn->log(0, SSTR("ERROR: failed to read sync status for bucket. error: " << retcode)); - return set_cr_error(retcode); - } - - ldpp_dout(dpp, 20) << __func__ << "(): requested source_bs=" << sync_pair.source_bs << dendl; - - if (pipes.empty()) { - ldpp_dout(dpp, 20) << __func__ << "(): no relevant sync pipes found" << dendl; - return set_cr_done(); - } - - shard_progress.resize(pipes.size()); - cur_shard_progress = shard_progress.begin(); - - for (siter = pipes.begin(); siter != pipes.end(); ++siter, ++cur_shard_progress) { - ldpp_dout(dpp, 20) << __func__ << "(): sync pipe=" << *siter << dendl; - - sync_pair.dest_bucket = siter->target.get_bucket(); - sync_pair.handler = siter->handler; - - ldpp_dout(dpp, 20) << __func__ << "(): sync_pair=" << sync_pair << dendl; - - yield_spawn_window(sync_bucket_shard_cr(sc, lease_cr, sync_pair, - gen, tn, &*cur_shard_progress), - cct->_conf->rgw_bucket_sync_spawn_window, - [&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, SSTR("ERROR: a sync operation returned error: " << ret)); - } - return ret; - }); - } - drain_all_cb([&](uint64_t stack_id, int ret) { - if (ret < 0) { - tn->log(10, SSTR("a sync operation returned error: " << ret)); - } - return ret; - }); - if (progress) { - *progress = *std::min_element(shard_progress.begin(), shard_progress.end()); - } - return set_cr_done(); - } - - return 0; -} - -class RGWSyncGetBucketInfoCR : public RGWCoroutine { - RGWDataSyncEnv *sync_env; - rgw_bucket bucket; - RGWBucketInfo *pbucket_info; - map *pattrs; - RGWMetaSyncEnv meta_sync_env; - - RGWSyncTraceNodeRef tn; - -public: - RGWSyncGetBucketInfoCR(RGWDataSyncEnv *_sync_env, - const rgw_bucket& _bucket, - RGWBucketInfo *_pbucket_info, - map *_pattrs, - const RGWSyncTraceNodeRef& _tn_parent) - : RGWCoroutine(_sync_env->cct), - sync_env(_sync_env), - bucket(_bucket), - pbucket_info(_pbucket_info), - pattrs(_pattrs), - tn(sync_env->sync_tracer->add_node(_tn_parent, "get_bucket_info", - SSTR(bucket))) { - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int RGWSyncGetBucketInfoCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->driver, bucket, pbucket_info, pattrs, dpp)); - if (retcode == -ENOENT) { - /* bucket instance info has not been synced in yet, fetch it now */ - yield { - tn->log(10, SSTR("no local info for bucket:" << ": fetching metadata")); - string raw_key = string("bucket.instance:") + bucket.get_key(); - - meta_sync_env.init(dpp, cct, sync_env->driver, sync_env->svc->zone->get_master_conn(), sync_env->async_rados, - sync_env->http_manager, sync_env->error_logger, sync_env->sync_tracer); - - call(new RGWMetaSyncSingleEntryCR(&meta_sync_env, raw_key, - string() /* no marker */, - MDLOG_STATUS_COMPLETE, - NULL /* no marker tracker */, - tn)); - } - if (retcode < 0) { - tn->log(0, 
SSTR("ERROR: failed to fetch bucket instance info for " << bucket_str{bucket})); - return set_cr_error(retcode); - } - - yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->driver, bucket, pbucket_info, pattrs, dpp)); - } - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{bucket})); - return set_cr_error(retcode); - } - - return set_cr_done(); - } - - return 0; -} - -void RGWGetBucketPeersCR::update_from_target_bucket_policy() -{ - if (!target_policy || - !target_policy->policy_handler || - !pipes) { - return; - } - - auto handler = target_policy->policy_handler.get(); - - filter_sources(source_zone, - source_bucket, - handler->get_sources(), - pipes); - - for (siter = pipes->begin(); siter != pipes->end(); ++siter) { - if (!siter->source.has_bucket_info()) { - buckets_info.emplace(siter->source.get_bucket(), all_bucket_info()); - } - if (!siter->target.has_bucket_info()) { - buckets_info.emplace(siter->target.get_bucket(), all_bucket_info()); - } - } -} - -void RGWGetBucketPeersCR::update_from_source_bucket_policy() -{ - if (!source_policy || - !source_policy->policy_handler || - !pipes) { - return; - } - - auto handler = source_policy->policy_handler.get(); - - filter_targets(sync_env->svc->zone->get_zone().id, - target_bucket, - handler->get_targets(), - pipes); - - for (siter = pipes->begin(); siter != pipes->end(); ++siter) { - if (!siter->source.has_bucket_info()) { - buckets_info.emplace(siter->source.get_bucket(), all_bucket_info()); - } - if (!siter->target.has_bucket_info()) { - buckets_info.emplace(siter->target.get_bucket(), all_bucket_info()); - } - } -} - - -class RGWSyncGetBucketSyncPolicyHandlerCR : public RGWCoroutine { - RGWDataSyncEnv *sync_env; - rgw_bucket bucket; - rgw_bucket_get_sync_policy_params get_policy_params; - - std::shared_ptr policy; - - RGWSyncTraceNodeRef tn; - - int i; - -public: - RGWSyncGetBucketSyncPolicyHandlerCR(RGWDataSyncEnv *_sync_env, - std::optional zone, - const rgw_bucket& _bucket, - std::shared_ptr& _policy, - const RGWSyncTraceNodeRef& _tn_parent) - : RGWCoroutine(_sync_env->cct), - sync_env(_sync_env), - bucket(_bucket), - policy(_policy), - tn(sync_env->sync_tracer->add_node(_tn_parent, "get_sync_policy_handler", - SSTR(bucket))) { - get_policy_params.zone = zone; - get_policy_params.bucket = bucket; - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - for (i = 0; i < 2; ++i) { - yield call(new RGWBucketGetSyncPolicyHandlerCR(sync_env->async_rados, - sync_env->driver, - get_policy_params, - policy, - dpp)); - if (retcode < 0 && - retcode != -ENOENT) { - return set_cr_error(retcode); - } - - if (retcode == 0) { - return set_cr_done(); - } - - /* bucket instance was not found, - * try to get bucket instance info, can trigger - * metadata sync of bucket instance - */ - yield call(new RGWSyncGetBucketInfoCR(sync_env, - bucket, - nullptr, - nullptr, - tn)); - if (retcode < 0) { - return set_cr_error(retcode); - } - } - } - - return 0; - } -}; - - -int RGWGetBucketPeersCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - if (pipes) { - pipes->clear(); - } - if (target_bucket) { - target_policy = make_shared(); - yield call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env, - nullopt, - *target_bucket, - target_policy, - tn)); - if (retcode < 0 && - retcode != -ENOENT) { - return set_cr_error(retcode); - } - - update_from_target_bucket_policy(); - } - - if (source_bucket && source_zone) { - source_policy = make_shared(); - yield 
call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env, - source_zone, - *source_bucket, - source_policy, - tn)); - if (retcode < 0 && - retcode != -ENOENT) { - return set_cr_error(retcode); - } - - if (source_policy->policy_handler) { - auto& opt_bucket_info = source_policy->policy_handler->get_bucket_info(); - auto& opt_attrs = source_policy->policy_handler->get_bucket_attrs(); - if (opt_bucket_info && opt_attrs) { - source_bucket_info.emplace(); - source_bucket_info->bucket_info = *opt_bucket_info; - source_bucket_info->attrs = *opt_attrs; - } - } - - if (!target_bucket) { - get_hint_targets_action = make_shared(sync_env, *source_bucket); - - yield call(new RGWGenericAsyncCR(cct, sync_env->async_rados, - get_hint_targets_action)); - if (retcode < 0) { - return set_cr_error(retcode); - } - - /* hints might have incomplete bucket ids, - * in which case we need to figure out the current - * bucket_id - */ - for (hiter = get_hint_targets_action->targets.begin(); - hiter != get_hint_targets_action->targets.end(); - ++hiter) { - ldpp_dout(dpp, 20) << "Got sync hint for bucket=" << *source_bucket << ": " << hiter->get_key() << dendl; - - target_policy = make_shared(); - yield call(new RGWSyncGetBucketSyncPolicyHandlerCR(sync_env, - nullopt, - *hiter, - target_policy, - tn)); - if (retcode < 0 && - retcode != -ENOENT) { - return set_cr_error(retcode); - } - update_from_target_bucket_policy(); - } - } - } - - update_from_source_bucket_policy(); - - for (siiter = buckets_info.begin(); siiter != buckets_info.end(); ++siiter) { - if (siiter->second.bucket_info.bucket.name.empty()) { - yield call(new RGWSyncGetBucketInfoCR(sync_env, siiter->first, - &siiter->second.bucket_info, - &siiter->second.attrs, - tn)); - } - } - - if (pipes) { - pipes->update_empty_bucket_info(buckets_info); - } - - return set_cr_done(); - } - - return 0; -} - -class RGWSyncBucketShardCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - boost::intrusive_ptr lease_cr; - rgw_bucket_sync_pair_info sync_pair; - rgw_bucket_sync_pipe& sync_pipe; - bool& bucket_stopped; - uint64_t generation; - ceph::real_time* progress; - - const std::string shard_status_oid; - const rgw_raw_obj bucket_status_obj; - rgw_bucket_shard_sync_info sync_status; - RGWObjVersionTracker objv_tracker; - - RGWSyncTraceNodeRef tn; - -public: - RGWSyncBucketShardCR(RGWDataSyncCtx *_sc, - boost::intrusive_ptr lease_cr, - const rgw_bucket_sync_pair_info& _sync_pair, - rgw_bucket_sync_pipe& sync_pipe, - bool& bucket_stopped, - uint64_t generation, - const RGWSyncTraceNodeRef& tn, - ceph::real_time* progress) - : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - lease_cr(std::move(lease_cr)), sync_pair(_sync_pair), - sync_pipe(sync_pipe), bucket_stopped(bucket_stopped), generation(generation), progress(progress), - shard_status_oid(RGWBucketPipeSyncStatusManager::inc_status_oid(sc->source_zone, sync_pair, generation)), - bucket_status_obj(sc->env->svc->zone->get_zone_params().log_pool, - RGWBucketPipeSyncStatusManager::full_status_oid(sc->source_zone, - sync_pair.source_bs.bucket, - sync_pair.dest_bucket)), - tn(tn) { - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int RGWSyncBucketShardCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - yield call(new RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &sync_status, &objv_tracker, generation)); - if (retcode < 0 && retcode != -ENOENT) { - tn->log(0, SSTR("ERROR: failed to read sync status for bucket. 
error: " << retcode)); - return set_cr_error(retcode); - } - - tn->log(20, SSTR("sync status for source bucket shard: " << sync_status.state)); - sync_status.state = rgw_bucket_shard_sync_info::StateIncrementalSync; - if (progress) { - *progress = sync_status.inc_marker.timestamp; - } - - yield call(new RGWBucketShardIncrementalSyncCR(sc, sync_pipe, - shard_status_oid, bucket_status_obj, lease_cr, - sync_status, generation, tn, - objv_tracker, progress)); - if (retcode < 0) { - tn->log(5, SSTR("incremental sync on bucket failed, retcode=" << retcode)); - return set_cr_error(retcode); - } - - if (sync_status.state == rgw_bucket_shard_sync_info::StateStopped) { - tn->log(20, SSTR("syncstopped indication for source bucket shard")); - bucket_stopped = true; - } - - return set_cr_done(); - } - - return 0; -} - -class RGWSyncBucketCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *env; - boost::intrusive_ptr data_lease_cr; - boost::intrusive_ptr bucket_lease_cr; - rgw_bucket_sync_pair_info sync_pair; - rgw_bucket_sync_pipe sync_pipe; - std::optional gen; - ceph::real_time* progress; - - const std::string lock_name = "bucket sync"; - const uint32_t lock_duration; - const rgw_raw_obj status_obj; - rgw_bucket_sync_status bucket_status; - bool bucket_stopped = false; - RGWObjVersionTracker objv; - bool init_check_compat = false; - rgw_bucket_index_marker_info info; - - RGWSyncTraceNodeRef tn; - -public: - RGWSyncBucketCR(RGWDataSyncCtx *_sc, - boost::intrusive_ptr lease_cr, - const rgw_bucket_sync_pair_info& _sync_pair, - std::optional gen, - const RGWSyncTraceNodeRef& _tn_parent, - ceph::real_time* progress) - : RGWCoroutine(_sc->cct), sc(_sc), env(_sc->env), - data_lease_cr(std::move(lease_cr)), sync_pair(_sync_pair), - gen(gen), progress(progress), - lock_duration(cct->_conf->rgw_sync_lease_period), - status_obj(env->svc->zone->get_zone_params().log_pool, - RGWBucketPipeSyncStatusManager::full_status_oid(sc->source_zone, - sync_pair.source_bs.bucket, - sync_pair.dest_bucket)), - tn(env->sync_tracer->add_node(_tn_parent, "bucket", - SSTR(bucket_str{_sync_pair.dest_bucket} << "<-" << bucket_shard_str{_sync_pair.source_bs} ))) { - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -static RGWCoroutine* sync_bucket_shard_cr(RGWDataSyncCtx* sc, - boost::intrusive_ptr lease, - const rgw_bucket_sync_pair_info& sync_pair, - std::optional gen, - const RGWSyncTraceNodeRef& tn, - ceph::real_time* progress) -{ - return new RGWSyncBucketCR(sc, std::move(lease), sync_pair, - gen, tn, progress); -} - -#define RELEASE_LOCK(cr) \ - if (cr) {cr->go_down(); drain_all(); cr.reset();} - -int RGWSyncBucketCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // read source/destination bucket info - yield call(new RGWSyncGetBucketInfoCR(env, sync_pair.source_bs.bucket, &sync_pipe.source_bucket_info, - &sync_pipe.source_bucket_attrs, tn)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{sync_pair.source_bs.bucket})); - return set_cr_error(retcode); - } - - yield call(new RGWSyncGetBucketInfoCR(env, sync_pair.dest_bucket, &sync_pipe.dest_bucket_info, - &sync_pipe.dest_bucket_attrs, tn)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{sync_pair.source_bs.bucket})); - return set_cr_error(retcode); - } - - sync_pipe.info = sync_pair; - - // read bucket sync status - using ReadCR = RGWSimpleRadosReadCR; - using WriteCR = RGWSimpleRadosWriteCR; - - yield call(new 
ReadCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, &bucket_status, false, &objv)); - if (retcode == -ENOENT) { - // use exclusive create to set state=Init - objv.generate_new_write_ver(cct); - yield call(new WriteCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, bucket_status, &objv, true)); - tn->log(20, "bucket status object does not exist, create a new one"); - if (retcode == -EEXIST) { - // raced with another create, read its status - tn->log(20, "raced with another create, read its status"); - yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, &bucket_status, false, &objv)); - } - } - if (retcode < 0) { - tn->log(20, SSTR("ERROR: failed to read bucket status object. error: " << retcode)); - return set_cr_error(retcode); - } - - do { - tn->log(20, SSTR("sync status for source bucket: " << bucket_status.state << - ". lease is: " << (bucket_lease_cr ? "taken" : "not taken") << ". stop indication is: " << bucket_stopped)); - - if (bucket_status.state != BucketSyncState::Incremental || - bucket_stopped) { - // if state is Init or Stopped, we query the remote RGW for the state - yield call(new RGWReadRemoteBucketIndexLogInfoCR(sc, sync_pair.source_bs.bucket, &info)); - if (retcode < 0) { - return set_cr_error(retcode); - } - if (info.syncstopped) { - // remote indicates stopped state - tn->log(20, "remote bilog indicates that sync was stopped"); - if (!bucket_lease_cr) { - bucket_lease_cr.reset(new RGWContinuousLeaseCR(env->async_rados, env->driver, status_obj, - lock_name, lock_duration, this)); - yield spawn(bucket_lease_cr.get(), false); - while (!bucket_lease_cr->is_locked()) { - if (bucket_lease_cr->is_done()) { - tn->log(5, "failed to take lease"); - set_status("lease lock failed, early abort"); - drain_all(); - return set_cr_error(bucket_lease_cr->get_ret_status()); - } - tn->log(5, "waiting on bucket lease"); - yield set_sleeping(true); - } - } - - // if state was incremental, remove all per-shard status objects - if (bucket_status.state == BucketSyncState::Incremental) { - yield { - const auto num_shards = bucket_status.shards_done_with_gen.size(); - const auto gen = bucket_status.incremental_gen; - call(new RemoveBucketShardStatusCollectCR(sc, sync_pair, gen, num_shards)); - } - } - - // check if local state is "stopped" - yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, &bucket_status, false, &objv)); - if (retcode < 0) { - tn->log(20, SSTR("ERROR: failed to read status before writing 'stopped'. error: " << retcode)); - RELEASE_LOCK(bucket_lease_cr); - return set_cr_error(retcode); - } - if (bucket_status.state != BucketSyncState::Stopped) { - // make sure that state is changed to stopped locally - bucket_status.state = BucketSyncState::Stopped; - yield call(new WriteCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, bucket_status, &objv, false)); - if (retcode < 0) { - tn->log(20, SSTR("ERROR: failed to write 'stopped' status. error: " << retcode)); - RELEASE_LOCK(bucket_lease_cr); - return set_cr_error(retcode); - } - } - RELEASE_LOCK(bucket_lease_cr); - return set_cr_done(); - } - if (bucket_stopped) { - tn->log(20, SSTR("ERROR: switched from 'stop' to 'start' sync. 
while state is: " << bucket_status.state)); - bucket_stopped = false; - bucket_status.state = BucketSyncState::Init; - } - } - - if (bucket_status.state != BucketSyncState::Incremental) { - // if the state wasn't Incremental, take a bucket-wide lease to prevent - // different shards from duplicating the init and full sync - if (!bucket_lease_cr) { - bucket_lease_cr.reset(new RGWContinuousLeaseCR(env->async_rados, env->driver, status_obj, - lock_name, lock_duration, this)); - yield spawn(bucket_lease_cr.get(), false); - while (!bucket_lease_cr->is_locked()) { - if (bucket_lease_cr->is_done()) { - tn->log(5, "failed to take lease"); - set_status("lease lock failed, early abort"); - drain_all(); - return set_cr_error(bucket_lease_cr->get_ret_status()); - } - tn->log(5, "waiting on bucket lease"); - yield set_sleeping(true); - } - } - - // reread the status after acquiring the lock - yield call(new ReadCR(dpp, env->async_rados, env->svc->sysobj, - status_obj, &bucket_status, false, &objv)); - if (retcode < 0) { - RELEASE_LOCK(bucket_lease_cr); - tn->log(20, SSTR("ERROR: reading the status after acquiring the lock failed. error: " << retcode)); - return set_cr_error(retcode); - } - tn->log(20, SSTR("status after acquiring the lock is: " << bucket_status.state)); - - yield call(new InitBucketFullSyncStatusCR(sc, sync_pair, status_obj, - bucket_status, objv, - sync_pipe.source_bucket_info, - init_check_compat, info)); - - if (retcode < 0) { - tn->log(20, SSTR("ERROR: init full sync failed. error: " << retcode)); - RELEASE_LOCK(bucket_lease_cr); - return set_cr_error(retcode); - } - } - - assert(bucket_status.state == BucketSyncState::Incremental || - bucket_status.state == BucketSyncState::Full); - - if (bucket_status.state == BucketSyncState::Full) { - assert(bucket_lease_cr); - yield call(new RGWBucketFullSyncCR(sc, sync_pipe, status_obj, - bucket_lease_cr, bucket_status, - tn, objv)); - if (retcode < 0) { - tn->log(20, SSTR("ERROR: full sync failed. error: " << retcode)); - RELEASE_LOCK(bucket_lease_cr); - return set_cr_error(retcode); - } - } - - if (bucket_status.state == BucketSyncState::Incremental) { - // lease not required for incremental sync - RELEASE_LOCK(bucket_lease_cr); - - // if a specific gen was requested, compare that to the sync status - if (gen) { - const auto current_gen = bucket_status.incremental_gen; - if (*gen > current_gen) { - retcode = -EAGAIN; - tn->log(10, SSTR("ERROR: requested sync of future generation " - << *gen << " > " << current_gen - << ", returning " << retcode << " for later retry")); - return set_cr_error(retcode); - } else if (*gen < current_gen) { - tn->log(10, SSTR("WARNING: requested sync of past generation " - << *gen << " < " << current_gen - << ", returning success")); - return set_cr_done(); - } - } - - assert(sync_pair.source_bs.shard_id >= 0); - if (static_cast(sync_pair.source_bs.shard_id) >= bucket_status.shards_done_with_gen.size()) { - tn->log(1, SSTR("bucket shard " << sync_pair.source_bs << " index out of bounds")); - return set_cr_done(); // return success so we don't retry - } - if (bucket_status.shards_done_with_gen[sync_pair.source_bs.shard_id]) { - tn->log(10, SSTR("bucket shard " << sync_pair.source_bs << " of gen " << - gen << " already synced.")); - return set_cr_done(); - } - - yield call(new RGWSyncBucketShardCR(sc, data_lease_cr, sync_pair, - sync_pipe, bucket_stopped, - bucket_status.incremental_gen, tn, progress)); - if (retcode < 0) { - tn->log(20, SSTR("ERROR: incremental sync failed. 
error: " << retcode)); - return set_cr_error(retcode); - } - } - // loop back to previous states unless incremental sync returns normally - } while (bucket_status.state != BucketSyncState::Incremental || bucket_stopped); - - return set_cr_done(); - } - - return 0; -} - -int RGWBucketPipeSyncStatusManager::do_init(const DoutPrefixProvider *dpp, - std::ostream* ostr) -{ - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(this, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - - sync_module.reset(new RGWDefaultSyncModuleInstance()); - auto async_rados = driver->svc()->rados->get_async_processor(); - - sync_env.init(this, driver->ctx(), driver, - driver->svc(), async_rados, &http_manager, - error_logger.get(), driver->getRados()->get_sync_tracer(), - sync_module, nullptr); - - sync_env.ostr = ostr; - - rgw_sync_pipe_info_set pipes; - - ret = cr_mgr.run(dpp, new RGWGetBucketPeersCR(&sync_env, - dest_bucket, - source_zone, - source_bucket, - &pipes, - sync_env.sync_tracer->root_node)); - if (ret < 0) { - ldpp_dout(this, 0) << "failed to get bucket source peers info: (ret=" << ret << "): " << cpp_strerror(-ret) << dendl; - return ret; - } - - if (pipes.empty()) { - ldpp_dout(this, 0) << "No peers. This is not a valid multisite configuration." << dendl; - return -EINVAL; - } - - for (auto& pipe : pipes) { - auto& szone = pipe.source.zone; - - auto conn = driver->svc()->zone->get_zone_conn(szone); - if (!conn) { - ldpp_dout(this, 0) << "connection object to zone " << szone << " does not exist" << dendl; - return -EINVAL; - } - - RGWZone* z; - if (!(z = driver->svc()->zone->find_zone(szone))) { - ldpp_dout(this, 0) << "zone " << szone << " does not exist" << dendl; - return -EINVAL; - } - sources.emplace_back(&sync_env, szone, conn, - pipe.source.get_bucket_info(), - pipe.target.get_bucket(), - pipe.handler, z->name); - } - - return 0; -} - -int RGWBucketPipeSyncStatusManager::remote_info(const DoutPrefixProvider *dpp, - source& s, - uint64_t* oldest_gen, - uint64_t* latest_gen, - uint64_t* num_shards) -{ - rgw_bucket_index_marker_info remote_info; - BucketIndexShardsManager remote_markers; - auto r = rgw_read_remote_bilog_info(dpp, s.sc.conn, s.info.bucket, - remote_info, remote_markers, - null_yield); - - if (r < 0) { - ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " rgw_read_remote_bilog_info: r=" - << r << dendl; - return r; - } - if (oldest_gen) - *oldest_gen = remote_info.oldest_gen; - - if (latest_gen) - *latest_gen = remote_info.latest_gen; - - if (num_shards) - *num_shards = remote_markers.get().size(); - - return 0; -} - -tl::expected, int> -RGWBucketPipeSyncStatusManager::construct( - const DoutPrefixProvider* dpp, - rgw::sal::RadosStore* driver, - std::optional source_zone, - std::optional source_bucket, - const rgw_bucket& dest_bucket, - std::ostream* ostr) -{ - std::unique_ptr self{ - new RGWBucketPipeSyncStatusManager(driver, source_zone, source_bucket, - dest_bucket)}; - auto r = self->do_init(dpp, ostr); - if (r < 0) { - return tl::unexpected(r); - } - return self; -} - -int RGWBucketPipeSyncStatusManager::init_sync_status( - const DoutPrefixProvider *dpp) -{ - // Just running one at a time saves us from buildup/teardown and in - // practice we only do one zone at a time. 
- for (auto& source : sources) { - list<RGWCoroutinesStack*> stacks; - RGWCoroutinesStack *stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr); - pretty_print(source.sc.env, "Initializing sync state of bucket {} with zone {}.\n", - source.info.bucket.name, source.zone_name); - stack->call(new RGWSimpleRadosWriteCR<rgw_bucket_sync_status>( - dpp, source.sc.env->async_rados, source.sc.env->svc->sysobj, - {sync_env.svc->zone->get_zone_params().log_pool, - full_status_oid(source.sc.source_zone, - source.info.bucket, - source.dest)}, - rgw_bucket_sync_status{})); - stacks.push_back(stack); - auto r = cr_mgr.run(dpp, stacks); - if (r < 0) { - pretty_print(source.sc.env, - "Initialization of sync state for bucket {} with zone {} " - "failed with error {}\n", - source.info.bucket.name, source.zone_name, cpp_strerror(r)); - } - } - return 0; -} - -tl::expected<std::map<int, rgw_bucket_shard_sync_info>, int> -RGWBucketPipeSyncStatusManager::read_sync_status( - const DoutPrefixProvider *dpp) -{ - std::map<int, rgw_bucket_shard_sync_info> sync_status; - list<RGWCoroutinesStack*> stacks; - - auto sz = sources.begin(); - - if (source_zone) { - sz = std::find_if(sources.begin(), sources.end(), - [this](const source& s) { - return s.sc.source_zone == *source_zone; - } - ); - if (sz == sources.end()) { - ldpp_dout(this, 0) << "ERROR: failed to find source zone: " - << *source_zone << dendl; - return tl::unexpected(-ENOENT); - } - } else { - ldpp_dout(this, 5) << "No source zone specified, using source zone: " - << sz->sc.source_zone << dendl; - } - uint64_t num_shards, latest_gen; - auto ret = remote_info(dpp, *sz, nullptr, &latest_gen, &num_shards); - if (ret < 0) { - ldpp_dout(this, 5) << "Unable to get remote info: " - << ret << dendl; - return tl::unexpected(ret); - } - auto stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr); - std::vector<rgw_bucket_sync_pair_info> pairs(num_shards); - for (auto shard = 0u; shard < num_shards; ++shard) { - auto& pair = pairs[shard]; - pair.source_bs.bucket = sz->info.bucket; - pair.dest_bucket = sz->dest; - pair.source_bs.shard_id = shard; - stack->call(new RGWReadBucketPipeSyncStatusCoroutine( - &sz->sc, pair, &sync_status[shard], - nullptr, latest_gen)); - } - - stacks.push_back(stack); - - ret = cr_mgr.run(dpp, stacks); - if (ret < 0) { - ldpp_dout(this, 0) << "ERROR: failed to read sync status for " - << bucket_str{dest_bucket} << dendl; - return tl::unexpected(ret); - } - - return sync_status; -} - -namespace rgw::bucket_sync_run { -// Retry-loop over calls to sync_bucket_shard_cr -class ShardCR : public RGWCoroutine { - static constexpr auto allowed_retries = 10u; - - RGWDataSyncCtx& sc; - const rgw_bucket_sync_pair_info& pair; - const uint64_t gen; - unsigned retries = 0; - - ceph::real_time prev_progress; - ceph::real_time progress; - -public: - - ShardCR(RGWDataSyncCtx& sc, const rgw_bucket_sync_pair_info& pair, - const uint64_t gen) - : RGWCoroutine(sc.cct), sc(sc), pair(pair), gen(gen) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - // Since all errors (except ECANCELED) are considered retryable, - // retry other errors so long as we're making progress.
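The loop that follows implements that policy. A minimal non-coroutine sketch of the same idea, where sync_once() is a hypothetical stand-in for the sync_bucket_shard_cr call:

```cpp
// Hedged sketch of ShardCR's retry policy; sync_once() is hypothetical and
// reports its position through `progress`, mirroring the coroutine's out-param.
unsigned retries = 0;
int retcode = -EDOM;                  // sentinel: no attempt made yet
ceph::real_time prev_progress, progress;
while (retries < allowed_retries && retcode != 0) {
  retcode = sync_once(&progress);
  if (retcode == -ECANCELED) {
    break;                            // the one error treated as fatal
  }
  if (retcode < 0 && progress != prev_progress) {
    retries = 0;                      // progress was made: reset the retry budget
  }
  prev_progress = progress;
  ++retries;
}
```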
- for (retries = 0u, retcode = -EDOM; - (retries < allowed_retries) && (retcode != 0); - ++retries) { - ldpp_dout(dpp, 5) << "ShardCR: syncing bucket shard on: " - << "zone=" << sc.source_zone - << ", bucket=" << pair.source_bs.bucket.name - << ", shard=" << pair.source_bs.shard_id - << ", gen=" << gen - << dendl; - yield call(sync_bucket_shard_cr(&sc, nullptr, pair, gen, - sc.env->sync_tracer->root_node, - &progress)); - - if (retcode == -ECANCELED) { - ldpp_dout(dpp, -1) << "ERROR: Got -ECANCELED for " - << pair.source_bs << dendl; - drain_all(); - return set_cr_error(retcode); - } else if (retcode < 0) { - ldpp_dout(dpp, 5) << "WARNING: Got error, retcode=" << retcode << " for " - << pair.source_bs << "on retry " - << retries + 1 << " of " << allowed_retries - << " allowed" << dendl; - // Reset the retry counter if we made any progress - if (progress != prev_progress) { - retries = 0; - } - prev_progress = progress; - } - } - if (retcode < 0) { - ldpp_dout(dpp, -1) << "ERROR: Exhausted retries for " - << pair.source_bs << " retcode=" - << retcode << dendl; - drain_all(); - return set_cr_error(retcode); - } - - drain_all(); - return set_cr_done(); - } - return 0; - } -}; - -// Loop over calls to ShardCR with limited concurrency -class GenCR : public RGWShardCollectCR { - static constexpr auto MAX_CONCURRENT_SHARDS = 64; - - RGWDataSyncCtx& sc; - const uint64_t gen; - - std::vector pairs; - decltype(pairs)::const_iterator iter; - -public: - GenCR(RGWDataSyncCtx& sc, const rgw_bucket& source, const rgw_bucket& dest, - const uint64_t gen, const uint64_t shards, - const RGWBucketSyncFlowManager::pipe_handler& handler) - : RGWShardCollectCR(sc.cct, MAX_CONCURRENT_SHARDS), - sc(sc), gen(gen) { - pairs.resize(shards); - for (auto shard = 0u; shard < shards; ++shard) { - auto& pair = pairs[shard]; - pair.handler = handler; - pair.source_bs.bucket = source; - pair.dest_bucket = dest; - pair.source_bs.shard_id = shard; - } - iter = pairs.cbegin(); - assert(pairs.size() == shards); - } - - virtual bool spawn_next() override { - if (iter == pairs.cend()) { - return false; - } - spawn(new ShardCR(sc, *iter, gen), false); - ++iter; - return true; - } - - int handle_result(int r) override { - if (r < 0) { - ldpp_dout(sc.env->dpp, 4) << "ERROR: Error syncing shard: " - << cpp_strerror(r) << dendl; - } - return r; - } -}; - -// Read sync status, loop over calls to GenCR -class SourceCR : public RGWCoroutine { - RGWDataSyncCtx& sc; - const RGWBucketInfo& info; - const rgw_bucket& dest; - const RGWBucketSyncFlowManager::pipe_handler& handler; - const rgw_raw_obj status_obj{ - sc.env->svc->zone->get_zone_params().log_pool, - RGWBucketPipeSyncStatusManager::full_status_oid(sc.source_zone, info.bucket, - dest)}; - - BucketSyncState state = BucketSyncState::Incremental; - uint64_t gen = 0; - uint64_t num_shards = 0; - rgw_bucket_sync_status status; - std::string zone_name; - -public: - - SourceCR(RGWDataSyncCtx& sc, const RGWBucketInfo& info, - const rgw_bucket& dest, - const RGWBucketSyncFlowManager::pipe_handler& handler, - const std::string& zone_name) - : RGWCoroutine(sc.cct), sc(sc), info(info), dest(dest), handler(handler), - zone_name(zone_name) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - // Get the source's status. In incremental sync, this gives us - // the generation and shard count that is next needed to be run. 
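In outline, the coroutine body below reduces to the following simplified sketch; read_status() and run_gen() are hypothetical stand-ins for the RGWSimpleRadosReadCR and GenCR calls:

```cpp
// Non-coroutine outline of SourceCR::operate(), for orientation only.
rgw_bucket_sync_status status = read_status();
if (status.state == BucketSyncState::Stopped) {
  return;                                  // nothing to do
}
BucketSyncState state;
uint64_t gen;
do {
  state = status.state;
  gen = status.incremental_gen;
  if (state != BucketSyncState::Incremental) {
    run_gen(gen, 1);                       // full sync runs as one logical shard
  } else {
    run_gen(gen, status.shards_done_with_gen.size());
  }
  status = read_status();                  // pick up any generation change
} while (state != BucketSyncState::Incremental ||
         gen != status.incremental_gen);
```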
- yield call(new RGWSimpleRadosReadCR( - dpp, sc.env->async_rados, sc.env->svc->sysobj, - status_obj, &status)); - if (retcode < 0) { - ldpp_dout(dpp, -1) << "ERROR: Unable to fetch status for zone=" - << sc.source_zone << " retcode=" - << retcode << dendl; - drain_all(); - return set_cr_error(retcode); - } - - if (status.state == BucketSyncState::Stopped) { - // Nothing to do. - pretty_print(sc.env, "Sync of bucket {} from source zone {} is in state Stopped. " - "Nothing to do.\n", dest.name, zone_name); - ldpp_dout(dpp, 5) << "SourceCR: Bucket is in state Stopped, returning." - << dendl; - drain_all(); - return set_cr_done(); - } - - do { - state = status.state; - gen = status.incremental_gen; - num_shards = status.shards_done_with_gen.size(); - - ldpp_dout(dpp, 5) << "SourceCR: " - << "state=" << state - << ", gen=" << gen - << ", num_shards=" << num_shards - << dendl; - - // Special case to handle full sync. Since full sync no longer - // uses shards and has no generations, we sync shard zero, - // though use the current generation so a following - // incremental sync can carry on. - if (state != BucketSyncState::Incremental) { - pretty_print(sc.env, "Beginning full sync of bucket {} from source zone {}.\n", - dest.name, zone_name); - ldpp_dout(dpp, 5) << "SourceCR: Calling GenCR with " - << "gen=" << gen - << ", num_shards=" << 1 - << dendl; - yield call(new GenCR(sc, info.bucket, dest, gen, 1, handler)); - } else { - pretty_print(sc.env, "Beginning incremental sync of bucket {}, generation {} from source zone {}.\n", - dest.name, gen, zone_name); - ldpp_dout(dpp, 5) << "SourceCR: Calling GenCR with " - << "gen=" << gen - << ", num_shards=" << num_shards - << dendl; - yield call(new GenCR(sc, info.bucket, dest, gen, num_shards, - handler)); - } - if (retcode < 0) { - ldpp_dout(dpp, -1) << "ERROR: Giving up syncing from " - << sc.source_zone << " retcode=" - << retcode << dendl; - drain_all(); - return set_cr_error(retcode); - } - - pretty_print(sc.env, "Completed.\n"); - - yield call(new RGWSimpleRadosReadCR( - dpp, sc.env->async_rados, sc.env->svc->sysobj, - status_obj, &status)); - if (retcode < 0) { - ldpp_dout(dpp, -1) << "ERROR: Unable to fetch status for zone=" - << sc.source_zone << " retcode=" - << retcode << dendl; - drain_all(); - return set_cr_error(retcode); - } - // Repeat until we have done an incremental run and the - // generation remains unchanged. 
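For instance, with hypothetical values, a reshard that bumps the generation mid-run plays out like this:

```cpp
// Hypothetical trace of the loop's exit test across a reshard:
//   pass 1: state=Full,        gen=0 -> full sync;  re-read: Incremental, gen=0 -> loop
//   pass 2: state=Incremental, gen=0 -> sync gen 0; re-read: Incremental, gen=1 -> loop
//   pass 3: state=Incremental, gen=1 -> sync gen 1; re-read: Incremental, gen=1 -> done
```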
- ldpp_dout(dpp, 5) << "SourceCR: " - << "state=" << state - << ", gen=" << gen - << ", num_shards=" << num_shards - << ", status.state=" << status.state - << ", status.incremental_gen=" << status.incremental_gen - << ", status.shards_done_with_gen.size()=" << status.shards_done_with_gen.size() - << dendl; - } while (state != BucketSyncState::Incremental || - gen != status.incremental_gen); - drain_all(); - return set_cr_done(); - } - return 0; - } -}; -} // namespace rgw::bucket_sync_run - -int RGWBucketPipeSyncStatusManager::run(const DoutPrefixProvider *dpp) -{ - list stacks; - for (auto& source : sources) { - auto stack = new RGWCoroutinesStack(driver->ctx(), &cr_mgr); - stack->call(new rgw::bucket_sync_run::SourceCR( - source.sc, source.info, source.dest, source.handler, - source.zone_name)); - stacks.push_back(stack); - } - auto ret = cr_mgr.run(dpp, stacks); - if (ret < 0) { - ldpp_dout(this, 0) << "ERROR: Sync unsuccessful on bucket " - << bucket_str{dest_bucket} << dendl; - } - return ret; -} - -unsigned RGWBucketPipeSyncStatusManager::get_subsys() const -{ - return dout_subsys; -} - -std::ostream& RGWBucketPipeSyncStatusManager::gen_prefix(std::ostream& out) const -{ - auto zone = std::string_view{source_zone.value_or(rgw_zone_id("*")).id}; - return out << "bucket sync zone:" << zone.substr(0, 8) - << " bucket:" << dest_bucket << ' '; -} - -string RGWBucketPipeSyncStatusManager::full_status_oid(const rgw_zone_id& source_zone, - const rgw_bucket& source_bucket, - const rgw_bucket& dest_bucket) -{ - if (source_bucket == dest_bucket) { - return bucket_full_status_oid_prefix + "." + source_zone.id + ":" - + dest_bucket.get_key(); - } else { - return bucket_full_status_oid_prefix + "." + source_zone.id + ":" - + dest_bucket.get_key() + ":" + source_bucket.get_key(); - } -} - -inline std::string generation_token(uint64_t gen) { - return (gen == 0) ? "" : (":" + std::to_string(gen)); -} - -string RGWBucketPipeSyncStatusManager::inc_status_oid(const rgw_zone_id& source_zone, - const rgw_bucket_sync_pair_info& sync_pair, - uint64_t gen) -{ - if (sync_pair.source_bs.bucket == sync_pair.dest_bucket) { - return bucket_status_oid_prefix + "." + source_zone.id + ":" + sync_pair.source_bs.get_key() + - generation_token(gen); - } else { - return bucket_status_oid_prefix + "." + source_zone.id + ":" + sync_pair.dest_bucket.get_key() + ":" + sync_pair.source_bs.get_key() + - generation_token(gen); - } -} - -string RGWBucketPipeSyncStatusManager::obj_status_oid(const rgw_bucket_sync_pipe& sync_pipe, - const rgw_zone_id& source_zone, - const rgw::sal::Object* obj) -{ - string prefix = object_status_oid_prefix + "." 
+ source_zone.id + ":" + obj->get_bucket()->get_key().get_key(); - if (sync_pipe.source_bucket_info.bucket != - sync_pipe.dest_bucket_info.bucket) { - prefix += string("/") + sync_pipe.dest_bucket_info.bucket.get_key(); - } - return prefix + ":" + obj->get_name() + ":" + obj->get_instance(); -} - -int rgw_read_remote_bilog_info(const DoutPrefixProvider *dpp, - RGWRESTConn* conn, - const rgw_bucket& bucket, - rgw_bucket_index_marker_info& info, - BucketIndexShardsManager& markers, - optional_yield y) -{ - const auto instance_key = bucket.get_key(); - const rgw_http_param_pair params[] = { - { "type" , "bucket-index" }, - { "bucket-instance", instance_key.c_str() }, - { "info" , nullptr }, - { nullptr, nullptr } - }; - int r = conn->get_json_resource(dpp, "/admin/log/", params, y, info); - if (r < 0) { - ldpp_dout(dpp, -1) << "failed to fetch remote log markers: " << cpp_strerror(r) << dendl; - return r; - } - // parse shard markers - r = markers.from_string(info.max_marker, -1); - if (r < 0) { - ldpp_dout(dpp, -1) << "failed to decode remote log markers" << dendl; - return r; - } - return 0; -} - -class RGWCollectBucketSyncStatusCR : public RGWShardCollectCR { - static constexpr int max_concurrent_shards = 16; - rgw::sal::RadosStore* const driver; - RGWDataSyncCtx *const sc; - RGWDataSyncEnv *const env; - const uint64_t gen; - - rgw_bucket_sync_pair_info sync_pair; - using Vector = std::vector; - Vector::iterator i, end; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to read bucket shard sync status: " - << cpp_strerror(r) << dendl; - } - return r; - } - public: - RGWCollectBucketSyncStatusCR(rgw::sal::RadosStore* driver, RGWDataSyncCtx *sc, - const rgw_bucket_sync_pair_info& sync_pair, - uint64_t gen, - Vector *status) - : RGWShardCollectCR(sc->cct, max_concurrent_shards), - driver(driver), sc(sc), env(sc->env), gen(gen), sync_pair(sync_pair), - i(status->begin()), end(status->end()) - {} - - bool spawn_next() override { - if (i == end) { - return false; - } - spawn(new RGWReadBucketPipeSyncStatusCoroutine(sc, sync_pair, &*i, nullptr, gen), false); - ++i; - ++sync_pair.source_bs.shard_id; - return true; - } -}; - -int rgw_read_bucket_full_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore *driver, - const rgw_sync_bucket_pipe& pipe, - rgw_bucket_sync_status *status, - optional_yield y) -{ - auto get_oid = RGWBucketPipeSyncStatusManager::full_status_oid; - const rgw_raw_obj obj{driver->svc()->zone->get_zone_params().log_pool, - get_oid(*pipe.source.zone, *pipe.source.bucket, *pipe.dest.bucket)}; - - auto svc = driver->svc()->sysobj; - auto sysobj = svc->get_obj(obj); - bufferlist bl; - int ret = sysobj.rop().read(dpp, &bl, y); - if (ret < 0) - return ret; - - try { - auto iter = bl.cbegin(); - using ceph::decode; - rgw_bucket_sync_status result; - decode(result, iter); - *status = result; - return 0; - } catch (const buffer::error& err) { - lderr(svc->ctx()) << "error decoding " << obj << ": " << err.what() << dendl; - return -EIO; - } -} - -int rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore *driver, - const rgw_sync_bucket_pipe& pipe, - uint64_t gen, - std::vector *status) -{ - if (!pipe.source.zone || - !pipe.source.bucket || - !pipe.dest.zone || - !pipe.dest.bucket) { - return -EINVAL; - } - - rgw_bucket_sync_pair_info sync_pair; - sync_pair.source_bs.bucket = *pipe.source.bucket; - sync_pair.source_bs.shard_id = 0; - 
sync_pair.dest_bucket = *pipe.dest.bucket; - - RGWDataSyncEnv env; - RGWSyncModuleInstanceRef module; // null sync module - env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->rados->get_async_processor(), - nullptr, nullptr, nullptr, module, nullptr); - - RGWDataSyncCtx sc; - sc.init(&env, nullptr, *pipe.source.zone); - - RGWCoroutinesManager crs(driver->ctx(), driver->getRados()->get_cr_registry()); - return crs.run(dpp, new RGWCollectBucketSyncStatusCR(driver, &sc, - sync_pair, - gen, - status)); -} - -void rgw_data_sync_info::generate_test_instances(list& o) -{ - auto info = new rgw_data_sync_info; - info->state = rgw_data_sync_info::StateBuildingFullSyncMaps; - info->num_shards = 8; - o.push_back(info); - o.push_back(new rgw_data_sync_info); -} - -void rgw_data_sync_marker::generate_test_instances(list& o) -{ - auto marker = new rgw_data_sync_marker; - marker->state = rgw_data_sync_marker::IncrementalSync; - marker->marker = "01234"; - marker->pos = 5; - o.push_back(marker); - o.push_back(new rgw_data_sync_marker); -} - -void rgw_data_sync_status::generate_test_instances(list& o) -{ - o.push_back(new rgw_data_sync_status); -} - -void rgw_bucket_shard_full_sync_marker::dump(Formatter *f) const -{ - encode_json("position", position, f); - encode_json("count", count, f); -} - -void rgw_bucket_shard_inc_sync_marker::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("position", position, obj); - JSONDecoder::decode_json("timestamp", timestamp, obj); -} - -void rgw_bucket_shard_inc_sync_marker::dump(Formatter *f) const -{ - encode_json("position", position, f); - encode_json("timestamp", timestamp, f); -} - -void rgw_bucket_shard_sync_info::decode_json(JSONObj *obj) -{ - std::string s; - JSONDecoder::decode_json("status", s, obj); - if (s == "full-sync") { - state = StateFullSync; - } else if (s == "incremental-sync") { - state = StateIncrementalSync; - } else if (s == "stopped") { - state = StateStopped; - } else { - state = StateInit; - } - JSONDecoder::decode_json("inc_marker", inc_marker, obj); -} - -void rgw_bucket_shard_full_sync_marker::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("position", position, obj); - JSONDecoder::decode_json("count", count, obj); -} - -void rgw_bucket_shard_sync_info::dump(Formatter *f) const -{ - const char *s{nullptr}; - switch ((SyncState)state) { - case StateInit: - s = "init"; - break; - case StateFullSync: - s = "full-sync"; - break; - case StateIncrementalSync: - s = "incremental-sync"; - break; - case StateStopped: - s = "stopped"; - break; - default: - s = "unknown"; - break; - } - encode_json("status", s, f); - encode_json("inc_marker", inc_marker, f); -} - -void rgw_bucket_full_sync_status::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("position", position, obj); - JSONDecoder::decode_json("count", count, obj); -} - -void rgw_bucket_full_sync_status::dump(Formatter *f) const -{ - encode_json("position", position, f); - encode_json("count", count, f); -} - -void encode_json(const char *name, BucketSyncState state, Formatter *f) -{ - switch (state) { - case BucketSyncState::Init: - encode_json(name, "init", f); - break; - case BucketSyncState::Full: - encode_json(name, "full-sync", f); - break; - case BucketSyncState::Incremental: - encode_json(name, "incremental-sync", f); - break; - case BucketSyncState::Stopped: - encode_json(name, "stopped", f); - break; - default: - encode_json(name, "unknown", f); - break; - } -} - -void decode_json_obj(BucketSyncState& state, JSONObj *obj) -{ - std::string s; - 
decode_json_obj(s, obj); - if (s == "full-sync") { - state = BucketSyncState::Full; - } else if (s == "incremental-sync") { - state = BucketSyncState::Incremental; - } else if (s == "stopped") { - state = BucketSyncState::Stopped; - } else { - state = BucketSyncState::Init; - } -} - -void rgw_bucket_sync_status::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("state", state, obj); - JSONDecoder::decode_json("full", full, obj); - JSONDecoder::decode_json("incremental_gen", incremental_gen, obj); -} - -void rgw_bucket_sync_status::dump(Formatter *f) const -{ - encode_json("state", state, f); - encode_json("full", full, f); - encode_json("incremental_gen", incremental_gen, f); -} - - -void bilog_status_v2::dump(Formatter *f) const -{ - encode_json("sync_status", sync_status, f); - encode_json("inc_status", inc_status, f); -} - -void bilog_status_v2::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("sync_status", sync_status, obj); - JSONDecoder::decode_json("inc_status", inc_status, obj); -} diff --git a/src/rgw/store/rados/rgw_data_sync.h b/src/rgw/store/rados/rgw_data_sync.h deleted file mode 100644 index 6cc714dbaf8..00000000000 --- a/src/rgw/store/rados/rgw_data_sync.h +++ /dev/null @@ -1,823 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_DATA_SYNC_H -#define CEPH_RGW_DATA_SYNC_H - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include <fmt/format.h> -#include <fmt/ostream.h> - -#include "include/encoding.h" - -#include "common/ceph_json.h" -#include "common/likely.h" - -#include "rgw_coroutine.h" -#include "rgw_http_client.h" -#include "rgw_sal_rados.h" - -#include "rgw_datalog.h" -#include "rgw_sync.h" -#include "rgw_sync_module.h" -#include "rgw_sync_trace.h" -#include "rgw_sync_policy.h" - -#include "rgw_bucket_sync.h" - -// represents an obligation to sync an entry up to a given time -struct rgw_data_sync_obligation { - rgw_bucket_shard bs; - std::optional<uint64_t> gen; - std::string marker; - ceph::real_time timestamp; - bool retry = false; -}; - -inline std::ostream& operator<<(std::ostream& out, const rgw_data_sync_obligation& o) { - out << "key=" << o.bs; - if (o.gen) { - out << '[' << *o.gen << ']'; - } - if (!o.marker.empty()) { - out << " marker=" << o.marker; - } - if (o.timestamp != ceph::real_time{}) { - out << " timestamp=" << o.timestamp; - } - if (o.retry) { - out << " retry"; - } - return out; -} - -class JSONObj; -struct rgw_sync_bucket_pipe; - -struct rgw_bucket_sync_pair_info { - RGWBucketSyncFlowManager::pipe_handler handler; /* responsible for sync filters */ - rgw_bucket_shard source_bs; - rgw_bucket dest_bucket; -}; - -inline std::ostream& operator<<(std::ostream& out, const rgw_bucket_sync_pair_info& p) { - if (p.source_bs.bucket == p.dest_bucket) { - return out << p.source_bs; - } - return out << p.source_bs << "->" << p.dest_bucket; -} - -struct rgw_bucket_sync_pipe { - rgw_bucket_sync_pair_info info; - RGWBucketInfo source_bucket_info; - std::map<std::string, bufferlist> source_bucket_attrs; - RGWBucketInfo dest_bucket_info; - std::map<std::string, bufferlist> dest_bucket_attrs; - - RGWBucketSyncFlowManager::pipe_rules_ref& get_rules() { - return info.handler.rules; - } -}; - -inline std::ostream& operator<<(std::ostream& out, const rgw_bucket_sync_pipe& p) { - return out << p.info; -} - -struct rgw_datalog_info { - uint32_t num_shards; - - rgw_datalog_info() : num_shards(0) {} - - void decode_json(JSONObj *obj); -}; - -struct rgw_data_sync_info { - enum SyncState { - StateInit = 0, - StateBuildingFullSyncMaps = 1, - StateSync = 2, - }; - -
uint16_t state; - uint32_t num_shards; - - uint64_t instance_id{0}; - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(state, bl); - encode(num_shards, bl); - encode(instance_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(state, bl); - decode(num_shards, bl); - if (struct_v >= 2) { - decode(instance_id, bl); - } - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const { - std::string s; - switch ((SyncState)state) { - case StateInit: - s = "init"; - break; - case StateBuildingFullSyncMaps: - s = "building-full-sync-maps"; - break; - case StateSync: - s = "sync"; - break; - default: - s = "unknown"; - break; - } - encode_json("status", s, f); - encode_json("num_shards", num_shards, f); - encode_json("instance_id", instance_id, f); - } - void decode_json(JSONObj *obj) { - std::string s; - JSONDecoder::decode_json("status", s, obj); - if (s == "building-full-sync-maps") { - state = StateBuildingFullSyncMaps; - } else if (s == "sync") { - state = StateSync; - } else { - state = StateInit; - } - JSONDecoder::decode_json("num_shards", num_shards, obj); - JSONDecoder::decode_json("instance_id", instance_id, obj); - } - static void generate_test_instances(std::list& o); - - rgw_data_sync_info() : state((int)StateInit), num_shards(0) {} -}; -WRITE_CLASS_ENCODER(rgw_data_sync_info) - -struct rgw_data_sync_marker { - enum SyncState { - FullSync = 0, - IncrementalSync = 1, - }; - uint16_t state; - std::string marker; - std::string next_step_marker; - uint64_t total_entries; - uint64_t pos; - real_time timestamp; - - rgw_data_sync_marker() : state(FullSync), total_entries(0), pos(0) {} - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(state, bl); - encode(marker, bl); - encode(next_step_marker, bl); - encode(total_entries, bl); - encode(pos, bl); - encode(timestamp, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(state, bl); - decode(marker, bl); - decode(next_step_marker, bl); - decode(total_entries, bl); - decode(pos, bl); - decode(timestamp, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const { - const char *s{nullptr}; - switch ((SyncState)state) { - case FullSync: - s = "full-sync"; - break; - case IncrementalSync: - s = "incremental-sync"; - break; - default: - s = "unknown"; - break; - } - encode_json("status", s, f); - encode_json("marker", marker, f); - encode_json("next_step_marker", next_step_marker, f); - encode_json("total_entries", total_entries, f); - encode_json("pos", pos, f); - encode_json("timestamp", utime_t(timestamp), f); - } - void decode_json(JSONObj *obj) { - std::string s; - JSONDecoder::decode_json("status", s, obj); - if (s == "full-sync") { - state = FullSync; - } else if (s == "incremental-sync") { - state = IncrementalSync; - } - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("next_step_marker", next_step_marker, obj); - JSONDecoder::decode_json("total_entries", total_entries, obj); - JSONDecoder::decode_json("pos", pos, obj); - utime_t t; - JSONDecoder::decode_json("timestamp", t, obj); - timestamp = t.to_real_time(); - } - static void generate_test_instances(std::list& o); -}; -WRITE_CLASS_ENCODER(rgw_data_sync_marker) - -struct rgw_data_sync_status { - rgw_data_sync_info sync_info; - std::map sync_markers; - - rgw_data_sync_status() {} - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(sync_info, bl); - /* sync markers are 
encoded separately */ - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(sync_info, bl); - /* sync markers are decoded separately */ - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const { - encode_json("info", sync_info, f); - encode_json("markers", sync_markers, f); - } - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("info", sync_info, obj); - JSONDecoder::decode_json("markers", sync_markers, obj); - } - static void generate_test_instances(std::list& o); -}; -WRITE_CLASS_ENCODER(rgw_data_sync_status) - -struct rgw_datalog_entry { - std::string key; - ceph::real_time timestamp; - - void decode_json(JSONObj *obj); -}; - -struct rgw_datalog_shard_data { - std::string marker; - bool truncated; - std::vector entries; - - void decode_json(JSONObj *obj); -}; - -class RGWAsyncRadosProcessor; -class RGWDataSyncControlCR; - -struct rgw_bucket_entry_owner { - std::string id; - std::string display_name; - - rgw_bucket_entry_owner() {} - rgw_bucket_entry_owner(const std::string& _id, const std::string& _display_name) : id(_id), display_name(_display_name) {} - - void decode_json(JSONObj *obj); -}; - -class RGWSyncErrorLogger; -class RGWRESTConn; -class RGWServices; - -struct RGWDataSyncEnv { - const DoutPrefixProvider *dpp{nullptr}; - CephContext *cct{nullptr}; - rgw::sal::RadosStore* driver{nullptr}; - RGWServices *svc{nullptr}; - RGWAsyncRadosProcessor *async_rados{nullptr}; - RGWHTTPManager *http_manager{nullptr}; - RGWSyncErrorLogger *error_logger{nullptr}; - RGWSyncTraceManager *sync_tracer{nullptr}; - RGWSyncModuleInstanceRef sync_module{nullptr}; - PerfCounters* counters{nullptr}; - - RGWDataSyncEnv() {} - - void init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _driver, RGWServices *_svc, - RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, - RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer, - RGWSyncModuleInstanceRef& _sync_module, - PerfCounters* _counters) { - dpp = _dpp; - cct = _cct; - driver = _driver; - svc = _svc; - async_rados = _async_rados; - http_manager = _http_manager; - error_logger = _error_logger; - sync_tracer = _sync_tracer; - sync_module = _sync_module; - counters = _counters; - } - - std::string shard_obj_name(int shard_id); - std::string status_oid(); - - std::ostream* ostr{nullptr}; // For pretty printing progress -}; - -// pretty ostream output for `radosgw-admin bucket sync run` -template -void pretty_print(const RGWDataSyncEnv* env, T&& ...t) { - if (unlikely(!!env->ostr)) { - fmt::print(*env->ostr, std::forward(t)...); - env->ostr->flush(); - } -} - -struct RGWDataSyncCtx { - RGWDataSyncEnv *env{nullptr}; - CephContext *cct{nullptr}; - - RGWRESTConn *conn{nullptr}; - rgw_zone_id source_zone; - - RGWDataSyncCtx() = default; - - RGWDataSyncCtx(RGWDataSyncEnv* env, - RGWRESTConn* conn, - const rgw_zone_id& source_zone) - : env(env), cct(env->cct), conn(conn), source_zone(source_zone) {} - - void init(RGWDataSyncEnv *_env, - RGWRESTConn *_conn, - const rgw_zone_id& _source_zone) { - cct = _env->cct; - env = _env; - conn = _conn; - source_zone = _source_zone; - } -}; - -class RGWRados; - -class RGWRemoteDataLog : public RGWCoroutinesManager { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* driver; - CephContext *cct; - RGWCoroutinesManagerRegistry *cr_registry; - RGWAsyncRadosProcessor *async_rados; - RGWHTTPManager http_manager; - - RGWDataSyncEnv sync_env; - RGWDataSyncCtx sc; - - ceph::shared_mutex lock = 
ceph::make_shared_mutex("RGWRemoteDataLog::lock"); - RGWDataSyncControlCR *data_sync_cr; - - RGWSyncTraceNodeRef tn; - - bool initialized; - -public: - RGWRemoteDataLog(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* _store, - RGWAsyncRadosProcessor *async_rados); - int init(const rgw_zone_id& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger, - RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& module, - PerfCounters* _counters); - void finish(); - - int read_log_info(const DoutPrefixProvider *dpp, rgw_datalog_info *log_info); - int read_source_log_shards_info(const DoutPrefixProvider *dpp, std::map *shards_info); - int read_source_log_shards_next(const DoutPrefixProvider *dpp, std::map shard_markers, std::map *result); - int read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status); - int read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, std::set& recovering_shards); - int read_shard_status(const DoutPrefixProvider *dpp, int shard_id, std::set& lagging_buckets,std::set& recovering_buckets, rgw_data_sync_marker* sync_marker, const int max_entries); - int init_sync_status(const DoutPrefixProvider *dpp, int num_shards); - int run_sync(const DoutPrefixProvider *dpp, int num_shards); - - void wakeup(int shard_id, bc::flat_set& entries); -}; - -class RGWDataSyncStatusManager : public DoutPrefixProvider { - rgw::sal::RadosStore* driver; - - rgw_zone_id source_zone; - RGWRESTConn *conn; - RGWSyncErrorLogger *error_logger; - RGWSyncModuleInstanceRef sync_module; - PerfCounters* counters; - - RGWRemoteDataLog source_log; - - std::string source_status_oid; - std::string source_shard_status_oid_prefix; - - std::map shard_objs; - - int num_shards; - -public: - RGWDataSyncStatusManager(rgw::sal::RadosStore* _driver, RGWAsyncRadosProcessor *async_rados, - const rgw_zone_id& _source_zone, PerfCounters* counters) - : driver(_driver), source_zone(_source_zone), conn(NULL), error_logger(NULL), - sync_module(nullptr), counters(counters), - source_log(this, driver, async_rados), num_shards(0) {} - RGWDataSyncStatusManager(rgw::sal::RadosStore* _driver, RGWAsyncRadosProcessor *async_rados, - const rgw_zone_id& _source_zone, PerfCounters* counters, - const RGWSyncModuleInstanceRef& _sync_module) - : driver(_driver), source_zone(_source_zone), conn(NULL), error_logger(NULL), - sync_module(_sync_module), counters(counters), - source_log(this, driver, async_rados), num_shards(0) {} - ~RGWDataSyncStatusManager() { - finalize(); - } - int init(const DoutPrefixProvider *dpp); - void finalize(); - - static std::string shard_obj_name(const rgw_zone_id& source_zone, int shard_id); - static std::string sync_status_oid(const rgw_zone_id& source_zone); - - int read_sync_status(const DoutPrefixProvider *dpp, rgw_data_sync_status *sync_status) { - return source_log.read_sync_status(dpp, sync_status); - } - - int read_recovering_shards(const DoutPrefixProvider *dpp, const int num_shards, std::set& recovering_shards) { - return source_log.read_recovering_shards(dpp, num_shards, recovering_shards); - } - - int read_shard_status(const DoutPrefixProvider *dpp, int shard_id, std::set& lagging_buckets, std::set& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries) { - return source_log.read_shard_status(dpp, shard_id, lagging_buckets, recovering_buckets,sync_marker, max_entries); - } - int init_sync_status(const DoutPrefixProvider *dpp) { return source_log.init_sync_status(dpp, num_shards); } - - int read_log_info(const 
DoutPrefixProvider *dpp, rgw_datalog_info *log_info) { - return source_log.read_log_info(dpp, log_info); - } - int read_source_log_shards_info(const DoutPrefixProvider *dpp, std::map *shards_info) { - return source_log.read_source_log_shards_info(dpp, shards_info); - } - int read_source_log_shards_next(const DoutPrefixProvider *dpp, std::map shard_markers, std::map *result) { - return source_log.read_source_log_shards_next(dpp, shard_markers, result); - } - - int run(const DoutPrefixProvider *dpp) { return source_log.run_sync(dpp, num_shards); } - - void wakeup(int shard_id, bc::flat_set& entries) { return source_log.wakeup(shard_id, entries); } - - void stop() { - source_log.finish(); - } - - // implements DoutPrefixProvider - CephContext *get_cct() const override; - unsigned get_subsys() const override; - std::ostream& gen_prefix(std::ostream& out) const override; -}; - -class RGWBucketPipeSyncStatusManager; -class RGWBucketSyncCR; - -struct rgw_bucket_shard_full_sync_marker { - rgw_obj_key position; - uint64_t count; - - rgw_bucket_shard_full_sync_marker() : count(0) {} - - void encode_attr(std::map& attrs); - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(position, bl); - encode(count, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(position, bl); - decode(count, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(rgw_bucket_shard_full_sync_marker) - -struct rgw_bucket_shard_inc_sync_marker { - std::string position; - ceph::real_time timestamp; - - void encode_attr(std::map& attrs); - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(position, bl); - encode(timestamp, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(position, bl); - if (struct_v >= 2) { - decode(timestamp, bl); - } - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(rgw_bucket_shard_inc_sync_marker) - -struct rgw_bucket_shard_sync_info { - enum SyncState { - StateInit = 0, - StateFullSync = 1, - StateIncrementalSync = 2, - StateStopped = 3, - }; - - uint16_t state; - rgw_bucket_shard_inc_sync_marker inc_marker; - - void decode_from_attrs(CephContext *cct, std::map& attrs); - void encode_all_attrs(std::map& attrs); - void encode_state_attr(std::map& attrs); - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(state, bl); - encode(inc_marker, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(state, bl); - if (struct_v <= 1) { - rgw_bucket_shard_full_sync_marker full_marker; - decode(full_marker, bl); - } - decode(inc_marker, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - - rgw_bucket_shard_sync_info() : state((int)StateInit) {} - -}; -WRITE_CLASS_ENCODER(rgw_bucket_shard_sync_info) - -struct rgw_bucket_full_sync_status { - rgw_obj_key position; - uint64_t count = 0; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(position, bl); - encode(count, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(position, bl); - decode(count, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(rgw_bucket_full_sync_status) - -enum class 
BucketSyncState : uint8_t { - Init = 0, - Full, - Incremental, - Stopped, -}; -inline std::ostream& operator<<(std::ostream& out, const BucketSyncState& s) { - switch (s) { - case BucketSyncState::Init: out << "init"; break; - case BucketSyncState::Full: out << "full"; break; - case BucketSyncState::Incremental: out << "incremental"; break; - case BucketSyncState::Stopped: out << "stopped"; break; - } - return out; -} - -void encode_json(const char *name, BucketSyncState state, Formatter *f); -void decode_json_obj(BucketSyncState& state, JSONObj *obj); - -struct rgw_bucket_sync_status { - BucketSyncState state = BucketSyncState::Init; - rgw_bucket_full_sync_status full; - uint64_t incremental_gen = 0; - std::vector shards_done_with_gen; - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(state, bl); - encode(full, bl); - encode(incremental_gen, bl); - encode(shards_done_with_gen, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(state, bl); - decode(full, bl); - if (struct_v > 1) { - decode(incremental_gen, bl); - decode(shards_done_with_gen, bl); - } - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(rgw_bucket_sync_status) - -struct bilog_status_v2 { - rgw_bucket_sync_status sync_status; - std::vector inc_status; - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; - -struct store_gen_shards { - uint64_t gen = 0; - uint32_t num_shards = 0; - - void dump(Formatter *f) const { - encode_json("gen", gen, f); - encode_json("num_shards", num_shards, f); - } - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("gen", gen, obj); - JSONDecoder::decode_json("num_shards", num_shards, obj); - } -}; - -struct rgw_bucket_index_marker_info { - std::string bucket_ver; - std::string master_ver; - std::string max_marker; - bool syncstopped{false}; - uint64_t oldest_gen = 0; - uint64_t latest_gen = 0; - std::vector generations; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("bucket_ver", bucket_ver, obj); - JSONDecoder::decode_json("master_ver", master_ver, obj); - JSONDecoder::decode_json("max_marker", max_marker, obj); - JSONDecoder::decode_json("syncstopped", syncstopped, obj); - JSONDecoder::decode_json("oldest_gen", oldest_gen, obj); - JSONDecoder::decode_json("latest_gen", latest_gen, obj); - JSONDecoder::decode_json("generations", generations, obj); - } -}; - - -class BucketIndexShardsManager; - -int rgw_read_remote_bilog_info(const DoutPrefixProvider *dpp, - RGWRESTConn* conn, - const rgw_bucket& bucket, - rgw_bucket_index_marker_info& info, - BucketIndexShardsManager& markers, - optional_yield y); - -class RGWBucketPipeSyncStatusManager : public DoutPrefixProvider { - rgw::sal::RadosStore* driver; - - RGWDataSyncEnv sync_env; - - RGWCoroutinesManager cr_mgr{driver->ctx(), - driver->getRados()->get_cr_registry()}; - - RGWHTTPManager http_manager{driver->ctx(), cr_mgr.get_completion_mgr()}; - - std::optional source_zone; - std::optional source_bucket; - - std::unique_ptr error_logger = - std::make_unique(driver, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, - ERROR_LOGGER_SHARDS); - RGWSyncModuleInstanceRef sync_module; - - rgw_bucket dest_bucket; - - struct source { - RGWDataSyncCtx sc; - RGWBucketInfo info; - rgw_bucket dest; - RGWBucketSyncFlowManager::pipe_handler handler; - std::string zone_name; - - source(RGWDataSyncEnv* env, const rgw_zone_id& zone, RGWRESTConn* conn, - const RGWBucketInfo& info, const 
rgw_bucket& dest, - const RGWBucketSyncFlowManager::pipe_handler& handler, - const std::string& zone_name) - : sc(env, conn, zone), info(info), dest(dest), handler(handler), - zone_name(zone_name) {} - }; - std::vector sources; - - int do_init(const DoutPrefixProvider *dpp, std::ostream* ostr); - RGWBucketPipeSyncStatusManager(rgw::sal::RadosStore* driver, - std::optional source_zone, - std::optional source_bucket, - const rgw_bucket& dest_bucket) - : driver(driver), source_zone(source_zone), source_bucket(source_bucket), - dest_bucket(dest_bucket) {} - - int remote_info(const DoutPrefixProvider *dpp, source& s, - uint64_t* oldest_gen, uint64_t* latest_gen, - uint64_t* num_shards); -public: - static tl::expected, int> - construct(const DoutPrefixProvider* dpp, rgw::sal::RadosStore* driver, - std::optional source_zone, - std::optional source_bucket, - const rgw_bucket& dest_bucket, std::ostream *ostream); - ~RGWBucketPipeSyncStatusManager() = default; - - - static std::string full_status_oid(const rgw_zone_id& source_zone, - const rgw_bucket& source_bucket, - const rgw_bucket& dest_bucket); - static std::string inc_status_oid(const rgw_zone_id& source_zone, - const rgw_bucket_sync_pair_info& bs, - uint64_t gen); - // specific source obj sync status, can be used by sync modules - static std::string obj_status_oid(const rgw_bucket_sync_pipe& sync_pipe, - const rgw_zone_id& source_zone, const rgw::sal::Object* obj); /* specific source obj sync status, - can be used by sync modules */ - - // implements DoutPrefixProvider - CephContext *get_cct() const override; - unsigned get_subsys() const override; - std::ostream& gen_prefix(std::ostream& out) const override; - - int init_sync_status(const DoutPrefixProvider *dpp); - tl::expected, int> read_sync_status( - const DoutPrefixProvider *dpp); - int run(const DoutPrefixProvider *dpp); -}; - -/// read the full sync status with respect to a source bucket -int rgw_read_bucket_full_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore *driver, - const rgw_sync_bucket_pipe& pipe, - rgw_bucket_sync_status *status, - optional_yield y); - -/// read the incremental sync status of all bucket shards from the given source zone -int rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore *driver, - const rgw_sync_bucket_pipe& pipe, - uint64_t gen, - std::vector *status); - -class RGWDefaultSyncModule : public RGWSyncModule { -public: - RGWDefaultSyncModule() {} - bool supports_writes() override { return true; } - bool supports_data_export() override { return true; } - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; -}; - -class RGWArchiveSyncModule : public RGWDefaultSyncModule { -public: - RGWArchiveSyncModule() {} - bool supports_writes() override { return true; } - bool supports_data_export() override { return false; } - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; -}; - -#endif diff --git a/src/rgw/store/rados/rgw_datalog.cc b/src/rgw/store/rados/rgw_datalog.cc deleted file mode 100644 index 3eeb820e2eb..00000000000 --- a/src/rgw/store/rados/rgw_datalog.cc +++ /dev/null @@ -1,1065 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include - -#include "common/debug.h" -#include "common/containers.h" -#include "common/errno.h" -#include 
"common/error_code.h" - -#include "common/async/blocked_completion.h" -#include "common/async/librados_completion.h" - -#include "cls/fifo/cls_fifo_types.h" -#include "cls/log/cls_log_client.h" - -#include "cls_fifo_legacy.h" -#include "rgw_bucket_layout.h" -#include "rgw_datalog.h" -#include "rgw_log_backing.h" -#include "rgw_tools.h" - -#define dout_context g_ceph_context -static constexpr auto dout_subsys = ceph_subsys_rgw; - -namespace bs = boost::system; -namespace lr = librados; - -using ceph::containers::tiny_vector; - -void rgw_data_change::dump(ceph::Formatter *f) const -{ - std::string type; - switch (entity_type) { - case ENTITY_TYPE_BUCKET: - type = "bucket"; - break; - default: - type = "unknown"; - } - encode_json("entity_type", type, f); - encode_json("key", key, f); - utime_t ut(timestamp); - encode_json("timestamp", ut, f); - encode_json("gen", gen, f); -} - -void rgw_data_change::decode_json(JSONObj *obj) { - std::string s; - JSONDecoder::decode_json("entity_type", s, obj); - if (s == "bucket") { - entity_type = ENTITY_TYPE_BUCKET; - } else { - entity_type = ENTITY_TYPE_UNKNOWN; - } - JSONDecoder::decode_json("key", key, obj); - utime_t ut; - JSONDecoder::decode_json("timestamp", ut, obj); - timestamp = ut.to_real_time(); - JSONDecoder::decode_json("gen", gen, obj); -} - -void rgw_data_change_log_entry::dump(Formatter *f) const -{ - encode_json("log_id", log_id, f); - utime_t ut(log_timestamp); - encode_json("log_timestamp", ut, f); - encode_json("entry", entry, f); -} - -void rgw_data_change_log_entry::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("log_id", log_id, obj); - utime_t ut; - JSONDecoder::decode_json("log_timestamp", ut, obj); - log_timestamp = ut.to_real_time(); - JSONDecoder::decode_json("entry", entry, obj); -} - -void rgw_data_notify_entry::dump(Formatter *f) const -{ - encode_json("key", key, f); - encode_json("gen", gen, f); -} - -void rgw_data_notify_entry::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("key", key, obj); - JSONDecoder::decode_json("gen", gen, obj); -} - -class RGWDataChangesOmap final : public RGWDataChangesBE { - using centries = std::list; - std::vector oids; - -public: - RGWDataChangesOmap(lr::IoCtx& ioctx, - RGWDataChangesLog& datalog, - uint64_t gen_id, - int num_shards) - : RGWDataChangesBE(ioctx, datalog, gen_id) { - oids.reserve(num_shards); - for (auto i = 0; i < num_shards; ++i) { - oids.push_back(get_oid(i)); - } - } - ~RGWDataChangesOmap() override = default; - - void prepare(ceph::real_time ut, const std::string& key, - ceph::buffer::list&& entry, entries& out) override { - if (!std::holds_alternative(out)) { - ceph_assert(std::visit([](const auto& v) { return std::empty(v); }, out)); - out = centries(); - } - - cls_log_entry e; - cls_log_add_prepare_entry(e, utime_t(ut), {}, key, entry); - std::get(out).push_back(std::move(e)); - } - int push(const DoutPrefixProvider *dpp, int index, entries&& items) override { - lr::ObjectWriteOperation op; - cls_log_add(op, std::get(items), true); - auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to push to " << oids[index] << cpp_strerror(-r) - << dendl; - } - return r; - } - int push(const DoutPrefixProvider *dpp, int index, ceph::real_time now, - const std::string& key, - ceph::buffer::list&& bl) override { - lr::ObjectWriteOperation op; - cls_log_add(op, utime_t(now), {}, key, bl); - auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); - if (r < 0) { - 
ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to push to " << oids[index] - << cpp_strerror(-r) << dendl; - } - return r; - } - int list(const DoutPrefixProvider *dpp, int index, int max_entries, - std::vector& entries, - std::optional marker, - std::string* out_marker, bool* truncated) override { - std::list log_entries; - lr::ObjectReadOperation op; - cls_log_list(op, {}, {}, std::string(marker.value_or("")), - max_entries, log_entries, out_marker, truncated); - auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, nullptr, null_yield); - if (r == -ENOENT) { - *truncated = false; - return 0; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to list " << oids[index] - << cpp_strerror(-r) << dendl; - return r; - } - for (auto iter = log_entries.begin(); iter != log_entries.end(); ++iter) { - rgw_data_change_log_entry log_entry; - log_entry.log_id = iter->id; - auto rt = iter->timestamp.to_real_time(); - log_entry.log_timestamp = rt; - auto liter = iter->data.cbegin(); - try { - decode(log_entry.entry, liter); - } catch (ceph::buffer::error& err) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to decode data changes log entry: " - << err.what() << dendl; - return -EIO; - } - entries.push_back(log_entry); - } - return 0; - } - int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) override { - cls_log_header header; - lr::ObjectReadOperation op; - cls_log_info(op, &header); - auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, nullptr, null_yield); - if (r == -ENOENT) r = 0; - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to get info from " << oids[index] - << cpp_strerror(-r) << dendl; - } else { - info->marker = header.max_marker; - info->last_update = header.max_time.to_real_time(); - } - return r; - } - int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) override { - lr::ObjectWriteOperation op; - cls_log_trim(op, {}, {}, {}, std::string(marker)); - auto r = rgw_rados_operate(dpp, ioctx, oids[index], &op, null_yield); - if (r == -ENOENT) r = -ENODATA; - if (r < 0 && r != -ENODATA) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to get info from " << oids[index] - << cpp_strerror(-r) << dendl; - } - return r; - } - int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, - lr::AioCompletion* c) override { - lr::ObjectWriteOperation op; - cls_log_trim(op, {}, {}, {}, std::string(marker)); - auto r = ioctx.aio_operate(oids[index], c, &op, 0); - if (r == -ENOENT) r = -ENODATA; - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to get info from " << oids[index] - << cpp_strerror(-r) << dendl; - } - return r; - } - std::string_view max_marker() const override { - return "99999999"; - } - int is_empty(const DoutPrefixProvider *dpp) override { - for (auto shard = 0u; shard < oids.size(); ++shard) { - std::list log_entries; - lr::ObjectReadOperation op; - std::string out_marker; - bool truncated; - cls_log_list(op, {}, {}, {}, 1, log_entries, &out_marker, &truncated); - auto r = rgw_rados_operate(dpp, ioctx, oids[shard], &op, nullptr, null_yield); - if (r == -ENOENT) { - continue; - } - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to list " << oids[shard] - << cpp_strerror(-r) << dendl; - return r; - } - if (!log_entries.empty()) { - return 0; - } - } - return 1; - } -}; - -class RGWDataChangesFIFO final : public RGWDataChangesBE { - using centries = std::vector; - tiny_vector 
fifos; - -public: - RGWDataChangesFIFO(lr::IoCtx& ioctx, - RGWDataChangesLog& datalog, - uint64_t gen_id, int shards) - : RGWDataChangesBE(ioctx, datalog, gen_id), - fifos(shards, [&ioctx, this](std::size_t i, auto emplacer) { - emplacer.emplace(ioctx, get_oid(i)); - }) {} - ~RGWDataChangesFIFO() override = default; - void prepare(ceph::real_time, const std::string&, - ceph::buffer::list&& entry, entries& out) override { - if (!std::holds_alternative(out)) { - ceph_assert(std::visit([](auto& v) { return std::empty(v); }, out)); - out = centries(); - } - std::get(out).push_back(std::move(entry)); - } - int push(const DoutPrefixProvider *dpp, int index, entries&& items) override { - auto r = fifos[index].push(dpp, std::get(items), null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to push to FIFO: " << get_oid(index) - << ": " << cpp_strerror(-r) << dendl; - } - return r; - } - int push(const DoutPrefixProvider *dpp, int index, ceph::real_time, - const std::string&, - ceph::buffer::list&& bl) override { - auto r = fifos[index].push(dpp, std::move(bl), null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to push to FIFO: " << get_oid(index) - << ": " << cpp_strerror(-r) << dendl; - } - return r; - } - int list(const DoutPrefixProvider *dpp, int index, int max_entries, - std::vector& entries, - std::optional marker, - std::string* out_marker, bool* truncated) override { - std::vector log_entries; - bool more = false; - auto r = fifos[index].list(dpp, max_entries, marker, &log_entries, &more, - null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to list FIFO: " << get_oid(index) - << ": " << cpp_strerror(-r) << dendl; - return r; - } - for (const auto& entry : log_entries) { - rgw_data_change_log_entry log_entry; - log_entry.log_id = entry.marker; - log_entry.log_timestamp = entry.mtime; - auto liter = entry.data.cbegin(); - try { - decode(log_entry.entry, liter); - } catch (const buffer::error& err) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": failed to decode data changes log entry: " - << err.what() << dendl; - return -EIO; - } - entries.push_back(std::move(log_entry)); - } - if (truncated) - *truncated = more; - if (out_marker && !log_entries.empty()) { - *out_marker = log_entries.back().marker; - } - return 0; - } - int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) override { - auto& fifo = fifos[index]; - auto r = fifo.read_meta(dpp, null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to get FIFO metadata: " << get_oid(index) - << ": " << cpp_strerror(-r) << dendl; - return r; - } - rados::cls::fifo::info m; - fifo.meta(dpp, m, null_yield); - auto p = m.head_part_num; - if (p < 0) { - info->marker = ""; - info->last_update = ceph::real_clock::zero(); - return 0; - } - rgw::cls::fifo::part_info h; - r = fifo.get_part_info(dpp, p, &h, null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to get part info: " << get_oid(index) << "/" << p - << ": " << cpp_strerror(-r) << dendl; - return r; - } - info->marker = rgw::cls::fifo::marker{p, h.last_ofs}.to_string(); - info->last_update = h.max_time; - return 0; - } - int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) override { - auto r = fifos[index].trim(dpp, marker, false, null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to trim FIFO: " << get_oid(index) - << ": " << 
cpp_strerror(-r) << dendl; - } - return r; - } - int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, - librados::AioCompletion* c) override { - int r = 0; - if (marker == rgw::cls::fifo::marker(0, 0).to_string()) { - rgw_complete_aio_completion(c, -ENODATA); - } else { - fifos[index].trim(dpp, marker, false, c, null_yield); - } - return r; - } - std::string_view max_marker() const override { - static const std::string mm = - rgw::cls::fifo::marker::max().to_string(); - return std::string_view(mm); - } - int is_empty(const DoutPrefixProvider *dpp) override { - std::vector<rgw::cls::fifo::list_entry> log_entries; - bool more = false; - for (auto shard = 0u; shard < fifos.size(); ++shard) { - auto r = fifos[shard].list(dpp, 1, {}, &log_entries, &more, - null_yield); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": unable to list FIFO: " << get_oid(shard) - << ": " << cpp_strerror(-r) << dendl; - return r; - } - if (!log_entries.empty()) { - return 0; - } - } - return 1; - } -}; - -RGWDataChangesLog::RGWDataChangesLog(CephContext* cct) - : cct(cct), - num_shards(cct->_conf->rgw_data_log_num_shards), - prefix(get_prefix()), - changes(cct->_conf->rgw_data_log_changes_size) {} - -bs::error_code DataLogBackends::handle_init(entries_t e) noexcept { - std::unique_lock l(m); - - for (const auto& [gen_id, gen] : e) { - if (gen.pruned) { - lderr(datalog.cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": ERROR: given empty generation: gen_id=" << gen_id << dendl; - } - if (count(gen_id) != 0) { - lderr(datalog.cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": ERROR: generation already exists: gen_id=" << gen_id << dendl; - } - try { - switch (gen.type) { - case log_type::omap: - emplace(gen_id, new RGWDataChangesOmap(ioctx, datalog, gen_id, shards)); - break; - case log_type::fifo: - emplace(gen_id, new RGWDataChangesFIFO(ioctx, datalog, gen_id, shards)); - break; - default: - lderr(datalog.cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": IMPOSSIBLE: invalid log type: gen_id=" << gen_id - << ", type=" << gen.type << dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - } catch (const bs::system_error& err) { - lderr(datalog.cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": error setting up backend: gen_id=" << gen_id - << ", err=" << err.what() << dendl; - return err.code(); - } - } - return {}; -} -bs::error_code DataLogBackends::handle_new_gens(entries_t e) noexcept { - return handle_init(std::move(e)); -} -bs::error_code DataLogBackends::handle_empty_to(uint64_t new_tail) noexcept { - std::unique_lock l(m); - auto i = cbegin(); - if (i->first < new_tail) { - return {}; - } - if (new_tail >= (cend() - 1)->first) { - lderr(datalog.cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": ERROR: attempt to trim head: new_tail=" << new_tail << dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - erase(i, upper_bound(new_tail)); - return {}; -} - - -int RGWDataChangesLog::start(const DoutPrefixProvider *dpp, const RGWZone* _zone, - const RGWZoneParams& zoneparams, - librados::Rados* lr) -{ - zone = _zone; - ceph_assert(zone); - auto defbacking = to_log_type( - cct->_conf.get_val<std::string>("rgw_default_data_log_backing")); - // Should be guaranteed by `set_enum_allowed` - ceph_assert(defbacking); - auto log_pool = zoneparams.log_pool; - auto r = rgw_init_ioctx(dpp, lr, log_pool, ioctx, true, false); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ - << ": Failed to initialize ioctx, r=" << r - << ", pool=" << log_pool << dendl; -
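handle_init() fills an ordered map from generation id to backend, and handle_empty_to() retires whole generations with a ranged erase that deliberately spares the head. A reduced sketch of that erase over a std::map (the real container is a boost flat_map guarded by a mutex):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  // generation id -> backend; a string stands in for the backend pointer.
  std::map<uint64_t, std::string> gens{{0, "omap"}, {1, "fifo"}, {2, "fifo"}};

  // handle_empty_to(new_tail): retire every generation up to and including
  // new_tail, but never the head generation, which is still being written.
  const uint64_t new_tail = 1;
  if (new_tail < gens.rbegin()->first)
    gens.erase(gens.begin(), gens.upper_bound(new_tail));

  for (const auto& [id, type] : gens)
    std::cout << "gen " << id << " -> " << type << '\n';  // only gen 2 left
}
```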
return -r; - } - - auto besr = logback_generations::init( - dpp, ioctx, metadata_log_oid(), [this](uint64_t gen_id, int shard) { - return get_oid(gen_id, shard); - }, - num_shards, *defbacking, null_yield, *this); - - - if (!besr) { - lderr(cct) << __PRETTY_FUNCTION__ - << ": Error initializing backends: " - << besr.error().message() << dendl; - return ceph::from_error_code(besr.error()); - } - - bes = std::move(*besr); - renew_thread = make_named_thread("rgw_dt_lg_renew", - &RGWDataChangesLog::renew_run, this); - return 0; -} - -int RGWDataChangesLog::choose_oid(const rgw_bucket_shard& bs) { - const auto& name = bs.bucket.name; - auto shard_shift = (bs.shard_id > 0 ? bs.shard_id : 0); - auto r = (ceph_str_hash_linux(name.data(), name.size()) + - shard_shift) % num_shards; - return static_cast(r); -} - -int RGWDataChangesLog::renew_entries(const DoutPrefixProvider *dpp) -{ - if (!zone->log_data) - return 0; - - /* we can't keep the bucket name as part of the cls_log_entry, and we need - * it later, so we keep two lists under the map */ - bc::flat_map, - RGWDataChangesBE::entries>> m; - - std::unique_lock l(lock); - decltype(cur_cycle) entries; - entries.swap(cur_cycle); - l.unlock(); - - auto ut = real_clock::now(); - auto be = bes->head(); - for (const auto& [bs, gen] : entries) { - auto index = choose_oid(bs); - - rgw_data_change change; - bufferlist bl; - change.entity_type = ENTITY_TYPE_BUCKET; - change.key = bs.get_key(); - change.timestamp = ut; - change.gen = gen; - encode(change, bl); - - m[index].first.push_back({bs, gen}); - be->prepare(ut, change.key, std::move(bl), m[index].second); - } - - for (auto& [index, p] : m) { - auto& [buckets, entries] = p; - - auto now = real_clock::now(); - - auto ret = be->push(dpp, index, std::move(entries)); - if (ret < 0) { - /* we don't really need to have a special handling for failed cases here, - * as this is just an optimization. 
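For placement, choose_oid() only needs a stable hash of the bucket name plus the bucket shard id, reduced modulo the shard count. A self-contained version of the same rule, with std::hash standing in for ceph_str_hash_linux (assumed interchangeable for illustration):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Same placement rule as RGWDataChangesLog::choose_oid(): hash the bucket
// name, add the bucket shard id, reduce modulo the log shard count.
int choose_shard(const std::string& bucket_name, int bucket_shard_id,
                 int num_shards) {
  const uint64_t h = std::hash<std::string>{}(bucket_name);
  const uint64_t shift = bucket_shard_id > 0 ? bucket_shard_id : 0;
  return static_cast<int>((h + shift) % num_shards);
}

int main() {
  // Consecutive bucket shards fan out across consecutive log shards.
  for (int shard = 0; shard < 4; ++shard)
    std::cout << "mybucket/" << shard << " -> data_log."
              << choose_shard("mybucket", shard, 128) << '\n';
}
```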
*/ - ldpp_dout(dpp, -1) << "ERROR: svc.cls->timelog.add() returned " << ret << dendl; - return ret; - } - - auto expiration = now; - expiration += ceph::make_timespan(cct->_conf->rgw_data_log_window); - for (auto& [bs, gen] : buckets) { - update_renewed(bs, gen, expiration); - } - } - - return 0; -} - -auto RGWDataChangesLog::_get_change(const rgw_bucket_shard& bs, - uint64_t gen) - -> ChangeStatusPtr -{ - ceph_assert(ceph_mutex_is_locked(lock)); - ChangeStatusPtr status; - if (!changes.find({bs, gen}, status)) { - status = std::make_shared<ChangeStatus>(); - changes.add({bs, gen}, status); - } - return status; -} - -void RGWDataChangesLog::register_renew(const rgw_bucket_shard& bs, - const rgw::bucket_log_layout_generation& gen) -{ - std::scoped_lock l{lock}; - cur_cycle.insert({bs, gen.gen}); -} - -void RGWDataChangesLog::update_renewed(const rgw_bucket_shard& bs, - uint64_t gen, - real_time expiration) -{ - std::unique_lock l{lock}; - auto status = _get_change(bs, gen); - l.unlock(); - - ldout(cct, 20) << "RGWDataChangesLog::update_renewed() bucket_name=" - << bs.bucket.name << " shard_id=" << bs.shard_id - << " expiration=" << expiration << dendl; - - std::unique_lock sl(status->lock); - status->cur_expiration = expiration; -} - -int RGWDataChangesLog::get_log_shard_id(rgw_bucket& bucket, int shard_id) { - rgw_bucket_shard bs(bucket, shard_id); - return choose_oid(bs); -} - -bool RGWDataChangesLog::filter_bucket(const DoutPrefixProvider *dpp, - const rgw_bucket& bucket, - optional_yield y) const -{ - if (!bucket_filter) { - return true; - } - - return bucket_filter(bucket, y, dpp); -} - -std::string RGWDataChangesLog::get_oid(uint64_t gen_id, int i) const { - return (gen_id > 0 ? - fmt::format("{}@G{}.{}", prefix, gen_id, i) : - fmt::format("{}.{}", prefix, i)); -} - -int RGWDataChangesLog::add_entry(const DoutPrefixProvider *dpp, - const RGWBucketInfo& bucket_info, - const rgw::bucket_log_layout_generation& gen, - int shard_id) -{ - auto& bucket = bucket_info.bucket; - - if (!filter_bucket(dpp, bucket, null_yield)) { - return 0; - } - - if (observer) { - observer->on_bucket_changed(bucket.get_key()); - } - - rgw_bucket_shard bs(bucket, shard_id); - - int index = choose_oid(bs); - - mark_modified(index, bs, gen.gen); - - std::unique_lock l(lock); - - auto status = _get_change(bs, gen.gen); - l.unlock(); - - auto now = real_clock::now(); - - std::unique_lock sl(status->lock); - - ldpp_dout(dpp, 20) << "RGWDataChangesLog::add_entry() bucket.name=" << bucket.name - << " shard_id=" << shard_id << " now=" << now - << " cur_expiration=" << status->cur_expiration << dendl; - - if (now < status->cur_expiration) { - /* no need to send, recently completed */ - sl.unlock(); - register_renew(bs, gen); - return 0; - } - - RefCountedCond* cond; - - if (status->pending) { - cond = status->cond; - - ceph_assert(cond); - - status->cond->get(); - sl.unlock(); - - int ret = cond->wait(); - cond->put(); - if (!ret) { - register_renew(bs, gen); - } - return ret; - } - - status->cond = new RefCountedCond; - status->pending = true; - - ceph::real_time expiration; - - int ret; - - do { - status->cur_sent = now; - - expiration = now; - expiration += ceph::make_timespan(cct->_conf->rgw_data_log_window); - - sl.unlock(); - - ceph::buffer::list bl; - rgw_data_change change; - change.entity_type = ENTITY_TYPE_BUCKET; - change.key = bs.get_key(); - change.timestamp = now; - change.gen = gen.gen; - encode(change, bl); - - ldpp_dout(dpp, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration="
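The heart of add_entry() is the suppression window: once a change for a given bucket shard and generation has been logged, further writes inside rgw_data_log_window seconds only re-register the shard for the renewal thread instead of writing again. A minimal model of that bookkeeping (steady_clock and a plain map stand in for Ceph's real_time and the LRU of ChangeStatus entries):

```cpp
#include <chrono>
#include <iostream>
#include <map>
#include <string>

using Clock = std::chrono::steady_clock;

// Minimal model of add_entry()'s dedup: a change inside the window after
// the last logged change is coalesced; the renewal thread keeps the
// suppressed shard alive instead of writing a duplicate log entry.
struct Suppressor {
  std::chrono::seconds window;
  std::map<std::string, Clock::time_point> expiration;  // per bucket shard

  // Returns true when the change must actually be written to the log.
  bool should_log(const std::string& bucket_shard) {
    const auto now = Clock::now();
    auto& exp = expiration[bucket_shard];
    if (now < exp)
      return false;        // recently logged; just re-register for renewal
    exp = now + window;    // open a fresh window
    return true;
  }
};

int main() {
  Suppressor s{std::chrono::seconds(30), {}};
  std::cout << s.should_log("mybucket:0") << '\n';  // 1: first write logs
  std::cout << s.should_log("mybucket:0") << '\n';  // 0: inside the window
}
```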
<< expiration << dendl; - - auto be = bes->head(); - ret = be->push(dpp, index, now, change.key, std::move(bl)); - - now = real_clock::now(); - - sl.lock(); - - } while (!ret && real_clock::now() > expiration); - - cond = status->cond; - - status->pending = false; - /* time of when operation started, not completed */ - status->cur_expiration = status->cur_sent; - status->cur_expiration += make_timespan(cct->_conf->rgw_data_log_window); - status->cond = nullptr; - sl.unlock(); - - cond->done(ret); - cond->put(); - - return ret; -} - -int DataLogBackends::list(const DoutPrefixProvider *dpp, int shard, int max_entries, - std::vector& entries, - std::string_view marker, - std::string* out_marker, - bool* truncated) -{ - const auto [start_id, start_cursor] = cursorgen(marker); - auto gen_id = start_id; - std::string out_cursor; - while (max_entries > 0) { - std::vector gentries; - std::unique_lock l(m); - auto i = lower_bound(gen_id); - if (i == end()) return 0; - auto be = i->second; - l.unlock(); - gen_id = be->gen_id; - auto r = be->list(dpp, shard, max_entries, gentries, - gen_id == start_id ? start_cursor : std::string{}, - &out_cursor, truncated); - if (r < 0) - return r; - - if (out_marker && !out_cursor.empty()) { - *out_marker = gencursor(gen_id, out_cursor); - } - for (auto& g : gentries) { - g.log_id = gencursor(gen_id, g.log_id); - } - if (int s = gentries.size(); s < 0 || s > max_entries) - max_entries = 0; - else - max_entries -= gentries.size(); - - std::move(gentries.begin(), gentries.end(), - std::back_inserter(entries)); - ++gen_id; - } - return 0; -} - -int RGWDataChangesLog::list_entries(const DoutPrefixProvider *dpp, int shard, int max_entries, - std::vector& entries, - std::string_view marker, - std::string* out_marker, bool* truncated) -{ - assert(shard < num_shards); - return bes->list(dpp, shard, max_entries, entries, marker, out_marker, truncated); -} - -int RGWDataChangesLog::list_entries(const DoutPrefixProvider *dpp, int max_entries, - std::vector& entries, - LogMarker& marker, bool *ptruncated) -{ - bool truncated; - entries.clear(); - for (; marker.shard < num_shards && int(entries.size()) < max_entries; - marker.shard++, marker.marker.clear()) { - int ret = list_entries(dpp, marker.shard, max_entries - entries.size(), - entries, marker.marker, NULL, &truncated); - if (ret == -ENOENT) { - continue; - } - if (ret < 0) { - return ret; - } - if (!truncated) { - *ptruncated = false; - return 0; - } - } - *ptruncated = (marker.shard < num_shards); - return 0; -} - -int RGWDataChangesLog::get_info(const DoutPrefixProvider *dpp, int shard_id, RGWDataChangesLogInfo *info) -{ - assert(shard_id < num_shards); - auto be = bes->head(); - auto r = be->get_info(dpp, shard_id, info); - if (!info->marker.empty()) { - info->marker = gencursor(be->gen_id, info->marker); - } - return r; -} - -int DataLogBackends::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker) -{ - auto [target_gen, cursor] = cursorgen(marker); - std::unique_lock l(m); - const auto head_gen = (end() - 1)->second->gen_id; - const auto tail_gen = begin()->first; - if (target_gen < tail_gen) return 0; - auto r = 0; - for (auto be = lower_bound(0)->second; - be->gen_id <= target_gen && be->gen_id <= head_gen && r >= 0; - be = upper_bound(be->gen_id)->second) { - l.unlock(); - auto c = be->gen_id == target_gen ? 
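Because listing continues across generations, DataLogBackends::list() wraps every cursor it hands back with the generation that produced it, and cursorgen() splits the pair apart again on the next call. The sketch below uses an illustrative `G<gen>@<cursor>` spelling; the actual encoding lives in rgw_log_backing.h and may differ in detail:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>

// Illustrative marker spelling: generation 0 keeps the bare cursor so
// markers minted before generations existed still parse.
std::string gencursor(uint64_t gen, const std::string& cursor) {
  return gen > 0 ? "G" + std::to_string(gen) + "@" + cursor : cursor;
}

std::pair<uint64_t, std::string> cursorgen(const std::string& marker) {
  if (!marker.empty() && marker[0] == 'G') {
    const auto at = marker.find('@');
    if (at != std::string::npos)
      return {std::stoull(marker.substr(1, at - 1)), marker.substr(at + 1)};
  }
  return {0, marker};  // legacy, generation-less marker
}

int main() {
  const auto m = gencursor(3, "1_1670000000.123_456.1");
  const auto [gen, cursor] = cursorgen(m);
  std::cout << m << " -> gen=" << gen << " cursor=" << cursor << '\n';
}
```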
cursor : be->max_marker(); - r = be->trim(dpp, shard_id, c); - if (r == -ENOENT) - r = -ENODATA; - if (r == -ENODATA && be->gen_id < target_gen) - r = 0; - if (be->gen_id == target_gen) - break; - l.lock(); - }; - return r; -} - -int RGWDataChangesLog::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker) -{ - assert(shard_id < num_shards); - return bes->trim_entries(dpp, shard_id, marker); -} - -class GenTrim : public rgw::cls::fifo::Completion { -public: - DataLogBackends* const bes; - const int shard_id; - const uint64_t target_gen; - const std::string cursor; - const uint64_t head_gen; - const uint64_t tail_gen; - boost::intrusive_ptr be; - - GenTrim(const DoutPrefixProvider *dpp, DataLogBackends* bes, int shard_id, uint64_t target_gen, - std::string cursor, uint64_t head_gen, uint64_t tail_gen, - boost::intrusive_ptr be, - lr::AioCompletion* super) - : Completion(dpp, super), bes(bes), shard_id(shard_id), target_gen(target_gen), - cursor(std::move(cursor)), head_gen(head_gen), tail_gen(tail_gen), - be(std::move(be)) {} - - void handle(const DoutPrefixProvider *dpp, Ptr&& p, int r) { - auto gen_id = be->gen_id; - be.reset(); - if (r == -ENOENT) - r = -ENODATA; - if (r == -ENODATA && gen_id < target_gen) - r = 0; - if (r < 0) { - complete(std::move(p), r); - return; - } - - { - std::unique_lock l(bes->m); - auto i = bes->upper_bound(gen_id); - if (i == bes->end() || i->first > target_gen || i->first > head_gen) { - l.unlock(); - complete(std::move(p), -ENODATA); - return; - } - be = i->second; - } - auto c = be->gen_id == target_gen ? cursor : be->max_marker(); - be->trim(dpp, shard_id, c, call(std::move(p))); - } -}; - -void DataLogBackends::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, - librados::AioCompletion* c) -{ - auto [target_gen, cursor] = cursorgen(marker); - std::unique_lock l(m); - const auto head_gen = (end() - 1)->second->gen_id; - const auto tail_gen = begin()->first; - if (target_gen < tail_gen) { - l.unlock(); - rgw_complete_aio_completion(c, -ENODATA); - return; - } - auto be = begin()->second; - l.unlock(); - auto gt = std::make_unique(dpp, this, shard_id, target_gen, - std::string(cursor), head_gen, tail_gen, - be, c); - - auto cc = be->gen_id == target_gen ? 
cursor : be->max_marker(); - be->trim(dpp, shard_id, cc, GenTrim::call(std::move(gt))); -} - -int DataLogBackends::trim_generations(const DoutPrefixProvider *dpp, std::optional& through) { - if (size() != 1) { - std::vector candidates; - { - std::scoped_lock l(m); - auto e = cend() - 1; - for (auto i = cbegin(); i < e; ++i) { - candidates.push_back(i->second); - } - } - - std::optional highest; - for (auto& be : candidates) { - auto r = be->is_empty(dpp); - if (r < 0) { - return r; - } else if (r == 1) { - highest = be->gen_id; - } else { - break; - } - } - - through = highest; - if (!highest) { - return 0; - } - auto ec = empty_to(dpp, *highest, null_yield); - if (ec) { - return ceph::from_error_code(ec); - } - } - - return ceph::from_error_code(remove_empty(dpp, null_yield)); -} - - -int RGWDataChangesLog::trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, - librados::AioCompletion* c) -{ - assert(shard_id < num_shards); - bes->trim_entries(dpp, shard_id, marker, c); - return 0; -} - -bool RGWDataChangesLog::going_down() const -{ - return down_flag; -} - -RGWDataChangesLog::~RGWDataChangesLog() { - down_flag = true; - if (renew_thread.joinable()) { - renew_stop(); - renew_thread.join(); - } -} - -void RGWDataChangesLog::renew_run() noexcept { - static constexpr auto runs_per_prune = 150; - auto run = 0; - for (;;) { - const DoutPrefix dp(cct, dout_subsys, "rgw data changes log: "); - ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl; - int r = renew_entries(&dp); - if (r < 0) { - ldpp_dout(&dp, 0) << "ERROR: RGWDataChangesLog::renew_entries returned error r=" << r << dendl; - } - - if (going_down()) - break; - - if (run == runs_per_prune) { - std::optional through; - ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: pruning old generations" << dendl; - trim_generations(&dp, through); - if (r < 0) { - derr << "RGWDataChangesLog::ChangesRenewThread: failed pruning r=" - << r << dendl; - } else if (through) { - ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: pruned generations " - << "through " << *through << "." << dendl; - } else { - ldpp_dout(&dp, 2) << "RGWDataChangesLog::ChangesRenewThread: nothing to prune." 
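The renewal thread's cadence is three quarters of rgw_data_log_window, so entries are always re-armed before their window lapses, and shutdown has to be able to interrupt the sleep. A reduced sketch of that loop shape (standard threads in place of Ceph's named thread and mutex wrappers; the stop handshake here is an assumption, not the exact Ceph sequence):

```cpp
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

// Shape of the renewal loop: re-arm pending entries every 3/4 of the data
// log window, and wake immediately when shutdown is requested.
struct RenewLoop {
  std::mutex m;
  std::condition_variable cv;
  std::atomic<bool> down{false};

  void run(std::chrono::seconds window) {
    while (!down) {
      // renew_entries() would run here.
      std::unique_lock l(m);
      cv.wait_for(l, window * 3 / 4, [&] { return down.load(); });
    }
  }
  void stop() {
    {
      std::lock_guard g(m);  // pair with the waiter to avoid a lost wakeup
      down = true;
    }
    cv.notify_all();
  }
};

int main() {
  RenewLoop loop;
  std::thread t([&] { loop.run(std::chrono::seconds(30)); });
  loop.stop();  // returns promptly instead of sleeping out the window
  t.join();
}
```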
- << dendl; - } - run = 0; - } else { - ++run; - } - - int interval = cct->_conf->rgw_data_log_window * 3 / 4; - std::unique_lock locker{renew_lock}; - renew_cond.wait_for(locker, std::chrono::seconds(interval)); - } -} - -void RGWDataChangesLog::renew_stop() -{ - std::lock_guard l{renew_lock}; - renew_cond.notify_all(); -} - -void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs, uint64_t gen) -{ - if (!cct->_conf->rgw_data_notify_interval_msec) { - return; - } - - auto key = bs.get_key(); - { - std::shared_lock rl{modified_lock}; // read lock to check for existence - auto shard = modified_shards.find(shard_id); - if (shard != modified_shards.end() && shard->second.count({key, gen})) { - return; - } - } - - std::unique_lock wl{modified_lock}; // write lock for insertion - modified_shards[shard_id].insert(rgw_data_notify_entry{key, gen}); -} - -std::string RGWDataChangesLog::max_marker() const { - return gencursor(std::numeric_limits::max(), - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); -} - -int RGWDataChangesLog::change_format(const DoutPrefixProvider *dpp, log_type type, optional_yield y) { - return ceph::from_error_code(bes->new_backing(dpp, type, y)); -} - -int RGWDataChangesLog::trim_generations(const DoutPrefixProvider *dpp, std::optional& through) { - return bes->trim_generations(dpp, through); -} - -void RGWDataChangesLogInfo::dump(Formatter *f) const -{ - encode_json("marker", marker, f); - utime_t ut(last_update); - encode_json("last_update", ut, f); -} - -void RGWDataChangesLogInfo::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("marker", marker, obj); - utime_t ut; - JSONDecoder::decode_json("last_update", ut, obj); - last_update = ut.to_real_time(); -} - - diff --git a/src/rgw/store/rados/rgw_datalog.h b/src/rgw/store/rados/rgw_datalog.h deleted file mode 100644 index 0bc4837c9c1..00000000000 --- a/src/rgw/store/rados/rgw_datalog.h +++ /dev/null @@ -1,386 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_DATALOG_H -#define CEPH_RGW_DATALOG_H - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "include/buffer.h" -#include "include/encoding.h" -#include "include/function2.hpp" - -#include "include/rados/librados.hpp" - -#include "common/ceph_context.h" -#include "common/ceph_json.h" -#include "common/ceph_time.h" -#include "common/Formatter.h" -#include "common/lru_map.h" -#include "common/RefCountedObj.h" - -#include "cls/log/cls_log_types.h" - -#include "rgw_basic_types.h" -#include "rgw_log_backing.h" -#include "rgw_sync_policy.h" -#include "rgw_zone.h" -#include "rgw_trim_bilog.h" - -namespace bc = boost::container; - -enum DataLogEntityType { - ENTITY_TYPE_UNKNOWN = 0, - ENTITY_TYPE_BUCKET = 1, -}; - -struct rgw_data_change { - DataLogEntityType entity_type; - std::string key; - ceph::real_time timestamp; - uint64_t gen = 0; - - void encode(ceph::buffer::list& bl) const { - // require decoders to recognize v2 when gen>0 - const uint8_t compat = (gen == 0) ? 
1 : 2; - ENCODE_START(2, compat, bl); - auto t = std::uint8_t(entity_type); - encode(t, bl); - encode(key, bl); - encode(timestamp, bl); - encode(gen, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - std::uint8_t t; - decode(t, bl); - entity_type = DataLogEntityType(t); - decode(key, bl); - decode(timestamp, bl); - if (struct_v < 2) { - gen = 0; - } else { - decode(gen, bl); - } - DECODE_FINISH(bl); - } - - void dump(ceph::Formatter* f) const; - void decode_json(JSONObj* obj); -}; -WRITE_CLASS_ENCODER(rgw_data_change) - -struct rgw_data_change_log_entry { - std::string log_id; - ceph::real_time log_timestamp; - rgw_data_change entry; - - void encode(ceph::buffer::list& bl) const { - ENCODE_START(1, 1, bl); - encode(log_id, bl); - encode(log_timestamp, bl); - encode(entry, bl); - ENCODE_FINISH(bl); - } - - void decode(ceph::buffer::list::const_iterator& bl) { - DECODE_START(1, bl); - decode(log_id, bl); - decode(log_timestamp, bl); - decode(entry, bl); - DECODE_FINISH(bl); - } - - void dump(ceph::Formatter* f) const; - void decode_json(JSONObj* obj); -}; -WRITE_CLASS_ENCODER(rgw_data_change_log_entry) - -struct RGWDataChangesLogInfo { - std::string marker; - ceph::real_time last_update; - - void dump(ceph::Formatter* f) const; - void decode_json(JSONObj* obj); -}; - -struct RGWDataChangesLogMarker { - int shard = 0; - std::string marker; - - RGWDataChangesLogMarker() = default; -}; - -class RGWDataChangesLog; - -struct rgw_data_notify_entry { - std::string key; - uint64_t gen = 0; - - void dump(ceph::Formatter* f) const; - void decode_json(JSONObj* obj); - - rgw_data_notify_entry& operator=(const rgw_data_notify_entry&) = default; - - bool operator <(const rgw_data_notify_entry& d) const { - if (key < d.key) { - return true; - } - if (d.key < key) { - return false; - } - return gen < d.gen; - } - friend std::ostream& operator <<(std::ostream& m, - const rgw_data_notify_entry& e) { - return m << "[key: " << e.key << ", gen: " << e.gen << "]"; - } -}; - -class RGWDataChangesBE; - -class DataLogBackends final - : public logback_generations, - private bc::flat_map> { - friend class logback_generations; - friend class GenTrim; - - std::mutex m; - RGWDataChangesLog& datalog; - - DataLogBackends(librados::IoCtx& ioctx, - std::string oid, - fu2::unique_function&& get_oid, - int shards, RGWDataChangesLog& datalog) noexcept - : logback_generations(ioctx, oid, std::move(get_oid), - shards), datalog(datalog) {} -public: - - boost::intrusive_ptr head() { - std::unique_lock l(m); - auto i = end(); - --i; - return i->second; - } - int list(const DoutPrefixProvider *dpp, int shard, int max_entries, - std::vector& entries, - std::string_view marker, - std::string* out_marker, bool* truncated); - int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker); - void trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, - librados::AioCompletion* c); - void set_zero(RGWDataChangesBE* be) { - emplace(0, be); - } - - bs::error_code handle_init(entries_t e) noexcept override; - bs::error_code handle_new_gens(entries_t e) noexcept override; - bs::error_code handle_empty_to(uint64_t new_tail) noexcept override; - - int trim_generations(const DoutPrefixProvider *dpp, std::optional& through); -}; - -struct BucketGen { - rgw_bucket_shard shard; - uint64_t gen; - - BucketGen(const rgw_bucket_shard& shard, uint64_t gen) - : shard(shard), gen(gen) {} - - BucketGen(rgw_bucket_shard&& shard, uint64_t gen) - : 
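rgw_data_change::encode() advertises version 2 but keeps compat at 1 whenever gen == 0, so logs written without generations stay readable by older decoders. The same version/compat idea in a standalone toy (plain text framing stands in for Ceph's ENCODE_START/DECODE_START macros):

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Toy stand-in for ENCODE_START/DECODE_START: a (version, compat) header.
// A decoder must reject payloads whose compat value exceeds what it knows.
std::string encode_change(const std::string& key, uint64_t gen) {
  // Advertise v2, but stay compatible with v1 readers while gen == 0,
  // mirroring rgw_data_change::encode().
  const int compat = gen == 0 ? 1 : 2;
  std::ostringstream out;
  out << 2 << ' ' << compat << ' ' << gen << ' ' << key;
  return out.str();
}

bool decode_as_v1(const std::string& frame, std::string& key) {
  std::istringstream in(frame);
  int v = 0, compat = 0;
  uint64_t gen = 0;
  in >> v >> compat >> gen >> key;
  return compat <= 1;  // a v1 decoder can only accept compat <= 1
}

int main() {
  std::string key;
  std::cout << decode_as_v1(encode_change("mybucket:0", 0), key) << '\n'; // 1
  std::cout << decode_as_v1(encode_change("mybucket:0", 7), key) << '\n'; // 0
}
```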
shard(std::move(shard)), gen(gen) {} - - BucketGen(const BucketGen&) = default; - BucketGen(BucketGen&&) = default; - BucketGen& operator =(const BucketGen&) = default; - BucketGen& operator =(BucketGen&&) = default; - - ~BucketGen() = default; -}; - -inline bool operator ==(const BucketGen& l, const BucketGen& r) { - return (l.shard == r.shard) && (l.gen == r.gen); -} - -inline bool operator <(const BucketGen& l, const BucketGen& r) { - if (l.shard < r.shard) { - return true; - } else if (l.shard == r.shard) { - return l.gen < r.gen; - } else { - return false; - } -} - -class RGWDataChangesLog { - friend DataLogBackends; - CephContext *cct; - librados::IoCtx ioctx; - rgw::BucketChangeObserver *observer = nullptr; - const RGWZone* zone; - std::unique_ptr bes; - - const int num_shards; - std::string get_prefix() { - auto prefix = cct->_conf->rgw_data_log_obj_prefix; - return prefix.empty() ? prefix : "data_log"; - } - std::string metadata_log_oid() { - return get_prefix() + "generations_metadata"; - } - std::string prefix; - - ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::lock"); - ceph::shared_mutex modified_lock = - ceph::make_shared_mutex("RGWDataChangesLog::modified_lock"); - bc::flat_map> modified_shards; - - std::atomic down_flag = { false }; - - struct ChangeStatus { - std::shared_ptr sync_policy; - ceph::real_time cur_expiration; - ceph::real_time cur_sent; - bool pending = false; - RefCountedCond* cond = nullptr; - ceph::mutex lock = ceph::make_mutex("RGWDataChangesLog::ChangeStatus"); - }; - - using ChangeStatusPtr = std::shared_ptr; - - lru_map changes; - - bc::flat_set cur_cycle; - - ChangeStatusPtr _get_change(const rgw_bucket_shard& bs, uint64_t gen); - void register_renew(const rgw_bucket_shard& bs, - const rgw::bucket_log_layout_generation& gen); - void update_renewed(const rgw_bucket_shard& bs, - uint64_t gen, - ceph::real_time expiration); - - ceph::mutex renew_lock = ceph::make_mutex("ChangesRenewThread::lock"); - ceph::condition_variable renew_cond; - void renew_run() noexcept; - void renew_stop(); - std::thread renew_thread; - - std::function bucket_filter; - bool going_down() const; - bool filter_bucket(const DoutPrefixProvider *dpp, const rgw_bucket& bucket, optional_yield y) const; - int renew_entries(const DoutPrefixProvider *dpp); - -public: - - RGWDataChangesLog(CephContext* cct); - ~RGWDataChangesLog(); - - int start(const DoutPrefixProvider *dpp, const RGWZone* _zone, const RGWZoneParams& zoneparams, - librados::Rados* lr); - int choose_oid(const rgw_bucket_shard& bs); - int add_entry(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - const rgw::bucket_log_layout_generation& gen, int shard_id); - int get_log_shard_id(rgw_bucket& bucket, int shard_id); - int list_entries(const DoutPrefixProvider *dpp, int shard, int max_entries, - std::vector& entries, - std::string_view marker, - std::string* out_marker, bool* truncated); - int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker); - int trim_entries(const DoutPrefixProvider *dpp, int shard_id, std::string_view marker, - librados::AioCompletion* c); // :( - int get_info(const DoutPrefixProvider *dpp, int shard_id, RGWDataChangesLogInfo *info); - - using LogMarker = RGWDataChangesLogMarker; - - int list_entries(const DoutPrefixProvider *dpp, int max_entries, - std::vector& entries, - LogMarker& marker, bool* ptruncated); - - void mark_modified(int shard_id, const rgw_bucket_shard& bs, uint64_t gen); - auto read_clear_modified() { - std::unique_lock 
wl{modified_lock}; - decltype(modified_shards) modified; - modified.swap(modified_shards); - modified_shards.clear(); - return modified; - } - - void set_observer(rgw::BucketChangeObserver *observer) { - this->observer = observer; - } - - void set_bucket_filter(decltype(bucket_filter)&& f) { - bucket_filter = std::move(f); - } - // a marker that compares greater than any other - std::string max_marker() const; - std::string get_oid(uint64_t gen_id, int shard_id) const; - - - int change_format(const DoutPrefixProvider *dpp, log_type type, optional_yield y); - int trim_generations(const DoutPrefixProvider *dpp, std::optional& through); -}; - -class RGWDataChangesBE : public boost::intrusive_ref_counter { -protected: - librados::IoCtx& ioctx; - CephContext* const cct; - RGWDataChangesLog& datalog; - - std::string get_oid(int shard_id) { - return datalog.get_oid(gen_id, shard_id); - } -public: - using entries = std::variant, - std::vector>; - - const uint64_t gen_id; - - RGWDataChangesBE(librados::IoCtx& ioctx, - RGWDataChangesLog& datalog, - uint64_t gen_id) - : ioctx(ioctx), cct(static_cast(ioctx.cct())), - datalog(datalog), gen_id(gen_id) {} - virtual ~RGWDataChangesBE() = default; - - virtual void prepare(ceph::real_time now, - const std::string& key, - ceph::buffer::list&& entry, - entries& out) = 0; - virtual int push(const DoutPrefixProvider *dpp, int index, entries&& items) = 0; - virtual int push(const DoutPrefixProvider *dpp, int index, ceph::real_time now, - const std::string& key, - ceph::buffer::list&& bl) = 0; - virtual int list(const DoutPrefixProvider *dpp, int shard, int max_entries, - std::vector& entries, - std::optional marker, - std::string* out_marker, bool* truncated) = 0; - virtual int get_info(const DoutPrefixProvider *dpp, int index, RGWDataChangesLogInfo *info) = 0; - virtual int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker) = 0; - virtual int trim(const DoutPrefixProvider *dpp, int index, std::string_view marker, - librados::AioCompletion* c) = 0; - virtual std::string_view max_marker() const = 0; - // 1 on empty, 0 on non-empty, negative on error. 
- virtual int is_empty(const DoutPrefixProvider *dpp) = 0; -}; - - -#endif diff --git a/src/rgw/store/rados/rgw_datalog_notify.cc b/src/rgw/store/rados/rgw_datalog_notify.cc deleted file mode 100644 index 12cdc532f3c..00000000000 --- a/src/rgw/store/rados/rgw_datalog_notify.cc +++ /dev/null @@ -1,76 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_datalog_notify.h" -#include "rgw_datalog.h" - -// custom encoding for v1 notify API -struct EntryEncoderV1 { - const rgw_data_notify_entry& entry; -}; -struct SetEncoderV1 { - const bc::flat_set& entries; -}; - -// encode rgw_data_notify_entry as string -void encode_json(const char *name, const EntryEncoderV1& e, Formatter *f) -{ - f->dump_string(name, e.entry.key); // encode the key only -} -// encode set as set -void encode_json(const char *name, const SetEncoderV1& e, Formatter *f) -{ - f->open_array_section(name); - for (auto& entry : e.entries) { - encode_json("obj", EntryEncoderV1{entry}, f); - } - f->close_section(); -} -// encode map> as map> -void encode_json(const char *name, const rgw_data_notify_v1_encoder& e, Formatter *f) -{ - f->open_array_section(name); - for (auto& [key, val] : e.shards) { - f->open_object_section("entry"); - encode_json("key", key, f); - encode_json("val", SetEncoderV1{val}, f); - f->close_section(); - } - f->close_section(); -} - -struct EntryDecoderV1 { - rgw_data_notify_entry& entry; -}; -struct SetDecoderV1 { - bc::flat_set& entries; -}; - -// decode string into rgw_data_notify_entry -void decode_json_obj(EntryDecoderV1& d, JSONObj *obj) -{ - decode_json_obj(d.entry.key, obj); - d.entry.gen = 0; -} -// decode set into set -void decode_json_obj(SetDecoderV1& d, JSONObj *obj) -{ - for (JSONObjIter o = obj->find_first(); !o.end(); ++o) { - rgw_data_notify_entry val; - auto decoder = EntryDecoderV1{val}; - decode_json_obj(decoder, *o); - d.entries.insert(std::move(val)); - } -} -// decode map> into map> -void decode_json_obj(rgw_data_notify_v1_decoder& d, JSONObj *obj) -{ - for (JSONObjIter o = obj->find_first(); !o.end(); ++o) { - int shard_id = 0; - JSONDecoder::decode_json("key", shard_id, *o); - bc::flat_set val; - SetDecoderV1 decoder{val}; - JSONDecoder::decode_json("val", decoder, *o); - d.shards[shard_id] = std::move(val); - } -} diff --git a/src/rgw/store/rados/rgw_datalog_notify.h b/src/rgw/store/rados/rgw_datalog_notify.h deleted file mode 100644 index 4cd1b3c110f..00000000000 --- a/src/rgw/store/rados/rgw_datalog_notify.h +++ /dev/null @@ -1,31 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include -#include - -#include "rgw_datalog.h" - -namespace bc = boost::container; - -namespace ceph { class Formatter; } -class JSONObj; - -class RGWCoroutine; -class RGWHTTPManager; -class RGWRESTConn; - -struct rgw_data_notify_entry; - -// json encoder and decoder for notify v1 API -struct rgw_data_notify_v1_encoder { - const bc::flat_map>& shards; -}; -void encode_json(const char *name, const rgw_data_notify_v1_encoder& e, - ceph::Formatter *f); -struct rgw_data_notify_v1_decoder { - bc::flat_map>& shards; -}; -void decode_json_obj(rgw_data_notify_v1_decoder& d, JSONObj *obj); diff --git a/src/rgw/store/rados/rgw_etag_verifier.cc b/src/rgw/store/rados/rgw_etag_verifier.cc deleted file mode 100644 index 52f7c794842..00000000000 --- a/src/rgw/store/rados/rgw_etag_verifier.cc +++ /dev/null @@ -1,191 +0,0 @@ -// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_etag_verifier.h" -#include "rgw_obj_manifest.h" - -#define dout_subsys ceph_subsys_rgw - -namespace rgw::putobj { - -int create_etag_verifier(const DoutPrefixProvider *dpp, - CephContext* cct, rgw::sal::DataProcessor* filter, - const bufferlist& manifest_bl, - const std::optional& compression, - etag_verifier_ptr& verifier) -{ - RGWObjManifest manifest; - - try { - auto miter = manifest_bl.cbegin(); - decode(manifest, miter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: couldn't decode manifest" << dendl; - return -EIO; - } - - RGWObjManifestRule rule; - bool found = manifest.get_rule(0, &rule); - if (!found) { - ldpp_dout(dpp, -1) << "ERROR: manifest->get_rule() could not find rule" << dendl; - return -EIO; - } - - if (rule.start_part_num == 0) { - /* Atomic object */ - verifier.emplace(cct, filter); - return 0; - } - - uint64_t cur_part_ofs = UINT64_MAX; - std::vector part_ofs; - - /* - * We must store the offset of each part to calculate the ETAGs for each - * MPU part. These part ETags then become the input for the MPU object - * Etag. - */ - for (auto mi = manifest.obj_begin(dpp); mi != manifest.obj_end(dpp); ++mi) { - if (cur_part_ofs == mi.get_part_ofs()) - continue; - cur_part_ofs = mi.get_part_ofs(); - ldpp_dout(dpp, 20) << "MPU Part offset:" << cur_part_ofs << dendl; - part_ofs.push_back(cur_part_ofs); - } - - if (compression) { - // if the source object was compressed, the manifest is storing - // compressed part offsets. transform the compressed offsets back to - // their original offsets by finding the first block of each part - const auto& blocks = compression->blocks; - auto block = blocks.begin(); - for (auto& ofs : part_ofs) { - // find the compression_block with new_ofs == ofs - constexpr auto less = [] (const compression_block& block, uint64_t ofs) { - return block.new_ofs < ofs; - }; - block = std::lower_bound(block, blocks.end(), ofs, less); - if (block == blocks.end() || block->new_ofs != ofs) { - ldpp_dout(dpp, 4) << "no match for compressed offset " << ofs - << ", disabling etag verification" << dendl; - return -EIO; - } - ofs = block->old_ofs; - ldpp_dout(dpp, 20) << "MPU Part uncompressed offset:" << ofs << dendl; - } - } - - verifier.emplace(cct, std::move(part_ofs), filter); - return 0; -} - -int ETagVerifier_Atomic::process(bufferlist&& in, uint64_t logical_offset) -{ - bufferlist out; - if (in.length() > 0) - hash.Update((const unsigned char *)in.c_str(), in.length()); - - return Pipe::process(std::move(in), logical_offset); -} - -void ETagVerifier_Atomic::calculate_etag() -{ - unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; - - /* Return early if ETag has already been calculated */ - if (!calculated_etag.empty()) - return; - - hash.Final(m); - buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); - calculated_etag = calc_md5; - ldout(cct, 20) << "Single part object: " << " etag:" << calculated_etag - << dendl; -} - -void ETagVerifier_MPU::process_end_of_MPU_part() -{ - unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char calc_md5_part[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; - std::string calculated_etag_part; - - hash.Final(m); - mpu_etag_hash.Update((const unsigned char *)m, sizeof(m)); - hash.Restart(); - - if (cct->_conf->subsys.should_gather(dout_subsys, 20)) { - buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5_part); - calculated_etag_part = calc_md5_part; - ldout(cct, 20) << "Part etag: " << 
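For compressed sources, create_etag_verifier() must translate each part's compressed offset back into the original object's offset before the MD5 boundaries can be placed; it does so with a binary search over the compression block list. The same remapping in isolation (compression_block reduced to two fields):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// compression_block reduced to two fields: where a block starts in the
// original object (old_ofs) vs. in the compressed stream (new_ofs).
struct Block { uint64_t old_ofs, new_ofs; };

// Map a compressed part offset back to its original offset, as
// create_etag_verifier() does; a part must begin exactly on a block
// boundary or ETag verification has to be abandoned.
std::optional<uint64_t> uncompressed_ofs(const std::vector<Block>& blocks,
                                         uint64_t compressed_ofs) {
  const auto it = std::lower_bound(
      blocks.begin(), blocks.end(), compressed_ofs,
      [](const Block& b, uint64_t ofs) { return b.new_ofs < ofs; });
  if (it == blocks.end() || it->new_ofs != compressed_ofs)
    return std::nullopt;  // no block starts here: disable verification
  return it->old_ofs;
}

int main() {
  const std::vector<Block> blocks{
      {0, 0}, {4 << 20, 1 << 20}, {8 << 20, 2 << 20}};
  if (const auto ofs = uncompressed_ofs(blocks, 1 << 20))
    std::cout << "part begins at original offset " << *ofs << '\n';
}
```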
calculated_etag_part << dendl; - } - - cur_part_index++; - next_part_index++; -} - -int ETagVerifier_MPU::process(bufferlist&& in, uint64_t logical_offset) -{ - uint64_t bl_end = in.length() + logical_offset; - - /* Handle the last MPU part */ - if (size_t(next_part_index) == part_ofs.size()) { - hash.Update((const unsigned char *)in.c_str(), in.length()); - goto done; - } - - /* Incoming bufferlist spans two MPU parts. Calculate separate ETags */ - if (bl_end > part_ofs[next_part_index]) { - - uint64_t part_one_len = part_ofs[next_part_index] - logical_offset; - hash.Update((const unsigned char *)in.c_str(), part_one_len); - process_end_of_MPU_part(); - - hash.Update((const unsigned char *)in.c_str() + part_one_len, - bl_end - part_ofs[cur_part_index]); - /* - * If we've moved to the last part of the MPU, avoid usage of - * part_ofs[next_part_index] as it will lead to out-of-range access. - */ - if (size_t(next_part_index) == part_ofs.size()) - goto done; - } else { - hash.Update((const unsigned char *)in.c_str(), in.length()); - } - - /* Update the MPU Etag if the current part has ended */ - if (logical_offset + in.length() + 1 == part_ofs[next_part_index]) - process_end_of_MPU_part(); - -done: - return Pipe::process(std::move(in), logical_offset); -} - -void ETagVerifier_MPU::calculate_etag() -{ - const uint32_t parts = part_ofs.size(); - constexpr auto digits10 = std::numeric_limits<uint32_t>::digits10; - constexpr auto extra = 2 + digits10; // add "-%u\0" at the end - - unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE], mpu_m[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + extra]; - - /* Return early if ETag has already been calculated */ - if (!calculated_etag.empty()) - return; - - hash.Final(m); - mpu_etag_hash.Update((const unsigned char *)m, sizeof(m)); - - /* Refer to RGWCompleteMultipart::execute() for ETag calculation for an MPU object */ - mpu_etag_hash.Final(mpu_m); - buf_to_hex(mpu_m, CEPH_CRYPTO_MD5_DIGESTSIZE, final_etag_str); - snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], - sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, - "-%u", parts); - - calculated_etag = final_etag_str; - ldout(cct, 20) << "MPU calculated ETag:" << calculated_etag << dendl; -} - -} // namespace rgw::putobj diff --git a/src/rgw/store/rados/rgw_etag_verifier.h b/src/rgw/store/rados/rgw_etag_verifier.h deleted file mode 100644 index 56a679ebddd..00000000000 --- a/src/rgw/store/rados/rgw_etag_verifier.h +++ /dev/null @@ -1,92 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * RGW Etag Verifier is an RGW filter which enables objects copied using - * multisite sync to be verified using their ETag from the source, i.e. the MD5 - * checksum of the object is computed at the destination and verified to be - * identical to the ETag stored in the object HEAD at the source cluster. - * - * For MPU objects, a different filter named RGWMultipartEtagFilter is applied - * which re-computes the ETag using RGWObjManifest. This computes the ETag using the - * same algorithm used at the source cluster, i.e. the MD5 sum of the individual ETags - * on the MPU parts.
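The final assembly mirrors the S3 multipart convention that RGWCompleteMultipart::execute() follows: hash the concatenated binary MD5 digests of the parts, print the result in hex, and append "-<part count>". A standalone sketch using OpenSSL's one-shot MD5 (assumes linking with -lcrypto; the real code uses Ceph's MD5 wrapper and buf_to_hex):

```cpp
#include <cstdio>
#include <string>
#include <vector>
#include <openssl/md5.h>  // link with -lcrypto

// S3-style multipart ETag: MD5 over the concatenated *binary* digests of
// the parts, rendered as hex, with "-<part count>" appended.
std::string multipart_etag(const std::vector<std::string>& parts) {
  std::string all_digests;
  unsigned char d[MD5_DIGEST_LENGTH];
  for (const auto& part : parts) {
    MD5(reinterpret_cast<const unsigned char*>(part.data()), part.size(), d);
    all_digests.append(reinterpret_cast<const char*>(d), sizeof(d));
  }
  MD5(reinterpret_cast<const unsigned char*>(all_digests.data()),
      all_digests.size(), d);
  char hex[2 * MD5_DIGEST_LENGTH + 1];
  for (int i = 0; i < MD5_DIGEST_LENGTH; ++i)
    std::snprintf(hex + 2 * i, 3, "%02x", d[i]);
  return std::string(hex) + "-" + std::to_string(parts.size());
}

int main() {
  // Real parts are megabytes; short strings keep the demo self-contained.
  std::printf("%s\n", multipart_etag({"part-one", "part-two"}).c_str());
}
```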
- */ -#ifndef CEPH_RGW_ETAG_VERIFIER_H -#define CEPH_RGW_ETAG_VERIFIER_H - -#include "rgw_putobj.h" -#include "rgw_op.h" -#include "common/static_ptr.h" - -namespace rgw::putobj { - -class ETagVerifier : public rgw::putobj::Pipe -{ -protected: - CephContext* cct; - MD5 hash; - std::string calculated_etag; - -public: - ETagVerifier(CephContext* cct_, rgw::sal::DataProcessor *next) - : Pipe(next), cct(cct_) { - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - } - - virtual void calculate_etag() = 0; - std::string get_calculated_etag() { return calculated_etag;} - -}; /* ETagVerifier */ - -class ETagVerifier_Atomic : public ETagVerifier -{ -public: - ETagVerifier_Atomic(CephContext* cct_, rgw::sal::DataProcessor *next) - : ETagVerifier(cct_, next) {} - - int process(bufferlist&& data, uint64_t logical_offset) override; - void calculate_etag() override; - -}; /* ETagVerifier_Atomic */ - -class ETagVerifier_MPU : public ETagVerifier -{ - std::vector part_ofs; - uint64_t cur_part_index{0}, next_part_index{1}; - MD5 mpu_etag_hash; - - void process_end_of_MPU_part(); - -public: - ETagVerifier_MPU(CephContext* cct, - std::vector part_ofs, - rgw::sal::DataProcessor *next) - : ETagVerifier(cct, next), - part_ofs(std::move(part_ofs)) - { - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - } - - int process(bufferlist&& data, uint64_t logical_offset) override; - void calculate_etag() override; - -}; /* ETagVerifier_MPU */ - -constexpr auto max_etag_verifier_size = std::max( - sizeof(ETagVerifier_Atomic), - sizeof(ETagVerifier_MPU) - ); -using etag_verifier_ptr = ceph::static_ptr; - -int create_etag_verifier(const DoutPrefixProvider *dpp, - CephContext* cct, rgw::sal::DataProcessor* next, - const bufferlist& manifest_bl, - const std::optional& compression, - etag_verifier_ptr& verifier); - -} // namespace rgw::putobj - -#endif /* CEPH_RGW_ETAG_VERIFIER_H */ diff --git a/src/rgw/store/rados/rgw_gc.cc b/src/rgw/store/rados/rgw_gc.cc deleted file mode 100644 index bd16bde1bd5..00000000000 --- a/src/rgw/store/rados/rgw_gc.cc +++ /dev/null @@ -1,811 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_gc.h" - -#include "rgw_tools.h" -#include "include/scope_guard.h" -#include "include/rados/librados.hpp" -#include "cls/rgw/cls_rgw_client.h" -#include "cls/rgw_gc/cls_rgw_gc_client.h" -#include "cls/refcount/cls_refcount_client.h" -#include "cls/version/cls_version_client.h" -#include "rgw_perf_counters.h" -#include "cls/lock/cls_lock_client.h" -#include "include/random.h" -#include "rgw_gc_log.h" - -#include // XXX -#include -#include "xxhash.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; -using namespace librados; - -static string gc_oid_prefix = "gc"; -static string gc_index_lock_name = "gc_process"; - -void RGWGC::initialize(CephContext *_cct, RGWRados *_store) { - cct = _cct; - store = _store; - - max_objs = min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max()); - - obj_names = new string[max_objs]; - - for (int i = 0; i < max_objs; i++) { - obj_names[i] = gc_oid_prefix; - char buf[32]; - snprintf(buf, 32, ".%d", i); - obj_names[i].append(buf); - - auto it = transitioned_objects_cache.begin() + i; - transitioned_objects_cache.insert(it, false); - - //version = 0 -> not ready for transition - //version = 1 -> marked ready 
for transition - librados::ObjectWriteOperation op; - op.create(false); - const uint64_t queue_size = cct->_conf->rgw_gc_max_queue_size, num_deferred_entries = cct->_conf->rgw_gc_max_deferred; - gc_log_init2(op, queue_size, num_deferred_entries); - store->gc_operate(this, obj_names[i], &op); - } -} - -void RGWGC::finalize() -{ - delete[] obj_names; -} - -int RGWGC::tag_index(const string& tag) -{ - return rgw_shards_mod(XXH64(tag.c_str(), tag.size(), seed), max_objs); -} - -std::tuple> RGWGC::send_split_chain(const cls_rgw_obj_chain& chain, const std::string& tag) -{ - ldpp_dout(this, 20) << "RGWGC::send_split_chain - tag is: " << tag << dendl; - - if (cct->_conf->rgw_max_chunk_size) { - cls_rgw_obj_chain broken_chain; - ldpp_dout(this, 20) << "RGWGC::send_split_chain - rgw_max_chunk_size is: " << cct->_conf->rgw_max_chunk_size << dendl; - - for (auto it = chain.objs.begin(); it != chain.objs.end(); it++) { - ldpp_dout(this, 20) << "RGWGC::send_split_chain - adding obj with name: " << it->key << dendl; - broken_chain.objs.emplace_back(*it); - cls_rgw_gc_obj_info info; - info.tag = tag; - info.chain = broken_chain; - cls_rgw_gc_set_entry_op op; - op.info = info; - size_t total_encoded_size = op.estimate_encoded_size(); - ldpp_dout(this, 20) << "RGWGC::send_split_chain - total_encoded_size is: " << total_encoded_size << dendl; - - if (total_encoded_size > cct->_conf->rgw_max_chunk_size) { //dont add to chain, and send to gc - broken_chain.objs.pop_back(); - --it; - ldpp_dout(this, 20) << "RGWGC::send_split_chain - more than, dont add to broken chain and send chain" << dendl; - auto ret = send_chain(broken_chain, tag); - if (ret < 0) { - broken_chain.objs.insert(broken_chain.objs.end(), it, chain.objs.end()); // add all the remainder objs to the list to be deleted inline - ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; - return {ret, {broken_chain}}; - } - broken_chain.objs.clear(); - } - } - if (!broken_chain.objs.empty()) { //when the chain is smaller than or equal to rgw_max_chunk_size - ldpp_dout(this, 20) << "RGWGC::send_split_chain - sending leftover objects" << dendl; - auto ret = send_chain(broken_chain, tag); - if (ret < 0) { - ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; - return {ret, {broken_chain}}; - } - } - } else { - auto ret = send_chain(chain, tag); - if (ret < 0) { - ldpp_dout(this, 0) << "RGWGC::send_split_chain - send chain returned error: " << ret << dendl; - return {ret, {std::move(chain)}}; - } - } - return {0, {}}; -} - -int RGWGC::send_chain(const cls_rgw_obj_chain& chain, const string& tag) -{ - ObjectWriteOperation op; - cls_rgw_gc_obj_info info; - info.chain = chain; - info.tag = tag; - gc_log_enqueue2(op, cct->_conf->rgw_gc_obj_min_wait, info); - - int i = tag_index(tag); - - ldpp_dout(this, 20) << "RGWGC::send_chain - on object name: " << obj_names[i] << "tag is: " << tag << dendl; - - auto ret = store->gc_operate(this, obj_names[i], &op); - if (ret != -ECANCELED && ret != -EPERM) { - return ret; - } - ObjectWriteOperation set_entry_op; - cls_rgw_gc_set_entry(set_entry_op, cct->_conf->rgw_gc_obj_min_wait, info); - return store->gc_operate(this, obj_names[i], &set_entry_op); -} - -struct defer_chain_state { - librados::AioCompletion* completion = nullptr; - // TODO: hold a reference on the state in RGWGC to avoid use-after-free if - // RGWGC destructs before this completion fires - RGWGC* gc = nullptr; - cls_rgw_gc_obj_info info; - - ~defer_chain_state() { - if 
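send_split_chain() keeps each enqueued GC entry under rgw_max_chunk_size by growing a batch until its estimated encoded size would overflow, flushing, and continuing with the object that did not fit. The same greedy split in miniature (estimate() and its size constants are illustrative, not the cls encoding math):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-in for cls_rgw_gc_set_entry_op::estimate_encoded_size().
size_t estimate(const std::vector<std::string>& objs) {
  size_t n = 64;                       // fixed per-entry overhead (made up)
  for (const auto& o : objs) n += o.size() + 16;
  return n;
}

// Shape of RGWGC::send_split_chain(): grow the batch until the estimate
// would overflow, send it without the overflowing object, then continue
// the next batch with that object.
void split_and_send(const std::vector<std::string>& chain, size_t limit) {
  std::vector<std::string> batch;
  for (const auto& obj : chain) {
    batch.push_back(obj);
    if (estimate(batch) > limit) {
      batch.pop_back();
      std::cout << "send_chain with " << batch.size() << " objs\n";
      batch.assign(1, obj);            // start the next batch with obj
    }
  }
  if (!batch.empty())
    std::cout << "send_chain with leftover " << batch.size() << " objs\n";
}

int main() {
  const std::vector<std::string> chain(10, std::string(100, 'x'));
  split_and_send(chain, 512);
}
```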
(completion) { - completion->release(); - } - } -}; - -static void async_defer_callback(librados::completion_t, void* arg) -{ - std::unique_ptr state{static_cast(arg)}; - if (state->completion->get_return_value() == -ECANCELED) { - state->gc->on_defer_canceled(state->info); - } -} - -void RGWGC::on_defer_canceled(const cls_rgw_gc_obj_info& info) -{ - const std::string& tag = info.tag; - const int i = tag_index(tag); - - // ECANCELED from cls_version_check() tells us that we've transitioned - transitioned_objects_cache[i] = true; - - ObjectWriteOperation op; - cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); - cls_rgw_gc_remove(op, {tag}); - - auto c = librados::Rados::aio_create_completion(nullptr, nullptr); - store->gc_aio_operate(obj_names[i], c, &op); - c->release(); -} - -int RGWGC::async_defer_chain(const string& tag, const cls_rgw_obj_chain& chain) -{ - const int i = tag_index(tag); - cls_rgw_gc_obj_info info; - info.chain = chain; - info.tag = tag; - - // if we've transitioned this shard object, we can rely on the cls_rgw_gc queue - if (transitioned_objects_cache[i]) { - ObjectWriteOperation op; - cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); - - // this tag may still be present in omap, so remove it once the cls_rgw_gc - // enqueue succeeds - cls_rgw_gc_remove(op, {tag}); - - auto c = librados::Rados::aio_create_completion(nullptr, nullptr); - int ret = store->gc_aio_operate(obj_names[i], c, &op); - c->release(); - return ret; - } - - // if we haven't seen the transition yet, write the defer to omap with cls_rgw - ObjectWriteOperation op; - - // assert that we haven't initialized cls_rgw_gc queue. this prevents us - // from writing new entries to omap after the transition - gc_log_defer1(op, cct->_conf->rgw_gc_obj_min_wait, info); - - // prepare a callback to detect the transition via ECANCELED from cls_version_check() - auto state = std::make_unique(); - state->gc = this; - state->info.chain = chain; - state->info.tag = tag; - state->completion = librados::Rados::aio_create_completion( - state.get(), async_defer_callback); - - int ret = store->gc_aio_operate(obj_names[i], state->completion, &op); - if (ret == 0) { - state.release(); // release ownership until async_defer_callback() - } - return ret; -} - -int RGWGC::remove(int index, const std::vector& tags, AioCompletion **pc) -{ - ObjectWriteOperation op; - cls_rgw_gc_remove(op, tags); - - auto c = librados::Rados::aio_create_completion(nullptr, nullptr); - int ret = store->gc_aio_operate(obj_names[index], c, &op); - if (ret < 0) { - c->release(); - } else { - *pc = c; - } - return ret; -} - -int RGWGC::remove(int index, int num_entries) -{ - ObjectWriteOperation op; - cls_rgw_gc_queue_remove_entries(op, num_entries); - - return store->gc_operate(this, obj_names[index], &op); -} - -int RGWGC::list(int *index, string& marker, uint32_t max, bool expired_only, std::list& result, bool *truncated, bool& processing_queue) -{ - result.clear(); - string next_marker; - bool check_queue = false; - - for (; *index < max_objs && result.size() < max; (*index)++, marker.clear(), check_queue = false) { - std::list entries, queue_entries; - int ret = 0; - - //processing_queue is set to true from previous iteration if the queue was under process and probably has more elements in it. - if (! transitioned_objects_cache[*index] && ! check_queue && ! 
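async_defer_chain() hands heap-allocated callback state to librados and reclaims it inside the completion, where -ECANCELED from the embedded version check is the signal that the shard has switched to the queue format. A reduced model of that ownership handoff (a plain function and int return code stand in for the librados completion types):

```cpp
#include <cerrno>
#include <iostream>
#include <memory>
#include <string>

// Ownership pattern from async_defer_chain(): heap-allocate the callback
// state, lend the raw pointer to the async op, and re-wrap it in a
// unique_ptr inside the completion so it is freed on every path.
struct DeferState { std::string tag; };

void async_defer_callback(int rc, void* arg) {
  std::unique_ptr<DeferState> state{static_cast<DeferState*>(arg)};
  if (rc == -ECANCELED)  // version check failed: shard has transitioned
    std::cout << "shard transitioned; re-defer " << state->tag
              << " through the queue\n";
}  // state destroyed here

int main() {
  auto state = std::make_unique<DeferState>();
  state->tag = "tag-123";
  void* arg = state.release();            // the async op now owns the state
  async_defer_callback(-ECANCELED, arg);  // simulated completion
}
```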
processing_queue) { - ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, max - result.size(), expired_only, entries, truncated, next_marker); - if (ret != -ENOENT && ret < 0) { - return ret; - } - obj_version objv; - cls_version_read(store->gc_pool_ctx, obj_names[*index], &objv); - if (ret == -ENOENT || entries.size() == 0) { - if (objv.ver == 0) { - continue; - } else { - if (! expired_only) { - transitioned_objects_cache[*index] = true; - marker.clear(); - } else { - std::list non_expired_entries; - ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, 1, false, non_expired_entries, truncated, next_marker); - if (non_expired_entries.size() == 0) { - transitioned_objects_cache[*index] = true; - marker.clear(); - } - } - } - } - if ((objv.ver == 1) && (entries.size() < max - result.size())) { - check_queue = true; - marker.clear(); - } - } - if (transitioned_objects_cache[*index] || check_queue || processing_queue) { - processing_queue = false; - ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[*index], marker, (max - result.size()) - entries.size(), expired_only, queue_entries, truncated, next_marker); - if (ret < 0) { - return ret; - } - } - if (entries.size() == 0 && queue_entries.size() == 0) - continue; - - std::list::iterator iter; - for (iter = entries.begin(); iter != entries.end(); ++iter) { - result.push_back(*iter); - } - - for (iter = queue_entries.begin(); iter != queue_entries.end(); ++iter) { - result.push_back(*iter); - } - - marker = next_marker; - - if (*index == max_objs - 1) { - if (queue_entries.size() > 0 && *truncated) { - processing_queue = true; - } else { - processing_queue = false; - } - /* we cut short here, truncated will hold the correct value */ - return 0; - } - - if (result.size() == max) { - if (queue_entries.size() > 0 && *truncated) { - processing_queue = true; - } else { - processing_queue = false; - *index += 1; //move to next gc object - } - - /* close approximation, it might be that the next of the objects don't hold - * anything, in this case truncated should have been false, but we can find - * that out on the next iteration - */ - *truncated = true; - return 0; - } - } - *truncated = false; - processing_queue = false; - - return 0; -} - -class RGWGCIOManager { - const DoutPrefixProvider* dpp; - CephContext *cct; - RGWGC *gc; - - struct IO { - enum Type { - UnknownIO = 0, - TailIO = 1, - IndexIO = 2, - } type{UnknownIO}; - librados::AioCompletion *c{nullptr}; - string oid; - int index{-1}; - string tag; - }; - - deque ios; - vector > remove_tags; - /* tracks the number of remaining shadow objects for a given tag in order to - * only remove the tag once all shadow objects have themselves been removed - */ - vector > tag_io_size; - -#define MAX_AIO_DEFAULT 10 - size_t max_aio{MAX_AIO_DEFAULT}; - -public: - RGWGCIOManager(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp), - cct(_cct), - gc(_gc) { - max_aio = cct->_conf->rgw_gc_max_concurrent_io; - remove_tags.resize(min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max())); - tag_io_size.resize(min(static_cast(cct->_conf->rgw_gc_max_objs), rgw_shards_max())); - } - - ~RGWGCIOManager() { - for (auto io : ios) { - io.c->release(); - } - } - - int schedule_io(IoCtx *ioctx, const string& oid, ObjectWriteOperation *op, - int index, const string& tag) { - while (ios.size() > max_aio) { - if (gc->going_down()) { - return 0; - } - auto ret = handle_next_completion(); - //Return error if we are using queue, else ignore it - if 
(gc->transitioned_objects_cache[index] && ret < 0) { - return ret; - } - } - - auto c = librados::Rados::aio_create_completion(nullptr, nullptr); - int ret = ioctx->aio_operate(oid, c, op); - if (ret < 0) { - return ret; - } - ios.push_back(IO{IO::TailIO, c, oid, index, tag}); - - return 0; - } - - int handle_next_completion() { - ceph_assert(!ios.empty()); - IO& io = ios.front(); - io.c->wait_for_complete(); - int ret = io.c->get_return_value(); - io.c->release(); - - if (ret == -ENOENT) { - ret = 0; - } - - if (io.type == IO::IndexIO && ! gc->transitioned_objects_cache[io.index]) { - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: gc cleanup of tags on gc shard index=" << - io.index << " returned error, ret=" << ret << dendl; - } - goto done; - } - - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: gc could not remove oid=" << io.oid << - ", ret=" << ret << dendl; - goto done; - } - - if (! gc->transitioned_objects_cache[io.index]) { - schedule_tag_removal(io.index, io.tag); - } - - done: - ios.pop_front(); - return ret; - } - - /* This is a request to schedule a tag removal. It will be called once when - * there are no shadow objects. But it will also be called for every shadow - * object when there are any. Since we do not want the tag to be removed - * until all shadow objects have been successfully removed, the scheduling - * will not happen until the shadow object count goes down to zero - */ - void schedule_tag_removal(int index, string tag) { - auto& ts = tag_io_size[index]; - auto ts_it = ts.find(tag); - if (ts_it != ts.end()) { - auto& size = ts_it->second; - --size; - // wait all shadow obj delete return - if (size != 0) - return; - - ts.erase(ts_it); - } - - auto& rt = remove_tags[index]; - - rt.push_back(tag); - if (rt.size() >= (size_t)cct->_conf->rgw_gc_max_trim_chunk) { - flush_remove_tags(index, rt); - } - } - - void add_tag_io_size(int index, string tag, size_t size) { - auto& ts = tag_io_size[index]; - ts.emplace(tag, size); - } - - int drain_ios() { - int ret_val = 0; - while (!ios.empty()) { - if (gc->going_down()) { - return -EAGAIN; - } - auto ret = handle_next_completion(); - if (ret < 0) { - ret_val = ret; - } - } - return ret_val; - } - - void drain() { - drain_ios(); - flush_remove_tags(); - /* the tags draining might have generated more ios, drain those too */ - drain_ios(); - } - - void flush_remove_tags(int index, vector& rt) { - IO index_io; - index_io.type = IO::IndexIO; - index_io.index = index; - - ldpp_dout(dpp, 20) << __func__ << - " removing entries from gc log shard index=" << index << ", size=" << - rt.size() << ", entries=" << rt << dendl; - - auto rt_guard = make_scope_guard( - [&] - { - rt.clear(); - } - ); - - int ret = gc->remove(index, rt, &index_io.c); - if (ret < 0) { - /* we already cleared list of tags, this prevents us from - * ballooning in case of a persistent problem - */ - ldpp_dout(dpp, 0) << "WARNING: failed to remove tags on gc shard index=" << - index << " ret=" << ret << dendl; - return; - } - if (perfcounter) { - /* log the count of tags retired for rate estimation */ - perfcounter->inc(l_rgw_gc_retire, rt.size()); - } - ios.push_back(index_io); - } - - void flush_remove_tags() { - int index = 0; - for (auto& rt : remove_tags) { - if (! 
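The manager's tag bookkeeping is a countdown: a tag becomes eligible for removal only after the deletion of every shadow object that referenced it has completed. Reduced to a standalone structure (names are illustrative):

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Shape of RGWGCIOManager's bookkeeping: a tag may be trimmed from the gc
// log only after every shadow-object deletion referencing it completes.
struct TagTracker {
  std::map<std::string, size_t> pending;  // tag -> deletions in flight
  std::vector<std::string> removable;

  void add(const std::string& tag, size_t shadow_objs) {
    pending.emplace(tag, shadow_objs);
  }
  void on_deleted(const std::string& tag) {
    const auto it = pending.find(tag);
    if (it != pending.end()) {
      if (--it->second != 0)
        return;                // still waiting on other shadow objects
      pending.erase(it);
    }
    removable.push_back(tag);  // now safe to trim the tag from the log
  }
};

int main() {
  TagTracker t;
  t.add("tag-a", 2);
  t.on_deleted("tag-a");
  t.on_deleted("tag-a");
  std::cout << "removable tags: " << t.removable.size() << '\n';  // 1
}
```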
gc->transitioned_objects_cache[index]) { - flush_remove_tags(index, rt); - } - ++index; - } - } - - int remove_queue_entries(int index, int num_entries) { - int ret = gc->remove(index, num_entries); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to remove queue entries on index=" << - index << " ret=" << ret << dendl; - return ret; - } - if (perfcounter) { - /* log the count of tags retired for rate estimation */ - perfcounter->inc(l_rgw_gc_retire, num_entries); - } - return 0; - } -}; // class RGWGCIOManger - -int RGWGC::process(int index, int max_secs, bool expired_only, - RGWGCIOManager& io_manager) -{ - ldpp_dout(this, 20) << "RGWGC::process entered with GC index_shard=" << - index << ", max_secs=" << max_secs << ", expired_only=" << - expired_only << dendl; - - rados::cls::lock::Lock l(gc_index_lock_name); - utime_t end = ceph_clock_now(); - - /* max_secs should be greater than zero. We don't want a zero max_secs - * to be translated as no timeout, since we'd then need to break the - * lock and that would require a manual intervention. In this case - * we can just wait it out. */ - if (max_secs <= 0) - return -EAGAIN; - - end += max_secs; - utime_t time(max_secs, 0); - l.set_duration(time); - - int ret = l.lock_exclusive(&store->gc_pool_ctx, obj_names[index]); - if (ret == -EBUSY) { /* already locked by another gc processor */ - ldpp_dout(this, 10) << "RGWGC::process failed to acquire lock on " << - obj_names[index] << dendl; - return 0; - } - if (ret < 0) - return ret; - - string marker; - string next_marker; - bool truncated; - IoCtx *ctx = new IoCtx; - do { - int max = 100; - std::list entries; - - int ret = 0; - - if (! transitioned_objects_cache[index]) { - ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker); - ldpp_dout(this, 20) << - "RGWGC::process cls_rgw_gc_list returned with returned:" << ret << - ", entries.size=" << entries.size() << ", truncated=" << truncated << - ", next_marker='" << next_marker << "'" << dendl; - obj_version objv; - cls_version_read(store->gc_pool_ctx, obj_names[index], &objv); - if ((objv.ver == 1) && entries.size() == 0) { - std::list non_expired_entries; - ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, 1, false, non_expired_entries, &truncated, next_marker); - if (non_expired_entries.size() == 0) { - transitioned_objects_cache[index] = true; - marker.clear(); - ldpp_dout(this, 20) << "RGWGC::process cls_rgw_gc_list returned NO non expired entries, so setting cache entry to TRUE" << dendl; - } else { - ret = 0; - goto done; - } - } - if ((objv.ver == 0) && (ret == -ENOENT || entries.size() == 0)) { - ret = 0; - goto done; - } - } - - if (transitioned_objects_cache[index]) { - ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker); - ldpp_dout(this, 20) << - "RGWGC::process cls_rgw_gc_queue_list_entries returned with return value:" << ret << - ", entries.size=" << entries.size() << ", truncated=" << truncated << - ", next_marker='" << next_marker << "'" << dendl; - if (entries.size() == 0) { - ret = 0; - goto done; - } - } - - if (ret < 0) - goto done; - - marker = next_marker; - - string last_pool; - std::list::iterator iter; - for (iter = entries.begin(); iter != entries.end(); ++iter) { - cls_rgw_gc_obj_info& info = *iter; - - ldpp_dout(this, 20) << "RGWGC::process iterating over entry tag='" << - info.tag << "', time=" << info.time << ", chain.objs.size()=" << - 
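
`RGWGC::process(index, max_secs, ...)` deliberately makes the shard lock's duration equal to the processing budget: a crashed processor's lock simply expires rather than needing a manual break, `-EBUSY` means another processor owns the shard and is silently skipped, and the entry loop re-checks the deadline before each chain. A hedged `std::chrono` sketch of just the time-budget check (the actual lock is Ceph's `rados::cls::lock` and is omitted here):

```cpp
#include <chrono>
#include <iostream>
#include <thread>

int main() {
  using clock = std::chrono::steady_clock;
  const auto max_secs = std::chrono::seconds(1);  // must be > 0: a zero budget
                                                  // would read as "no timeout"
  const auto end = clock::now() + max_secs;       // lock duration == budget

  int processed = 0;
  while (true) {
    if (clock::now() >= end) {                    // leave before the lease lapses
      std::cout << "budget exhausted after " << processed << " entries\n";
      break;                                      // remaining work: next cycle
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(50)); // one GC entry
    ++processed;
  }
}
```
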
info.chain.objs.size() << dendl; - - std::list::iterator liter; - cls_rgw_obj_chain& chain = info.chain; - - utime_t now = ceph_clock_now(); - if (now >= end) { - goto done; - } - if (! transitioned_objects_cache[index]) { - if (chain.objs.empty()) { - io_manager.schedule_tag_removal(index, info.tag); - } else { - io_manager.add_tag_io_size(index, info.tag, chain.objs.size()); - } - } - if (! chain.objs.empty()) { - for (liter = chain.objs.begin(); liter != chain.objs.end(); ++liter) { - cls_rgw_obj& obj = *liter; - - if (obj.pool != last_pool) { - delete ctx; - ctx = new IoCtx; - ret = rgw_init_ioctx(this, store->get_rados_handle(), obj.pool, *ctx); - if (ret < 0) { - if (transitioned_objects_cache[index]) { - goto done; - } - last_pool = ""; - ldpp_dout(this, 0) << "ERROR: failed to create ioctx pool=" << - obj.pool << dendl; - continue; - } - last_pool = obj.pool; - } - - ctx->locator_set_key(obj.loc); - - const string& oid = obj.key.name; /* just stored raw oid there */ - - ldpp_dout(this, 5) << "RGWGC::process removing " << obj.pool << - ":" << obj.key.name << dendl; - ObjectWriteOperation op; - cls_refcount_put(op, info.tag, true); - - ret = io_manager.schedule_io(ctx, oid, &op, index, info.tag); - if (ret < 0) { - ldpp_dout(this, 0) << - "WARNING: failed to schedule deletion for oid=" << oid << dendl; - if (transitioned_objects_cache[index]) { - //If deleting oid failed for any of them, we will not delete queue entries - goto done; - } - } - if (going_down()) { - // leave early, even if tag isn't removed, it's ok since it - // will be picked up next time around - goto done; - } - } // chains loop - } // else -- chains not empty - } // entries loop - if (transitioned_objects_cache[index] && entries.size() > 0) { - ret = io_manager.drain_ios(); - if (ret < 0) { - goto done; - } - //Remove the entries from the queue - ldpp_dout(this, 5) << "RGWGC::process removing entries, marker: " << marker << dendl; - ret = io_manager.remove_queue_entries(index, entries.size()); - if (ret < 0) { - ldpp_dout(this, 0) << - "WARNING: failed to remove queue entries" << dendl; - goto done; - } - } - } while (truncated); - -done: - /* we don't drain here, because if we're going down we don't want to - * hold the system if backend is unresponsive - */ - l.unlock(&store->gc_pool_ctx, obj_names[index]); - delete ctx; - - return 0; -} - -int RGWGC::process(bool expired_only) -{ - int max_secs = cct->_conf->rgw_gc_processor_max_time; - - const int start = ceph::util::generate_random_number(0, max_objs - 1); - - RGWGCIOManager io_manager(this, store->ctx(), this); - - for (int i = 0; i < max_objs; i++) { - int index = (i + start) % max_objs; - int ret = process(index, max_secs, expired_only, io_manager); - if (ret < 0) - return ret; - } - if (!going_down()) { - io_manager.drain(); - } - - return 0; -} - -bool RGWGC::going_down() -{ - return down_flag; -} - -void RGWGC::start_processor() -{ - worker = new GCWorker(this, cct, this); - worker->create("rgw_gc"); -} - -void RGWGC::stop_processor() -{ - down_flag = true; - if (worker) { - worker->stop(); - worker->join(); - } - delete worker; - worker = NULL; -} - -unsigned RGWGC::get_subsys() const -{ - return dout_subsys; -} - -std::ostream& RGWGC::gen_prefix(std::ostream& out) const -{ - return out << "garbage collection: "; -} - -void *RGWGC::GCWorker::entry() { - do { - utime_t start = ceph_clock_now(); - ldpp_dout(dpp, 2) << "garbage collection: start" << dendl; - int r = gc->process(true); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: garbage collection 
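
`GCWorker::entry` paces itself against `rgw_gc_processor_period`: after each pass it sleeps only for the remainder of the period, starts the next round immediately if the pass overran, and `stop()` wakes the wait early via the condition variable. A minimal std-only model of that loop (durations are illustrative):

```cpp
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

int main() {
  std::mutex lock;
  std::condition_variable cond;
  bool down = false;                                // going_down() analogue
  const auto period = std::chrono::milliseconds(200);

  std::thread worker([&] {
    while (true) {
      auto start = std::chrono::steady_clock::now();
      std::this_thread::sleep_for(std::chrono::milliseconds(50)); // gc->process()
      auto elapsed = std::chrono::steady_clock::now() - start;
      std::unique_lock l(lock);
      if (down) break;
      if (elapsed < period)                         // sleep only the remainder;
        cond.wait_for(l, period - elapsed,          // overruns skip straight to
                      [&] { return down; });        // the next round
      if (down) break;                              // stop() interrupted the wait
    }
  });

  std::this_thread::sleep_for(std::chrono::milliseconds(500));
  { std::lock_guard g(lock); down = true; }         // GCWorker::stop() analogue
  cond.notify_all();
  worker.join();
  std::cout << "worker stopped\n";
}
```
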
process() returned error r=" << r << dendl; - } - ldpp_dout(dpp, 2) << "garbage collection: stop" << dendl; - - if (gc->going_down()) - break; - - utime_t end = ceph_clock_now(); - end -= start; - int secs = cct->_conf->rgw_gc_processor_period; - - if (secs <= end.sec()) - continue; // next round - - secs -= end.sec(); - - std::unique_lock locker{lock}; - cond.wait_for(locker, std::chrono::seconds(secs)); - } while (!gc->going_down()); - - return NULL; -} - -void RGWGC::GCWorker::stop() -{ - std::lock_guard l{lock}; - cond.notify_all(); -} diff --git a/src/rgw/store/rados/rgw_gc.h b/src/rgw/store/rados/rgw_gc.h deleted file mode 100644 index 196f2802c16..00000000000 --- a/src/rgw/store/rados/rgw_gc.h +++ /dev/null @@ -1,87 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_GC_H -#define CEPH_RGW_GC_H - - -#include "include/types.h" -#include "include/rados/librados.hpp" -#include "common/ceph_mutex.h" -#include "common/Cond.h" -#include "common/Thread.h" -#include "rgw_common.h" -#include "rgw_sal.h" -#include "rgw_rados.h" -#include "cls/rgw/cls_rgw_types.h" - -#include - -class RGWGCIOManager; - -class RGWGC : public DoutPrefixProvider { - CephContext *cct; - RGWRados *store; - int max_objs; - std::string *obj_names; - std::atomic down_flag = { false }; - - static constexpr uint64_t seed = 8675309; - - int tag_index(const std::string& tag); - int send_chain(const cls_rgw_obj_chain& chain, const std::string& tag); - - class GCWorker : public Thread { - const DoutPrefixProvider *dpp; - CephContext *cct; - RGWGC *gc; - ceph::mutex lock = ceph::make_mutex("GCWorker"); - ceph::condition_variable cond; - - public: - GCWorker(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp), cct(_cct), gc(_gc) {} - void *entry() override; - void stop(); - }; - - GCWorker *worker; -public: - RGWGC() : cct(NULL), store(NULL), max_objs(0), obj_names(NULL), worker(NULL) {} - ~RGWGC() { - stop_processor(); - finalize(); - } - std::vector transitioned_objects_cache; - std::tuple> send_split_chain(const cls_rgw_obj_chain& chain, const std::string& tag); - - // asynchronously defer garbage collection on an object that's still being read - int async_defer_chain(const std::string& tag, const cls_rgw_obj_chain& info); - - // callback for when async_defer_chain() fails with ECANCELED - void on_defer_canceled(const cls_rgw_gc_obj_info& info); - - int remove(int index, const std::vector& tags, librados::AioCompletion **pc); - int remove(int index, int num_entries); - - void initialize(CephContext *_cct, RGWRados *_store); - void finalize(); - - int list(int *index, std::string& marker, uint32_t max, bool expired_only, std::list& result, bool *truncated, bool& processing_queue); - void list_init(int *index) { *index = 0; } - int process(int index, int process_max_secs, bool expired_only, - RGWGCIOManager& io_manager); - int process(bool expired_only); - - bool going_down(); - void start_processor(); - void stop_processor(); - - CephContext *get_cct() const override { return store->ctx(); } - unsigned get_subsys() const; - - std::ostream& gen_prefix(std::ostream& out) const; - -}; - - -#endif diff --git a/src/rgw/store/rados/rgw_gc_log.cc b/src/rgw/store/rados/rgw_gc_log.cc deleted file mode 100644 index ad819eddc05..00000000000 --- a/src/rgw/store/rados/rgw_gc_log.cc +++ /dev/null @@ -1,55 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include 
"rgw_gc_log.h" - -#include "cls/rgw/cls_rgw_client.h" -#include "cls/rgw_gc/cls_rgw_gc_client.h" -#include "cls/version/cls_version_client.h" - - -void gc_log_init2(librados::ObjectWriteOperation& op, - uint64_t max_size, uint64_t max_deferred) -{ - obj_version objv; // objv.ver = 0 - cls_version_check(op, objv, VER_COND_EQ); - cls_rgw_gc_queue_init(op, max_size, max_deferred); - objv.ver = 1; - cls_version_set(op, objv); -} - -void gc_log_enqueue1(librados::ObjectWriteOperation& op, - uint32_t expiration, cls_rgw_gc_obj_info& info) -{ - obj_version objv; // objv.ver = 0 - cls_version_check(op, objv, VER_COND_EQ); - cls_rgw_gc_set_entry(op, expiration, info); -} - -void gc_log_enqueue2(librados::ObjectWriteOperation& op, - uint32_t expiration, const cls_rgw_gc_obj_info& info) -{ - obj_version objv; - objv.ver = 1; - cls_version_check(op, objv, VER_COND_EQ); - cls_rgw_gc_queue_enqueue(op, expiration, info); -} - -void gc_log_defer1(librados::ObjectWriteOperation& op, - uint32_t expiration, const cls_rgw_gc_obj_info& info) -{ - obj_version objv; // objv.ver = 0 - cls_version_check(op, objv, VER_COND_EQ); - cls_rgw_gc_defer_entry(op, expiration, info.tag); -} - -void gc_log_defer2(librados::ObjectWriteOperation& op, - uint32_t expiration, const cls_rgw_gc_obj_info& info) -{ - obj_version objv; - objv.ver = 1; - cls_version_check(op, objv, VER_COND_EQ); - cls_rgw_gc_queue_defer_entry(op, expiration, info); - // TODO: conditional on whether omap is known to be empty - cls_rgw_gc_remove(op, {info.tag}); -} diff --git a/src/rgw/store/rados/rgw_lc_tier.cc b/src/rgw/store/rados/rgw_lc_tier.cc deleted file mode 100644 index 0ad21693123..00000000000 --- a/src/rgw/store/rados/rgw_lc_tier.cc +++ /dev/null @@ -1,1336 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include -#include -#include - -#include "common/Formatter.h" -#include -#include "rgw_lc.h" -#include "rgw_lc_tier.h" -#include "rgw_string.h" -#include "rgw_zone.h" -#include "rgw_common.h" -#include "rgw_rest.h" -#include "svc_zone.h" - -#include -#include -#include - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -struct rgw_lc_multipart_part_info { - int part_num{0}; - uint64_t ofs{0}; - uint64_t size{0}; - std::string etag; -}; - -struct rgw_lc_obj_properties { - ceph::real_time mtime; - std::string etag; - uint64_t versioned_epoch{0}; - std::map& target_acl_mappings; - std::string target_storage_class; - - rgw_lc_obj_properties(ceph::real_time _mtime, std::string _etag, - uint64_t _versioned_epoch, std::map& _t_acl_mappings, - std::string _t_storage_class) : - mtime(_mtime), etag(_etag), - versioned_epoch(_versioned_epoch), - target_acl_mappings(_t_acl_mappings), - target_storage_class(_t_storage_class) {} -}; - -struct rgw_lc_multipart_upload_info { - std::string upload_id; - uint64_t obj_size; - ceph::real_time mtime; - std::string etag; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(upload_id, bl); - encode(obj_size, bl); - encode(mtime, bl); - encode(etag, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(upload_id, bl); - decode(obj_size, bl); - decode(mtime, bl); - decode(etag, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_lc_multipart_upload_info) - -static inline string get_key_instance(const rgw_obj_key& key) -{ - if (!key.instance.empty() && - !key.have_null_instance()) { - return "-" + key.instance; - } 
- return ""; -} - -static inline string get_key_oid(const rgw_obj_key& key) -{ - string oid = key.name; - if (!key.instance.empty() && - !key.have_null_instance()) { - oid += string("-") + key.instance; - } - return oid; -} - -static inline string obj_to_aws_path(const rgw_obj& obj) -{ - string path = obj.bucket.name + "/" + get_key_oid(obj.key); - return path; -} - -static int read_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, - const rgw_raw_obj *status_obj, rgw_lc_multipart_upload_info *status) -{ - int ret = 0; - rgw::sal::RadosStore *rados = dynamic_cast(driver); - - if (!rados) { - ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." << dendl; - return -1; - } - - auto& pool = status_obj->pool; - const auto oid = status_obj->oid; - auto sysobj = rados->svc()->sysobj; - bufferlist bl; - - ret = rgw_get_system_obj(sysobj, pool, oid, bl, nullptr, nullptr, - null_yield, dpp); - - if (ret < 0) { - return ret; - } - - if (bl.length() > 0) { - try { - auto p = bl.cbegin(); - status->decode(p); - } catch (buffer::error& e) { - ldpp_dout(dpp, 10) << "failed to decode status obj: " - << e.what() << dendl; - return -EIO; - } - } else { - return -EIO; - } - - return 0; -} - -static int put_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, - const rgw_raw_obj *status_obj, rgw_lc_multipart_upload_info *status) -{ - int ret = 0; - rgw::sal::RadosStore *rados = dynamic_cast(driver); - - if (!rados) { - ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." << dendl; - return -1; - } - - auto& pool = status_obj->pool; - const auto oid = status_obj->oid; - auto sysobj = rados->svc()->sysobj; - bufferlist bl; - status->encode(bl); - - ret = rgw_put_system_obj(dpp, sysobj, pool, oid, bl, true, nullptr, - real_time{}, null_yield); - - return ret; -} - -static int delete_upload_status(const DoutPrefixProvider *dpp, rgw::sal::Driver *driver, - const rgw_raw_obj *status_obj) -{ - int ret = 0; - rgw::sal::RadosStore *rados = dynamic_cast(driver); - - if (!rados) { - ldpp_dout(dpp, 0) << "ERROR: Not a RadosStore. Cannot be transitioned to cloud." 
<< dendl; - return -1; - } - - auto& pool = status_obj->pool; - const auto oid = status_obj->oid; - auto sysobj = rados->svc()->sysobj; - - ret = rgw_delete_system_obj(dpp, sysobj, pool, oid, nullptr, null_yield); - - return ret; -} - -static std::set keep_headers = { "CONTENT_TYPE", - "CONTENT_ENCODING", - "CONTENT_DISPOSITION", - "CONTENT_LANGUAGE" }; - -/* - * mapping between rgw object attrs and output http fields - * - static const struct rgw_http_attr base_rgw_to_http_attrs[] = { - { RGW_ATTR_CONTENT_LANG, "Content-Language" }, - { RGW_ATTR_EXPIRES, "Expires" }, - { RGW_ATTR_CACHE_CONTROL, "Cache-Control" }, - { RGW_ATTR_CONTENT_DISP, "Content-Disposition" }, - { RGW_ATTR_CONTENT_ENC, "Content-Encoding" }, - { RGW_ATTR_USER_MANIFEST, "X-Object-Manifest" }, - { RGW_ATTR_X_ROBOTS_TAG , "X-Robots-Tag" }, - { RGW_ATTR_STORAGE_CLASS , "X-Amz-Storage-Class" }, -// RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode: -// S3 endpoint: x-amz-website-redirect-location -// S3Website endpoint: Location -{ RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" }, -}; */ - -static void init_headers(map& attrs, - map& headers) -{ - for (auto& kv : attrs) { - const char * name = kv.first.c_str(); - const auto aiter = rgw_to_http_attrs.find(name); - - if (aiter != std::end(rgw_to_http_attrs)) { - headers[aiter->second] = rgw_bl_str(kv.second); - } else if (strncmp(name, RGW_ATTR_META_PREFIX, - sizeof(RGW_ATTR_META_PREFIX)-1) == 0) { - name += sizeof(RGW_ATTR_META_PREFIX) - 1; - string sname(name); - string name_prefix = RGW_ATTR_META_PREFIX; - char full_name_buf[name_prefix.size() + sname.size() + 1]; - snprintf(full_name_buf, sizeof(full_name_buf), "%.*s%.*s", - static_cast(name_prefix.length()), - name_prefix.data(), - static_cast(sname.length()), - sname.data()); - headers[full_name_buf] = rgw_bl_str(kv.second); - } else if (strcmp(name,RGW_ATTR_CONTENT_TYPE) == 0) { - headers["CONTENT_TYPE"] = rgw_bl_str(kv.second); - } - } -} - -/* Read object or just head from remote endpoint. For now initializes only headers, - * but can be extended to fetch etag, mtime etc if needed. 
- */ -static int cloud_tier_get_object(RGWLCCloudTierCtx& tier_ctx, bool head, - std::map& headers) { - RGWRESTConn::get_obj_params req_params; - RGWBucketInfo b; - std::string target_obj_name; - int ret = 0; - std::unique_ptr dest_bucket; - std::unique_ptr dest_obj; - rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, - tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, - tier_ctx.target_storage_class); - std::string etag; - RGWRESTStreamRWRequest *in_req; - - b.bucket.name = tier_ctx.target_bucket_name; - target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - tier_ctx.obj->get_name(); - if (!tier_ctx.o.is_current()) { - target_obj_name += get_key_instance(tier_ctx.obj->get_key()); - } - - ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , reterr = " << ret << dendl; - return ret; - } - - dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); - if (!dest_obj) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; - return -1; - } - /* init input connection */ - req_params.get_op = !head; - req_params.prepend_metadata = true; - req_params.rgwx_stat = true; - req_params.sync_manifest = true; - req_params.skip_decrypt = true; - - ret = tier_ctx.conn.get_obj(tier_ctx.dpp, dest_obj.get(), req_params, true /* send */, &in_req); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: " << __func__ << "(): conn.get_obj() returned ret=" << ret << dendl; - return ret; - } - - /* fetch headers */ - ret = tier_ctx.conn.complete_request(in_req, nullptr, nullptr, nullptr, nullptr, &headers, null_yield); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(tier_ctx.dpp, 20) << "ERROR: " << __func__ << "(): conn.complete_request() returned ret=" << ret << dendl; - return ret; - } - return 0; -} - -static bool is_already_tiered(const DoutPrefixProvider *dpp, - std::map& headers, - ceph::real_time& mtime) { - char buf[32]; - map attrs = headers; - - for (const auto& a : attrs) { - ldpp_dout(dpp, 20) << "GetCrf attr[" << a.first << "] = " << a.second <iterate(dpp, ofs, end, out_cb, null_yield); - return ret; -} - -int RGWLCCloudStreamPut::init() { - /* init output connection */ - if (multipart.is_multipart) { - char buf[32]; - snprintf(buf, sizeof(buf), "%d", multipart.part_num); - rgw_http_param_pair params[] = { { "uploadId", multipart.upload_id.c_str() }, - { "partNumber", buf }, - { nullptr, nullptr } }; - conn.put_obj_send_init(dest_obj, params, &out_req); - } else { - conn.put_obj_send_init(dest_obj, nullptr, &out_req); - } - - return 0; -} - -bool RGWLCCloudStreamPut::keep_attr(const string& h) { - return (keep_headers.find(h) != keep_headers.end() || - boost::algorithm::starts_with(h, "X_AMZ_")); -} - -void RGWLCCloudStreamPut::init_send_attrs(const DoutPrefixProvider *dpp, - const rgw_rest_obj& rest_obj, - const rgw_lc_obj_properties& obj_properties, - std::map& attrs) { - - map& acl_mappings(obj_properties.target_acl_mappings); - const std::string& target_storage_class = obj_properties.target_storage_class; - - attrs.clear(); - - for (auto& hi : rest_obj.attrs) { - if (keep_attr(hi.first)) { - attrs.insert(hi); - } - } - - const auto acl = rest_obj.acls.get_acl(); - - map > access_map; - - if (!acl_mappings.empty()) { - for (auto& grant : acl.get_grant_map()) { - auto& orig_grantee = grant.first; - auto& perm = grant.second; - - string grantee; - - const 
auto& am = acl_mappings; - - const auto iter = am.find(orig_grantee); - if (iter == am.end()) { - ldpp_dout(dpp, 20) << "acl_mappings: Could not find " << orig_grantee << " .. ignoring" << dendl; - continue; - } - - grantee = iter->second.dest_id; - - string type; - - switch (iter->second.type) { - case ACL_TYPE_CANON_USER: - type = "id"; - break; - case ACL_TYPE_EMAIL_USER: - type = "emailAddress"; - break; - case ACL_TYPE_GROUP: - type = "uri"; - break; - default: - continue; - } - - string tv = type + "=" + grantee; - - int flags = perm.get_permission().get_permissions(); - if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) { - access_map[flags].push_back(tv); - continue; - } - - for (int i = 1; i <= RGW_PERM_WRITE_ACP; i <<= 1) { - if (flags & i) { - access_map[i].push_back(tv); - } - } - } - } - - for (const auto& aiter : access_map) { - int grant_type = aiter.first; - - string header_str("x-amz-grant-"); - - switch (grant_type) { - case RGW_PERM_READ: - header_str.append("read"); - break; - case RGW_PERM_WRITE: - header_str.append("write"); - break; - case RGW_PERM_READ_ACP: - header_str.append("read-acp"); - break; - case RGW_PERM_WRITE_ACP: - header_str.append("write-acp"); - break; - case RGW_PERM_FULL_CONTROL: - header_str.append("full-control"); - break; - } - - string s; - - for (const auto& viter : aiter.second) { - if (!s.empty()) { - s.append(", "); - } - s.append(viter); - } - - ldpp_dout(dpp, 20) << "acl_mappings: set acl: " << header_str << "=" << s << dendl; - - attrs[header_str] = s; - } - - /* Copy target storage class */ - if (!target_storage_class.empty()) { - attrs["x-amz-storage-class"] = target_storage_class; - } else { - attrs["x-amz-storage-class"] = "STANDARD"; - } - - /* New attribute to specify its transitioned from RGW */ - attrs["x-amz-meta-rgwx-source"] = "rgw"; - - char buf[32]; - snprintf(buf, sizeof(buf), "%llu", (long long)obj_properties.versioned_epoch); - attrs["x-amz-meta-rgwx-versioned-epoch"] = buf; - - utime_t ut(obj_properties.mtime); - snprintf(buf, sizeof(buf), "%lld.%09lld", - (long long)ut.sec(), - (long long)ut.nsec()); - - attrs["x-amz-meta-rgwx-source-mtime"] = buf; - attrs["x-amz-meta-rgwx-source-etag"] = obj_properties.etag; - attrs["x-amz-meta-rgwx-source-key"] = rest_obj.key.name; - if (!rest_obj.key.instance.empty()) { - attrs["x-amz-meta-rgwx-source-version-id"] = rest_obj.key.instance; - } - for (const auto& a : attrs) { - ldpp_dout(dpp, 30) << "init_send_attrs attr[" << a.first << "] = " << a.second <(out_req); - - std::map new_attrs; - if (!multipart.is_multipart) { - init_send_attrs(dpp, rest_obj, obj_properties, new_attrs); - } - - r->set_send_length(rest_obj.content_len); - - RGWAccessControlPolicy policy; - - r->send_ready(dpp, conn.get_key(), new_attrs, policy); -} - -void RGWLCCloudStreamPut::handle_headers(const map& headers) { - for (const auto& h : headers) { - if (h.first == "ETAG") { - etag = h.second; - } - } -} - -bool RGWLCCloudStreamPut::get_etag(string *petag) { - if (etag.empty()) { - return false; - } - *petag = etag; - return true; -} - -void RGWLCCloudStreamPut::set_multipart(const string& upload_id, int part_num, uint64_t part_size) { - multipart.is_multipart = true; - multipart.upload_id = upload_id; - multipart.part_num = part_num; - multipart.part_size = part_size; -} - -int RGWLCCloudStreamPut::send() { - int ret = RGWHTTP::send(out_req); - return ret; -} - -RGWGetDataCB *RGWLCCloudStreamPut::get_cb() { - return out_req->get_out_cb(); -} - -int RGWLCCloudStreamPut::complete_request() { - int 
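
`init_send_attrs` stamps the source mtime into `x-amz-meta-rgwx-source-mtime` as `sec.nsec` with a fixed nine-digit nanosecond field; `is_already_tiered` later compares that stamp against the local mtime to decide whether the cloud copy is current. The round-trip below is a sketch; the parsing side is an illustrative reconstruction, since the reader in this diff is partially garbled:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  long long sec = 1669999999, nsec = 42;                // sample source mtime

  char buf[32];
  snprintf(buf, sizeof(buf), "%lld.%09lld", sec, nsec); // stamping side
  printf("x-amz-meta-rgwx-source-mtime: %s\n", buf);    // ...999.000000042

  // Reading side: split on '.' and compare seconds/nanoseconds separately.
  char* dot = strchr(buf, '.');
  *dot = '\0';
  long long rsec = atoll(buf), rnsec = atoll(dot + 1);
  printf("parsed back: sec=%lld nsec=%lld match=%d\n",
         rsec, rnsec, rsec == sec && rnsec == nsec);
}
```

The zero-padded nanosecond field matters: without it, `1.42` would be ambiguous between 42 ns and 420000000 ns.
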
ret = conn.complete_request(out_req, etag, &obj_properties.mtime, null_yield); - return ret; -} - -/* Read local copy and write to Cloud endpoint */ -static int cloud_tier_transfer_object(const DoutPrefixProvider* dpp, - RGWLCStreamRead* readf, RGWLCCloudStreamPut* writef) { - std::string url; - bufferlist bl; - bool sent_attrs{false}; - int ret{0}; - off_t ofs; - off_t end; - - ret = readf->init(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: fail to initialize in_crf, ret = " << ret << dendl; - return ret; - } - readf->get_range(ofs, end); - rgw_rest_obj& rest_obj = readf->get_rest_obj(); - if (!sent_attrs) { - ret = writef->init(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: fail to initialize out_crf, ret = " << ret << dendl; - return ret; - } - - writef->send_ready(dpp, rest_obj); - ret = writef->send(); - if (ret < 0) { - return ret; - } - sent_attrs = true; - } - - ret = readf->read(ofs, end, writef->get_cb()); - - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: fail to read from in_crf, ret = " << ret << dendl; - return ret; - } - - ret = writef->complete_request(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: fail to complete request, ret = " << ret << dendl; - return ret; - } - - return 0; -} - -static int cloud_tier_plain_transfer(RGWLCCloudTierCtx& tier_ctx) { - int ret; - std::unique_ptr dest_bucket; - std::unique_ptr dest_obj; - - rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, - tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, - tier_ctx.target_storage_class); - RGWBucketInfo b; - std::string target_obj_name; - - b.bucket.name = tier_ctx.target_bucket_name; - target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - tier_ctx.obj->get_name(); - if (!tier_ctx.o.is_current()) { - target_obj_name += get_key_instance(tier_ctx.obj->get_key()); - } - - ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , ret = " << ret << dendl; - return ret; - } - - dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); - if (!dest_obj) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; - return -1; - } - - tier_ctx.obj->set_atomic(); - - /* Prepare Read from source */ - /* TODO: Define readf, writef as stack variables. For some reason, - * when used as stack variables (esp., readf), the transition seems to - * be taking lot of time eventually erroring out at times. 
- */ - std::shared_ptr readf; - readf.reset(new RGWLCStreamRead(tier_ctx.cct, tier_ctx.dpp, - tier_ctx.obj, tier_ctx.o.meta.mtime)); - - std::shared_ptr writef; - writef.reset(new RGWLCCloudStreamPut(tier_ctx.dpp, obj_properties, tier_ctx.conn, - dest_obj.get())); - - /* actual Read & Write */ - ret = cloud_tier_transfer_object(tier_ctx.dpp, readf.get(), writef.get()); - - return ret; -} - -static int cloud_tier_send_multipart_part(RGWLCCloudTierCtx& tier_ctx, - const std::string& upload_id, - const rgw_lc_multipart_part_info& part_info, - std::string *petag) { - int ret; - std::unique_ptr dest_bucket; - std::unique_ptr dest_obj; - - rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, - tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, - tier_ctx.target_storage_class); - RGWBucketInfo b; - std::string target_obj_name; - off_t end; - - b.bucket.name = tier_ctx.target_bucket_name; - target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - tier_ctx.obj->get_name(); - if (!tier_ctx.o.is_current()) { - target_obj_name += get_key_instance(tier_ctx.obj->get_key()); - } - - ret = tier_ctx.driver->get_bucket(nullptr, b, &dest_bucket); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_bucket - " << tier_ctx.target_bucket_name << " , ret = " << ret << dendl; - return ret; - } - - dest_obj = dest_bucket->get_object(rgw_obj_key(target_obj_name)); - if (!dest_obj) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize dest_object path - " << target_obj_name << dendl; - return -1; - } - - tier_ctx.obj->set_atomic(); - - /* TODO: Define readf, writef as stack variables. For some reason, - * when used as stack variables (esp., readf), the transition seems to - * be taking lot of time eventually erroring out at times. 
*/ - std::shared_ptr readf; - readf.reset(new RGWLCStreamRead(tier_ctx.cct, tier_ctx.dpp, - tier_ctx.obj, tier_ctx.o.meta.mtime)); - - std::shared_ptr writef; - writef.reset(new RGWLCCloudStreamPut(tier_ctx.dpp, obj_properties, tier_ctx.conn, - dest_obj.get())); - - /* Prepare Read from source */ - end = part_info.ofs + part_info.size - 1; - readf->set_multipart(part_info.size, part_info.ofs, end); - - /* Prepare write */ - writef->set_multipart(upload_id, part_info.part_num, part_info.size); - - /* actual Read & Write */ - ret = cloud_tier_transfer_object(tier_ctx.dpp, readf.get(), writef.get()); - if (ret < 0) { - return ret; - } - - if (!(writef->get_etag(petag))) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to get etag from PUT request" << dendl; - return -EIO; - } - - return 0; -} - -static int cloud_tier_abort_multipart(const DoutPrefixProvider *dpp, - RGWRESTConn& dest_conn, const rgw_obj& dest_obj, - const std::string& upload_id) { - int ret; - bufferlist out_bl; - bufferlist bl; - rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; - - string resource = obj_to_aws_path(dest_obj); - ret = dest_conn.send_resource(dpp, "DELETE", resource, params, nullptr, - out_bl, &bl, nullptr, null_yield); - - - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload for dest object=" << dest_obj << " (ret=" << ret << ")" << dendl; - return ret; - } - - return 0; -} - -static int cloud_tier_init_multipart(const DoutPrefixProvider *dpp, - RGWRESTConn& dest_conn, const rgw_obj& dest_obj, - uint64_t obj_size, std::map& attrs, - std::string& upload_id) { - bufferlist out_bl; - bufferlist bl; - - struct InitMultipartResult { - std::string bucket; - std::string key; - std::string upload_id; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Bucket", bucket, obj); - RGWXMLDecoder::decode_xml("Key", key, obj); - RGWXMLDecoder::decode_xml("UploadId", upload_id, obj); - } - } result; - - int ret; - rgw_http_param_pair params[] = { { "uploads", nullptr }, {nullptr, nullptr} }; - - string resource = obj_to_aws_path(dest_obj); - - ret = dest_conn.send_resource(dpp, "POST", resource, params, &attrs, - out_bl, &bl, nullptr, null_yield); - - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl; - return ret; - } - /* - * If one of the following fails we cannot abort upload, as we cannot - * extract the upload id. If one of these fail it's very likely that that's - * the least of our problem. 
- */ - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; - return -EIO; - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: failed to parse xml initmultipart: " << str << dendl; - return -EIO; - } - - try { - RGWXMLDecoder::decode_xml("InitiateMultipartUploadResult", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return -EIO; - } - - ldpp_dout(dpp, 20) << "init multipart result: bucket=" << result.bucket << " key=" << result.key << " upload_id=" << result.upload_id << dendl; - - upload_id = result.upload_id; - - return 0; -} - -static int cloud_tier_complete_multipart(const DoutPrefixProvider *dpp, - RGWRESTConn& dest_conn, const rgw_obj& dest_obj, - std::string& upload_id, - const std::map& parts) { - rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; - - stringstream ss; - XMLFormatter formatter; - int ret; - - bufferlist bl, out_bl; - string resource = obj_to_aws_path(dest_obj); - - struct CompleteMultipartReq { - std::map parts; - - explicit CompleteMultipartReq(const std::map& _parts) : parts(_parts) {} - - void dump_xml(Formatter *f) const { - for (const auto& p : parts) { - f->open_object_section("Part"); - encode_xml("PartNumber", p.first, f); - encode_xml("ETag", p.second.etag, f); - f->close_section(); - }; - } - } req_enc(parts); - - struct CompleteMultipartResult { - std::string location; - std::string bucket; - std::string key; - std::string etag; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Location", bucket, obj); - RGWXMLDecoder::decode_xml("Bucket", bucket, obj); - RGWXMLDecoder::decode_xml("Key", key, obj); - RGWXMLDecoder::decode_xml("ETag", etag, obj); - } - } result; - - encode_xml("CompleteMultipartUpload", req_enc, &formatter); - - formatter.flush(ss); - bl.append(ss.str()); - - ret = dest_conn.send_resource(dpp, "POST", resource, params, nullptr, - out_bl, &bl, nullptr, null_yield); - - - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to complete multipart upload for dest object=" << dest_obj << dendl; - return ret; - } - /* - * If one of the following fails we cannot abort upload, as we cannot - * extract the upload id. If one of these fail it's very likely that that's - * the least of our problem. 
- */ - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; - return -EIO; - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: failed to parse xml Completemultipart: " << str << dendl; - return -EIO; - } - - try { - RGWXMLDecoder::decode_xml("CompleteMultipartUploadResult", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return -EIO; - } - - ldpp_dout(dpp, 20) << "complete multipart result: location=" << result.location << " bucket=" << result.bucket << " key=" << result.key << " etag=" << result.etag << dendl; - - return ret; -} - -static int cloud_tier_abort_multipart_upload(RGWLCCloudTierCtx& tier_ctx, - const rgw_obj& dest_obj, const rgw_raw_obj& status_obj, - const std::string& upload_id) { - int ret; - - ret = cloud_tier_abort_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, upload_id); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to abort multipart upload dest obj=" << dest_obj << " upload_id=" << upload_id << " ret=" << ret << dendl; - /* ignore error, best effort */ - } - /* remove status obj */ - ret = delete_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " ret=" << ret << dendl; - // ignore error, best effort - } - return 0; -} - -static int cloud_tier_multipart_transfer(RGWLCCloudTierCtx& tier_ctx) { - rgw_obj src_obj; - rgw_obj dest_obj; - - uint64_t obj_size; - std::string src_etag; - rgw_rest_obj rest_obj; - - rgw_lc_multipart_upload_info status; - - std::map new_attrs; - - rgw_raw_obj status_obj; - - RGWBucketInfo b; - std::string target_obj_name; - rgw_bucket target_bucket; - - int ret; - - rgw_lc_obj_properties obj_properties(tier_ctx.o.meta.mtime, tier_ctx.o.meta.etag, - tier_ctx.o.versioned_epoch, tier_ctx.acl_mappings, - tier_ctx.target_storage_class); - - uint32_t part_size{0}; - uint32_t num_parts{0}; - - int cur_part{0}; - uint64_t cur_ofs{0}; - std::map parts; - - obj_size = tier_ctx.o.meta.size; - - target_bucket.name = tier_ctx.target_bucket_name; - - target_obj_name = tier_ctx.bucket_info.bucket.name + "/" + - tier_ctx.obj->get_name(); - if (!tier_ctx.o.is_current()) { - target_obj_name += get_key_instance(tier_ctx.obj->get_key()); - } - dest_obj.init(target_bucket, target_obj_name); - - rgw_pool pool = static_cast(tier_ctx.driver)->svc()->zone->get_zone_params().log_pool; - status_obj = rgw_raw_obj(pool, "lc_multipart_" + tier_ctx.obj->get_oid()); - - ret = read_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj, &status); - - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to read sync status of object " << src_obj << " ret=" << ret << dendl; - return ret; - } - - if (ret >= 0) { - // check here that mtime and size did not change - if (status.mtime != obj_properties.mtime || status.obj_size != obj_size || - status.etag != obj_properties.etag) { - cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id); - ret = -ENOENT; - } - } - - if (ret == -ENOENT) { - RGWLCStreamRead readf(tier_ctx.cct, tier_ctx.dpp, tier_ctx.obj, tier_ctx.o.meta.mtime); - - readf.init(); - - rest_obj = readf.get_rest_obj(); - - 
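
The multipart path is resumable: `cloud_tier_multipart_transfer` persists `{upload_id, obj_size, mtime, etag}` in a status object, and on restart resumes only if all three properties still match, otherwise it aborts the stale upload (best effort) and starts a fresh one, as in the code just above. A std-only sketch of that gate (struct and field names mirror `rgw_lc_multipart_upload_info` but are simplified):

```cpp
#include <iostream>
#include <optional>
#include <string>

struct UploadStatus {                       // rgw_lc_multipart_upload_info analogue
  std::string upload_id, etag;
  uint64_t obj_size;
  long mtime;
};

int main() {
  // Status left behind by an interrupted transition attempt.
  std::optional<UploadStatus> saved = UploadStatus{"upl-1", "etag-A", 1000, 42};

  UploadStatus cur{"", "etag-B", 1000, 42}; // source changed since last try

  if (saved && (saved->mtime != cur.mtime ||
                saved->obj_size != cur.obj_size ||
                saved->etag != cur.etag)) {
    std::cout << "abort stale upload " << saved->upload_id << "\n"; // best effort
    saved.reset();                          // behave like ret = -ENOENT
  }
  if (!saved) {
    std::cout << "init new multipart upload and persist status\n";
    saved = UploadStatus{"upl-2", cur.etag, cur.obj_size, cur.mtime};
  }
  std::cout << "send parts under upload_id=" << saved->upload_id << "\n";
}
```
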
RGWLCCloudStreamPut::init_send_attrs(tier_ctx.dpp, rest_obj, obj_properties, new_attrs); - - ret = cloud_tier_init_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, obj_size, new_attrs, status.upload_id); - if (ret < 0) { - return ret; - } - - status.obj_size = obj_size; - status.mtime = obj_properties.mtime; - status.etag = obj_properties.etag; - - ret = put_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj, &status); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to driver multipart upload state, ret=" << ret << dendl; - // continue with upload anyway - } - -#define MULTIPART_MAX_PARTS 10000 -#define MULTIPART_MAX_PARTS 10000 - uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS; - uint64_t min_conf_size = tier_ctx.multipart_min_part_size; - - if (min_conf_size < MULTIPART_MIN_POSSIBLE_PART_SIZE) { - min_conf_size = MULTIPART_MIN_POSSIBLE_PART_SIZE; - } - - part_size = std::max(min_conf_size, min_part_size); - num_parts = (obj_size + part_size - 1) / part_size; - cur_part = 1; - cur_ofs = 0; - } - - for (; (uint32_t)cur_part <= num_parts; ++cur_part) { - ldpp_dout(tier_ctx.dpp, 20) << "cur_part = "<< cur_part << ", info.ofs = " << cur_ofs << ", info.size = " << part_size << ", obj size = " << obj_size<< ", num_parts:" << num_parts << dendl; - rgw_lc_multipart_part_info& cur_part_info = parts[cur_part]; - cur_part_info.part_num = cur_part; - cur_part_info.ofs = cur_ofs; - cur_part_info.size = std::min((uint64_t)part_size, obj_size - cur_ofs); - - cur_ofs += cur_part_info.size; - - ret = cloud_tier_send_multipart_part(tier_ctx, - status.upload_id, - cur_part_info, - &cur_part_info.etag); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to send multipart part of obj=" << tier_ctx.obj << ", sync via multipart upload, upload_id=" << status.upload_id << " part number " << cur_part << " (error: " << cpp_strerror(-ret) << ")" << dendl; - cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id); - return ret; - } - - } - - ret = cloud_tier_complete_multipart(tier_ctx.dpp, tier_ctx.conn, dest_obj, status.upload_id, parts); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to complete multipart upload of obj=" << tier_ctx.obj << " (error: " << cpp_strerror(-ret) << ")" << dendl; - cloud_tier_abort_multipart_upload(tier_ctx, dest_obj, status_obj, status.upload_id); - return ret; - } - - /* remove status obj */ - ret = delete_upload_status(tier_ctx.dpp, tier_ctx.driver, &status_obj); - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to abort multipart upload obj=" << tier_ctx.obj << " upload_id=" << status.upload_id << " part number " << cur_part << " (" << cpp_strerror(-ret) << ")" << dendl; - // ignore error, best effort - } - return 0; -} - -/* Check if object has already been transitioned */ -static int cloud_tier_check_object(RGWLCCloudTierCtx& tier_ctx, bool& already_tiered) { - int ret; - std::map headers; - - /* Fetch Head object */ - ret = cloud_tier_get_object(tier_ctx, true, headers); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to fetch HEAD from cloud for obj=" << tier_ctx.obj << " , ret = " << ret << dendl; - return ret; - } - - already_tiered = is_already_tiered(tier_ctx.dpp, headers, tier_ctx.o.meta.mtime); - - if (already_tiered) { - ldpp_dout(tier_ctx.dpp, 20) << "is_already_tiered true" << dendl; - } else { - ldpp_dout(tier_ctx.dpp, 20) << "is_already_tiered false..going with out_crf writing" << dendl; - } - - return ret; -} - -static int 
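
The part-size computation above keeps the upload within S3's 10,000-part limit while honoring both the configured minimum and the 5 MiB protocol floor: the floor-of-object-size-over-10,000 gives the smallest legal part, the larger of that and the clamped configured minimum wins, and the part count is the ceiling division. A worked example with sample numbers (100 GiB object, 32 MiB configured minimum):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t MAX_PARTS = 10000;           // MULTIPART_MAX_PARTS
  const uint64_t MIN_POSSIBLE = 5ull << 20;   // 5 MiB protocol floor
  uint64_t conf_min = 32ull << 20;            // sample configured minimum
  uint64_t obj_size = 100ull << 30;           // 100 GiB object

  uint64_t min_part = obj_size / MAX_PARTS;   // ~10.24 MiB here
  uint64_t part_size =
      std::max(std::max(conf_min, MIN_POSSIBLE), min_part);     // 32 MiB wins
  uint64_t num_parts = (obj_size + part_size - 1) / part_size;  // ceil -> 3200

  printf("part_size=%llu MiB, num_parts=%llu\n",
         (unsigned long long)(part_size >> 20),
         (unsigned long long)num_parts);
  // Each part sends std::min(part_size, obj_size - cur_ofs), so the final
  // part simply carries whatever remains.
}
```
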
cloud_tier_create_bucket(RGWLCCloudTierCtx& tier_ctx) { - bufferlist out_bl; - int ret = 0; - pair key(tier_ctx.storage_class, tier_ctx.target_bucket_name); - struct CreateBucketResult { - std::string code; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Code", code, obj); - } - } result; - - ldpp_dout(tier_ctx.dpp, 30) << "Cloud_tier_ctx: creating bucket:" << tier_ctx.target_bucket_name << dendl; - bufferlist bl; - string resource = tier_ctx.target_bucket_name; - - ret = tier_ctx.conn.send_resource(tier_ctx.dpp, "PUT", resource, nullptr, nullptr, - out_bl, &bl, nullptr, null_yield); - - if (ret < 0 ) { - ldpp_dout(tier_ctx.dpp, 0) << "create target bucket : " << tier_ctx.target_bucket_name << " returned ret:" << ret << dendl; - } - if (out_bl.length() > 0) { - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to initialize xml parser for parsing create_bucket response from server" << dendl; - return -EIO; - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(tier_ctx.dpp, 5) << "ERROR: failed to parse xml createbucket: " << str << dendl; - return -EIO; - } - - try { - RGWXMLDecoder::decode_xml("Error", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(tier_ctx.dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return -EIO; - } - - if (result.code != "BucketAlreadyOwnedByYou" && result.code != "BucketAlreadyExists") { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: Creating target bucket failed with error: " << result.code << dendl; - return -EIO; - } - } - - return 0; -} - -int rgw_cloud_tier_transfer_object(RGWLCCloudTierCtx& tier_ctx, std::set& cloud_targets) { - int ret = 0; - - // check if target_path is already created - std::set::iterator it; - - it = cloud_targets.find(tier_ctx.target_bucket_name); - tier_ctx.target_bucket_created = (it != cloud_targets.end()); - - /* If run first time attempt to create the target bucket */ - if (!tier_ctx.target_bucket_created) { - ret = cloud_tier_create_bucket(tier_ctx); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to create target bucket on the cloud endpoint ret=" << ret << dendl; - return ret; - } - tier_ctx.target_bucket_created = true; - cloud_targets.insert(tier_ctx.target_bucket_name); - } - - /* Since multiple zones may try to transition the same object to the cloud, - * verify if the object is already transitioned. And since its just a best - * effort, do not bail out in case of any errors. 
- */ - bool already_tiered = false; - ret = cloud_tier_check_object(tier_ctx, already_tiered); - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to check object on the cloud endpoint ret=" << ret << dendl; - } - - if (already_tiered) { - ldpp_dout(tier_ctx.dpp, 20) << "Object (" << tier_ctx.o.key << ") is already tiered" << dendl; - return 0; - } - - uint64_t size = tier_ctx.o.meta.size; - uint64_t multipart_sync_threshold = tier_ctx.multipart_sync_threshold; - - if (multipart_sync_threshold < MULTIPART_MIN_POSSIBLE_PART_SIZE) { - multipart_sync_threshold = MULTIPART_MIN_POSSIBLE_PART_SIZE; - } - - if (size < multipart_sync_threshold) { - ret = cloud_tier_plain_transfer(tier_ctx); - } else { - tier_ctx.is_multipart_upload = true; - ret = cloud_tier_multipart_transfer(tier_ctx); - } - - if (ret < 0) { - ldpp_dout(tier_ctx.dpp, 0) << "ERROR: failed to transition object ret=" << ret << dendl; - } - - return ret; -} diff --git a/src/rgw/store/rados/rgw_lc_tier.h b/src/rgw/store/rados/rgw_lc_tier.h deleted file mode 100644 index 1b21f262092..00000000000 --- a/src/rgw/store/rados/rgw_lc_tier.h +++ /dev/null @@ -1,54 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_LC_TIER_H -#define CEPH_RGW_LC_TIER_H - -#include "rgw_lc.h" -#include "rgw_rest_conn.h" -#include "rgw_rados.h" -#include "rgw_zone.h" -#include "rgw_sal_rados.h" -#include "rgw_cr_rest.h" - -#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) -#define MULTIPART_MIN_POSSIBLE_PART_SIZE (5 * 1024 * 1024) - -struct RGWLCCloudTierCtx { - CephContext *cct; - const DoutPrefixProvider *dpp; - - /* Source */ - rgw_bucket_dir_entry& o; - rgw::sal::Driver *driver; - RGWBucketInfo& bucket_info; - std::string storage_class; - - rgw::sal::Object *obj; - - /* Remote */ - RGWRESTConn& conn; - std::string target_bucket_name; - std::string target_storage_class; - - std::map acl_mappings; - uint64_t multipart_min_part_size; - uint64_t multipart_sync_threshold; - - bool is_multipart_upload{false}; - bool target_bucket_created{true}; - - RGWLCCloudTierCtx(CephContext* _cct, const DoutPrefixProvider *_dpp, - rgw_bucket_dir_entry& _o, rgw::sal::Driver *_driver, - RGWBucketInfo &_binfo, rgw::sal::Object *_obj, - RGWRESTConn& _conn, std::string& _bucket, - std::string& _storage_class) : - cct(_cct), dpp(_dpp), o(_o), driver(_driver), bucket_info(_binfo), - obj(_obj), conn(_conn), target_bucket_name(_bucket), - target_storage_class(_storage_class) {} -}; - -/* Transition object to cloud endpoint */ -int rgw_cloud_tier_transfer_object(RGWLCCloudTierCtx& tier_ctx, std::set& cloud_targets); - -#endif diff --git a/src/rgw/store/rados/rgw_log_backing.cc b/src/rgw/store/rados/rgw_log_backing.cc deleted file mode 100644 index 7c9dafe7e44..00000000000 --- a/src/rgw/store/rados/rgw_log_backing.cc +++ /dev/null @@ -1,708 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "cls/log/cls_log_client.h" -#include "cls/version/cls_version_client.h" - -#include "rgw_log_backing.h" -#include "rgw_tools.h" -#include "cls_fifo_legacy.h" - -using namespace std::chrono_literals; -namespace cb = ceph::buffer; - -static constexpr auto dout_subsys = ceph_subsys_rgw; - -enum class shard_check { dne, omap, fifo, corrupt }; -inline std::ostream& operator <<(std::ostream& m, const shard_check& t) { - switch (t) { - case shard_check::dne: - return m << "shard_check::dne"; - case shard_check::omap: - return 
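
`rgw_cloud_tier_transfer_object` clamps the multipart threshold to the same 5 MiB floor and routes anything below it through a single plain PUT; only larger objects pay the multipart bookkeeping cost. A small sketch of that decision (threshold and sizes are sample values):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t MIN_POSSIBLE = 5ull << 20;  // MULTIPART_MIN_POSSIBLE_PART_SIZE
  uint64_t threshold = 1ull << 20;           // misconfigured below the floor
  if (threshold < MIN_POSSIBLE)
    threshold = MIN_POSSIBLE;                // clamp, as the code above does

  for (uint64_t size : {256ull << 10, 4ull << 20, 64ull << 20}) {
    bool multipart = size >= threshold;      // plain PUT below the threshold
    printf("size=%8llu KiB -> %s\n", (unsigned long long)(size >> 10),
           multipart ? "multipart upload" : "plain transfer");
  }
}
```
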
m << "shard_check::omap"; - case shard_check::fifo: - return m << "shard_check::fifo"; - case shard_check::corrupt: - return m << "shard_check::corrupt"; - } - - return m << "shard_check::UNKNOWN=" << static_cast(t); -} - -namespace { -/// Return the shard type, and a bool to see whether it has entries. -shard_check -probe_shard(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - bool& fifo_unsupported, optional_yield y) -{ - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " probing oid=" << oid - << dendl; - if (!fifo_unsupported) { - std::unique_ptr fifo; - auto r = rgw::cls::fifo::FIFO::open(dpp, ioctx, oid, - &fifo, y, - std::nullopt, true); - switch (r) { - case 0: - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": oid=" << oid << " is FIFO" - << dendl; - return shard_check::fifo; - - case -ENODATA: - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": oid=" << oid << " is empty and therefore OMAP" - << dendl; - return shard_check::omap; - - case -ENOENT: - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": oid=" << oid << " does not exist" - << dendl; - return shard_check::dne; - - case -EPERM: - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": FIFO is unsupported, marking." - << dendl; - fifo_unsupported = true; - return shard_check::omap; - - default: - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": error probing: r=" << r - << ", oid=" << oid << dendl; - return shard_check::corrupt; - } - } else { - // Since FIFO is unsupported, OMAP is the only alternative - return shard_check::omap; - } -} - -tl::expected -handle_dne(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, - log_type def, - std::string oid, - bool fifo_unsupported, - optional_yield y) -{ - if (def == log_type::fifo) { - if (fifo_unsupported) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " WARNING: FIFO set as default but not supported by OSD. " - << "Falling back to OMAP." << dendl; - return log_type::omap; - } - std::unique_ptr fifo; - auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, - &fifo, y, - std::nullopt); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " error creating FIFO: r=" << r - << ", oid=" << oid << dendl; - return tl::unexpected(bs::error_code(-r, bs::system_category())); - } - } - return def; -} -} - -tl::expected -log_backing_type(const DoutPrefixProvider *dpp, - librados::IoCtx& ioctx, - log_type def, - int shards, - const fu2::unique_function& get_oid, - optional_yield y) -{ - auto check = shard_check::dne; - bool fifo_unsupported = false; - for (int i = 0; i < shards; ++i) { - auto c = probe_shard(dpp, ioctx, get_oid(i), fifo_unsupported, y); - if (c == shard_check::corrupt) - return tl::unexpected(bs::error_code(EIO, bs::system_category())); - if (c == shard_check::dne) continue; - if (check == shard_check::dne) { - check = c; - continue; - } - - if (check != c) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " clashing types: check=" << check - << ", c=" << c << dendl; - return tl::unexpected(bs::error_code(EIO, bs::system_category())); - } - } - if (check == shard_check::corrupt) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " should be unreachable!" 
<< dendl; - return tl::unexpected(bs::error_code(EIO, bs::system_category())); - } - - if (check == shard_check::dne) - return handle_dne(dpp, ioctx, - def, - get_oid(0), - fifo_unsupported, - y); - - return (check == shard_check::fifo ? log_type::fifo : log_type::omap); -} - -bs::error_code log_remove(const DoutPrefixProvider *dpp, - librados::IoCtx& ioctx, - int shards, - const fu2::unique_function& get_oid, - bool leave_zero, - optional_yield y) -{ - bs::error_code ec; - for (int i = 0; i < shards; ++i) { - auto oid = get_oid(i); - rados::cls::fifo::info info; - uint32_t part_header_size = 0, part_entry_overhead = 0; - - auto r = rgw::cls::fifo::get_meta(dpp, ioctx, oid, std::nullopt, &info, - &part_header_size, &part_entry_overhead, - 0, y, true); - if (r == -ENOENT) continue; - if (r == 0 && info.head_part_num > -1) { - for (auto j = info.tail_part_num; j <= info.head_part_num; ++j) { - librados::ObjectWriteOperation op; - op.remove(); - auto part_oid = info.part_oid(j); - auto subr = rgw_rados_operate(dpp, ioctx, part_oid, &op, null_yield); - if (subr < 0 && subr != -ENOENT) { - if (!ec) - ec = bs::error_code(-subr, bs::system_category()); - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed removing FIFO part: part_oid=" << part_oid - << ", subr=" << subr << dendl; - } - } - } - if (r < 0 && r != -ENODATA) { - if (!ec) - ec = bs::error_code(-r, bs::system_category()); - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed checking FIFO part: oid=" << oid - << ", r=" << r << dendl; - } - librados::ObjectWriteOperation op; - if (i == 0 && leave_zero) { - // Leave shard 0 in existence, but remove contents and - // omap. cls_lock stores things in the xattrs. And sync needs to - // rendezvous with locks on generation 0 shard 0. - op.omap_set_header({}); - op.omap_clear(); - op.truncate(0); - } else { - op.remove(); - } - r = rgw_rados_operate(dpp, ioctx, oid, &op, null_yield); - if (r < 0 && r != -ENOENT) { - if (!ec) - ec = bs::error_code(-r, bs::system_category()); - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed removing shard: oid=" << oid - << ", r=" << r << dendl; - } - } - return ec; -} - -logback_generations::~logback_generations() { - if (watchcookie > 0) { - auto cct = static_cast(ioctx.cct()); - auto r = ioctx.unwatch2(watchcookie); - if (r < 0) { - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed unwatching oid=" << oid - << ", r=" << r << dendl; - } - } -} - -bs::error_code logback_generations::setup(const DoutPrefixProvider *dpp, - log_type def, - optional_yield y) noexcept -{ - try { - // First, read. - auto cct = static_cast(ioctx.cct()); - auto res = read(dpp, y); - if (!res && res.error() != bs::errc::no_such_file_or_directory) { - return res.error(); - } - if (res) { - std::unique_lock lock(m); - std::tie(entries_, version) = std::move(*res); - } else { - // Are we the first? Then create generation 0 and the generations - // metadata. 
- librados::ObjectWriteOperation op; - auto type = log_backing_type(dpp, ioctx, def, shards, - [this](int shard) { - return this->get_oid(0, shard); - }, y); - if (!type) - return type.error(); - - logback_generation l; - l.type = *type; - - std::unique_lock lock(m); - version.ver = 1; - static constexpr auto TAG_LEN = 24; - version.tag.clear(); - append_rand_alpha(cct, version.tag, version.tag, TAG_LEN); - op.create(true); - cls_version_set(op, version); - cb::list bl; - entries_.emplace(0, std::move(l)); - encode(entries_, bl); - lock.unlock(); - - op.write_full(bl); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed writing oid=" << oid - << ", r=" << r << dendl; - bs::system_error(-r, bs::system_category()); - } - // Did someone race us? Then re-read. - if (r != 0) { - res = read(dpp, y); - if (!res) - return res.error(); - if (res->first.empty()) - return bs::error_code(EIO, bs::system_category()); - auto l = res->first.begin()->second; - // In the unlikely event that someone raced us, created - // generation zero, incremented, then erased generation zero, - // don't leave generation zero lying around. - if (l.gen_id != 0) { - auto ec = log_remove(dpp, ioctx, shards, - [this](int shard) { - return this->get_oid(0, shard); - }, true, y); - if (ec) return ec; - } - std::unique_lock lock(m); - std::tie(entries_, version) = std::move(*res); - } - } - // Pass all non-empty generations to the handler - std::unique_lock lock(m); - auto i = lowest_nomempty(entries_); - entries_t e; - std::copy(i, entries_.cend(), - std::inserter(e, e.end())); - m.unlock(); - auto ec = watch(); - if (ec) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed to re-establish watch, unsafe to continue: oid=" - << oid << ", ec=" << ec.message() << dendl; - } - return handle_init(std::move(e)); - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } -} - -bs::error_code logback_generations::update(const DoutPrefixProvider *dpp, optional_yield y) noexcept -{ - try { - auto res = read(dpp, y); - if (!res) { - return res.error(); - } - - std::unique_lock l(m); - auto& [es, v] = *res; - if (v == version) { - // Nothing to do! - return {}; - } - - // Check consistency and prepare update - if (es.empty()) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": INCONSISTENCY! Read empty update." << dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - auto cur_lowest = lowest_nomempty(entries_); - // Straight up can't happen - assert(cur_lowest != entries_.cend()); - auto new_lowest = lowest_nomempty(es); - if (new_lowest == es.cend()) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": INCONSISTENCY! Read update with no active head." << dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - if (new_lowest->first < cur_lowest->first) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": INCONSISTENCY! Tail moved wrong way." << dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - - std::optional highest_empty; - if (new_lowest->first > cur_lowest->first && new_lowest != es.begin()) { - --new_lowest; - highest_empty = new_lowest->first; - } - - entries_t new_entries; - - if ((es.end() - 1)->first < (entries_.end() - 1)->first) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": INCONSISTENCY! Head moved wrong way." 
<< dendl; - return bs::error_code(EFAULT, bs::system_category()); - } - - if ((es.end() - 1)->first > (entries_.end() - 1)->first) { - auto ei = es.lower_bound((entries_.end() - 1)->first + 1); - std::copy(ei, es.end(), std::inserter(new_entries, new_entries.end())); - } - - // Everything checks out! - - version = v; - entries_ = es; - l.unlock(); - - if (highest_empty) { - auto ec = handle_empty_to(*highest_empty); - if (ec) return ec; - } - - if (!new_entries.empty()) { - auto ec = handle_new_gens(std::move(new_entries)); - if (ec) return ec; - } - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } - return {}; -} - -auto logback_generations::read(const DoutPrefixProvider *dpp, optional_yield y) noexcept -> - tl::expected, bs::error_code> -{ - try { - librados::ObjectReadOperation op; - std::unique_lock l(m); - cls_version_check(op, version, VER_COND_GE); - l.unlock(); - obj_version v2; - cls_version_read(op, &v2); - cb::list bl; - op.read(0, 0, &bl, nullptr); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, nullptr, y); - if (r < 0) { - if (r == -ENOENT) { - ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": oid=" << oid - << " not found" << dendl; - } else { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed reading oid=" << oid - << ", r=" << r << dendl; - } - return tl::unexpected(bs::error_code(-r, bs::system_category())); - } - auto bi = bl.cbegin(); - entries_t e; - try { - decode(e, bi); - } catch (const cb::error& err) { - return tl::unexpected(err.code()); - } - return std::pair{ std::move(e), std::move(v2) }; - } catch (const std::bad_alloc&) { - return tl::unexpected(bs::error_code(ENOMEM, bs::system_category())); - } -} - -bs::error_code logback_generations::write(const DoutPrefixProvider *dpp, entries_t&& e, - std::unique_lock&& l_, - optional_yield y) noexcept -{ - auto l = std::move(l_); - ceph_assert(l.mutex() == &m && - l.owns_lock()); - try { - librados::ObjectWriteOperation op; - cls_version_check(op, version, VER_COND_GE); - cb::list bl; - encode(e, bl); - op.write_full(bl); - cls_version_inc(op); - auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); - if (r == 0) { - entries_ = std::move(e); - version.inc(); - return {}; - } - l.unlock(); - if (r < 0 && r != -ECANCELED) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed reading oid=" << oid - << ", r=" << r << dendl; - return { -r, bs::system_category() }; - } - if (r == -ECANCELED) { - auto ec = update(dpp, y); - if (ec) { - return ec; - } else { - return { ECANCELED, bs::system_category() }; - } - } - } catch (const std::bad_alloc&) { - return { ENOMEM, bs::system_category() }; - } - return {}; -} - - -bs::error_code logback_generations::watch() noexcept { - try { - auto cct = static_cast(ioctx.cct()); - auto r = ioctx.watch2(oid, &watchcookie, this); - if (r < 0) { - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed to set watch oid=" << oid - << ", r=" << r << dendl; - return { -r, bs::system_category() }; - } - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } - return {}; -} - -bs::error_code logback_generations::new_backing(const DoutPrefixProvider *dpp, - log_type type, - optional_yield y) noexcept { - static constexpr auto max_tries = 10; - try { - auto ec = update(dpp, y); - if (ec) return ec; - auto tries = 0; - entries_t new_entries; - do { - std::unique_lock l(m); - auto last = entries_.end() - 1; - if (last->second.type == 
type) { - // Nothing to be done - return {}; - } - auto newgenid = last->first + 1; - logback_generation newgen; - newgen.gen_id = newgenid; - newgen.type = type; - new_entries.emplace(newgenid, newgen); - auto es = entries_; - es.emplace(newgenid, std::move(newgen)); - ec = write(dpp, std::move(es), std::move(l), y); - ++tries; - } while (ec == bs::errc::operation_canceled && - tries < max_tries); - if (tries >= max_tries) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": exhausted retry attempts." << dendl; - return ec; - } - - if (ec) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": write failed with ec=" << ec.message() << dendl; - return ec; - } - - cb::list bl, rbl; - - auto r = rgw_rados_notify(dpp, ioctx, oid, bl, 10'000, &rbl, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": notify failed with r=" << r << dendl; - return { -r, bs::system_category() }; - } - ec = handle_new_gens(new_entries); - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } - return {}; -} - -bs::error_code logback_generations::empty_to(const DoutPrefixProvider *dpp, - uint64_t gen_id, - optional_yield y) noexcept { - static constexpr auto max_tries = 10; - try { - auto ec = update(dpp, y); - if (ec) return ec; - auto tries = 0; - uint64_t newtail = 0; - do { - std::unique_lock l(m); - { - auto last = entries_.end() - 1; - if (gen_id >= last->first) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": Attempt to trim beyond the possible." << dendl; - return bs::error_code(EINVAL, bs::system_category()); - } - } - auto es = entries_; - auto ei = es.upper_bound(gen_id); - if (ei == es.begin()) { - // Nothing to be done. - return {}; - } - for (auto i = es.begin(); i < ei; ++i) { - newtail = i->first; - i->second.pruned = ceph::real_clock::now(); - } - ec = write(dpp, std::move(es), std::move(l), y); - ++tries; - } while (ec == bs::errc::operation_canceled && - tries < max_tries); - if (tries >= max_tries) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": exhausted retry attempts." 
<< dendl; - return ec; - } - - if (ec) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": write failed with ec=" << ec.message() << dendl; - return ec; - } - - cb::list bl, rbl; - - auto r = rgw_rados_notify(dpp, ioctx, oid, bl, 10'000, &rbl, y); - if (r < 0) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": notify failed with r=" << r << dendl; - return { -r, bs::system_category() }; - } - ec = handle_empty_to(newtail); - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } - return {}; -} - -bs::error_code logback_generations::remove_empty(const DoutPrefixProvider *dpp, optional_yield y) noexcept { - static constexpr auto max_tries = 10; - try { - auto ec = update(dpp, y); - if (ec) return ec; - auto tries = 0; - entries_t new_entries; - std::unique_lock l(m); - ceph_assert(!entries_.empty()); - { - auto i = lowest_nomempty(entries_); - if (i == entries_.begin()) { - return {}; - } - } - entries_t es; - auto now = ceph::real_clock::now(); - l.unlock(); - do { - std::copy_if(entries_.cbegin(), entries_.cend(), - std::inserter(es, es.end()), - [now](const auto& e) { - if (!e.second.pruned) - return false; - - auto pruned = *e.second.pruned; - return (now - pruned) >= 1h; - }); - auto es2 = entries_; - for (const auto& [gen_id, e] : es) { - ceph_assert(e.pruned); - auto ec = log_remove(dpp, ioctx, shards, - [this, gen_id = gen_id](int shard) { - return this->get_oid(gen_id, shard); - }, (gen_id == 0), y); - if (ec) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": Error pruning: gen_id=" << gen_id - << " ec=" << ec.message() << dendl; - } - if (auto i = es2.find(gen_id); i != es2.end()) { - es2.erase(i); - } - } - l.lock(); - es.clear(); - ec = write(dpp, std::move(es2), std::move(l), y); - ++tries; - } while (ec == bs::errc::operation_canceled && - tries < max_tries); - if (tries >= max_tries) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": exhausted retry attempts." << dendl; - return ec; - } - - if (ec) { - ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": write failed with ec=" << ec.message() << dendl; - return ec; - } - } catch (const std::bad_alloc&) { - return bs::error_code(ENOMEM, bs::system_category()); - } - return {}; -} - -void logback_generations::handle_notify(uint64_t notify_id, - uint64_t cookie, - uint64_t notifier_id, - bufferlist& bl) -{ - auto cct = static_cast(ioctx.cct()); - const DoutPrefix dp(cct, dout_subsys, "logback generations handle_notify: "); - if (notifier_id != my_id) { - auto ec = update(&dp, null_yield); - if (ec) { - lderr(cct) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": update failed, no one to report to and no safe way to continue." 
- << dendl; - abort(); - } - } - cb::list rbl; - ioctx.notify_ack(oid, notify_id, watchcookie, rbl); -} - -void logback_generations::handle_error(uint64_t cookie, int err) { - auto cct = static_cast(ioctx.cct()); - auto r = ioctx.unwatch2(watchcookie); - if (r < 0) { - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed to set unwatch oid=" << oid - << ", r=" << r << dendl; - } - - auto ec = watch(); - if (ec) { - lderr(cct) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << ": failed to re-establish watch, unsafe to continue: oid=" - << oid << ", ec=" << ec.message() << dendl; - } -} diff --git a/src/rgw/store/rados/rgw_log_backing.h b/src/rgw/store/rados/rgw_log_backing.h deleted file mode 100644 index 3fa67d7418b..00000000000 --- a/src/rgw/store/rados/rgw_log_backing.h +++ /dev/null @@ -1,399 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_LOGBACKING_H -#define CEPH_RGW_LOGBACKING_H - -#include -#include -#include -#include - -#include - -#include -#include - -#undef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY 1 -#include - -#include "include/rados/librados.hpp" -#include "include/encoding.h" -#include "include/expected.hpp" -#include "include/function2.hpp" - -#include "cls/version/cls_version_types.h" - -#include "common/async/yield_context.h" -#include "common/Formatter.h" -#include "common/strtol.h" - -namespace bc = boost::container; -namespace bs = boost::system; - -#include "cls_fifo_legacy.h" - -/// Type of log backing, stored in the mark used in the quick check, -/// and passed to checking functions. -enum class log_type { - omap = 0, - fifo = 1 -}; - -inline void encode(const log_type& type, ceph::buffer::list& bl) { - auto t = static_cast(type); - encode(t, bl); -} - -inline void decode(log_type& type, bufferlist::const_iterator& bl) { - uint8_t t; - decode(t, bl); - type = static_cast(t); -} - -inline std::optional to_log_type(std::string_view s) { - if (strncasecmp(s.data(), "omap", s.length()) == 0) { - return log_type::omap; - } else if (strncasecmp(s.data(), "fifo", s.length()) == 0) { - return log_type::fifo; - } else { - return std::nullopt; - } -} -inline std::ostream& operator <<(std::ostream& m, const log_type& t) { - switch (t) { - case log_type::omap: - return m << "log_type::omap"; - case log_type::fifo: - return m << "log_type::fifo"; - } - - return m << "log_type::UNKNOWN=" << static_cast(t); -} - -/// Look over the shards in a log and determine the type. -tl::expected -log_backing_type(const DoutPrefixProvider *dpp, - librados::IoCtx& ioctx, - log_type def, - int shards, //< Total number of shards - /// A function taking a shard number and - /// returning an oid. - const fu2::unique_function& get_oid, - optional_yield y); - -/// Remove all log shards and associated parts of fifos. -bs::error_code log_remove(librados::IoCtx& ioctx, - int shards, //< Total number of shards - /// A function taking a shard number and - /// returning an oid. 
- const fu2::unique_function& get_oid, - bool leave_zero, - optional_yield y); - - -struct logback_generation { - uint64_t gen_id = 0; - log_type type; - std::optional pruned; - - void encode(ceph::buffer::list& bl) const { - ENCODE_START(1, 1, bl); - encode(gen_id, bl); - encode(type, bl); - encode(pruned, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(gen_id, bl); - decode(type, bl); - decode(pruned, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(logback_generation) -inline std::ostream& operator <<(std::ostream& m, const logback_generation& g) { - return m << "[" << g.gen_id << "," << g.type << "," - << (g.pruned ? "PRUNED" : "NOT PRUNED") << "]"; -} - -class logback_generations : public librados::WatchCtx2 { -public: - using entries_t = bc::flat_map; - -protected: - librados::IoCtx& ioctx; - logback_generations(librados::IoCtx& ioctx, - std::string oid, - fu2::unique_function&& get_oid, - int shards) noexcept - : ioctx(ioctx), oid(oid), get_oid(std::move(get_oid)), - shards(shards) {} - - uint64_t my_id = ioctx.get_instance_id(); - -private: - const std::string oid; - const fu2::unique_function get_oid; - -protected: - const int shards; - -private: - - uint64_t watchcookie = 0; - - obj_version version; - std::mutex m; - entries_t entries_; - - tl::expected, bs::error_code> - read(const DoutPrefixProvider *dpp, optional_yield y) noexcept; - bs::error_code write(const DoutPrefixProvider *dpp, entries_t&& e, std::unique_lock&& l_, - optional_yield y) noexcept; - bs::error_code setup(const DoutPrefixProvider *dpp, log_type def, optional_yield y) noexcept; - - bs::error_code watch() noexcept; - - auto lowest_nomempty(const entries_t& es) { - return std::find_if(es.begin(), es.end(), - [](const auto& e) { - return !e.second.pruned; - }); - } - -public: - - /// For the use of watch/notify. - - void handle_notify(uint64_t notify_id, - uint64_t cookie, - uint64_t notifier_id, - bufferlist& bl) override final; - - void handle_error(uint64_t cookie, int err) override final; - - /// Public interface - - virtual ~logback_generations(); - - template - static tl::expected, bs::error_code> - init(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx_, std::string oid_, - fu2::unique_function&& get_oid_, - int shards_, log_type def, optional_yield y, - Args&& ...args) noexcept { - try { - T* lgp = new T(ioctx_, std::move(oid_), - std::move(get_oid_), - shards_, std::forward(args)...); - std::unique_ptr lg(lgp); - lgp = nullptr; - auto ec = lg->setup(dpp, def, y); - if (ec) - return tl::unexpected(ec); - // Obnoxiousness for C++ Compiler in Bionic Beaver - return tl::expected, bs::error_code>(std::move(lg)); - } catch (const std::bad_alloc&) { - return tl::unexpected(bs::error_code(ENOMEM, bs::system_category())); - } - } - - bs::error_code update(const DoutPrefixProvider *dpp, optional_yield y) noexcept; - - entries_t entries() const { - return entries_; - } - - bs::error_code new_backing(const DoutPrefixProvider *dpp, log_type type, optional_yield y) noexcept; - - bs::error_code empty_to(const DoutPrefixProvider *dpp, uint64_t gen_id, optional_yield y) noexcept; - - bs::error_code remove_empty(const DoutPrefixProvider *dpp, optional_yield y) noexcept; - - // Callbacks, to be defined by descendant. - - /// Handle initialization on startup - /// - /// @param e All non-empty generations - virtual bs::error_code handle_init(entries_t e) noexcept = 0; - - /// Handle new generations. 
- /// - /// @param e Map of generations added since last update - virtual bs::error_code handle_new_gens(entries_t e) noexcept = 0; - - /// Handle generations being marked empty - /// - /// @param new_tail Lowest non-empty generation - virtual bs::error_code handle_empty_to(uint64_t new_tail) noexcept = 0; -}; - -inline std::string gencursor(uint64_t gen_id, std::string_view cursor) { - return (gen_id > 0 ? - fmt::format("G{:0>20}@{}", gen_id, cursor) : - std::string(cursor)); -} - -inline std::pair -cursorgen(std::string_view cursor_) { - if (cursor_.empty()) { - return { 0, "" }; - } - std::string_view cursor = cursor_; - if (cursor[0] != 'G') { - return { 0, cursor }; - } - cursor.remove_prefix(1); - auto gen_id = ceph::consume(cursor); - if (!gen_id || cursor[0] != '@') { - return { 0, cursor_ }; - } - cursor.remove_prefix(1); - return { *gen_id, cursor }; -} - -class LazyFIFO { - librados::IoCtx& ioctx; - std::string oid; - std::mutex m; - std::unique_ptr fifo; - - int lazy_init(const DoutPrefixProvider *dpp, optional_yield y) { - std::unique_lock l(m); - if (fifo) return 0; - auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, &fifo, y); - if (r) { - fifo.reset(); - } - return r; - } - -public: - - LazyFIFO(librados::IoCtx& ioctx, std::string oid) - : ioctx(ioctx), oid(std::move(oid)) {} - - int read_meta(const DoutPrefixProvider *dpp, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->read_meta(dpp, y); - } - - int meta(const DoutPrefixProvider *dpp, rados::cls::fifo::info& info, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - info = fifo->meta(); - return 0; - } - - int get_part_layout_info(const DoutPrefixProvider *dpp, - std::uint32_t& part_header_size, - std::uint32_t& part_entry_overhead, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - std::tie(part_header_size, part_entry_overhead) - = fifo->get_part_layout_info(); - return 0; - } - - int push(const DoutPrefixProvider *dpp, - const ceph::buffer::list& bl, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->push(dpp, bl, y); - } - - int push(const DoutPrefixProvider *dpp, - ceph::buffer::list& bl, - librados::AioCompletion* c, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->push(dpp, bl, c); - return 0; - } - - int push(const DoutPrefixProvider *dpp, - const std::vector& data_bufs, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->push(dpp, data_bufs, y); - } - - int push(const DoutPrefixProvider *dpp, - const std::vector& data_bufs, - librados::AioCompletion* c, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->push(dpp, data_bufs, c); - return 0; - } - - int list(const DoutPrefixProvider *dpp, - int max_entries, std::optional markstr, - std::vector* out, - bool* more, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->list(dpp, max_entries, markstr, out, more, y); - } - - int list(const DoutPrefixProvider *dpp, int max_entries, std::optional markstr, - std::vector* out, bool* more, - librados::AioCompletion* c, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->list(dpp, max_entries, markstr, out, more, c); - return 0; - } - - int trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->trim(dpp, markstr, exclusive, 
y); - } - - int trim(const DoutPrefixProvider *dpp, std::string_view markstr, bool exclusive, librados::AioCompletion* c, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->trim(dpp, markstr, exclusive, c); - return 0; - } - - int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, rados::cls::fifo::part_header* header, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - return fifo->get_part_info(dpp, part_num, header, y); - } - - int get_part_info(const DoutPrefixProvider *dpp, int64_t part_num, rados::cls::fifo::part_header* header, - librados::AioCompletion* c, optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->get_part_info(part_num, header, c); - return 0; - } - - int get_head_info(const DoutPrefixProvider *dpp, fu2::unique_function< - void(int r, rados::cls::fifo::part_header&&)>&& f, - librados::AioCompletion* c, - optional_yield y) { - auto r = lazy_init(dpp, y); - if (r < 0) return r; - fifo->get_head_info(dpp, std::move(f), c); - return 0; - } -}; - -#endif diff --git a/src/rgw/store/rados/rgw_metadata.cc b/src/rgw/store/rados/rgw_metadata.cc deleted file mode 100644 index e3e49316eac..00000000000 --- a/src/rgw/store/rados/rgw_metadata.cc +++ /dev/null @@ -1,233 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_metadata.h" - -#include "rgw_zone.h" -#include "rgw_mdlog.h" - -#include "services/svc_zone.h" -#include "services/svc_cls.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -const std::string RGWMetadataLogHistory::oid = "meta.history"; - -struct obj_version; - -void rgw_shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id) -{ - uint32_t val = ceph_str_hash_linux(key.c_str(), key.size()); - char buf[16]; - if (shard_id) { - *shard_id = val % max_shards; - } - snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards)); - name = prefix + buf; -} - -void rgw_shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name) -{ - uint32_t val = ceph_str_hash_linux(key.c_str(), key.size()); - val ^= ceph_str_hash_linux(section.c_str(), section.size()); - char buf[16]; - snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards)); - name = prefix + buf; -} - -void rgw_shard_name(const string& prefix, unsigned shard_id, string& name) -{ - char buf[16]; - snprintf(buf, sizeof(buf), "%u", shard_id); - name = prefix + buf; -} - -int RGWMetadataLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl) { - if (!svc.zone->need_to_log_metadata()) - return 0; - - string oid; - int shard_id; - - rgw_shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id); - mark_modified(shard_id); - real_time now = real_clock::now(); - return svc.cls->timelog.add(dpp, oid, now, section, key, bl, null_yield); -} - -int RGWMetadataLog::get_shard_id(const string& hash_key, int *shard_id) -{ - string oid; - - rgw_shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, shard_id); - return 0; -} - -int RGWMetadataLog::store_entries_in_shard(const DoutPrefixProvider *dpp, list& entries, int shard_id, librados::AioCompletion *completion) -{ - string oid; - - mark_modified(shard_id); - rgw_shard_name(prefix, shard_id, oid); - return svc.cls->timelog.add(dpp, oid, entries, completion, false, null_yield); -} - -void 
RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, - const string& marker, void **handle) -{ - LogListCtx *ctx = new LogListCtx(); - - ctx->cur_shard = shard_id; - ctx->from_time = from_time; - ctx->end_time = end_time; - ctx->marker = marker; - - get_shard_oid(ctx->cur_shard, ctx->cur_oid); - - *handle = (void *)ctx; -} - -void RGWMetadataLog::complete_list_entries(void *handle) { - LogListCtx *ctx = static_cast(handle); - delete ctx; -} - -int RGWMetadataLog::list_entries(const DoutPrefixProvider *dpp, void *handle, - int max_entries, - list& entries, - string *last_marker, - bool *truncated) { - LogListCtx *ctx = static_cast(handle); - - if (!max_entries) { - *truncated = false; - return 0; - } - - std::string next_marker; - int ret = svc.cls->timelog.list(dpp, ctx->cur_oid, ctx->from_time, ctx->end_time, - max_entries, entries, ctx->marker, - &next_marker, truncated, null_yield); - if ((ret < 0) && (ret != -ENOENT)) - return ret; - - ctx->marker = std::move(next_marker); - if (last_marker) { - *last_marker = ctx->marker; - } - - if (ret == -ENOENT) - *truncated = false; - - return 0; -} - -int RGWMetadataLog::get_info(const DoutPrefixProvider *dpp, int shard_id, RGWMetadataLogInfo *info) -{ - string oid; - get_shard_oid(shard_id, oid); - - cls_log_header header; - - int ret = svc.cls->timelog.info(dpp, oid, &header, null_yield); - if ((ret < 0) && (ret != -ENOENT)) - return ret; - - info->marker = header.max_marker; - info->last_update = header.max_time.to_real_time(); - - return 0; -} - -static void _mdlog_info_completion(librados::completion_t cb, void *arg) -{ - auto infoc = static_cast(arg); - infoc->finish(cb); - infoc->put(); // drop the ref from get_info_async() -} - -RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb) - : completion(librados::Rados::aio_create_completion((void *)this, - _mdlog_info_completion)), - callback(cb) -{ -} - -RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion() -{ - completion->release(); -} - -int RGWMetadataLog::get_info_async(const DoutPrefixProvider *dpp, int shard_id, RGWMetadataLogInfoCompletion *completion) -{ - string oid; - get_shard_oid(shard_id, oid); - - completion->get(); // hold a ref until the completion fires - - return svc.cls->timelog.info_async(dpp, completion->get_io_obj(), oid, - &completion->get_header(), - completion->get_completion()); -} - -int RGWMetadataLog::trim(const DoutPrefixProvider *dpp, int shard_id, const real_time& from_time, const real_time& end_time, - const string& start_marker, const string& end_marker) -{ - string oid; - get_shard_oid(shard_id, oid); - - return svc.cls->timelog.trim(dpp, oid, from_time, end_time, start_marker, - end_marker, nullptr, null_yield); -} - -int RGWMetadataLog::lock_exclusive(const DoutPrefixProvider *dpp, int shard_id, timespan duration, string& zone_id, string& owner_id) { - string oid; - get_shard_oid(shard_id, oid); - - return svc.cls->lock.lock_exclusive(dpp, svc.zone->get_zone_params().log_pool, oid, duration, zone_id, owner_id); -} - -int RGWMetadataLog::unlock(const DoutPrefixProvider *dpp, int shard_id, string& zone_id, string& owner_id) { - string oid; - get_shard_oid(shard_id, oid); - - return svc.cls->lock.unlock(dpp, svc.zone->get_zone_params().log_pool, oid, zone_id, owner_id); -} - -void RGWMetadataLog::mark_modified(int shard_id) -{ - lock.get_read(); - if (modified_shards.find(shard_id) != modified_shards.end()) { - lock.unlock(); - return; - } - lock.unlock(); - - std::unique_lock 
wl{lock}; - modified_shards.insert(shard_id); -} - -void RGWMetadataLog::read_clear_modified(set &modified) -{ - std::unique_lock wl{lock}; - modified.swap(modified_shards); - modified_shards.clear(); -} - -void RGWMetadataLogInfo::dump(Formatter *f) const -{ - encode_json("marker", marker, f); - utime_t ut(last_update); - encode_json("last_update", ut, f); -} - -void RGWMetadataLogInfo::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("marker", marker, obj); - utime_t ut; - JSONDecoder::decode_json("last_update", ut, obj); - last_update = ut.to_real_time(); -} - diff --git a/src/rgw/store/rados/rgw_metadata.h b/src/rgw/store/rados/rgw_metadata.h deleted file mode 100644 index 72283702e7e..00000000000 --- a/src/rgw/store/rados/rgw_metadata.h +++ /dev/null @@ -1,300 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_METADATA_H -#define CEPH_RGW_METADATA_H - -#include -#include -#include - -#include "include/types.h" -#include "rgw_common.h" -#include "rgw_period_history.h" -#include "rgw_mdlog_types.h" -#include "cls/version/cls_version_types.h" -#include "cls/log/cls_log_types.h" -#include "common/RefCountedObj.h" -#include "common/ceph_time.h" -#include "services/svc_meta_be.h" -#include "rgw_sal_fwd.h" - - -class RGWCoroutine; -class JSONObj; -struct RGWObjVersionTracker; - -struct obj_version; - - -class RGWMetadataObject { -protected: - obj_version objv; - ceph::real_time mtime; - std::map *pattrs{nullptr}; - -public: - RGWMetadataObject() {} - RGWMetadataObject(const obj_version& v, - real_time m) : objv(v), mtime(m) {} - virtual ~RGWMetadataObject() {} - obj_version& get_version(); - real_time& get_mtime() { return mtime; } - void set_pattrs(std::map *_pattrs) { - pattrs = _pattrs; - } - std::map *get_pattrs() { - return pattrs; - } - - virtual void dump(Formatter *f) const {} -}; - -class RGWMetadataManager; - -class RGWMetadataHandler { - friend class RGWMetadataManager; - -protected: - CephContext *cct; - -public: - RGWMetadataHandler() {} - virtual ~RGWMetadataHandler(); - virtual std::string get_type() = 0; - - void base_init(CephContext *_cct) { - cct = _cct; - } - - virtual RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) = 0; - - virtual int get(std::string& entry, RGWMetadataObject **obj, optional_yield, const DoutPrefixProvider *dpp) = 0; - virtual int put(std::string& entry, - RGWMetadataObject *obj, - RGWObjVersionTracker& objv_tracker, - optional_yield, - const DoutPrefixProvider *dpp, - RGWMDLogSyncType type, - bool from_remote_zone) = 0; - virtual int remove(std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp) = 0; - - virtual int mutate(const std::string& entry, - const ceph::real_time& mtime, - RGWObjVersionTracker *objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp, - RGWMDLogStatus op_type, - std::function f) = 0; - - virtual int list_keys_init(const DoutPrefixProvider *dpp, const std::string& marker, void **phandle) = 0; - virtual int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated) = 0; - virtual void list_keys_complete(void *handle) = 0; - - virtual std::string get_marker(void *handle) = 0; - - virtual int get_shard_id(const std::string& entry, int *shard_id) { - *shard_id = 0; - return 0; - } - virtual int attach(RGWMetadataManager *manager); -}; - -class RGWMetadataHandler_GenericMetaBE : public 
RGWMetadataHandler { - friend class RGWSI_MetaBackend; - friend class RGWMetadataManager; - friend class Put; - -public: - class Put; - -protected: - RGWSI_MetaBackend_Handler *be_handler; - - virtual int do_get(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) = 0; - virtual int do_put(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWMetadataObject *obj, - RGWObjVersionTracker& objv_tracker, optional_yield y, - const DoutPrefixProvider *dpp, RGWMDLogSyncType type, - bool from_remote_zone) = 0; - virtual int do_put_operate(Put *put_op, const DoutPrefixProvider *dpp); - virtual int do_remove(RGWSI_MetaBackend_Handler::Op *op, std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) = 0; - -public: - RGWMetadataHandler_GenericMetaBE() {} - - void base_init(CephContext *_cct, - RGWSI_MetaBackend_Handler *_be_handler) { - RGWMetadataHandler::base_init(_cct); - be_handler = _be_handler; - } - - RGWSI_MetaBackend_Handler *get_be_handler() { - return be_handler; - } - - class Put { - protected: - RGWMetadataHandler_GenericMetaBE *handler; - RGWSI_MetaBackend_Handler::Op *op; - std::string& entry; - RGWMetadataObject *obj; - RGWObjVersionTracker& objv_tracker; - RGWMDLogSyncType apply_type; - optional_yield y; - bool from_remote_zone{false}; - - int get(RGWMetadataObject **obj, const DoutPrefixProvider *dpp) { - return handler->do_get(op, entry, obj, y, dpp); - } - public: - Put(RGWMetadataHandler_GenericMetaBE *_handler, RGWSI_MetaBackend_Handler::Op *_op, - std::string& _entry, RGWMetadataObject *_obj, - RGWObjVersionTracker& _objv_tracker, optional_yield _y, - RGWMDLogSyncType _type, bool from_remote_zone); - - virtual ~Put() {} - - virtual int put_pre(const DoutPrefixProvider *dpp) { - return 0; - } - virtual int put(const DoutPrefixProvider *dpp) { - return 0; - } - virtual int put_post(const DoutPrefixProvider *dpp) { - return 0; - } - virtual int finalize() { - return 0; - } - }; - - int get(std::string& entry, RGWMetadataObject **obj, optional_yield, const DoutPrefixProvider *dpp) override; - int put(std::string& entry, RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp, RGWMDLogSyncType type, bool from_remote_zone) override; - int remove(std::string& entry, RGWObjVersionTracker& objv_tracker, optional_yield, const DoutPrefixProvider *dpp) override; - - int mutate(const std::string& entry, - const ceph::real_time& mtime, - RGWObjVersionTracker *objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp, - RGWMDLogStatus op_type, - std::function f) override; - - int get_shard_id(const std::string& entry, int *shard_id) override; - - int list_keys_init(const DoutPrefixProvider *dpp, const std::string& marker, void **phandle) override; - int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated) override; - void list_keys_complete(void *handle) override; - - std::string get_marker(void *handle) override; - - /** - * Compare an incoming versus on-disk tag/version+mtime combo against - * the sync mode to see if the new one should replace the on-disk one. - * - * @return true if the update should proceed, false otherwise. 
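 *
 * For example, under APPLY_UPDATES an incoming (tag, version) pair is
 * applied only when the tags match and incoming.ver > ondisk.ver, while
 * APPLY_NEWER compares only the modification times; see the switch below.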
- */ - static bool check_versions(bool exists, - const obj_version& ondisk, const real_time& ondisk_time, - const obj_version& incoming, const real_time& incoming_time, - RGWMDLogSyncType sync_mode) { - switch (sync_mode) { - case APPLY_UPDATES: - if ((ondisk.tag != incoming.tag) || - (ondisk.ver >= incoming.ver)) - return false; - break; - case APPLY_NEWER: - if (ondisk_time >= incoming_time) - return false; - break; - case APPLY_EXCLUSIVE: - if (exists) - return false; - break; - case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! - default: break; - } - return true; - } -}; - -class RGWMetadataTopHandler; - -class RGWMetadataManager { - friend class RGWMetadataHandler; - - CephContext *cct; - RGWSI_Meta *meta_svc; - std::map handlers; - std::unique_ptr md_top_handler; - - int find_handler(const std::string& metadata_key, RGWMetadataHandler **handler, std::string& entry); - int register_handler(RGWMetadataHandler *handler); - -public: - RGWMetadataManager(RGWSI_Meta *_meta_svc); - ~RGWMetadataManager(); - - RGWMetadataHandler *get_handler(const std::string& type); - - int get(std::string& metadata_key, Formatter *f, optional_yield y, const DoutPrefixProvider *dpp); - int put(std::string& metadata_key, bufferlist& bl, optional_yield y, - const DoutPrefixProvider *dpp, - RGWMDLogSyncType sync_mode, - bool from_remote_zone, - obj_version *existing_version = NULL); - int remove(std::string& metadata_key, optional_yield y, const DoutPrefixProvider *dpp); - - int mutate(const std::string& metadata_key, - const ceph::real_time& mtime, - RGWObjVersionTracker *objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp, - RGWMDLogStatus op_type, - std::function f); - - int list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, void **phandle); - int list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void **phandle); - int list_keys_next(const DoutPrefixProvider *dpp, void *handle, int max, std::list& keys, bool *truncated); - void list_keys_complete(void *handle); - - std::string get_marker(void *handle); - - void dump_log_entry(cls_log_entry& entry, Formatter *f); - - void get_sections(std::list& sections); - - void parse_metadata_key(const std::string& metadata_key, std::string& type, std::string& entry); - - int get_shard_id(const std::string& section, const std::string& key, int *shard_id); -}; - -class RGWMetadataHandlerPut_SObj : public RGWMetadataHandler_GenericMetaBE::Put -{ -protected: - std::unique_ptr oo; - RGWMetadataObject *old_obj{nullptr}; - bool exists{false}; - -public: - RGWMetadataHandlerPut_SObj(RGWMetadataHandler_GenericMetaBE *handler, RGWSI_MetaBackend_Handler::Op *op, - std::string& entry, RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, - optional_yield y, - RGWMDLogSyncType type, bool from_remote_zone); - ~RGWMetadataHandlerPut_SObj(); - - int put_pre(const DoutPrefixProvider *dpp) override; - int put(const DoutPrefixProvider *dpp) override; - virtual int put_check(const DoutPrefixProvider *dpp) { - return 0; - } - virtual int put_checked(const DoutPrefixProvider *dpp); - virtual void encode_obj(bufferlist *bl) {} -}; - -void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& key, std::string& name, int *shard_id); -void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& section, const std::string& key, std::string& name); -void rgw_shard_name(const std::string& prefix, unsigned shard_id, std::string& name); - 
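// Note: the rgw_shard_name() overloads map a key to one of max_shards names
// by hashing it with ceph_str_hash_linux() and appending (hash % max_shards)
// to the prefix, so an illustrative prefix of "meta.log." with a hash value
// of 71 and 64 shards yields "meta.log.7".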
-#endif diff --git a/src/rgw/store/rados/rgw_notify.cc b/src/rgw/store/rados/rgw_notify.cc deleted file mode 100644 index 253a3bc4035..00000000000 --- a/src/rgw/store/rados/rgw_notify.cc +++ /dev/null @@ -1,1009 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "rgw_notify.h" -#include "cls/2pc_queue/cls_2pc_queue_client.h" -#include "cls/lock/cls_lock_client.h" -#include -#include -#include -#include -#include "rgw_sal_rados.h" -#include "rgw_pubsub.h" -#include "rgw_pubsub_push.h" -#include "rgw_perf_counters.h" -#include "common/dout.h" -#include - -#define dout_subsys ceph_subsys_rgw - -namespace rgw::notify { - -struct event_entry_t { - rgw_pubsub_s3_event event; - std::string push_endpoint; - std::string push_endpoint_args; - std::string arn_topic; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(event, bl); - encode(push_endpoint, bl); - encode(push_endpoint_args, bl); - encode(arn_topic, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(event, bl); - decode(push_endpoint, bl); - decode(push_endpoint_args, bl); - decode(arn_topic, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(event_entry_t) - -using queues_t = std::set; - -// use mmap/mprotect to allocate 128k coroutine stacks -auto make_stack_allocator() { - return boost::context::protected_fixedsize_stack{128*1024}; -} - -class Manager : public DoutPrefixProvider { - const size_t max_queue_size; - const uint32_t queues_update_period_ms; - const uint32_t queues_update_retry_ms; - const uint32_t queue_idle_sleep_us; - const utime_t failover_time; - CephContext* const cct; - librados::IoCtx& rados_ioctx; - static constexpr auto COOKIE_LEN = 16; - const std::string lock_cookie; - boost::asio::io_context io_context; - boost::asio::executor_work_guard work_guard; - const uint32_t worker_count; - std::vector workers; - const uint32_t stale_reservations_period_s; - const uint32_t reservations_cleanup_period_s; - - const std::string Q_LIST_OBJECT_NAME = "queues_list_object"; - - CephContext *get_cct() const override { return cct; } - unsigned get_subsys() const override { return dout_subsys; } - std::ostream& gen_prefix(std::ostream& out) const override { return out << "rgw notify: "; } - - // read the list of queues from the queue list object - int read_queue_list(queues_t& queues, optional_yield y) { - constexpr auto max_chunk = 1024U; - std::string start_after; - bool more = true; - int rval; - while (more) { - librados::ObjectReadOperation op; - queues_t queues_chunk; - op.omap_get_keys2(start_after, max_chunk, &queues_chunk, &more, &rval); - const auto ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, nullptr, y); - if (ret == -ENOENT) { - // queue list object was not created - nothing to do - return 0; - } - if (ret < 0) { - // TODO: do we need to check on rval as well as ret? - ldpp_dout(this, 1) << "ERROR: failed to read queue list. 
error: " << ret << dendl; - return ret; - } - queues.merge(queues_chunk); - } - return 0; - } - - // set m1 to be the minimum between m1 and m2 - static int set_min_marker(std::string& m1, const std::string m2) { - cls_queue_marker mr1; - cls_queue_marker mr2; - if (mr1.from_str(m1.c_str()) < 0 || mr2.from_str(m2.c_str()) < 0) { - return -EINVAL; - } - if (mr2.gen <= mr1.gen && mr2.offset < mr1.offset) { - m1 = m2; - } - return 0; - } - - using Clock = ceph::coarse_mono_clock; - using Executor = boost::asio::io_context::executor_type; - using Timer = boost::asio::basic_waitable_timer, Executor>; - - class tokens_waiter { - const std::chrono::hours infinite_duration; - size_t pending_tokens; - Timer timer; - - struct token { - tokens_waiter& waiter; - token(tokens_waiter& _waiter) : waiter(_waiter) { - ++waiter.pending_tokens; - } - - ~token() { - --waiter.pending_tokens; - if (waiter.pending_tokens == 0) { - waiter.timer.cancel(); - } - } - }; - - public: - - tokens_waiter(boost::asio::io_context& io_context) : - infinite_duration(1000), - pending_tokens(0), - timer(io_context) {} - - void async_wait(yield_context yield) { - if (pending_tokens == 0) { - return; - } - timer.expires_from_now(infinite_duration); - boost::system::error_code ec; - timer.async_wait(yield[ec]); - ceph_assert(ec == boost::system::errc::operation_canceled); - } - - token make_token() { - return token(*this); - } - }; - - // processing of a specific entry - // return whether processing was successfull (true) or not (false) - bool process_entry(const cls_queue_entry& entry, yield_context yield) { - event_entry_t event_entry; - auto iter = entry.data.cbegin(); - try { - decode(event_entry, iter); - } catch (buffer::error& err) { - ldpp_dout(this, 5) << "WARNING: failed to decode entry. error: " << err.what() << dendl; - return false; - } - try { - // TODO move endpoint creation to queue level - const auto push_endpoint = RGWPubSubEndpoint::create(event_entry.push_endpoint, event_entry.arn_topic, - RGWHTTPArgs(event_entry.push_endpoint_args, this), - cct); - ldpp_dout(this, 20) << "INFO: push endpoint created: " << event_entry.push_endpoint << - " for entry: " << entry.marker << dendl; - const auto ret = push_endpoint->send_to_completion_async(cct, event_entry.event, optional_yield(io_context, yield)); - if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint - << " failed. error: " << ret << " (will retry)" << dendl; - return false; - } else { - ldpp_dout(this, 20) << "INFO: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint - << " ok" << dendl; - if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); - return true; - } - } catch (const RGWPubSubEndpoint::configuration_error& e) { - ldpp_dout(this, 5) << "WARNING: failed to create push endpoint: " - << event_entry.push_endpoint << " for entry: " << entry.marker << ". 
error: " << e.what() << " (will retry) " << dendl; - return false; - } - } - - // clean stale reservation from queue - void cleanup_queue(const std::string& queue_name, yield_context yield) { - while (true) { - ldpp_dout(this, 20) << "INFO: trying to perform stale reservation cleanup for queue: " << queue_name << dendl; - const auto now = ceph::coarse_real_time::clock::now(); - const auto stale_time = now - std::chrono::seconds(stale_reservations_period_s); - librados::ObjectWriteOperation op; - op.assert_exists(); - rados::cls::lock::assert_locked(&op, queue_name+"_lock", - ClsLockType::EXCLUSIVE, - lock_cookie, - "" /*no tag*/); - cls_2pc_queue_expire_reservations(op, stale_time); - // check ownership and do reservation cleanup in one batch - auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield)); - if (ret == -ENOENT) { - // queue was deleted - ldpp_dout(this, 5) << "INFO: queue: " - << queue_name << ". was removed. cleanup will stop" << dendl; - return; - } - if (ret == -EBUSY) { - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; - return; - } - if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: failed to cleanup stale reservation from queue and/or lock queue: " << queue_name - << ". error: " << ret << dendl; - } - Timer timer(io_context); - timer.expires_from_now(std::chrono::seconds(reservations_cleanup_period_s)); - boost::system::error_code ec; - timer.async_wait(yield[ec]); - } - } - - // processing of a specific queue - void process_queue(const std::string& queue_name, yield_context yield) { - constexpr auto max_elements = 1024; - auto is_idle = false; - const std::string start_marker; - - // start a the cleanup coroutine for the queue - spawn::spawn(io_context, [this, queue_name](yield_context yield) { - cleanup_queue(queue_name, yield); - }, make_stack_allocator()); - - while (true) { - // if queue was empty the last time, sleep for idle timeout - if (is_idle) { - Timer timer(io_context); - timer.expires_from_now(std::chrono::microseconds(queue_idle_sleep_us)); - boost::system::error_code ec; - timer.async_wait(yield[ec]); - } - - // get list of entries in the queue - is_idle = true; - bool truncated = false; - std::string end_marker; - std::vector entries; - auto total_entries = 0U; - { - librados::ObjectReadOperation op; - op.assert_exists(); - bufferlist obl; - int rval; - rados::cls::lock::assert_locked(&op, queue_name+"_lock", - ClsLockType::EXCLUSIVE, - lock_cookie, - "" /*no tag*/); - cls_2pc_queue_list_entries(op, start_marker, max_elements, &obl, &rval); - // check ownership and list entries in one batch - auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, nullptr, optional_yield(io_context, yield)); - if (ret == -ENOENT) { - // queue was deleted - ldpp_dout(this, 5) << "INFO: queue: " - << queue_name << ". was removed. processing will stop" << dendl; - return; - } - if (ret == -EBUSY) { - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; - return; - } - if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: failed to get list of entries in queue and/or lock queue: " - << queue_name << ". error: " << ret << " (will retry)" << dendl; - continue; - } - ret = cls_2pc_queue_list_entries_result(obl, entries, &truncated, end_marker); - if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: failed to parse list of entries in queue: " - << queue_name << ". 
error: " << ret << " (will retry)" << dendl; - continue; - } - } - total_entries = entries.size(); - if (total_entries == 0) { - // nothing in the queue - continue; - } - // log when queue is not idle - ldpp_dout(this, 20) << "INFO: found: " << total_entries << " entries in: " << queue_name << - ". end marker is: " << end_marker << dendl; - - is_idle = false; - auto has_error = false; - auto remove_entries = false; - auto entry_idx = 1U; - tokens_waiter waiter(io_context); - for (auto& entry : entries) { - if (has_error) { - // bail out on first error - break; - } - // TODO pass entry pointer instead of by-value - spawn::spawn(yield, [this, &queue_name, entry_idx, total_entries, &end_marker, &remove_entries, &has_error, &waiter, entry](yield_context yield) { - const auto token = waiter.make_token(); - if (process_entry(entry, yield)) { - ldpp_dout(this, 20) << "INFO: processing of entry: " << - entry.marker << " (" << entry_idx << "/" << total_entries << ") from: " << queue_name << " ok" << dendl; - remove_entries = true; - } else { - if (set_min_marker(end_marker, entry.marker) < 0) { - ldpp_dout(this, 1) << "ERROR: cannot determin minimum between malformed markers: " << end_marker << ", " << entry.marker << dendl; - } else { - ldpp_dout(this, 20) << "INFO: new end marker for removal: " << end_marker << " from: " << queue_name << dendl; - } - has_error = true; - ldpp_dout(this, 20) << "INFO: processing of entry: " << - entry.marker << " (" << entry_idx << "/" << total_entries << ") from: " << queue_name << " failed" << dendl; - } - }, make_stack_allocator()); - ++entry_idx; - } - - // wait for all pending work to finish - waiter.async_wait(yield); - - // delete all published entries from queue - if (remove_entries) { - librados::ObjectWriteOperation op; - op.assert_exists(); - rados::cls::lock::assert_locked(&op, queue_name+"_lock", - ClsLockType::EXCLUSIVE, - lock_cookie, - "" /*no tag*/); - cls_2pc_queue_remove_entries(op, end_marker); - // check ownership and deleted entries in one batch - const auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield)); - if (ret == -ENOENT) { - // queue was deleted - ldpp_dout(this, 5) << "INFO: queue: " - << queue_name << ". was removed. processing will stop" << dendl; - return; - } - if (ret == -EBUSY) { - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; - return; - } - if (ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to remove entries and/or lock queue up to: " << end_marker << " from queue: " - << queue_name << ". error: " << ret << dendl; - } else { - ldpp_dout(this, 20) << "INFO: removed entries up to: " << end_marker << " from queue: " - << queue_name << dendl; - } - } - } - } - - // lits of owned queues - using owned_queues_t = std::unordered_set; - - // process all queues - // find which of the queues is owned by this daemon and process it - void process_queues(yield_context yield) { - auto has_error = false; - owned_queues_t owned_queues; - - // add randomness to the duration between queue checking - // to make sure that different daemons are not synced - std::random_device seed; - std::mt19937 rnd_gen(seed()); - const auto min_jitter = 100; // ms - const auto max_jitter = 500; // ms - std::uniform_int_distribution<> duration_jitter(min_jitter, max_jitter); - - std::vector queue_gc; - std::mutex queue_gc_lock; - while (true) { - Timer timer(io_context); - const auto duration = (has_error ? 
-
-  // list of owned queues
-  using owned_queues_t = std::unordered_set<std::string>;
-
-  // process all queues
-  // find which of the queues is owned by this daemon and process it
-  void process_queues(yield_context yield) {
-    auto has_error = false;
-    owned_queues_t owned_queues;
-
-    // add randomness to the duration between queue checking
-    // to make sure that different daemons are not synced
-    std::random_device seed;
-    std::mt19937 rnd_gen(seed());
-    const auto min_jitter = 100; // ms
-    const auto max_jitter = 500; // ms
-    std::uniform_int_distribution<> duration_jitter(min_jitter, max_jitter);
-
-    std::vector<std::string> queue_gc;
-    std::mutex queue_gc_lock;
-    while (true) {
-      Timer timer(io_context);
-      const auto duration = (has_error ?
-          std::chrono::milliseconds(queues_update_retry_ms) : std::chrono::milliseconds(queues_update_period_ms)) +
-          std::chrono::milliseconds(duration_jitter(rnd_gen));
-      timer.expires_from_now(duration);
-      const auto tp = ceph::coarse_real_time::clock::to_time_t(ceph::coarse_real_time::clock::now() + duration);
-      ldpp_dout(this, 20) << "INFO: next queues processing will happen at: " << std::ctime(&tp) << dendl;
-      boost::system::error_code ec;
-      timer.async_wait(yield[ec]);
-
-      queues_t queues;
-      auto ret = read_queue_list(queues, optional_yield(io_context, yield));
-      if (ret < 0) {
-        has_error = true;
-        continue;
-      }
-
-      for (const auto& queue_name : queues) {
-        // try to lock the queue to check if it is owned by this rgw
-        // or if ownership needs to be taken
-        librados::ObjectWriteOperation op;
-        op.assert_exists();
-        rados::cls::lock::lock(&op, queue_name+"_lock",
-                               ClsLockType::EXCLUSIVE,
-                               lock_cookie,
-                               "" /*no tag*/,
-                               "" /*no description*/,
-                               failover_time,
-                               LOCK_FLAG_MAY_RENEW);
-
-        ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield));
-        if (ret == -EBUSY) {
-          // lock is already taken by another RGW
-          ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " owned (locked) by another daemon" << dendl;
-          // if queue was owned by this RGW, processing should be stopped, queue would be deleted from list afterwards
-          continue;
-        }
-        if (ret == -ENOENT) {
-          // queue is deleted - processing will stop the next time we try to read from the queue
-          ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " should not be locked - already deleted" << dendl;
-          continue;
-        }
-        if (ret < 0) {
-          // failed to lock for another reason, continue to process other queues
-          ldpp_dout(this, 1) << "ERROR: failed to lock queue: " << queue_name << ". error: " << ret << dendl;
-          has_error = true;
-          continue;
-        }
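// Note: reaching this point means the lock call succeeded: either ownership
// of the queue was just taken, or a lock already held was renewed
// (LOCK_FLAG_MAY_RENEW); failover_time bounds how long a crashed owner can
// keep other daemons from taking over.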
error: " << ret << dendl; - has_error = true; - continue; - } - // add queue to list of owned queues - if (owned_queues.insert(queue_name).second) { - ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " now owned (locked) by this daemon" << dendl; - // start processing this queue - spawn::spawn(io_context, [this, &queue_gc, &queue_gc_lock, queue_name](yield_context yield) { - process_queue(queue_name, yield); - // if queue processing ended, it measn that the queue was removed or not owned anymore - // mark it for deletion - std::lock_guard lock_guard(queue_gc_lock); - queue_gc.push_back(queue_name); - ldpp_dout(this, 10) << "INFO: queue: " << queue_name << " marked for removal" << dendl; - }, make_stack_allocator()); - } else { - ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " ownership (lock) renewed" << dendl; - } - } - // erase all queue that were deleted - { - std::lock_guard lock_guard(queue_gc_lock); - std::for_each(queue_gc.begin(), queue_gc.end(), [this, &owned_queues](const std::string& queue_name) { - owned_queues.erase(queue_name); - ldpp_dout(this, 20) << "INFO: queue: " << queue_name << " removed" << dendl; - }); - queue_gc.clear(); - } - } - } - -public: - - ~Manager() { - work_guard.reset(); - io_context.stop(); - std::for_each(workers.begin(), workers.end(), [] (auto& worker) { worker.join(); }); - } - - // ctor: start all threads - Manager(CephContext* _cct, uint32_t _max_queue_size, uint32_t _queues_update_period_ms, - uint32_t _queues_update_retry_ms, uint32_t _queue_idle_sleep_us, u_int32_t failover_time_ms, - uint32_t _stale_reservations_period_s, uint32_t _reservations_cleanup_period_s, - uint32_t _worker_count, rgw::sal::RadosStore* store) : - max_queue_size(_max_queue_size), - queues_update_period_ms(_queues_update_period_ms), - queues_update_retry_ms(_queues_update_retry_ms), - queue_idle_sleep_us(_queue_idle_sleep_us), - failover_time(std::chrono::milliseconds(failover_time_ms)), - cct(_cct), - rados_ioctx(store->getRados()->get_notif_pool_ctx()), - lock_cookie(gen_rand_alphanumeric(cct, COOKIE_LEN)), - work_guard(boost::asio::make_work_guard(io_context)), - worker_count(_worker_count), - stale_reservations_period_s(_stale_reservations_period_s), - reservations_cleanup_period_s(_reservations_cleanup_period_s) - { - spawn::spawn(io_context, [this] (yield_context yield) { - process_queues(yield); - }, make_stack_allocator()); - - // start the worker threads to do the actual queue processing - const std::string WORKER_THREAD_NAME = "notif-worker"; - for (auto worker_id = 0U; worker_id < worker_count; ++worker_id) { - workers.emplace_back([this]() { - try { - io_context.run(); - } catch (const std::exception& err) { - ldpp_dout(this, 10) << "Notification worker failed with error: " << err.what() << dendl; - throw(err); - } - }); - const auto rc = ceph_pthread_setname(workers.back().native_handle(), - (WORKER_THREAD_NAME+std::to_string(worker_id)).c_str()); - ceph_assert(rc == 0); - } - ldpp_dout(this, 10) << "Started notification manager with: " << worker_count << " workers" << dendl; - } - - int add_persistent_topic(const std::string& topic_name, optional_yield y) { - if (topic_name == Q_LIST_OBJECT_NAME) { - ldpp_dout(this, 1) << "ERROR: topic name cannot be: " << Q_LIST_OBJECT_NAME << " (conflict with queue list object name)" << dendl; - return -EINVAL; - } - librados::ObjectWriteOperation op; - op.create(true); - cls_2pc_queue_init(op, topic_name, max_queue_size); - auto ret = rgw_rados_operate(this, rados_ioctx, topic_name, &op, y); - if (ret == 
-EEXIST) { - // queue already exists - nothing to do - ldpp_dout(this, 20) << "INFO: queue for topic: " << topic_name << " already exists. nothing to do" << dendl; - return 0; - } - if (ret < 0) { - // failed to create queue - ldpp_dout(this, 1) << "ERROR: failed to create queue for topic: " << topic_name << ". error: " << ret << dendl; - return ret; - } - - bufferlist empty_bl; - std::map new_topic{{topic_name, empty_bl}}; - op.omap_set(new_topic); - ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, y); - if (ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to add queue: " << topic_name << " to queue list. error: " << ret << dendl; - return ret; - } - ldpp_dout(this, 20) << "INFO: queue: " << topic_name << " added to queue list" << dendl; - return 0; - } - - int remove_persistent_topic(const std::string& topic_name, optional_yield y) { - librados::ObjectWriteOperation op; - op.remove(); - auto ret = rgw_rados_operate(this, rados_ioctx, topic_name, &op, y); - if (ret == -ENOENT) { - // queue already removed - nothing to do - ldpp_dout(this, 20) << "INFO: queue for topic: " << topic_name << " already removed. nothing to do" << dendl; - return 0; - } - if (ret < 0) { - // failed to remove queue - ldpp_dout(this, 1) << "ERROR: failed to remove queue for topic: " << topic_name << ". error: " << ret << dendl; - return ret; - } - - std::set topic_to_remove{{topic_name}}; - op.omap_rm_keys(topic_to_remove); - ret = rgw_rados_operate(this, rados_ioctx, Q_LIST_OBJECT_NAME, &op, y); - if (ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to remove queue: " << topic_name << " from queue list. error: " << ret << dendl; - return ret; - } - ldpp_dout(this, 20) << "INFO: queue: " << topic_name << " removed from queue list" << dendl; - return 0; - } -}; - -// singleton manager -// note that the manager itself is not a singleton, and multiple instances may co-exist -// TODO make the pointer atomic in allocation and deallocation to avoid race conditions -static Manager* s_manager = nullptr; - -constexpr size_t MAX_QUEUE_SIZE = 128*1000*1000; // 128MB -constexpr uint32_t Q_LIST_UPDATE_MSEC = 1000*30; // check queue list every 30seconds -constexpr uint32_t Q_LIST_RETRY_MSEC = 1000; // retry every second if queue list update failed -constexpr uint32_t IDLE_TIMEOUT_USEC = 100*1000; // idle sleep 100ms -constexpr uint32_t FAILOVER_TIME_MSEC = 3*Q_LIST_UPDATE_MSEC; // FAILOVER TIME 3x renew time -constexpr uint32_t WORKER_COUNT = 1; // 1 worker thread -constexpr uint32_t STALE_RESERVATIONS_PERIOD_S = 120; // cleanup reservations that are more than 2 minutes old -constexpr uint32_t RESERVATIONS_CLEANUP_PERIOD_S = 30; // reservation cleanup every 30 seconds - -bool init(CephContext* cct, rgw::sal::RadosStore* store, const DoutPrefixProvider *dpp) { - if (s_manager) { - return false; - } - // TODO: take conf from CephContext - s_manager = new Manager(cct, MAX_QUEUE_SIZE, - Q_LIST_UPDATE_MSEC, Q_LIST_RETRY_MSEC, - IDLE_TIMEOUT_USEC, FAILOVER_TIME_MSEC, - STALE_RESERVATIONS_PERIOD_S, RESERVATIONS_CLEANUP_PERIOD_S, - WORKER_COUNT, - store); - return true; -} - -void shutdown() { - delete s_manager; - s_manager = nullptr; -} - -int add_persistent_topic(const std::string& topic_name, optional_yield y) { - if (!s_manager) { - return -EAGAIN; - } - return s_manager->add_persistent_topic(topic_name, y); -} - -int remove_persistent_topic(const std::string& topic_name, optional_yield y) { - if (!s_manager) { - return -EAGAIN; - } - return s_manager->remove_persistent_topic(topic_name, y); -} - 
-rgw::sal::Object* get_object_with_attributes( - const reservation_t& res, rgw::sal::Object* obj) { - // in case of copy obj, the tags and metadata are taken from source - const auto src_obj = res.src_object ? res.src_object : obj; - if (src_obj->get_attrs().empty()) { - if (!src_obj->get_bucket()) { - src_obj->set_bucket(res.bucket); - } - const auto ret = src_obj->get_obj_attrs(res.yield, res.dpp); - if (ret < 0) { - ldpp_dout(res.dpp, 20) << "failed to get attributes from object: " << - src_obj->get_key() << ". ret = " << ret << dendl; - return nullptr; - } - } - return src_obj; -} - -static inline void metadata_from_attributes( - reservation_t& res, rgw::sal::Object* obj) { - auto& metadata = res.x_meta_map; - const auto src_obj = get_object_with_attributes(res, obj); - if (!src_obj) { - return; - } - res.metadata_fetched_from_attributes = true; - for (auto& attr : src_obj->get_attrs()) { - if (boost::algorithm::starts_with(attr.first, RGW_ATTR_META_PREFIX)) { - std::string_view key(attr.first); - key.remove_prefix(sizeof(RGW_ATTR_PREFIX)-1); - // we want to pass a null terminated version - // of the bufferlist, hence "to_str().c_str()" - metadata.emplace(key, attr.second.to_str().c_str()); - } - } -} - -static inline void tags_from_attributes( - const reservation_t& res, rgw::sal::Object* obj, KeyMultiValueMap& tags) { - const auto src_obj = get_object_with_attributes(res, obj); - if (!src_obj) { - return; - } - const auto& attrs = src_obj->get_attrs(); - const auto attr_iter = attrs.find(RGW_ATTR_TAGS); - if (attr_iter != attrs.end()) { - auto bliter = attr_iter->second.cbegin(); - RGWObjTags obj_tags; - try { - ::decode(obj_tags, bliter); - } catch(buffer::error&) { - // not able to decode tags - return; - } - tags = std::move(obj_tags.get_tags()); - } -} - -// populate event from request -static inline void populate_event(reservation_t& res, - rgw::sal::Object* obj, - uint64_t size, - const ceph::real_time& mtime, - const std::string& etag, - const std::string& version, - EventType event_type, - rgw_pubsub_s3_event& event) { - event.eventTime = mtime; - event.eventName = to_event_string(event_type); - event.userIdentity = res.user_id; // user that triggered the change - event.x_amz_request_id = res.req_id; // request ID of the original change - event.x_amz_id_2 = res.store->getRados()->host_id; // RGW on which the change was made - // configurationId is filled from notification configuration - event.bucket_name = res.bucket->get_name(); - event.bucket_ownerIdentity = res.bucket->get_owner() ? res.bucket->get_owner()->get_id().id : ""; - const auto region = res.store->get_zone()->get_zonegroup().get_api_name(); - rgw::ARN bucket_arn(res.bucket->get_key()); - bucket_arn.region = region; - event.bucket_arn = to_string(bucket_arn); - event.object_key = res.object_name ?
*res.object_name : obj->get_name(); - event.object_size = size; - event.object_etag = etag; - event.object_versionId = version; - event.awsRegion = region; - // use timestamp as per key sequence id (hex encoded) - const utime_t ts(real_clock::now()); - boost::algorithm::hex((const char*)&ts, (const char*)&ts + sizeof(utime_t), - std::back_inserter(event.object_sequencer)); - set_event_id(event.id, etag, ts); - event.bucket_id = res.bucket->get_bucket_id(); - // pass meta data - if (!res.metadata_fetched_from_attributes) { - // either no metadata exist or no metadata filter was used - metadata_from_attributes(res, obj); - } - event.x_meta_map = res.x_meta_map; - // pass tags - if (!res.tagset || - (*res.tagset).get_tags().empty()) { - // try to fetch the tags from the attributes - tags_from_attributes(res, obj, event.tags); - } else { - event.tags = (*res.tagset).get_tags(); - } - // opaque data will be filled from topic configuration -} - -static inline bool notification_match(reservation_t& res, - const rgw_pubsub_topic_filter& filter, - EventType event, - const RGWObjTags* req_tags) { - if (!match(filter.events, event)) { - return false; - } - const auto obj = res.object; - if (!match(filter.s3_filter.key_filter, - res.object_name ? *res.object_name : obj->get_name())) { - return false; - } - - if (!filter.s3_filter.metadata_filter.kv.empty()) { - // metadata filter exists - if (res.s) { - res.x_meta_map = res.s->info.x_meta_map; - } - metadata_from_attributes(res, obj); - if (!match(filter.s3_filter.metadata_filter, res.x_meta_map)) { - return false; - } - } - - if (!filter.s3_filter.tag_filter.kv.empty()) { - // tag filter exists - if (req_tags) { - // tags in the request - if (!match(filter.s3_filter.tag_filter, req_tags->get_tags())) { - return false; - } - } else if (res.tagset && !(*res.tagset).get_tags().empty()) { - // tags were cached in req_state - if (!match(filter.s3_filter.tag_filter, (*res.tagset).get_tags())) { - return false; - } - } else { - // try to fetch tags from the attributes - KeyMultiValueMap tags; - tags_from_attributes(res, obj, tags); - if (!match(filter.s3_filter.tag_filter, tags)) { - return false; - } - } - } - - return true; -} - - int publish_reserve(const DoutPrefixProvider* dpp, - EventType event_type, - reservation_t& res, - const RGWObjTags* req_tags) -{ - RGWPubSub ps(res.store, res.user_tenant); - RGWPubSub::Bucket ps_bucket(&ps, res.bucket->get_key()); - rgw_pubsub_bucket_topics bucket_topics; - auto rc = ps_bucket.get_topics(&bucket_topics); - if (rc < 0) { - // failed to fetch bucket topics - return rc; - } - for (const auto& bucket_topic : bucket_topics.topics) { - const rgw_pubsub_topic_filter& topic_filter = bucket_topic.second; - const rgw_pubsub_topic& topic_cfg = topic_filter.topic; - if (!notification_match(res, topic_filter, event_type, req_tags)) { - // notification does not apply to req_state - continue; - } - ldpp_dout(res.dpp, 20) << "INFO: notification: '" << topic_filter.s3_id << - "' on topic: '" << topic_cfg.dest.arn_topic << - "' and bucket: '" << res.bucket->get_name() << - "' (unique topic: '" << topic_cfg.name << - "') apply to event of type: '" << to_string(event_type) << "'" << dendl; - - cls_2pc_reservation::id_t res_id; - if (topic_cfg.dest.persistent) { - // TODO: take default reservation size from conf - constexpr auto DEFAULT_RESERVATION = 4*1024U; // 4K - res.size = DEFAULT_RESERVATION; - librados::ObjectWriteOperation op; - bufferlist obl; - int rval; - const auto& queue_name = topic_cfg.dest.arn_topic; - 
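publish_reserve() is entering phase one of the two-phase commit at this point. A condensed sketch of the full round trip, using only the cls_2pc_queue calls that appear in this file; error handling is elided and `ioctx`, `queue_name`, `dpp`, and `y` are stand-ins for the values in scope:

```cpp
// Sketch of the 2PC queue round trip behind publish_reserve/commit/abort.
const uint64_t reservation_size = 4 * 1024;  // e.g. the DEFAULT_RESERVATION used here
librados::ObjectWriteOperation op;
bufferlist obl;
int rval;
cls_2pc_queue_reserve(op, reservation_size, 1, &obl, &rval);   // phase 1: reserve space
int ret = rgw_rados_operate(dpp, ioctx, queue_name, &op, y,
                            librados::OPERATION_RETURNVEC);
cls_2pc_reservation::id_t res_id = cls_2pc_reservation::NO_ID;
if (ret >= 0) {
  ret = cls_2pc_queue_reserve_result(obl, res_id);             // recover the reservation id
}
// phase 2, on success: commit the serialized event under the reservation
std::vector<bufferlist> data;                                  // holds the encoded event_entry_t
librados::ObjectWriteOperation commit_op;
cls_2pc_queue_commit(commit_op, data, res_id);
// phase 2, on failure: release the reserved space instead
librados::ObjectWriteOperation abort_op;
cls_2pc_queue_abort(abort_op, res_id);
```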
cls_2pc_queue_reserve(op, res.size, 1, &obl, &rval); - auto ret = rgw_rados_operate( - res.dpp, res.store->getRados()->get_notif_pool_ctx(), - queue_name, &op, res.yield, librados::OPERATION_RETURNVEC); - if (ret < 0) { - ldpp_dout(res.dpp, 1) << - "ERROR: failed to reserve notification on queue: " - << queue_name << ". error: " << ret << dendl; - // if no space is left in queue we ask client to slow down - return (ret == -ENOSPC) ? -ERR_RATE_LIMITED : ret; - } - ret = cls_2pc_queue_reserve_result(obl, res_id); - if (ret < 0) { - ldpp_dout(res.dpp, 1) << "ERROR: failed to parse reservation id. error: " << ret << dendl; - return ret; - } - } - res.topics.emplace_back(topic_filter.s3_id, topic_cfg, res_id); - } - return 0; -} - -int publish_commit(rgw::sal::Object* obj, - uint64_t size, - const ceph::real_time& mtime, - const std::string& etag, - const std::string& version, - EventType event_type, - reservation_t& res, - const DoutPrefixProvider* dpp) -{ - for (auto& topic : res.topics) { - if (topic.cfg.dest.persistent && - topic.res_id == cls_2pc_reservation::NO_ID) { - // nothing to commit or already committed/aborted - continue; - } - event_entry_t event_entry; - populate_event(res, obj, size, mtime, etag, version, event_type, event_entry.event); - event_entry.event.configurationId = topic.configurationId; - event_entry.event.opaque_data = topic.cfg.opaque_data; - if (topic.cfg.dest.persistent) { - event_entry.push_endpoint = std::move(topic.cfg.dest.push_endpoint); - event_entry.push_endpoint_args = - std::move(topic.cfg.dest.push_endpoint_args); - event_entry.arn_topic = topic.cfg.dest.arn_topic; - bufferlist bl; - encode(event_entry, bl); - const auto& queue_name = topic.cfg.dest.arn_topic; - if (bl.length() > res.size) { - // try to make a larger reservation, fail only if this is not possible - ldpp_dout(dpp, 5) << "WARNING: committed size: " << bl.length() - << " exceeded reserved size: " << res.size - << - " . trying to make a larger reservation on queue:" << queue_name - << dendl; - // first cancel the existing reservation - librados::ObjectWriteOperation op; - cls_2pc_queue_abort(op, topic.res_id); - auto ret = rgw_rados_operate( - dpp, res.store->getRados()->get_notif_pool_ctx(), - topic.cfg.dest.arn_topic, &op, - res.yield); - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to abort reservation: " - << topic.res_id << - " when trying to make a larger reservation on queue: " << queue_name - << ". error: " << ret << dendl; - return ret; - } - // now try to make a bigger one - buffer::list obl; - int rval; - cls_2pc_queue_reserve(op, bl.length(), 1, &obl, &rval); - ret = rgw_rados_operate( - dpp, res.store->getRados()->get_notif_pool_ctx(), - queue_name, &op, res.yield, librados::OPERATION_RETURNVEC); - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to reserve extra space on queue: " - << queue_name - << ". error: " << ret << dendl; - return (ret == -ENOSPC) ? -ERR_RATE_LIMITED : ret; - } - ret = cls_2pc_queue_reserve_result(obl, topic.res_id); - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to parse reservation id for " - "extra space. 
error: " << ret << dendl; - return ret; - } - } - std::vector bl_data_vec{std::move(bl)}; - librados::ObjectWriteOperation op; - cls_2pc_queue_commit(op, bl_data_vec, topic.res_id); - const auto ret = rgw_rados_operate( - dpp, res.store->getRados()->get_notif_pool_ctx(), - queue_name, &op, res.yield); - topic.res_id = cls_2pc_reservation::NO_ID; - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to commit reservation to queue: " - << queue_name << ". error: " << ret - << dendl; - return ret; - } - } else { - try { - // TODO add endpoint LRU cache - const auto push_endpoint = RGWPubSubEndpoint::create( - topic.cfg.dest.push_endpoint, - topic.cfg.dest.arn_topic, - RGWHTTPArgs(topic.cfg.dest.push_endpoint_args, dpp), - dpp->get_cct()); - ldpp_dout(res.dpp, 20) << "INFO: push endpoint created: " - << topic.cfg.dest.push_endpoint << dendl; - const auto ret = push_endpoint->send_to_completion_async( - dpp->get_cct(), event_entry.event, res.yield); - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: push to endpoint " - << topic.cfg.dest.push_endpoint - << " failed. error: " << ret << dendl; - if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); - return ret; - } - if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); - } catch (const RGWPubSubEndpoint::configuration_error& e) { - ldpp_dout(dpp, 1) << "ERROR: failed to create push endpoint: " - << topic.cfg.dest.push_endpoint << ". error: " << e.what() << dendl; - if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); - return -EINVAL; - } - } - } - return 0; -} - -int publish_abort(reservation_t& res) { - for (auto& topic : res.topics) { - if (!topic.cfg.dest.persistent || - topic.res_id == cls_2pc_reservation::NO_ID) { - // nothing to abort or already committed/aborted - continue; - } - const auto& queue_name = topic.cfg.dest.arn_topic; - librados::ObjectWriteOperation op; - cls_2pc_queue_abort(op, topic.res_id); - const auto ret = rgw_rados_operate( - res.dpp, res.store->getRados()->get_notif_pool_ctx(), - queue_name, &op, res.yield); - if (ret < 0) { - ldpp_dout(res.dpp, 1) << "ERROR: failed to abort reservation: " - << topic.res_id << - " from queue: " << queue_name << ". 
error: " << ret << dendl; - return ret; - } - topic.res_id = cls_2pc_reservation::NO_ID; - } - return 0; -} - -reservation_t::reservation_t(const DoutPrefixProvider* _dpp, - rgw::sal::RadosStore* _store, - const req_state* _s, - rgw::sal::Object* _object, - rgw::sal::Object* _src_object, - const std::string* _object_name) : - dpp(_s), store(_store), s(_s), size(0) /* XXX */, - object(_object), src_object(_src_object), bucket(_s->bucket.get()), - object_name(_object_name), - tagset(_s->tagset), - x_meta_map(_s->info.x_meta_map), - metadata_fetched_from_attributes(false), - user_id(_s->user->get_id().id), - user_tenant(_s->user->get_id().tenant), - req_id(_s->req_id), - yield(_s->yield) -{} - -reservation_t::reservation_t(const DoutPrefixProvider* _dpp, - rgw::sal::RadosStore* _store, - rgw::sal::Object* _object, - rgw::sal::Object* _src_object, - rgw::sal::Bucket* _bucket, - const std::string& _user_id, - const std::string& _user_tenant, - const std::string& _req_id, - optional_yield y) : - dpp(_dpp), store(_store), s(nullptr), size(0) /* XXX */, - object(_object), src_object(_src_object), bucket(_bucket), - object_name(nullptr), - metadata_fetched_from_attributes(false), - user_id(_user_id), - user_tenant(_user_tenant), - req_id(_req_id), - yield(y) -{} - -reservation_t::~reservation_t() { - publish_abort(*this); -} - -} // namespace rgw::notify diff --git a/src/rgw/store/rados/rgw_notify.h b/src/rgw/store/rados/rgw_notify.h deleted file mode 100644 index 175dc11463d..00000000000 --- a/src/rgw/store/rados/rgw_notify.h +++ /dev/null @@ -1,117 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include -#include "common/ceph_time.h" -#include "include/common_fwd.h" -#include "rgw_notify_event_type.h" -#include "common/async/yield_context.h" -#include "cls/2pc_queue/cls_2pc_queue_types.h" -#include "rgw_pubsub.h" - -// forward declarations -namespace rgw::sal { - class RadosStore; - class RGWObject; -} - -class RGWRados; -struct rgw_obj_key; - -namespace rgw::notify { - -// initialize the notification manager -// notification manager is dequeing the 2-phase-commit queues -// and send the notifications to the endpoints -bool init(CephContext* cct, rgw::sal::RadosStore* store, const DoutPrefixProvider *dpp); - -// shutdown the notification manager -void shutdown(); - -// create persistent delivery queue for a topic (endpoint) -// this operation also add a topic name to the common (to all RGWs) list of all topics -int add_persistent_topic(const std::string& topic_name, optional_yield y); - -// remove persistent delivery queue for a topic (endpoint) -// this operation also remove the topic name from the common (to all RGWs) list of all topics -int remove_persistent_topic(const std::string& topic_name, optional_yield y); - -// struct holding reservation information -// populated in the publish_reserve call -// then used to commit or abort the reservation -struct reservation_t { - struct topic_t { - topic_t(const std::string& _configurationId, const rgw_pubsub_topic& _cfg, - cls_2pc_reservation::id_t _res_id) : - configurationId(_configurationId), cfg(_cfg), res_id(_res_id) {} - - const std::string configurationId; - const rgw_pubsub_topic cfg; - // res_id is reset after topic is committed/aborted - cls_2pc_reservation::id_t res_id; - }; - - const DoutPrefixProvider* const dpp; - std::vector topics; - rgw::sal::RadosStore* const store; - const req_state* const s; - size_t size; - rgw::sal::Object* const object; - 
rgw::sal::Object* const src_object; // may differ from object - rgw::sal::Bucket* const bucket; - const std::string* const object_name; - boost::optional tagset; - meta_map_t x_meta_map; // metadata cached by value - bool metadata_fetched_from_attributes; - const std::string user_id; - const std::string user_tenant; - const std::string req_id; - optional_yield yield; - - /* ctor for rgw_op callers */ - reservation_t(const DoutPrefixProvider* _dpp, - rgw::sal::RadosStore* _store, - const req_state* _s, - rgw::sal::Object* _object, - rgw::sal::Object* _src_object, - const std::string* _object_name); - - /* ctor for non-request caller (e.g., lifecycle) */ - reservation_t(const DoutPrefixProvider* _dpp, - rgw::sal::RadosStore* _store, - rgw::sal::Object* _object, - rgw::sal::Object* _src_object, - rgw::sal::Bucket* _bucket, - const std::string& _user_id, - const std::string& _user_tenant, - const std::string& _req_id, - optional_yield y); - - // dtor doing resource leak guarding - // aborting the reservation if not already committed or aborted - ~reservation_t(); -}; - -// create a reservation on the 2-phase-commit queue - int publish_reserve(const DoutPrefixProvider *dpp, - EventType event_type, - reservation_t& reservation, - const RGWObjTags* req_tags); - -// commit the reservation to the queue -int publish_commit(rgw::sal::Object* obj, - uint64_t size, - const ceph::real_time& mtime, - const std::string& etag, - const std::string& version, - EventType event_type, - reservation_t& reservation, - const DoutPrefixProvider *dpp); - -// cancel the reservation -int publish_abort(reservation_t& reservation); - -} - diff --git a/src/rgw/store/rados/rgw_obj_manifest.cc b/src/rgw/store/rados/rgw_obj_manifest.cc deleted file mode 100644 index 3838f5cf328..00000000000 --- a/src/rgw/store/rados/rgw_obj_manifest.cc +++ /dev/null @@ -1,404 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_obj_manifest.h" - -#include "services/svc_zone.h" -#include "rgw_rados.h" -#include "rgw_bucket.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -int RGWObjManifest::generator::create_next(uint64_t ofs) -{ - if (ofs < last_ofs) /* only going forward */ - return -EINVAL; - - uint64_t max_head_size = manifest->get_max_head_size(); - - if (ofs < max_head_size) { - manifest->set_head_size(ofs); - } - - if (ofs >= max_head_size) { - manifest->set_head_size(max_head_size); - cur_stripe = (ofs - max_head_size) / rule.stripe_max_size; - cur_stripe_size = rule.stripe_max_size; - - if (cur_part_id == 0 && max_head_size > 0) { - cur_stripe++; - } - } - - last_ofs = ofs; - manifest->set_obj_size(ofs); - - manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, NULL, &cur_obj); - - return 0; -} - -int RGWObjManifest::append(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone_params) -{ - if (explicit_objs || m.explicit_objs) { - return append_explicit(dpp, m, zonegroup, zone_params); - } - - if (rules.empty()) { - *this = m; - return 0; - } - - string override_prefix; - - if (prefix.empty()) { - prefix = m.prefix; - } - - if (prefix != m.prefix) { - override_prefix = m.prefix; - } - - map<uint64_t, RGWObjManifestRule>::iterator miter = m.rules.begin(); - if (miter == m.rules.end()) { - return append_explicit(dpp, m, zonegroup, zone_params); - } - - for (; miter != m.rules.end(); ++miter) { - map<uint64_t, RGWObjManifestRule>::reverse_iterator last_rule = rules.rbegin(); - - RGWObjManifestRule&
rule = last_rule->second; - - if (rule.part_size == 0) { - rule.part_size = obj_size - rule.start_ofs; - } - - RGWObjManifestRule& next_rule = miter->second; - if (!next_rule.part_size) { - next_rule.part_size = m.obj_size - next_rule.start_ofs; - } - - string rule_prefix = prefix; - if (!rule.override_prefix.empty()) { - rule_prefix = rule.override_prefix; - } - - string next_rule_prefix = m.prefix; - if (!next_rule.override_prefix.empty()) { - next_rule_prefix = next_rule.override_prefix; - } - - if (rule.part_size != next_rule.part_size || - rule.stripe_max_size != next_rule.stripe_max_size || - rule_prefix != next_rule_prefix) { - if (next_rule_prefix != prefix) { - append_rules(m, miter, &next_rule_prefix); - } else { - append_rules(m, miter, NULL); - } - break; - } - - uint64_t expected_part_num = rule.start_part_num + 1; - if (rule.part_size > 0) { - expected_part_num = rule.start_part_num + (obj_size + next_rule.start_ofs - rule.start_ofs) / rule.part_size; - } - - if (expected_part_num != next_rule.start_part_num) { - append_rules(m, miter, NULL); - break; - } - } - - set_obj_size(obj_size + m.obj_size); - - return 0; -} - -void RGWObjManifest::append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& miter, - string *override_prefix) -{ - for (; miter != m.rules.end(); ++miter) { - RGWObjManifestRule rule = miter->second; - rule.start_ofs += obj_size; - if (override_prefix) - rule.override_prefix = *override_prefix; - rules[rule.start_ofs] = rule; - } -} - -void RGWObjManifest::convert_to_explicit(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) -{ - if (explicit_objs) { - return; - } - obj_iterator iter = obj_begin(dpp); - - while (iter != obj_end(dpp)) { - RGWObjManifestPart& part = objs[iter.get_stripe_ofs()]; - const rgw_obj_select& os = iter.get_location(); - const rgw_raw_obj& raw_loc = os.get_raw_obj(zonegroup, zone_params); - part.loc_ofs = 0; - - uint64_t ofs = iter.get_stripe_ofs(); - - if (ofs == 0) { - part.loc = obj; - } else { - RGWSI_Tier_RADOS::raw_obj_to_obj(tail_placement.bucket, raw_loc, &part.loc); - } - ++iter; - uint64_t next_ofs = iter.get_stripe_ofs(); - - part.size = next_ofs - ofs; - } - - explicit_objs = true; - rules.clear(); - prefix.clear(); -} - -int RGWObjManifest::append_explicit(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) -{ - if (!explicit_objs) { - convert_to_explicit(dpp, zonegroup, zone_params); - } - if (!m.explicit_objs) { - m.convert_to_explicit(dpp, zonegroup, zone_params); - } - map<uint64_t, RGWObjManifestPart>::iterator iter; - uint64_t base = obj_size; - for (iter = m.objs.begin(); iter != m.objs.end(); ++iter) { - RGWObjManifestPart& part = iter->second; - objs[base + iter->first] = part; - } - obj_size += m.obj_size; - - return 0; -} - -bool RGWObjManifest::get_rule(uint64_t ofs, RGWObjManifestRule *rule) -{ - if (rules.empty()) { - return false; - } - - map<uint64_t, RGWObjManifestRule>::iterator iter = rules.upper_bound(ofs); - if (iter != rules.begin()) { - --iter; - } - - *rule = iter->second; - - return true; -} - -int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, - const rgw_placement_rule& head_placement_rule, - const rgw_placement_rule *tail_placement_rule, - const rgw_bucket& _b, const rgw_obj& _obj) -{ - manifest = _m; - - if (!tail_placement_rule) { - manifest->set_tail_placement(head_placement_rule, _b); - } else { - rgw_placement_rule new_tail_rule = *tail_placement_rule; - new_tail_rule.inherit_from(head_placement_rule); -
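The stripe arithmetic in generator::create_next() above is easiest to see with numbers. A worked example with hypothetical sizes (a 1 MiB head object and 4 MiB tail stripes):

```cpp
// Worked example for generator::create_next(); the sizes are hypothetical.
constexpr uint64_t MiB = 1024 * 1024;
const uint64_t max_head_size   = 1 * MiB;  // manifest->get_max_head_size()
const uint64_t stripe_max_size = 4 * MiB;  // rule.stripe_max_size
const uint64_t ofs             = 9 * MiB;  // offset being appended

// tail stripe index, counted from the end of the head:
uint64_t cur_stripe = (ofs - max_head_size) / stripe_max_size;  // (9-1)/4 == 2
// in part 0 of an object with a head, stripe 0 is the head object itself,
// so create_next() shifts the tail stripes up by one:
int cur_part_id = 0;
if (cur_part_id == 0 && max_head_size > 0) {
  ++cur_stripe;  // == 3
}
```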
manifest->set_tail_placement(new_tail_rule, _b); - } - - manifest->set_head(head_placement_rule, _obj, 0); - last_ofs = 0; - - if (manifest->get_prefix().empty()) { - char buf[33]; - gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); - - string oid_prefix = "."; - oid_prefix.append(buf); - oid_prefix.append("_"); - - manifest->set_prefix(oid_prefix); - } - - bool found = manifest->get_rule(0, &rule); - if (!found) { - derr << "ERROR: manifest->get_rule() could not find rule" << dendl; - return -EIO; - } - - uint64_t head_size = manifest->get_head_size(); - - if (head_size > 0) { - cur_stripe_size = head_size; - } else { - cur_stripe_size = rule.stripe_max_size; - } - - cur_part_id = rule.start_part_num; - - manifest->get_implicit_location(cur_part_id, cur_stripe, 0, NULL, &cur_obj); - - // Normal object which not generated through copy operation - manifest->set_tail_instance(_obj.key.instance); - - return 0; -} - -void RGWObjManifestPart::generate_test_instances(std::list& o) -{ - o.push_back(new RGWObjManifestPart); - - RGWObjManifestPart *p = new RGWObjManifestPart; - rgw_bucket b; - init_bucket(&b, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12"); - - p->loc = rgw_obj(b, "object"); - p->loc_ofs = 512 * 1024; - p->size = 128 * 1024; - o.push_back(p); -} - -void RGWObjManifest::generate_test_instances(std::list& o) -{ - RGWObjManifest *m = new RGWObjManifest; - map objs; - uint64_t total_size = 0; - for (int i = 0; i<10; i++) { - RGWObjManifestPart p; - rgw_bucket b; - init_bucket(&b, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12"); - p.loc = rgw_obj(b, "object"); - p.loc_ofs = 0; - p.size = 512 * 1024; - total_size += p.size; - objs[total_size] = p; - } - m->set_explicit(total_size, objs); - o.push_back(m); - o.push_back(new RGWObjManifest); -} - -void RGWObjManifestPart::dump(Formatter *f) const -{ - f->open_object_section("loc"); - loc.dump(f); - f->close_section(); - f->dump_unsigned("loc_ofs", loc_ofs); - f->dump_unsigned("size", size); -} - -void RGWObjManifest::obj_iterator::dump(Formatter *f) const -{ - f->dump_unsigned("part_ofs", part_ofs); - f->dump_unsigned("stripe_ofs", stripe_ofs); - f->dump_unsigned("ofs", ofs); - f->dump_unsigned("stripe_size", stripe_size); - f->dump_int("cur_part_id", cur_part_id); - f->dump_int("cur_stripe", cur_stripe); - f->dump_string("cur_override_prefix", cur_override_prefix); - f->dump_object("location", location); -} - -void RGWObjManifest::dump(Formatter *f) const -{ - map::const_iterator iter = objs.begin(); - f->open_array_section("objs"); - for (; iter != objs.end(); ++iter) { - f->dump_unsigned("ofs", iter->first); - f->open_object_section("part"); - iter->second.dump(f); - f->close_section(); - } - f->close_section(); - f->dump_unsigned("obj_size", obj_size); - ::encode_json("explicit_objs", explicit_objs, f); - ::encode_json("head_size", head_size, f); - ::encode_json("max_head_size", max_head_size, f); - ::encode_json("prefix", prefix, f); - ::encode_json("rules", rules, f); - ::encode_json("tail_instance", tail_instance, f); - ::encode_json("tail_placement", tail_placement, f); - - // nullptr being passed into iterators since there - // is no cct and we aren't doing anything with these - // iterators that would write do the log - f->dump_object("begin_iter", obj_begin(nullptr)); - f->dump_object("end_iter", obj_end(nullptr)); -} - -void RGWObjManifestRule::dump(Formatter *f) const -{ - encode_json("start_part_num", start_part_num, f); - encode_json("start_ofs", start_ofs, f); - encode_json("part_size", part_size, 
f); - encode_json("stripe_max_size", stripe_max_size, f); - encode_json("override_prefix", override_prefix, f); -} - -void rgw_obj_select::dump(Formatter *f) const -{ - f->dump_string("placement_rule", placement_rule.to_str()); - f->dump_object("obj", obj); - f->dump_object("raw_obj", raw_obj); - f->dump_bool("is_raw", is_raw); -} - -void RGWObjTier::dump(Formatter *f) const -{ - encode_json("name", name, f); - encode_json("tier_placement", tier_placement, f); - encode_json("is_multipart_upload", is_multipart_upload, f); -} - -// returns true on success, false on failure -static bool rgw_get_obj_data_pool(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params, - const rgw_placement_rule& head_placement_rule, - const rgw_obj& obj, rgw_pool *pool) -{ - if (!zone_params.get_head_data_pool(head_placement_rule, obj, pool)) { - RGWZonePlacementInfo placement; - if (!zone_params.get_placement(zonegroup.default_placement.name, &placement)) { - return false; - } - - if (!obj.in_extra_data) { - *pool = placement.get_data_pool(zonegroup.default_placement.storage_class); - } else { - *pool = placement.get_data_extra_pool(); - } - } - - return true; -} - -static bool rgw_obj_to_raw(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params, - const rgw_placement_rule& head_placement_rule, - const rgw_obj& obj, rgw_raw_obj *raw_obj) -{ - get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); - - return rgw_get_obj_data_pool(zonegroup, zone_params, head_placement_rule, obj, &raw_obj->pool); -} - -rgw_raw_obj rgw_obj_select::get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const -{ - if (!is_raw) { - rgw_raw_obj r; - rgw_obj_to_raw(zonegroup, zone_params, placement_rule, obj, &r); - return r; - } - return raw_obj; -} - -// returns true on success, false on failure -bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) -{ - return rgw_get_obj_data_pool(svc.zone->get_zonegroup(), svc.zone->get_zone_params(), placement_rule, obj, pool); -} - diff --git a/src/rgw/store/rados/rgw_obj_manifest.h b/src/rgw/store/rados/rgw_obj_manifest.h deleted file mode 100644 index ac73359305e..00000000000 --- a/src/rgw/store/rados/rgw_obj_manifest.h +++ /dev/null @@ -1,609 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2019 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#pragma once - -#include "rgw_common.h" -#include "rgw_compression_types.h" -#include "rgw_sal.h" -#include "rgw_zone.h" - -class RGWSI_Zone; -struct RGWZoneGroup; -struct RGWZoneParams; -class RGWRados; -namespace rgw { namespace sal { - class RadosStore; -} }; - -class rgw_obj_select { - rgw_placement_rule placement_rule; - rgw_obj obj; - rgw_raw_obj raw_obj; - bool is_raw; - -public: - rgw_obj_select() : is_raw(false) {} - explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {} - explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {} - rgw_obj_select(const rgw_obj_select& rhs) { - placement_rule = rhs.placement_rule; - is_raw = rhs.is_raw; - if (is_raw) { - raw_obj = rhs.raw_obj; - } else { - obj = rhs.obj; - } - } - - rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const; - rgw_raw_obj get_raw_obj(rgw::sal::RadosStore* store) const; - - rgw_obj_select& operator=(const rgw_obj& rhs) { - obj = rhs; - is_raw = false; - return *this; - } - - rgw_obj_select& operator=(const rgw_raw_obj& rhs) { - raw_obj = rhs; - is_raw = true; - return *this; - } - - void set_placement_rule(const rgw_placement_rule& rule) { - placement_rule = rule; - } - void dump(Formatter *f) const; -}; - -struct RGWObjManifestPart { - rgw_obj loc; /* the object where the data is located */ - uint64_t loc_ofs; /* the offset at that object where the data is located */ - uint64_t size; /* the part size */ - - RGWObjManifestPart() : loc_ofs(0), size(0) {} - - void encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); - encode(loc, bl); - encode(loc_ofs, bl); - encode(size, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl); - decode(loc, bl); - decode(loc_ofs, bl); - decode(size, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - static void generate_test_instances(std::list& o); -}; -WRITE_CLASS_ENCODER(RGWObjManifestPart) - -/* - The manifest defines a set of rules for structuring the object parts. - There are a few terms to note: - - head: the head part of the object, which is the part that contains - the first chunk of data. An object might not have a head (as in the - case of multipart-part objects). - - stripe: data portion of a single rgw object that resides on a single - rados object. - - part: a collection of stripes that make a contiguous part of an - object. A regular object will only have one part (although might have - many stripes), a multipart object might have many parts. Each part - has a fixed stripe size, although the last stripe of a part might - be smaller than that. Consecutive parts may be merged if their stripe - value is the same. 
-*/ - -struct RGWObjManifestRule { - uint32_t start_part_num; - uint64_t start_ofs; - uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */ - uint64_t stripe_max_size; /* underlying obj max size */ - std::string override_prefix; - - RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {} - RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) : - start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {} - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(start_part_num, bl); - encode(start_ofs, bl); - encode(part_size, bl); - encode(stripe_max_size, bl); - encode(override_prefix, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(start_part_num, bl); - decode(start_ofs, bl); - decode(part_size, bl); - decode(stripe_max_size, bl); - if (struct_v >= 2) - decode(override_prefix, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; -}; -WRITE_CLASS_ENCODER(RGWObjManifestRule) - -struct RGWObjTier { - std::string name; - RGWZoneGroupPlacementTier tier_placement; - bool is_multipart_upload{false}; - - RGWObjTier(): name("none") {} - - void encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); - encode(name, bl); - encode(tier_placement, bl); - encode(is_multipart_upload, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); - decode(name, bl); - decode(tier_placement, bl); - decode(is_multipart_upload, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; -}; -WRITE_CLASS_ENCODER(RGWObjTier) - -class RGWObjManifest { -protected: - bool explicit_objs{false}; /* really old manifest? 
*/ - std::map<uint64_t, RGWObjManifestPart> objs; - - uint64_t obj_size{0}; - - rgw_obj obj; - uint64_t head_size{0}; - rgw_placement_rule head_placement_rule; - - uint64_t max_head_size{0}; - std::string prefix; - rgw_bucket_placement tail_placement; /* might be different than the original bucket, - as object might have been copied across pools */ - std::map<uint64_t, RGWObjManifestRule> rules; - - std::string tail_instance; /* tail object's instance */ - - std::string tier_type; - RGWObjTier tier_config; - - void convert_to_explicit(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params); - int append_explicit(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params); - void append_rules(RGWObjManifest& m, std::map<uint64_t, RGWObjManifestRule>::iterator& iter, std::string *override_prefix); - -public: - - RGWObjManifest() = default; - RGWObjManifest(const RGWObjManifest& rhs) { - *this = rhs; - } - RGWObjManifest& operator=(const RGWObjManifest& rhs) { - explicit_objs = rhs.explicit_objs; - objs = rhs.objs; - obj_size = rhs.obj_size; - obj = rhs.obj; - head_size = rhs.head_size; - max_head_size = rhs.max_head_size; - prefix = rhs.prefix; - tail_placement = rhs.tail_placement; - rules = rhs.rules; - tail_instance = rhs.tail_instance; - tier_type = rhs.tier_type; - tier_config = rhs.tier_config; - return *this; - } - - std::map<uint64_t, RGWObjManifestPart>& get_explicit_objs() { - return objs; - } - - - void set_explicit(uint64_t _size, std::map<uint64_t, RGWObjManifestPart>& _objs) { - explicit_objs = true; - objs.swap(_objs); - set_obj_size(_size); - } - - void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, - std::string *override_prefix, rgw_obj_select *location) const; - - void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) { - RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size); - rules[0] = rule; - max_head_size = tail_ofs; - } - - void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) { - RGWObjManifestRule rule(0, 0, 0, stripe_max_size); - rule.start_part_num = part_num; - rules[0] = rule; - max_head_size = 0; - } - - void encode(bufferlist& bl) const { - ENCODE_START(8, 6, bl); - encode(obj_size, bl); - encode(objs, bl); - encode(explicit_objs, bl); - encode(obj, bl); - encode(head_size, bl); - encode(max_head_size, bl); - encode(prefix, bl); - encode(rules, bl); - bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket); - encode(encode_tail_bucket, bl); - if (encode_tail_bucket) { - encode(tail_placement.bucket, bl); - } - bool encode_tail_instance = (tail_instance != obj.key.instance); - encode(encode_tail_instance, bl); - if (encode_tail_instance) { - encode(tail_instance, bl); - } - encode(head_placement_rule, bl); - encode(tail_placement.placement_rule, bl); - encode(tier_type, bl); - encode(tier_config, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl); - decode(obj_size, bl); - decode(objs, bl); - if (struct_v >= 3) { - decode(explicit_objs, bl); - decode(obj, bl); - decode(head_size, bl); - decode(max_head_size, bl); - decode(prefix, bl); - decode(rules, bl); - } else { - explicit_objs = true; - if (!objs.empty()) { - std::map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin(); - obj = iter->second.loc; - head_size = iter->second.size; - max_head_size = head_size; - } - } - - if (explicit_objs && head_size > 0 && !objs.empty()) { - /* patch up manifest due to issue 16435: - * the first object in the explicit objs list might not be the one we need to access, use the - * head object
instead if set. This would happen if we had an old object that was created - * when the explicit objs manifest was around, and it got copied. - */ - rgw_obj& obj_0 = objs[0].loc; - if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) { - objs[0].loc = obj; - objs[0].size = head_size; - } - } - - if (struct_v >= 4) { - if (struct_v < 6) { - decode(tail_placement.bucket, bl); - } else { - bool need_to_decode; - decode(need_to_decode, bl); - if (need_to_decode) { - decode(tail_placement.bucket, bl); - } else { - tail_placement.bucket = obj.bucket; - } - } - } - - if (struct_v >= 5) { - if (struct_v < 6) { - decode(tail_instance, bl); - } else { - bool need_to_decode; - decode(need_to_decode, bl); - if (need_to_decode) { - decode(tail_instance, bl); - } else { - tail_instance = obj.key.instance; - } - } - } else { // old object created before 'tail_instance' field added to manifest - tail_instance = obj.key.instance; - } - - if (struct_v >= 7) { - decode(head_placement_rule, bl); - decode(tail_placement.placement_rule, bl); - } - - if (struct_v >= 8) { - decode(tier_type, bl); - decode(tier_config, bl); - } - - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - static void generate_test_instances(std::list& o); - - int append(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup, - const RGWZoneParams& zone_params); - - bool get_rule(uint64_t ofs, RGWObjManifestRule *rule); - - bool empty() const { - if (explicit_objs) - return objs.empty(); - return rules.empty(); - } - - bool has_explicit_objs() const { - return explicit_objs; - } - - bool has_tail() const { - if (explicit_objs) { - if (objs.size() == 1) { - auto iter = objs.begin(); - const rgw_obj& o = iter->second.loc; - return !(obj == o); - } - return (objs.size() >= 2); - } - return (obj_size > head_size); - } - - void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) { - head_placement_rule = placement_rule; - obj = _o; - head_size = _s; - - if (explicit_objs && head_size > 0) { - objs[0].loc = obj; - objs[0].size = head_size; - } - } - - const rgw_obj& get_obj() const { - return obj; - } - - void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) { - tail_placement.placement_rule = placement_rule; - tail_placement.bucket = _b; - } - - const rgw_bucket_placement& get_tail_placement() const { - return tail_placement; - } - - const rgw_placement_rule& get_head_placement_rule() const { - return head_placement_rule; - } - - void set_prefix(const std::string& _p) { - prefix = _p; - } - - const std::string& get_prefix() const { - return prefix; - } - - void set_tail_instance(const std::string& _ti) { - tail_instance = _ti; - } - - const std::string& get_tail_instance() const { - return tail_instance; - } - - void set_head_size(uint64_t _s) { - head_size = _s; - } - - void set_obj_size(uint64_t s) { - obj_size = s; - } - - uint64_t get_obj_size() const { - return obj_size; - } - - uint64_t get_head_size() const { - return head_size; - } - - uint64_t get_max_head_size() const { - return max_head_size; - } - - const std::string& get_tier_type() { - return tier_type; - } - - inline void set_tier_type(std::string value) { - /* Only "cloud-s3" tier-type is supported for now */ - if (value == "cloud-s3") { - tier_type = value; - } - } - - inline void set_tier_config(RGWObjTier t) { - /* Set only if tier_type set to "cloud-s3" */ - if (tier_type != "cloud-s3") - return; - - tier_config.name = t.name; - tier_config.tier_placement = t.tier_placement; 
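The encode/decode pair above follows Ceph's versioned-encoding convention: ENCODE_START declares the version being written, and the decode path gates every newer field on struct_v, which is how v1 manifests written by old clusters still decode today. A minimal sketch of the pattern with hypothetical fields:

```cpp
// Minimal sketch of the versioned encode/decode convention used by
// RGWObjManifest and RGWObjManifestRule above; the fields are hypothetical.
struct example_t {
  uint64_t old_field = 0;
  std::string new_field;       // added in encoding version 2

  void encode(bufferlist& bl) const {
    ENCODE_START(2, 1, bl);    // writes v2; compat floor is v1
    encode(old_field, bl);
    encode(new_field, bl);
    ENCODE_FINISH(bl);
  }
  void decode(bufferlist::const_iterator& bl) {
    DECODE_START(2, bl);
    decode(old_field, bl);
    if (struct_v >= 2) {       // tolerate v1 encodings that lack the field
      decode(new_field, bl);
    }
    DECODE_FINISH(bl);
  }
};
WRITE_CLASS_ENCODER(example_t)
```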
- tier_config.is_multipart_upload = t.is_multipart_upload; - } - - inline const void get_tier_config(RGWObjTier* t) { - if (tier_type != "cloud-s3") - return; - - t->name = tier_config.name; - t->tier_placement = tier_config.tier_placement; - t->is_multipart_upload = tier_config.is_multipart_upload; - } - - class obj_iterator { - const DoutPrefixProvider *dpp; - const RGWObjManifest *manifest = nullptr; - uint64_t part_ofs = 0; /* where current part starts */ - uint64_t stripe_ofs = 0; /* where current stripe starts */ - uint64_t ofs = 0; /* current position within the object */ - uint64_t stripe_size = 0; /* current part size */ - - int cur_part_id = 0; - int cur_stripe = 0; - std::string cur_override_prefix; - - rgw_obj_select location; - - std::map::const_iterator rule_iter; - std::map::const_iterator next_rule_iter; - std::map::const_iterator explicit_iter; - - void update_explicit_pos(); - - public: - obj_iterator() = default; - explicit obj_iterator(const DoutPrefixProvider *_dpp, const RGWObjManifest *_m) - : obj_iterator(_dpp, _m, 0) - {} - obj_iterator(const DoutPrefixProvider *_dpp, const RGWObjManifest *_m, uint64_t _ofs) : dpp(_dpp), manifest(_m) { - seek(_ofs); - } - void seek(uint64_t ofs); - - void operator++(); - bool operator==(const obj_iterator& rhs) const { - return (ofs == rhs.ofs); - } - bool operator!=(const obj_iterator& rhs) const { - return (ofs != rhs.ofs); - } - const rgw_obj_select& get_location() { - return location; - } - - /* where current part starts */ - uint64_t get_part_ofs() const { - return part_ofs; - } - - /* start of current stripe */ - uint64_t get_stripe_ofs() { - if (manifest->explicit_objs) { - return explicit_iter->first; - } - return stripe_ofs; - } - - /* current ofs relative to start of rgw object */ - uint64_t get_ofs() const { - return ofs; - } - - /* stripe number */ - int get_cur_stripe() const { - return cur_stripe; - } - - /* current stripe size */ - uint64_t get_stripe_size() { - if (manifest->explicit_objs) { - return explicit_iter->second.size; - } - return stripe_size; - } - - /* offset where data starts within current stripe */ - uint64_t location_ofs() { - if (manifest->explicit_objs) { - return explicit_iter->second.loc_ofs; - } - return 0; /* all stripes start at zero offset */ - } - - void update_location(); - - void dump(Formatter *f) const; - }; // class obj_iterator - - obj_iterator obj_begin(const DoutPrefixProvider *dpp) const { return obj_iterator{dpp, this}; } - obj_iterator obj_end(const DoutPrefixProvider *dpp) const { return obj_iterator{dpp, this, obj_size}; } - obj_iterator obj_find(const DoutPrefixProvider *dpp, uint64_t ofs) const { - return obj_iterator{dpp, this, std::min(ofs, obj_size)}; - } - - /* - * simple object generator. Using a simple single rule manifest. 
- */ - class generator { - RGWObjManifest *manifest; - uint64_t last_ofs; - uint64_t cur_part_ofs; - int cur_part_id; - int cur_stripe; - uint64_t cur_stripe_size; - std::string cur_oid; - - std::string oid_prefix; - - rgw_obj_select cur_obj; - - RGWObjManifestRule rule; - - public: - generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0), - cur_stripe(0), cur_stripe_size(0) {} - int create_begin(CephContext *cct, RGWObjManifest *manifest, - const rgw_placement_rule& head_placement_rule, - const rgw_placement_rule *tail_placement_rule, - const rgw_bucket& bucket, - const rgw_obj& obj); - - int create_next(uint64_t ofs); - - rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); } - rgw_raw_obj get_cur_obj(rgw::sal::RadosStore* store) const { return cur_obj.get_raw_obj(store); } - - /* total max size of current stripe (including head obj) */ - uint64_t cur_stripe_max_size() const { - return cur_stripe_size; - } - }; -}; -WRITE_CLASS_ENCODER(RGWObjManifest) diff --git a/src/rgw/store/rados/rgw_object_expirer_core.cc b/src/rgw/store/rados/rgw_object_expirer_core.cc deleted file mode 100644 index ec1bf3fb6dc..00000000000 --- a/src/rgw/store/rados/rgw_object_expirer_core.cc +++ /dev/null @@ -1,442 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include -#include -#include -#include - - -#include "auth/Crypto.h" - -#include "common/armor.h" -#include "common/ceph_json.h" -#include "common/config.h" -#include "common/ceph_argparse.h" -#include "common/Formatter.h" -#include "common/errno.h" - -#include "global/global_init.h" - -#include "include/utime.h" -#include "include/str_list.h" - -#include "rgw_user.h" -#include "rgw_bucket.h" -#include "rgw_acl.h" -#include "rgw_acl_s3.h" -#include "rgw_log.h" -#include "rgw_formats.h" -#include "rgw_usage.h" -#include "rgw_object_expirer_core.h" -#include "rgw_zone.h" -#include "rgw_sal_rados.h" - -#include "services/svc_rados.h" -#include "services/svc_zone.h" -#include "services/svc_sys_obj.h" -#include "services/svc_bi_rados.h" - -#include "cls/lock/cls_lock_client.h" -#include "cls/timeindex/cls_timeindex_client.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -static string objexp_lock_name = "gc_process"; - -static string objexp_hint_get_shardname(int shard_num) -{ - char buf[64]; - snprintf(buf, sizeof(buf), "obj_delete_at_hint.%010u", (unsigned)shard_num); - return buf; -} - -static int objexp_key_shard(const rgw_obj_index_key& key, int num_shards) -{ - string obj_key = key.name + key.instance; - return RGWSI_BucketIndex_RADOS::bucket_shard_index(obj_key, num_shards); -} - -static string objexp_hint_get_keyext(const string& tenant_name, - const string& bucket_name, - const string& bucket_id, - const rgw_obj_key& obj_key) { - return tenant_name + (tenant_name.empty() ? 
"" : ":") + bucket_name + ":" + bucket_id + - ":" + obj_key.name + ":" + obj_key.instance; -} - -static void objexp_get_shard(int shard_num, - string *shard) -{ - *shard = objexp_hint_get_shardname(shard_num); -} - -static int objexp_hint_parse(const DoutPrefixProvider *dpp, CephContext *cct, cls_timeindex_entry &ti_entry, - objexp_hint_entry *hint_entry) -{ - try { - auto iter = ti_entry.value.cbegin(); - decode(*hint_entry, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: couldn't decode avail_pools" << dendl; - } - - return 0; -} - -int RGWObjExpStore::objexp_hint_add(const DoutPrefixProvider *dpp, - const ceph::real_time& delete_at, - const string& tenant_name, - const string& bucket_name, - const string& bucket_id, - const rgw_obj_index_key& obj_key) -{ - const string keyext = objexp_hint_get_keyext(tenant_name, bucket_name, - bucket_id, obj_key); - objexp_hint_entry he = { - .tenant = tenant_name, - .bucket_name = bucket_name, - .bucket_id = bucket_id, - .obj_key = obj_key, - .exp_time = delete_at }; - bufferlist hebl; - encode(he, hebl); - librados::ObjectWriteOperation op; - cls_timeindex_add(op, utime_t(delete_at), keyext, hebl); - - string shard_name = objexp_hint_get_shardname(objexp_key_shard(obj_key, cct->_conf->rgw_objexp_hints_num_shards)); - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, shard_name)); - int r = obj.open(dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; - return r; - } - return obj.operate(dpp, &op, null_yield); -} - -int RGWObjExpStore::objexp_hint_list(const DoutPrefixProvider *dpp, - const string& oid, - const ceph::real_time& start_time, - const ceph::real_time& end_time, - const int max_entries, - const string& marker, - list& entries, /* out */ - string *out_marker, /* out */ - bool *truncated) /* out */ -{ - librados::ObjectReadOperation op; - cls_timeindex_list(op, utime_t(start_time), utime_t(end_time), marker, max_entries, entries, - out_marker, truncated); - - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); - int r = obj.open(dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; - return r; - } - bufferlist obl; - int ret = obj.operate(dpp, &op, &obl, null_yield); - - if ((ret < 0 ) && (ret != -ENOENT)) { - return ret; - } - - if ((ret == -ENOENT) && truncated) { - *truncated = false; - } - - return 0; -} - -static int cls_timeindex_trim_repeat(const DoutPrefixProvider *dpp, - rgw_rados_ref ref, - const string& oid, - const utime_t& from_time, - const utime_t& to_time, - const string& from_marker, - const string& to_marker) -{ - bool done = false; - do { - librados::ObjectWriteOperation op; - cls_timeindex_trim(op, from_time, to_time, from_marker, to_marker); - int r = rgw_rados_operate(dpp, ref.pool.ioctx(), oid, &op, null_yield); - if (r == -ENODATA) - done = true; - else if (r < 0) - return r; - } while (!done); - - return 0; -} - -int RGWObjExpStore::objexp_hint_trim(const DoutPrefixProvider *dpp, - const string& oid, - const ceph::real_time& start_time, - const ceph::real_time& end_time, - const string& from_marker, - const string& to_marker) -{ - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); - int r = obj.open(dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << 
")" << dendl; - return r; - } - auto& ref = obj.get_ref(); - int ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time), - from_marker, to_marker); - if ((ret < 0 ) && (ret != -ENOENT)) { - return ret; - } - - return 0; -} - -int RGWObjectExpirer::garbage_single_object(const DoutPrefixProvider *dpp, objexp_hint_entry& hint) -{ - RGWBucketInfo bucket_info; - std::unique_ptr bucket; - - int ret = driver->get_bucket(dpp, nullptr, rgw_bucket(hint.tenant, hint.bucket_name, hint.bucket_id), &bucket, null_yield); - if (-ENOENT == ret) { - ldpp_dout(dpp, 15) << "NOTICE: cannot find bucket = " \ - << hint.bucket_name << ". The object must be already removed" << dendl; - return -ERR_PRECONDITION_FAILED; - } else if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: could not init bucket = " \ - << hint.bucket_name << "due to ret = " << ret << dendl; - return ret; - } - - rgw_obj_key key = hint.obj_key; - if (key.instance.empty()) { - key.instance = "null"; - } - - std::unique_ptr obj = bucket->get_object(key); - obj->set_atomic(); - ret = obj->delete_object(dpp, null_yield); - - return ret; -} - -void RGWObjectExpirer::garbage_chunk(const DoutPrefixProvider *dpp, - list& entries, /* in */ - bool& need_trim) /* out */ -{ - need_trim = false; - - for (list::iterator iter = entries.begin(); - iter != entries.end(); - ++iter) - { - objexp_hint_entry hint; - ldpp_dout(dpp, 15) << "got removal hint for: " << iter->key_ts.sec() \ - << " - " << iter->key_ext << dendl; - - int ret = objexp_hint_parse(dpp, driver->ctx(), *iter, &hint); - if (ret < 0) { - ldpp_dout(dpp, 1) << "cannot parse removal hint for " << hint.obj_key << dendl; - continue; - } - - /* PRECOND_FAILED simply means that our hint is not valid. - * We can silently ignore that and move forward. 
*/ - ret = garbage_single_object(dpp, hint); - if (ret == -ERR_PRECONDITION_FAILED) { - ldpp_dout(dpp, 15) << "not actual hint for object: " << hint.obj_key << dendl; - } else if (ret < 0) { - ldpp_dout(dpp, 1) << "cannot remove expired object: " << hint.obj_key << dendl; - } - - need_trim = true; - } - - return; -} - -void RGWObjectExpirer::trim_chunk(const DoutPrefixProvider *dpp, - const string& shard, - const utime_t& from, - const utime_t& to, - const string& from_marker, - const string& to_marker) -{ - ldpp_dout(dpp, 20) << "trying to trim removal hints to=" << to - << ", to_marker=" << to_marker << dendl; - - real_time rt_from = from.to_real_time(); - real_time rt_to = to.to_real_time(); - - int ret = exp_store.objexp_hint_trim(dpp, shard, rt_from, rt_to, - from_marker, to_marker); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR during trim: " << ret << dendl; - } - - return; -} - -bool RGWObjectExpirer::process_single_shard(const DoutPrefixProvider *dpp, - const string& shard, - const utime_t& last_run, - const utime_t& round_start) -{ - string marker; - string out_marker; - bool truncated = false; - bool done = true; - - CephContext *cct = driver->ctx(); - int num_entries = cct->_conf->rgw_objexp_chunk_size; - - int max_secs = cct->_conf->rgw_objexp_gc_interval; - utime_t end = ceph_clock_now(); - end += max_secs; - - rados::cls::lock::Lock l(objexp_lock_name); - - utime_t time(max_secs, 0); - l.set_duration(time); - - int ret = l.lock_exclusive(&static_cast(driver)->getRados()->objexp_pool_ctx, shard); - if (ret == -EBUSY) { /* already locked by another processor */ - ldpp_dout(dpp, 5) << __func__ << "(): failed to acquire lock on " << shard << dendl; - return false; - } - - do { - real_time rt_last = last_run.to_real_time(); - real_time rt_start = round_start.to_real_time(); - - list entries; - ret = exp_store.objexp_hint_list(dpp, shard, rt_last, rt_start, - num_entries, marker, entries, - &out_marker, &truncated); - if (ret < 0) { - ldpp_dout(dpp, 10) << "cannot get removal hints from shard: " << shard - << dendl; - continue; - } - - bool need_trim; - garbage_chunk(dpp, entries, need_trim); - - if (need_trim) { - trim_chunk(dpp, shard, last_run, round_start, marker, out_marker); - } - - utime_t now = ceph_clock_now(); - if (now >= end) { - done = false; - break; - } - - marker = out_marker; - } while (truncated); - - l.unlock(&static_cast(driver)->getRados()->objexp_pool_ctx, shard); - return done; -} - -/* Returns true if all shards have been processed successfully. */ -bool RGWObjectExpirer::inspect_all_shards(const DoutPrefixProvider *dpp, - const utime_t& last_run, - const utime_t& round_start) -{ - CephContext * const cct = driver->ctx(); - int num_shards = cct->_conf->rgw_objexp_hints_num_shards; - bool all_done = true; - - for (int i = 0; i < num_shards; i++) { - string shard; - objexp_get_shard(i, &shard); - - ldpp_dout(dpp, 20) << "processing shard = " << shard << dendl; - - if (! 
process_single_shard(dpp, shard, last_run, round_start)) { - all_done = false; - } - } - - return all_done; -} - -bool RGWObjectExpirer::going_down() -{ - return down_flag; -} - -void RGWObjectExpirer::start_processor() -{ - worker = new OEWorker(driver->ctx(), this); - worker->create("rgw_obj_expirer"); -} - -void RGWObjectExpirer::stop_processor() -{ - down_flag = true; - if (worker) { - worker->stop(); - worker->join(); - } - delete worker; - worker = NULL; -} - -void *RGWObjectExpirer::OEWorker::entry() { - utime_t last_run; - do { - utime_t start = ceph_clock_now(); - ldpp_dout(this, 2) << "object expiration: start" << dendl; - if (oe->inspect_all_shards(this, last_run, start)) { - /* All shards have been processed properly. Next time we can start - * from this moment. */ - last_run = start; - } - ldpp_dout(this, 2) << "object expiration: stop" << dendl; - - - if (oe->going_down()) - break; - - utime_t end = ceph_clock_now(); - end -= start; - int secs = cct->_conf->rgw_objexp_gc_interval; - - if (secs <= end.sec()) - continue; // next round - - secs -= end.sec(); - - std::unique_lock l{lock}; - cond.wait_for(l, std::chrono::seconds(secs)); - } while (!oe->going_down()); - - return NULL; -} - -void RGWObjectExpirer::OEWorker::stop() -{ - std::lock_guard l{lock}; - cond.notify_all(); -} - -CephContext *RGWObjectExpirer::OEWorker::get_cct() const -{ - return cct; -} - -unsigned RGWObjectExpirer::OEWorker::get_subsys() const -{ - return dout_subsys; -} - -std::ostream& RGWObjectExpirer::OEWorker::gen_prefix(std::ostream& out) const -{ - return out << "rgw object expirer Worker thread: "; -} diff --git a/src/rgw/store/rados/rgw_object_expirer_core.h b/src/rgw/store/rados/rgw_object_expirer_core.h deleted file mode 100644 index fccd4199e7e..00000000000 --- a/src/rgw/store/rados/rgw_object_expirer_core.h +++ /dev/null @@ -1,148 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_OBJEXP_H -#define CEPH_OBJEXP_H - -#include -#include -#include -#include -#include - -#include "auth/Crypto.h" - -#include "common/armor.h" -#include "common/ceph_json.h" -#include "common/config.h" -#include "common/ceph_argparse.h" -#include "common/Formatter.h" -#include "common/errno.h" - -#include "common/ceph_mutex.h" -#include "common/Cond.h" -#include "common/Thread.h" - -#include "global/global_init.h" - -#include "include/common_fwd.h" -#include "include/utime.h" -#include "include/str_list.h" - -#include "rgw_sal_rados.h" - -class RGWSI_RADOS; -class RGWSI_Zone; -class RGWBucketInfo; -class cls_timeindex_entry; - -class RGWObjExpStore { - CephContext *cct; - RGWSI_RADOS *rados_svc; - rgw::sal::RadosStore* driver; -public: - RGWObjExpStore(CephContext *_cct, RGWSI_RADOS *_rados_svc, rgw::sal::RadosStore* _driver) : cct(_cct), - rados_svc(_rados_svc), - driver(_driver) {} - - int objexp_hint_add(const DoutPrefixProvider *dpp, - const ceph::real_time& delete_at, - const std::string& tenant_name, - const std::string& bucket_name, - const std::string& bucket_id, - const rgw_obj_index_key& obj_key); - - int objexp_hint_list(const DoutPrefixProvider *dpp, - const std::string& oid, - const ceph::real_time& start_time, - const ceph::real_time& end_time, - const int max_entries, - const std::string& marker, - std::list& entries, /* out */ - std::string *out_marker, /* out */ - bool *truncated); /* out */ - - int objexp_hint_trim(const DoutPrefixProvider *dpp, - const std::string& oid, - const ceph::real_time& start_time, - const 
ceph::real_time& end_time, - const std::string& from_marker, - const std::string& to_marker); -}; - -class RGWObjectExpirer { -protected: - rgw::sal::Driver* driver; - RGWObjExpStore exp_store; - - class OEWorker : public Thread, public DoutPrefixProvider { - CephContext *cct; - RGWObjectExpirer *oe; - ceph::mutex lock = ceph::make_mutex("OEWorker"); - ceph::condition_variable cond; - - public: - OEWorker(CephContext * const cct, - RGWObjectExpirer * const oe) - : cct(cct), - oe(oe) { - } - - void *entry() override; - void stop(); - - CephContext *get_cct() const override; - unsigned get_subsys() const override; - std::ostream& gen_prefix(std::ostream& out) const override; - }; - - OEWorker *worker{nullptr}; - std::atomic down_flag = { false }; - -public: - explicit RGWObjectExpirer(rgw::sal::Driver* _driver) - : driver(_driver), - exp_store(_driver->ctx(), static_cast(driver)->svc()->rados, static_cast(driver)), - worker(NULL) { - } - ~RGWObjectExpirer() { - stop_processor(); - } - - int hint_add(const DoutPrefixProvider *dpp, - const ceph::real_time& delete_at, - const std::string& tenant_name, - const std::string& bucket_name, - const std::string& bucket_id, - const rgw_obj_index_key& obj_key) { - return exp_store.objexp_hint_add(dpp, delete_at, tenant_name, bucket_name, - bucket_id, obj_key); - } - - int garbage_single_object(const DoutPrefixProvider *dpp, objexp_hint_entry& hint); - - void garbage_chunk(const DoutPrefixProvider *dpp, - std::list& entries, /* in */ - bool& need_trim); /* out */ - - void trim_chunk(const DoutPrefixProvider *dpp, - const std::string& shard, - const utime_t& from, - const utime_t& to, - const std::string& from_marker, - const std::string& to_marker); - - bool process_single_shard(const DoutPrefixProvider *dpp, - const std::string& shard, - const utime_t& last_run, - const utime_t& round_start); - - bool inspect_all_shards(const DoutPrefixProvider *dpp, - const utime_t& last_run, - const utime_t& round_start); - - bool going_down(); - void start_processor(); - void stop_processor(); -}; -#endif /* CEPH_OBJEXP_H */ diff --git a/src/rgw/store/rados/rgw_otp.cc b/src/rgw/store/rados/rgw_otp.cc deleted file mode 100644 index 07cc14f113b..00000000000 --- a/src/rgw/store/rados/rgw_otp.cc +++ /dev/null @@ -1,211 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include - -#include -#include -#include - -#include "common/errno.h" -#include "common/Formatter.h" -#include "common/ceph_json.h" -#include "rgw_otp.h" -#include "rgw_zone.h" -#include "rgw_metadata.h" - -#include "include/types.h" - -#include "rgw_common.h" -#include "rgw_tools.h" - -#include "services/svc_zone.h" -#include "services/svc_meta.h" -#include "services/svc_meta_be.h" -#include "services/svc_meta_be_otp.h" -#include "services/svc_otp.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - - -class RGWOTPMetadataHandler; - -class RGWOTPMetadataObject : public RGWMetadataObject { - friend class RGWOTPMetadataHandler; - - otp_devices_list_t devices; -public: - RGWOTPMetadataObject() {} - RGWOTPMetadataObject(otp_devices_list_t&& _devices, const obj_version& v, const real_time m) { - devices = std::move(_devices); - objv = v; - mtime = m; - } - - void dump(Formatter *f) const override { - encode_json("devices", devices, f); - } - - otp_devices_list_t& get_devs() { - return devices; - } -}; - - -class RGWOTPMetadataHandler : public RGWOTPMetadataHandlerBase { - friend class RGWOTPCtl; - - struct Svc { - RGWSI_Zone 
*zone; - RGWSI_MetaBackend *meta_be; - RGWSI_OTP *otp; - } svc; - - int init(RGWSI_Zone *zone, - RGWSI_MetaBackend *_meta_be, - RGWSI_OTP *_otp) { - base_init(zone->ctx(), _otp->get_be_handler().get()); - svc.zone = zone; - svc.meta_be = _meta_be; - svc.otp = _otp; - return 0; - } - - int call(std::function f) { - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - RGWSI_OTP_BE_Ctx ctx(op->ctx()); - return f(ctx); - }); - } - - RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override { - otp_devices_list_t devices; - try { - JSONDecoder::decode_json("devices", devices, jo); - } catch (JSONDecoder::err& e) { - return nullptr; - } - - return new RGWOTPMetadataObject(std::move(devices), objv, mtime); - } - - int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override { - RGWObjVersionTracker objv_tracker; - - std::unique_ptr mdo(new RGWOTPMetadataObject); - - - RGWSI_OTP_BE_Ctx be_ctx(op->ctx()); - - int ret = svc.otp->read_all(be_ctx, - entry, - &mdo->get_devs(), - &mdo->get_mtime(), - &objv_tracker, - y, - dpp); - if (ret < 0) { - return ret; - } - - mdo->objv = objv_tracker.read_version; - - *obj = mdo.release(); - - return 0; - } - - int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, - RGWMetadataObject *_obj, RGWObjVersionTracker& objv_tracker, - optional_yield y, - const DoutPrefixProvider *dpp, - RGWMDLogSyncType type, bool from_remote_zone) override { - RGWOTPMetadataObject *obj = static_cast(_obj); - - RGWSI_OTP_BE_Ctx be_ctx(op->ctx()); - - int ret = svc.otp->store_all(dpp, be_ctx, - entry, - obj->devices, - obj->mtime, - &objv_tracker, - y); - if (ret < 0) { - return ret; - } - - return STATUS_APPLIED; - } - - int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, - optional_yield y, const DoutPrefixProvider *dpp) override { - RGWSI_MBOTP_RemoveParams params; - - RGWSI_OTP_BE_Ctx be_ctx(op->ctx()); - - return svc.otp->remove_all(dpp, be_ctx, - entry, - &objv_tracker, - y); - } - -public: - RGWOTPMetadataHandler() {} - - string get_type() override { return "otp"; } -}; - - -RGWOTPCtl::RGWOTPCtl(RGWSI_Zone *zone_svc, - RGWSI_OTP *otp_svc) -{ - svc.zone = zone_svc; - svc.otp = otp_svc; -} - - -void RGWOTPCtl::init(RGWOTPMetadataHandler *_meta_handler) -{ - meta_handler = _meta_handler; - be_handler = meta_handler->get_be_handler(); -} - -int RGWOTPCtl::read_all(const rgw_user& uid, - RGWOTPInfo *info, - optional_yield y, - const DoutPrefixProvider *dpp, - const GetParams& params) -{ - info->uid = uid; - return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) { - return svc.otp->read_all(ctx, uid, &info->devices, params.mtime, params.objv_tracker, y, dpp); - }); -} - -int RGWOTPCtl::store_all(const DoutPrefixProvider *dpp, - const RGWOTPInfo& info, - optional_yield y, - const PutParams& params) -{ - return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) { - return svc.otp->store_all(dpp, ctx, info.uid, info.devices, params.mtime, params.objv_tracker, y); - }); -} - -int RGWOTPCtl::remove_all(const DoutPrefixProvider *dpp, - const rgw_user& uid, - optional_yield y, - const RemoveParams& params) -{ - return meta_handler->call([&](RGWSI_OTP_BE_Ctx& ctx) { - return svc.otp->remove_all(dpp, ctx, uid, params.objv_tracker, y); - }); -} - - -RGWMetadataHandler *RGWOTPMetaHandlerAllocator::alloc() -{ - return new RGWOTPMetadataHandler(); -} diff --git a/src/rgw/store/rados/rgw_otp.h 
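`RGWOTPMetadataHandler::call()` above is the glue that keeps `RGWOTPCtl` thin: it opens a backend op via `be_handler->call()`, wraps it in an `RGWSI_OTP_BE_Ctx`, and hands that context to the caller's lambda. A compilable sketch of the same context-broker shape, with hypothetical stand-in types:

```cpp
#include <functional>
#include <iostream>

// Hypothetical stand-ins: the real types are RGWSI_MetaBackend_Handler::Op
// and RGWSI_OTP_BE_Ctx.
struct BackendOp { int id = 42; };
struct OtpCtx { BackendOp* op; };

// Mirrors RGWOTPMetadataHandler::call(): open a backend op, wrap it in the
// service-specific context, and run the caller's lambda inside it.
int with_otp_ctx(std::function<int(OtpCtx&)> f) {
  BackendOp op; // in RGW this comes from be_handler->call(...)
  OtpCtx ctx{&op};
  return f(ctx);
}

int main() {
  // usage mirrors RGWOTPCtl::read_all() above: business logic is a short lambda
  return with_otp_ctx([](OtpCtx& ctx) {
    std::cout << "reading devices via backend op " << ctx.op->id << "\n";
    return 0;
  });
}
```

The payoff is that `RGWOTPCtl::read_all/store_all/remove_all` each collapse to a one-line lambda over the service call, with the backend transaction plumbing handled in one place.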
b/src/rgw/store/rados/rgw_otp.h deleted file mode 100644 index eacff15314c..00000000000 --- a/src/rgw/store/rados/rgw_otp.h +++ /dev/null @@ -1,114 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_OTP_H -#define CEPH_RGW_OTP_H - -#include "rgw_sal_fwd.h" -#include "cls/otp/cls_otp_types.h" -#include "services/svc_meta_be_otp.h" - -#include "rgw_basic_types.h" -#include "rgw_metadata.h" - - -class RGWObjVersionTracker; -class RGWMetadataHandler; -class RGWOTPMetadataHandler; -class RGWSI_Zone; -class RGWSI_OTP; -class RGWSI_MetaBackend; - -class RGWOTPMetadataHandlerBase : public RGWMetadataHandler_GenericMetaBE { -public: - virtual ~RGWOTPMetadataHandlerBase() {} - virtual int init(RGWSI_Zone *zone, - RGWSI_MetaBackend *_meta_be, - RGWSI_OTP *_otp) = 0; -}; - -class RGWOTPMetaHandlerAllocator { -public: - static RGWMetadataHandler *alloc(); -}; - -struct RGWOTPInfo { - rgw_user uid; - otp_devices_list_t devices; -}; - - -class RGWOTPCtl -{ - struct Svc { - RGWSI_Zone *zone{nullptr}; - RGWSI_OTP *otp{nullptr}; - } svc; - - RGWOTPMetadataHandler *meta_handler; - RGWSI_MetaBackend_Handler *be_handler; - -public: - RGWOTPCtl(RGWSI_Zone *zone_svc, - RGWSI_OTP *otp_svc); - - void init(RGWOTPMetadataHandler *_meta_handler); - - struct GetParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time *mtime{nullptr}; - - GetParams() {} - - GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - GetParams& set_mtime(ceph::real_time *_mtime) { - mtime = _mtime; - return *this; - } - }; - - struct PutParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time mtime; - - PutParams() {} - - PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - PutParams& set_mtime(const ceph::real_time& _mtime) { - mtime = _mtime; - return *this; - } - }; - - struct RemoveParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - - RemoveParams() {} - - RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - }; - - int read_all(const rgw_user& uid, RGWOTPInfo *info, optional_yield y, - const DoutPrefixProvider *dpp, - const GetParams& params = {}); - int store_all(const DoutPrefixProvider *dpp, - const RGWOTPInfo& info, optional_yield y, - const PutParams& params = {}); - int remove_all(const DoutPrefixProvider *dpp, - const rgw_user& user, optional_yield y, - const RemoveParams& params = {}); -}; - -#endif - diff --git a/src/rgw/store/rados/rgw_period.cc b/src/rgw/store/rados/rgw_period.cc deleted file mode 100644 index 61602b354e2..00000000000 --- a/src/rgw/store/rados/rgw_period.cc +++ /dev/null @@ -1,324 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_sync.h" - -#include "services/svc_zone.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; -using namespace rgw_zone_defaults; - -int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup, - const string& zonegroup_id) const -{ - map::const_iterator iter; - if (!zonegroup_id.empty()) { - iter = period_map.zonegroups.find(zonegroup_id); - } else { - iter = period_map.zonegroups.find("default"); - } - if (iter != period_map.zonegroups.end()) { - zonegroup = iter->second; - return 0; - } - - return -ENOENT; -} - -int RGWPeriod::get_latest_epoch(const DoutPrefixProvider 
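The `GetParams`/`PutParams`/`RemoveParams` structs above use chained setters so call sites can name just the optional inputs and outputs they care about. A small self-contained sketch of that parameter-object idiom; the `Tracker` type and the `read_all()` signature are invented for illustration:

```cpp
#include <iostream>
#include <string>

// Sketch of the chained-setter parameter objects (GetParams/PutParams above).
struct Tracker {};

struct GetParams {
  Tracker* objv_tracker = nullptr;
  std::string* mtime = nullptr;

  GetParams& set_objv_tracker(Tracker* t) { objv_tracker = t; return *this; }
  GetParams& set_mtime(std::string* m) { mtime = m; return *this; }
};

int read_all(const std::string& uid, const GetParams& params = {}) {
  if (params.mtime) *params.mtime = "2022-12-02T16:34:44Z"; // fill only requested outputs
  std::cout << "read otp devices for " << uid << "\n";
  return 0;
}

int main() {
  std::string mtime;
  // callers name only the optional outputs they need; everything else defaults
  return read_all("testid", GetParams().set_mtime(&mtime));
}
```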
*dpp, epoch_t& latest_epoch, optional_yield y) -{ - RGWPeriodLatestEpochInfo info; - - int ret = read_latest_epoch(dpp, info, y); - if (ret < 0) { - return ret; - } - - latest_epoch = info.epoch; - - return 0; -} - -int RGWPeriod::delete_obj(const DoutPrefixProvider *dpp, optional_yield y) -{ - rgw_pool pool(get_pool(cct)); - - // delete the object for each period epoch - for (epoch_t e = 1; e <= epoch; e++) { - RGWPeriod p{get_id(), e}; - rgw_raw_obj oid{pool, p.get_period_oid()}; - auto sysobj = sysobj_svc->get_obj(oid); - int ret = sysobj.wop().remove(dpp, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to delete period object " << oid - << ": " << cpp_strerror(-ret) << dendl; - } - } - - // delete the .latest_epoch object - rgw_raw_obj oid{pool, get_period_oid_prefix() + get_latest_epoch_oid()}; - auto sysobj = sysobj_svc->get_obj(oid); - int ret = sysobj.wop().remove(dpp, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to delete period object " << oid - << ": " << cpp_strerror(-ret) << dendl; - } - return ret; -} - -int RGWPeriod::add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y) -{ - if (zonegroup.realm_id != realm_id) { - return 0; - } - int ret = period_map.update(zonegroup, cct); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl; - return ret; - } - - return store_info(dpp, false, y); -} - -int RGWPeriod::update(const DoutPrefixProvider *dpp, optional_yield y) -{ - auto zone_svc = sysobj_svc->get_zone_svc(); - ldpp_dout(dpp, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl; - list zonegroups; - int ret = zone_svc->list_zonegroups(dpp, zonegroups); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl; - return ret; - } - - // clear zone short ids of removed zones. 
period_map.update() will add the - // remaining zones back - period_map.short_zone_ids.clear(); - - for (auto& iter : zonegroups) { - RGWZoneGroup zg(string(), iter); - ret = zg.init(dpp, cct, sysobj_svc, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl; - continue; - } - - if (zg.realm_id != realm_id) { - ldpp_dout(dpp, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl; - continue; - } - - if (zg.master_zone.empty()) { - ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl; - return -EINVAL; - } - - if (zg.zones.find(zg.master_zone) == zg.zones.end()) { - ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() - << " has a non existent master zone "<< dendl; - return -EINVAL; - } - - if (zg.is_master_zonegroup()) { - master_zonegroup = zg.get_id(); - master_zone = zg.master_zone; - } - - int ret = period_map.update(zg, cct); - if (ret < 0) { - return ret; - } - } - - ret = period_config.read(dpp, sysobj_svc, realm_id, y); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: failed to read period config: " - << cpp_strerror(ret) << dendl; - return ret; - } - return 0; -} - -void RGWPeriod::fork() -{ - ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl; - predecessor_uuid = id; - id = get_staging_id(realm_id); - period_map.reset(); - realm_epoch++; -} - -static int read_sync_status(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw_meta_sync_status *sync_status) -{ - rgw::sal::RadosStore* rados_store = static_cast(driver); - // initialize a sync status manager to read the status - RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->rados->get_async_processor()); - int r = mgr.init(dpp); - if (r < 0) { - return r; - } - r = mgr.read_sync_status(dpp, sync_status); - mgr.stop(); - return r; -} - -int RGWPeriod::update_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, /* for now */ - const RGWPeriod ¤t_period, - std::ostream& error_stream, - bool force_if_stale) -{ - rgw_meta_sync_status status; - int r = read_sync_status(dpp, driver, &status); - if (r < 0) { - ldpp_dout(dpp, 0) << "period failed to read sync status: " - << cpp_strerror(-r) << dendl; - return r; - } - - std::vector markers; - - const auto current_epoch = current_period.get_realm_epoch(); - if (current_epoch != status.sync_info.realm_epoch) { - // no sync status markers for the current period - ceph_assert(current_epoch > status.sync_info.realm_epoch); - const int behind = current_epoch - status.sync_info.realm_epoch; - if (!force_if_stale && current_epoch > 1) { - error_stream << "ERROR: This zone is " << behind << " period(s) behind " - "the current master zone in metadata sync. If this zone is promoted " - "to master, any metadata changes during that time are likely to " - "be lost.\n" - "Waiting for this zone to catch up on metadata sync (see " - "'radosgw-admin sync status') is recommended.\n" - "To promote this zone to master anyway, add the flag " - "--yes-i-really-mean-it." 
<< std::endl; - return -EINVAL; - } - // empty sync status markers - other zones will skip this period during - // incremental metadata sync - markers.resize(status.sync_info.num_shards); - } else { - markers.reserve(status.sync_info.num_shards); - for (auto& i : status.sync_markers) { - auto& marker = i.second; - // filter out markers from other periods - if (marker.realm_epoch != current_epoch) { - marker.marker.clear(); - } - markers.emplace_back(std::move(marker.marker)); - } - } - - std::swap(sync_status, markers); - return 0; -} - -int RGWPeriod::commit(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWRealm& realm, const RGWPeriod& current_period, - std::ostream& error_stream, optional_yield y, - bool force_if_stale) -{ - auto zone_svc = sysobj_svc->get_zone_svc(); - ldpp_dout(dpp, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl; - // gateway must be in the master zone to commit - if (master_zone != zone_svc->get_zone_params().get_id()) { - error_stream << "Cannot commit period on zone " - << zone_svc->get_zone_params().get_id() << ", it must be sent to " - "the period's master zone " << master_zone << '.' << std::endl; - return -EINVAL; - } - // period predecessor must match current period - if (predecessor_uuid != current_period.get_id()) { - error_stream << "Period predecessor " << predecessor_uuid - << " does not match current period " << current_period.get_id() - << ". Use 'period pull' to get the latest period from the master, " - "reapply your changes, and try again." << std::endl; - return -EINVAL; - } - // realm epoch must be 1 greater than current period - if (realm_epoch != current_period.get_realm_epoch() + 1) { - error_stream << "Period's realm epoch " << realm_epoch - << " does not come directly after current realm epoch " - << current_period.get_realm_epoch() << ". Use 'realm pull' to get the " - "latest realm and period from the master zone, reapply your changes, " - "and try again." << std::endl; - return -EINVAL; - } - // did the master zone change? - if (master_zone != current_period.get_master_zone()) { - // store the current metadata sync status in the period - int r = update_sync_status(dpp, driver, current_period, error_stream, force_if_stale); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update metadata sync status: " - << cpp_strerror(-r) << dendl; - return r; - } - // create an object with a new period id - r = create(dpp, y, true); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl; - return r; - } - // set as current period - r = realm.set_current_period(dpp, *this, y); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update realm's current period: " - << cpp_strerror(-r) << dendl; - return r; - } - ldpp_dout(dpp, 4) << "Promoted to master zone and committed new period " - << id << dendl; - realm.notify_new_period(dpp, *this, y); - return 0; - } - // period must be based on current epoch - if (epoch != current_period.get_epoch()) { - error_stream << "Period epoch " << epoch << " does not match " - "predecessor epoch " << current_period.get_epoch() - << ". Use 'period pull' to get the latest epoch from the master zone, " - "reapply your changes, and try again." 
<< std::endl; - return -EINVAL; - } - // set period as next epoch - set_id(current_period.get_id()); - set_epoch(current_period.get_epoch() + 1); - set_predecessor(current_period.get_predecessor()); - realm_epoch = current_period.get_realm_epoch(); - // write the period to rados - int r = store_info(dpp, false, y); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to store period: " << cpp_strerror(-r) << dendl; - return r; - } - // set as latest epoch - r = update_latest_epoch(dpp, epoch, y); - if (r == -EEXIST) { - // already have this epoch (or a more recent one) - return 0; - } - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl; - return r; - } - r = reflect(dpp, y); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl; - return r; - } - ldpp_dout(dpp, 4) << "Committed new epoch " << epoch - << " for period " << id << dendl; - realm.notify_new_period(dpp, *this, y); - return 0; -} - -void RGWPeriod::generate_test_instances(list &o) -{ - RGWPeriod *z = new RGWPeriod; - o.push_back(z); - o.push_back(new RGWPeriod); -} - - diff --git a/src/rgw/store/rados/rgw_rest_pubsub.cc b/src/rgw/store/rados/rgw_rest_pubsub.cc deleted file mode 100644 index 23d56615ac9..00000000000 --- a/src/rgw/store/rados/rgw_rest_pubsub.cc +++ /dev/null @@ -1,1069 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include -#include -#include "rgw_rest_pubsub.h" -#include "rgw_pubsub_push.h" -#include "rgw_pubsub.h" -#include "rgw_op.h" -#include "rgw_rest.h" -#include "rgw_rest_s3.h" -#include "rgw_arn.h" -#include "rgw_auth_s3.h" -#include "rgw_notify.h" -#include "rgw_sal_rados.h" -#include "services/svc_zone.h" -#include "common/dout.h" -#include "rgw_url.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -static const char* AWS_SNS_NS("https://sns.amazonaws.com/doc/2010-03-31/"); - -bool verify_transport_security(CephContext *cct, const RGWEnv& env) { - const auto is_secure = rgw_transport_is_secure(cct, env); - if (!is_secure && g_conf().get_val("rgw_allow_notification_secrets_in_cleartext")) { - ldout(cct, 0) << "WARNING: bypassing endpoint validation, allows sending secrets over insecure transport" << dendl; - return true; - } - return is_secure; -} - -// make sure that endpoint is a valid URL -// make sure that if user/password are passed inside URL, it is over secure connection -// update rgw_pubsub_sub_dest to indicate that a password is stored in the URL -bool validate_and_update_endpoint_secret(rgw_pubsub_sub_dest& dest, CephContext *cct, const RGWEnv& env) { - if (dest.push_endpoint.empty()) { - return true; - } - std::string user; - std::string password; - if (!rgw::parse_url_userinfo(dest.push_endpoint, user, password)) { - ldout(cct, 1) << "endpoint validation error: malformed endpoint URL:" << dest.push_endpoint << dendl; - return false; - } - // this should be verified inside parse_url() - ceph_assert(user.empty() == password.empty()); - if (!user.empty()) { - dest.stored_secret = true; - if (!verify_transport_security(cct, env)) { - ldout(cct, 1) << "endpoint validation error: sending secrets over insecure transport" << dendl; - return false; - } - } - return true; -} - -bool topic_has_endpoint_secret(const rgw_pubsub_topic_subs& topic) { - return topic.topic.dest.stored_secret; -} - -bool topics_has_endpoint_secret(const rgw_pubsub_topics& topics) { - for (const auto& topic : topics.topics) { - if 
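`RGWPeriod::commit()` above gates a commit behind three checks before deciding between the "master zone changed" path (create a new period and make it current) and the plain epoch bump. A condensed sketch of that precondition chain; `PeriodIds` and its fields are illustrative, not the RGW types:

```cpp
#include <cerrno>
#include <iostream>
#include <string>

// Condensed sketch of the precondition chain in RGWPeriod::commit().
struct PeriodIds {
  std::string id;
  std::string predecessor;
  unsigned realm_epoch;
  std::string master_zone;
};

int check_commit(const PeriodIds& staging, const PeriodIds& current,
                 const std::string& local_zone, std::ostream& err) {
  if (staging.master_zone != local_zone) {
    err << "commit must be sent to the period's master zone\n";
    return -EINVAL;
  }
  if (staging.predecessor != current.id) {
    err << "predecessor does not match current period; run 'period pull'\n";
    return -EINVAL;
  }
  if (staging.realm_epoch != current.realm_epoch + 1) {
    err << "realm epoch must directly follow the current one; run 'realm pull'\n";
    return -EINVAL;
  }
  return 0; // proceed: new period if the master zone changed, else an epoch bump
}

int main() {
  PeriodIds current{"p1", "p0", 5, "zone-a"};
  PeriodIds staging{"", "p1", 6, "zone-a"};
  return check_commit(staging, current, "zone-a", std::cerr);
}
```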
(topic_has_endpoint_secret(topic.second)) return true; - } - return false; -} - -// command (AWS compliant): -// POST -// Action=CreateTopic&Name=[&OpaqueData=data][&push-endpoint=[&persistent][&=]] -class RGWPSCreateTopicOp : public RGWOp { - private: - std::optional ps; - std::string topic_name; - rgw_pubsub_sub_dest dest; - std::string topic_arn; - std::string opaque_data; - - int get_params() { - topic_name = s->info.args.get("Name"); - if (topic_name.empty()) { - ldpp_dout(this, 1) << "CreateTopic Action 'Name' argument is missing" << dendl; - return -EINVAL; - } - - opaque_data = s->info.args.get("OpaqueData"); - - dest.push_endpoint = s->info.args.get("push-endpoint"); - s->info.args.get_bool("persistent", &dest.persistent, false); - - if (!validate_and_update_endpoint_secret(dest, s->cct, *(s->info.env))) { - return -EINVAL; - } - for (const auto& param : s->info.args.get_params()) { - if (param.first == "Action" || param.first == "Name" || param.first == "PayloadHash") { - continue; - } - dest.push_endpoint_args.append(param.first+"="+param.second+"&"); - } - - if (!dest.push_endpoint_args.empty()) { - // remove last separator - dest.push_endpoint_args.pop_back(); - } - if (!dest.push_endpoint.empty() && dest.persistent) { - const auto ret = rgw::notify::add_persistent_topic(topic_name, s->yield); - if (ret < 0) { - ldpp_dout(this, 1) << "CreateTopic Action failed to create queue for persistent topics. error:" << ret << dendl; - return ret; - } - } - - // dest object only stores endpoint info - dest.arn_topic = topic_name; - // the topic ARN will be sent in the reply - const rgw::ARN arn(rgw::Partition::aws, rgw::Service::sns, - driver->get_zone()->get_zonegroup().get_name(), - s->user->get_tenant(), topic_name); - topic_arn = arn.to_string(); - return 0; - } - - public: - int verify_permission(optional_yield) override { - return 0; - } - - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - void execute(optional_yield) override; - - const char* name() const override { return "pubsub_topic_create"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_CREATE; } - uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } - - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - const auto f = s->formatter; - f->open_object_section_in_ns("CreateTopicResponse", AWS_SNS_NS); - f->open_object_section("CreateTopicResult"); - encode_xml("TopicArn", topic_arn, f); - f->close_section(); // CreateTopicResult - f->open_object_section("ResponseMetadata"); - encode_xml("RequestId", s->req_id, f); - f->close_section(); // ResponseMetadata - f->close_section(); // CreateTopicResponse - rgw_flush_formatter_and_reset(s, f); - } -}; - -void RGWPSCreateTopicOp::execute(optional_yield y) { - op_ret = get_params(); - if (op_ret < 0) { - return; - } - - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - op_ret = ps->create_topic(this, topic_name, dest, topic_arn, opaque_data, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl; - return; - } - ldpp_dout(this, 20) << "successfully created topic '" << topic_name << "'" << dendl; -} - -// command (AWS compliant): -// POST -// Action=ListTopics -class RGWPSListTopicsOp : public RGWOp { -private: - std::optional ps; - rgw_pubsub_topics result; - -public: - int verify_permission(optional_yield) override { - return 0; - } - void 
pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - void execute(optional_yield) override; - - const char* name() const override { return "pubsub_topics_list"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPICS_LIST; } - uint32_t op_mask() override { return RGW_OP_TYPE_READ; } - - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - const auto f = s->formatter; - f->open_object_section_in_ns("ListTopicsResponse", AWS_SNS_NS); - f->open_object_section("ListTopicsResult"); - encode_xml("Topics", result, f); - f->close_section(); // ListTopicsResult - f->open_object_section("ResponseMetadata"); - encode_xml("RequestId", s->req_id, f); - f->close_section(); // ResponseMetadat - f->close_section(); // ListTopicsResponse - rgw_flush_formatter_and_reset(s, f); - } -}; - -void RGWPSListTopicsOp::execute(optional_yield y) { - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - op_ret = ps->get_topics(&result); - // if there are no topics it is not considered an error - op_ret = op_ret == -ENOENT ? 0 : op_ret; - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get topics, ret=" << op_ret << dendl; - return; - } - if (topics_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) { - ldpp_dout(this, 1) << "topics contain secrets and cannot be sent over insecure transport" << dendl; - op_ret = -EPERM; - return; - } - ldpp_dout(this, 20) << "successfully got topics" << dendl; -} - -// command (extension to AWS): -// POST -// Action=GetTopic&TopicArn= -class RGWPSGetTopicOp : public RGWOp { - private: - std::string topic_name; - std::optional ps; - rgw_pubsub_topic_subs result; - - int get_params() { - const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn"))); - - if (!topic_arn || topic_arn->resource.empty()) { - ldpp_dout(this, 1) << "GetTopic Action 'TopicArn' argument is missing or invalid" << dendl; - return -EINVAL; - } - - topic_name = topic_arn->resource; - return 0; - } - - public: - int verify_permission(optional_yield y) override { - return 0; - } - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - void execute(optional_yield y) override; - - const char* name() const override { return "pubsub_topic_get"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_GET; } - uint32_t op_mask() override { return RGW_OP_TYPE_READ; } - - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - const auto f = s->formatter; - f->open_object_section("GetTopicResponse"); - f->open_object_section("GetTopicResult"); - encode_xml("Topic", result.topic, f); - f->close_section(); - f->open_object_section("ResponseMetadata"); - encode_xml("RequestId", s->req_id, f); - f->close_section(); - f->close_section(); - rgw_flush_formatter_and_reset(s, f); - } -}; - -void RGWPSGetTopicOp::execute(optional_yield y) { - op_ret = get_params(); - if (op_ret < 0) { - return; - } - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - op_ret = ps->get_topic(topic_name, &result); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; - return; - } - if (topic_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) { - ldpp_dout(this, 1) << "topic '" << topic_name << "' contain secret and 
cannot be sent over insecure transport" << dendl; - op_ret = -EPERM; - return; - } - ldpp_dout(this, 1) << "successfully got topic '" << topic_name << "'" << dendl; -} - -// command (AWS compliant): -// POST -// Action=GetTopicAttributes&TopicArn= -class RGWPSGetTopicAttributesOp : public RGWOp { - private: - std::string topic_name; - std::optional ps; - rgw_pubsub_topic_subs result; - - int get_params() { - const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn"))); - - if (!topic_arn || topic_arn->resource.empty()) { - ldpp_dout(this, 1) << "GetTopicAttribute Action 'TopicArn' argument is missing or invalid" << dendl; - return -EINVAL; - } - - topic_name = topic_arn->resource; - return 0; - } - - public: - int verify_permission(optional_yield y) override { - return 0; - } - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - void execute(optional_yield y) override; - - const char* name() const override { return "pubsub_topic_get"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_GET; } - uint32_t op_mask() override { return RGW_OP_TYPE_READ; } - - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - const auto f = s->formatter; - f->open_object_section_in_ns("GetTopicAttributesResponse", AWS_SNS_NS); - f->open_object_section("GetTopicAttributesResult"); - result.topic.dump_xml_as_attributes(f); - f->close_section(); // GetTopicAttributesResult - f->open_object_section("ResponseMetadata"); - encode_xml("RequestId", s->req_id, f); - f->close_section(); // ResponseMetadata - f->close_section(); // GetTopicAttributesResponse - rgw_flush_formatter_and_reset(s, f); - } -}; - -void RGWPSGetTopicAttributesOp::execute(optional_yield y) { - op_ret = get_params(); - if (op_ret < 0) { - return; - } - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - op_ret = ps->get_topic(topic_name, &result); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; - return; - } - if (topic_has_endpoint_secret(result) && !verify_transport_security(s->cct, *(s->info.env))) { - ldpp_dout(this, 1) << "topic '" << topic_name << "' contain secret and cannot be sent over insecure transport" << dendl; - op_ret = -EPERM; - return; - } - ldpp_dout(this, 1) << "successfully got topic '" << topic_name << "'" << dendl; -} - -// command (AWS compliant): -// POST -// Action=DeleteTopic&TopicArn= -class RGWPSDeleteTopicOp : public RGWOp { - private: - std::string topic_name; - std::optional ps; - - int get_params() { - const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn"))); - - if (!topic_arn || topic_arn->resource.empty()) { - ldpp_dout(this, 1) << "DeleteTopic Action 'TopicArn' argument is missing or invalid" << dendl; - return -EINVAL; - } - - topic_name = topic_arn->resource; - - // upon deletion it is not known if topic is persistent or not - // will try to delete the persistent topic anyway - const auto ret = rgw::notify::remove_persistent_topic(topic_name, s->yield); - if (ret == -ENOENT) { - // topic was not persistent, or already deleted - return 0; - } - if (ret < 0) { - ldpp_dout(this, 1) << "DeleteTopic Action failed to remove queue for persistent topics. 
error:" << ret << dendl; - return ret; - } - - return 0; - } - - public: - int verify_permission(optional_yield) override { - return 0; - } - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - void execute(optional_yield y) override; - - const char* name() const override { return "pubsub_topic_delete"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_DELETE; } - uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; } - - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - const auto f = s->formatter; - f->open_object_section_in_ns("DeleteTopicResponse", AWS_SNS_NS); - f->open_object_section("ResponseMetadata"); - encode_xml("RequestId", s->req_id, f); - f->close_section(); // ResponseMetadata - f->close_section(); // DeleteTopicResponse - rgw_flush_formatter_and_reset(s, f); - } -}; - -void RGWPSDeleteTopicOp::execute(optional_yield y) { - op_ret = get_params(); - if (op_ret < 0) { - return; - } - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - op_ret = ps->remove_topic(this, topic_name, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to remove topic '" << topic_name << ", ret=" << op_ret << dendl; - return; - } - ldpp_dout(this, 1) << "successfully removed topic '" << topic_name << "'" << dendl; -} - -namespace { -// utility classes and functions for handling parameters with the following format: -// Attributes.entry.{N}.{key|value}={VALUE} -// N - any unsigned number -// VALUE - url encoded string - -// and Attribute is holding key and value -// ctor and set are done according to the "type" argument -// if type is not "key" or "value" its a no-op -class Attribute { - std::string key; - std::string value; -public: - Attribute(const std::string& type, const std::string& key_or_value) { - set(type, key_or_value); - } - void set(const std::string& type, const std::string& key_or_value) { - if (type == "key") { - key = key_or_value; - } else if (type == "value") { - value = key_or_value; - } - } - const std::string& get_key() const { return key; } - const std::string& get_value() const { return value; } -}; - -using AttributeMap = std::map; - -// aggregate the attributes into a map -// the key and value are associated by the index (N) -// no assumptions are made on the order in which these parameters are added -void update_attribute_map(const std::string& input, AttributeMap& map) { - const boost::char_separator sep("."); - const boost::tokenizer tokens(input, sep); - auto token = tokens.begin(); - if (*token != "Attributes") { - return; - } - ++token; - - if (*token != "entry") { - return; - } - ++token; - - unsigned idx; - try { - idx = std::stoul(*token); - } catch (const std::invalid_argument&) { - return; - } - ++token; - - std::string key_or_value = ""; - // get the rest of the string regardless of dots - // this is to allow dots in the value - while (token != tokens.end()) { - key_or_value.append(*token+"."); - ++token; - } - // remove last separator - key_or_value.pop_back(); - - auto pos = key_or_value.find("="); - if (pos != std::string::npos) { - const auto key_or_value_lhs = key_or_value.substr(0, pos); - const auto key_or_value_rhs = url_decode(key_or_value.substr(pos + 1, key_or_value.size() - 1)); - const auto map_it = map.find(idx); - if (map_it == map.end()) { - // new entry - map.emplace(std::make_pair(idx, Attribute(key_or_value_lhs, key_or_value_rhs))); - } else { - // existing entry - 
map_it->second.set(key_or_value_lhs, key_or_value_rhs); - } - } -} -} - -void RGWHandler_REST_PSTopic_AWS::rgw_topic_parse_input() { - if (post_body.size() > 0) { - ldpp_dout(s, 10) << "Content of POST: " << post_body << dendl; - - if (post_body.find("Action") != std::string::npos) { - const boost::char_separator<char> sep("&"); - const boost::tokenizer<boost::char_separator<char>> tokens(post_body, sep); - AttributeMap map; - for (const auto& t : tokens) { - auto pos = t.find("="); - if (pos != std::string::npos) { - const auto key = t.substr(0, pos); - if (key == "Action") { - s->info.args.append(key, t.substr(pos + 1, t.size() - 1)); - } else if (key == "Name" || key == "TopicArn") { - const auto value = url_decode(t.substr(pos + 1, t.size() - 1)); - s->info.args.append(key, value); - } else { - update_attribute_map(t, map); - } - } - } - // update the regular args with the content of the attribute map - for (const auto& attr : map) { - s->info.args.append(attr.second.get_key(), attr.second.get_value()); - } - } - const auto payload_hash = rgw::auth::s3::calc_v4_payload_hash(post_body); - s->info.args.append("PayloadHash", payload_hash); - } -} - -RGWOp* RGWHandler_REST_PSTopic_AWS::op_post() { - rgw_topic_parse_input(); - - if (s->info.args.exists("Action")) { - const auto action = s->info.args.get("Action"); - if (action.compare("CreateTopic") == 0) - return new RGWPSCreateTopicOp(); - if (action.compare("DeleteTopic") == 0) - return new RGWPSDeleteTopicOp; - if (action.compare("ListTopics") == 0) - return new RGWPSListTopicsOp(); - if (action.compare("GetTopic") == 0) - return new RGWPSGetTopicOp(); - if (action.compare("GetTopicAttributes") == 0) - return new RGWPSGetTopicAttributesOp(); - } - - return nullptr; -} - -int RGWHandler_REST_PSTopic_AWS::authorize(const DoutPrefixProvider* dpp, optional_yield y) { - return RGW_Auth_S3::authorize(dpp, driver, auth_registry, s, y); -} - -namespace { -// return a unique topic by prefixing with the notification name: <notification>_<topic> -std::string topic_to_unique(const std::string& topic, const std::string& notification) { - return notification + "_" + topic; -} - -// extract the topic from a unique topic of the form: <notification>_<topic> -[[maybe_unused]] std::string unique_to_topic(const std::string& unique_topic, const std::string& notification) { - if (unique_topic.find(notification + "_") == std::string::npos) { - return ""; - } - return unique_topic.substr(notification.length() + 1); -} - -// from list of bucket topics, find the one that was auto-generated by a notification -auto find_unique_topic(const rgw_pubsub_bucket_topics& bucket_topics, const std::string& notif_name) { - auto it = std::find_if(bucket_topics.topics.begin(), bucket_topics.topics.end(), [&](const auto& val) { return notif_name == val.second.s3_id; }); - return it != bucket_topics.topics.end() ?
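`update_attribute_map()` above has to tolerate `Attributes.entry.{N}.key` and `Attributes.entry.{N}.value` arriving in either order, so it aggregates both halves into a map keyed by the index `N`. A boost-free sketch of the same aggregation; URL decoding and dotted-value handling are omitted, and the names are illustrative:

```cpp
#include <iostream>
#include <map>
#include <string>

// Minimal re-implementation of the Attributes.entry.{N}.{key|value} aggregation.
struct Attr { std::string key, value; };

void add_param(const std::string& name, const std::string& val,
               std::map<unsigned, Attr>& out) {
  const std::string prefix = "Attributes.entry.";
  if (name.rfind(prefix, 0) != 0) return;       // not an attribute parameter
  const auto rest = name.substr(prefix.size()); // "<N>.<key|value>"
  const auto dot = rest.find('.');
  if (dot == std::string::npos) return;
  unsigned idx;
  try { idx = std::stoul(rest.substr(0, dot)); } catch (...) { return; }
  auto& attr = out[idx];                        // halves may arrive in any order
  const auto type = rest.substr(dot + 1);
  if (type == "key") attr.key = val;
  else if (type == "value") attr.value = val;
}

int main() {
  std::map<unsigned, Attr> attrs;
  add_param("Attributes.entry.1.value", "amqp://localhost", attrs); // value first
  add_param("Attributes.entry.1.key", "push-endpoint", attrs);
  for (const auto& [idx, a] : attrs)
    std::cout << idx << ": " << a.key << " = " << a.value << "\n";
}
```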
- std::optional>(it->second): - std::nullopt; -} -} - -int remove_notification_by_topic(const DoutPrefixProvider *dpp, const std::string& topic_name, const RGWPubSub::BucketRef& b, optional_yield y, RGWPubSub& ps) { - int op_ret = b->remove_notification(dpp, topic_name, y); - if (op_ret < 0) { - ldpp_dout(dpp, 1) << "failed to remove notification of topic '" << topic_name << "', ret=" << op_ret << dendl; - } - op_ret = ps.remove_topic(dpp, topic_name, y); - if (op_ret < 0) { - ldpp_dout(dpp, 1) << "failed to remove auto-generated topic '" << topic_name << "', ret=" << op_ret << dendl; - } - return op_ret; -} - -int delete_all_notifications(const DoutPrefixProvider *dpp, const rgw_pubsub_bucket_topics& bucket_topics, const RGWPubSub::BucketRef& b, optional_yield y, RGWPubSub& ps) { - // delete all notifications of on a bucket - for (const auto& topic : bucket_topics.topics) { - const auto op_ret = remove_notification_by_topic(dpp, topic.first, b, y, ps); - if (op_ret < 0) { - return op_ret; - } - } - return 0; -} - -// command (S3 compliant): PUT /?notification -// a "notification" and a subscription will be auto-generated -// actual configuration is XML encoded in the body of the message -class RGWPSCreateNotifOp : public RGWDefaultResponseOp { - private: - std::optional ps; - std::string bucket_name; - RGWBucketInfo bucket_info; - rgw_pubsub_s3_notifications configurations; - - int get_params() { - bool exists; - const auto no_value = s->info.args.get("notification", &exists); - if (!exists) { - ldpp_dout(this, 1) << "missing required param 'notification'" << dendl; - return -EINVAL; - } - if (no_value.length() > 0) { - ldpp_dout(this, 1) << "param 'notification' should not have any value" << dendl; - return -EINVAL; - } - if (s->bucket_name.empty()) { - ldpp_dout(this, 1) << "request must be on a bucket" << dendl; - return -EINVAL; - } - bucket_name = s->bucket_name; - return 0; - } - - public: - int verify_permission(optional_yield y) override; - - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - - const char* name() const override { return "pubsub_notification_create_s3"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_CREATE; } - uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; } - - int get_params_from_body() { - const auto max_size = s->cct->_conf->rgw_max_put_param_size; - int r; - bufferlist data; - std::tie(r, data) = read_all_input(s, max_size, false); - - if (r < 0) { - ldpp_dout(this, 1) << "failed to read XML payload" << dendl; - return r; - } - if (data.length() == 0) { - ldpp_dout(this, 1) << "XML payload missing" << dendl; - return -EINVAL; - } - - RGWXMLDecoder::XMLParser parser; - - if (!parser.init()){ - ldpp_dout(this, 1) << "failed to initialize XML parser" << dendl; - return -EINVAL; - } - if (!parser.parse(data.c_str(), data.length(), 1)) { - ldpp_dout(this, 1) << "failed to parse XML payload" << dendl; - return -ERR_MALFORMED_XML; - } - try { - // NotificationConfigurations is mandatory - // It can be empty which means we delete all the notifications - RGWXMLDecoder::decode_xml("NotificationConfiguration", configurations, &parser, true); - } catch (RGWXMLDecoder::err& err) { - ldpp_dout(this, 1) << "failed to parse XML payload. 
error: " << err << dendl; - return -ERR_MALFORMED_XML; - } - return 0; - } - - void execute(optional_yield) override; -}; - -void RGWPSCreateNotifOp::execute(optional_yield y) { - op_ret = get_params_from_body(); - if (op_ret < 0) { - return; - } - - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - auto b = ps->get_bucket(bucket_info.bucket); - ceph_assert(b); - - if(configurations.list.empty()) { - // get all topics on a bucket - rgw_pubsub_bucket_topics bucket_topics; - op_ret = b->get_topics(&bucket_topics); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl; - return; - } - - op_ret = delete_all_notifications(this, bucket_topics, b, y, *ps); - return; - } - - for (const auto& c : configurations.list) { - const auto& notif_name = c.id; - if (notif_name.empty()) { - ldpp_dout(this, 1) << "missing notification id" << dendl; - op_ret = -EINVAL; - return; - } - if (c.topic_arn.empty()) { - ldpp_dout(this, 1) << "missing topic ARN in notification: '" << notif_name << "'" << dendl; - op_ret = -EINVAL; - return; - } - - const auto arn = rgw::ARN::parse(c.topic_arn); - if (!arn || arn->resource.empty()) { - ldpp_dout(this, 1) << "topic ARN has invalid format: '" << c.topic_arn << "' in notification: '" << notif_name << "'" << dendl; - op_ret = -EINVAL; - return; - } - - if (std::find(c.events.begin(), c.events.end(), rgw::notify::UnknownEvent) != c.events.end()) { - ldpp_dout(this, 1) << "unknown event type in notification: '" << notif_name << "'" << dendl; - op_ret = -EINVAL; - return; - } - - const auto topic_name = arn->resource; - - // get topic information. destination information is stored in the topic - rgw_pubsub_topic topic_info; - op_ret = ps->get_topic(topic_name, &topic_info); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; - return; - } - // make sure that full topic configuration match - // TODO: use ARN match function - - // create unique topic name. this has 2 reasons: - // (1) topics cannot be shared between different S3 notifications because they hold the filter information - // (2) make topic clneaup easier, when notification is removed - const auto unique_topic_name = topic_to_unique(topic_name, notif_name); - // generate the internal topic. 
destination is stored here for the "push-only" case - // when no subscription exists - // ARN is cached to make the "GET" method faster - op_ret = ps->create_topic(this, unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to auto-generate unique topic '" << unique_topic_name << - "', ret=" << op_ret << dendl; - return; - } - ldpp_dout(this, 20) << "successfully auto-generated unique topic '" << unique_topic_name << "'" << dendl; - // generate the notification - rgw::notify::EventTypeList events; - op_ret = b->create_notification(this, unique_topic_name, c.events, std::make_optional(c.filter), notif_name, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to auto-generate notification for unique topic '" << unique_topic_name << - "', ret=" << op_ret << dendl; - // rollback generated topic (ignore return value) - ps->remove_topic(this, unique_topic_name, y); - return; - } - ldpp_dout(this, 20) << "successfully auto-generated notification for unique topic '" << unique_topic_name << "'" << dendl; - } -} - -int RGWPSCreateNotifOp::verify_permission(optional_yield y) { - int ret = get_params(); - if (ret < 0) { - return ret; - } - - std::unique_ptr user = driver->get_user(s->owner.get_id()); - std::unique_ptr bucket; - ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name, &bucket, y); - if (ret < 0) { - ldpp_dout(this, 1) << "failed to get bucket info, cannot verify ownership" << dendl; - return ret; - } - bucket_info = bucket->get_info(); - - if (bucket_info.owner != s->owner.get_id()) { - ldpp_dout(this, 1) << "user doesn't own bucket, not allowed to create notification" << dendl; - return -EPERM; - } - return 0; -} - -// command (extension to S3): DELETE /bucket?notification[=] -class RGWPSDeleteNotifOp : public RGWDefaultResponseOp { - private: - std::optional ps; - std::string bucket_name; - RGWBucketInfo bucket_info; - std::string notif_name; - - public: - int verify_permission(optional_yield y) override; - - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - - const char* name() const override { return "pubsub_notification_delete_s3"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_DELETE; } - uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; } - - int get_params() { - bool exists; - notif_name = s->info.args.get("notification", &exists); - if (!exists) { - ldpp_dout(this, 1) << "missing required param 'notification'" << dendl; - return -EINVAL; - } - if (s->bucket_name.empty()) { - ldpp_dout(this, 1) << "request must be on a bucket" << dendl; - return -EINVAL; - } - bucket_name = s->bucket_name; - return 0; - } - - void execute(optional_yield y) override; -}; - -void RGWPSDeleteNotifOp::execute(optional_yield y) { - op_ret = get_params(); - if (op_ret < 0) { - return; - } - - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - auto b = ps->get_bucket(bucket_info.bucket); - ceph_assert(b); - - // get all topics on a bucket - rgw_pubsub_bucket_topics bucket_topics; - op_ret = b->get_topics(&bucket_topics); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl; - return; - } - - if (!notif_name.empty()) { - // delete a specific notification - const auto unique_topic = find_unique_topic(bucket_topics, notif_name); - if (unique_topic) { - const auto unique_topic_name = unique_topic->get().topic.name; - op_ret = 
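The `<notification-name>_<topic-name>` scheme used by `topic_to_unique()` keeps auto-generated topics private to one notification (each carries its own filter) and makes cleanup on notification delete straightforward. The round trip, restated as a standalone sketch; note this prefix test is slightly stricter than the `find() == npos` check in the original helper:

```cpp
#include <cassert>
#include <string>

// Standalone restatement of topic_to_unique()/unique_to_topic() above.
std::string topic_to_unique(const std::string& topic, const std::string& notif) {
  return notif + "_" + topic;
}

std::string unique_to_topic(const std::string& unique, const std::string& notif) {
  if (unique.rfind(notif + "_", 0) != 0) return ""; // not generated for this notification
  return unique.substr(notif.size() + 1);
}

int main() {
  const auto u = topic_to_unique("mytopic", "notif1"); // "notif1_mytopic"
  assert(unique_to_topic(u, "notif1") == "mytopic");
  assert(unique_to_topic(u, "other").empty());
  return 0;
}
```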
remove_notification_by_topic(this, unique_topic_name, b, y, *ps); - return; - } - // notification to be removed is not found - considered success - ldpp_dout(this, 20) << "notification '" << notif_name << "' already removed" << dendl; - return; - } - - op_ret = delete_all_notifications(this, bucket_topics, b, y, *ps); -} - -int RGWPSDeleteNotifOp::verify_permission(optional_yield y) { - int ret = get_params(); - if (ret < 0) { - return ret; - } - - std::unique_ptr user = driver->get_user(s->owner.get_id()); - std::unique_ptr bucket; - ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name, &bucket, y); - if (ret < 0) { - return ret; - } - bucket_info = bucket->get_info(); - - if (bucket_info.owner != s->owner.get_id()) { - ldpp_dout(this, 1) << "user doesn't own bucket, cannot remove notification" << dendl; - return -EPERM; - } - return 0; -} - -// command (S3 compliant): GET /bucket?notification[=] -class RGWPSListNotifsOp : public RGWOp { -private: - std::string bucket_name; - RGWBucketInfo bucket_info; - std::optional ps; - std::string notif_name; - rgw_pubsub_s3_notifications notifications; - - int get_params() { - bool exists; - notif_name = s->info.args.get("notification", &exists); - if (!exists) { - ldpp_dout(this, 1) << "missing required param 'notification'" << dendl; - return -EINVAL; - } - if (s->bucket_name.empty()) { - ldpp_dout(this, 1) << "request must be on a bucket" << dendl; - return -EINVAL; - } - bucket_name = s->bucket_name; - return 0; - } - - public: - int verify_permission(optional_yield y) override; - - void pre_exec() override { - rgw_bucket_object_pre_exec(s); - } - - const char* name() const override { return "pubsub_notifications_get_s3"; } - RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_LIST; } - uint32_t op_mask() override { return RGW_OP_TYPE_READ; } - - void execute(optional_yield y) override; - void send_response() override { - if (op_ret) { - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - notifications.dump_xml(s->formatter); - rgw_flush_formatter_and_reset(s, s->formatter); - } -}; - -void RGWPSListNotifsOp::execute(optional_yield y) { - ps.emplace(static_cast(driver), s->owner.get_id().tenant); - auto b = ps->get_bucket(bucket_info.bucket); - ceph_assert(b); - - // get all topics on a bucket - rgw_pubsub_bucket_topics bucket_topics; - op_ret = b->get_topics(&bucket_topics); - if (op_ret < 0) { - ldpp_dout(this, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl; - return; - } - if (!notif_name.empty()) { - // get info of a specific notification - const auto unique_topic = find_unique_topic(bucket_topics, notif_name); - if (unique_topic) { - notifications.list.emplace_back(unique_topic->get()); - return; - } - op_ret = -ENOENT; - ldpp_dout(this, 1) << "failed to get notification info for '" << notif_name << "', ret=" << op_ret << dendl; - return; - } - // loop through all topics of the bucket - for (const auto& topic : bucket_topics.topics) { - if (topic.second.s3_id.empty()) { - // not an s3 notification - continue; - } - notifications.list.emplace_back(topic.second); - } -} - -int RGWPSListNotifsOp::verify_permission(optional_yield y) { - int ret = get_params(); - if (ret < 0) { - return ret; - } - - std::unique_ptr user = driver->get_user(s->owner.get_id()); - std::unique_ptr bucket; - ret = driver->get_bucket(this, user.get(), s->owner.get_id().tenant, bucket_name, 
&bucket, y); - if (ret < 0) { - return ret; - } - bucket_info = bucket->get_info(); - - if (bucket_info.owner != s->owner.get_id()) { - ldpp_dout(this, 1) << "user doesn't own bucket, cannot get notification list" << dendl; - return -EPERM; - } - - return 0; -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::op_get() { - return new RGWPSListNotifsOp(); -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::op_put() { - return new RGWPSCreateNotifOp(); -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::op_delete() { - return new RGWPSDeleteNotifOp(); -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::create_get_op() { - return new RGWPSListNotifsOp(); -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::create_put_op() { - return new RGWPSCreateNotifOp(); -} - -RGWOp* RGWHandler_REST_PSNotifs_S3::create_delete_op() { - return new RGWPSDeleteNotifOp(); -} - diff --git a/src/rgw/store/rados/rgw_rest_pubsub.h b/src/rgw/store/rados/rgw_rest_pubsub.h deleted file mode 100644 index 3b1a1bc9670..00000000000 --- a/src/rgw/store/rados/rgw_rest_pubsub.h +++ /dev/null @@ -1,39 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#pragma once - -#include "rgw_rest_s3.h" - -// s3 compliant notification handler factory -class RGWHandler_REST_PSNotifs_S3 : public RGWHandler_REST_S3 { -protected: - int init_permissions(RGWOp* op, optional_yield y) override {return 0;} - int read_permissions(RGWOp* op, optional_yield y) override {return 0;} - bool supports_quota() override {return false;} - RGWOp* op_get() override; - RGWOp* op_put() override; - RGWOp* op_delete() override; -public: - using RGWHandler_REST_S3::RGWHandler_REST_S3; - virtual ~RGWHandler_REST_PSNotifs_S3() = default; - // following are used to generate the operations when invoked by another REST handler - static RGWOp* create_get_op(); - static RGWOp* create_put_op(); - static RGWOp* create_delete_op(); -}; - -// AWS compliant topics handler factory -class RGWHandler_REST_PSTopic_AWS : public RGWHandler_REST { - const rgw::auth::StrategyRegistry& auth_registry; - const std::string& post_body; - void rgw_topic_parse_input(); -protected: - RGWOp* op_post() override; -public: - RGWHandler_REST_PSTopic_AWS(const rgw::auth::StrategyRegistry& _auth_registry, const std::string& _post_body) : - auth_registry(_auth_registry), - post_body(_post_body) {} - virtual ~RGWHandler_REST_PSTopic_AWS() = default; - int postauth_init(optional_yield) override { return 0; } - int authorize(const DoutPrefixProvider* dpp, optional_yield y) override; -}; diff --git a/src/rgw/store/rados/rgw_rest_realm.cc b/src/rgw/store/rados/rgw_rest_realm.cc deleted file mode 100644 index 79640a2a108..00000000000 --- a/src/rgw/store/rados/rgw_rest_realm.cc +++ /dev/null @@ -1,376 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" -#include "rgw_rest_realm.h" -#include "rgw_rest_s3.h" -#include "rgw_rest_config.h" -#include "rgw_zone.h" -#include "rgw_sal_rados.h" - -#include "services/svc_zone.h" -#include "services/svc_mdlog.h" - -#include "include/ceph_assert.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -// reject 'period push' if we would have to fetch too many intermediate periods -static const uint32_t PERIOD_HISTORY_FETCH_MAX = 64; - -// base period op, shared between Get and Post -class RGWOp_Period_Base : public RGWRESTOp { - protected: - RGWPeriod period; - std::ostringstream error_stream; - public: - int verify_permission(optional_yield) override { 
return 0; } - void send_response() override; -}; - -// reply with the period object on success -void RGWOp_Period_Base::send_response() -{ - set_req_state_err(s, op_ret, error_stream.str()); - dump_errno(s); - - if (op_ret < 0) { - if (!s->err.message.empty()) { - ldpp_dout(this, 4) << "Request failed with " << op_ret - << ": " << s->err.message << dendl; - } - end_header(s); - return; - } - - encode_json("period", period, s->formatter); - end_header(s, NULL, "application/json", s->formatter->get_len()); - flusher.flush(); -} - -// GET /admin/realm/period -class RGWOp_Period_Get : public RGWOp_Period_Base { - public: - void execute(optional_yield y) override; - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("zone", RGW_CAP_READ); - } - int verify_permission(optional_yield) override { - return check_caps(s->user->get_caps()); - } - const char* name() const override { return "get_period"; } -}; - -void RGWOp_Period_Get::execute(optional_yield y) -{ - string realm_id, realm_name, period_id; - epoch_t epoch = 0; - RESTArgs::get_string(s, "realm_id", realm_id, &realm_id); - RESTArgs::get_string(s, "realm_name", realm_name, &realm_name); - RESTArgs::get_string(s, "period_id", period_id, &period_id); - RESTArgs::get_uint32(s, "epoch", 0, &epoch); - - period.set_id(period_id); - period.set_epoch(epoch); - - op_ret = period.init(this, driver->ctx(), static_cast(driver)->svc()->sysobj, realm_id, y, realm_name); - if (op_ret < 0) - ldpp_dout(this, 5) << "failed to read period" << dendl; -} - -// POST /admin/realm/period -class RGWOp_Period_Post : public RGWOp_Period_Base { - public: - void execute(optional_yield y) override; - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("zone", RGW_CAP_WRITE); - } - int verify_permission(optional_yield) override { - return check_caps(s->user->get_caps()); - } - const char* name() const override { return "post_period"; } - RGWOpType get_type() override { return RGW_OP_PERIOD_POST; } -}; - -void RGWOp_Period_Post::execute(optional_yield y) -{ - auto cct = driver->ctx(); - - // initialize the period without reading from rados - period.init(this, cct, static_cast(driver)->svc()->sysobj, y, false); - - // decode the period from input - const auto max_size = cct->_conf->rgw_max_put_param_size; - bool empty; - op_ret = get_json_input(cct, s, period, max_size, &empty); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to decode period" << dendl; - return; - } - - // require period.realm_id to match our realm - if (period.get_realm() != static_cast(driver)->svc()->zone->get_realm().get_id()) { - error_stream << "period with realm id " << period.get_realm() - << " doesn't match current realm " << static_cast(driver)->svc()->zone->get_realm().get_id() << std::endl; - op_ret = -EINVAL; - return; - } - - // load the realm and current period from rados; there may be a more recent - // period that we haven't restarted with yet. 
we also don't want to modify - // the objects in use by RGWRados - RGWRealm realm(period.get_realm()); - op_ret = realm.init(this, cct, static_cast(driver)->svc()->sysobj, y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to read current realm: " - << cpp_strerror(-op_ret) << dendl; - return; - } - - RGWPeriod current_period; - op_ret = current_period.init(this, cct, static_cast(driver)->svc()->sysobj, realm.get_id(), y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to read current period: " - << cpp_strerror(-op_ret) << dendl; - return; - } - - // if period id is empty, handle as 'period commit' - if (period.get_id().empty()) { - op_ret = period.commit(this, driver, realm, current_period, error_stream, y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "master zone failed to commit period" << dendl; - } - return; - } - - // if it's not period commit, nobody is allowed to push to the master zone - if (period.get_master_zone() == static_cast(driver)->svc()->zone->get_zone_params().get_id()) { - ldpp_dout(this, 10) << "master zone rejecting period id=" - << period.get_id() << " epoch=" << period.get_epoch() << dendl; - op_ret = -EINVAL; // XXX: error code - return; - } - - // write the period to rados - op_ret = period.store_info(this, false, y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to store period " << period.get_id() << dendl; - return; - } - // set as latest epoch - op_ret = period.update_latest_epoch(this, period.get_epoch(), y); - if (op_ret == -EEXIST) { - // already have this epoch (or a more recent one) - ldpp_dout(this, 4) << "already have epoch >= " << period.get_epoch() - << " for period " << period.get_id() << dendl; - op_ret = 0; - return; - } - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to set latest epoch" << dendl; - return; - } - - auto period_history = static_cast(driver)->svc()->mdlog->get_period_history(); - - // decide whether we can set_current_period() or set_latest_epoch() - if (period.get_id() != current_period.get_id()) { - auto current_epoch = current_period.get_realm_epoch(); - // discard periods in the past - if (period.get_realm_epoch() < current_epoch) { - ldpp_dout(this, 10) << "discarding period " << period.get_id() - << " with realm epoch " << period.get_realm_epoch() - << " older than current epoch " << current_epoch << dendl; - // return success to ack that we have this period - return; - } - // discard periods too far in the future - if (period.get_realm_epoch() > current_epoch + PERIOD_HISTORY_FETCH_MAX) { - ldpp_dout(this, -1) << "discarding period " << period.get_id() - << " with realm epoch " << period.get_realm_epoch() << " too far in " - "the future from current epoch " << current_epoch << dendl; - op_ret = -ENOENT; // XXX: error code - return; - } - // attach a copy of the period into the period history - auto cursor = period_history->attach(this, RGWPeriod{period}, y); - if (!cursor) { - // we're missing some history between the new period and current_period - op_ret = cursor.get_error(); - ldpp_dout(this, -1) << "failed to collect the periods between current period " - << current_period.get_id() << " (realm epoch " << current_epoch - << ") and the new period " << period.get_id() - << " (realm epoch " << period.get_realm_epoch() - << "): " << cpp_strerror(-op_ret) << dendl; - return; - } - if (cursor.has_next()) { - // don't switch if we have a newer period in our history - ldpp_dout(this, 4) << "attached period " << period.get_id() - << " to history, but the history contains newer periods" << dendl; - return; 
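The epoch checks above amount to a small acceptance window. A standalone sketch of the decision (the enum and function names are illustrative; the bound is PERIOD_HISTORY_FETCH_MAX, 64, from earlier in this file):

```cpp
#include <cstdint>

// Mirrors the window enforced by RGWOp_Period_Post::execute() above: stale
// periods are acked but ignored, and periods too far ahead are refused so the
// gateway never has to fetch an unbounded run of intermediate periods.
enum class PeriodDecision { AckAndIgnore, Accept, Reject };

PeriodDecision classify_period(uint64_t current_epoch, uint64_t new_epoch,
                               uint32_t fetch_max /* PERIOD_HISTORY_FETCH_MAX */) {
  if (new_epoch < current_epoch) return PeriodDecision::AckAndIgnore; // in the past
  if (new_epoch > current_epoch + fetch_max) return PeriodDecision::Reject; // too far ahead
  return PeriodDecision::Accept; // attach to the period history
}
```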
- } - // set as current period - op_ret = realm.set_current_period(this, period, y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to update realm's current period" << dendl; - return; - } - ldpp_dout(this, 4) << "period " << period.get_id() - << " is newer than current period " << current_period.get_id() - << ", updating realm's current period and notifying zone" << dendl; - realm.notify_new_period(this, period, y); - return; - } - // reflect the period into our local objects - op_ret = period.reflect(this, y); - if (op_ret < 0) { - ldpp_dout(this, -1) << "failed to update local objects: " - << cpp_strerror(-op_ret) << dendl; - return; - } - ldpp_dout(this, 4) << "period epoch " << period.get_epoch() - << " is newer than current epoch " << current_period.get_epoch() - << ", updating period's latest epoch and notifying zone" << dendl; - realm.notify_new_period(this, period, y); - // update the period history - period_history->insert(RGWPeriod{period}); -} - -class RGWHandler_Period : public RGWHandler_Auth_S3 { - protected: - using RGWHandler_Auth_S3::RGWHandler_Auth_S3; - - RGWOp *op_get() override { return new RGWOp_Period_Get; } - RGWOp *op_post() override { return new RGWOp_Period_Post; } -}; - -class RGWRESTMgr_Period : public RGWRESTMgr { - public: - RGWHandler_REST* get_handler(rgw::sal::Driver* driver, - req_state*, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string&) override { - return new RGWHandler_Period(auth_registry); - } -}; - - -// GET /admin/realm -class RGWOp_Realm_Get : public RGWRESTOp { - std::unique_ptr realm; -public: - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("zone", RGW_CAP_READ); - } - int verify_permission(optional_yield) override { - return check_caps(s->user->get_caps()); - } - void execute(optional_yield y) override; - void send_response() override; - const char* name() const override { return "get_realm"; } -}; - -void RGWOp_Realm_Get::execute(optional_yield y) -{ - string id; - RESTArgs::get_string(s, "id", id, &id); - string name; - RESTArgs::get_string(s, "name", name, &name); - - // read realm - realm.reset(new RGWRealm(id, name)); - op_ret = realm->init(this, g_ceph_context, static_cast(driver)->svc()->sysobj, y); - if (op_ret < 0) - ldpp_dout(this, -1) << "failed to read realm id=" << id - << " name=" << name << dendl; -} - -void RGWOp_Realm_Get::send_response() -{ - set_req_state_err(s, op_ret); - dump_errno(s); - - if (op_ret < 0) { - end_header(s); - return; - } - - encode_json("realm", *realm, s->formatter); - end_header(s, NULL, "application/json", s->formatter->get_len()); - flusher.flush(); -} - -// GET /admin/realm?list -class RGWOp_Realm_List : public RGWRESTOp { - std::string default_id; - std::list realms; -public: - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("zone", RGW_CAP_READ); - } - int verify_permission(optional_yield) override { - return check_caps(s->user->get_caps()); - } - void execute(optional_yield y) override; - void send_response() override; - const char* name() const override { return "list_realms"; } -}; - -void RGWOp_Realm_List::execute(optional_yield y) -{ - { - // read default realm - RGWRealm realm(driver->ctx(), static_cast(driver)->svc()->sysobj); - [[maybe_unused]] int ret = realm.read_default_id(this, default_id, y); - } - op_ret = static_cast(driver)->svc()->zone->list_realms(this, realms); - if (op_ret < 0) - ldpp_dout(this, -1) << "failed to list realms" << dendl; -} - -void RGWOp_Realm_List::send_response() -{ - 
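For reference, a hypothetical body produced by the realm listing response below (the realm id and name are made up; `default_info` may be empty, since reading the default realm id above is best-effort):

```cpp
// Illustrative only: plausible JSON emitted by RGWOp_Realm_List::send_response()
// for a deployment with a single realm named "myrealm".
const char* example_realm_list_body = R"({
  "default_info": "3f0c9a2e-8b41-4d7a-9c55-6f1e2b7d8a90",
  "realms": ["myrealm"]
})";
```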
set_req_state_err(s, op_ret); - dump_errno(s); - - if (op_ret < 0) { - end_header(s); - return; - } - - s->formatter->open_object_section("realms_list"); - encode_json("default_info", default_id, s->formatter); - encode_json("realms", realms, s->formatter); - s->formatter->close_section(); - end_header(s, NULL, "application/json", s->formatter->get_len()); - flusher.flush(); -} - -class RGWHandler_Realm : public RGWHandler_Auth_S3 { -protected: - using RGWHandler_Auth_S3::RGWHandler_Auth_S3; - RGWOp *op_get() override { - if (s->info.args.sub_resource_exists("list")) - return new RGWOp_Realm_List; - return new RGWOp_Realm_Get; - } -}; - -RGWRESTMgr_Realm::RGWRESTMgr_Realm() -{ - // add the /admin/realm/period resource - register_resource("period", new RGWRESTMgr_Period); -} - -RGWHandler_REST* -RGWRESTMgr_Realm::get_handler(rgw::sal::Driver* driver, - req_state*, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string&) -{ - return new RGWHandler_Realm(auth_registry); -} diff --git a/src/rgw/store/rados/rgw_rest_realm.h b/src/rgw/store/rados/rgw_rest_realm.h deleted file mode 100644 index a0d1dc1c92a..00000000000 --- a/src/rgw/store/rados/rgw_rest_realm.h +++ /dev/null @@ -1,16 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include "rgw_rest.h" - -class RGWRESTMgr_Realm : public RGWRESTMgr { -public: - RGWRESTMgr_Realm(); - - RGWHandler_REST* get_handler(rgw::sal::Driver* driver, - req_state*, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string&) override; -}; diff --git a/src/rgw/store/rados/rgw_rest_user.cc b/src/rgw/store/rados/rgw_rest_user.cc deleted file mode 100644 index c2aeece247d..00000000000 --- a/src/rgw/store/rados/rgw_rest_user.cc +++ /dev/null @@ -1,1109 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/ceph_json.h" - -#include "rgw_op.h" -#include "rgw_user.h" -#include "rgw_rest_user.h" -#include "rgw_sal.h" - -#include "include/str_list.h" -#include "include/ceph_assert.h" - -#include "services/svc_zone.h" -#include "services/svc_sys_obj.h" -#include "rgw_zone.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -class RGWOp_User_List : public RGWRESTOp { - -public: - RGWOp_User_List() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_READ); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "list_user"; } -}; - -void RGWOp_User_List::execute(optional_yield y) -{ - RGWUserAdminOpState op_state(driver); - - uint32_t max_entries; - std::string marker; - RESTArgs::get_uint32(s, "max-entries", 1000, &max_entries); - RESTArgs::get_string(s, "marker", marker, &marker); - - op_state.max_entries = max_entries; - op_state.marker = marker; - op_ret = RGWUserAdminOp_User::list(this, driver, op_state, flusher); -} - -class RGWOp_User_Info : public RGWRESTOp { - -public: - RGWOp_User_Info() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_READ); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "get_user_info"; } -}; - -void RGWOp_User_Info::execute(optional_yield y) -{ - RGWUserAdminOpState op_state(driver); - - std::string uid_str, access_key_str; - bool fetch_stats; - bool sync_stats; - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - RESTArgs::get_string(s, 
"access-key", access_key_str, &access_key_str); - - // if uid was not supplied in rest argument, error out now, otherwise we'll - // end up initializing anonymous user, for which keys.init will eventually - // return -EACESS - if (uid_str.empty() && access_key_str.empty()){ - op_ret=-EINVAL; - return; - } - - rgw_user uid(uid_str); - - RESTArgs::get_bool(s, "stats", false, &fetch_stats); - - RESTArgs::get_bool(s, "sync", false, &sync_stats); - - op_state.set_user_id(uid); - op_state.set_access_key(access_key_str); - op_state.set_fetch_stats(fetch_stats); - op_state.set_sync_stats(sync_stats); - - op_ret = RGWUserAdminOp_User::info(s, driver, op_state, flusher, y); -} - -class RGWOp_User_Create : public RGWRESTOp { - -public: - RGWOp_User_Create() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "create_user"; } -}; - -void RGWOp_User_Create::execute(optional_yield y) -{ - std::string uid_str; - std::string display_name; - std::string email; - std::string access_key; - std::string secret_key; - std::string key_type_str; - std::string caps; - std::string tenant_name; - std::string op_mask_str; - std::string default_placement_str; - std::string placement_tags_str; - - bool gen_key; - bool suspended; - bool system; - bool exclusive; - - int32_t max_buckets; - const int32_t default_max_buckets = - s->cct->_conf.get_val("rgw_user_max_buckets"); - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "display-name", display_name, &display_name); - RESTArgs::get_string(s, "email", email, &email); - RESTArgs::get_string(s, "access-key", access_key, &access_key); - RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - RESTArgs::get_string(s, "user-caps", caps, &caps); - RESTArgs::get_string(s, "tenant", tenant_name, &tenant_name); - RESTArgs::get_bool(s, "generate-key", true, &gen_key); - RESTArgs::get_bool(s, "suspended", false, &suspended); - RESTArgs::get_int32(s, "max-buckets", default_max_buckets, &max_buckets); - RESTArgs::get_bool(s, "system", false, &system); - RESTArgs::get_bool(s, "exclusive", false, &exclusive); - RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str); - RESTArgs::get_string(s, "default-placement", default_placement_str, &default_placement_str); - RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); - - if (!s->user->get_info().system && system) { - ldpp_dout(this, 0) << "cannot set system flag by non-system user" << dendl; - op_ret = -EINVAL; - return; - } - - if (!tenant_name.empty()) { - uid.tenant = tenant_name; - } - - // TODO: validate required args are passed in. (for eg. 
uid and display_name here) - op_state.set_user_id(uid); - op_state.set_display_name(display_name); - op_state.set_user_email(email); - op_state.set_caps(caps); - op_state.set_access_key(access_key); - op_state.set_secret_key(secret_key); - - if (!op_mask_str.empty()) { - uint32_t op_mask; - int ret = rgw_parse_op_type_list(op_mask_str, &op_mask); - if (ret < 0) { - ldpp_dout(this, 0) << "failed to parse op_mask: " << ret << dendl; - op_ret = -EINVAL; - return; - } - op_state.set_op_mask(op_mask); - } - - if (!key_type_str.empty()) { - int32_t key_type = KEY_TYPE_UNDEFINED; - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - - op_state.set_key_type(key_type); - } - - if (max_buckets != default_max_buckets) { - if (max_buckets < 0) { - max_buckets = -1; - } - op_state.set_max_buckets(max_buckets); - } - if (s->info.args.exists("suspended")) - op_state.set_suspension(suspended); - - if (s->info.args.exists("system")) - op_state.set_system(system); - - if (s->info.args.exists("exclusive")) - op_state.set_exclusive(exclusive); - - if (gen_key) - op_state.set_generate_key(); - - if (!default_placement_str.empty()) { - rgw_placement_rule target_rule; - target_rule.from_str(default_placement_str); - if (!driver->valid_placement(target_rule)) { - ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl; - op_ret = -EINVAL; - return; - } - op_state.set_default_placement(target_rule); - } - - if (!placement_tags_str.empty()) { - list<string> placement_tags_list; - get_str_list(placement_tags_str, ",", placement_tags_list); - op_state.set_placement_tags(placement_tags_list); - } - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_User::create(s, driver, op_state, flusher, y); -} - -class RGWOp_User_Modify : public RGWRESTOp { - -public: - RGWOp_User_Modify() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "modify_user"; } -}; - -void RGWOp_User_Modify::execute(optional_yield y) -{ - std::string uid_str; - std::string display_name; - std::string email; - std::string access_key; - std::string secret_key; - std::string key_type_str; - std::string op_mask_str; - std::string default_placement_str; - std::string placement_tags_str; - - bool gen_key; - bool suspended; - bool system; - bool email_set; - bool quota_set; - int32_t max_buckets; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "display-name", display_name, &display_name); - RESTArgs::get_string(s, "email", email, &email, &email_set); - RESTArgs::get_string(s, "access-key", access_key, &access_key); - RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_bool(s, "generate-key", false, &gen_key); - RESTArgs::get_bool(s, "suspended", false, &suspended); - RESTArgs::get_int32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets, &quota_set); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - - RESTArgs::get_bool(s, "system", false, &system); - RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str); - RESTArgs::get_string(s,
"default-placement", default_placement_str, &default_placement_str); - RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); - - if (!s->user->get_info().system && system) { - ldpp_dout(this, 0) << "cannot set system flag by non-system user" << dendl; - op_ret = -EINVAL; - return; - } - - op_state.set_user_id(uid); - op_state.set_display_name(display_name); - - if (email_set) - op_state.set_user_email(email); - - op_state.set_access_key(access_key); - op_state.set_secret_key(secret_key); - - if (quota_set) { - if (max_buckets < 0 ) { - max_buckets = -1; - } - op_state.set_max_buckets(max_buckets); - } - if (gen_key) - op_state.set_generate_key(); - - if (!key_type_str.empty()) { - int32_t key_type = KEY_TYPE_UNDEFINED; - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - - op_state.set_key_type(key_type); - } - - if (!op_mask_str.empty()) { - uint32_t op_mask; - if (rgw_parse_op_type_list(op_mask_str, &op_mask) < 0) { - ldpp_dout(this, 0) << "failed to parse op_mask" << dendl; - op_ret = -EINVAL; - return; - } - op_state.set_op_mask(op_mask); - } - - if (s->info.args.exists("suspended")) - op_state.set_suspension(suspended); - - if (s->info.args.exists("system")) - op_state.set_system(system); - - if (!op_mask_str.empty()) { - uint32_t op_mask; - int ret = rgw_parse_op_type_list(op_mask_str, &op_mask); - if (ret < 0) { - ldpp_dout(this, 0) << "failed to parse op_mask: " << ret << dendl; - op_ret = -EINVAL; - return; - } - op_state.set_op_mask(op_mask); - } - - if (!default_placement_str.empty()) { - rgw_placement_rule target_rule; - target_rule.from_str(default_placement_str); - if (!driver->valid_placement(target_rule)) { - ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl; - op_ret = -EINVAL; - return; - } - op_state.set_default_placement(target_rule); - } - - if (!placement_tags_str.empty()) { - list placement_tags_list; - get_str_list(placement_tags_str, ",", placement_tags_list); - op_state.set_placement_tags(placement_tags_list); - } - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_User::modify(s, driver, op_state, flusher, y); -} - -class RGWOp_User_Remove : public RGWRESTOp { - -public: - RGWOp_User_Remove() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "remove_user"; } -}; - -void RGWOp_User_Remove::execute(optional_yield y) -{ - std::string uid_str; - bool purge_data; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_bool(s, "purge-data", false, &purge_data); - - // FIXME: no double checking - if (!uid.empty()) - op_state.set_user_id(uid); - - op_state.set_purge_data(purge_data); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_User::remove(s, driver, op_state, flusher, s->yield); -} - -class RGWOp_Subuser_Create : public RGWRESTOp { - -public: - 
RGWOp_Subuser_Create() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "create_subuser"; } -}; - -void RGWOp_Subuser_Create::execute(optional_yield y) -{ - std::string uid_str; - std::string subuser; - std::string secret_key; - std::string access_key; - std::string perm_str; - std::string key_type_str; - - bool gen_subuser = false; // FIXME placeholder - bool gen_secret; - bool gen_access; - - uint32_t perm_mask = 0; - int32_t key_type = KEY_TYPE_SWIFT; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "subuser", subuser, &subuser); - RESTArgs::get_string(s, "access-key", access_key, &access_key); - RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_string(s, "access", perm_str, &perm_str); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - RESTArgs::get_bool(s, "generate-secret", false, &gen_secret); - RESTArgs::get_bool(s, "gen-access-key", false, &gen_access); - - perm_mask = rgw_str_to_perm(perm_str.c_str()); - op_state.set_perm(perm_mask); - - op_state.set_user_id(uid); - op_state.set_subuser(subuser); - op_state.set_access_key(access_key); - op_state.set_secret_key(secret_key); - op_state.set_generate_subuser(gen_subuser); - - if (gen_access) - op_state.set_gen_access(); - - if (gen_secret) - op_state.set_gen_secret(); - - if (!key_type_str.empty()) { - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - } - op_state.set_key_type(key_type); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_Subuser::create(s, driver, op_state, flusher, y); -} - -class RGWOp_Subuser_Modify : public RGWRESTOp { - -public: - RGWOp_Subuser_Modify() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "modify_subuser"; } -}; - -void RGWOp_Subuser_Modify::execute(optional_yield y) -{ - std::string uid_str; - std::string subuser; - std::string secret_key; - std::string key_type_str; - std::string perm_str; - - RGWUserAdminOpState op_state(driver); - - uint32_t perm_mask; - int32_t key_type = KEY_TYPE_SWIFT; - - bool gen_secret; - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "subuser", subuser, &subuser); - RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_string(s, "access", perm_str, &perm_str); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - RESTArgs::get_bool(s, "generate-secret", false, &gen_secret); - - perm_mask = rgw_str_to_perm(perm_str.c_str()); - op_state.set_perm(perm_mask); - - op_state.set_user_id(uid); - op_state.set_subuser(subuser); - - if (!secret_key.empty()) - op_state.set_secret_key(secret_key); - - if (gen_secret) - op_state.set_gen_secret(); - - if (!key_type_str.empty()) { - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - } - 
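The "swift"/"s3" string-to-enum mapping just above is repeated almost verbatim by several ops in this file; a hypothetical helper that factors it out might look like this (the constants are local stand-ins for the KEY_TYPE_* values used above, not the real definitions):

```cpp
#include <cstdint>
#include <string>

// Hypothetical refactor, not part of the tree: the same mapping appears in
// RGWOp_User_Create, RGWOp_User_Modify, the subuser ops and the key ops.
enum : int32_t { EX_KEY_TYPE_SWIFT, EX_KEY_TYPE_S3, EX_KEY_TYPE_UNDEFINED };

int32_t parse_key_type(const std::string& s, int32_t dflt = EX_KEY_TYPE_UNDEFINED) {
  if (s == "swift") return EX_KEY_TYPE_SWIFT;
  if (s == "s3") return EX_KEY_TYPE_S3;
  return dflt; // unrecognized strings keep the caller's default
}
```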
op_state.set_key_type(key_type); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_Subuser::modify(s, driver, op_state, flusher, y); -} - -class RGWOp_Subuser_Remove : public RGWRESTOp { - -public: - RGWOp_Subuser_Remove() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "remove_subuser"; } -}; - -void RGWOp_Subuser_Remove::execute(optional_yield y) -{ - std::string uid_str; - std::string subuser; - bool purge_keys; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "subuser", subuser, &subuser); - RESTArgs::get_bool(s, "purge-keys", true, &purge_keys); - - op_state.set_user_id(uid); - op_state.set_subuser(subuser); - - if (purge_keys) - op_state.set_purge_keys(); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_Subuser::remove(s, driver, op_state, flusher, y); -} - -class RGWOp_Key_Create : public RGWRESTOp { - -public: - RGWOp_Key_Create() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "create_access_key"; } -}; - -void RGWOp_Key_Create::execute(optional_yield y) -{ - std::string uid_str; - std::string subuser; - std::string access_key; - std::string secret_key; - std::string key_type_str; - - bool gen_key; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "subuser", subuser, &subuser); - RESTArgs::get_string(s, "access-key", access_key, &access_key); - RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - RESTArgs::get_bool(s, "generate-key", true, &gen_key); - - op_state.set_user_id(uid); - op_state.set_subuser(subuser); - op_state.set_access_key(access_key); - op_state.set_secret_key(secret_key); - - if (gen_key) - op_state.set_generate_key(); - - if (!key_type_str.empty()) { - int32_t key_type = KEY_TYPE_UNDEFINED; - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - - op_state.set_key_type(key_type); - } - - op_ret = RGWUserAdminOp_Key::create(s, driver, op_state, flusher, y); -} - -class RGWOp_Key_Remove : public RGWRESTOp { - -public: - RGWOp_Key_Remove() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "remove_access_key"; } -}; - -void RGWOp_Key_Remove::execute(optional_yield y) -{ - std::string uid_str; - std::string subuser; - std::string access_key; - std::string key_type_str; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, 
"subuser", subuser, &subuser); - RESTArgs::get_string(s, "access-key", access_key, &access_key); - RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); - - op_state.set_user_id(uid); - op_state.set_subuser(subuser); - op_state.set_access_key(access_key); - - if (!key_type_str.empty()) { - int32_t key_type = KEY_TYPE_UNDEFINED; - if (key_type_str.compare("swift") == 0) - key_type = KEY_TYPE_SWIFT; - else if (key_type_str.compare("s3") == 0) - key_type = KEY_TYPE_S3; - - op_state.set_key_type(key_type); - } - - op_ret = RGWUserAdminOp_Key::remove(s, driver, op_state, flusher, y); -} - -class RGWOp_Caps_Add : public RGWRESTOp { - -public: - RGWOp_Caps_Add() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "add_user_caps"; } -}; - -void RGWOp_Caps_Add::execute(optional_yield y) -{ - std::string uid_str; - std::string caps; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "user-caps", caps, &caps); - - op_state.set_user_id(uid); - op_state.set_caps(caps); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_Caps::add(s, driver, op_state, flusher, y); -} - -class RGWOp_Caps_Remove : public RGWRESTOp { - -public: - RGWOp_Caps_Remove() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "remove_user_caps"; } -}; - -void RGWOp_Caps_Remove::execute(optional_yield y) -{ - std::string uid_str; - std::string caps; - - RGWUserAdminOpState op_state(driver); - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - rgw_user uid(uid_str); - - RESTArgs::get_string(s, "user-caps", caps, &caps); - - op_state.set_user_id(uid); - op_state.set_caps(caps); - - bufferlist data; - op_ret = driver->forward_request_to_master(s, s->user.get(), nullptr, data, nullptr, s->info, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; - return; - } - op_ret = RGWUserAdminOp_Caps::remove(s, driver, op_state, flusher, y); -} - -struct UserQuotas { - RGWQuota quota; - - UserQuotas() {} - - explicit UserQuotas(RGWUserInfo& info){ - quota.bucket_quota = info.quota.bucket_quota; - quota.user_quota = info.quota.user_quota; - } - - void dump(Formatter *f) const { - encode_json("bucket_quota", quota.bucket_quota, f); - encode_json("user_quota", quota.user_quota, f); - } - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("bucket_quota", quota.bucket_quota, obj); - JSONDecoder::decode_json("user_quota", quota.user_quota, obj); - } -}; - -class RGWOp_Quota_Info : public RGWRESTOp { - -public: - RGWOp_Quota_Info() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_READ); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "get_quota_info"; } -}; - - -void RGWOp_Quota_Info::execute(optional_yield y) -{ - RGWUserAdminOpState op_state(driver); - - std::string uid_str; - std::string quota_type; - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - 
RESTArgs::get_string(s, "quota-type", quota_type, "a_type); - - if (uid_str.empty()) { - op_ret = -EINVAL; - return; - } - - rgw_user uid(uid_str); - - bool show_all = quota_type.empty(); - bool show_bucket = show_all || (quota_type == "bucket"); - bool show_user = show_all || (quota_type == "user"); - - if (!(show_all || show_bucket || show_user)) { - op_ret = -EINVAL; - return; - } - - op_state.set_user_id(uid); - - RGWUser user; - op_ret = user.init(s, driver, op_state, y); - if (op_ret < 0) - return; - - if (!op_state.has_existing_user()) { - op_ret = -ERR_NO_SUCH_USER; - return; - } - - RGWUserInfo info; - string err_msg; - op_ret = user.info(info, &err_msg); - if (op_ret < 0) - return; - - flusher.start(0); - if (show_all) { - UserQuotas quotas(info); - encode_json("quota", quotas, s->formatter); - } else if (show_user) { - encode_json("user_quota", info.quota.user_quota, s->formatter); - } else { - encode_json("bucket_quota", info.quota.bucket_quota, s->formatter); - } - - flusher.flush(); -} - -class RGWOp_Quota_Set : public RGWRESTOp { - -public: - RGWOp_Quota_Set() {} - - int check_caps(const RGWUserCaps& caps) override { - return caps.check_cap("users", RGW_CAP_WRITE); - } - - void execute(optional_yield y) override; - - const char* name() const override { return "set_quota_info"; } -}; - -/** - * set quota - * - * two different ways to set the quota info: as json struct in the message body or via http params. - * - * as json: - * - * PUT /admin/user?uid=["a-type=] - * - * whereas quota-type is optional and is either user, or bucket - * - * if quota-type is not specified then we expect to get a structure that contains both quotas, - * otherwise we'll only get the relevant configuration. - * - * E.g., if quota type not specified: - * { - * "user_quota" : { - * "max_size_kb" : 4096, - * "max_objects" : -1, - * "enabled" : false - * }, - * "bucket_quota" : { - * "max_size_kb" : 1024, - * "max_objects" : -1, - * "enabled" : true - * } - * } - * - * - * or if quota type is specified: - * { - * "max_size_kb" : 4096, - * "max_objects" : -1, - * "enabled" : false - * } - * - * Another option is not to pass any body and set the following http params: - * - * - * max-size-kb= - * max-objects= - * enabled[={true,false}] - * - * all params are optionals and default to the current settings. With this type of configuration the - * quota-type param is mandatory. 
 - * - */ - -void RGWOp_Quota_Set::execute(optional_yield y) -{ - RGWUserAdminOpState op_state(driver); - - std::string uid_str; - std::string quota_type; - - RESTArgs::get_string(s, "uid", uid_str, &uid_str); - RESTArgs::get_string(s, "quota-type", quota_type, &quota_type); - - if (uid_str.empty()) { - op_ret = -EINVAL; - return; - } - - rgw_user uid(uid_str); - - bool set_all = quota_type.empty(); - bool set_bucket = set_all || (quota_type == "bucket"); - bool set_user = set_all || (quota_type == "user"); - - if (!(set_all || set_bucket || set_user)) { - ldpp_dout(this, 20) << "invalid quota type" << dendl; - op_ret = -EINVAL; - return; - } - - bool use_http_params; - - if (s->content_length > 0) { - use_http_params = false; - } else { - const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING"); - use_http_params = (!encoding || strcmp(encoding, "chunked") != 0); - } - - if (use_http_params && set_all) { - ldpp_dout(this, 20) << "quota type was not specified, can't set all quotas via http headers" << dendl; - op_ret = -EINVAL; - return; - } - - op_state.set_user_id(uid); - - RGWUser user; - op_ret = user.init(s, driver, op_state, y); - if (op_ret < 0) { - ldpp_dout(this, 20) << "failed initializing user info: " << op_ret << dendl; - return; - } - - if (!op_state.has_existing_user()) { - op_ret = -ERR_NO_SUCH_USER; - return; - } - -#define QUOTA_INPUT_MAX_LEN 1024 - if (set_all) { - UserQuotas quotas; - - if ((op_ret = get_json_input(driver->ctx(), s, quotas, QUOTA_INPUT_MAX_LEN, NULL)) < 0) { - ldpp_dout(this, 20) << "failed to retrieve input" << dendl; - return; - } - - op_state.set_user_quota(quotas.quota.user_quota); - op_state.set_bucket_quota(quotas.quota.bucket_quota); - } else { - RGWQuotaInfo quota; - - if (!use_http_params) { - bool empty; - op_ret = get_json_input(driver->ctx(), s, quota, QUOTA_INPUT_MAX_LEN, &empty); - if (op_ret < 0) { - ldpp_dout(this, 20) << "failed to retrieve input" << dendl; - if (!empty) - return; - - /* was probably chunked input, but no content provided, configure via http params */ - use_http_params = true; - } - } - - if (use_http_params) { - RGWUserInfo info; - string err_msg; - op_ret = user.info(info, &err_msg); - if (op_ret < 0) { - ldpp_dout(this, 20) << "failed to get user info: " << op_ret << dendl; - return; - } - RGWQuotaInfo *old_quota; - if (set_user) { - old_quota = &info.quota.user_quota; - } else { - old_quota = &info.quota.bucket_quota; - } - - RESTArgs::get_int64(s, "max-objects", old_quota->max_objects, &quota.max_objects); - RESTArgs::get_int64(s, "max-size", old_quota->max_size, &quota.max_size); - int64_t max_size_kb; - bool has_max_size_kb = false; - RESTArgs::get_int64(s, "max-size-kb", 0, &max_size_kb, &has_max_size_kb); - if (has_max_size_kb) { - quota.max_size = max_size_kb * 1024; - } - RESTArgs::get_bool(s, "enabled", old_quota->enabled, &quota.enabled); - } - - if (set_user) { - op_state.set_user_quota(quota); - } else { - op_state.set_bucket_quota(quota); - } - } - - string err; - op_ret = user.modify(s, op_state, y, &err); - if (op_ret < 0) { - ldpp_dout(this, 20) << "failed updating user info: " << op_ret << ": " << err << dendl; - return; - } -} - -RGWOp *RGWHandler_User::op_get() -{ - if (s->info.args.sub_resource_exists("quota")) - return new RGWOp_Quota_Info; - - if (s->info.args.sub_resource_exists("list")) - return new RGWOp_User_List; - - return new RGWOp_User_Info; -} - -RGWOp *RGWHandler_User::op_put() -{ - if (s->info.args.sub_resource_exists("subuser")) - return new RGWOp_Subuser_Create; - - if
(s->info.args.sub_resource_exists("key")) - return new RGWOp_Key_Create; - - if (s->info.args.sub_resource_exists("caps")) - return new RGWOp_Caps_Add; - - if (s->info.args.sub_resource_exists("quota")) - return new RGWOp_Quota_Set; - - return new RGWOp_User_Create; -} - -RGWOp *RGWHandler_User::op_post() -{ - if (s->info.args.sub_resource_exists("subuser")) - return new RGWOp_Subuser_Modify; - - return new RGWOp_User_Modify; -} - -RGWOp *RGWHandler_User::op_delete() -{ - if (s->info.args.sub_resource_exists("subuser")) - return new RGWOp_Subuser_Remove; - - if (s->info.args.sub_resource_exists("key")) - return new RGWOp_Key_Remove; - - if (s->info.args.sub_resource_exists("caps")) - return new RGWOp_Caps_Remove; - - return new RGWOp_User_Remove; -} - diff --git a/src/rgw/store/rados/rgw_rest_user.h b/src/rgw/store/rados/rgw_rest_user.h deleted file mode 100644 index ee585be4508..00000000000 --- a/src/rgw/store/rados/rgw_rest_user.h +++ /dev/null @@ -1,36 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include "rgw_rest.h" -#include "rgw_rest_s3.h" - - -class RGWHandler_User : public RGWHandler_Auth_S3 { -protected: - RGWOp *op_get() override; - RGWOp *op_put() override; - RGWOp *op_post() override; - RGWOp *op_delete() override; -public: - using RGWHandler_Auth_S3::RGWHandler_Auth_S3; - ~RGWHandler_User() override = default; - - int read_permissions(RGWOp*, optional_yield) override { - return 0; - } -}; - -class RGWRESTMgr_User : public RGWRESTMgr { -public: - RGWRESTMgr_User() = default; - ~RGWRESTMgr_User() override = default; - - RGWHandler_REST *get_handler(rgw::sal::Driver* driver, - req_state*, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string&) override { - return new RGWHandler_User(auth_registry); - } -}; diff --git a/src/rgw/store/rados/rgw_sal_rados.cc b/src/rgw/store/rados/rgw_sal_rados.cc deleted file mode 100644 index 577569dd5d5..00000000000 --- a/src/rgw/store/rados/rgw_sal_rados.cc +++ /dev/null @@ -1,3630 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2020 Red Hat, Inc. - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/Clock.h" -#include "common/errno.h" - -#include "rgw_sal.h" -#include "rgw_sal_rados.h" -#include "rgw_bucket.h" -#include "rgw_multi.h" -#include "rgw_acl_s3.h" -#include "rgw_aio.h" -#include "rgw_aio_throttle.h" -#include "rgw_tracer.h" - -#include "rgw_zone.h" -#include "rgw_rest_conn.h" -#include "rgw_service.h" -#include "rgw_lc.h" -#include "rgw_lc_tier.h" -#include "rgw_rest_admin.h" -#include "rgw_rest_bucket.h" -#include "rgw_rest_metadata.h" -#include "rgw_rest_log.h" -#include "rgw_rest_config.h" -#include "rgw_rest_ratelimit.h" -#include "rgw_rest_realm.h" -#include "rgw_rest_user.h" -#include "services/svc_sys_obj.h" -#include "services/svc_meta.h" -#include "services/svc_meta_be_sobj.h" -#include "services/svc_cls.h" -#include "services/svc_zone.h" -#include "services/svc_tier_rados.h" -#include "services/svc_quota.h" -#include "services/svc_config_key.h" -#include "services/svc_zone_utils.h" -#include "services/svc_role_rados.h" -#include "services/svc_user.h" -#include "cls/rgw/cls_rgw_client.h" - -#include "rgw_pubsub.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -static string mp_ns = RGW_OBJ_NS_MULTIPART; - -namespace rgw::sal { - -// default number of entries to list with each bucket listing call -// (use marker to bridge between calls) -static constexpr size_t listing_max_entries = 1000; - -static int decode_policy(CephContext* cct, - bufferlist& bl, - RGWAccessControlPolicy* policy) -{ - auto iter = bl.cbegin(); - try { - policy->decode(iter); - } catch (buffer::error& err) { - ldout(cct, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl; - return -EIO; - } - if (cct->_conf->subsys.should_gather()) { - ldout(cct, 15) << __func__ << " Read AccessControlPolicy"; - RGWAccessControlPolicy_S3* s3policy = static_cast(policy); - s3policy->to_xml(*_dout); - *_dout << dendl; - } - return 0; -} - -static int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider* dpp, - RadosStore* store, - User* user, - Attrs& bucket_attrs, - RGWAccessControlPolicy* policy, - optional_yield y) -{ - auto aiter = bucket_attrs.find(RGW_ATTR_ACL); - - if (aiter != bucket_attrs.end()) { - int ret = decode_policy(store->ctx(), aiter->second, policy); - if (ret < 0) - return ret; - } else { - ldout(store->ctx(), 0) << "WARNING: couldn't find acl header for bucket, generating default" << dendl; - /* object exists, but policy is broken */ - int r = user->load_user(dpp, y); - if (r < 0) - return r; - - policy->create_default(user->get_id(), user->get_display_name()); - } - return 0; -} - -int RadosCompletions::drain() -{ - int ret = 0; - while (!handles.empty()) { - librados::AioCompletion* handle = handles.front(); - handles.pop_front(); - handle->wait_for_complete(); - int r = handle->get_return_value(); - handle->release(); - if (r < 0) { - ret = r; - } - } - return ret; -} - -int RadosUser::list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, - const std::string& end_marker, uint64_t max, bool need_stats, - BucketList &buckets, optional_yield y) -{ - RGWUserBuckets ulist; - bool is_truncated = false; - int ret; - - buckets.clear(); - ret = store->ctl()->user->list_buckets(dpp, info.user_id, marker, end_marker, max, - need_stats, &ulist, &is_truncated, y); - if (ret < 0) - return ret; - - buckets.set_truncated(is_truncated); - for (const auto& ent : ulist.get_buckets()) { - buckets.add(std::unique_ptr(new 
RadosBucket(this->store, ent.second, this))); - } - - return 0; -} - -int RadosUser::create_bucket(const DoutPrefixProvider* dpp, - const rgw_bucket& b, - const std::string& zonegroup_id, - rgw_placement_rule& placement_rule, - std::string& swift_ver_location, - const RGWQuotaInfo * pquota_info, - const RGWAccessControlPolicy& policy, - Attrs& attrs, - RGWBucketInfo& info, - obj_version& ep_objv, - bool exclusive, - bool obj_lock_enabled, - bool* existed, - req_info& req_info, - std::unique_ptr* bucket_out, - optional_yield y) -{ - int ret; - bufferlist in_data; - RGWBucketInfo master_info; - rgw_bucket* pmaster_bucket; - uint32_t* pmaster_num_shards; - real_time creation_time; - std::unique_ptr bucket; - obj_version objv,* pobjv = NULL; - - /* If it exists, look it up; otherwise create it */ - ret = store->get_bucket(dpp, this, b, &bucket, y); - if (ret < 0 && ret != -ENOENT) - return ret; - - if (ret != -ENOENT) { - RGWAccessControlPolicy old_policy(store->ctx()); - *existed = true; - if (swift_ver_location.empty()) { - swift_ver_location = bucket->get_info().swift_ver_location; - } - placement_rule.inherit_from(bucket->get_info().placement_rule); - - // don't allow changes to the acl policy - int r = rgw_op_get_bucket_policy_from_attr(dpp, store, this, bucket->get_attrs(), - &old_policy, y); - if (r >= 0 && old_policy != policy) { - bucket_out->swap(bucket); - return -EEXIST; - } - } else { - bucket = std::unique_ptr(new RadosBucket(store, b, this)); - *existed = false; - bucket->set_attrs(attrs); - } - - if (!store->svc()->zone->is_meta_master()) { - JSONParser jp; - ret = store->forward_request_to_master(dpp, this, NULL, in_data, &jp, req_info, y); - if (ret < 0) { - return ret; - } - - JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp); - JSONDecoder::decode_json("object_ver", objv, &jp); - JSONDecoder::decode_json("bucket_info", master_info, &jp); - ldpp_dout(dpp, 20) << "parsed: objv.tag=" << objv.tag << " objv.ver=" << objv.ver << dendl; - std::time_t ctime = ceph::real_clock::to_time_t(master_info.creation_time); - ldpp_dout(dpp, 20) << "got creation time: << " << std::put_time(std::localtime(&ctime), "%F %T") << dendl; - pmaster_bucket= &master_info.bucket; - creation_time = master_info.creation_time; - pmaster_num_shards = &master_info.layout.current_index.layout.normal.num_shards; - pobjv = &objv; - if (master_info.obj_lock_enabled()) { - info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; - } - } else { - pmaster_bucket = NULL; - pmaster_num_shards = NULL; - if (obj_lock_enabled) - info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED; - } - - std::string zid = zonegroup_id; - if (zid.empty()) { - zid = store->svc()->zone->get_zonegroup().get_id(); - } - - if (*existed) { - rgw_placement_rule selected_placement_rule; - ret = store->svc()->zone->select_bucket_placement(dpp, this->get_info(), - zid, placement_rule, - &selected_placement_rule, nullptr, y); - if (selected_placement_rule != info.placement_rule) { - ret = -EEXIST; - bucket_out->swap(bucket); - return ret; - } - } else { - - ret = store->getRados()->create_bucket(this->get_info(), bucket->get_key(), - zid, placement_rule, swift_ver_location, pquota_info, - attrs, info, pobjv, &ep_objv, creation_time, - pmaster_bucket, pmaster_num_shards, y, dpp, - exclusive); - if (ret == -EEXIST) { - *existed = true; - /* bucket already existed, might have raced with another bucket creation, - * or might be partial bucket creation that never completed. 
Read existing - * bucket info, verify that the reported bucket owner is the current user. - * If all is ok then update the user's list of buckets. Otherwise inform - * client about a name conflict. - */ - if (info.owner.compare(this->get_id()) != 0) { - return -EEXIST; - } - ret = 0; - } else if (ret != 0) { - return ret; - } - } - - bucket->set_version(ep_objv); - bucket->get_info() = info; - - RadosBucket* rbucket = static_cast(bucket.get()); - ret = rbucket->link(dpp, this, y, false); - if (ret && !*existed && ret != -EEXIST) { - /* if it exists (or previously existed), don't remove it! */ - ret = rbucket->unlink(dpp, this, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to unlink bucket: ret=" << ret - << dendl; - } - } else if (ret == -EEXIST || (ret == 0 && *existed)) { - ret = -ERR_BUCKET_EXISTS; - } - - bucket_out->swap(bucket); - - return ret; -} - -int RadosUser::read_attrs(const DoutPrefixProvider* dpp, optional_yield y) -{ - return store->ctl()->user->get_attrs_by_uid(dpp, get_id(), &attrs, y, &objv_tracker); -} - -int RadosUser::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) -{ - for(auto& it : new_attrs) { - attrs[it.first] = it.second; - } - return store_user(dpp, y, false); -} - -int RadosUser::read_stats(const DoutPrefixProvider *dpp, - optional_yield y, RGWStorageStats* stats, - ceph::real_time* last_stats_sync, - ceph::real_time* last_stats_update) -{ - return store->ctl()->user->read_stats(dpp, get_id(), stats, y, last_stats_sync, last_stats_update); -} - -int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) -{ - return store->svc()->user->read_stats_async(dpp, get_id(), cb); -} - -int RadosUser::complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) -{ - return store->svc()->user->complete_flush_stats(dpp, get_id(), y); -} - -int RadosUser::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, - RGWUsageIter& usage_iter, - map& usage) -{ - std::string bucket_name; - return store->getRados()->read_usage(dpp, get_id(), bucket_name, start_epoch, - end_epoch, max_entries, is_truncated, - usage_iter, usage); -} - -int RadosUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) -{ - std::string bucket_name; - - return store->getRados()->trim_usage(dpp, get_id(), bucket_name, start_epoch, end_epoch); -} - -int RadosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) -{ - return store->ctl()->user->get_info_by_uid(dpp, info.user_id, &info, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker).set_attrs(&attrs)); -} - -int RadosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info) -{ - return store->ctl()->user->store_info(dpp, info, y, - RGWUserCtl::PutParams().set_objv_tracker(&objv_tracker) - .set_exclusive(exclusive) - .set_attrs(&attrs) - .set_old_info(old_info)); -} - -int RadosUser::remove_user(const DoutPrefixProvider* dpp, optional_yield y) -{ - return store->ctl()->user->remove_info(dpp, info, y, - RGWUserCtl::RemoveParams().set_objv_tracker(&objv_tracker)); -} - -int RadosUser::verify_mfa(const std::string& mfa_str, bool* verified, - const DoutPrefixProvider* dpp, optional_yield y) -{ - vector params; - get_str_vec(mfa_str, " ", params); - - if (params.size() != 2) { - ldpp_dout(dpp, 5) << "NOTICE: invalid mfa string provided: " << mfa_str << dendl; - return -EINVAL; - } - - string& serial 
= params[0]; - string& pin = params[1]; - - auto i = info.mfa_ids.find(serial); - if (i == info.mfa_ids.end()) { - ldpp_dout(dpp, 5) << "NOTICE: user does not have mfa device with serial=" << serial << dendl; - return -EACCES; - } - - int ret = store->svc()->cls->mfa.check_mfa(dpp, info.user_id, serial, pin, y); - if (ret < 0) { - ldpp_dout(dpp, 20) << "NOTICE: failed to check MFA, serial=" << serial << dendl; - return -EACCES; - } - - *verified = true; - - return 0; -} - -RadosBucket::~RadosBucket() {} - -int RadosBucket::remove_bucket(const DoutPrefixProvider* dpp, - bool delete_children, - bool forward_to_master, - req_info* req_info, - optional_yield y) -{ - int ret; - - // Refresh info - ret = load_bucket(dpp, y); - if (ret < 0) { - return ret; - } - - ListParams params; - params.list_versions = true; - params.allow_unordered = true; - - ListResults results; - - do { - results.objs.clear(); - - ret = list(dpp, params, 1000, results, y); - if (ret < 0) { - return ret; - } - - if (!results.objs.empty() && !delete_children) { - ldpp_dout(dpp, -1) << "ERROR: could not remove non-empty bucket " << info.bucket.name << - dendl; - return -ENOTEMPTY; - } - - for (const auto& obj : results.objs) { - rgw_obj_key key(obj.key); - /* xxx dang */ - ret = rgw_remove_object(dpp, store, this, key); - if (ret < 0 && ret != -ENOENT) { - return ret; - } - } - } while(results.is_truncated); - - ret = abort_multiparts(dpp, store->ctx()); - if (ret < 0) { - return ret; - } - - // remove lifecycle config, if any (XXX note could be made generic) - (void) store->getRados()->get_lc()->remove_bucket_config( - this, get_attrs()); - - ret = store->ctl()->bucket->sync_user_stats(dpp, info.owner, info, y, nullptr); - if (ret < 0) { - ldout(store->ctx(), 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl; - } - - RGWObjVersionTracker ot; - - // if we deleted children above we will force delete, as any that - // remain is detrius from a prior bug - ret = store->getRados()->delete_bucket(info, ot, y, dpp, !delete_children); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: could not remove bucket " << - info.bucket.name << dendl; - return ret; - } - - // if bucket has notification definitions associated with it - // they should be removed (note that any pending notifications on the bucket are still going to be sent) - RGWPubSub ps(store, info.owner.tenant); - RGWPubSub::Bucket ps_bucket(&ps, info.bucket); - const auto ps_ret = ps_bucket.remove_notifications(dpp, y); - if (ps_ret < 0 && ps_ret != -ENOENT) { - ldpp_dout(dpp, -1) << "ERROR: unable to remove notifications from bucket. 
ret=" << ps_ret << dendl; - } - - ret = store->ctl()->bucket->unlink_bucket(info.owner, info.bucket, y, dpp, false); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: unable to remove user bucket information" << dendl; - } - - if (forward_to_master) { - bufferlist in_data; - ret = store->forward_request_to_master(dpp, owner, &ot.read_version, in_data, nullptr, *req_info, y); - if (ret < 0) { - if (ret == -ENOENT) { - /* adjust error, we want to return with NoSuchBucket and not - * NoSuchKey */ - ret = -ERR_NO_SUCH_BUCKET; - } - return ret; - } - } - - return ret; -} - -int RadosBucket::remove_bucket_bypass_gc(int concurrent_max, bool - keep_index_consistent, - optional_yield y, const - DoutPrefixProvider *dpp) -{ - int ret; - map stats; - map common_prefixes; - RGWObjectCtx obj_ctx(store); - CephContext *cct = store->ctx(); - - string bucket_ver, master_ver; - - ret = load_bucket(dpp, null_yield); - if (ret < 0) - return ret; - - const auto& index = info.get_current_index(); - ret = read_stats(dpp, index, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, NULL); - if (ret < 0) - return ret; - - ret = abort_multiparts(dpp, cct); - if (ret < 0) { - return ret; - } - - rgw::sal::Bucket::ListParams params; - rgw::sal::Bucket::ListResults results; - - params.list_versions = true; - params.allow_unordered = true; - - std::unique_ptr handles = store->get_completions(); - - int max_aio = concurrent_max; - results.is_truncated = true; - - while (results.is_truncated) { - ret = list(dpp, params, listing_max_entries, results, null_yield); - if (ret < 0) - return ret; - - std::vector::iterator it = results.objs.begin(); - for (; it != results.objs.end(); ++it) { - RGWObjState *astate = NULL; - RGWObjManifest *amanifest = nullptr; - std::unique_ptr obj = get_object((*it).key); - - ret = store->getRados()->get_obj_state(dpp, &obj_ctx, obj->get_bucket()->get_info(), - obj.get(), &astate, &amanifest, - false, y); - if (ret == -ENOENT) { - ldpp_dout(dpp, 1) << "WARNING: cannot find obj state for obj " << obj << dendl; - continue; - } - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: get obj state returned with error " << ret << dendl; - return ret; - } - - if (amanifest) { - RGWObjManifest& manifest = *amanifest; - RGWObjManifest::obj_iterator miter = manifest.obj_begin(dpp); - std::unique_ptr head_obj = get_object(manifest.get_obj().key); - rgw_raw_obj raw_head_obj; - dynamic_cast(head_obj.get())->get_raw_obj(&raw_head_obj); - - for (; miter != manifest.obj_end(dpp) && max_aio--; ++miter) { - if (!max_aio) { - ret = handles->drain(); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl; - return ret; - } - max_aio = concurrent_max; - } - - rgw_raw_obj last_obj = miter.get_location().get_raw_obj(store); - if (last_obj == raw_head_obj) { - // have the head obj deleted at the end - continue; - } - - ret = store->delete_raw_obj_aio(dpp, last_obj, handles.get()); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: delete obj aio failed with " << ret << dendl; - return ret; - } - } // for all shadow objs - - ret = head_obj->delete_obj_aio(dpp, astate, handles.get(), keep_index_consistent, null_yield); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: delete obj aio failed with " << ret << dendl; - return ret; - } - } - - if (!max_aio) { - ret = handles->drain(); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl; - return ret; - } - max_aio = concurrent_max; - } - 
obj_ctx.invalidate(obj->get_obj());
-    } // for all RGW objects in results
-  } // while is_truncated
-
-  ret = handles->drain();
-  if (ret < 0) {
-    ldpp_dout(dpp, -1) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl;
-    return ret;
-  }
-
-  sync_user_stats(dpp, y);
-  if (ret < 0) {
-    ldpp_dout(dpp, 1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
-  }
-
-  RGWObjVersionTracker objv_tracker;
-
-  // this function can only be run if caller wanted children to be
-  // deleted, so we can ignore the check for children as any that
-  // remain are detritus from a prior bug
-  ret = remove_bucket(dpp, true, false, nullptr, y);
-  if (ret < 0) {
-    ldpp_dout(dpp, -1) << "ERROR: could not remove bucket " << this << dendl;
-    return ret;
-  }
-
-  return ret;
-}
-
-int RadosBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y, bool get_stats)
-{
-  int ret;
-
-  RGWSI_MetaBackend_CtxParams bectx_params = RGWSI_MetaBackend_CtxParams_SObj();
-  RGWObjVersionTracker ep_ot;
-  if (info.bucket.bucket_id.empty()) {
-    ret = store->ctl()->bucket->read_bucket_info(info.bucket, &info, y, dpp,
-                                                 RGWBucketCtl::BucketInstance::GetParams()
-                                                 .set_mtime(&mtime)
-                                                 .set_attrs(&attrs)
-                                                 .set_bectx_params(bectx_params),
-                                                 &ep_ot);
-  } else {
-    ret = store->ctl()->bucket->read_bucket_instance_info(info.bucket, &info, y, dpp,
-                                                          RGWBucketCtl::BucketInstance::GetParams()
-                                                          .set_mtime(&mtime)
-                                                          .set_attrs(&attrs)
-                                                          .set_bectx_params(bectx_params));
-  }
-  if (ret != 0) {
-    return ret;
-  }
-
-  bucket_version = ep_ot.read_version;
-
-  if (get_stats) {
-    ret = store->ctl()->bucket->read_bucket_stats(info.bucket, &ent, y, dpp);
-  }
-
-  return ret;
-}
-
-int RadosBucket::read_stats(const DoutPrefixProvider *dpp,
-                            const bucket_index_layout_generation& idx_layout,
-                            int shard_id, std::string* bucket_ver, std::string* master_ver,
-                            std::map<RGWObjCategory, RGWStorageStats>& stats,
-                            std::string* max_marker, bool* syncstopped)
-{
-  return store->getRados()->get_bucket_stats(dpp, info, idx_layout, shard_id, bucket_ver, master_ver, stats, max_marker, syncstopped);
-}
-
-int RadosBucket::read_stats_async(const DoutPrefixProvider *dpp,
-                                  const bucket_index_layout_generation& idx_layout,
-                                  int shard_id, RGWGetBucketStats_CB* ctx)
-{
-  return store->getRados()->get_bucket_stats_async(dpp, get_info(), idx_layout, shard_id, ctx);
-}
-
-int RadosBucket::sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y)
-{
-  return store->ctl()->bucket->sync_user_stats(dpp, owner->get_id(), info, y, &ent);
-}
-
-int RadosBucket::update_container_stats(const DoutPrefixProvider* dpp)
-{
-  int ret;
-  map<std::string, RGWBucketEnt> m;
-
-  m[info.bucket.name] = ent;
-  ret = store->getRados()->update_containers_stats(m, dpp);
-  if (!ret)
-    return -EEXIST;
-  if (ret < 0)
-    return ret;
-
-  map<std::string, RGWBucketEnt>::iterator iter = m.find(info.bucket.name);
-  if (iter == m.end())
-    return -EINVAL;
-
-  ent.count = iter->second.count;
-  ent.size = iter->second.size;
-  ent.size_rounded = iter->second.size_rounded;
-  ent.creation_time = iter->second.creation_time;
-  ent.placement_rule = std::move(iter->second.placement_rule);
-
-  info.creation_time = ent.creation_time;
-  info.placement_rule = ent.placement_rule;
-
-  return 0;
-}
-
-int RadosBucket::check_bucket_shards(const DoutPrefixProvider* dpp)
-{
-  return store->getRados()->check_bucket_shards(info, info.bucket, get_count(), dpp);
-}
-
-int RadosBucket::link(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint, RGWObjVersionTracker* objv)
-{
-  RGWBucketEntryPoint ep;
-  ep.bucket = info.bucket;
-  ep.owner = new_user->get_id();
-  ep.creation_time = get_creation_time();
-  ep.linked = true;
-  Attrs ep_attrs;
-  rgw_ep_info ep_data{ep, ep_attrs};
-
-  int r = store->ctl()->bucket->link_bucket(new_user->get_id(), info.bucket,
-                                            get_creation_time(), y, dpp, update_entrypoint,
-                                            &ep_data);
-  if (r < 0)
-    return r;
-
-  if (objv)
-    *objv = ep_data.ep_objv;
-
-  return r;
-}
-
-int RadosBucket::unlink(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint)
-{
-  return store->ctl()->bucket->unlink_bucket(new_user->get_id(), info.bucket, y, dpp, update_entrypoint);
-}
-
-int RadosBucket::chown(const DoutPrefixProvider* dpp, User* new_user, User* old_user, optional_yield y, const std::string* marker)
-{
-  std::string obj_marker;
-
-  if (marker == nullptr)
-    marker = &obj_marker;
-
-  int r = this->link(dpp, new_user, y);
-  if (r < 0) {
-    return r;
-  }
-  if (!old_user) {
-    return r;
-  }
-
-  return store->ctl()->bucket->chown(store, this, new_user->get_id(),
-                                     old_user->get_display_name(), *marker, y, dpp);
-}
-
-int RadosBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::real_time _mtime)
-{
-  mtime = _mtime;
-  return store->getRados()->put_bucket_instance_info(info, exclusive, mtime, &attrs, dpp);
-}
-
-/* Make sure to call get_bucket_info() if you need it first */
-bool RadosBucket::is_owner(User* user)
-{
-  return (info.owner.compare(user->get_id()) == 0);
-}
-
-int RadosBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y)
-{
-  return store->getRados()->check_bucket_empty(dpp, info, y);
-}
-
-int RadosBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size,
-                             optional_yield y, bool check_size_only)
-{
-  return store->getRados()->check_quota(dpp, owner->get_id(), get_key(),
-                                        quota, obj_size, y, check_size_only);
-}
-
-int RadosBucket::merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y)
-{
-  for(auto& it : new_attrs) {
-    attrs[it.first] = it.second;
-  }
-  return store->ctl()->bucket->set_bucket_instance_attrs(get_info(),
-                                                         new_attrs, &get_info().objv_tracker, y, dpp);
-}
-
-int RadosBucket::try_refresh_info(const DoutPrefixProvider* dpp, ceph::real_time* pmtime)
-{
-  return store->getRados()->try_refresh_bucket_info(info, pmtime, dpp, &attrs);
-}
-
-int RadosBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
-                            uint32_t max_entries, bool* is_truncated,
-                            RGWUsageIter& usage_iter,
-                            map<rgw_user_bucket, rgw_usage_log_entry>& usage)
-{
-  return store->getRados()->read_usage(dpp, owner->get_id(), get_name(), start_epoch,
-                                       end_epoch, max_entries, is_truncated,
-                                       usage_iter, usage);
-}
-
-int RadosBucket::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
-{
-  return store->getRados()->trim_usage(dpp, owner->get_id(), get_name(), start_epoch, end_epoch);
-}
-
-int RadosBucket::remove_objs_from_index(const DoutPrefixProvider *dpp, std::list<rgw_obj_index_key>& objs_to_unlink)
-{
-  return store->getRados()->remove_objs_from_index(dpp, info, objs_to_unlink);
-}
-
-int RadosBucket::check_index(const DoutPrefixProvider *dpp, std::map<RGWObjCategory, RGWStorageStats>& existing_stats, std::map<RGWObjCategory, RGWStorageStats>& calculated_stats)
-{
-  return store->getRados()->bucket_check_index(dpp, info, &existing_stats, &calculated_stats);
-}
-
-int RadosBucket::rebuild_index(const DoutPrefixProvider *dpp)
-{
-  return store->getRados()->bucket_rebuild_index(dpp, info);
-}
-
-int RadosBucket::set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout)
-{
-  return store->getRados()->cls_obj_set_bucket_tag_timeout(dpp, info, timeout);
-}
-
-int RadosBucket::purge_instance(const DoutPrefixProvider* dpp)
-{
-  int max_shards = (info.layout.current_index.layout.normal.num_shards > 0 ? info.layout.current_index.layout.normal.num_shards : 1);
-  for (int i = 0; i < max_shards; i++) {
-    RGWRados::BucketShard bs(store->getRados());
-    int shard_id = (info.layout.current_index.layout.normal.num_shards > 0 ? i : -1);
-    int ret = bs.init(dpp, info, info.layout.current_index, shard_id);
-    if (ret < 0) {
-      cerr << "ERROR: bs.init(bucket=" << info.bucket << ", shard=" << shard_id
-           << "): " << cpp_strerror(-ret) << std::endl;
-      return ret;
-    }
-    ret = store->getRados()->bi_remove(dpp, bs);
-    if (ret < 0) {
-      cerr << "ERROR: failed to remove bucket index object: "
-           << cpp_strerror(-ret) << std::endl;
-      return ret;
-    }
-  }
-  return 0;
-}
-
-int RadosBucket::set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy &acl, optional_yield y)
-{
-  bufferlist aclbl;
-
-  acls = acl;
-  acl.encode(aclbl);
-  map<string, bufferlist>& attrs = get_attrs();
-
-  attrs[RGW_ATTR_ACL] = aclbl;
-  info.owner = acl.get_owner().get_id();
-
-  int r = store->ctl()->bucket->store_bucket_instance_info(info.bucket,
-                                                           info, y, dpp,
-                                                           RGWBucketCtl::BucketInstance::PutParams().set_attrs(&attrs));
-  if (r < 0) {
-    cerr << "ERROR: failed to set bucket owner: " << cpp_strerror(-r) << std::endl;
-    return r;
-  }
-
-  return 0;
-}
-
-std::unique_ptr<Object> RadosBucket::get_object(const rgw_obj_key& k)
-{
-  return std::make_unique<RadosObject>(this->store, k, this);
-}
-
-int RadosBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max, ListResults& results, optional_yield y)
-{
-  RGWRados::Bucket target(store->getRados(), get_info());
-  if (params.shard_id >= 0) {
-    target.set_shard_id(params.shard_id);
-  }
-  RGWRados::Bucket::List list_op(&target);
-
-  list_op.params.prefix = params.prefix;
-  list_op.params.delim = params.delim;
-  list_op.params.marker = params.marker;
-  list_op.params.ns = params.ns;
-  list_op.params.end_marker = params.end_marker;
-  list_op.params.ns = params.ns;
-  list_op.params.enforce_ns = params.enforce_ns;
-  list_op.params.access_list_filter = params.access_list_filter;
-  list_op.params.force_check_filter = params.force_check_filter;
-  list_op.params.list_versions = params.list_versions;
-  list_op.params.allow_unordered = params.allow_unordered;
-
-  int ret = list_op.list_objects(dpp, max, &results.objs, &results.common_prefixes, &results.is_truncated, y);
-  if (ret >= 0) {
-    results.next_marker = list_op.get_next_marker();
-    params.marker = results.next_marker;
-  }
-
-  return ret;
-}
-
-std::unique_ptr<MultipartUpload> RadosBucket::get_multipart_upload(
-                                  const std::string& oid,
-                                  std::optional<std::string> upload_id,
-                                  ACLOwner owner, ceph::real_time mtime)
-{
-  return std::make_unique<RadosMultipartUpload>(this->store, this, oid, upload_id,
-                                                std::move(owner), mtime);
-}
-
-int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp,
-                                 const string& prefix,
-                                 string& marker,
-                                 const string& delim,
-                                 const int& max_uploads,
-                                 vector<std::unique_ptr<MultipartUpload>>& uploads,
-                                 map<string, bool> *common_prefixes,
-                                 bool *is_truncated)
-{
-  rgw::sal::Bucket::ListParams params;
-  rgw::sal::Bucket::ListResults results;
-  MultipartMetaFilter mp_filter;
-
-  params.prefix = prefix;
-  params.delim = delim;
-  params.marker = marker;
-  params.ns = RGW_OBJ_NS_MULTIPART;
-  params.access_list_filter = &mp_filter;
-
-  int ret = list(dpp, params, max_uploads, results, null_yield);
-
-  if (ret < 0)
-    return ret;
-
-  if (!results.objs.empty()) {
-    for (const rgw_bucket_dir_entry& dentry : results.objs) {
-      rgw_obj_key key(dentry.key);
-      ACLOwner owner(rgw_user(dentry.meta.owner));
-      owner.set_name(dentry.meta.owner_display_name);
-      uploads.push_back(this->get_multipart_upload(key.name,
-                        std::nullopt, std::move(owner)));
-    }
-  }
-  if (common_prefixes) {
-    *common_prefixes = std::move(results.common_prefixes);
-  }
-  *is_truncated = results.is_truncated;
-  marker = params.marker.name;
-
-  return 0;
-}
-
-int RadosBucket::abort_multiparts(const DoutPrefixProvider* dpp,
-                                  CephContext* cct)
-{
-  constexpr int max = 1000;
-  int ret, num_deleted = 0;
-  vector<std::unique_ptr<MultipartUpload>> uploads;
-  string marker;
-  bool is_truncated;
-
-  const std::string empty_delim;
-  const std::string empty_prefix;
-
-  do {
-    ret = list_multiparts(dpp, empty_prefix, marker, empty_delim,
-                          max, uploads, nullptr, &is_truncated);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << __func__ <<
-        " ERROR : calling list_bucket_multiparts; ret=" << ret <<
-        "; bucket=\"" << this << "\"" << dendl;
-      return ret;
-    }
-    ldpp_dout(dpp, 20) << __func__ <<
-      " INFO: aborting and cleaning up multipart upload(s); bucket=\"" <<
-      this << "\"; uploads.size()=" << uploads.size() <<
-      "; is_truncated=" << is_truncated << dendl;
-
-    if (!uploads.empty()) {
-      for (const auto& upload : uploads) {
-        ret = upload->abort(dpp, cct);
-        if (ret < 0) {
-          // we're doing a best-effort; if something cannot be found,
-          // log it and keep moving forward
-          if (ret != -ENOENT && ret != -ERR_NO_SUCH_UPLOAD) {
-            ldpp_dout(dpp, 0) << __func__ <<
-              " ERROR : failed to abort and clean-up multipart upload \"" <<
-              upload->get_meta() << "\"" << dendl;
-            return ret;
-          } else {
-            ldpp_dout(dpp, 10) << __func__ <<
-              " NOTE : unable to find part(s) of "
-              "aborted multipart upload of \"" << upload->get_meta() <<
-              "\" for cleaning up" << dendl;
-          }
-        }
-        num_deleted++;
-      }
-      if (num_deleted) {
-        ldpp_dout(dpp, 0) << __func__ <<
-          " WARNING : aborted " << num_deleted <<
-          " incomplete multipart uploads" << dendl;
-      }
-    }
-  } while (is_truncated);
-
-  return 0;
-}
-
-std::unique_ptr<User> RadosStore::get_user(const rgw_user &u)
-{
-  return std::make_unique<RadosUser>(this, u);
-}
-
-std::string RadosStore::get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y)
-{
-  return getRados()->get_cluster_fsid(dpp, y);
-}
-
-int RadosStore::get_user_by_access_key(const DoutPrefixProvider* dpp, const std::string& key, optional_yield y, std::unique_ptr<User>* user)
-{
-  RGWUserInfo uinfo;
-  User* u;
-  RGWObjVersionTracker objv_tracker;
-
-  int r = ctl()->user->get_info_by_access_key(dpp, key, &uinfo, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker));
-  if (r < 0)
-    return r;
-
-  u = new RadosUser(this, uinfo);
-  if (!u)
-    return -ENOMEM;
-
-  u->get_version_tracker() = objv_tracker;
-
-  user->reset(u);
-  return 0;
-}
-
-int RadosStore::get_user_by_email(const DoutPrefixProvider* dpp, const std::string& email, optional_yield y, std::unique_ptr<User>* user)
-{
-  RGWUserInfo uinfo;
-  User* u;
-  RGWObjVersionTracker objv_tracker;
-
-  int r = ctl()->user->get_info_by_email(dpp, email, &uinfo, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker));
-  if (r < 0)
-    return r;
-
-  u = new RadosUser(this, uinfo);
-  if (!u)
-    return -ENOMEM;
-
-  u->get_version_tracker() = objv_tracker;
-
-  user->reset(u);
-  return 0;
-}
-
-int RadosStore::get_user_by_swift(const DoutPrefixProvider* dpp, const std::string& user_str, optional_yield y, std::unique_ptr<User>* user)
-{
-  RGWUserInfo uinfo;
-  User* u;
-  RGWObjVersionTracker objv_tracker;
-
-  int r = ctl()->user->get_info_by_swift(dpp, user_str, &uinfo, y,
-                                         RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker));
-  if (r < 0)
-    return r;
-
-  u = new RadosUser(this, uinfo);
-  if (!u)
-    return -ENOMEM;
-
-  u->get_version_tracker() = objv_tracker;
-
-  user->reset(u);
-  return 0;
-}
-
-std::unique_ptr<Object> RadosStore::get_object(const rgw_obj_key& k)
-{
-  return std::make_unique<RadosObject>(this, k);
-}
-
-int RadosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y)
-{
-  int ret;
-  Bucket* bp;
-
-  bp = new RadosBucket(this, b, u);
-  ret = bp->load_bucket(dpp, y);
-  if (ret < 0) {
-    delete bp;
-    return ret;
-  }
-
-  bucket->reset(bp);
-  return 0;
-}
-
-int RadosStore::get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr<Bucket>* bucket)
-{
-  Bucket* bp;
-
-  bp = new RadosBucket(this, i, u);
-  /* Don't need to fetch the bucket info, use the provided one */
-
-  bucket->reset(bp);
-  return 0;
-}
-
-int RadosStore::get_bucket(const DoutPrefixProvider* dpp, User* u, const std::string& tenant, const std::string& name, std::unique_ptr<Bucket>* bucket, optional_yield y)
-{
-  rgw_bucket b;
-
-  b.tenant = tenant;
-  b.name = name;
-
-  return get_bucket(dpp, u, b, bucket, y);
-}
-
-bool RadosStore::is_meta_master()
-{
-  return svc()->zone->is_meta_master();
-}
-
-int RadosStore::forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv,
-                                          bufferlist& in_data,
-                                          JSONParser* jp, req_info& info,
-                                          optional_yield y)
-{
-  if (is_meta_master()) {
-    /* We're master, don't forward */
-    return 0;
-  }
-
-  if (!svc()->zone->get_master_conn()) {
-    ldpp_dout(dpp, 0) << "rest connection is invalid" << dendl;
-    return -EINVAL;
-  }
-  ldpp_dout(dpp, 0) << "sending request to master zonegroup" << dendl;
-  bufferlist response;
-  std::string uid_str = user->get_id().to_str();
-#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
-  int ret = svc()->zone->get_master_conn()->forward(dpp, rgw_user(uid_str), info,
-                                                    objv, MAX_REST_RESPONSE,
-                                                    &in_data, &response, y);
-  if (ret < 0)
-    return ret;
-
-  ldpp_dout(dpp, 20) << "response: " << response.c_str() << dendl;
-  if (jp && !jp->parse(response.c_str(), response.length())) {
-    ldpp_dout(dpp, 0) << "failed parsing response from master zonegroup" << dendl;
-    return -EINVAL;
-  }
-
-  return 0;
-}
-
-int RadosStore::forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv,
-                                              bufferlist& in_data,
-                                              RGWXMLDecoder::XMLParser* parser, req_info& info,
-                                              optional_yield y)
-{
-  if (is_meta_master()) {
-    /* We're master, don't forward */
-    return 0;
-  }
-
-  if (!svc()->zone->get_master_conn()) {
-    ldpp_dout(dpp, 0) << "rest connection is invalid" << dendl;
-    return -EINVAL;
-  }
-  ldpp_dout(dpp, 0) << "sending request to master zonegroup" << dendl;
-  bufferlist response;
-#define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
-  int ret = svc()->zone->get_master_conn()->forward_iam_request(dpp, key, info,
-                                                                objv, MAX_REST_RESPONSE,
-                                                                &in_data, &response, y);
-  if (ret < 0)
-    return ret;
-
-  ldpp_dout(dpp, 20) << "response: " << response.c_str() << dendl;
-
-  std::string r = response.c_str();
-  std::string str_to_search = "&quot;";
-  std::string str_to_replace = "\"";
-  boost::replace_all(r, str_to_search, str_to_replace);
-  ldpp_dout(dpp, 20) << "r: " << r.c_str() << dendl;
-
-  if (parser && !parser->parse(r.c_str(), r.length(), 1)) {
-    ldpp_dout(dpp, 0) << "ERROR: failed to parse response from master zonegroup" << dendl;
-    return -EIO;
-  }
-
-  return 0;
-}
-
-std::string RadosStore::zone_unique_id(uint64_t unique_num)
-{
-  return svc()->zone_utils->unique_id(unique_num);
-}
-
-std::string RadosStore::zone_unique_trans_id(const uint64_t unique_num)
-{
-  return svc()->zone_utils->unique_trans_id(unique_num);
-}
-
-int RadosStore::get_zonegroup(const std::string& id,
-                              std::unique_ptr<ZoneGroup>* zonegroup)
-{
-  ZoneGroup* zg;
-  RGWZoneGroup rzg;
-  int r = svc()->zone->get_zonegroup(id, rzg);
-  if (r < 0)
-    return r;
-
-  zg = new RadosZoneGroup(this, rzg);
-  if (!zg)
-    return -ENOMEM;
-
-  zonegroup->reset(zg);
-  return 0;
-}
-
-int RadosStore::list_all_zones(const DoutPrefixProvider* dpp, std::list<std::string>& zone_ids)
-{
-  return svc()->zone->list_zones(dpp, zone_ids);
-}
-
-int RadosStore::cluster_stat(RGWClusterStat& stats)
-{
-  rados_cluster_stat_t rados_stats;
-  int ret;
-
-  ret = rados->get_rados_handle()->cluster_stat(rados_stats);
-  if (ret < 0)
-    return ret;
-
-  stats.kb = rados_stats.kb;
-  stats.kb_used = rados_stats.kb_used;
-  stats.kb_avail = rados_stats.kb_avail;
-  stats.num_objects = rados_stats.num_objects;
-
-  return ret;
-}
-
-std::unique_ptr<Lifecycle> RadosStore::get_lifecycle(void)
-{
-  return std::make_unique<RadosLifecycle>(this);
-}
-
-std::unique_ptr<Completions> RadosStore::get_completions(void)
-{
-  return std::make_unique<RadosCompletions>();
-}
-
-std::unique_ptr<Notification> RadosStore::get_notification(
-    rgw::sal::Object* obj, rgw::sal::Object* src_obj, req_state* s, rgw::notify::EventType event_type, const std::string* object_name)
-{
-  return std::make_unique<RadosNotification>(s, this, obj, src_obj, s, event_type, object_name);
-}
-
-std::unique_ptr<Notification> RadosStore::get_notification(const DoutPrefixProvider* dpp, rgw::sal::Object* obj, rgw::sal::Object* src_obj, rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y)
-{
-  return std::make_unique<RadosNotification>(dpp, this, obj, src_obj, event_type, _bucket, _user_id, _user_tenant, _req_id, y);
-}
-
-int RadosStore::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj)
-{
-  return rados->delete_raw_obj(dpp, obj);
-}
-
-int RadosStore::delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, Completions* aio)
-{
-  RadosCompletions* raio = static_cast<RadosCompletions*>(aio);
-
-  return rados->delete_raw_obj_aio(dpp, obj, raio->handles);
-}
-
-void RadosStore::get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj)
-{
-  rados->obj_to_raw(placement_rule, obj, raw_obj);
-}
-
-int RadosStore::get_raw_chunk_size(const DoutPrefixProvider* dpp, const rgw_raw_obj& obj, uint64_t* chunk_size)
-{
-  return rados->get_max_chunk_size(obj.pool, chunk_size, dpp);
-}
-
-int RadosStore::initialize(CephContext *cct, const DoutPrefixProvider *dpp)
-{
-  std::unique_ptr<ZoneGroup> zg =
-    std::make_unique<RadosZoneGroup>(this, svc()->zone->get_zonegroup());
-  zone = make_unique<RadosZone>(this, std::move(zg));
-  return 0;
-}
-
-int RadosStore::log_usage(const DoutPrefixProvider *dpp, map<rgw_user_bucket, RGWUsageBatch>& usage_info)
-{
-  return rados->log_usage(dpp, usage_info);
-}
-
-int RadosStore::log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl)
-{
-  rgw_raw_obj obj(svc()->zone->get_zone_params().log_pool, oid);
-
-  int ret = rados->append_async(dpp, obj, bl.length(), bl);
-  if (ret == -ENOENT) {
-    ret = rados->create_pool(dpp, svc()->zone->get_zone_params().log_pool);
-    if (ret < 0)
-      return ret;
-    // retry
-    ret = rados->append_async(dpp, obj, bl.length(), bl);
-  }
-
-  return ret;
-}
-
-int RadosStore::register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type,
-                                        const map<std::string, std::string>& meta)
-{
-  return rados->register_to_service_map(dpp, daemon_type, meta);
-}
-
-void RadosStore::get_quota(RGWQuota& quota)
-{
-  quota.bucket_quota = svc()->quota->get_bucket_quota();
-  quota.user_quota = svc()->quota->get_user_quota();
-}
-
-void RadosStore::get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit)
-{
-  bucket_ratelimit = svc()->zone->get_current_period().get_config().bucket_ratelimit;
-  user_ratelimit = svc()->zone->get_current_period().get_config().user_ratelimit;
-  anon_ratelimit = svc()->zone->get_current_period().get_config().anon_ratelimit;
-}
-
-int RadosStore::set_buckets_enabled(const DoutPrefixProvider* dpp, vector<rgw_bucket>& buckets, bool enabled)
-{
-  return rados->set_buckets_enabled(buckets, enabled, dpp);
-}
-
-int RadosStore::get_sync_policy_handler(const DoutPrefixProvider* dpp,
-                                        std::optional<rgw_zone_id> zone,
-                                        std::optional<rgw_bucket> bucket,
-                                        RGWBucketSyncPolicyHandlerRef* phandler,
-                                        optional_yield y)
-{
-  return ctl()->bucket->get_sync_policy_handler(zone, bucket, phandler, y, dpp);
-}
-
-RGWDataSyncStatusManager* RadosStore::get_data_sync_manager(const rgw_zone_id& source_zone)
-{
-  return rados->get_data_sync_manager(source_zone);
-}
-
-int RadosStore::read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch,
-                               uint32_t max_entries, bool* is_truncated,
-                               RGWUsageIter& usage_iter,
-                               map<rgw_user_bucket, rgw_usage_log_entry>& usage)
-{
-  rgw_user uid;
-  std::string bucket_name;
-
-  return rados->read_usage(dpp, uid, bucket_name, start_epoch, end_epoch, max_entries,
-                           is_truncated, usage_iter, usage);
-}
-
-int RadosStore::trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch)
-{
-  rgw_user uid;
-  std::string bucket_name;
-
-  return rados->trim_usage(dpp, uid, bucket_name, start_epoch, end_epoch);
-}
-
-int RadosStore::get_config_key_val(std::string name, bufferlist* bl)
-{
-  return svc()->config_key->get(name, true, bl);
-}
-
-int RadosStore::meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle)
-{
-  return ctl()->meta.mgr->list_keys_init(dpp, section, marker, phandle);
-}
-
-int RadosStore::meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, list<std::string>& keys, bool* truncated)
-{
-  return ctl()->meta.mgr->list_keys_next(dpp, handle, max, keys, truncated);
-}
-
-void RadosStore::meta_list_keys_complete(void* handle)
-{
-  ctl()->meta.mgr->list_keys_complete(handle);
-}
-
-std::string RadosStore::meta_get_marker(void* handle)
-{
-  return ctl()->meta.mgr->get_marker(handle);
-}
-
-int RadosStore::meta_remove(const DoutPrefixProvider* dpp, std::string& metadata_key, optional_yield y)
-{
-  return ctl()->meta.mgr->remove(metadata_key, y, dpp);
-}
-
-void RadosStore::finalize(void)
-{
-  if (rados)
-    rados->finalize();
-}
-
-void RadosStore::register_admin_apis(RGWRESTMgr* mgr)
-{
-  mgr->register_resource("user", new RGWRESTMgr_User);
-  mgr->register_resource("bucket", new RGWRESTMgr_Bucket);
-  /*Registering resource for /admin/metadata */
-  mgr->register_resource("metadata", new RGWRESTMgr_Metadata);
-  mgr->register_resource("log", new RGWRESTMgr_Log);
-  /* XXX These may become global when cbodley is done with his zone work */
-  mgr->register_resource("config", new RGWRESTMgr_Config);
-  mgr->register_resource("realm", new RGWRESTMgr_Realm);
-  mgr->register_resource("ratelimit", new RGWRESTMgr_Ratelimit);
-}
-
-std::unique_ptr<LuaManager> RadosStore::get_lua_manager()
-{
-  return std::make_unique<RadosLuaManager>(this);
-}
-
-std::unique_ptr<RGWRole> RadosStore::get_role(std::string name,
-                                              std::string tenant,
-                                              std::string path,
-                                              std::string trust_policy,
-                                              std::string max_session_duration_str,
-                                              std::multimap<std::string, std::string> tags)
-{
-  return std::make_unique<RadosRole>(this, name, tenant, path, trust_policy, max_session_duration_str, tags);
-}
-
-std::unique_ptr<RGWRole> RadosStore::get_role(std::string id)
-{
-  return std::make_unique<RadosRole>(this, id);
-}
-
-std::unique_ptr<RGWRole> RadosStore::get_role(const RGWRoleInfo& info)
-{
-  return std::make_unique<RadosRole>(this, info);
-}
-
-int RadosStore::get_roles(const DoutPrefixProvider *dpp,
-                          optional_yield y,
-                          const std::string& path_prefix,
-                          const std::string& tenant,
-                          vector<std::unique_ptr<RGWRole>>& roles)
-{
-  auto pool = svc()->zone->get_zone_params().roles_pool;
-  std::string prefix;
-
-  // List all roles if path prefix is empty
-  if (! path_prefix.empty()) {
-    prefix = tenant + RGWRole::role_path_oid_prefix + path_prefix;
-  } else {
-    prefix = tenant + RGWRole::role_path_oid_prefix;
-  }
-
-  //Get the filtered objects
-  list<std::string> result;
-  bool is_truncated;
-  RGWListRawObjsCtx ctx;
-  do {
-    list<std::string> oids;
-    int r = rados->list_raw_objects(dpp, pool, prefix, 1000, ctx, oids, &is_truncated);
-    if (r < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: listing filtered objects failed: "
-                        << prefix << ": " << cpp_strerror(-r) << dendl;
-      return r;
-    }
-    for (const auto& iter : oids) {
-      result.push_back(iter.substr(RGWRole::role_path_oid_prefix.size()));
-    }
-  } while (is_truncated);
-
-  for (const auto& it : result) {
-    //Find the role oid prefix from the end
-    size_t pos = it.rfind(RGWRole::role_oid_prefix);
-    if (pos == std::string::npos) {
-      continue;
-    }
-    // Split the result into path and info_oid + id
-    std::string path = it.substr(0, pos);
-
-    /*Make sure that prefix is part of path (False results could've been returned)
-      because of the role info oid + id appended to the path)*/
-    if(path_prefix.empty() || path.find(path_prefix) != std::string::npos) {
-      //Get id from info oid prefix + id
-      std::string id = it.substr(pos + RGWRole::role_oid_prefix.length());
-
-      std::unique_ptr<RGWRole> role = get_role(id);
-      int ret = role->read_info(dpp, y);
-      if (ret < 0) {
-        return ret;
-      }
-      roles.push_back(std::move(role));
-    }
-  }
-
-  return 0;
-}
-
-std::unique_ptr<RGWOIDCProvider> RadosStore::get_oidc_provider()
-{
-  return std::make_unique<RadosOIDCProvider>(this);
-}
-
-int RadosStore::get_oidc_providers(const DoutPrefixProvider *dpp,
-                                   const std::string& tenant,
-                                   vector<std::unique_ptr<RGWOIDCProvider>>& providers)
-{
-  std::string prefix = tenant + RGWOIDCProvider::oidc_url_oid_prefix;
-  auto pool = svc()->zone->get_zone_params().oidc_pool;
-
-  //Get the filtered objects
-  list<std::string> result;
-  bool is_truncated;
-  RGWListRawObjsCtx ctx;
-  do {
-    list<std::string> oids;
-    int r = rados->list_raw_objects(dpp, pool, prefix, 1000, ctx, oids, &is_truncated);
-    if (r < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: listing filtered objects failed: OIDC pool: "
-                        << pool.name << ": " << prefix << ": " << cpp_strerror(-r) << dendl;
-      return r;
-    }
-    for (const auto& iter : oids) {
-      std::unique_ptr<RGWOIDCProvider> provider = get_oidc_provider();
-      bufferlist bl;
-
-      r = rgw_get_system_obj(svc()->sysobj, pool, iter, bl, nullptr, nullptr, null_yield, dpp);
-      if (r < 0) {
-        return r;
-      }
-
-      try {
-        using ceph::decode;
-        auto iter = bl.cbegin();
-        decode(*provider, iter);
-      } catch (buffer::error& err) {
-        ldpp_dout(dpp, 0) << "ERROR: failed to decode oidc provider info from pool: "
-                          << pool.name << ": " << iter << dendl;
-        return -EIO;
-      }
-
-      providers.push_back(std::move(provider));
-    }
-  } while (is_truncated);
-
-  return 0;
-}
-
-std::unique_ptr<Writer> RadosStore::get_append_writer(const DoutPrefixProvider *dpp,
-                                                      optional_yield y,
-                                                      std::unique_ptr<rgw::sal::Object> _head_obj,
-                                                      const rgw_user& owner,
-                                                      const rgw_placement_rule *ptail_placement_rule,
-                                                      const std::string& unique_tag,
-                                                      uint64_t position,
-                                                      uint64_t *cur_accounted_size)
-{
-  auto aio = rgw::make_throttle(ctx()->_conf->rgw_put_obj_min_window_size, y);
-  return std::make_unique<RadosAppendWriter>(dpp, y,
-                                             std::move(_head_obj),
-                                             this, std::move(aio), owner,
-                                             ptail_placement_rule,
-                                             unique_tag, position,
-                                             cur_accounted_size);
-}
-
-std::unique_ptr<Writer> RadosStore::get_atomic_writer(const DoutPrefixProvider *dpp,
-                                                      optional_yield y,
-                                                      std::unique_ptr<rgw::sal::Object> _head_obj,
-                                                      const rgw_user& owner,
-                                                      const rgw_placement_rule *ptail_placement_rule,
-                                                      uint64_t olh_epoch,
-                                                      const std::string& unique_tag)
-{
-  auto aio = rgw::make_throttle(ctx()->_conf->rgw_put_obj_min_window_size, y);
-  return std::make_unique<RadosAtomicWriter>(dpp, y,
-                                             std::move(_head_obj),
-                                             this, std::move(aio), owner,
-                                             ptail_placement_rule,
-                                             olh_epoch, unique_tag);
-}
-
-const std::string& RadosStore::get_compression_type(const rgw_placement_rule& rule)
-{
-  return svc()->zone->get_zone_params().get_compression_type(rule);
-}
-
-bool RadosStore::valid_placement(const rgw_placement_rule& rule)
-{
-  return svc()->zone->get_zone_params().valid_placement(rule);
-}
-
-int RadosStore::get_obj_head_ioctx(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx* ioctx)
-{
-  return rados->get_obj_head_ioctx(dpp, bucket_info, obj, ioctx);
-}
-
-RadosObject::~RadosObject()
-{
-  if (rados_ctx_owned)
-    delete rados_ctx;
-}
-
-int RadosObject::get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **pstate, optional_yield y, bool follow_olh)
-{
-  int ret = store->getRados()->get_obj_state(dpp, rados_ctx, bucket->get_info(), this, pstate, &manifest, follow_olh, y);
-  if (ret < 0) {
-    return ret;
-  }
-
-  /* Don't overwrite obj, atomic, or prefetch */
-  rgw_obj obj = get_obj();
-  bool is_atomic = state.is_atomic;
-  bool prefetch_data = state.prefetch_data;
-
-  state = **pstate;
-
-  state.obj = obj;
-  state.is_atomic = is_atomic;
-  state.prefetch_data = prefetch_data;
-  return ret;
-}
-
-int RadosObject::read_attrs(const DoutPrefixProvider* dpp, RGWRados::Object::Read &read_op, optional_yield y, rgw_obj* target_obj)
-{
-  read_op.params.attrs = &attrs;
-  read_op.params.target_obj = target_obj;
-  read_op.params.obj_size = &state.size;
-  read_op.params.lastmod = &state.mtime;
-
-  return read_op.prepare(y, dpp);
-}
-
-int RadosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y)
-{
-  Attrs empty;
-  return store->getRados()->set_attrs(dpp, rados_ctx,
-                                      bucket->get_info(),
-                                      this,
-                                      setattrs ? *setattrs : empty,
-                                      delattrs ? delattrs : nullptr,
-                                      y);
-}
-
-int RadosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj)
-{
-  RGWRados::Object op_target(store->getRados(), bucket, *rados_ctx, this);
-  RGWRados::Object::Read read_op(&op_target);
-
-  return read_attrs(dpp, read_op, y, target_obj);
-}
-
-int RadosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp)
-{
-  rgw_obj target = get_obj();
-  rgw_obj save = get_obj();
-  int r = get_obj_attrs(y, dpp, &target);
-  if (r < 0) {
-    return r;
-  }
-
-  /* Temporarily set target */
-  state.obj = target;
-  set_atomic();
-  attrs[attr_name] = attr_val;
-  r = set_obj_attrs(dpp, &attrs, nullptr, y);
-  /* Restore target */
-  state.obj = save;
-
-  return r;
-}
-
-int RadosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y)
-{
-  Attrs rmattr;
-  bufferlist bl;
-
-  set_atomic();
-  rmattr[attr_name] = bl;
-  return set_obj_attrs(dpp, nullptr, &rmattr, y);
-}
-
-bool RadosObject::is_expired() {
-  auto iter = attrs.find(RGW_ATTR_DELETE_AT);
-  if (iter != attrs.end()) {
-    utime_t delete_at;
-    try {
-      auto bufit = iter->second.cbegin();
-      decode(delete_at, bufit);
-    } catch (buffer::error& err) {
-      ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl;
-      return false;
-    }
-
-    if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-void RadosObject::gen_rand_obj_instance_name()
-{
-  store->getRados()->gen_rand_obj_instance_name(&state.obj.key);
-}
-
-void RadosObject::raw_obj_to_obj(const rgw_raw_obj& raw_obj)
-{
-  rgw_obj tobj = get_obj();
-  RGWSI_Tier_RADOS::raw_obj_to_obj(get_bucket()->get_key(), raw_obj, &tobj);
-  set_key(tobj.key);
-}
-
-void RadosObject::get_raw_obj(rgw_raw_obj* raw_obj)
-{
-  store->getRados()->obj_to_raw((bucket->get_info()).placement_rule, get_obj(), raw_obj);
-}
-
-int RadosObject::omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count,
-                               std::map<std::string, bufferlist> *m,
-                               bool* pmore, optional_yield y)
-{
-  rgw_raw_obj raw_obj;
-  get_raw_obj(&raw_obj);
-  auto sysobj = store->svc()->sysobj->get_obj(raw_obj);
-
-  return sysobj.omap().get_vals(dpp, marker, count, m, pmore, y);
-}
-
-int RadosObject::omap_get_all(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist> *m,
-                              optional_yield y)
-{
-  rgw_raw_obj raw_obj;
-  get_raw_obj(&raw_obj);
-  auto sysobj = store->svc()->sysobj->get_obj(raw_obj);
-
-  return sysobj.omap().get_all(dpp, m, y);
-}
-
-int RadosObject::omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
-                                       const std::set<std::string>& keys,
-                                       Attrs* vals)
-{
-  int ret;
-  rgw_raw_obj head_obj;
-  librados::IoCtx cur_ioctx;
-  rgw_obj obj = get_obj();
-
-  store->getRados()->obj_to_raw(bucket->get_placement_rule(), obj, &head_obj);
-  ret = store->get_obj_head_ioctx(dpp, bucket->get_info(), obj, &cur_ioctx);
-  if (ret < 0) {
-    return ret;
-  }
-
-  return cur_ioctx.omap_get_vals_by_keys(oid, keys, vals);
-}
-
-int RadosObject::omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val,
-                                     bool must_exist, optional_yield y)
-{
-  rgw_raw_obj raw_meta_obj;
-  rgw_obj obj = get_obj();
-
-  store->getRados()->obj_to_raw(bucket->get_placement_rule(), obj, &raw_meta_obj);
-
-  auto sysobj = store->svc()->sysobj->get_obj(raw_meta_obj);
-
-  return sysobj.omap().set_must_exist(must_exist).set(dpp, key, val, y);
-}
-
-std::unique_ptr<MPSerializer> RadosObject::get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name)
-{
-  return std::make_unique<MPRadosSerializer>(dpp, store, this, lock_name);
-}
-
-int RadosObject::transition(Bucket* bucket,
-                            const rgw_placement_rule& placement_rule,
-                            const real_time& mtime,
-                            uint64_t olh_epoch,
-                            const DoutPrefixProvider* dpp,
-                            optional_yield y)
-{
-  return store->getRados()->transition_obj(*rados_ctx, bucket, *this, placement_rule, mtime, olh_epoch, dpp, y);
-}
-
-int RadosObject::transition_to_cloud(Bucket* bucket,
-                                     rgw::sal::PlacementTier* tier,
-                                     rgw_bucket_dir_entry& o,
-                                     std::set<std::string>& cloud_targets,
-                                     CephContext* cct,
-                                     bool update_object,
-                                     const DoutPrefixProvider* dpp,
-                                     optional_yield y)
-{
-  /* init */
-  rgw::sal::RadosPlacementTier* rtier = static_cast<rgw::sal::RadosPlacementTier*>(tier);
-  string id = "cloudid";
-  string endpoint = rtier->get_rt().t.s3.endpoint;
-  RGWAccessKey key = rtier->get_rt().t.s3.key;
-  string region = rtier->get_rt().t.s3.region;
-  HostStyle host_style = rtier->get_rt().t.s3.host_style;
-  string bucket_name = rtier->get_rt().t.s3.target_path;
-  const rgw::sal::ZoneGroup& zonegroup = store->get_zone()->get_zonegroup();
-
-  if (bucket_name.empty()) {
-    bucket_name = "rgwx-" + zonegroup.get_name() + "-" + tier->get_storage_class() +
-                  "-cloud-bucket";
-    boost::algorithm::to_lower(bucket_name);
-  }
-
-  /* Create RGW REST connection */
-  S3RESTConn conn(cct, id, { endpoint }, key, zonegroup.get_id(), region, host_style);
-
-  RGWLCCloudTierCtx tier_ctx(cct, dpp, o, store, bucket->get_info(),
-                             this, conn, bucket_name,
-                             rtier->get_rt().t.s3.target_storage_class);
-  tier_ctx.acl_mappings = rtier->get_rt().t.s3.acl_mappings;
-  tier_ctx.multipart_min_part_size = rtier->get_rt().t.s3.multipart_min_part_size;
-  tier_ctx.multipart_sync_threshold = rtier->get_rt().t.s3.multipart_sync_threshold;
-  tier_ctx.storage_class = tier->get_storage_class();
-
-  ldpp_dout(dpp, 0) << "Transitioning object(" << o.key << ") to the cloud endpoint(" << endpoint << ")" << dendl;
-
-  /* Transition object to cloud end point */
-  int ret = rgw_cloud_tier_transfer_object(tier_ctx, cloud_targets);
-
-  if (ret < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: failed to transfer object(" << o.key << ") to the cloud endpoint(" << endpoint << ") ret=" << ret << dendl;
-    return ret;
-  }
-
-  if (update_object) {
-    real_time read_mtime;
-
-    std::unique_ptr<rgw::sal::Object::ReadOp> read_op(get_read_op());
-    read_op->params.lastmod = &read_mtime;
-
-    ret = read_op->prepare(null_yield, dpp);
-    if (ret < 0) {
-      ldpp_dout(dpp, 0) << "ERROR: Updating tier object(" << o.key << ") failed ret=" << ret << dendl;
-      return ret;
-    }
-
-    if (read_mtime != tier_ctx.o.meta.mtime) {
-      /* raced */
-      ldpp_dout(dpp, 0) << "ERROR: Updating tier object(" << o.key << ") failed ret=" << -ECANCELED << dendl;
-      return -ECANCELED;
-    }
-
-    rgw_placement_rule target_placement;
-    target_placement.inherit_from(tier_ctx.bucket_info.placement_rule);
-    target_placement.storage_class = tier->get_storage_class();
-
-    ret = write_cloud_tier(dpp, null_yield, tier_ctx.o.versioned_epoch,
-                           tier, tier_ctx.is_multipart_upload,
-                           target_placement, tier_ctx.obj);
-
-  }
-
-  return ret;
-}
-
-int RadosObject::write_cloud_tier(const DoutPrefixProvider* dpp,
-                                  optional_yield y,
-                                  uint64_t olh_epoch,
-                                  PlacementTier* tier,
-                                  bool is_multipart_upload,
-                                  rgw_placement_rule& target_placement,
-                                  Object* head_obj)
-{
-  rgw::sal::RadosPlacementTier* rtier = static_cast<rgw::sal::RadosPlacementTier*>(tier);
-  map<string, bufferlist> attrs = get_attrs();
-  RGWRados::Object op_target(store->getRados(), bucket, *rados_ctx, this);
-  RGWRados::Object::Write obj_op(&op_target);
-
-  obj_op.meta.modify_tail = true;
-  obj_op.meta.flags = PUT_OBJ_CREATE;
-  obj_op.meta.category = RGWObjCategory::CloudTiered;
-  obj_op.meta.delete_at = real_time();
-  bufferlist blo;
-  obj_op.meta.data = &blo;
-  obj_op.meta.if_match = NULL;
-  obj_op.meta.if_nomatch = NULL;
-  obj_op.meta.user_data = NULL;
-  obj_op.meta.zones_trace = NULL;
-  obj_op.meta.delete_at = real_time();
-  obj_op.meta.olh_epoch = olh_epoch;
-
-  RGWObjManifest *pmanifest;
-  RGWObjManifest manifest;
-
-  pmanifest = &manifest;
-  RGWObjTier tier_config;
-  tier_config.name = tier->get_storage_class();
-  tier_config.tier_placement = rtier->get_rt();
-  tier_config.is_multipart_upload = is_multipart_upload;
-
-  pmanifest->set_tier_type("cloud-s3");
-  pmanifest->set_tier_config(tier_config);
-
-  /* check if its necessary */
-  pmanifest->set_head(target_placement, head_obj->get_obj(), 0);
-  pmanifest->set_tail_placement(target_placement, head_obj->get_obj().bucket);
-  pmanifest->set_obj_size(0);
-  obj_op.meta.manifest = pmanifest;
-
-  /* update storage class */
-  bufferlist bl;
-  bl.append(tier->get_storage_class());
-  attrs[RGW_ATTR_STORAGE_CLASS] = bl;
-
-  attrs.erase(RGW_ATTR_ID_TAG);
-  attrs.erase(RGW_ATTR_TAIL_TAG);
-
-  return obj_op.write_meta(dpp, 0, 0, attrs, y);
-}
-
-int RadosObject::get_max_chunk_size(const DoutPrefixProvider* dpp, rgw_placement_rule placement_rule, uint64_t* max_chunk_size, uint64_t* alignment)
-{
-  return store->getRados()->get_max_chunk_size(placement_rule, get_obj(), max_chunk_size, dpp, alignment);
-}
-
-void RadosObject::get_max_aligned_size(uint64_t size, uint64_t alignment,
-                                       uint64_t* max_size)
-{
-  store->getRados()->get_max_aligned_size(size, alignment, max_size);
-}
-
-bool RadosObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2)
-{
-  rgw_obj obj;
-  rgw_pool p1, p2;
-
-  obj = get_obj();
-
-  if (r1 == r2)
-    return true;
-
-  if (!store->getRados()->get_obj_data_pool(r1, obj, &p1)) {
-    return false;
-  }
-  if (!store->getRados()->get_obj_data_pool(r2, obj, &p2)) {
-    return false;
-  }
-
-  return p1 == p2;
-}
-
-int RadosObject::dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f)
-{
-  int ret;
-  RGWObjManifest *amanifest{nullptr};
-  rgw_raw_obj head_obj;
-
-  RGWRados::Object op_target(store->getRados(), get_bucket(), *rados_ctx, this);
-  RGWRados::Object::Read parent_op(&op_target);
-  uint64_t obj_size;
-
-  parent_op.params.obj_size = &obj_size;
-  parent_op.params.attrs = &get_attrs();
-
-  ret = parent_op.prepare(y, dpp);
-  if (ret < 0) {
-    return ret;
-  }
-
-  head_obj = parent_op.state.head_obj;
-
-  ret = op_target.get_manifest(dpp, &amanifest, y);
-  if (ret < 0) {
-    return ret;
-  }
-
-  ::encode_json("head", head_obj, f);
-  ::encode_json("manifest", *amanifest, f);
-  f->open_array_section("data_location");
-  for (auto miter = amanifest->obj_begin(dpp); miter != amanifest->obj_end(dpp); ++miter) {
-    f->open_object_section("obj");
-    rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(store);
-    uint64_t ofs = miter.get_ofs();
-    uint64_t left = amanifest->get_obj_size() - ofs;
-    ::encode_json("ofs", miter.get_ofs(), f);
-    ::encode_json("loc", raw_loc, f);
-    ::encode_json("loc_ofs", miter.location_ofs(), f);
-    uint64_t loc_size = miter.get_stripe_size();
-    if (loc_size > left) {
-      loc_size = left;
-    }
-    ::encode_json("loc_size", loc_size, f);
-    f->close_section();
-  }
-  f->close_section();
-
-  return 0;
-}
-
-std::unique_ptr<Object::ReadOp> RadosObject::get_read_op()
-{
-  return std::make_unique<RadosObject::RadosReadOp>(this, rados_ctx);
-}
-
-RadosObject::RadosReadOp::RadosReadOp(RadosObject *_source, RGWObjectCtx *_rctx) :
-  source(_source),
-  rctx(_rctx),
-  op_target(_source->store->getRados(),
-            _source->get_bucket(),
-            *static_cast<RGWObjectCtx *>(rctx),
-            _source),
-  parent_op(&op_target)
-{ }
-
-int RadosObject::RadosReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp)
-{
-  uint64_t obj_size;
-
-  parent_op.conds.mod_ptr = params.mod_ptr;
-  parent_op.conds.unmod_ptr = params.unmod_ptr;
-  parent_op.conds.high_precision_time = params.high_precision_time;
-  parent_op.conds.mod_zone_id = params.mod_zone_id;
-  parent_op.conds.mod_pg_ver = params.mod_pg_ver;
-  parent_op.conds.if_match = params.if_match;
-  parent_op.conds.if_nomatch = params.if_nomatch;
-  parent_op.params.lastmod = params.lastmod;
-  parent_op.params.target_obj = params.target_obj;
-  parent_op.params.obj_size = &obj_size;
-  parent_op.params.attrs = &source->get_attrs();
-
-  int ret = parent_op.prepare(y, dpp);
-  if (ret < 0)
-    return ret;
-
-  source->set_key(parent_op.state.obj.key);
-  source->set_obj_size(obj_size);
-
-  return ret;
-}
-
-int RadosObject::RadosReadOp::read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider* dpp)
-{
-  return parent_op.read(ofs, end, bl, y, dpp);
-}
-
-int RadosObject::RadosReadOp::get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y)
-{
-  return parent_op.get_attr(dpp, name, dest, y);
-}
-
-std::unique_ptr<Object::DeleteOp> RadosObject::get_delete_op()
-{
-  return std::make_unique<RadosDeleteOp>(this);
-}
-
-RadosObject::RadosDeleteOp::RadosDeleteOp(RadosObject *_source) :
-  source(_source),
-  op_target(_source->store->getRados(),
-            _source->get_bucket(),
-            _source->get_ctx(),
-            _source),
-  parent_op(&op_target)
-{ }
-
-int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y)
-{
-  parent_op.params.bucket_owner = params.bucket_owner.get_id();
-  parent_op.params.versioning_status = params.versioning_status;
-  parent_op.params.obj_owner = params.obj_owner;
-  parent_op.params.olh_epoch = params.olh_epoch;
-  parent_op.params.marker_version_id = params.marker_version_id;
-  parent_op.params.bilog_flags = params.bilog_flags;
-  parent_op.params.remove_objs = params.remove_objs;
-  parent_op.params.expiration_time = params.expiration_time;
-  parent_op.params.unmod_since = params.unmod_since;
-  parent_op.params.mtime = params.mtime;
-  parent_op.params.high_precision_time = params.high_precision_time;
-  parent_op.params.zones_trace = params.zones_trace;
-  parent_op.params.abortmp = params.abortmp;
-  parent_op.params.parts_accounted_size = params.parts_accounted_size;
-
-  int ret = parent_op.delete_obj(y, dpp);
-  if (ret < 0)
-    return ret;
-
-  result.delete_marker = parent_op.result.delete_marker;
-  result.version_id = parent_op.result.version_id;
-
-  return ret;
-}
-
-int RadosObject::delete_object(const DoutPrefixProvider* dpp,
-                               optional_yield y,
-                               bool prevent_versioning)
-{
-  RGWRados::Object del_target(store->getRados(), bucket, *rados_ctx, this);
-  RGWRados::Object::Delete del_op(&del_target);
-
-  del_op.params.bucket_owner = bucket->get_info().owner;
-  del_op.params.versioning_status = prevent_versioning ? 0 : bucket->get_info().versioning_status();
-
-  return del_op.delete_obj(y, dpp);
-}
-
-int RadosObject::delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate,
-                                Completions* aio, bool keep_index_consistent,
-                                optional_yield y)
-{
-  RadosCompletions* raio = static_cast<RadosCompletions*>(aio);
-
-  return store->getRados()->delete_obj_aio(dpp, get_obj(), bucket->get_info(), astate,
-                                           raio->handles, keep_index_consistent, y);
-}
-
-int RadosObject::copy_object(User* user,
-                             req_info* info,
-                             const rgw_zone_id& source_zone,
-                             rgw::sal::Object* dest_object,
-                             rgw::sal::Bucket* dest_bucket,
-                             rgw::sal::Bucket* src_bucket,
-                             const rgw_placement_rule& dest_placement,
-                             ceph::real_time* src_mtime,
-                             ceph::real_time* mtime,
-                             const ceph::real_time* mod_ptr,
-                             const ceph::real_time* unmod_ptr,
-                             bool high_precision_time,
-                             const char* if_match,
-                             const char* if_nomatch,
-                             AttrsMod attrs_mod,
-                             bool copy_if_newer,
-                             Attrs& attrs,
-                             RGWObjCategory category,
-                             uint64_t olh_epoch,
-                             boost::optional<ceph::real_time> delete_at,
-                             std::string* version_id,
-                             std::string* tag,
-                             std::string* etag,
-                             void (*progress_cb)(off_t, void *),
-                             void* progress_data,
-                             const DoutPrefixProvider* dpp,
-                             optional_yield y)
-{
-  return store->getRados()->copy_obj(*rados_ctx,
-                                     user->get_id(),
-                                     info,
-                                     source_zone,
-                                     dest_object,
-                                     this,
-                                     dest_bucket,
-                                     src_bucket,
-                                     dest_placement,
-                                     src_mtime,
-                                     mtime,
-                                     mod_ptr,
-                                     unmod_ptr,
-                                     high_precision_time,
-                                     if_match,
-                                     if_nomatch,
-                                     static_cast<RGWRados::AttrsMod>(attrs_mod),
-                                     copy_if_newer,
-                                     attrs,
-                                     category,
-                                     olh_epoch,
-                                     (delete_at ? *delete_at : real_time()),
-                                     version_id,
-                                     tag,
-                                     etag,
-                                     progress_cb,
-                                     progress_data,
-                                     dpp,
-                                     y);
-}
-
-int RadosObject::RadosReadOp::iterate(const DoutPrefixProvider* dpp, int64_t ofs, int64_t end, RGWGetDataCB* cb, optional_yield y)
-{
-  return parent_op.iterate(dpp, ofs, end, cb, y);
-}
-
-int RadosObject::swift_versioning_restore(bool& restored,
-                                          const DoutPrefixProvider* dpp)
-{
-  return store->getRados()->swift_versioning_restore(*rados_ctx,
-                                                     bucket->get_owner()->get_id(),
-                                                     bucket,
-                                                     this,
-                                                     restored,
-                                                     dpp);
-}
-
-int RadosObject::swift_versioning_copy(const DoutPrefixProvider* dpp, optional_yield y)
-{
-  return store->getRados()->swift_versioning_copy(*rados_ctx,
-                                                  bucket->get_info().owner,
-                                                  bucket,
-                                                  this,
-                                                  dpp,
-                                                  y);
-}
-
-int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct)
-{
-  std::unique_ptr<rgw::sal::Object> meta_obj = get_meta_obj();
-  meta_obj->set_in_extra_data(true);
-  meta_obj->set_hash_source(mp_obj.get_key());
-  cls_rgw_obj_chain chain;
-  list<rgw_obj_index_key> remove_objs;
-  bool truncated;
-  int marker = 0;
-  int ret;
-  uint64_t parts_accounted_size = 0;
-
-  do {
-    ret = list_parts(dpp, cct, 1000, marker, &marker, &truncated);
-    if (ret < 0) {
-      ldpp_dout(dpp, 20) << __func__ << ": RadosMultipartUpload::list_parts returned " <<
-        ret << dendl;
-      return (ret == -ENOENT) ? -ERR_NO_SUCH_UPLOAD : ret;
-    }
-
-    for (auto part_it = parts.begin();
-         part_it != parts.end();
-         ++part_it) {
-      RadosMultipartPart* obj_part = dynamic_cast<RadosMultipartPart*>(part_it->second.get());
-      if (obj_part->info.manifest.empty()) {
-        std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(
-          rgw_obj_key(obj_part->oid, std::string(), RGW_OBJ_NS_MULTIPART));
-        obj->set_hash_source(mp_obj.get_key());
-        ret = obj->delete_object(dpp, null_yield);
-        if (ret < 0 && ret != -ENOENT)
-          return ret;
-      } else {
-        auto target = meta_obj->get_obj();
-        store->getRados()->update_gc_chain(dpp, target, obj_part->info.manifest, &chain);
-        RGWObjManifest::obj_iterator oiter = obj_part->info.manifest.obj_begin(dpp);
-        if (oiter != obj_part->info.manifest.obj_end(dpp)) {
-          std::unique_ptr<rgw::sal::Object> head = bucket->get_object(rgw_obj_key());
-          rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store);
-          dynamic_cast<RadosObject*>(head.get())->raw_obj_to_obj(raw_head);
-
-          rgw_obj_index_key key;
-          head->get_key().get_index_key(&key);
-          remove_objs.push_back(key);
-        }
-      }
-      parts_accounted_size += obj_part->info.accounted_size;
-    }
-  } while (truncated);
-
-  if (store->getRados()->get_gc() == nullptr) {
-    //Delete objects inline if gc hasn't been initialised (in case when bypass gc is specified)
-    store->getRados()->delete_objs_inline(dpp, chain, mp_obj.get_upload_id());
-  } else {
-    /* use upload id as tag and do it synchronously */
-    auto [ret, leftover_chain] = store->getRados()->send_chain_to_gc(chain, mp_obj.get_upload_id());
-    if (ret < 0 && leftover_chain) {
-      ldpp_dout(dpp, 5) << __func__ << ": gc->send_chain() returned " << ret << dendl;
-      if (ret == -ENOENT) {
-        return -ERR_NO_SUCH_UPLOAD;
-      }
-      //Delete objects inline if send chain to gc fails
-      store->getRados()->delete_objs_inline(dpp, *leftover_chain, mp_obj.get_upload_id());
-    }
-  }
-
-  std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op();
-  del_op->params.bucket_owner = bucket->get_acl_owner();
-  del_op->params.versioning_status = 0;
-  if (!remove_objs.empty()) {
-    del_op->params.remove_objs = &remove_objs;
-  }
-
-  del_op->params.abortmp = true;
-  del_op->params.parts_accounted_size = parts_accounted_size;
-
-  // and also remove the metadata obj
-  ret = del_op->delete_obj(dpp, null_yield);
-  if (ret < 0) {
-    ldpp_dout(dpp, 20) << __func__ << ": del_op.delete_obj returned " <<
-      ret << dendl;
-  }
-  return (ret == -ENOENT) ? -ERR_NO_SUCH_UPLOAD : ret;
-}
-
-std::unique_ptr<rgw::sal::Object> RadosMultipartUpload::get_meta_obj()
-{
-  return bucket->get_object(rgw_obj_key(get_meta(), string(), mp_ns));
-}
-
-int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs)
-{
-  int ret;
-  std::string oid = mp_obj.get_key();
-  RGWObjectCtx obj_ctx(store);
-
-  do {
-    char buf[33];
-    string tmp_obj_name;
-    std::unique_ptr<rgw::sal::Object> obj;
-    gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
-    std::string upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */
-    upload_id.append(buf);
-
-    mp_obj.init(oid, upload_id);
-    tmp_obj_name = mp_obj.get_meta();
-
-    obj = bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns));
-    // the meta object will be indexed with 0 size, we c
-    obj->set_in_extra_data(true);
-    obj->set_hash_source(oid);
-
-    RGWRados::Object op_target(store->getRados(),
-                               obj->get_bucket(),
-                               obj_ctx, obj.get());
-    RGWRados::Object::Write obj_op(&op_target);
-
-    op_target.set_versioning_disabled(true); /* no versioning for multipart meta */
-    obj_op.meta.owner = owner.get_id();
-    obj_op.meta.category = RGWObjCategory::MultiMeta;
-    obj_op.meta.flags = PUT_OBJ_CREATE_EXCL;
-    obj_op.meta.mtime = &mtime;
-
-    multipart_upload_info upload_info;
-    upload_info.dest_placement = dest_placement;
-
-    bufferlist bl;
-    encode(upload_info, bl);
-    obj_op.meta.data = &bl;
-
-    ret = obj_op.write_meta(dpp, bl.length(), 0, attrs, y);
-  } while (ret == -EEXIST);
-
-  return ret;
-}
-
-int RadosMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct,
-                                     int num_parts, int marker,
-                                     int *next_marker, bool *truncated,
-                                     bool assume_unsorted)
-{
-  map<string, bufferlist> parts_map;
-  map<string, bufferlist>::iterator iter;
-
-  std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(
-    rgw_obj_key(get_meta(), std::string(), RGW_OBJ_NS_MULTIPART));
-  obj->set_in_extra_data(true);
-
-  bool sorted_omap = is_v2_upload_id(get_upload_id()) && !assume_unsorted;
-
-  parts.clear();
-
-  int ret;
-  if (sorted_omap) {
-    string p;
-    p = "part.";
-    char buf[32];
-
-    snprintf(buf, sizeof(buf), "%08d", marker);
-    p.append(buf);
-
-    ret = obj->omap_get_vals(dpp, p, num_parts + 1, &parts_map,
-                             nullptr, null_yield);
-  } else {
-    ret = obj->omap_get_all(dpp, &parts_map, null_yield);
-  }
-  if (ret < 0) {
-    return ret;
-  }
-
-  int i;
-  int last_num = 0;
-
-  uint32_t expected_next = marker + 1;
-
-  for (i = 0, iter = parts_map.begin();
-       (i < num_parts || !sorted_omap) && iter != parts_map.end();
-       ++iter, ++i) {
-    bufferlist& bl = iter->second;
-    auto bli = bl.cbegin();
-    std::unique_ptr<RadosMultipartPart> part = std::make_unique<RadosMultipartPart>();
-    try {
-      decode(part->info, bli);
-    } catch (buffer::error& err) {
-      ldpp_dout(dpp, 0) << "ERROR: could not part info, caught buffer::error" <<
-        dendl;
-      return -EIO;
-    }
-    if (sorted_omap) {
-      if (part->info.num != expected_next) {
-        /* ouch, we expected a specific part num here, but we got a
-         * different one. Either a part is missing, or it could be a
-         * case of mixed rgw versions working on the same upload,
-         * where one gateway doesn't support correctly sorted omap
-         * keys for multipart upload just assume data is unsorted.
-         */
-        return list_parts(dpp, cct, num_parts, marker, next_marker, truncated, true);
-      }
-      expected_next++;
-    }
-    if (sorted_omap ||
-        (int)part->info.num > marker) {
-      last_num = part->info.num;
-      parts[part->info.num] = std::move(part);
-    }
-  }
-
-  if (sorted_omap) {
-    if (truncated) {
-      *truncated = (iter != parts_map.end());
-    }
-  } else {
-    /* rebuild a map with only num_parts entries */
-    std::map<uint32_t, std::unique_ptr<MultipartPart>> new_parts;
-    std::map<uint32_t, std::unique_ptr<MultipartPart>>::iterator piter;
-    for (i = 0, piter = parts.begin();
-         i < num_parts && piter != parts.end();
-         ++i, ++piter) {
-      last_num = piter->first;
-      new_parts[piter->first] = std::move(piter->second);
-    }
-
-    if (truncated) {
-      *truncated = (piter != parts.end());
-    }
-
-    parts.swap(new_parts);
-  }
-
-  if (next_marker) {
-    *next_marker = last_num;
-  }
-
-  return 0;
-}
-
-int RadosMultipartUpload::complete(const DoutPrefixProvider *dpp,
-                                   optional_yield y, CephContext* cct,
-                                   map<int, string>& part_etags,
-                                   list<rgw_obj_index_key>& remove_objs,
-                                   uint64_t& accounted_size, bool& compressed,
-                                   RGWCompressionInfo& cs_info, off_t& ofs,
-                                   std::string& tag, ACLOwner& owner,
-                                   uint64_t olh_epoch,
-                                   rgw::sal::Object* target_obj)
-{
-  char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
-  char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
-  std::string etag;
-  bufferlist etag_bl;
-  MD5 hash;
-  // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes
-  hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
-  bool truncated;
-  int ret;
-
-  int total_parts = 0;
-  int handled_parts = 0;
-  int max_parts = 1000;
-  int marker = 0;
-  uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size;
-  auto etags_iter = part_etags.begin();
-  rgw::sal::Attrs attrs = target_obj->get_attrs();
-
-  do {
-    ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated);
-    if (ret == -ENOENT) {
-      ret = -ERR_NO_SUCH_UPLOAD;
-    }
-    if (ret < 0)
-      return ret;
-
-    total_parts += parts.size();
-    if (!truncated && total_parts != (int)part_etags.size()) {
-      ldpp_dout(dpp, 0) << "NOTICE: total parts mismatch: have: " << total_parts
-                        << " expected: " << part_etags.size() << dendl;
-      ret = -ERR_INVALID_PART;
-      return ret;
-    }
-
-    for (auto obj_iter = parts.begin(); etags_iter != part_etags.end() && obj_iter != parts.end(); ++etags_iter, ++obj_iter, ++handled_parts) {
-      RadosMultipartPart* part = dynamic_cast<RadosMultipartPart*>(obj_iter->second.get());
-      uint64_t part_size = part->get_size();
-      if (handled_parts < (int)part_etags.size() - 1 &&
-          part_size < min_part_size) {
-        ret = -ERR_TOO_SMALL;
-        return ret;
-      }
-
-      char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
-      if (etags_iter->first != (int)obj_iter->first) {
-        ldpp_dout(dpp, 0) << "NOTICE: parts num mismatch: next requested: "
-                          << etags_iter->first << " next uploaded: "
-                          << obj_iter->first << dendl;
-        ret = -ERR_INVALID_PART;
-        return ret;
-      }
-      string part_etag = rgw_string_unquote(etags_iter->second);
-      if (part_etag.compare(part->get_etag()) != 0) {
-        ldpp_dout(dpp, 0) << "NOTICE: etag mismatch: part: " << etags_iter->first
-                          << " etag: " << etags_iter->second << dendl;
-        ret = -ERR_INVALID_PART;
-        return ret;
-      }
-
-      hex_to_buf(part->get_etag().c_str(), petag,
-                 CEPH_CRYPTO_MD5_DIGESTSIZE);
-      hash.Update((const unsigned char *)petag, sizeof(petag));
-
-      RGWUploadPartInfo& obj_part = part->info;
-
-      /* update manifest for part */
-      string oid = mp_obj.get_part(part->info.num);
-      rgw_obj src_obj;
-      src_obj.init_ns(bucket->get_key(), oid, mp_ns);
-
-      if (obj_part.manifest.empty()) {
-        ldpp_dout(dpp, 0) << "ERROR: empty manifest for object part: obj="
-                          << src_obj << dendl;
-        ret = -ERR_INVALID_PART;
-        return ret;
-      } else {
-        manifest.append(dpp, obj_part.manifest, store->svc()->zone->get_zonegroup(), store->svc()->zone->get_zone_params());
-      }
-
-      bool part_compressed = (obj_part.cs_info.compression_type != "none");
-      if ((handled_parts > 0) &&
-          ((part_compressed != compressed) ||
-           (cs_info.compression_type != obj_part.cs_info.compression_type))) {
-        ldpp_dout(dpp, 0) << "ERROR: compression type was changed during multipart upload ("
-                          << cs_info.compression_type << ">>" << obj_part.cs_info.compression_type << ")" << dendl;
-        ret = -ERR_INVALID_PART;
-        return ret;
-      }
-
-      if (part_compressed) {
-        int64_t new_ofs; // offset in compression data for new part
-        if (cs_info.blocks.size() > 0)
-          new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
-        else
-          new_ofs = 0;
-        for (const auto& block : obj_part.cs_info.blocks) {
-          compression_block cb;
-          cb.old_ofs = block.old_ofs + cs_info.orig_size;
-          cb.new_ofs = new_ofs;
-          cb.len = block.len;
-          cs_info.blocks.push_back(cb);
-          new_ofs = cb.new_ofs + cb.len;
-        }
-        if (!compressed)
-          cs_info.compression_type = obj_part.cs_info.compression_type;
-        cs_info.orig_size += obj_part.cs_info.orig_size;
-        compressed = true;
-      }
-
-      rgw_obj_index_key remove_key;
-      src_obj.key.get_index_key(&remove_key);
-
-      remove_objs.push_back(remove_key);
-
-      ofs += obj_part.size;
-      accounted_size += obj_part.accounted_size;
-    }
-  } while (truncated);
-  hash.Final((unsigned char *)final_etag);
-
-  buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str);
-  snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2],
-           sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2,
-           "-%lld", (long long)part_etags.size());
-  etag = final_etag_str;
-  ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl;
-
-  etag_bl.append(etag);
-
-  attrs[RGW_ATTR_ETAG] = etag_bl;
-
-  if (compressed) {
-    // write compression attribute to full object
-    bufferlist tmp;
-    encode(cs_info, tmp);
-    attrs[RGW_ATTR_COMPRESSION] = tmp;
-  }
-
-  target_obj->set_atomic();
-
-  RGWRados::Object op_target(store->getRados(),
-                             target_obj->get_bucket(),
-                             dynamic_cast<RadosObject*>(target_obj)->get_ctx(),
-                             target_obj);
-  RGWRados::Object::Write obj_op(&op_target);
-
-  obj_op.meta.manifest = &manifest;
-  obj_op.meta.remove_objs = &remove_objs;
-
-  obj_op.meta.ptag = &tag; /* use req_id as operation tag */
-  obj_op.meta.owner = owner.get_id();
-  obj_op.meta.flags = PUT_OBJ_CREATE;
-  obj_op.meta.modify_tail = true;
-  obj_op.meta.completeMultipart = true;
-  obj_op.meta.olh_epoch = olh_epoch;
-
-  ret = obj_op.write_meta(dpp, ofs, accounted_size, attrs, y);
-  if (ret < 0)
-    return ret;
-
-  return ret;
-}
-
-int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs)
-{
-  if (!rule && !attrs) {
-    return 0;
-  }
-
-  if (rule) {
-    if (!placement.empty()) {
-      *rule = &placement;
-      if (!attrs) {
-        /* Don't need attrs, done */
-        return 0;
-      }
-    } else {
-      *rule = nullptr;
-    }
-  }
-
-  /* We need either attributes or placement, so we need a read */
-  std::unique_ptr<rgw::sal::Object> meta_obj;
-  meta_obj = get_meta_obj();
-  meta_obj->set_in_extra_data(true);
-
-  multipart_upload_info upload_info;
-  bufferlist headbl;
-
-  /* Read the obj head which contains the multipart_upload_info */
-  std::unique_ptr<rgw::sal::Object::ReadOp> read_op = meta_obj->get_read_op();
-  meta_obj->set_prefetch_data();
-
-  int ret = read_op->prepare(y, dpp);
-  if (ret < 0) {
-    if (ret == -ENOENT) {
-      return -ERR_NO_SUCH_UPLOAD;
-    }
-    return ret;
-  }
-
-  extract_span_context(meta_obj->get_attrs(), trace_ctx);
-
-  if (attrs) {
-    /* Attrs are filled in by prepare */
-    *attrs = meta_obj->get_attrs();
-    if (!rule || *rule != nullptr) {
-      /* placement was cached; don't actually read */
-      return 0;
-    }
-  }
-
-  /* Now read the placement from the head */
-  ret = read_op->read(0, store->ctx()->_conf->rgw_max_chunk_size, headbl, y, dpp);
-  if (ret < 0) {
-    if (ret == -ENOENT) {
-      return -ERR_NO_SUCH_UPLOAD;
-    }
-    return ret;
-  }
-
-  if (headbl.length() <= 0) {
-    return -ERR_NO_SUCH_UPLOAD;
-  }
-
-  /* Decode multipart_upload_info */
-  auto hiter = headbl.cbegin();
-  try {
-    decode(upload_info, hiter);
-  } catch (buffer::error& err) {
-    ldpp_dout(dpp, 0) << "ERROR: failed to decode multipart upload info" << dendl;
-    return -EIO;
-  }
-  placement = upload_info.dest_placement;
-  *rule = &placement;
-
-  return 0;
-}
-
-std::unique_ptr<Writer> RadosMultipartUpload::get_writer(
-    const DoutPrefixProvider *dpp,
-    optional_yield y,
-    std::unique_ptr<rgw::sal::Object> _head_obj,
-    const rgw_user& owner,
-    const rgw_placement_rule *ptail_placement_rule,
-    uint64_t part_num,
-    const std::string& part_num_str)
-{
-  auto aio = rgw::make_throttle(store->ctx()->_conf->rgw_put_obj_min_window_size, y);
-  return std::make_unique<RadosMultipartWriter>(dpp, y, this,
-                                                std::move(_head_obj), store, std::move(aio), owner,
-                                                ptail_placement_rule, part_num, part_num_str);
-}
-
-MPRadosSerializer::MPRadosSerializer(const DoutPrefixProvider *dpp, RadosStore* store, RadosObject* obj, const std::string& lock_name) :
-  lock(lock_name)
-{
-  rgw_pool meta_pool;
-  rgw_raw_obj raw_obj;
-
-  obj->get_raw_obj(&raw_obj);
-  oid = raw_obj.oid;
-  store->getRados()->get_obj_data_pool(obj->get_bucket()->get_placement_rule(),
-                                       obj->get_obj(), &meta_pool);
-  store->getRados()->open_pool_ctx(dpp, meta_pool, ioctx, true);
-}
-
-int MPRadosSerializer::try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y)
-{
-  op.assert_exists();
-  lock.set_duration(dur);
-  lock.lock_exclusive(&op);
-  int ret = rgw_rados_operate(dpp, ioctx, oid, &op, y);
-  if (!ret) {
-    locked = true;
-  }
-  return ret;
-}
-
-LCRadosSerializer::LCRadosSerializer(RadosStore* store, const std::string& _oid, const std::string& lock_name, const std::string& cookie) :
-  StoreLCSerializer(_oid),
-  lock(lock_name)
-{
-  ioctx = &store->getRados()->lc_pool_ctx;
-  lock.set_cookie(cookie);
-}
-
-int LCRadosSerializer::try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y)
-{
-  lock.set_duration(dur);
-  return lock.lock_exclusive(ioctx, oid);
-}
-
-int RadosLifecycle::get_entry(const std::string& oid, const std::string& marker,
-                              std::unique_ptr<LCEntry>* entry)
-{
-  cls_rgw_lc_entry cls_entry;
-  int ret = cls_rgw_lc_get_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker, cls_entry);
-  if (ret)
-    return ret;
-
-  LCEntry* e;
-  e = new StoreLCEntry(cls_entry.bucket, cls_entry.start_time, cls_entry.status);
-  if (!e)
-    return -ENOMEM;
-
-  entry->reset(e);
-  return 0;
-}
-
-int RadosLifecycle::get_next_entry(const std::string& oid, const std::string& marker,
-                                   std::unique_ptr<LCEntry>* entry)
-{
-  cls_rgw_lc_entry cls_entry;
-  int ret = cls_rgw_lc_get_next_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker,
-                                      cls_entry);
-
-  if (ret)
-    return ret;
-
-  LCEntry* e;
-  e = new StoreLCEntry(cls_entry.bucket, cls_entry.start_time, cls_entry.status);
-  if (!e)
-    return -ENOMEM;
-
-  entry->reset(e);
-  return 0;
-}
-
-int RadosLifecycle::set_entry(const std::string& oid, LCEntry& entry)
-{
-  cls_rgw_lc_entry cls_entry;
-
-  cls_entry.bucket = entry.get_bucket();
-  cls_entry.start_time = entry.get_start_time();
-  cls_entry.status = entry.get_status();
-
-  return cls_rgw_lc_set_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry);
-}
-
-int RadosLifecycle::list_entries(const std::string& oid, const std::string& marker,
-                                 uint32_t max_entries, std::vector<std::unique_ptr<LCEntry>>& entries)
-{
-  entries.clear();
-
-  vector<cls_rgw_lc_entry> cls_entries;
-  int ret = cls_rgw_lc_list(*store->getRados()->get_lc_pool_ctx(), oid, marker, max_entries, cls_entries);
-
-  if (ret < 0)
-    return ret;
-
-  for (auto& entry : cls_entries) {
-    entries.push_back(std::make_unique<StoreLCEntry>(entry.bucket, oid,
-                                                     entry.start_time, entry.status));
-  }
-
-  return ret;
-}
-
-int RadosLifecycle::rm_entry(const std::string& oid, LCEntry& entry)
-{
-  cls_rgw_lc_entry cls_entry;
-
-  cls_entry.bucket = entry.get_bucket();
-  cls_entry.start_time = entry.get_start_time();
-  cls_entry.status = entry.get_status();
-
-  return cls_rgw_lc_rm_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry);
-}
-
-int RadosLifecycle::get_head(const std::string& oid, std::unique_ptr<LCHead>* head)
-{
-  cls_rgw_lc_obj_head cls_head;
-  int ret = cls_rgw_lc_get_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head);
-  if (ret)
-    return ret;
-
-  LCHead* h;
-  h = new StoreLCHead(cls_head.start_date, cls_head.shard_rollover_date, cls_head.marker);
-  if (!h)
-    return -ENOMEM;
-
-  head->reset(h);
-  return 0;
-}
-
-int RadosLifecycle::put_head(const std::string& oid, LCHead& head)
-{
-  cls_rgw_lc_obj_head cls_head;
-
-  cls_head.marker = head.get_marker();
-  cls_head.start_date = head.get_start_date();
-  cls_head.shard_rollover_date = head.get_shard_rollover_date();
-
-  return cls_rgw_lc_put_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head);
-}
-
-std::unique_ptr<LCSerializer> RadosLifecycle::get_serializer(const std::string& lock_name,
-                                                             const std::string& oid,
-                                                             const std::string& cookie)
-{
-  return std::make_unique<LCRadosSerializer>(store, oid, lock_name, cookie);
-}
-
-int RadosNotification::publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags)
-{
-  return rgw::notify::publish_reserve(dpp, event_type, res, obj_tags);
-}
-
-int RadosNotification::publish_commit(const DoutPrefixProvider* dpp, uint64_t size,
-                                      const ceph::real_time& mtime, const std::string& etag, const std::string& version)
-{
-  return rgw::notify::publish_commit(obj, size, mtime, etag, version, event_type, res, dpp);
-}
-
-int RadosAtomicWriter::prepare(optional_yield y)
-{
-  return processor.prepare(y);
-}
-
-int RadosAtomicWriter::process(bufferlist&& data, uint64_t offset)
-{
-  return processor.process(std::move(data), offset);
-}
-
-int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag,
-                                ceph::real_time *mtime, ceph::real_time set_mtime,
-                                std::map<std::string, bufferlist>& attrs,
-                                ceph::real_time delete_at,
-                                const char *if_match, const char *if_nomatch,
-                                const std::string *user_data,
-                                rgw_zone_set *zones_trace, bool *canceled,
-                                optional_yield y)
-{
-  return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at,
-                            if_match, if_nomatch, user_data, zones_trace, canceled, y);
-}
-
-int RadosAppendWriter::prepare(optional_yield y)
-{
-  return processor.prepare(y);
-}
-
-int RadosAppendWriter::process(bufferlist&& data, uint64_t offset)
-{
-  return processor.process(std::move(data), offset);
-}
-
-int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag,
-                                ceph::real_time *mtime, ceph::real_time set_mtime,
-                                std::map<std::string, bufferlist>& attrs,
-                                ceph::real_time delete_at,
-                                const char *if_match, const char *if_nomatch,
-                                const std::string *user_data,
-                                rgw_zone_set *zones_trace, bool *canceled,
-                                optional_yield y)
-{
-  return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at,
-                            if_match, if_nomatch, user_data, zones_trace, canceled, y);
-}
-
-int RadosMultipartWriter::prepare(optional_yield y)
-{
-  return processor.prepare(y);
-}
-
-int RadosMultipartWriter::process(bufferlist&& data, uint64_t offset)
-{
-  return processor.process(std::move(data), offset);
-}
-
-int RadosMultipartWriter::complete(size_t accounted_size, const std::string& etag,
-                                   ceph::real_time *mtime, ceph::real_time set_mtime,
-                                   std::map<std::string, bufferlist>& attrs,
-                                   ceph::real_time delete_at,
-                                   const char *if_match, const char *if_nomatch,
-                                   const std::string *user_data,
-                                   rgw_zone_set *zones_trace, bool *canceled,
-                                   optional_yield y)
-{
-  return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at,
-                            if_match, if_nomatch, user_data, zones_trace, canceled, y);
-}
-
-const std::string& RadosZoneGroup::get_endpoint() const
-{
-  if (!group.endpoints.empty()) {
-    return group.endpoints.front();
-  } else {
-    // use zonegroup's master zone endpoints
-    auto z = group.zones.find(group.master_zone);
-    if (z != group.zones.end() && !z->second.endpoints.empty()) {
-      return z->second.endpoints.front();
-    }
-  }
-  return empty;
-}
-
-bool RadosZoneGroup::placement_target_exists(std::string& target) const
-{
-  return !!group.placement_targets.count(target);
-}
-
-int RadosZoneGroup::get_placement_target_names(std::set<std::string>& names) const
-{
-  for (const auto& target : group.placement_targets) {
-    names.emplace(target.second.name);
-  }
-
-  return 0;
-}
-
-int RadosZoneGroup::get_placement_tier(const rgw_placement_rule& rule,
-                                       std::unique_ptr<PlacementTier>* tier)
-{
-  std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer;
-  titer = group.placement_targets.find(rule.name);
-  if (titer == group.placement_targets.end()) {
-    return -ENOENT;
-  }
-
-  const auto& target_rule = titer->second;
-  std::map<std::string, RGWZoneGroupPlacementTier>::const_iterator ttier;
-  ttier = target_rule.tier_targets.find(rule.storage_class);
-  if (ttier ==
target_rule.tier_targets.end()) { - // not found - return -ENOENT; - } - - PlacementTier* t; - t = new RadosPlacementTier(store, ttier->second); - if (!t) - return -ENOMEM; - - tier->reset(t); - return 0; -} - -int RadosZoneGroup::get_zone_by_id(const std::string& id, std::unique_ptr* zone) -{ - RGWZone* rz = store->svc()->zone->find_zone(id); - if (!rz) - return -ENOENT; - - Zone* z = new RadosZone(store, clone(), *rz); - zone->reset(z); - return 0; -} - -int RadosZoneGroup::get_zone_by_name(const std::string& name, std::unique_ptr* zone) -{ - rgw_zone_id id; - int ret = store->svc()->zone->find_zone_id_by_name(name, &id); - if (ret < 0) - return ret; - - RGWZone* rz = store->svc()->zone->find_zone(id.id); - if (!rz) - return -ENOENT; - - Zone* z = new RadosZone(store, clone(), *rz); - zone->reset(z); - return 0; -} - -int RadosZoneGroup::list_zones(std::list& zone_ids) -{ - for (const auto& entry : group.zones) - { - zone_ids.push_back(entry.second.id); - } - return 0; -} - -std::unique_ptr RadosZone::clone() -{ - if (local_zone) - return std::make_unique(store, group->clone()); - - return std::make_unique(store, group->clone(), rgw_zone); -} - -const std::string& RadosZone::get_id() -{ - if (local_zone) - return store->svc()->zone->zone_id().id; - - return rgw_zone.id; -} - -const std::string& RadosZone::get_name() const -{ - if (local_zone) - return store->svc()->zone->zone_name(); - - return rgw_zone.name; -} - -bool RadosZone::is_writeable() -{ - if (local_zone) - return store->svc()->zone->zone_is_writeable(); - - return !rgw_zone.read_only; -} - -bool RadosZone::get_redirect_endpoint(std::string* endpoint) -{ - if (local_zone) - return store->svc()->zone->get_redirect_zone_endpoint(endpoint); - - endpoint = &rgw_zone.redirect_zone; - return true; -} - -bool RadosZone::has_zonegroup_api(const std::string& api) const -{ - return store->svc()->zone->has_zonegroup_api(api); -} - -const std::string& RadosZone::get_current_period_id() -{ - return store->svc()->zone->get_current_period_id(); -} - -const RGWAccessKey& RadosZone::get_system_key() -{ - return store->svc()->zone->get_zone_params().system_key; -} - -const std::string& RadosZone::get_realm_name() -{ - return store->svc()->zone->get_realm().get_name(); -} - -const std::string& RadosZone::get_realm_id() -{ - return store->svc()->zone->get_realm().get_id(); -} - -const std::string_view RadosZone::get_tier_type() -{ - if (local_zone) - return store->svc()->zone->get_zone().tier_type; - - return rgw_zone.id; -} - -RGWBucketSyncPolicyHandlerRef RadosZone::get_sync_policy_handler() -{ - return store->svc()->zone->get_sync_policy_handler(get_id()); -} - -RadosLuaManager::RadosLuaManager(RadosStore* _s) : - store(_s), - pool((store->svc() && store->svc()->zone) ? 
store->svc()->zone->get_zone_params().log_pool : rgw_pool())
-{ }
-
-int RadosLuaManager::get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script)
-{
-  if (pool.empty()) {
-    ldpp_dout(dpp, 10) << "WARNING: missing pool when reading lua script " << dendl;
-    return 0;
-  }
-  bufferlist bl;
-
-  int r = rgw_get_system_obj(store->svc()->sysobj, pool, key, bl, nullptr, nullptr, y, dpp);
-  if (r < 0) {
-    return r;
-  }
-
-  auto iter = bl.cbegin();
-  try {
-    ceph::decode(script, iter);
-  } catch (buffer::error& err) {
-    return -EIO;
-  }
-
-  return 0;
-}
-
-int RadosLuaManager::put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script)
-{
-  if (pool.empty()) {
-    ldpp_dout(dpp, 10) << "WARNING: missing pool when writing lua script " << dendl;
-    return 0;
-  }
-  bufferlist bl;
-  ceph::encode(script, bl);
-
-  int r = rgw_put_system_obj(dpp, store->svc()->sysobj, pool, key, bl, false, nullptr, real_time(), y);
-  if (r < 0) {
-    return r;
-  }
-
-  return 0;
-}
-
-int RadosLuaManager::del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key)
-{
-  if (pool.empty()) {
-    ldpp_dout(dpp, 10) << "WARNING: missing pool when deleting lua script " << dendl;
-    return 0;
-  }
-  int r = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, key, nullptr, y);
-  if (r < 0 && r != -ENOENT) {
-    return r;
-  }
-
-  return 0;
-}
-
-const std::string PACKAGE_LIST_OBJECT_NAME = "lua_package_allowlist";
-
-int RadosLuaManager::add_package(const DoutPrefixProvider *dpp, optional_yield y, const std::string& package_name)
-{
-  // add package to list
-  const bufferlist empty_bl;
-  std::map<std::string, bufferlist> new_package{{package_name, empty_bl}};
-  librados::ObjectWriteOperation op;
-  op.omap_set(new_package);
-  auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()),
-      PACKAGE_LIST_OBJECT_NAME, &op, y);
-
-  if (ret < 0) {
-    return ret;
-  }
-  return 0;
-}
-
-int RadosLuaManager::remove_package(const DoutPrefixProvider *dpp, optional_yield y, const std::string& package_name)
-{
-  librados::ObjectWriteOperation op;
-  size_t pos = package_name.find(" ");
-  if (pos != package_name.npos) {
-    // remove specific version of the package
-    op.omap_rm_keys(std::set<std::string>({package_name}));
-    auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()),
-        PACKAGE_LIST_OBJECT_NAME, &op, y);
-    if (ret < 0) {
-      return ret;
-    }
-    return 0;
-  }
-  // otherwise, remove any existing versions of the package
-  rgw::lua::packages_t packages;
-  auto ret = list_packages(dpp, y, packages);
-  if (ret < 0 && ret != -ENOENT) {
-    return ret;
-  }
-  for (const auto& package : packages) {
-    const std::string package_no_version = package.substr(0, package.find(" "));
-    if (package_no_version.compare(package_name) == 0) {
-      op.omap_rm_keys(std::set<std::string>({package}));
-      ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()),
-          PACKAGE_LIST_OBJECT_NAME, &op, y);
-      if (ret < 0) {
-        return ret;
-      }
-    }
-  }
-  return 0;
-}
-
-int RadosLuaManager::list_packages(const DoutPrefixProvider *dpp, optional_yield y, rgw::lua::packages_t& packages)
-{
-  constexpr auto max_chunk = 1024U;
-  std::string start_after;
-  bool more = true;
-  int rval;
-  while (more) {
-    librados::ObjectReadOperation op;
-    rgw::lua::packages_t packages_chunk;
-    op.omap_get_keys2(start_after, max_chunk, &packages_chunk, &more, &rval);
-    const auto ret = rgw_rados_operate(dpp, *(store->getRados()->get_lc_pool_ctx()),
-      PACKAGE_LIST_OBJECT_NAME, &op, nullptr, y);
-
-    if (ret
< 0) { - return ret; - } - - packages.merge(packages_chunk); - } - - return 0; -} - -int RadosOIDCProvider::store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) -{ - auto sysobj = store->svc()->sysobj; - std::string oid = tenant + get_url_oid_prefix() + url; - - bufferlist bl; - using ceph::encode; - encode(*this, bl); - return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().oidc_pool, oid, bl, exclusive, nullptr, real_time(), y); -} - -int RadosOIDCProvider::read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) -{ - auto sysobj = store->svc()->sysobj; - auto& pool = store->svc()->zone->get_zone_params().oidc_pool; - std::string oid = tenant + get_url_oid_prefix() + url; - bufferlist bl; - - int ret = rgw_get_system_obj(sysobj, pool, oid, bl, nullptr, nullptr, null_yield, dpp); - if (ret < 0) { - return ret; - } - - try { - using ceph::decode; - auto iter = bl.cbegin(); - decode(*this, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode oidc provider info from pool: " << pool.name << - ": " << url << dendl; - return -EIO; - } - - return 0; -} - -int RadosOIDCProvider::delete_obj(const DoutPrefixProvider *dpp, optional_yield y) -{ - auto& pool = store->svc()->zone->get_zone_params().oidc_pool; - - std::string url, tenant; - auto ret = get_tenant_url_from_arn(tenant, url); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to parse arn" << dendl; - return -EINVAL; - } - - if (this->tenant != tenant) { - ldpp_dout(dpp, 0) << "ERROR: tenant in arn doesn't match that of user " << this->tenant << ", " - << tenant << ": " << dendl; - return -EINVAL; - } - - // Delete url - std::string oid = tenant + get_url_oid_prefix() + url; - ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: deleting oidc url from pool: " << pool.name << ": " - << provider_url << ": " << cpp_strerror(-ret) << dendl; - } - - return ret; -} - -int RadosRole::store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) -{ - using ceph::encode; - std::string oid; - - oid = info.id; - - bufferlist bl; - encode(this->info, bl); - - if (!this->info.tags.empty()) { - bufferlist bl_tags; - encode(this->info.tags, bl_tags); - map attrs; - attrs.emplace("tagging", bl_tags); - - RGWSI_MBSObj_PutParams params(bl, &attrs, info.mtime, exclusive); - std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); - ctx->init(store->svc()->role->get_be_handler()); - return store->svc()->role->svc.meta_be->put(ctx.get(), oid, params, &info.objv_tracker, y, dpp); - } else { - RGWSI_MBSObj_PutParams params(bl, nullptr, info.mtime, exclusive); - std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); - ctx->init(store->svc()->role->get_be_handler()); - return store->svc()->role->svc.meta_be->put(ctx.get(), oid, params, &info.objv_tracker, y, dpp); - } -} - -int RadosRole::store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) -{ - auto sysobj = store->svc()->sysobj; - RGWNameToId nameToId; - nameToId.obj_id = info.id; - - std::string oid = info.tenant + get_names_oid_prefix() + info.name; - - bufferlist bl; - using ceph::encode; - encode(nameToId, bl); - - return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, exclusive, &info.objv_tracker, real_time(), y); -} - -int RadosRole::store_path(const DoutPrefixProvider *dpp, bool exclusive, 
optional_yield y) -{ - auto sysobj = store->svc()->sysobj; - std::string oid = info.tenant + get_path_oid_prefix() + info.path + get_info_oid_prefix() + info.id; - - bufferlist bl; - - return rgw_put_system_obj(dpp, sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, exclusive, &info.objv_tracker, real_time(), y); -} - -int RadosRole::read_id(const DoutPrefixProvider *dpp, const std::string& role_name, const std::string& tenant, std::string& role_id, optional_yield y) -{ - auto sysobj = store->svc()->sysobj; - std::string oid = info.tenant + get_names_oid_prefix() + role_name; - bufferlist bl; - - int ret = rgw_get_system_obj(sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, nullptr, nullptr, null_yield, dpp); - if (ret < 0) { - return ret; - } - - RGWNameToId nameToId; - try { - auto iter = bl.cbegin(); - using ceph::decode; - decode(nameToId, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode role from Role pool: " << role_name << dendl; - return -EIO; - } - role_id = nameToId.obj_id; - return 0; -} - -int RadosRole::read_name(const DoutPrefixProvider *dpp, optional_yield y) -{ - auto sysobj = store->svc()->sysobj; - std::string oid = info.tenant + get_names_oid_prefix() + info.name; - bufferlist bl; - - int ret = rgw_get_system_obj(sysobj, store->svc()->zone->get_zone_params().roles_pool, oid, bl, nullptr, nullptr, null_yield, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed reading role name from Role pool: " << info.name << - ": " << cpp_strerror(-ret) << dendl; - return ret; - } - - RGWNameToId nameToId; - try { - using ceph::decode; - auto iter = bl.cbegin(); - decode(nameToId, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode role name from Role pool: " << info.name << dendl; - return -EIO; - } - info.id = nameToId.obj_id; - return 0; -} - -int RadosRole::read_info(const DoutPrefixProvider *dpp, optional_yield y) -{ - std::string oid; - - oid = info.id; - ldpp_dout(dpp, 20) << "INFO: oid in read_info is: " << oid << dendl; - - bufferlist bl; - - RGWSI_MBSObj_GetParams params(&bl, &info.attrs, &info.mtime); - std::unique_ptr ctx(store->svc()->role->svc.meta_be->alloc_ctx()); - ctx->init(store->svc()->role->get_be_handler()); - int ret = store->svc()->role->svc.meta_be->get(ctx.get(), oid, params, &info.objv_tracker, y, dpp, true); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed reading role info from Role pool: " << info.id << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - - try { - using ceph::decode; - auto iter = bl.cbegin(); - decode(this->info, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode role info from Role pool: " << info.id << dendl; - return -EIO; - } - - auto it = info.attrs.find("tagging"); - if (it != info.attrs.end()) { - bufferlist bl_tags = it->second; - try { - using ceph::decode; - auto iter = bl_tags.cbegin(); - decode(info.tags, iter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode attrs" << info.id << dendl; - return -EIO; - } - } - - return 0; -} - -int RadosRole::create(const DoutPrefixProvider *dpp, bool exclusive, const std::string& role_id, optional_yield y) -{ - int ret; - - if (! 
validate_input(dpp)) { - return -EINVAL; - } - - if (!role_id.empty()) { - info.id = role_id; - } - - /* check to see the name is not used */ - ret = read_id(dpp, info.name, info.tenant, info.id, y); - if (exclusive && ret == 0) { - ldpp_dout(dpp, 0) << "ERROR: name " << info.name << " already in use for role id " - << info.id << dendl; - return -EEXIST; - } else if ( ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "failed reading role id " << info.id << ": " - << cpp_strerror(-ret) << dendl; - return ret; - } - - if (info.id.empty()) { - /* create unique id */ - uuid_d new_uuid; - char uuid_str[37]; - new_uuid.generate_random(); - new_uuid.print(uuid_str); - info.id = uuid_str; - } - - //arn - info.arn = role_arn_prefix + info.tenant + ":role" + info.path + info.name; - - // Creation time - real_clock::time_point t = real_clock::now(); - - struct timeval tv; - real_clock::to_timeval(t, tv); - - char buf[30]; - struct tm result; - gmtime_r(&tv.tv_sec, &result); - strftime(buf,30,"%Y-%m-%dT%H:%M:%S", &result); - sprintf(buf + strlen(buf),".%dZ",(int)tv.tv_usec/1000); - info.creation_date.assign(buf, strlen(buf)); - - auto& pool = store->svc()->zone->get_zone_params().roles_pool; - ret = store_info(dpp, exclusive, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: storing role info in Role pool: " - << info.id << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - - ret = store_name(dpp, exclusive, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: storing role name in Role pool: " - << info.name << ": " << cpp_strerror(-ret) << dendl; - - //Delete the role info that was stored in the previous call - std::string oid = get_info_oid_prefix() + info.id; - int info_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); - if (info_ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: cleanup of role id from Role pool: " - << info.id << ": " << cpp_strerror(-info_ret) << dendl; - } - return ret; - } - - ret = store_path(dpp, exclusive, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: storing role path in Role pool: " - << info.path << ": " << cpp_strerror(-ret) << dendl; - //Delete the role info that was stored in the previous call - std::string oid = get_info_oid_prefix() + info.id; - int info_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); - if (info_ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: cleanup of role id from Role pool: " - << info.id << ": " << cpp_strerror(-info_ret) << dendl; - } - //Delete role name that was stored in previous call - oid = info.tenant + get_names_oid_prefix() + info.name; - int name_ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y); - if (name_ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: cleanup of role name from Role pool: " - << info.name << ": " << cpp_strerror(-name_ret) << dendl; - } - return ret; - } - return 0; -} - -int RadosRole::delete_obj(const DoutPrefixProvider *dpp, optional_yield y) -{ - auto& pool = store->svc()->zone->get_zone_params().roles_pool; - - int ret = read_name(dpp, y); - if (ret < 0) { - return ret; - } - - ret = read_info(dpp, y); - if (ret < 0) { - return ret; - } - - if (! 
info.perm_policy_map.empty()) {
-    return -ERR_DELETE_CONFLICT;
-  }
-
-  // Delete id
-  std::string oid = get_info_oid_prefix() + info.id;
-  ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y);
-  if (ret < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: deleting role id from Role pool: "
-                  << info.id << ": " << cpp_strerror(-ret) << dendl;
-  }
-
-  // Delete name
-  oid = info.tenant + get_names_oid_prefix() + info.name;
-  ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y);
-  if (ret < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: deleting role name from Role pool: "
-                  << info.name << ": " << cpp_strerror(-ret) << dendl;
-  }
-
-  // Delete path
-  oid = info.tenant + get_path_oid_prefix() + info.path + get_info_oid_prefix() + info.id;
-  ret = rgw_delete_system_obj(dpp, store->svc()->sysobj, pool, oid, nullptr, y);
-  if (ret < 0) {
-    ldpp_dout(dpp, 0) << "ERROR: deleting role path from Role pool: "
-                  << info.path << ": " << cpp_strerror(-ret) << dendl;
-  }
-  return ret;
-}
-
-} // namespace rgw::sal
-
-extern "C" {
-
-void* newRadosStore(void)
-{
-  rgw::sal::RadosStore* store = new rgw::sal::RadosStore();
-  if (store) {
-    RGWRados* rados = new RGWRados();
-
-    if (!rados) {
-      delete store; store = nullptr;
-    } else {
-      store->setRados(rados);
-      rados->set_store(store);
-    }
-  }
-
-  return store;
-}
-
-}
diff --git a/src/rgw/store/rados/rgw_sal_rados.h b/src/rgw/store/rados/rgw_sal_rados.h
deleted file mode 100644
index 499e0994807..00000000000
--- a/src/rgw/store/rados/rgw_sal_rados.h
+++ /dev/null
@@ -1,959 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2020 Red Hat, Inc.
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#pragma once
-
-#include "rgw_sal_store.h"
-#include "rgw_rados.h"
-#include "rgw_notify.h"
-#include "rgw_oidc_provider.h"
-#include "rgw_role.h"
-#include "rgw_multi.h"
-#include "rgw_putobj_processor.h"
-#include "services/svc_tier_rados.h"
-#include "cls/lock/cls_lock_client.h"
-
-namespace rgw { namespace sal {
-
-class RadosMultipartUpload;
-
-class RadosCompletions : public Completions {
-  public:
-    std::list<librados::AioCompletion*> handles;
-    RadosCompletions() {}
-    ~RadosCompletions() = default;
-    virtual int drain() override;
-};
-
-class RadosPlacementTier: public StorePlacementTier {
-  RadosStore* store;
-  RGWZoneGroupPlacementTier tier;
-public:
-  RadosPlacementTier(RadosStore* _store, const RGWZoneGroupPlacementTier& _tier) : store(_store), tier(_tier) {}
-  virtual ~RadosPlacementTier() = default;
-
-  virtual const std::string& get_tier_type() { return tier.tier_type; }
-  virtual const std::string& get_storage_class() { return tier.storage_class; }
-  virtual bool retain_head_object() { return tier.retain_head_object; }
-  RGWZoneGroupPlacementTier& get_rt() { return tier; }
-};
-
-class RadosZoneGroup : public StoreZoneGroup {
-  RadosStore* store;
-  const RGWZoneGroup group;
-  std::string empty;
-public:
-  RadosZoneGroup(RadosStore* _store, const RGWZoneGroup& _group) : store(_store), group(_group) {}
-  virtual ~RadosZoneGroup() = default;
-
-  virtual const std::string& get_id() const override { return group.get_id(); };
-  virtual const std::string& get_name() const override { return group.get_name(); };
-  virtual int equals(const std::string& other_zonegroup) const override {
-    return group.equals(other_zonegroup);
-  };
-  /** Get the endpoint from zonegroup, or from master zone if not set */
-  virtual const std::string& get_endpoint() const override;
-  virtual bool placement_target_exists(std::string& target) const override;
-  virtual bool is_master_zonegroup() const override {
-    return group.is_master_zonegroup();
-  };
-  virtual const std::string& get_api_name() const override { return group.api_name; };
-  virtual int get_placement_target_names(std::set<std::string>& names) const override;
-  virtual const std::string& get_default_placement_name() const override {
-    return group.default_placement.name; };
-  virtual int get_hostnames(std::list<std::string>& names) const override {
-    names = group.hostnames;
-    return 0;
-  };
-  virtual int get_s3website_hostnames(std::list<std::string>& names) const override {
-    names = group.hostnames_s3website;
-    return 0;
-  };
-  virtual int get_zone_count() const override {
-    return group.zones.size();
-  }
-  virtual int get_placement_tier(const rgw_placement_rule& rule, std::unique_ptr<PlacementTier>* tier);
-  virtual int get_zone_by_id(const std::string& id, std::unique_ptr<Zone>* zone) override;
-  virtual int get_zone_by_name(const std::string& name, std::unique_ptr<Zone>* zone) override;
-  virtual int list_zones(std::list<std::string>& zone_ids) override;
-  virtual std::unique_ptr<ZoneGroup> clone() override {
-    return std::make_unique<RadosZoneGroup>(store, group);
-  }
-  const RGWZoneGroup& get_group() const { return group; }
-};
-
-class RadosZone : public StoreZone {
-  protected:
-    RadosStore* store;
-    std::unique_ptr<ZoneGroup> group;
-    RGWZone rgw_zone;
-    bool local_zone{false};
-  public:
-    RadosZone(RadosStore* _store, std::unique_ptr<ZoneGroup> _zg) : store(_store), group(std::move(_zg)), local_zone(true) {}
-    RadosZone(RadosStore* _store, std::unique_ptr<ZoneGroup> _zg, RGWZone& z) : store(_store), group(std::move(_zg)), rgw_zone(z) {}
-    ~RadosZone() = default;
-
-    virtual std::unique_ptr<Zone> clone() override;
-    virtual ZoneGroup& get_zonegroup() override { return *(group.get()); }
-
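    // A minimal lookup sketch (hypothetical; assumes a RadosStore* named
    // `store` plus a zonegroup id and zone name that exist in the period).
    // Both constructors above are reached through the ZoneGroup interface:
    //
    //   std::unique_ptr<ZoneGroup> zg;
    //   if (store->get_zonegroup(zonegroup_id, &zg) == 0) {   // id from the period
    //     std::unique_ptr<Zone> z;
    //     if (zg->get_zone_by_name("us-east-1", &z) == 0) {   // example name
    //       // local_zone == true answers from svc()->zone; a remote zone
    //       // answers from the RGWZone record copied into rgw_zone
    //       (void)z->get_id();
    //     }
    //   }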
virtual const std::string& get_id() override; - virtual const std::string& get_name() const override; - virtual bool is_writeable() override; - virtual bool get_redirect_endpoint(std::string* endpoint) override; - virtual bool has_zonegroup_api(const std::string& api) const override; - virtual const std::string& get_current_period_id() override; - virtual const RGWAccessKey& get_system_key() override; - virtual const std::string& get_realm_name() override; - virtual const std::string& get_realm_id() override; - virtual const std::string_view get_tier_type() override; - virtual RGWBucketSyncPolicyHandlerRef get_sync_policy_handler() override; -}; - -class RadosStore : public StoreDriver { - private: - RGWRados* rados; - RGWUserCtl* user_ctl; - std::string luarocks_path; - std::unique_ptr zone; - - public: - RadosStore() - : rados(nullptr) { - } - ~RadosStore() { - delete rados; - } - - virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) override; - virtual const std::string get_name() const override { - return "rados"; - } - virtual std::string get_cluster_id(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual std::unique_ptr get_user(const rgw_user& u) override; - virtual int get_user_by_access_key(const DoutPrefixProvider* dpp, const std::string& key, optional_yield y, std::unique_ptr* user) override; - virtual int get_user_by_email(const DoutPrefixProvider* dpp, const std::string& email, optional_yield y, std::unique_ptr* user) override; - virtual int get_user_by_swift(const DoutPrefixProvider* dpp, const std::string& user_str, optional_yield y, std::unique_ptr* user) override; - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, const rgw_bucket& b, std::unique_ptr* bucket, optional_yield y) override; - virtual int get_bucket(User* u, const RGWBucketInfo& i, std::unique_ptr* bucket) override; - virtual int get_bucket(const DoutPrefixProvider* dpp, User* u, const std::string& tenant, const std::string&name, std::unique_ptr* bucket, optional_yield y) override; - virtual bool is_meta_master() override; - virtual int forward_request_to_master(const DoutPrefixProvider *dpp, User* user, obj_version* objv, - bufferlist& in_data, JSONParser* jp, req_info& info, - optional_yield y) override; - virtual int forward_iam_request_to_master(const DoutPrefixProvider *dpp, const RGWAccessKey& key, obj_version* objv, - bufferlist& in_data, - RGWXMLDecoder::XMLParser* parser, req_info& info, - optional_yield y) override; - virtual Zone* get_zone() { return zone.get(); } - virtual std::string zone_unique_id(uint64_t unique_num) override; - virtual std::string zone_unique_trans_id(const uint64_t unique_num) override; - virtual int get_zonegroup(const std::string& id, std::unique_ptr* zonegroup) override; - virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list& zone_ids) override; - virtual int cluster_stat(RGWClusterStat& stats) override; - virtual std::unique_ptr get_lifecycle(void) override; - virtual std::unique_ptr get_completions(void) override; - virtual std::unique_ptr get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj, req_state* s, rgw::notify::EventType event_type, const std::string* object_name=nullptr) override; - virtual std::unique_ptr get_notification( - const DoutPrefixProvider* dpp, rgw::sal::Object* obj, rgw::sal::Object* src_obj, - rgw::notify::EventType event_type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, - 
std::string& _req_id, optional_yield y) override; - virtual RGWLC* get_rgwlc(void) override { return rados->get_lc(); } - virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return rados->get_cr_registry(); } - - virtual int log_usage(const DoutPrefixProvider *dpp, std::map& usage_info) override; - virtual int log_op(const DoutPrefixProvider *dpp, std::string& oid, bufferlist& bl) override; - virtual int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, - const std::map& meta) override; - virtual void get_quota(RGWQuota& quota) override; - virtual void get_ratelimit(RGWRateLimitInfo& bucket_ratelimit, RGWRateLimitInfo& user_ratelimit, RGWRateLimitInfo& anon_ratelimit) override; - virtual int set_buckets_enabled(const DoutPrefixProvider* dpp, std::vector& buckets, bool enabled) override; - virtual int get_sync_policy_handler(const DoutPrefixProvider* dpp, - std::optional zone, - std::optional bucket, - RGWBucketSyncPolicyHandlerRef* phandler, - optional_yield y) override; - virtual RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone) override; - virtual void wakeup_meta_sync_shards(std::set& shard_ids) override { rados->wakeup_meta_sync_shards(shard_ids); } - virtual void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, const rgw_zone_id& source_zone, boost::container::flat_map>& shard_ids) override { rados->wakeup_data_sync_shards(dpp, source_zone, shard_ids); } - virtual int clear_usage(const DoutPrefixProvider *dpp) override { return rados->clear_usage(dpp); } - virtual int read_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, - uint32_t max_entries, bool* is_truncated, - RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_all_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - virtual int get_config_key_val(std::string name, bufferlist* bl) override; - virtual int meta_list_keys_init(const DoutPrefixProvider *dpp, const std::string& section, const std::string& marker, void** phandle) override; - virtual int meta_list_keys_next(const DoutPrefixProvider *dpp, void* handle, int max, std::list& keys, bool* truncated) override; - virtual void meta_list_keys_complete(void* handle) override; - virtual std::string meta_get_marker(void* handle) override; - virtual int meta_remove(const DoutPrefixProvider* dpp, std::string& metadata_key, optional_yield y) override; - virtual const RGWSyncModuleInstanceRef& get_sync_module() { return rados->get_sync_module(); } - virtual std::string get_host_id() { return rados->host_id; } - virtual std::unique_ptr get_lua_manager() override; - virtual std::unique_ptr get_role(std::string name, - std::string tenant, - std::string path="", - std::string trust_policy="", - std::string max_session_duration_str="", - std::multimap tags={}) override; - virtual std::unique_ptr get_role(std::string id) override; - virtual std::unique_ptr get_role(const RGWRoleInfo& info) override; - virtual int get_roles(const DoutPrefixProvider *dpp, - optional_yield y, - const std::string& path_prefix, - const std::string& tenant, - std::vector>& roles) override; - virtual std::unique_ptr get_oidc_provider() override; - virtual int get_oidc_providers(const DoutPrefixProvider *dpp, - const std::string& tenant, - std::vector>& providers) override; - virtual std::unique_ptr get_append_writer(const DoutPrefixProvider *dpp, - optional_yield y, - std::unique_ptr _head_obj, - const rgw_user& owner, - const 
rgw_placement_rule *ptail_placement_rule, - const std::string& unique_tag, - uint64_t position, - uint64_t *cur_accounted_size) override; - virtual std::unique_ptr get_atomic_writer(const DoutPrefixProvider *dpp, - optional_yield y, - std::unique_ptr _head_obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t olh_epoch, - const std::string& unique_tag) override; - virtual const std::string& get_compression_type(const rgw_placement_rule& rule) override; - virtual bool valid_placement(const rgw_placement_rule& rule) override; - - virtual void finalize(void) override; - - virtual CephContext* ctx(void) override { return rados->ctx(); } - - virtual const std::string& get_luarocks_path() const override { - return luarocks_path; - } - - virtual void set_luarocks_path(const std::string& path) override { - luarocks_path = path; - } - virtual void register_admin_apis(RGWRESTMgr* mgr) override; - - /* Unique to RadosStore */ - int get_obj_head_ioctx(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, - librados::IoCtx* ioctx); - int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj); - int delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, Completions* aio); - void get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj); - int get_raw_chunk_size(const DoutPrefixProvider* dpp, const rgw_raw_obj& obj, uint64_t* chunk_size); - - void setRados(RGWRados * st) { rados = st; } - RGWRados* getRados(void) { return rados; } - - RGWServices* svc() { return &rados->svc; } - const RGWServices* svc() const { return &rados->svc; } - RGWCtl* ctl() { return &rados->ctl; } - const RGWCtl* ctl() const { return &rados->ctl; } - - void setUserCtl(RGWUserCtl *_ctl) { user_ctl = _ctl; } -}; - -class RadosUser : public StoreUser { - private: - RadosStore* store; - - public: - RadosUser(RadosStore *_st, const rgw_user& _u) : StoreUser(_u), store(_st) { } - RadosUser(RadosStore *_st, const RGWUserInfo& _i) : StoreUser(_i), store(_st) { } - RadosUser(RadosStore *_st) : store(_st) { } - RadosUser(RadosUser& _o) = default; - - virtual std::unique_ptr clone() override { - return std::unique_ptr(new RadosUser(*this)); - } - int list_buckets(const DoutPrefixProvider* dpp, const std::string& marker, const std::string& end_marker, - uint64_t max, bool need_stats, BucketList& buckets, - optional_yield y) override; - virtual int create_bucket(const DoutPrefixProvider* dpp, - const rgw_bucket& b, - const std::string& zonegroup_id, - rgw_placement_rule& placement_rule, - std::string& swift_ver_location, - const RGWQuotaInfo * pquota_info, - const RGWAccessControlPolicy& policy, - Attrs& attrs, - RGWBucketInfo& info, - obj_version& ep_objv, - bool exclusive, - bool obj_lock_enabled, - bool* existed, - req_info& req_info, - std::unique_ptr* bucket, - optional_yield y) override; - virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, Attrs& new_attrs, optional_yield y) override; - virtual int read_stats(const DoutPrefixProvider *dpp, - optional_yield y, RGWStorageStats* stats, - ceph::real_time* last_stats_sync = nullptr, - ceph::real_time* last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; - virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int read_usage(const 
DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, - bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - - virtual int load_user(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info = nullptr) override; - virtual int remove_user(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int verify_mfa(const std::string& mfa_str, bool* verified, const DoutPrefixProvider* dpp, optional_yield y) override; - - friend class RadosBucket; -}; - -class RadosObject : public StoreObject { - private: - RadosStore* store; - RGWAccessControlPolicy acls; - RGWObjManifest *manifest{nullptr}; - RGWObjectCtx* rados_ctx; - bool rados_ctx_owned; - - public: - - struct RadosReadOp : public ReadOp { - private: - RadosObject* source; - RGWObjectCtx* rctx; - RGWRados::Object op_target; - RGWRados::Object::Read parent_op; - - public: - RadosReadOp(RadosObject *_source, RGWObjectCtx *_rctx); - - virtual int prepare(optional_yield y, const DoutPrefixProvider* dpp) override; - - /* - * Both `read` and `iterate` read up through index `end` - * *inclusive*. The number of bytes that could be returned is - * `end - ofs + 1`. - */ - virtual int read(int64_t ofs, int64_t end, - bufferlist& bl, optional_yield y, - const DoutPrefixProvider* dpp) override; - virtual int iterate(const DoutPrefixProvider* dpp, - int64_t ofs, int64_t end, - RGWGetDataCB* cb, optional_yield y) override; - - virtual int get_attr(const DoutPrefixProvider* dpp, const char* name, bufferlist& dest, optional_yield y) override; - }; - - struct RadosDeleteOp : public DeleteOp { - private: - RadosObject* source; - RGWRados::Object op_target; - RGWRados::Object::Delete parent_op; - - public: - RadosDeleteOp(RadosObject* _source); - - virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override; - }; - - RadosObject(RadosStore *_st, const rgw_obj_key& _k) - : StoreObject(_k), - store(_st), - acls(), - rados_ctx(new RGWObjectCtx(dynamic_cast(store))), - rados_ctx_owned(true) { - } - RadosObject(RadosStore *_st, const rgw_obj_key& _k, Bucket* _b) - : StoreObject(_k, _b), - store(_st), - acls(), - rados_ctx(new RGWObjectCtx(dynamic_cast(store))) , - rados_ctx_owned(true) { - } - RadosObject(RadosObject& _o) : StoreObject(_o) { - store = _o.store; - acls = _o.acls; - manifest = _o.manifest; - rados_ctx = _o.rados_ctx; - rados_ctx_owned = false; - } - - virtual ~RadosObject(); - - virtual void invalidate() override { - StoreObject::invalidate(); - rados_ctx->invalidate(get_obj()); - } - virtual int delete_object(const DoutPrefixProvider* dpp, - optional_yield y, bool prevent_versioning) override; - virtual int delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate, Completions* aio, - bool keep_index_consistent, optional_yield y) override; - virtual int copy_object(User* user, - req_info* info, const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, - const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, ceph::real_time* mtime, - const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, - bool high_precision_time, - const char* if_match, const char* if_nomatch, - AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, - RGWObjCategory 
category, uint64_t olh_epoch, - boost::optional delete_at, - std::string* version_id, std::string* tag, std::string* etag, - void (*progress_cb)(off_t, void *), void* progress_data, - const DoutPrefixProvider* dpp, optional_yield y) override; - virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } - virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } - virtual void set_atomic() override { - rados_ctx->set_atomic(state.obj); - StoreObject::set_atomic(); - } - virtual void set_prefetch_data() override { - rados_ctx->set_prefetch_data(state.obj); - StoreObject::set_prefetch_data(); - } - virtual void set_compressed() override { - rados_ctx->set_compressed(state.obj); - StoreObject::set_compressed(); - } - - virtual int get_obj_state(const DoutPrefixProvider* dpp, RGWObjState **state, optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; - virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; - virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; - virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; - virtual bool is_expired() override; - virtual void gen_rand_obj_instance_name() override; - void get_raw_obj(rgw_raw_obj* raw_obj); - virtual std::unique_ptr clone() override { - return std::unique_ptr(new RadosObject(*this)); - } - virtual std::unique_ptr get_serializer(const DoutPrefixProvider *dpp, - const std::string& lock_name) override; - virtual int transition(Bucket* bucket, - const rgw_placement_rule& placement_rule, - const real_time& mtime, - uint64_t olh_epoch, - const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual int transition_to_cloud(Bucket* bucket, - rgw::sal::PlacementTier* tier, - rgw_bucket_dir_entry& o, - std::set& cloud_targets, - CephContext* cct, - bool update_object, - const DoutPrefixProvider* dpp, - optional_yield y) override; - virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override; - virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override; - - /* Swift versioning */ - virtual int swift_versioning_restore(bool& restored, - const DoutPrefixProvider* dpp) override; - virtual int swift_versioning_copy(const DoutPrefixProvider* dpp, - optional_yield y) override; - - /* OPs */ - virtual std::unique_ptr get_read_op() override; - virtual std::unique_ptr get_delete_op() override; - - /* OMAP */ - virtual int omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count, - std::map *m, - bool* pmore, optional_yield y) override; - virtual int omap_get_all(const DoutPrefixProvider *dpp, std::map *m, - optional_yield y) override; - virtual int omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid, - const std::set& keys, - Attrs* vals) override; - virtual int omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, - bool must_exist, optional_yield y) override; - - /* Internal to RadosStore */ - int get_max_chunk_size(const DoutPrefixProvider* dpp, - rgw_placement_rule placement_rule, - uint64_t* max_chunk_size, - uint64_t* alignment = nullptr); - void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t* max_size); - void 
raw_obj_to_obj(const rgw_raw_obj& raw_obj); - int write_cloud_tier(const DoutPrefixProvider* dpp, - optional_yield y, - uint64_t olh_epoch, - rgw::sal::PlacementTier* tier, - bool is_multipart_upload, - rgw_placement_rule& target_placement, - Object* head_obj); - RGWObjManifest* get_manifest() { return manifest; } - RGWObjectCtx& get_ctx() { return *rados_ctx; } - - private: - int read_attrs(const DoutPrefixProvider* dpp, RGWRados::Object::Read &read_op, optional_yield y, rgw_obj* target_obj = nullptr); -}; - -class RadosBucket : public StoreBucket { - private: - RadosStore* store; - RGWAccessControlPolicy acls; - - public: - RadosBucket(RadosStore *_st) - : store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, User* _u) - : StoreBucket(_u), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const rgw_bucket& _b) - : StoreBucket(_b), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const RGWBucketEnt& _e) - : StoreBucket(_e), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const RGWBucketInfo& _i) - : StoreBucket(_i), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const rgw_bucket& _b, User* _u) - : StoreBucket(_b, _u), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const RGWBucketEnt& _e, User* _u) - : StoreBucket(_e, _u), - store(_st), - acls() { - } - - RadosBucket(RadosStore *_st, const RGWBucketInfo& _i, User* _u) - : StoreBucket(_i, _u), - store(_st), - acls() { - } - - virtual ~RadosBucket(); - virtual std::unique_ptr get_object(const rgw_obj_key& k) override; - virtual int list(const DoutPrefixProvider* dpp, ListParams&, int, ListResults&, optional_yield y) override; - virtual int remove_bucket(const DoutPrefixProvider* dpp, bool delete_children, bool forward_to_master, req_info* req_info, optional_yield y) override; - virtual int remove_bucket_bypass_gc(int concurrent_max, bool - keep_index_consistent, - optional_yield y, const - DoutPrefixProvider *dpp) override; - virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } - virtual int set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy& acl, optional_yield y) override; - virtual int load_bucket(const DoutPrefixProvider* dpp, optional_yield y, bool get_stats = false) override; - virtual int read_stats(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, std::string* bucket_ver, std::string* master_ver, - std::map& stats, - std::string* max_marker = nullptr, - bool* syncstopped = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, - const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; - virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int update_container_stats(const DoutPrefixProvider* dpp) override; - virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override; - virtual int chown(const DoutPrefixProvider* dpp, User* new_user, User* old_user, optional_yield y, const std::string* marker = nullptr) override; - virtual int put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::real_time mtime) override; - virtual bool is_owner(User* user) override; - virtual int check_empty(const DoutPrefixProvider* dpp, optional_yield y) override; - virtual int check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) override; - virtual int merge_and_store_attrs(const DoutPrefixProvider* dpp, 
Attrs& attrs, optional_yield y) override; - virtual int try_refresh_info(const DoutPrefixProvider* dpp, ceph::real_time* pmtime) override; - virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, - bool* is_truncated, RGWUsageIter& usage_iter, - std::map& usage) override; - virtual int trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch) override; - virtual int remove_objs_from_index(const DoutPrefixProvider *dpp, std::list& objs_to_unlink) override; - virtual int check_index(const DoutPrefixProvider *dpp, std::map& existing_stats, std::map& calculated_stats) override; - virtual int rebuild_index(const DoutPrefixProvider *dpp) override; - virtual int set_tag_timeout(const DoutPrefixProvider *dpp, uint64_t timeout) override; - virtual int purge_instance(const DoutPrefixProvider* dpp) override; - virtual std::unique_ptr clone() override { - return std::make_unique(*this); - } - virtual std::unique_ptr get_multipart_upload( - const std::string& oid, - std::optional upload_id=std::nullopt, - ACLOwner owner={}, ceph::real_time mtime=real_clock::now()) override; - virtual int list_multiparts(const DoutPrefixProvider *dpp, - const std::string& prefix, - std::string& marker, - const std::string& delim, - const int& max_uploads, - std::vector>& uploads, - std::map *common_prefixes, - bool *is_truncated) override; - virtual int abort_multiparts(const DoutPrefixProvider* dpp, - CephContext* cct) override; - - private: - int link(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint = true, RGWObjVersionTracker* objv = nullptr); - int unlink(const DoutPrefixProvider* dpp, User* new_user, optional_yield y, bool update_entrypoint = true); - friend class RadosUser; -}; - -class RadosMultipartPart : public StoreMultipartPart { -protected: - RGWUploadPartInfo info; - -public: - RadosMultipartPart() = default; - virtual ~RadosMultipartPart() = default; - - virtual uint32_t get_num() { return info.num; } - virtual uint64_t get_size() { return info.accounted_size; } - virtual const std::string& get_etag() { return info.etag; } - virtual ceph::real_time& get_mtime() { return info.modified; } - - /* For RadosStore code */ - RGWObjManifest& get_manifest() { return info.manifest; } - - friend class RadosMultipartUpload; -}; - -class RadosMultipartUpload : public StoreMultipartUpload { - RadosStore* store; - RGWMPObj mp_obj; - ACLOwner owner; - ceph::real_time mtime; - rgw_placement_rule placement; - RGWObjManifest manifest; - -public: - RadosMultipartUpload(RadosStore* _store, Bucket* _bucket, const std::string& oid, - std::optional upload_id, ACLOwner owner, - ceph::real_time _mtime) - : StoreMultipartUpload(_bucket), store(_store), mp_obj(oid, upload_id), - owner(owner), mtime(_mtime) {} - virtual ~RadosMultipartUpload() = default; - - virtual const std::string& get_meta() const override { return mp_obj.get_meta(); } - virtual const std::string& get_key() const override { return mp_obj.get_key(); } - virtual const std::string& get_upload_id() const override { return mp_obj.get_upload_id(); } - virtual const ACLOwner& get_owner() const override { return owner; } - virtual ceph::real_time& get_mtime() override { return mtime; } - virtual std::unique_ptr get_meta_obj() override; - virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) override; - virtual int list_parts(const DoutPrefixProvider* dpp, 
CephContext* cct, - int num_parts, int marker, - int* next_marker, bool* truncated, - bool assume_unsorted = false) override; - virtual int abort(const DoutPrefixProvider* dpp, CephContext* cct) override; - virtual int complete(const DoutPrefixProvider* dpp, - optional_yield y, CephContext* cct, - std::map& part_etags, - std::list& remove_objs, - uint64_t& accounted_size, bool& compressed, - RGWCompressionInfo& cs_info, off_t& ofs, - std::string& tag, ACLOwner& owner, - uint64_t olh_epoch, - rgw::sal::Object* target_obj) override; - virtual int get_info(const DoutPrefixProvider *dpp, optional_yield y, rgw_placement_rule** rule, rgw::sal::Attrs* attrs = nullptr) override; - virtual std::unique_ptr get_writer(const DoutPrefixProvider *dpp, - optional_yield y, - std::unique_ptr _head_obj, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t part_num, - const std::string& part_num_str) override; -}; - -class MPRadosSerializer : public StoreMPSerializer { - librados::IoCtx ioctx; - rados::cls::lock::Lock lock; - librados::ObjectWriteOperation op; - -public: - MPRadosSerializer(const DoutPrefixProvider *dpp, RadosStore* store, RadosObject* obj, const std::string& lock_name); - - virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override; - virtual int unlock() override { - return lock.unlock(&ioctx, oid); - } -}; - -class LCRadosSerializer : public StoreLCSerializer { - librados::IoCtx* ioctx; - rados::cls::lock::Lock lock; - -public: - LCRadosSerializer(RadosStore* store, const std::string& oid, const std::string& lock_name, const std::string& cookie); - - virtual int try_lock(const DoutPrefixProvider *dpp, utime_t dur, optional_yield y) override; - virtual int unlock() override { - return lock.unlock(ioctx, oid); - } -}; - -class RadosLifecycle : public StoreLifecycle { - RadosStore* store; - -public: - RadosLifecycle(RadosStore* _st) : store(_st) {} - - using StoreLifecycle::get_entry; - virtual int get_entry(const std::string& oid, const std::string& marker, std::unique_ptr* entry) override; - virtual int get_next_entry(const std::string& oid, const std::string& marker, std::unique_ptr* entry) override; - virtual int set_entry(const std::string& oid, LCEntry& entry) override; - virtual int list_entries(const std::string& oid, const std::string& marker, - uint32_t max_entries, - std::vector>& entries) override; - virtual int rm_entry(const std::string& oid, LCEntry& entry) override; - virtual int get_head(const std::string& oid, std::unique_ptr* head) override; - virtual int put_head(const std::string& oid, LCHead& head) override; - virtual std::unique_ptr get_serializer(const std::string& lock_name, - const std::string& oid, - const std::string& cookie) override; -}; - -class RadosNotification : public StoreNotification { - RadosStore* store; - /* XXX it feels incorrect to me that rgw::notify::reservation_t is - * currently RADOS-specific; instead, I think notification types such as - * reservation_t should be generally visible, whereas the internal - * notification behavior should be made portable (e.g., notification - * to non-RADOS message sinks) */ - rgw::notify::reservation_t res; - - public: - RadosNotification(const DoutPrefixProvider* _dpp, RadosStore* _store, Object* _obj, Object* _src_obj, req_state* _s, rgw::notify::EventType _type, const std::string* object_name=nullptr) : - StoreNotification(_obj, _src_obj, _type), store(_store), res(_dpp, _store, _s, _obj, _src_obj, object_name) { } - - RadosNotification(const 
DoutPrefixProvider* _dpp, RadosStore* _store, Object* _obj, Object* _src_obj, rgw::notify::EventType _type, rgw::sal::Bucket* _bucket, std::string& _user_id, std::string& _user_tenant, std::string& _req_id, optional_yield y) : - StoreNotification(_obj, _src_obj, _type), store(_store), res(_dpp, _store, _obj, _src_obj, _bucket, _user_id, _user_tenant, _req_id, y) {} - - ~RadosNotification() = default; - - rgw::notify::reservation_t& get_reservation(void) { - return res; - } - - virtual int publish_reserve(const DoutPrefixProvider *dpp, RGWObjTags* obj_tags = nullptr) override; - virtual int publish_commit(const DoutPrefixProvider* dpp, uint64_t size, - const ceph::real_time& mtime, const std::string& etag, const std::string& version) override; -}; - -class RadosAtomicWriter : public StoreWriter { -protected: - rgw::sal::RadosStore* store; - std::unique_ptr aio; - RGWObjectCtx* obj_ctx; - rgw::putobj::AtomicObjectProcessor processor; - -public: - RadosAtomicWriter(const DoutPrefixProvider *dpp, - optional_yield y, - std::unique_ptr _head_obj, - RadosStore* _store, std::unique_ptr _aio, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t olh_epoch, - const std::string& unique_tag) : - StoreWriter(dpp, y), - store(_store), - aio(std::move(_aio)), - obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), - processor(&*aio, store, - ptail_placement_rule, owner, - *obj_ctx, - std::move(_head_obj), olh_epoch, unique_tag, - dpp, y) - {} - ~RadosAtomicWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; -}; - -class RadosAppendWriter : public StoreWriter { -protected: - rgw::sal::RadosStore* store; - std::unique_ptr aio; - RGWObjectCtx* obj_ctx; - rgw::putobj::AppendObjectProcessor processor; - -public: - RadosAppendWriter(const DoutPrefixProvider *dpp, - optional_yield y, - std::unique_ptr _head_obj, - RadosStore* _store, std::unique_ptr _aio, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - const std::string& unique_tag, - uint64_t position, - uint64_t *cur_accounted_size) : - StoreWriter(dpp, y), - store(_store), - aio(std::move(_aio)), - obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), - processor(&*aio, store, - ptail_placement_rule, owner, - *obj_ctx, - std::move(_head_obj), unique_tag, position, - cur_accounted_size, dpp, y) - {} - ~RadosAppendWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; -}; - -class RadosMultipartWriter : public 
StoreWriter { -protected: - rgw::sal::RadosStore* store; - std::unique_ptr aio; - RGWObjectCtx* obj_ctx; - rgw::putobj::MultipartObjectProcessor processor; - -public: - RadosMultipartWriter(const DoutPrefixProvider *dpp, - optional_yield y, MultipartUpload* upload, - std::unique_ptr _head_obj, - RadosStore* _store, std::unique_ptr _aio, - const rgw_user& owner, - const rgw_placement_rule *ptail_placement_rule, - uint64_t part_num, const std::string& part_num_str) : - StoreWriter(dpp, y), - store(_store), - aio(std::move(_aio)), - obj_ctx(&dynamic_cast(_head_obj.get())->get_ctx()), - processor(&*aio, store, - ptail_placement_rule, owner, - *obj_ctx, - std::move(_head_obj), upload->get_upload_id(), - part_num, part_num_str, dpp, y) - {} - ~RadosMultipartWriter() = default; - - // prepare to start processing object data - virtual int prepare(optional_yield y) override; - - // Process a bufferlist - virtual int process(bufferlist&& data, uint64_t offset) override; - - // complete the operation and make its result visible to clients - virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; -}; - -class RadosLuaManager : public StoreLuaManager { - RadosStore* const store; - rgw_pool pool; - -public: - RadosLuaManager(RadosStore* _s); - virtual ~RadosLuaManager() = default; - - virtual int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script); - virtual int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script); - virtual int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key); - virtual int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name); - virtual int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name); - virtual int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages); -}; - -class RadosOIDCProvider : public RGWOIDCProvider { - RadosStore* store; -public: - RadosOIDCProvider(RadosStore* _store) : store(_store) {} - ~RadosOIDCProvider() = default; - - virtual int store_url(const DoutPrefixProvider *dpp, const std::string& url, bool exclusive, optional_yield y) override; - virtual int read_url(const DoutPrefixProvider *dpp, const std::string& url, const std::string& tenant) override; - virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override; - void encode(bufferlist& bl) const { - RGWOIDCProvider::encode(bl); - } - void decode(bufferlist::const_iterator& bl) { - RGWOIDCProvider::decode(bl); - } -}; - -class RadosRole : public RGWRole { - RadosStore* store; -public: - RadosRole(RadosStore* _store, std::string name, - std::string tenant, - std::string path, - std::string trust_policy, - std::string max_session_duration, - std::multimap tags) : RGWRole(name, tenant, path, trust_policy, max_session_duration, tags), store(_store) {} - RadosRole(RadosStore* _store, std::string id) : RGWRole(id), store(_store) {} - RadosRole(RadosStore* _store, const RGWRoleInfo& info) : RGWRole(info), store(_store) {} - RadosRole(RadosStore* _store) : store(_store) {} - ~RadosRole() = default; - - virtual int store_info(const DoutPrefixProvider *dpp, bool 
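// RadosAtomicWriter, RadosAppendWriter and RadosMultipartWriter above all
// share the same three-phase sink contract and differ only in the putobj
// processor they wrap. A hedged sketch of the calling sequence; the
// zero-length flush convention is my reading of the putobj processors, and
// next_chunk() is a hypothetical data source:
//
//   writer->prepare(y);                          // set up head/tail placement
//   uint64_t ofs = 0;
//   for (bufferlist bl; next_chunk(bl); ) {
//     uint64_t len = bl.length();
//     writer->process(std::move(bl), ofs);       // stage data at this offset
//     ofs += len;
//   }
//   writer->process(bufferlist{}, ofs);          // zero-length write == flush tail
//   writer->complete(ofs, etag, &mtime, set_mtime, attrs, delete_at,
//                    if_match, if_nomatch, nullptr, nullptr, &canceled, y);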
exclusive, optional_yield y) override; - virtual int store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) override; - virtual int store_path(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y) override; - virtual int read_id(const DoutPrefixProvider *dpp, const std::string& role_name, const std::string& tenant, std::string& role_id, optional_yield y) override; - virtual int read_name(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int read_info(const DoutPrefixProvider *dpp, optional_yield y) override; - virtual int create(const DoutPrefixProvider *dpp, bool exclusive, const std::string& role_id, optional_yield y) override; - virtual int delete_obj(const DoutPrefixProvider *dpp, optional_yield y) override; -}; -}} // namespace rgw::sal - -WRITE_CLASS_ENCODER(rgw::sal::RadosOIDCProvider) diff --git a/src/rgw/store/rados/rgw_service.cc b/src/rgw/store/rados/rgw_service.cc deleted file mode 100644 index 4fcb1ebdef7..00000000000 --- a/src/rgw/store/rados/rgw_service.cc +++ /dev/null @@ -1,476 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_service.h" - -#include "services/svc_finisher.h" -#include "services/svc_bi_rados.h" -#include "services/svc_bilog_rados.h" -#include "services/svc_bucket_sobj.h" -#include "services/svc_bucket_sync_sobj.h" -#include "services/svc_cls.h" -#include "services/svc_config_key_rados.h" -#include "services/svc_mdlog.h" -#include "services/svc_meta.h" -#include "services/svc_meta_be.h" -#include "services/svc_meta_be_sobj.h" -#include "services/svc_meta_be_otp.h" -#include "services/svc_notify.h" -#include "services/svc_otp.h" -#include "services/svc_rados.h" -#include "services/svc_zone.h" -#include "services/svc_zone_utils.h" -#include "services/svc_quota.h" -#include "services/svc_sync_modules.h" -#include "services/svc_sys_obj.h" -#include "services/svc_sys_obj_cache.h" -#include "services/svc_sys_obj_core.h" -#include "services/svc_user_rados.h" -#include "services/svc_role_rados.h" - -#include "common/errno.h" - -#include "rgw_bucket.h" -#include "rgw_datalog.h" -#include "rgw_metadata.h" -#include "rgw_otp.h" -#include "rgw_user.h" -#include "rgw_role.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -RGWServices_Def::RGWServices_Def() = default; -RGWServices_Def::~RGWServices_Def() -{ - shutdown(); -} - -int RGWServices_Def::init(CephContext *cct, - bool have_cache, - bool raw, - bool run_sync, - optional_yield y, - const DoutPrefixProvider *dpp) -{ - finisher = std::make_unique(cct); - bucket_sobj = std::make_unique(cct); - bucket_sync_sobj = std::make_unique(cct); - bi_rados = std::make_unique(cct); - bilog_rados = std::make_unique(cct); - cls = std::make_unique(cct); - config_key_rados = std::make_unique(cct); - datalog_rados = std::make_unique(cct); - mdlog = std::make_unique(cct, run_sync); - meta = std::make_unique(cct); - meta_be_sobj = std::make_unique(cct); - meta_be_otp = std::make_unique(cct); - notify = std::make_unique(cct); - otp = std::make_unique(cct); - rados = std::make_unique(cct); - zone = std::make_unique(cct); - zone_utils = std::make_unique(cct); - quota = std::make_unique(cct); - sync_modules = std::make_unique(cct); - sysobj = std::make_unique(cct); - sysobj_core = std::make_unique(cct); - user_rados = std::make_unique(cct); - role_rados = std::make_unique(cct); - - if (have_cache) { - sysobj_cache = std::make_unique(dpp, cct); - } - - vector meta_bes{meta_be_sobj.get(), 
meta_be_otp.get()}; - - finisher->init(); - bi_rados->init(zone.get(), rados.get(), bilog_rados.get(), datalog_rados.get()); - bilog_rados->init(bi_rados.get()); - bucket_sobj->init(zone.get(), sysobj.get(), sysobj_cache.get(), - bi_rados.get(), meta.get(), meta_be_sobj.get(), - sync_modules.get(), bucket_sync_sobj.get()); - bucket_sync_sobj->init(zone.get(), - sysobj.get(), - sysobj_cache.get(), - bucket_sobj.get()); - cls->init(zone.get(), rados.get()); - config_key_rados->init(rados.get()); - mdlog->init(rados.get(), zone.get(), sysobj.get(), cls.get()); - meta->init(sysobj.get(), mdlog.get(), meta_bes); - meta_be_sobj->init(sysobj.get(), mdlog.get()); - meta_be_otp->init(sysobj.get(), mdlog.get(), cls.get()); - notify->init(zone.get(), rados.get(), finisher.get()); - otp->init(zone.get(), meta.get(), meta_be_otp.get()); - rados->init(); - zone->init(sysobj.get(), rados.get(), sync_modules.get(), bucket_sync_sobj.get()); - zone_utils->init(rados.get(), zone.get()); - quota->init(zone.get()); - sync_modules->init(zone.get()); - sysobj_core->core_init(rados.get(), zone.get()); - if (have_cache) { - sysobj_cache->init(rados.get(), zone.get(), notify.get()); - sysobj->init(rados.get(), sysobj_cache.get()); - } else { - sysobj->init(rados.get(), sysobj_core.get()); - } - user_rados->init(rados.get(), zone.get(), sysobj.get(), sysobj_cache.get(), - meta.get(), meta_be_sobj.get(), sync_modules.get()); - role_rados->init(zone.get(), meta.get(), meta_be_sobj.get(), sysobj.get()); - - can_shutdown = true; - - int r = finisher->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start finisher service (" << cpp_strerror(-r) << dendl; - return r; - } - - if (!raw) { - r = notify->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start notify service (" << cpp_strerror(-r) << dendl; - return r; - } - } - - r = rados->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start rados service (" << cpp_strerror(-r) << dendl; - return r; - } - - if (!raw) { - r = zone->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start zone service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = datalog_rados->start(dpp, &zone->get_zone(), - zone->get_zone_params(), - rados->get_rados_handle()); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = mdlog->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start mdlog service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = sync_modules->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start sync modules service (" << cpp_strerror(-r) << dendl; - return r; - } - } - - r = cls->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start cls service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = config_key_rados->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start config_key service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = zone_utils->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start zone_utils service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = quota->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start quota service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = sysobj_core->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj_core service (" << cpp_strerror(-r) << dendl; - 
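// The pattern above is strictly two-phase: every service object is
// constructed and cross-wired via init() first, and only then start()ed in
// dependency order, with the first failure returned immediately so the
// caller can shut down whatever already started. The shape of it, reduced
// to a standalone sketch (illustrative types, not the real service list):
//
//   #include <vector>
//   struct Svc { virtual ~Svc() = default; virtual int start() = 0; };
//   int start_all(const std::vector<Svc*>& ordered) {
//     for (Svc* s : ordered)              // dependencies come first in `ordered`
//       if (int r = s->start(); r < 0)
//         return r;                       // fail fast, propagate errno-style code
//     return 0;
//   }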
return r; - } - - if (have_cache) { - r = sysobj_cache->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj_cache service (" << cpp_strerror(-r) << dendl; - return r; - } - } - - r = sysobj->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start sysobj service (" << cpp_strerror(-r) << dendl; - return r; - } - - if (!raw) { - r = meta_be_sobj->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start meta_be_sobj service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = meta->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start meta service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = bucket_sobj->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start bucket service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = bucket_sync_sobj->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start bucket_sync service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = user_rados->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start user_rados service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = otp->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start otp service (" << cpp_strerror(-r) << dendl; - return r; - } - - r = role_rados->start(y, dpp); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to start role_rados service (" << cpp_strerror(-r) << dendl; - return r; - } - - } - - /* cache or core services will be started by sysobj */ - - return 0; -} - -void RGWServices_Def::shutdown() -{ - if (!can_shutdown) { - return; - } - - if (has_shutdown) { - return; - } - - role_rados->shutdown(); - datalog_rados.reset(); - user_rados->shutdown(); - sync_modules->shutdown(); - otp->shutdown(); - notify->shutdown(); - meta_be_otp->shutdown(); - meta_be_sobj->shutdown(); - meta->shutdown(); - mdlog->shutdown(); - config_key_rados->shutdown(); - cls->shutdown(); - bilog_rados->shutdown(); - bi_rados->shutdown(); - bucket_sync_sobj->shutdown(); - bucket_sobj->shutdown(); - finisher->shutdown(); - - sysobj->shutdown(); - sysobj_core->shutdown(); - notify->shutdown(); - if (sysobj_cache) { - sysobj_cache->shutdown(); - } - quota->shutdown(); - zone_utils->shutdown(); - zone->shutdown(); - rados->shutdown(); - - has_shutdown = true; - -} - - -int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) -{ - cct = _cct; - - int r = _svc.init(cct, have_cache, raw, run_sync, y, dpp); - if (r < 0) { - return r; - } - - finisher = _svc.finisher.get(); - bi_rados = _svc.bi_rados.get(); - bi = bi_rados; - bilog_rados = _svc.bilog_rados.get(); - bucket_sobj = _svc.bucket_sobj.get(); - bucket = bucket_sobj; - bucket_sync_sobj = _svc.bucket_sync_sobj.get(); - bucket_sync = bucket_sync_sobj; - cls = _svc.cls.get(); - config_key_rados = _svc.config_key_rados.get(); - config_key = config_key_rados; - datalog_rados = _svc.datalog_rados.get(); - mdlog = _svc.mdlog.get(); - meta = _svc.meta.get(); - meta_be_sobj = _svc.meta_be_sobj.get(); - meta_be_otp = _svc.meta_be_otp.get(); - notify = _svc.notify.get(); - otp = _svc.otp.get(); - rados = _svc.rados.get(); - zone = _svc.zone.get(); - zone_utils = _svc.zone_utils.get(); - quota = _svc.quota.get(); - sync_modules = _svc.sync_modules.get(); - sysobj = _svc.sysobj.get(); - cache = _svc.sysobj_cache.get(); - core = _svc.sysobj_core.get(); - user = _svc.user_rados.get(); - role = 
_svc.role_rados.get(); - - return 0; -} - -RGWServiceInstance::~RGWServiceInstance() {} - -int RGWServiceInstance::start(optional_yield y, const DoutPrefixProvider *dpp) -{ - if (start_state != StateInit) { - return 0; - } - - start_state = StateStarting; /* set StateStarting before do_start() on purpose, so that services with circular - references can call start() on each other */ - - int r = do_start(y, dpp); - if (r < 0) { - return r; - } - - start_state = StateStarted; - - return 0; -} - -RGWCtlDef::RGWCtlDef() {} -RGWCtlDef::~RGWCtlDef() {} -RGWCtlDef::_meta::_meta() {} -RGWCtlDef::_meta::~_meta() {} - - -int RGWCtlDef::init(RGWServices& svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp) -{ - meta.mgr.reset(new RGWMetadataManager(svc.meta)); - - meta.user.reset(RGWUserMetaHandlerAllocator::alloc(svc.user)); - - auto sync_module = svc.sync_modules->get_sync_module(); - if (sync_module) { - meta.bucket.reset(sync_module->alloc_bucket_meta_handler()); - meta.bucket_instance.reset(sync_module->alloc_bucket_instance_meta_handler(driver)); - } else { - meta.bucket.reset(RGWBucketMetaHandlerAllocator::alloc()); - meta.bucket_instance.reset(RGWBucketInstanceMetaHandlerAllocator::alloc(driver)); - } - - meta.otp.reset(RGWOTPMetaHandlerAllocator::alloc()); - meta.role = std::make_unique<RGWRoleMetadataHandler>(driver, svc.role); - - user.reset(new RGWUserCtl(svc.zone, svc.user, (RGWUserMetadataHandler *)meta.user.get())); - bucket.reset(new RGWBucketCtl(svc.zone, - svc.bucket, - svc.bucket_sync, - svc.bi, svc.user)); - otp.reset(new RGWOTPCtl(svc.zone, svc.otp)); - - RGWBucketMetadataHandlerBase *bucket_meta_handler = static_cast<RGWBucketMetadataHandlerBase *>(meta.bucket.get()); - RGWBucketInstanceMetadataHandlerBase *bi_meta_handler = static_cast<RGWBucketInstanceMetadataHandlerBase *>(meta.bucket_instance.get()); - - bucket_meta_handler->init(svc.bucket, bucket.get()); - bi_meta_handler->init(svc.zone, svc.bucket, svc.bi); - - RGWOTPMetadataHandlerBase *otp_handler = static_cast<RGWOTPMetadataHandlerBase *>(meta.otp.get()); - otp_handler->init(svc.zone, svc.meta_be_otp, svc.otp); - - user->init(bucket.get()); - bucket->init(user.get(), - (RGWBucketMetadataHandler *)bucket_meta_handler, - (RGWBucketInstanceMetadataHandler *)bi_meta_handler, - svc.datalog_rados, - dpp); - - otp->init((RGWOTPMetadataHandler *)meta.otp.get()); - - return 0; -} - -int RGWCtl::init(RGWServices *_svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp) -{ - svc = _svc; - cct = svc->cct; - - int r = _ctl.init(*svc, driver, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to init ctls (" << cpp_strerror(-r) << dendl; - return r; - } - - meta.mgr = _ctl.meta.mgr.get(); - meta.user = _ctl.meta.user.get(); - meta.bucket = _ctl.meta.bucket.get(); - meta.bucket_instance = _ctl.meta.bucket_instance.get(); - meta.otp = _ctl.meta.otp.get(); - meta.role = _ctl.meta.role.get(); - - user = _ctl.user.get(); - bucket = _ctl.bucket.get(); - otp = _ctl.otp.get(); - - r = meta.user->attach(meta.mgr); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to init meta.user ctl (" << cpp_strerror(-r) << dendl; - return r; - } - - r = meta.bucket->attach(meta.mgr); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to init meta.bucket ctl (" << cpp_strerror(-r) << dendl; - return r; - } - - r = meta.bucket_instance->attach(meta.mgr); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to init meta.bucket_instance ctl (" << cpp_strerror(-r) << dendl; - return r; - } - - r = meta.otp->attach(meta.mgr); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to init otp ctl (" << cpp_strerror(-r) << dendl; - return r; - } - - r = 
meta.role->attach(meta.mgr); - if (r < 0) { - ldout(cct, 0) << "ERROR: failed to init meta.role ctl (" << cpp_strerror(-r) << dendl; - return r; - } - return 0; -} - diff --git a/src/rgw/store/rados/rgw_service.h b/src/rgw/store/rados/rgw_service.h deleted file mode 100644 index dc4991388a9..00000000000 --- a/src/rgw/store/rados/rgw_service.h +++ /dev/null @@ -1,219 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SERVICE_H -#define CEPH_RGW_SERVICE_H - - -#include <string> -#include <vector> -#include <memory> - -#include "common/async/yield_context.h" - -#include "rgw_common.h" - -struct RGWServices_Def; - -class RGWServiceInstance -{ - friend struct RGWServices_Def; - -protected: - CephContext *cct; - - enum StartState { - StateInit = 0, - StateStarting = 1, - StateStarted = 2, - } start_state{StateInit}; - - virtual void shutdown() {} - virtual int do_start(optional_yield, const DoutPrefixProvider *dpp) { - return 0; - } -public: - RGWServiceInstance(CephContext *_cct) : cct(_cct) {} - virtual ~RGWServiceInstance(); - - int start(optional_yield y, const DoutPrefixProvider *dpp); - bool is_started() { - return (start_state == StateStarted); - } - - CephContext *ctx() { - return cct; - } -}; - -class RGWSI_Finisher; -class RGWSI_Bucket; -class RGWSI_Bucket_SObj; -class RGWSI_Bucket_Sync; -class RGWSI_Bucket_Sync_SObj; -class RGWSI_BucketIndex; -class RGWSI_BucketIndex_RADOS; -class RGWSI_BILog_RADOS; -class RGWSI_Cls; -class RGWSI_ConfigKey; -class RGWSI_ConfigKey_RADOS; -class RGWSI_MDLog; -class RGWSI_Meta; -class RGWSI_MetaBackend; -class RGWSI_MetaBackend_SObj; -class RGWSI_MetaBackend_OTP; -class RGWSI_Notify; -class RGWSI_OTP; -class RGWSI_RADOS; -class RGWSI_Zone; -class RGWSI_ZoneUtils; -class RGWSI_Quota; -class RGWSI_SyncModules; -class RGWSI_SysObj; -class RGWSI_SysObj_Core; -class RGWSI_SysObj_Cache; -class RGWSI_User; -class RGWSI_User_RADOS; -class RGWDataChangesLog; -class RGWSI_Role_RADOS; - -struct RGWServices_Def -{ - bool can_shutdown{false}; - bool has_shutdown{false}; - - std::unique_ptr<RGWSI_Finisher> finisher; - std::unique_ptr<RGWSI_Bucket_SObj> bucket_sobj; - std::unique_ptr<RGWSI_Bucket_Sync_SObj> bucket_sync_sobj; - std::unique_ptr<RGWSI_BucketIndex_RADOS> bi_rados; - std::unique_ptr<RGWSI_BILog_RADOS> bilog_rados; - std::unique_ptr<RGWSI_Cls> cls; - std::unique_ptr<RGWSI_ConfigKey_RADOS> config_key_rados; - std::unique_ptr<RGWSI_MDLog> mdlog; - std::unique_ptr<RGWSI_Meta> meta; - std::unique_ptr<RGWSI_MetaBackend_SObj> meta_be_sobj; - std::unique_ptr<RGWSI_MetaBackend_OTP> meta_be_otp; - std::unique_ptr<RGWSI_Notify> notify; - std::unique_ptr<RGWSI_OTP> otp; - std::unique_ptr<RGWSI_RADOS> rados; - std::unique_ptr<RGWSI_Zone> zone; - std::unique_ptr<RGWSI_ZoneUtils> zone_utils; - std::unique_ptr<RGWSI_Quota> quota; - std::unique_ptr<RGWSI_SyncModules> sync_modules; - std::unique_ptr<RGWSI_SysObj> sysobj; - std::unique_ptr<RGWSI_SysObj_Core> sysobj_core; - std::unique_ptr<RGWSI_SysObj_Cache> sysobj_cache; - std::unique_ptr<RGWSI_User_RADOS> user_rados; - std::unique_ptr<RGWDataChangesLog> datalog_rados; - std::unique_ptr<RGWSI_Role_RADOS> role_rados; - - RGWServices_Def(); - ~RGWServices_Def(); - - int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp); - void shutdown(); -}; - - -struct RGWServices -{ - RGWServices_Def _svc; - - CephContext *cct; - - RGWSI_Finisher *finisher{nullptr}; - RGWSI_Bucket *bucket{nullptr}; - RGWSI_Bucket_SObj *bucket_sobj{nullptr}; - RGWSI_Bucket_Sync *bucket_sync{nullptr}; - RGWSI_Bucket_Sync_SObj *bucket_sync_sobj{nullptr}; - RGWSI_BucketIndex *bi{nullptr}; - RGWSI_BucketIndex_RADOS *bi_rados{nullptr}; - RGWSI_BILog_RADOS *bilog_rados{nullptr}; - RGWSI_Cls *cls{nullptr}; - RGWSI_ConfigKey_RADOS *config_key_rados{nullptr}; - RGWSI_ConfigKey *config_key{nullptr}; - RGWDataChangesLog *datalog_rados{nullptr}; - 
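// Note the ownership split here: RGWServices_Def above owns every service
// through a unique_ptr, while this RGWServices struct mirrors them as
// non-owning raw pointers, with the abstract-interface members (bucket, bi,
// config_key, ...) aliased in do_init() to whichever concrete backend was
// built. A minimal sketch of the owner/borrower idiom (hypothetical names):
//
//   #include <memory>
//   struct Backend { int start() { return 0; } };
//   struct Defs { std::unique_ptr<Backend> be; };   // owns, controls lifetime
//   struct View {
//     Defs _svc;
//     Backend* be{nullptr};                         // borrows, never deleted here
//     void wire() { be = _svc.be.get(); }           // mirrors Defs into raw ptrs
//   };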
RGWSI_MDLog *mdlog{nullptr}; - RGWSI_Meta *meta{nullptr}; - RGWSI_MetaBackend *meta_be_sobj{nullptr}; - RGWSI_MetaBackend *meta_be_otp{nullptr}; - RGWSI_Notify *notify{nullptr}; - RGWSI_OTP *otp{nullptr}; - RGWSI_RADOS *rados{nullptr}; - RGWSI_Zone *zone{nullptr}; - RGWSI_ZoneUtils *zone_utils{nullptr}; - RGWSI_Quota *quota{nullptr}; - RGWSI_SyncModules *sync_modules{nullptr}; - RGWSI_SysObj *sysobj{nullptr}; - RGWSI_SysObj_Cache *cache{nullptr}; - RGWSI_SysObj_Core *core{nullptr}; - RGWSI_User *user{nullptr}; - RGWSI_Role_RADOS *role{nullptr}; - - int do_init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp); - - int init(CephContext *cct, bool have_cache, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) { - return do_init(cct, have_cache, false, run_sync, y, dpp); - } - - int init_raw(CephContext *cct, bool have_cache, optional_yield y, const DoutPrefixProvider *dpp) { - return do_init(cct, have_cache, true, false, y, dpp); - } - void shutdown() { - _svc.shutdown(); - } -}; - -class RGWMetadataManager; -class RGWMetadataHandler; -class RGWUserCtl; -class RGWBucketCtl; -class RGWOTPCtl; - -struct RGWCtlDef { - struct _meta { - std::unique_ptr mgr; - std::unique_ptr bucket; - std::unique_ptr bucket_instance; - std::unique_ptr user; - std::unique_ptr otp; - std::unique_ptr role; - - _meta(); - ~_meta(); - } meta; - - std::unique_ptr user; - std::unique_ptr bucket; - std::unique_ptr otp; - - RGWCtlDef(); - ~RGWCtlDef(); - - int init(RGWServices& svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp); -}; - -struct RGWCtl { - CephContext *cct{nullptr}; - RGWServices *svc{nullptr}; - - RGWCtlDef _ctl; - - struct _meta { - RGWMetadataManager *mgr{nullptr}; - - RGWMetadataHandler *bucket{nullptr}; - RGWMetadataHandler *bucket_instance{nullptr}; - RGWMetadataHandler *user{nullptr}; - RGWMetadataHandler *otp{nullptr}; - RGWMetadataHandler *role{nullptr}; - } meta; - - RGWUserCtl *user{nullptr}; - RGWBucketCtl *bucket{nullptr}; - RGWOTPCtl *otp{nullptr}; - - int init(RGWServices *_svc, rgw::sal::Driver* driver, const DoutPrefixProvider *dpp); -}; - -#endif diff --git a/src/rgw/store/rados/rgw_sync.cc b/src/rgw/store/rados/rgw_sync.cc deleted file mode 100644 index 065d20985c4..00000000000 --- a/src/rgw/store/rados/rgw_sync.cc +++ /dev/null @@ -1,2567 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_sync.h" -#include "rgw_rest_conn.h" -#include "rgw_cr_rados.h" -#include "rgw_cr_rest.h" - -#include "services/svc_zone.h" -#include "services/svc_mdlog.h" -#include "services/svc_cls.h" - -#include - -#define dout_subsys ceph_subsys_rgw - -#undef dout_prefix -#define dout_prefix (*_dout << "meta sync: ") - -using namespace std; - -static string mdlog_sync_status_oid = "mdlog.sync-status"; -static string mdlog_sync_status_shard_prefix = "mdlog.sync-status.shard"; -static string mdlog_sync_full_sync_index_prefix = "meta.full-sync.index"; - -RGWContinuousLeaseCR::~RGWContinuousLeaseCR() {} - -RGWSyncErrorLogger::RGWSyncErrorLogger(rgw::sal::RadosStore* _store, const string &oid_prefix, int _num_shards) : store(_store), num_shards(_num_shards) { - for (int i = 0; i < num_shards; i++) { - oids.push_back(get_shard_oid(oid_prefix, i)); - } -} -string RGWSyncErrorLogger::get_shard_oid(const string& oid_prefix, int shard_id) { - char buf[oid_prefix.size() + 16]; - snprintf(buf, sizeof(buf), "%s.%d", oid_prefix.c_str(), shard_id); - return 
string(buf); -} - -RGWCoroutine *RGWSyncErrorLogger::log_error_cr(const DoutPrefixProvider *dpp, const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message) { - cls_log_entry entry; - - rgw_sync_error_info info(source_zone, error_code, message); - bufferlist bl; - encode(info, bl); - store->svc()->cls->timelog.prepare_entry(entry, real_clock::now(), section, name, bl); - - uint32_t shard_id = ++counter % num_shards; - - - return new RGWRadosTimelogAddCR(dpp, store, oids[shard_id], entry); -} - -void RGWSyncBackoff::update_wait_time() -{ - if (cur_wait == 0) { - cur_wait = 1; - } else { - cur_wait = (cur_wait << 1); - } - if (cur_wait >= max_secs) { - cur_wait = max_secs; - } -} - -void RGWSyncBackoff::backoff_sleep() -{ - update_wait_time(); - sleep(cur_wait); -} - -void RGWSyncBackoff::backoff(RGWCoroutine *op) -{ - update_wait_time(); - op->wait(utime_t(cur_wait, 0)); -} - -int RGWBackoffControlCR::operate(const DoutPrefixProvider *dpp) { - reenter(this) { - // retry the operation until it succeeds - while (true) { - yield { - std::lock_guard l{lock}; - cr = alloc_cr(); - cr->get(); - call(cr); - } - { - std::lock_guard l{lock}; - cr->put(); - cr = NULL; - } - if (retcode >= 0) { - break; - } - if (retcode != -EBUSY && retcode != -EAGAIN) { - ldout(cct, 0) << "ERROR: RGWBackoffControlCR called coroutine returned " << retcode << dendl; - if (exit_on_error) { - return set_cr_error(retcode); - } - } - if (reset_backoff) { - backoff.reset(); - } - yield backoff.backoff(this); - } - - // run an optional finisher - yield call(alloc_finisher_cr()); - if (retcode < 0) { - ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl; - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; -} - -void rgw_mdlog_info::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("num_objects", num_shards, obj); - JSONDecoder::decode_json("period", period, obj); - JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); -} - -void rgw_mdlog_entry::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("id", id, obj); - JSONDecoder::decode_json("section", section, obj); - JSONDecoder::decode_json("name", name, obj); - utime_t ut; - JSONDecoder::decode_json("timestamp", ut, obj); - timestamp = ut.to_real_time(); - JSONDecoder::decode_json("data", log_data, obj); -} - -void rgw_mdlog_shard_data::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("truncated", truncated, obj); - JSONDecoder::decode_json("entries", entries, obj); -}; - -int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) { - reenter(this) { - while (spawn_next()) { - current_running++; - - if (current_running >= max_concurrent) { - int child_ret; - yield wait_for_child(); - if (collect_next(&child_ret)) { - current_running--; - child_ret = handle_result(child_ret); - if (child_ret < 0) { - status = child_ret; - } - } - } - } - while (current_running > 0) { - int child_ret; - yield wait_for_child(); - if (collect_next(&child_ret)) { - current_running--; - child_ret = handle_result(child_ret); - if (child_ret < 0) { - status = child_ret; - } - } - } - if (status < 0) { - return set_cr_error(status); - } - return set_cr_done(); - } - return 0; -} - -class RGWReadRemoteMDLogInfoCR : public RGWShardCollectCR { - RGWMetaSyncEnv *sync_env; - - const std::string& period; - int num_shards; - map *mdlog_info; - - int shard_id; -#define READ_MDLOG_MAX_CONCURRENT 10 - - int handle_result(int r) 
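// RGWSyncBackoff above implements capped exponential backoff: cur_wait
// doubles from one second and clamps at max_secs, with backoff() parking
// the coroutine while backoff_sleep() blocks the calling thread. The policy
// itself, as a standalone function:
//
//   #include <algorithm>
//   #include <cstdint>
//   uint32_t next_wait(uint32_t cur_wait, uint32_t max_secs) {
//     cur_wait = (cur_wait == 0) ? 1 : (cur_wait << 1);  // 1, 2, 4, 8, ...
//     return std::min(cur_wait, max_secs);               // clamp at the cap
//   }
//
// RGWShardCollectCR just above likewise bounds fan-out: it spawns child
// coroutines until max_concurrent are in flight, collects one completion
// before spawning the next, and drains the remainder at the end.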
override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to fetch mdlog status: " << cpp_strerror(r) << dendl; - } - return r; - } -public: - RGWReadRemoteMDLogInfoCR(RGWMetaSyncEnv *_sync_env, - const std::string& period, int _num_shards, - map *_mdlog_info) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT), - sync_env(_sync_env), - period(period), num_shards(_num_shards), - mdlog_info(_mdlog_info), shard_id(0) {} - bool spawn_next() override; -}; - -class RGWListRemoteMDLogCR : public RGWShardCollectCR { - RGWMetaSyncEnv *sync_env; - - const std::string& period; - map shards; - int max_entries_per_shard; - map *result; - - map::iterator iter; -#define READ_MDLOG_MAX_CONCURRENT 10 - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to list remote mdlog shard: " << cpp_strerror(r) << dendl; - } - return r; - } -public: - RGWListRemoteMDLogCR(RGWMetaSyncEnv *_sync_env, - const std::string& period, map& _shards, - int _max_entries_per_shard, - map *_result) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT), - sync_env(_sync_env), period(period), - max_entries_per_shard(_max_entries_per_shard), - result(_result) { - shards.swap(_shards); - iter = shards.begin(); - } - bool spawn_next() override; -}; - -int RGWRemoteMetaLog::read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info) -{ - rgw_http_param_pair pairs[] = { { "type", "metadata" }, - { NULL, NULL } }; - - int ret = conn->get_json_resource(dpp, "/admin/log", pairs, null_yield, *log_info); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog info" << dendl; - return ret; - } - - ldpp_dout(dpp, 20) << "remote mdlog, num_shards=" << log_info->num_shards << dendl; - - return 0; -} - -int RGWRemoteMetaLog::read_master_log_shards_info(const DoutPrefixProvider *dpp, const string &master_period, map *shards_info) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - - rgw_mdlog_info log_info; - int ret = read_log_info(dpp, &log_info); - if (ret < 0) { - return ret; - } - - return run(dpp, new RGWReadRemoteMDLogInfoCR(&sync_env, master_period, log_info.num_shards, shards_info)); -} - -int RGWRemoteMetaLog::read_master_log_shards_next(const DoutPrefixProvider *dpp, const string& period, map shard_markers, map *result) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - - return run(dpp, new RGWListRemoteMDLogCR(&sync_env, period, shard_markers, 1, result)); -} - -int RGWRemoteMetaLog::init() -{ - conn = store->svc()->zone->get_master_conn(); - - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - - error_logger = new RGWSyncErrorLogger(store, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS); - - init_sync_env(&sync_env); - - tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "meta"); - - return 0; -} - -#define CLONE_MAX_ENTRIES 100 - -int RGWMetaSyncStatusManager::init(const DoutPrefixProvider *dpp) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - - if (!store->svc()->zone->get_master_conn()) { - ldpp_dout(dpp, -1) << "no REST connection to master zone" << dendl; - return -EIO; - } - - int r = rgw_init_ioctx(dpp, store->getRados()->get_rados_handle(), store->svc()->zone->get_zone_params().log_pool, ioctx, true); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed 
to open log pool (" << store->svc()->zone->get_zone_params().log_pool << " ret=" << r << dendl; - return r; - } - - r = master_log.init(); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to init remote log, r=" << r << dendl; - return r; - } - - RGWMetaSyncEnv& sync_env = master_log.get_sync_env(); - - rgw_meta_sync_status sync_status; - r = read_sync_status(dpp, &sync_status); - if (r < 0 && r != -ENOENT) { - ldpp_dout(dpp, -1) << "ERROR: failed to read sync status, r=" << r << dendl; - return r; - } - - int num_shards = sync_status.sync_info.num_shards; - - for (int i = 0; i < num_shards; i++) { - shard_objs[i] = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env.shard_obj_name(i)); - } - - std::unique_lock wl{ts_to_shard_lock}; - for (int i = 0; i < num_shards; i++) { - clone_markers.push_back(string()); - utime_shard ut; - ut.shard_id = i; - ts_to_shard[ut] = i; - } - - return 0; -} - -void RGWMetaSyncEnv::init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _store, RGWRESTConn *_conn, - RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, - RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer) { - dpp = _dpp; - cct = _cct; - store = _store; - conn = _conn; - async_rados = _async_rados; - http_manager = _http_manager; - error_logger = _error_logger; - sync_tracer = _sync_tracer; -} - -string RGWMetaSyncEnv::status_oid() -{ - return mdlog_sync_status_oid; -} - -string RGWMetaSyncEnv::shard_obj_name(int shard_id) -{ - char buf[mdlog_sync_status_shard_prefix.size() + 16]; - snprintf(buf, sizeof(buf), "%s.%d", mdlog_sync_status_shard_prefix.c_str(), shard_id); - - return string(buf); -} - -class RGWAsyncReadMDLogEntries : public RGWAsyncRadosRequest { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWMetadataLog *mdlog; - int shard_id; - int max_entries; - -protected: - int _send_request(const DoutPrefixProvider *dpp) override { - real_time from_time; - real_time end_time; - - void *handle; - - mdlog->init_list_entries(shard_id, from_time, end_time, marker, &handle); - - int ret = mdlog->list_entries(dpp, handle, max_entries, entries, &marker, &truncated); - - mdlog->complete_list_entries(handle); - - return ret; - } -public: - string marker; - list entries; - bool truncated; - - RGWAsyncReadMDLogEntries(const DoutPrefixProvider *dpp, RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - RGWMetadataLog* mdlog, int _shard_id, - std::string _marker, int _max_entries) - : RGWAsyncRadosRequest(caller, cn), dpp(dpp), store(_store), mdlog(mdlog), - shard_id(_shard_id), max_entries(_max_entries), marker(std::move(_marker)) {} -}; - -class RGWReadMDLogEntriesCR : public RGWSimpleCoroutine { - RGWMetaSyncEnv *sync_env; - RGWMetadataLog *const mdlog; - int shard_id; - string marker; - string *pmarker; - int max_entries; - list *entries; - bool *truncated; - - RGWAsyncReadMDLogEntries *req{nullptr}; - -public: - RGWReadMDLogEntriesCR(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog, - int _shard_id, string*_marker, int _max_entries, - list *_entries, bool *_truncated) - : RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), mdlog(mdlog), - shard_id(_shard_id), pmarker(_marker), max_entries(_max_entries), - entries(_entries), truncated(_truncated) {} - - ~RGWReadMDLogEntriesCR() override { - if (req) { - req->finish(); - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - marker = *pmarker; - req = new RGWAsyncReadMDLogEntries(dpp, this, 
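// RGWAsyncReadMDLogEntries::_send_request() above shows the mdlog cursor
// protocol: init_list_entries() opens a listing handle at a marker,
// list_entries() returns one batch and advances marker/truncated, and
// complete_list_entries() closes the handle. A hedged drain loop over one
// shard, assuming the same RGWMetadataLog API as above (consume() is
// hypothetical):
//
//   void *handle;
//   std::string marker;                  // resume token, updated per batch
//   bool truncated = true;
//   mdlog->init_list_entries(shard_id, real_time{}, real_time{}, marker, &handle);
//   while (truncated) {
//     std::list<cls_log_entry> entries;
//     if (mdlog->list_entries(dpp, handle, max_entries, entries, &marker,
//                             &truncated) < 0)
//       break;                           // marker stays valid for a retry
//     consume(entries);
//   }
//   mdlog->complete_list_entries(handle);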
stack->create_completion_notifier(), - sync_env->store, mdlog, shard_id, marker, - max_entries); - sync_env->async_rados->queue(req); - return 0; - } - - int request_complete() override { - *pmarker = std::move(req->marker); - *entries = std::move(req->entries); - *truncated = req->truncated; - return req->get_ret_status(); - } -}; - - -class RGWReadRemoteMDLogShardInfoCR : public RGWCoroutine { - RGWMetaSyncEnv *env; - RGWRESTReadResource *http_op; - - const std::string& period; - int shard_id; - RGWMetadataLogInfo *shard_info; - -public: - RGWReadRemoteMDLogShardInfoCR(RGWMetaSyncEnv *env, const std::string& period, - int _shard_id, RGWMetadataLogInfo *_shard_info) - : RGWCoroutine(env->store->ctx()), env(env), http_op(NULL), - period(period), shard_id(_shard_id), shard_info(_shard_info) {} - - int operate(const DoutPrefixProvider *dpp) override { - auto store = env->store; - RGWRESTConn *conn = store->svc()->zone->get_master_conn(); - reenter(this) { - yield { - char buf[16]; - snprintf(buf, sizeof(buf), "%d", shard_id); - rgw_http_param_pair pairs[] = { { "type" , "metadata" }, - { "id", buf }, - { "period", period.c_str() }, - { "info" , NULL }, - { NULL, NULL } }; - - string p = "/admin/log/"; - - http_op = new RGWRESTReadResource(conn, p, pairs, NULL, - env->http_manager); - - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(env->dpp, 0) << "ERROR: failed to read from " << p << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - http_op->put(); - return set_cr_error(ret); - } - - return io_block(0); - } - yield { - int ret = http_op->wait(shard_info, null_yield); - http_op->put(); - if (ret < 0) { - return set_cr_error(ret); - } - return set_cr_done(); - } - } - return 0; - } -}; - -RGWCoroutine* create_read_remote_mdlog_shard_info_cr(RGWMetaSyncEnv *env, - const std::string& period, - int shard_id, - RGWMetadataLogInfo* info) -{ - return new RGWReadRemoteMDLogShardInfoCR(env, period, shard_id, info); -} - -class RGWListRemoteMDLogShardCR : public RGWSimpleCoroutine { - RGWMetaSyncEnv *sync_env; - RGWRESTReadResource *http_op; - - const std::string& period; - int shard_id; - string marker; - uint32_t max_entries; - rgw_mdlog_shard_data *result; - -public: - RGWListRemoteMDLogShardCR(RGWMetaSyncEnv *env, const std::string& period, - int _shard_id, const string& _marker, uint32_t _max_entries, - rgw_mdlog_shard_data *_result) - : RGWSimpleCoroutine(env->store->ctx()), sync_env(env), http_op(NULL), - period(period), shard_id(_shard_id), marker(_marker), max_entries(_max_entries), result(_result) {} - - int send_request(const DoutPrefixProvider *dpp) override { - RGWRESTConn *conn = sync_env->conn; - - char buf[32]; - snprintf(buf, sizeof(buf), "%d", shard_id); - - char max_entries_buf[32]; - snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", (int)max_entries); - - const char *marker_key = (marker.empty() ? 
"" : "marker"); - - rgw_http_param_pair pairs[] = { { "type", "metadata" }, - { "id", buf }, - { "period", period.c_str() }, - { "max-entries", max_entries_buf }, - { marker_key, marker.c_str() }, - { NULL, NULL } }; - - string p = "/admin/log/"; - - http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to read from " << p << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - http_op->put(); - return ret; - } - - return 0; - } - - int request_complete() override { - int ret = http_op->wait(result, null_yield); - http_op->put(); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to list remote mdlog shard, ret=" << ret << dendl; - return ret; - } - return 0; - } -}; - -RGWCoroutine* create_list_remote_mdlog_shard_cr(RGWMetaSyncEnv *env, - const std::string& period, - int shard_id, - const std::string& marker, - uint32_t max_entries, - rgw_mdlog_shard_data *result) -{ - return new RGWListRemoteMDLogShardCR(env, period, shard_id, marker, - max_entries, result); -} - -bool RGWReadRemoteMDLogInfoCR::spawn_next() { - if (shard_id >= num_shards) { - return false; - } - spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, period, shard_id, &(*mdlog_info)[shard_id]), false); - shard_id++; - return true; -} - -bool RGWListRemoteMDLogCR::spawn_next() { - if (iter == shards.end()) { - return false; - } - - spawn(new RGWListRemoteMDLogShardCR(sync_env, period, iter->first, iter->second, max_entries_per_shard, &(*result)[iter->first]), false); - ++iter; - return true; -} - -class RGWInitSyncStatusCoroutine : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - - rgw_meta_sync_info status; - vector shards_info; - boost::intrusive_ptr lease_cr; - boost::intrusive_ptr lease_stack; -public: - RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, - const rgw_meta_sync_info &status) - : RGWCoroutine(_sync_env->store->ctx()), sync_env(_sync_env), - status(status), shards_info(status.num_shards), - lease_cr(nullptr), lease_stack(nullptr) {} - - ~RGWInitSyncStatusCoroutine() override { - if (lease_cr) { - lease_cr->abort(); - } - } - - int operate(const DoutPrefixProvider *dpp) override { - int ret; - reenter(this) { - yield { - set_status("acquiring sync lock"); - uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; - string lock_name = "sync_lock"; - rgw::sal::RadosStore* store = sync_env->store; - lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), - lock_name, lock_duration, this)); - lease_stack.reset(spawn(lease_cr.get(), false)); - } - while (!lease_cr->is_locked()) { - if (lease_cr->is_done()) { - ldpp_dout(dpp, 5) << "failed to take lease" << dendl; - set_status("lease lock failed, early abort"); - return set_cr_error(lease_cr->get_ret_status()); - } - set_sleeping(true); - yield; - } - yield { - set_status("writing sync status"); - rgw::sal::RadosStore* store = sync_env->store; - call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, store->svc()->sysobj, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), - status)); - } - - if (retcode < 0) { - set_status("failed to write sync status"); - ldpp_dout(dpp, 0) << "ERROR: failed to write sync status, retcode=" << retcode << dendl; - yield lease_cr->go_down(); - return 
set_cr_error(retcode); - } - /* fetch current position in logs */ - set_status("fetching remote log position"); - yield { - for (int i = 0; i < (int)status.num_shards; i++) { - spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, status.period, i, - &shards_info[i]), false); - } - } - - drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ - - yield { - set_status("updating sync status"); - for (int i = 0; i < (int)status.num_shards; i++) { - rgw_meta_sync_marker marker; - RGWMetadataLogInfo& info = shards_info[i]; - marker.next_step_marker = info.marker; - marker.timestamp = info.last_update; - rgw::sal::RadosStore* store = sync_env->store; - spawn(new RGWSimpleRadosWriteCR(dpp, - sync_env->async_rados, - store->svc()->sysobj, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->shard_obj_name(i)), - marker), true); - } - } - yield { - set_status("changing sync state: build full sync maps"); - status.state = rgw_meta_sync_info::StateBuildingFullSyncMaps; - rgw::sal::RadosStore* store = sync_env->store; - call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, store->svc()->sysobj, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), - status)); - } - set_status("drop lock lease"); - yield lease_cr->go_down(); - while (collect(&ret, NULL)) { - if (ret < 0) { - return set_cr_error(ret); - } - yield; - } - drain_all(); - return set_cr_done(); - } - return 0; - } -}; - -class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - - RGWMetaSyncEnv *env; - const int num_shards; - int shard_id{0}; - map& markers; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to read metadata sync markers: " - << cpp_strerror(r) << dendl; - } - return r; - } - public: - RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards, - map& markers) - : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS), - env(env), num_shards(num_shards), markers(markers) - {} - bool spawn_next() override; -}; - -bool RGWReadSyncStatusMarkersCR::spawn_next() -{ - if (shard_id >= num_shards) { - return false; - } - using CR = RGWSimpleRadosReadCR; - rgw_raw_obj obj{env->store->svc()->zone->get_zone_params().log_pool, - env->shard_obj_name(shard_id)}; - spawn(new CR(env->dpp, env->async_rados, env->store->svc()->sysobj, obj, &markers[shard_id]), false); - shard_id++; - return true; -} - -class RGWReadSyncStatusCoroutine : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - rgw_meta_sync_status *sync_status; - -public: - RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env, - rgw_meta_sync_status *_status) - : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), sync_status(_status) - {} - int operate(const DoutPrefixProvider *dpp) override; -}; - -int RGWReadSyncStatusCoroutine::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // read sync info - using ReadInfoCR = RGWSimpleRadosReadCR; - yield { - bool empty_on_enoent = false; // fail on ENOENT - rgw_raw_obj obj{sync_env->store->svc()->zone->get_zone_params().log_pool, - sync_env->status_oid()}; - call(new ReadInfoCR(dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, obj, - &sync_status->sync_info, empty_on_enoent)); - } - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to read sync status info with " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - // read shard markers - using ReadMarkersCR = 
RGWReadSyncStatusMarkersCR; - yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards, - sync_status->sync_markers)); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to read sync status markers with " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; -} - -class RGWFetchAllMetaCR : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - - int num_shards; - - - int ret_status; - - list sections; - list::iterator sections_iter; - - struct meta_list_result { - list keys; - string marker; - uint64_t count{0}; - bool truncated{false}; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("keys", keys, obj); - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("count", count, obj); - JSONDecoder::decode_json("truncated", truncated, obj); - } - } result; - list::iterator iter; - - std::unique_ptr entries_index; - - boost::intrusive_ptr lease_cr; - boost::intrusive_ptr lease_stack; - bool lost_lock; - bool failed; - - string marker; - - map& markers; - - RGWSyncTraceNodeRef tn; - -public: - RGWFetchAllMetaCR(RGWMetaSyncEnv *_sync_env, int _num_shards, - map& _markers, - RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), - num_shards(_num_shards), - ret_status(0), lease_cr(nullptr), lease_stack(nullptr), - lost_lock(false), failed(false), markers(_markers) { - tn = sync_env->sync_tracer->add_node(_tn_parent, "fetch_all_meta"); - } - - ~RGWFetchAllMetaCR() override { - } - - void append_section_from_set(set& all_sections, const string& name) { - set::iterator iter = all_sections.find(name); - if (iter != all_sections.end()) { - sections.emplace_back(std::move(*iter)); - all_sections.erase(iter); - } - } - /* - * meta sync should go in the following order: user, bucket.instance, bucket - * then whatever other sections exist (if any) - */ - void rearrange_sections() { - set all_sections; - std::move(sections.begin(), sections.end(), - std::inserter(all_sections, all_sections.end())); - sections.clear(); - - append_section_from_set(all_sections, "user"); - append_section_from_set(all_sections, "bucket.instance"); - append_section_from_set(all_sections, "bucket"); - append_section_from_set(all_sections, "roles"); - - std::move(all_sections.begin(), all_sections.end(), - std::back_inserter(sections)); - } - - int operate(const DoutPrefixProvider *dpp) override { - RGWRESTConn *conn = sync_env->conn; - - reenter(this) { - yield { - set_status(string("acquiring lock (") + sync_env->status_oid() + ")"); - uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; - string lock_name = "sync_lock"; - lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, - sync_env->store, - rgw_raw_obj(sync_env->store->svc()->zone->get_zone_params().log_pool, sync_env->status_oid()), - lock_name, lock_duration, this)); - lease_stack.reset(spawn(lease_cr.get(), false)); - } - while (!lease_cr->is_locked()) { - if (lease_cr->is_done()) { - ldpp_dout(dpp, 5) << "failed to take lease" << dendl; - set_status("lease lock failed, early abort"); - return set_cr_error(lease_cr->get_ret_status()); - } - set_sleeping(true); - yield; - } - entries_index.reset(new RGWShardedOmapCRManager(sync_env->async_rados, sync_env->store, this, num_shards, - sync_env->store->svc()->zone->get_zone_params().log_pool, - mdlog_sync_full_sync_index_prefix)); - yield { - call(new RGWReadRESTResourceCR >(cct, conn, sync_env->http_manager, - "/admin/metadata", NULL, §ions)); - } - if (get_ret_status() 
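// rearrange_sections() above pins full-sync ordering to user,
// bucket.instance, bucket, then roles, ahead of any remaining sections,
// presumably so referenced entries land before the entries that point at
// them. The helper reduces to moving known names out of a set in priority
// order:
//
//   #include <list>
//   #include <set>
//   #include <string>
//   std::list<std::string> reorder(std::set<std::string> all) {
//     std::list<std::string> out;
//     for (const char* n : {"user", "bucket.instance", "bucket", "roles"}) {
//       if (auto it = all.find(n); it != all.end()) {
//         out.push_back(*it);             // priority sections, in order
//         all.erase(it);
//       }
//     }
//     out.insert(out.end(), all.begin(), all.end());  // then everything else
//     return out;
//   }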
< 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch metadata sections" << dendl; - yield entries_index->finish(); - yield lease_cr->go_down(); - drain_all(); - return set_cr_error(get_ret_status()); - } - rearrange_sections(); - sections_iter = sections.begin(); - for (; sections_iter != sections.end(); ++sections_iter) { - do { - yield { -#define META_FULL_SYNC_CHUNK_SIZE "1000" - string entrypoint = string("/admin/metadata/") + *sections_iter; - rgw_http_param_pair pairs[] = { { "max-entries", META_FULL_SYNC_CHUNK_SIZE }, - { "marker", result.marker.c_str() }, - { NULL, NULL } }; - result.keys.clear(); - call(new RGWReadRESTResourceCR(cct, conn, sync_env->http_manager, - entrypoint, pairs, &result)); - } - ret_status = get_ret_status(); - if (ret_status == -ENOENT) { - set_retcode(0); /* reset coroutine status so that we don't return it */ - ret_status = 0; - } - if (ret_status < 0) { - tn->log(0, SSTR("ERROR: failed to fetch metadata section: " << *sections_iter)); - yield entries_index->finish(); - yield lease_cr->go_down(); - drain_all(); - return set_cr_error(ret_status); - } - iter = result.keys.begin(); - for (; iter != result.keys.end(); ++iter) { - if (!lease_cr->is_locked()) { - lost_lock = true; - tn->log(1, "lease is lost, abort"); - break; - } - yield; // allow entries_index consumer to make progress - - tn->log(20, SSTR("list metadata: section=" << *sections_iter << " key=" << *iter)); - string s = *sections_iter + ":" + *iter; - int shard_id; - rgw::sal::RadosStore* store = sync_env->store; - int ret = store->ctl()->meta.mgr->get_shard_id(*sections_iter, *iter, &shard_id); - if (ret < 0) { - tn->log(0, SSTR("ERROR: could not determine shard id for " << *sections_iter << ":" << *iter)); - ret_status = ret; - break; - } - if (!entries_index->append(s, shard_id)) { - break; - } - } - } while (result.truncated); - } - yield { - if (!entries_index->finish()) { - failed = true; - } - } - if (!failed) { - for (map::iterator iter = markers.begin(); iter != markers.end(); ++iter) { - int shard_id = (int)iter->first; - rgw_meta_sync_marker& marker = iter->second; - marker.total_entries = entries_index->get_total_entries(shard_id); - spawn(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, - rgw_raw_obj(sync_env->store->svc()->zone->get_zone_params().log_pool, sync_env->shard_obj_name(shard_id)), - marker), true); - } - } - - drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */ - - yield lease_cr->go_down(); - - int ret; - while (collect(&ret, NULL)) { - if (ret < 0) { - return set_cr_error(ret); - } - yield; - } - drain_all(); - if (failed) { - yield return set_cr_error(-EIO); - } - if (lost_lock) { - yield return set_cr_error(-EBUSY); - } - - if (ret_status < 0) { - yield return set_cr_error(ret_status); - } - - yield return set_cr_done(); - } - return 0; - } -}; - -static string full_sync_index_shard_oid(int shard_id) -{ - char buf[mdlog_sync_full_sync_index_prefix.size() + 16]; - snprintf(buf, sizeof(buf), "%s.%d", mdlog_sync_full_sync_index_prefix.c_str(), shard_id); - return string(buf); -} - -class RGWReadRemoteMetadataCR : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - - RGWRESTReadResource *http_op; - - string section; - string key; - - bufferlist *pbl; - - RGWSyncTraceNodeRef tn; - -public: - RGWReadRemoteMetadataCR(RGWMetaSyncEnv *_sync_env, - const string& _section, const string& _key, bufferlist *_pbl, - const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), - 
http_op(NULL), - section(_section), - key(_key), - pbl(_pbl) { - tn = sync_env->sync_tracer->add_node(_tn_parent, "read_remote_meta", - section + ":" + key); - } - - int operate(const DoutPrefixProvider *dpp) override { - RGWRESTConn *conn = sync_env->conn; - reenter(this) { - yield { - string key_encode; - url_encode(key, key_encode); - rgw_http_param_pair pairs[] = { { "key" , key.c_str()}, - { NULL, NULL } }; - - string p = string("/admin/metadata/") + section + "/" + key_encode; - - http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager); - - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - http_op->put(); - return set_cr_error(ret); - } - - return io_block(0); - } - yield { - int ret = http_op->wait(pbl, null_yield); - http_op->put(); - if (ret < 0) { - return set_cr_error(ret); - } - return set_cr_done(); - } - } - return 0; - } -}; - -class RGWAsyncMetaStoreEntry : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - string raw_key; - bufferlist bl; - const DoutPrefixProvider *dpp; -protected: - int _send_request(const DoutPrefixProvider *dpp) override { - int ret = store->ctl()->meta.mgr->put(raw_key, bl, null_yield, dpp, RGWMDLogSyncType::APPLY_ALWAYS, true); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: can't store key: " << raw_key << " ret=" << ret << dendl; - return ret; - } - return 0; - } -public: - RGWAsyncMetaStoreEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - const string& _raw_key, - bufferlist& _bl, - const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), store(_store), - raw_key(_raw_key), bl(_bl), dpp(dpp) {} -}; - - -class RGWMetaStoreEntryCR : public RGWSimpleCoroutine { - RGWMetaSyncEnv *sync_env; - string raw_key; - bufferlist bl; - - RGWAsyncMetaStoreEntry *req; - -public: - RGWMetaStoreEntryCR(RGWMetaSyncEnv *_sync_env, - const string& _raw_key, - bufferlist& _bl) : RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), - raw_key(_raw_key), bl(_bl), req(NULL) { - } - - ~RGWMetaStoreEntryCR() override { - if (req) { - req->finish(); - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncMetaStoreEntry(this, stack->create_completion_notifier(), - sync_env->store, raw_key, bl, dpp); - sync_env->async_rados->queue(req); - return 0; - } - - int request_complete() override { - return req->get_ret_status(); - } -}; - -class RGWAsyncMetaRemoveEntry : public RGWAsyncRadosRequest { - rgw::sal::RadosStore* store; - string raw_key; - const DoutPrefixProvider *dpp; -protected: - int _send_request(const DoutPrefixProvider *dpp) override { - int ret = store->ctl()->meta.mgr->remove(raw_key, null_yield, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: can't remove key: " << raw_key << " ret=" << ret << dendl; - return ret; - } - return 0; - } -public: - RGWAsyncMetaRemoveEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, - const string& _raw_key, const DoutPrefixProvider *dpp) : RGWAsyncRadosRequest(caller, cn), store(_store), - raw_key(_raw_key), dpp(dpp) {} -}; - - -class RGWMetaRemoveEntryCR : public RGWSimpleCoroutine { - RGWMetaSyncEnv *sync_env; - string raw_key; - - RGWAsyncMetaRemoveEntry *req; - -public: - RGWMetaRemoveEntryCR(RGWMetaSyncEnv *_sync_env, - const string& _raw_key) : 
RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), - raw_key(_raw_key), req(NULL) { - } - - ~RGWMetaRemoveEntryCR() override { - if (req) { - req->finish(); - } - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new RGWAsyncMetaRemoveEntry(this, stack->create_completion_notifier(), - sync_env->store, raw_key, dpp); - sync_env->async_rados->queue(req); - return 0; - } - - int request_complete() override { - int r = req->get_ret_status(); - if (r == -ENOENT) { - r = 0; - } - return r; - } -}; - -#define META_SYNC_UPDATE_MARKER_WINDOW 10 - - -int RGWLastCallerWinsCR::operate(const DoutPrefixProvider *dpp) { - RGWCoroutine *call_cr; - reenter(this) { - while (cr) { - call_cr = cr; - cr = nullptr; - yield call(call_cr); - /* cr might have been modified at this point */ - } - return set_cr_done(); - } - return 0; -} - -class RGWMetaSyncShardMarkerTrack : public RGWSyncShardMarkerTrack { - RGWMetaSyncEnv *sync_env; - - string marker_oid; - rgw_meta_sync_marker sync_marker; - - RGWSyncTraceNodeRef tn; - -public: - RGWMetaSyncShardMarkerTrack(RGWMetaSyncEnv *_sync_env, - const string& _marker_oid, - const rgw_meta_sync_marker& _marker, - RGWSyncTraceNodeRef& _tn) : RGWSyncShardMarkerTrack(META_SYNC_UPDATE_MARKER_WINDOW), - sync_env(_sync_env), - marker_oid(_marker_oid), - sync_marker(_marker), - tn(_tn){} - - RGWCoroutine *store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override { - sync_marker.marker = new_marker; - if (index_pos > 0) { - sync_marker.pos = index_pos; - } - - if (!real_clock::is_zero(timestamp)) { - sync_marker.timestamp = timestamp; - } - - ldpp_dout(sync_env->dpp, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << " realm_epoch=" << sync_marker.realm_epoch << dendl; - tn->log(20, SSTR("new marker=" << new_marker)); - rgw::sal::RadosStore* store = sync_env->store; - return new RGWSimpleRadosWriteCR(sync_env->dpp, sync_env->async_rados, - store->svc()->sysobj, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, marker_oid), - sync_marker); - } - - RGWOrderCallCR *allocate_order_control_cr() override { - return new RGWLastCallerWinsCR(sync_env->cct); - } -}; - -RGWMetaSyncSingleEntryCR::RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env, - const string& _raw_key, const string& _entry_marker, - const RGWMDLogStatus& _op_status, - RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), - sync_env(_sync_env), - raw_key(_raw_key), entry_marker(_entry_marker), - op_status(_op_status), - pos(0), sync_status(0), - marker_tracker(_marker_tracker), tries(0) { - error_injection = (sync_env->cct->_conf->rgw_sync_meta_inject_err_probability > 0); - tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", raw_key); -} - -int RGWMetaSyncSingleEntryCR::operate(const DoutPrefixProvider *dpp) { - reenter(this) { -#define NUM_TRANSIENT_ERROR_RETRIES 10 - - if (error_injection && - rand() % 10000 < cct->_conf->rgw_sync_meta_inject_err_probability * 10000.0) { - return set_cr_error(-EIO); - } - - if (op_status != MDLOG_STATUS_COMPLETE) { - tn->log(20, "skipping pending operation"); - yield call(marker_tracker->finish(entry_marker)); - if (retcode < 0) { - return set_cr_error(retcode); - } - return set_cr_done(); - } - tn->set_flag(RGW_SNS_FLAG_ACTIVE); - for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) { - yield { - pos = raw_key.find(':'); - section = raw_key.substr(0, pos); - key = raw_key.substr(pos + 
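
RGWLastCallerWinsCR above coalesces marker writes: while one write is queued, a newer submission simply replaces it, so only the most recent marker gets persisted. A simplified std-only model of that behavior (names are made up):

```cpp
#include <functional>
#include <iostream>

// Simplified model of RGWLastCallerWinsCR: at most one call is queued,
// and a later submission overwrites the pending one.
class LastCallerWins {
  std::function<void()> pending;  // at most one queued call
public:
  void call(std::function<void()> fn) { pending = std::move(fn); }
  void drain() {
    while (pending) {
      auto fn = std::move(pending);
      pending = nullptr;
      fn();  // fn may queue a newer call; any older one was replaced
    }
  }
};

int main() {
  LastCallerWins lcw;
  lcw.call([] { std::cout << "marker=A\n"; });
  lcw.call([] { std::cout << "marker=B\n"; });  // replaces A
  lcw.drain();                                  // prints only marker=B
}
```
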
1); - tn->log(10, SSTR("fetching remote metadata entry" << (tries == 0 ? "" : " (retry)"))); - call(new RGWReadRemoteMetadataCR(sync_env, section, key, &md_bl, tn)); - } - - sync_status = retcode; - - if (sync_status == -ENOENT) { - break; - } - - if (sync_status < 0) { - if (tries < NUM_TRANSIENT_ERROR_RETRIES - 1) { - ldpp_dout(dpp, 20) << *this << ": failed to fetch remote metadata entry: " << section << ":" << key << ", will retry" << dendl; - continue; - } - - tn->log(10, SSTR("failed to read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status)); - log_error() << "failed to read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status << std::endl; - yield call(sync_env->error_logger->log_error_cr(dpp, sync_env->conn->get_remote_id(), section, key, -sync_status, - string("failed to read remote metadata entry: ") + cpp_strerror(-sync_status))); - return set_cr_error(sync_status); - } - - break; - } - - retcode = 0; - for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) { - if (sync_status != -ENOENT) { - tn->log(10, SSTR("storing local metadata entry: " << section << ":" << key)); - yield call(new RGWMetaStoreEntryCR(sync_env, raw_key, md_bl)); - } else { - tn->log(10, SSTR("removing local metadata entry:" << section << ":" << key)); - yield call(new RGWMetaRemoveEntryCR(sync_env, raw_key)); - if (retcode == -ENOENT) { - retcode = 0; - break; - } - } - if ((retcode < 0) && (tries < NUM_TRANSIENT_ERROR_RETRIES - 1)) { - ldpp_dout(dpp, 20) << *this << ": failed to store metadata entry: " << section << ":" << key << ", got retcode=" << retcode << ", will retry" << dendl; - continue; - } - break; - } - - sync_status = retcode; - - if (sync_status == 0 && marker_tracker) { - /* update marker */ - yield call(marker_tracker->finish(entry_marker)); - sync_status = retcode; - } - if (sync_status < 0) { - tn->log(10, SSTR("failed, status=" << sync_status)); - return set_cr_error(sync_status); - } - tn->log(10, "success"); - return set_cr_done(); - } - return 0; -} - -class RGWCloneMetaLogCoroutine : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - RGWMetadataLog *mdlog; - - const std::string& period; - int shard_id; - string marker; - bool truncated = false; - string *new_marker; - - int max_entries = CLONE_MAX_ENTRIES; - - RGWRESTReadResource *http_op = nullptr; - boost::intrusive_ptr completion; - - RGWMetadataLogInfo shard_info; - rgw_mdlog_shard_data data; - -public: - RGWCloneMetaLogCoroutine(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog, - const std::string& period, int _id, - const string& _marker, string *_new_marker) - : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), mdlog(mdlog), - period(period), shard_id(_id), marker(_marker), new_marker(_new_marker) { - if (new_marker) { - *new_marker = marker; - } - } - ~RGWCloneMetaLogCoroutine() override { - if (http_op) { - http_op->put(); - } - if (completion) { - completion->cancel(); - } - } - - int operate(const DoutPrefixProvider *dpp) override; - - int state_init(); - int state_read_shard_status(); - int state_read_shard_status_complete(); - int state_send_rest_request(const DoutPrefixProvider *dpp); - int state_receive_rest_response(); - int state_store_mdlog_entries(); - int state_store_mdlog_entries_complete(); -}; - -class RGWMetaSyncShardCR : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - - const rgw_pool& pool; - const std::string& period; //< currently syncing period id - const epoch_t realm_epoch; //< realm_epoch of period - 
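
The entry coroutine above retries transient failures up to NUM_TRANSIENT_ERROR_RETRIES times, treating -ENOENT as a definitive answer rather than an error. A compact sketch of that retry skeleton, with a fake `fetch()` standing in for the remote-metadata read:

```cpp
#include <cerrno>
#include <iostream>

// Fake stand-in for the remote read: fails twice, then succeeds.
static int attempts = 0;
int fetch() { return (++attempts < 3) ? -EIO : 0; }

int fetch_with_retries(int max_tries) {
  int ret = 0;
  for (int tries = 0; tries < max_tries; ++tries) {
    ret = fetch();
    if (ret == -ENOENT) break;  // definitive: entry was deleted upstream
    if (ret < 0 && tries < max_tries - 1) continue;  // transient: retry
    break;
  }
  return ret;  // the last error is what the real CR reports via set_cr_error()
}

int main() {
  std::cout << "ret=" << fetch_with_retries(10) << "\n";  // ret=0 after 2 retries
}
```
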
RGWMetadataLog* mdlog; //< log of syncing period - uint32_t shard_id; - rgw_meta_sync_marker& sync_marker; - boost::optional temp_marker; //< for pending updates - string marker; - string max_marker; - const std::string& period_marker; //< max marker stored in next period - - RGWRadosGetOmapKeysCR::ResultPtr omapkeys; - std::set entries; - std::set::iterator iter; - - string oid; - - RGWMetaSyncShardMarkerTrack *marker_tracker = nullptr; - - list log_entries; - list::iterator log_iter; - bool truncated = false; - - string mdlog_marker; - string raw_key; - rgw_mdlog_entry mdlog_entry; - - ceph::mutex inc_lock = ceph::make_mutex("RGWMetaSyncShardCR::inc_lock"); - ceph::condition_variable inc_cond; - - boost::asio::coroutine incremental_cr; - boost::asio::coroutine full_cr; - - boost::intrusive_ptr lease_cr; - boost::intrusive_ptr lease_stack; - - bool lost_lock = false; - - bool *reset_backoff; - - // hold a reference to the cr stack while it's in the map - using StackRef = boost::intrusive_ptr; - map stack_to_pos; - map pos_to_prev; - - bool can_adjust_marker = false; - bool done_with_period = false; - - int total_entries = 0; - - RGWSyncTraceNodeRef tn; -public: - RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, - const std::string& period, epoch_t realm_epoch, - RGWMetadataLog* mdlog, uint32_t _shard_id, - rgw_meta_sync_marker& _marker, - const std::string& period_marker, bool *_reset_backoff, - RGWSyncTraceNodeRef& _tn) - : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), pool(_pool), - period(period), realm_epoch(realm_epoch), mdlog(mdlog), - shard_id(_shard_id), sync_marker(_marker), - period_marker(period_marker), - reset_backoff(_reset_backoff), tn(_tn) { - *reset_backoff = false; - } - - ~RGWMetaSyncShardCR() override { - delete marker_tracker; - if (lease_cr) { - lease_cr->abort(); - } - } - - void set_marker_tracker(RGWMetaSyncShardMarkerTrack *mt) { - delete marker_tracker; - marker_tracker = mt; - } - - int operate(const DoutPrefixProvider *dpp) override { - int r; - while (true) { - switch (sync_marker.state) { - case rgw_meta_sync_marker::FullSync: - r = full_sync(); - if (r < 0) { - ldpp_dout(dpp, 10) << "sync: full_sync: shard_id=" << shard_id << " r=" << r << dendl; - return set_cr_error(r); - } - return 0; - case rgw_meta_sync_marker::IncrementalSync: - r = incremental_sync(); - if (r < 0) { - ldpp_dout(dpp, 10) << "sync: incremental_sync: shard_id=" << shard_id << " r=" << r << dendl; - return set_cr_error(r); - } - return 0; - } - } - /* unreachable */ - return 0; - } - - void collect_children() - { - int child_ret; - RGWCoroutinesStack *child; - while (collect_next(&child_ret, &child)) { - auto iter = stack_to_pos.find(child); - if (iter == stack_to_pos.end()) { - /* some other stack that we don't care about */ - continue; - } - - string& pos = iter->second; - - if (child_ret < 0) { - ldpp_dout(sync_env->dpp, 0) << *this << ": child operation stack=" << child << " entry=" << pos << " returned " << child_ret << dendl; - // on any error code from RGWMetaSyncSingleEntryCR, we do not advance - // the sync status marker past this entry, and set - // can_adjust_marker=false to exit out of RGWMetaSyncShardCR. 
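
operate() above dispatches each shard between two long-lived modes. A simplified stand-in for that dispatch; in the real code the transition is persisted via the sync marker so a restart resumes in the right mode:

```cpp
#include <iostream>

// Simplified dispatch shape of RGWMetaSyncShardCR::operate().
enum class State { FullSync, IncrementalSync };

int full_sync(State& s)         { s = State::IncrementalSync; return 0; }
int incremental_sync(State&)    { return 0; /* tail mdlog until period ends */ }

int run_shard(State s) {
  for (;;) {
    switch (s) {
    case State::FullSync:
      if (int r = full_sync(s); r < 0) return r;
      break;  // re-dispatch: now in IncrementalSync
    case State::IncrementalSync:
      return incremental_sync(s);
    }
  }
}

int main() { std::cout << run_shard(State::FullSync) << "\n"; }
```
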
- // RGWMetaSyncShardControlCR will rerun RGWMetaSyncShardCR from the - // previous marker and retry - can_adjust_marker = false; - } - - map::iterator prev_iter = pos_to_prev.find(pos); - ceph_assert(prev_iter != pos_to_prev.end()); - - if (pos_to_prev.size() == 1) { - if (can_adjust_marker) { - sync_marker.marker = pos; - } - pos_to_prev.erase(prev_iter); - } else { - ceph_assert(pos_to_prev.size() > 1); - pos_to_prev.erase(prev_iter); - prev_iter = pos_to_prev.begin(); - if (can_adjust_marker) { - sync_marker.marker = prev_iter->second; - } - } - - ldpp_dout(sync_env->dpp, 4) << *this << ": adjusting marker pos=" << sync_marker.marker << dendl; - stack_to_pos.erase(iter); - } - } - - int full_sync() { -#define OMAP_GET_MAX_ENTRIES 100 - int max_entries = OMAP_GET_MAX_ENTRIES; - reenter(&full_cr) { - set_status("full_sync"); - tn->log(10, "start full sync"); - oid = full_sync_index_shard_oid(shard_id); - can_adjust_marker = true; - /* grab lock */ - yield { - uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; - string lock_name = "sync_lock"; - rgw::sal::RadosStore* store = sync_env->store; - lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - lock_name, lock_duration, this)); - lease_stack.reset(spawn(lease_cr.get(), false)); - lost_lock = false; - } - while (!lease_cr->is_locked()) { - if (lease_cr->is_done()) { - drain_all(); - tn->log(5, "failed to take lease"); - return lease_cr->get_ret_status(); - } - set_sleeping(true); - yield; - } - tn->log(10, "took lease"); - - /* lock succeeded, a retry now should avoid previous backoff status */ - *reset_backoff = true; - - /* prepare marker tracker */ - set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env, - sync_env->shard_obj_name(shard_id), - sync_marker, tn)); - - marker = sync_marker.marker; - - total_entries = sync_marker.pos; - - /* sync! */ - do { - if (!lease_cr->is_locked()) { - tn->log(1, "lease is lost, abort"); - lost_lock = true; - break; - } - omapkeys = std::make_shared(); - yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, oid), - marker, max_entries, omapkeys)); - if (retcode < 0) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: " << __func__ << "(): RGWRadosGetOmapKeysCR() returned ret=" << retcode << dendl; - tn->log(0, SSTR("ERROR: failed to list omap keys, status=" << retcode)); - yield lease_cr->go_down(); - drain_all(); - return retcode; - } - entries = std::move(omapkeys->entries); - tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync")); - if (entries.size() > 0) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - } - iter = entries.begin(); - for (; iter != entries.end(); ++iter) { - marker = *iter; - tn->log(20, SSTR("full sync: " << marker)); - total_entries++; - if (!marker_tracker->start(marker, total_entries, real_time())) { - tn->log(0, SSTR("ERROR: cannot start syncing " << marker << ". 
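
collect_children() above may only advance the stored marker to a position below every still-outstanding entry. A small runnable model of that `pos_to_prev` bookkeeping (keys are invented):

```cpp
#include <iostream>
#include <map>
#include <string>

// pos_to_prev maps each in-flight entry to the marker value that becomes
// safe to persist once everything before it has completed.
std::map<std::string, std::string> pos_to_prev;
std::string marker;

void on_child_done(const std::string& pos) {
  auto it = pos_to_prev.find(pos);
  if (it == pos_to_prev.end()) return;
  if (pos_to_prev.size() == 1) {
    marker = pos;  // nothing else in flight: advance to pos itself
    pos_to_prev.erase(it);
  } else {
    pos_to_prev.erase(it);
    marker = pos_to_prev.begin()->second;  // prev of lowest pending entry
  }
}

int main() {
  pos_to_prev = {{"k1", "k0"}, {"k2", "k1"}, {"k3", "k2"}};
  on_child_done("k2");
  std::cout << marker << "\n";  // k0: k1 is still in flight
  on_child_done("k1");
  std::cout << marker << "\n";  // k2: prev of the only pending entry, k3
  on_child_done("k3");
  std::cout << marker << "\n";  // k3: everything done
}
```
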
Duplicate entry?")); - } else { - // fetch remote and write locally - yield { - RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, marker, marker, MDLOG_STATUS_COMPLETE, marker_tracker, tn), false); - // stack_to_pos holds a reference to the stack - stack_to_pos[stack] = marker; - pos_to_prev[marker] = marker; - } - // limit spawn window - while (num_spawned() > static_cast(cct->_conf->rgw_meta_sync_spawn_window)) { - yield wait_for_child(); - collect_children(); - } - } - } - collect_children(); - } while (omapkeys->more && can_adjust_marker); - - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - - while (num_spawned() > 1) { - yield wait_for_child(); - collect_children(); - } - - if (!lost_lock) { - /* update marker to reflect we're done with full sync */ - if (can_adjust_marker) { - // apply updates to a temporary marker, or operate() will send us - // to incremental_sync() after we yield - temp_marker = sync_marker; - temp_marker->state = rgw_meta_sync_marker::IncrementalSync; - temp_marker->marker = std::move(temp_marker->next_step_marker); - temp_marker->next_step_marker.clear(); - temp_marker->realm_epoch = realm_epoch; - ldpp_dout(sync_env->dpp, 4) << *this << ": saving marker pos=" << temp_marker->marker << " realm_epoch=" << realm_epoch << dendl; - - using WriteMarkerCR = RGWSimpleRadosWriteCR; - yield call(new WriteMarkerCR(sync_env->dpp, sync_env->async_rados, sync_env->store->svc()->sysobj, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - *temp_marker)); - } - - if (retcode < 0) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to set sync marker: retcode=" << retcode << dendl; - yield lease_cr->go_down(); - drain_all(); - return retcode; - } - // clean up full sync index - yield { - auto oid = full_sync_index_shard_oid(shard_id); - call(new RGWRadosRemoveCR(sync_env->store, {pool, oid})); - } - } - - /* - * if we reached here, it means that lost_lock is true, otherwise the state - * change in the previous block will prevent us from reaching here - */ - - yield lease_cr->go_down(); - - lease_cr.reset(); - - drain_all(); - - if (!can_adjust_marker) { - return -EAGAIN; - } - - if (lost_lock) { - return -EBUSY; - } - - tn->log(10, "full sync complete"); - - // apply the sync marker update - ceph_assert(temp_marker); - sync_marker = std::move(*temp_marker); - temp_marker = boost::none; - // must not yield after this point! 
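
The spawn-window loops above apply backpressure: at most `rgw_meta_sync_spawn_window` child stacks run at once, and the parent waits for completions before spawning more. A std-only sketch of the pattern; `Spawner` stands in for the coroutine framework's num_spawned()/wait_for_child():

```cpp
#include <cstdio>

struct Spawner {
  int outstanding = 0;
  void spawn()    { ++outstanding; }
  void wait_one() { --outstanding; /* and collect the child's status */ }
};

int main() {
  Spawner s;
  const int window = 20;  // cf. rgw_meta_sync_spawn_window
  for (int i = 0; i < 100; ++i) {
    s.spawn();
    while (s.outstanding > window) s.wait_one();
    // invariant here: outstanding <= window + 1 immediately after a spawn
  }
  while (s.outstanding > 0) s.wait_one();  // final drain, cf. drain_all()
  std::printf("done, outstanding=%d\n", s.outstanding);
}
```
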
- } - return 0; - } - - - int incremental_sync() { - reenter(&incremental_cr) { - set_status("incremental_sync"); - tn->log(10, "start incremental sync"); - can_adjust_marker = true; - /* grab lock */ - if (!lease_cr) { /* could have had a lease_cr lock from previous state */ - yield { - uint32_t lock_duration = cct->_conf->rgw_sync_lease_period; - string lock_name = "sync_lock"; - rgw::sal::RadosStore* store = sync_env->store; - lease_cr.reset( new RGWContinuousLeaseCR(sync_env->async_rados, store, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - lock_name, lock_duration, this)); - lease_stack.reset(spawn(lease_cr.get(), false)); - lost_lock = false; - } - while (!lease_cr->is_locked()) { - if (lease_cr->is_done()) { - drain_all(); - tn->log(5, "failed to take lease"); - return lease_cr->get_ret_status(); - } - set_sleeping(true); - yield; - } - } - tn->log(10, "took lease"); - // if the period has advanced, we can't use the existing marker - if (sync_marker.realm_epoch < realm_epoch) { - ldpp_dout(sync_env->dpp, 4) << "clearing marker=" << sync_marker.marker - << " from old realm_epoch=" << sync_marker.realm_epoch - << " (now " << realm_epoch << ')' << dendl; - sync_marker.realm_epoch = realm_epoch; - sync_marker.marker.clear(); - } - mdlog_marker = sync_marker.marker; - set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env, - sync_env->shard_obj_name(shard_id), - sync_marker, tn)); - - /* - * mdlog_marker: the remote sync marker positiion - * sync_marker: the local sync marker position - * max_marker: the max mdlog position that we fetched - * marker: the current position we try to sync - * period_marker: the last marker before the next period begins (optional) - */ - marker = max_marker = sync_marker.marker; - /* inc sync */ - do { - if (!lease_cr->is_locked()) { - lost_lock = true; - tn->log(1, "lease is lost, abort"); - break; - } -#define INCREMENTAL_MAX_ENTRIES 100 - ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << " truncated=" << truncated << dendl; - if (!period_marker.empty() && period_marker <= mdlog_marker) { - tn->log(10, SSTR("finished syncing current period: mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker << " period_marker=" << period_marker)); - done_with_period = true; - break; - } - if (mdlog_marker <= max_marker || !truncated) { - /* we're at the tip, try to bring more entries */ - ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " syncing mdlog for shard_id=" << shard_id << dendl; - yield call(new RGWCloneMetaLogCoroutine(sync_env, mdlog, - period, shard_id, - mdlog_marker, &mdlog_marker)); - } - if (retcode < 0) { - tn->log(10, SSTR(*this << ": failed to fetch more log entries, retcode=" << retcode)); - yield lease_cr->go_down(); - drain_all(); - *reset_backoff = false; // back off and try again later - return retcode; - } - truncated = true; - *reset_backoff = true; /* if we got to this point, all systems function */ - if (mdlog_marker > max_marker) { - tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */ - tn->log(20, SSTR("mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker)); - marker = max_marker; - yield call(new RGWReadMDLogEntriesCR(sync_env, mdlog, shard_id, - &max_marker, INCREMENTAL_MAX_ENTRIES, - &log_entries, &truncated)); - if (retcode < 0) { - tn->log(10, SSTR("failed to 
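
The period cutoff test in incremental_sync() above compares lexicographically ordered marker strings, and an empty `period_marker` means no next period exists yet, so the shard keeps tailing indefinitely. A sketch (the marker strings here are invented):

```cpp
#include <iostream>
#include <string>

bool done_with_period(const std::string& period_marker,
                      const std::string& mdlog_marker) {
  return !period_marker.empty() && period_marker <= mdlog_marker;
}

int main() {
  std::cout << done_with_period("", "1_000123") << "\n";          // 0: tail forever
  std::cout << done_with_period("1_000200", "1_000123") << "\n";  // 0: not there yet
  std::cout << done_with_period("1_000100", "1_000123") << "\n";  // 1: next period reached
}
```
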
list mdlog entries, retcode=" << retcode)); - yield lease_cr->go_down(); - drain_all(); - *reset_backoff = false; // back off and try again later - return retcode; - } - for (log_iter = log_entries.begin(); log_iter != log_entries.end() && !done_with_period; ++log_iter) { - if (!period_marker.empty() && period_marker <= log_iter->id) { - done_with_period = true; - if (period_marker < log_iter->id) { - tn->log(10, SSTR("found key=" << log_iter->id - << " past period_marker=" << period_marker)); - break; - } - ldpp_dout(sync_env->dpp, 10) << "found key at period_marker=" << period_marker << dendl; - // sync this entry, then return control to RGWMetaSyncCR - } - if (!mdlog_entry.convert_from(*log_iter)) { - tn->log(0, SSTR("ERROR: failed to convert mdlog entry, shard_id=" << shard_id << " log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp << " ... skipping entry")); - continue; - } - tn->log(20, SSTR("log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp)); - if (!marker_tracker->start(log_iter->id, 0, log_iter->timestamp.to_real_time())) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: cannot start syncing " << log_iter->id << ". Duplicate entry?" << dendl; - } else { - raw_key = log_iter->section + ":" + log_iter->name; - yield { - RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, raw_key, log_iter->id, mdlog_entry.log_data.status, marker_tracker, tn), false); - ceph_assert(stack); - // stack_to_pos holds a reference to the stack - stack_to_pos[stack] = log_iter->id; - pos_to_prev[log_iter->id] = marker; - } - // limit spawn window - while (num_spawned() > static_cast(cct->_conf->rgw_meta_sync_spawn_window)) { - yield wait_for_child(); - collect_children(); - } - } - marker = log_iter->id; - } - } - collect_children(); - ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " max_marker=" << max_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl; - if (done_with_period) { - // return control to RGWMetaSyncCR and advance to the next period - tn->log(10, SSTR(*this << ": done with period")); - break; - } - if (mdlog_marker == max_marker && can_adjust_marker) { - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - yield wait(utime_t(cct->_conf->rgw_meta_sync_poll_interval, 0)); - } - } while (can_adjust_marker); - - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - - while (num_spawned() > 1) { - yield wait_for_child(); - collect_children(); - } - - yield lease_cr->go_down(); - - drain_all(); - - if (lost_lock) { - return -EBUSY; - } - - if (!can_adjust_marker) { - return -EAGAIN; - } - - return set_cr_done(); - } - /* TODO */ - return 0; - } -}; - -class RGWMetaSyncShardControlCR : public RGWBackoffControlCR -{ - RGWMetaSyncEnv *sync_env; - - const rgw_pool& pool; - const std::string& period; - epoch_t realm_epoch; - RGWMetadataLog* mdlog; - uint32_t shard_id; - rgw_meta_sync_marker sync_marker; - const std::string period_marker; - - RGWSyncTraceNodeRef tn; - - static constexpr bool exit_on_error = false; // retry on all errors -public: - RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool, - const std::string& period, epoch_t realm_epoch, - RGWMetadataLog* mdlog, uint32_t _shard_id, - const rgw_meta_sync_marker& _marker, - std::string&& period_marker, - RGWSyncTraceNodeRef& _tn_parent) - : RGWBackoffControlCR(_sync_env->cct, 
exit_on_error), sync_env(_sync_env), - pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog), - shard_id(_shard_id), sync_marker(_marker), - period_marker(std::move(period_marker)) { - tn = sync_env->sync_tracer->add_node(_tn_parent, "shard", - std::to_string(shard_id)); - } - - RGWCoroutine *alloc_cr() override { - return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog, - shard_id, sync_marker, period_marker, backoff_ptr(), tn); - } - - RGWCoroutine *alloc_finisher_cr() override { - rgw::sal::RadosStore* store = sync_env->store; - return new RGWSimpleRadosReadCR(sync_env->dpp, sync_env->async_rados, store->svc()->sysobj, - rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)), - &sync_marker); - } -}; - -class RGWMetaSyncCR : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - const rgw_pool& pool; - RGWPeriodHistory::Cursor cursor; //< sync position in period history - RGWPeriodHistory::Cursor next; //< next period in history - rgw_meta_sync_status sync_status; - RGWSyncTraceNodeRef tn; - - std::mutex mutex; //< protect access to shard_crs - - // TODO: it should be enough to hold a reference on the stack only, as calling - // RGWCoroutinesStack::wakeup() doesn't refer to the RGWCoroutine if it has - // already completed - using ControlCRRef = boost::intrusive_ptr; - using StackRef = boost::intrusive_ptr; - using RefPair = std::pair; - map shard_crs; - int ret{0}; - -public: - RGWMetaSyncCR(RGWMetaSyncEnv *_sync_env, const RGWPeriodHistory::Cursor &cursor, - const rgw_meta_sync_status& _sync_status, RGWSyncTraceNodeRef& _tn) - : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), - pool(sync_env->store->svc()->zone->get_zone_params().log_pool), - cursor(cursor), sync_status(_sync_status), tn(_tn) {} - - ~RGWMetaSyncCR() { - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - // loop through one period at a time - tn->log(1, "start"); - for (;;) { - if (cursor == sync_env->store->svc()->mdlog->get_period_history()->get_current()) { - next = RGWPeriodHistory::Cursor{}; - if (cursor) { - ldpp_dout(dpp, 10) << "RGWMetaSyncCR on current period=" - << cursor.get_period().get_id() << dendl; - } else { - ldpp_dout(dpp, 10) << "RGWMetaSyncCR with no period" << dendl; - } - } else { - next = cursor; - next.next(); - ldpp_dout(dpp, 10) << "RGWMetaSyncCR on period=" - << cursor.get_period().get_id() << ", next=" - << next.get_period().get_id() << dendl; - } - - yield { - // get the mdlog for the current period (may be empty) - auto& period_id = sync_status.sync_info.period; - auto realm_epoch = sync_status.sync_info.realm_epoch; - auto mdlog = sync_env->store->svc()->mdlog->get_log(period_id); - - tn->log(1, SSTR("realm epoch=" << realm_epoch << " period id=" << period_id)); - - // prevent wakeup() from accessing shard_crs while we're spawning them - std::lock_guard lock(mutex); - - // sync this period on each shard - for (const auto& m : sync_status.sync_markers) { - uint32_t shard_id = m.first; - auto& marker = m.second; - - std::string period_marker; - if (next) { - // read the maximum marker from the next period's sync status - period_marker = next.get_period().get_sync_status()[shard_id]; - if (period_marker.empty()) { - // no metadata changes have occurred on this shard, skip it - ldpp_dout(dpp, 10) << "RGWMetaSyncCR: skipping shard " << shard_id - << " with empty period marker" << dendl; - continue; - } - } - - using ShardCR = RGWMetaSyncShardControlCR; - auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch, - mdlog, shard_id, 
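
RGWMetaSyncCR above bounds each shard's sync by the marker recorded in the *next* period's status, and skips shards whose recorded marker is empty (no changes occurred there). A simplified model of that decision; `SyncStatus` is a stand-in type:

```cpp
#include <iostream>
#include <string>
#include <vector>

using SyncStatus = std::vector<std::string>;  // per-shard max markers

void spawn_shards(int num_shards, const SyncStatus* next_period_status) {
  for (int shard = 0; shard < num_shards; ++shard) {
    std::string period_marker;
    if (next_period_status) {
      period_marker = (*next_period_status)[shard];
      if (period_marker.empty()) {
        std::cout << "skip shard " << shard << " (no changes)\n";
        continue;
      }
    }
    std::cout << "sync shard " << shard << " up to '" << period_marker << "'\n";
  }
}

int main() {
  SyncStatus next = {"1_000050", "", "1_000070"};
  spawn_shards(3, &next);    // historical period, bounded by next's markers
  spawn_shards(3, nullptr);  // on the current period: unbounded tailing
}
```
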
marker, - std::move(period_marker), tn); - auto stack = spawn(cr, false); - shard_crs[shard_id] = RefPair{cr, stack}; - } - } - // wait for each shard to complete - while (ret == 0 && num_spawned() > 0) { - yield wait_for_child(); - collect(&ret, nullptr); - } - drain_all(); - { - // drop shard cr refs under lock - std::lock_guard lock(mutex); - shard_crs.clear(); - } - if (ret < 0) { - return set_cr_error(ret); - } - // advance to the next period - ceph_assert(next); - cursor = next; - - // write the updated sync info - sync_status.sync_info.period = cursor.get_period().get_id(); - sync_status.sync_info.realm_epoch = cursor.get_epoch(); - yield call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, - sync_env->store->svc()->sysobj, - rgw_raw_obj(pool, sync_env->status_oid()), - sync_status.sync_info)); - } - } - return 0; - } - - void wakeup(int shard_id) { - std::lock_guard lock(mutex); - auto iter = shard_crs.find(shard_id); - if (iter == shard_crs.end()) { - return; - } - iter->second.first->wakeup(); - } -}; - -void RGWRemoteMetaLog::init_sync_env(RGWMetaSyncEnv *env) { - env->dpp = dpp; - env->cct = store->ctx(); - env->store = store; - env->conn = conn; - env->async_rados = async_rados; - env->http_manager = &http_manager; - env->error_logger = error_logger; - env->sync_tracer = store->getRados()->get_sync_tracer(); -} - -int RGWRemoteMetaLog::read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - // cannot run concurrently with run_sync(), so run in a separate manager - RGWCoroutinesManager crs(store->ctx(), store->getRados()->get_cr_registry()); - RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); - int ret = http_manager.start(); - if (ret < 0) { - ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl; - return ret; - } - RGWMetaSyncEnv sync_env_local = sync_env; - sync_env_local.http_manager = &http_manager; - tn->log(20, "read sync status"); - ret = crs.run(dpp, new RGWReadSyncStatusCoroutine(&sync_env_local, sync_status)); - http_manager.stop(); - return ret; -} - -int RGWRemoteMetaLog::init_sync_status(const DoutPrefixProvider *dpp) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - - rgw_mdlog_info mdlog_info; - int r = read_log_info(dpp, &mdlog_info); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; - return r; - } - - rgw_meta_sync_info sync_info; - sync_info.num_shards = mdlog_info.num_shards; - auto cursor = store->svc()->mdlog->get_period_history()->get_current(); - if (cursor) { - sync_info.period = cursor.get_period().get_id(); - sync_info.realm_epoch = cursor.get_epoch(); - } - - return run(dpp, new RGWInitSyncStatusCoroutine(&sync_env, sync_info)); -} - -int RGWRemoteMetaLog::store_sync_info(const DoutPrefixProvider *dpp, const rgw_meta_sync_info& sync_info) -{ - tn->log(20, "store sync info"); - return run(dpp, new RGWSimpleRadosWriteCR(dpp, async_rados, store->svc()->sysobj, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sync_env.status_oid()), - sync_info)); -} - -// return a cursor to the period at our sync position -static RGWPeriodHistory::Cursor get_period_at(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - const rgw_meta_sync_info& info, - optional_yield y) -{ - if (info.period.empty()) { - // return an empty cursor with error=0 - return RGWPeriodHistory::Cursor{}; - } - - // look for an existing period in our history - 
auto cursor = store->svc()->mdlog->get_period_history()->lookup(info.realm_epoch); - if (cursor) { - // verify that the period ids match - auto& existing = cursor.get_period().get_id(); - if (existing != info.period) { - ldpp_dout(dpp, -1) << "ERROR: sync status period=" << info.period - << " does not match period=" << existing - << " in history at realm epoch=" << info.realm_epoch << dendl; - return RGWPeriodHistory::Cursor{-EEXIST}; - } - return cursor; - } - - // read the period from rados or pull it from the master - RGWPeriod period; - int r = store->svc()->mdlog->pull_period(dpp, info.period, period, y); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to read period id " - << info.period << ": " << cpp_strerror(r) << dendl; - return RGWPeriodHistory::Cursor{r}; - } - // attach the period to our history - cursor = store->svc()->mdlog->get_period_history()->attach(dpp, std::move(period), y); - if (!cursor) { - r = cursor.get_error(); - ldpp_dout(dpp, -1) << "ERROR: failed to read period history back to " - << info.period << ": " << cpp_strerror(r) << dendl; - } - return cursor; -} - -int RGWRemoteMetaLog::run_sync(const DoutPrefixProvider *dpp, optional_yield y) -{ - if (store->svc()->zone->is_meta_master()) { - return 0; - } - - int r = 0; - - // get shard count and oldest log period from master - rgw_mdlog_info mdlog_info; - for (;;) { - if (going_down) { - ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl; - return 0; - } - r = read_log_info(dpp, &mdlog_info); - if (r == -EIO || r == -ENOENT) { - // keep retrying if master isn't alive or hasn't initialized the log - ldpp_dout(dpp, 10) << __func__ << "(): waiting for master.." << dendl; - backoff.backoff_sleep(); - continue; - } - backoff.reset(); - if (r < 0) { - ldpp_dout(dpp, -1) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl; - return r; - } - break; - } - - rgw_meta_sync_status sync_status; - do { - if (going_down) { - ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl; - return 0; - } - r = run(dpp, new RGWReadSyncStatusCoroutine(&sync_env, &sync_status)); - if (r < 0 && r != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch sync status r=" << r << dendl; - return r; - } - - if (!mdlog_info.period.empty()) { - // restart sync if the remote has a period, but: - // a) our status does not, or - // b) our sync period comes before the remote's oldest log period - if (sync_status.sync_info.period.empty() || - sync_status.sync_info.realm_epoch < mdlog_info.realm_epoch) { - sync_status.sync_info.state = rgw_meta_sync_info::StateInit; - string reason; - if (sync_status.sync_info.period.empty()) { - reason = "period is empty"; - } else { - reason = SSTR("sync_info realm epoch is behind: " << sync_status.sync_info.realm_epoch << " < " << mdlog_info.realm_epoch); - } - tn->log(1, "initialize sync (reason: " + reason + ")"); - ldpp_dout(dpp, 1) << "epoch=" << sync_status.sync_info.realm_epoch - << " in sync status comes before remote's oldest mdlog epoch=" - << mdlog_info.realm_epoch << ", restarting sync" << dendl; - } - } - - if (sync_status.sync_info.state == rgw_meta_sync_info::StateInit) { - ldpp_dout(dpp, 20) << __func__ << "(): init" << dendl; - sync_status.sync_info.num_shards = mdlog_info.num_shards; - auto cursor = store->svc()->mdlog->get_period_history()->get_current(); - if (cursor) { - // run full sync, then start incremental from the current period/epoch - sync_status.sync_info.period = cursor.get_period().get_id(); - sync_status.sync_info.realm_epoch = cursor.get_epoch(); - 
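
run_sync() above sleeps with growing backoff while the master is unreachable and resets after a success. A plausible std-only shape of that logic; the exact growth curve inside Ceph's RGWSyncBackoff may differ, but DEFAULT_BACKOFF_MAX (30s) is the cap declared in rgw_sync.h:

```cpp
#include <algorithm>
#include <iostream>

struct Backoff {
  int cur = 0;
  const int max_secs = 30;  // cf. DEFAULT_BACKOFF_MAX
  // returns the next wait in seconds; the real code sleeps for it
  int next_wait() { return cur = std::min(std::max(1, cur * 2), max_secs); }
  void reset() { cur = 0; }
};

int main() {
  Backoff b;
  for (int i = 0; i < 7; ++i)
    std::cout << b.next_wait() << " ";  // 1 2 4 8 16 30 30
  b.reset();                            // master answered: start over
  std::cout << "\n";
}
```
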
} - r = run(dpp, new RGWInitSyncStatusCoroutine(&sync_env, sync_status.sync_info)); - if (r == -EBUSY) { - backoff.backoff_sleep(); - continue; - } - backoff.reset(); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to init sync status r=" << r << dendl; - return r; - } - } - } while (sync_status.sync_info.state == rgw_meta_sync_info::StateInit); - - auto num_shards = sync_status.sync_info.num_shards; - if (num_shards != mdlog_info.num_shards) { - ldpp_dout(dpp, -1) << "ERROR: can't sync, mismatch between num shards, master num_shards=" << mdlog_info.num_shards << " local num_shards=" << num_shards << dendl; - return -EINVAL; - } - - RGWPeriodHistory::Cursor cursor; - do { - r = run(dpp, new RGWReadSyncStatusCoroutine(&sync_env, &sync_status)); - if (r < 0 && r != -ENOENT) { - tn->log(0, SSTR("ERROR: failed to fetch sync status r=" << r)); - return r; - } - - switch ((rgw_meta_sync_info::SyncState)sync_status.sync_info.state) { - case rgw_meta_sync_info::StateBuildingFullSyncMaps: - tn->log(20, "building full sync maps"); - r = run(dpp, new RGWFetchAllMetaCR(&sync_env, num_shards, sync_status.sync_markers, tn)); - if (r == -EBUSY || r == -EIO) { - backoff.backoff_sleep(); - continue; - } - backoff.reset(); - if (r < 0) { - tn->log(0, SSTR("ERROR: failed to fetch all metadata keys (r=" << r << ")")); - return r; - } - - sync_status.sync_info.state = rgw_meta_sync_info::StateSync; - r = store_sync_info(dpp, sync_status.sync_info); - if (r < 0) { - tn->log(0, SSTR("ERROR: failed to update sync status (r=" << r << ")")); - return r; - } - /* fall through */ - case rgw_meta_sync_info::StateSync: - tn->log(20, "sync"); - // find our position in the period history (if any) - cursor = get_period_at(dpp, store, sync_status.sync_info, y); - r = cursor.get_error(); - if (r < 0) { - return r; - } - meta_sync_cr = new RGWMetaSyncCR(&sync_env, cursor, sync_status, tn); - r = run(dpp, meta_sync_cr); - if (r < 0) { - tn->log(0, "ERROR: failed to fetch all metadata keys"); - return r; - } - break; - default: - tn->log(0, "ERROR: bad sync state!"); - return -EIO; - } - } while (!going_down); - - return 0; -} - -void RGWRemoteMetaLog::wakeup(int shard_id) -{ - if (!meta_sync_cr) { - return; - } - meta_sync_cr->wakeup(shard_id); -} - -int RGWCloneMetaLogCoroutine::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - do { - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": init request" << dendl; - return state_init(); - } - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status" << dendl; - return state_read_shard_status(); - } - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status complete" << dendl; - return state_read_shard_status_complete(); - } - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": sending rest request" << dendl; - return state_send_rest_request(dpp); - } - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": receiving rest response" << dendl; - return state_receive_rest_response(); - } - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries" << dendl; - return state_store_mdlog_entries(); - } - } while (truncated); - yield { - ldpp_dout(dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries complete" << dendl; - return state_store_mdlog_entries_complete(); - } - } - - return 0; -} - -int RGWCloneMetaLogCoroutine::state_init() -{ - data = 
rgw_mdlog_shard_data(); - - return 0; -} - -int RGWCloneMetaLogCoroutine::state_read_shard_status() -{ - const bool add_ref = false; // default constructs with refs=1 - - completion.reset(new RGWMetadataLogInfoCompletion( - [this](int ret, const cls_log_header& header) { - if (ret < 0) { - if (ret != -ENOENT) { - ldpp_dout(sync_env->dpp, 1) << "ERROR: failed to read mdlog info with " - << cpp_strerror(ret) << dendl; - } - } else { - shard_info.marker = header.max_marker; - shard_info.last_update = header.max_time.to_real_time(); - } - // wake up parent stack - io_complete(); - }), add_ref); - - int ret = mdlog->get_info_async(sync_env->dpp, shard_id, completion.get()); - if (ret < 0) { - ldpp_dout(sync_env->dpp, 0) << "ERROR: mdlog->get_info_async() returned ret=" << ret << dendl; - return set_cr_error(ret); - } - - return io_block(0); -} - -int RGWCloneMetaLogCoroutine::state_read_shard_status_complete() -{ - completion.reset(); - - ldpp_dout(sync_env->dpp, 20) << "shard_id=" << shard_id << " marker=" << shard_info.marker << " last_update=" << shard_info.last_update << dendl; - - marker = shard_info.marker; - - return 0; -} - -int RGWCloneMetaLogCoroutine::state_send_rest_request(const DoutPrefixProvider *dpp) -{ - RGWRESTConn *conn = sync_env->conn; - - char buf[32]; - snprintf(buf, sizeof(buf), "%d", shard_id); - - char max_entries_buf[32]; - snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", max_entries); - - const char *marker_key = (marker.empty() ? "" : "marker"); - - rgw_http_param_pair pairs[] = { { "type", "metadata" }, - { "id", buf }, - { "period", period.c_str() }, - { "max-entries", max_entries_buf }, - { marker_key, marker.c_str() }, - { NULL, NULL } }; - - http_op = new RGWRESTReadResource(conn, "/admin/log", pairs, NULL, sync_env->http_manager); - - init_new_io(http_op); - - int ret = http_op->aio_read(dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl; - log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl; - http_op->put(); - http_op = NULL; - return set_cr_error(ret); - } - - return io_block(0); -} - -int RGWCloneMetaLogCoroutine::state_receive_rest_response() -{ - int ret = http_op->wait(&data, null_yield); - if (ret < 0) { - error_stream << "http operation failed: " << http_op->to_str() << " status=" << http_op->get_http_status() << std::endl; - ldpp_dout(sync_env->dpp, 5) << "failed to wait for op, ret=" << ret << dendl; - http_op->put(); - http_op = NULL; - return set_cr_error(ret); - } - http_op->put(); - http_op = NULL; - - ldpp_dout(sync_env->dpp, 20) << "remote mdlog, shard_id=" << shard_id << " num of shard entries: " << data.entries.size() << dendl; - - truncated = ((int)data.entries.size() == max_entries); - - if (data.entries.empty()) { - if (new_marker) { - *new_marker = marker; - } - return set_cr_done(); - } - - if (new_marker) { - *new_marker = data.entries.back().id; - } - - return 0; -} - - -int RGWCloneMetaLogCoroutine::state_store_mdlog_entries() -{ - list dest_entries; - - vector::iterator iter; - for (iter = data.entries.begin(); iter != data.entries.end(); ++iter) { - rgw_mdlog_entry& entry = *iter; - ldpp_dout(sync_env->dpp, 20) << "entry: name=" << entry.name << dendl; - - cls_log_entry dest_entry; - dest_entry.id = entry.id; - dest_entry.section = entry.section; - dest_entry.name = entry.name; - dest_entry.timestamp = utime_t(entry.timestamp); - - encode(entry.log_data, dest_entry.data); - - dest_entries.push_back(dest_entry); - - marker = entry.id; - 
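
state_receive_rest_response() above infers truncation from a full page and resumes from the last entry's id, leaving the marker unchanged on an empty page. The same bookkeeping as a tiny runnable function:

```cpp
#include <iostream>
#include <string>
#include <vector>

struct Entry { std::string id; };

// Returns whether more entries may follow; updates the resume marker.
bool advance(const std::vector<Entry>& page, int max_entries,
             std::string& marker) {
  if (!page.empty()) marker = page.back().id;
  return static_cast<int>(page.size()) == max_entries;  // full page => truncated
}

int main() {
  std::string marker = "start";
  std::vector<Entry> page = {{"1_a"}, {"1_b"}};
  bool truncated = advance(page, 2, marker);
  std::cout << marker << " truncated=" << truncated << "\n";  // 1_b truncated=1
}
```
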
} - - RGWAioCompletionNotifier *cn = stack->create_completion_notifier(); - - int ret = mdlog->store_entries_in_shard(sync_env->dpp, dest_entries, shard_id, cn->completion()); - if (ret < 0) { - cn->put(); - ldpp_dout(sync_env->dpp, 10) << "failed to store md log entries shard_id=" << shard_id << " ret=" << ret << dendl; - return set_cr_error(ret); - } - return io_block(0); -} - -int RGWCloneMetaLogCoroutine::state_store_mdlog_entries_complete() -{ - return set_cr_done(); -} - -void rgw_meta_sync_info::decode_json(JSONObj *obj) -{ - string s; - JSONDecoder::decode_json("status", s, obj); - if (s == "init") { - state = StateInit; - } else if (s == "building-full-sync-maps") { - state = StateBuildingFullSyncMaps; - } else if (s == "sync") { - state = StateSync; - } - JSONDecoder::decode_json("num_shards", num_shards, obj); - JSONDecoder::decode_json("period", period, obj); - JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); -} - -void rgw_meta_sync_info::dump(Formatter *f) const -{ - string s; - switch ((SyncState)state) { - case StateInit: - s = "init"; - break; - case StateBuildingFullSyncMaps: - s = "building-full-sync-maps"; - break; - case StateSync: - s = "sync"; - break; - default: - s = "unknown"; - break; - } - encode_json("status", s, f); - encode_json("num_shards", num_shards, f); - encode_json("period", period, f); - encode_json("realm_epoch", realm_epoch, f); -} - - -void rgw_meta_sync_marker::decode_json(JSONObj *obj) -{ - int s; - JSONDecoder::decode_json("state", s, obj); - state = s; - JSONDecoder::decode_json("marker", marker, obj); - JSONDecoder::decode_json("next_step_marker", next_step_marker, obj); - JSONDecoder::decode_json("total_entries", total_entries, obj); - JSONDecoder::decode_json("pos", pos, obj); - utime_t ut; - JSONDecoder::decode_json("timestamp", ut, obj); - timestamp = ut.to_real_time(); - JSONDecoder::decode_json("realm_epoch", realm_epoch, obj); -} - -void rgw_meta_sync_marker::dump(Formatter *f) const -{ - encode_json("state", (int)state, f); - encode_json("marker", marker, f); - encode_json("next_step_marker", next_step_marker, f); - encode_json("total_entries", total_entries, f); - encode_json("pos", pos, f); - encode_json("timestamp", utime_t(timestamp), f); - encode_json("realm_epoch", realm_epoch, f); -} - -void rgw_meta_sync_status::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("info", sync_info, obj); - JSONDecoder::decode_json("markers", sync_markers, obj); -} - -void rgw_meta_sync_status::dump(Formatter *f) const { - encode_json("info", sync_info, f); - encode_json("markers", sync_markers, f); -} - -void rgw_sync_error_info::dump(Formatter *f) const { - encode_json("source_zone", source_zone, f); - encode_json("error_code", error_code, f); - encode_json("message", message, f); -} - diff --git a/src/rgw/store/rados/rgw_sync.h b/src/rgw/store/rados/rgw_sync.h deleted file mode 100644 index 8c4e511ae3e..00000000000 --- a/src/rgw/store/rados/rgw_sync.h +++ /dev/null @@ -1,549 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_H -#define CEPH_RGW_SYNC_H - -#include - -#include "include/stringify.h" - -#include "rgw_coroutine.h" -#include "rgw_http_client.h" -#include "rgw_metadata.h" -#include "rgw_meta_sync_status.h" -#include "rgw_sal.h" -#include "rgw_sal_rados.h" -#include "rgw_sync_trace.h" -#include "rgw_mdlog.h" - -#define ERROR_LOGGER_SHARDS 32 -#define RGW_SYNC_ERROR_LOG_SHARD_PREFIX "sync.error-log" - -struct rgw_mdlog_info { - 
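
The JSON round-trip above maps sync states to fixed strings; these are the values surfaced in metadata sync status output. The same mapping, extracted into a standalone helper:

```cpp
#include <iostream>

// State strings exactly as dumped/parsed by rgw_meta_sync_info above;
// anything out of range dumps as "unknown".
enum SyncState { StateInit, StateBuildingFullSyncMaps, StateSync };

const char* to_string(int s) {
  switch (s) {
  case StateInit:                 return "init";
  case StateBuildingFullSyncMaps: return "building-full-sync-maps";
  case StateSync:                 return "sync";
  default:                        return "unknown";
  }
}

int main() { std::cout << to_string(StateSync) << "\n"; }
```
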
uint32_t num_shards; - std::string period; //< period id of the master's oldest metadata log - epoch_t realm_epoch; //< realm epoch of oldest metadata log - - rgw_mdlog_info() : num_shards(0), realm_epoch(0) {} - - void decode_json(JSONObj *obj); -}; - - -struct rgw_mdlog_entry { - std::string id; - std::string section; - std::string name; - ceph::real_time timestamp; - RGWMetadataLogData log_data; - - void decode_json(JSONObj *obj); - - bool convert_from(cls_log_entry& le) { - id = le.id; - section = le.section; - name = le.name; - timestamp = le.timestamp.to_real_time(); - try { - auto iter = le.data.cbegin(); - decode(log_data, iter); - } catch (buffer::error& err) { - return false; - } - return true; - } -}; - -struct rgw_mdlog_shard_data { - std::string marker; - bool truncated; - std::vector entries; - - void decode_json(JSONObj *obj); -}; - -class RGWAsyncRadosProcessor; -class RGWMetaSyncStatusManager; -class RGWMetaSyncCR; -class RGWRESTConn; -class RGWSyncTraceManager; - -class RGWSyncErrorLogger { - rgw::sal::RadosStore* store; - - std::vector oids; - int num_shards; - - std::atomic counter = { 0 }; -public: - RGWSyncErrorLogger(rgw::sal::RadosStore* _store, const std::string &oid_prefix, int _num_shards); - RGWCoroutine *log_error_cr(const DoutPrefixProvider *dpp, const std::string& source_zone, const std::string& section, const std::string& name, uint32_t error_code, const std::string& message); - - static std::string get_shard_oid(const std::string& oid_prefix, int shard_id); -}; - -struct rgw_sync_error_info { - std::string source_zone; - uint32_t error_code; - std::string message; - - rgw_sync_error_info() : error_code(0) {} - rgw_sync_error_info(const std::string& _source_zone, uint32_t _error_code, const std::string& _message) : source_zone(_source_zone), error_code(_error_code), message(_message) {} - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(source_zone, bl); - encode(error_code, bl); - encode(message, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(source_zone, bl); - decode(error_code, bl); - decode(message, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; -}; -WRITE_CLASS_ENCODER(rgw_sync_error_info) - -#define DEFAULT_BACKOFF_MAX 30 - -class RGWSyncBackoff { - int cur_wait; - int max_secs; - - void update_wait_time(); -public: - explicit RGWSyncBackoff(int _max_secs = DEFAULT_BACKOFF_MAX) : cur_wait(0), max_secs(_max_secs) {} - - void backoff_sleep(); - void reset() { - cur_wait = 0; - } - - void backoff(RGWCoroutine *op); -}; - -class RGWBackoffControlCR : public RGWCoroutine -{ - RGWCoroutine *cr; - ceph::mutex lock; - - RGWSyncBackoff backoff; - bool reset_backoff; - - bool exit_on_error; - -protected: - bool *backoff_ptr() { - return &reset_backoff; - } - - ceph::mutex& cr_lock() { - return lock; - } - - RGWCoroutine *get_cr() { - return cr; - } - -public: - RGWBackoffControlCR(CephContext *_cct, bool _exit_on_error) - : RGWCoroutine(_cct), - cr(nullptr), - lock(ceph::make_mutex("RGWBackoffControlCR::lock:" + stringify(this))), - reset_backoff(false), exit_on_error(_exit_on_error) { - } - - ~RGWBackoffControlCR() override { - if (cr) { - cr->put(); - } - } - - virtual RGWCoroutine *alloc_cr() = 0; - virtual RGWCoroutine *alloc_finisher_cr() { return NULL; } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -struct RGWMetaSyncEnv { - const DoutPrefixProvider *dpp; - CephContext *cct{nullptr}; - rgw::sal::RadosStore* store{nullptr}; - 
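
convert_from() above turns a decode exception into a `false` return, so one malformed mdlog entry is skipped instead of aborting the whole shard. The same pattern, with a stringstream standing in for Ceph's bufferlist decoding:

```cpp
#include <iostream>
#include <sstream>
#include <string>

// Decoding can throw (buffer::error in Ceph); the converter reports
// failure as a bool so callers can skip the bad entry.
bool convert(const std::string& payload, int& log_data) {
  try {
    std::istringstream in(payload);
    in.exceptions(std::ios::failbit | std::ios::badbit);
    in >> log_data;  // throws on malformed input, like decode(log_data, iter)
  } catch (const std::ios_base::failure&) {
    return false;
  }
  return true;
}

int main() {
  int v;
  std::cout << convert("42", v) << " " << v << "\n";  // 1 42
  std::cout << convert("garbage", v) << "\n";         // 0
}
```
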
RGWRESTConn *conn{nullptr}; - RGWAsyncRadosProcessor *async_rados{nullptr}; - RGWHTTPManager *http_manager{nullptr}; - RGWSyncErrorLogger *error_logger{nullptr}; - RGWSyncTraceManager *sync_tracer{nullptr}; - - RGWMetaSyncEnv() {} - - void init(const DoutPrefixProvider *_dpp, CephContext *_cct, rgw::sal::RadosStore* _store, RGWRESTConn *_conn, - RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager, - RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer); - - std::string shard_obj_name(int shard_id); - std::string status_oid(); -}; - -class RGWRemoteMetaLog : public RGWCoroutinesManager { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWRESTConn *conn; - RGWAsyncRadosProcessor *async_rados; - - RGWHTTPManager http_manager; - RGWMetaSyncStatusManager *status_manager; - RGWSyncErrorLogger *error_logger{nullptr}; - RGWSyncTraceManager *sync_tracer{nullptr}; - - RGWMetaSyncCR *meta_sync_cr{nullptr}; - - RGWSyncBackoff backoff; - - RGWMetaSyncEnv sync_env; - - void init_sync_env(RGWMetaSyncEnv *env); - int store_sync_info(const DoutPrefixProvider *dpp, const rgw_meta_sync_info& sync_info); - - std::atomic going_down = { false }; - - RGWSyncTraceNodeRef tn; - -public: - RGWRemoteMetaLog(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* _store, - RGWAsyncRadosProcessor *async_rados, - RGWMetaSyncStatusManager *_sm) - : RGWCoroutinesManager(_store->ctx(), _store->getRados()->get_cr_registry()), - dpp(dpp), store(_store), conn(NULL), async_rados(async_rados), - http_manager(store->ctx(), completion_mgr), - status_manager(_sm) {} - - virtual ~RGWRemoteMetaLog() override; - - int init(); - void finish(); - - int read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info); - int read_master_log_shards_info(const DoutPrefixProvider *dpp, const std::string& master_period, std::map *shards_info); - int read_master_log_shards_next(const DoutPrefixProvider *dpp, const std::string& period, std::map shard_markers, std::map *result); - int read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status); - int init_sync_status(const DoutPrefixProvider *dpp); - int run_sync(const DoutPrefixProvider *dpp, optional_yield y); - - void wakeup(int shard_id); - - RGWMetaSyncEnv& get_sync_env() { - return sync_env; - } -}; - -class RGWMetaSyncStatusManager : public DoutPrefixProvider { - rgw::sal::RadosStore* store; - librados::IoCtx ioctx; - - RGWRemoteMetaLog master_log; - - std::map shard_objs; - - struct utime_shard { - real_time ts; - int shard_id; - - utime_shard() : shard_id(-1) {} - - bool operator<(const utime_shard& rhs) const { - if (ts == rhs.ts) { - return shard_id < rhs.shard_id; - } - return ts < rhs.ts; - } - }; - - ceph::shared_mutex ts_to_shard_lock = ceph::make_shared_mutex("ts_to_shard_lock"); - std::map ts_to_shard; - std::vector clone_markers; - -public: - RGWMetaSyncStatusManager(rgw::sal::RadosStore* _store, RGWAsyncRadosProcessor *async_rados) - : store(_store), master_log(this, store, async_rados, this) - {} - - virtual ~RGWMetaSyncStatusManager() override; - - int init(const DoutPrefixProvider *dpp); - - int read_sync_status(const DoutPrefixProvider *dpp, rgw_meta_sync_status *sync_status) { - return master_log.read_sync_status(dpp, sync_status); - } - int init_sync_status(const DoutPrefixProvider *dpp) { return master_log.init_sync_status(dpp); } - int read_log_info(const DoutPrefixProvider *dpp, rgw_mdlog_info *log_info) { - return master_log.read_log_info(dpp, log_info); - } - int 
read_master_log_shards_info(const DoutPrefixProvider *dpp, const std::string& master_period, std::map *shards_info) { - return master_log.read_master_log_shards_info(dpp, master_period, shards_info); - } - int read_master_log_shards_next(const DoutPrefixProvider *dpp, const std::string& period, std::map shard_markers, std::map *result) { - return master_log.read_master_log_shards_next(dpp, period, shard_markers, result); - } - - int run(const DoutPrefixProvider *dpp, optional_yield y) { return master_log.run_sync(dpp, y); } - - - // implements DoutPrefixProvider - CephContext *get_cct() const override { return store->ctx(); } - unsigned get_subsys() const override; - std::ostream& gen_prefix(std::ostream& out) const override; - - void wakeup(int shard_id) { return master_log.wakeup(shard_id); } - void stop() { - master_log.finish(); - } -}; - -class RGWOrderCallCR : public RGWCoroutine -{ -public: - RGWOrderCallCR(CephContext *cct) : RGWCoroutine(cct) {} - - virtual void call_cr(RGWCoroutine *_cr) = 0; -}; - -class RGWLastCallerWinsCR : public RGWOrderCallCR -{ - RGWCoroutine *cr{nullptr}; - -public: - explicit RGWLastCallerWinsCR(CephContext *cct) : RGWOrderCallCR(cct) {} - ~RGWLastCallerWinsCR() { - if (cr) { - cr->put(); - } - } - - int operate(const DoutPrefixProvider *dpp) override; - - void call_cr(RGWCoroutine *_cr) override { - if (cr) { - cr->put(); - } - cr = _cr; - } -}; - -template -class RGWSyncShardMarkerTrack { - struct marker_entry { - uint64_t pos; - real_time timestamp; - - marker_entry() : pos(0) {} - marker_entry(uint64_t _p, const real_time& _ts) : pos(_p), timestamp(_ts) {} - }; - typename std::map pending; - - std::map finish_markers; - - int window_size; - int updates_since_flush; - - RGWOrderCallCR *order_cr{nullptr}; - -protected: - typename std::set need_retry_set; - - virtual RGWCoroutine *store_marker(const T& new_marker, uint64_t index_pos, const real_time& timestamp) = 0; - virtual RGWOrderCallCR *allocate_order_control_cr() = 0; - virtual void handle_finish(const T& marker) { } - -public: - RGWSyncShardMarkerTrack(int _window_size) : window_size(_window_size), updates_since_flush(0) {} - virtual ~RGWSyncShardMarkerTrack() { - if (order_cr) { - order_cr->put(); - } - } - - bool start(const T& pos, int index_pos, const real_time& timestamp) { - if (pending.find(pos) != pending.end()) { - return false; - } - pending[pos] = marker_entry(index_pos, timestamp); - return true; - } - - void try_update_high_marker(const T& pos, int index_pos, const real_time& timestamp) { - finish_markers[pos] = marker_entry(index_pos, timestamp); - } - - RGWCoroutine *finish(const T& pos) { - if (pending.empty()) { - /* can happen, due to a bug that ended up with multiple objects with the same name and version - * -- which can happen when versioning is enabled an the version is 'null'. 
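
RGWSyncShardMarkerTrack above is the heart of out-of-order completion handling. A simplified runnable model showing why the stored marker trails the oldest in-flight entry, and when a flush fires (window_size mirrors META_SYNC_UPDATE_MARKER_WINDOW; per-entry metadata is elided):

```cpp
#include <iostream>
#include <iterator>
#include <map>
#include <string>

struct MarkerTrack {
  std::map<std::string, int> pending;   // in flight, keyed by position
  std::map<std::string, int> finished;  // done, not yet persisted
  int window_size = 10, updates = 0;

  bool start(const std::string& pos) { return pending.emplace(pos, 0).second; }

  void finish(const std::string& pos) {
    auto it = pending.find(pos);
    if (it == pending.end()) return;
    const bool is_first = (it == pending.begin());
    finished[pos] = it->second;
    pending.erase(it);
    ++updates;
    if (is_first && (updates >= window_size || pending.empty())) flush();
  }

  void flush() {
    // persist the highest finished position below the oldest pending one
    auto end = pending.empty() ? finished.end()
                               : finished.lower_bound(pending.begin()->first);
    if (end == finished.begin()) return;
    std::cout << "store marker=" << std::prev(end)->first << "\n";
    finished.erase(finished.begin(), end);
    updates = 0;
  }
};

int main() {
  MarkerTrack t;
  t.start("a"); t.start("b"); t.start("c");
  t.finish("b");  // "a" still pending: nothing can be persisted yet
  t.finish("c");  // same: "a" remains the oldest in-flight entry
  t.finish("a");  // oldest done, nothing pending: stores marker=c
}
```
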
- */ - return NULL; - } - - typename std::map::iterator iter = pending.begin(); - - bool is_first = (pos == iter->first); - - typename std::map::iterator pos_iter = pending.find(pos); - if (pos_iter == pending.end()) { - /* see pending.empty() comment */ - return NULL; - } - - finish_markers[pos] = pos_iter->second; - - pending.erase(pos); - - handle_finish(pos); - - updates_since_flush++; - - if (is_first && (updates_since_flush >= window_size || pending.empty())) { - return flush(); - } - return NULL; - } - - RGWCoroutine *flush() { - if (finish_markers.empty()) { - return NULL; - } - - typename std::map::iterator i; - - if (pending.empty()) { - i = finish_markers.end(); - } else { - i = finish_markers.lower_bound(pending.begin()->first); - } - if (i == finish_markers.begin()) { - return NULL; - } - updates_since_flush = 0; - - auto last = i; - --i; - const T& high_marker = i->first; - marker_entry& high_entry = i->second; - RGWCoroutine *cr = order(store_marker(high_marker, high_entry.pos, high_entry.timestamp)); - finish_markers.erase(finish_markers.begin(), last); - return cr; - } - - /* - * a key needs retry if it was processing when another marker that points - * to the same bucket shards arrives. Instead of processing it, we mark - * it as need_retry so that when we finish processing the original, we - * retry the processing on the same bucket shard, in case there are more - * entries to process. This closes a race that can happen. - */ - bool need_retry(const K& key) { - return (need_retry_set.find(key) != need_retry_set.end()); - } - - void set_need_retry(const K& key) { - need_retry_set.insert(key); - } - - void reset_need_retry(const K& key) { - need_retry_set.erase(key); - } - - RGWCoroutine *order(RGWCoroutine *cr) { - /* either returns a new RGWLastWriteWinsCR, or update existing one, in which case it returns - * nothing and the existing one will call the cr - */ - if (order_cr && order_cr->is_done()) { - order_cr->put(); - order_cr = nullptr; - } - if (!order_cr) { - order_cr = allocate_order_control_cr(); - order_cr->get(); - order_cr->call_cr(cr); - return order_cr; - } - order_cr->call_cr(cr); - return nullptr; /* don't call it a second time */ - } -}; - -class RGWMetaSyncShardMarkerTrack; - -class RGWMetaSyncSingleEntryCR : public RGWCoroutine { - RGWMetaSyncEnv *sync_env; - - std::string raw_key; - std::string entry_marker; - RGWMDLogStatus op_status; - - ssize_t pos; - std::string section; - std::string key; - - int sync_status; - - bufferlist md_bl; - - RGWMetaSyncShardMarkerTrack *marker_tracker; - - int tries; - - bool error_injection; - - RGWSyncTraceNodeRef tn; - -public: - RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env, - const std::string& _raw_key, const std::string& _entry_marker, - const RGWMDLogStatus& _op_status, - RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent); - - int operate(const DoutPrefixProvider *dpp) override; -}; - -class RGWShardCollectCR : public RGWCoroutine { - int current_running = 0; - protected: - int max_concurrent; - int status = 0; - - // called with the result of each child. error codes can be ignored by - // returning 0. if handle_result() returns a negative value, it's - // treated as an error and stored in 'status'. 
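
The need_retry set above closes a race where a newer log entry for a bucket shard arrives while that shard is still being processed: instead of running twice concurrently, the key is flagged and the shard is reprocessed once afterwards. A bare demonstration of the flag's API contract:

```cpp
#include <iostream>
#include <set>
#include <string>

std::set<std::string> need_retry_set;

bool need_retry(const std::string& k) { return need_retry_set.count(k) != 0; }
void set_need_retry(const std::string& k) { need_retry_set.insert(k); }
void reset_need_retry(const std::string& k) { need_retry_set.erase(k); }

int main() {
  const std::string key = "bucket:shard0";
  // first entry for this shard starts processing...
  set_need_retry(key);          // ...a second entry arrives meanwhile
  if (need_retry(key)) {        // first pass finished: check the flag
    reset_need_retry(key);
    std::cout << "reprocess " << key << "\n";  // one extra pass, not two
  }
}
```
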
the last such error is - // reported to the caller with set_cr_error() - virtual int handle_result(int r) = 0; - public: - RGWShardCollectCR(CephContext *_cct, int _max_concurrent) - : RGWCoroutine(_cct), max_concurrent(_max_concurrent) - {} - - virtual bool spawn_next() = 0; - int operate(const DoutPrefixProvider *dpp) override; -}; - -// factory functions for meta sync coroutines needed in mdlog trimming - -RGWCoroutine* create_read_remote_mdlog_shard_info_cr(RGWMetaSyncEnv *env, - const std::string& period, - int shard_id, - RGWMetadataLogInfo* info); - -RGWCoroutine* create_list_remote_mdlog_shard_cr(RGWMetaSyncEnv *env, - const std::string& period, - int shard_id, - const std::string& marker, - uint32_t max_entries, - rgw_mdlog_shard_data *result); - -#endif diff --git a/src/rgw/store/rados/rgw_sync_counters.cc b/src/rgw/store/rados/rgw_sync_counters.cc deleted file mode 100644 index 1d23d58dcfb..00000000000 --- a/src/rgw/store/rados/rgw_sync_counters.cc +++ /dev/null @@ -1,28 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/ceph_context.h" -#include "rgw_sync_counters.h" - -namespace sync_counters { - -PerfCountersRef build(CephContext *cct, const std::string& name) -{ - PerfCountersBuilder b(cct, name, l_first, l_last); - - // share these counters with ceph-mgr - b.set_prio_default(PerfCountersBuilder::PRIO_USEFUL); - - b.add_u64_avg(l_fetch, "fetch_bytes", "Number of object bytes replicated"); - b.add_u64_counter(l_fetch_not_modified, "fetch_not_modified", "Number of objects already replicated"); - b.add_u64_counter(l_fetch_err, "fetch_errors", "Number of object replication errors"); - - b.add_time_avg(l_poll, "poll_latency", "Average latency of replication log requests"); - b.add_u64_counter(l_poll_err, "poll_errors", "Number of replication log request errors"); - - auto logger = PerfCountersRef{ b.create_perf_counters(), cct }; - cct->get_perfcounters_collection()->add(logger.get()); - return logger; -} - -} // namespace sync_counters diff --git a/src/rgw/store/rados/rgw_sync_counters.h b/src/rgw/store/rados/rgw_sync_counters.h deleted file mode 100644 index df3acc68023..00000000000 --- a/src/rgw/store/rados/rgw_sync_counters.h +++ /dev/null @@ -1,25 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include "common/perf_counters_collection.h" - -namespace sync_counters { - -enum { - l_first = 805000, - - l_fetch, - l_fetch_not_modified, - l_fetch_err, - - l_poll, - l_poll_err, - - l_last, -}; - -PerfCountersRef build(CephContext *cct, const std::string& name); - -} // namespace sync_counters diff --git a/src/rgw/store/rados/rgw_sync_error_repo.cc b/src/rgw/store/rados/rgw_sync_error_repo.cc deleted file mode 100644 index 44305b60b6b..00000000000 --- a/src/rgw/store/rados/rgw_sync_error_repo.cc +++ /dev/null @@ -1,205 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2020 Red Hat, Inc - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- */ - -#include "rgw_sync_error_repo.h" -#include "rgw_coroutine.h" -#include "rgw_sal.h" -#include "services/svc_rados.h" -#include "cls/cmpomap/client.h" - -namespace rgw::error_repo { - -// prefix for the binary encoding of keys. this particular value is not -// valid as the first byte of a utf8 code point, so we use this to -// differentiate the binary encoding from existing string keys for -// backward-compatibility -constexpr uint8_t binary_key_prefix = 0x80; - -struct key_type { - rgw_bucket_shard bs; - std::optional gen; -}; - -void encode(const key_type& k, bufferlist& bl, uint64_t f=0) -{ - ENCODE_START(1, 1, bl); - encode(k.bs, bl); - encode(k.gen, bl); - ENCODE_FINISH(bl); -} - -void decode(key_type& k, bufferlist::const_iterator& bl) -{ - DECODE_START(1, bl); - decode(k.bs, bl); - decode(k.gen, bl); - DECODE_FINISH(bl); -} - -std::string encode_key(const rgw_bucket_shard& bs, - std::optional gen) -{ - using ceph::encode; - const auto key = key_type{bs, gen}; - bufferlist bl; - encode(binary_key_prefix, bl); - encode(key, bl); - return bl.to_str(); -} - -int decode_key(std::string encoded, - rgw_bucket_shard& bs, - std::optional& gen) -{ - using ceph::decode; - key_type key; - const auto bl = bufferlist::static_from_string(encoded); - auto p = bl.cbegin(); - try { - uint8_t prefix; - decode(prefix, p); - if (prefix != binary_key_prefix) { - return -EINVAL; - } - decode(key, p); - } catch (const buffer::error&) { - return -EIO; - } - if (!p.end()) { - return -EIO; // buffer contained unexpected bytes - } - bs = std::move(key.bs); - gen = key.gen; - return 0; -} - -ceph::real_time decode_value(const bufferlist& bl) -{ - uint64_t value; - try { - using ceph::decode; - decode(value, bl); - } catch (const buffer::error&) { - value = 0; // empty buffer = 0 - } - return ceph::real_clock::zero() + ceph::timespan(value); -} - -int write(librados::ObjectWriteOperation& op, - const std::string& key, - ceph::real_time timestamp) -{ - // overwrite the existing timestamp if value is greater - const uint64_t value = timestamp.time_since_epoch().count(); - using namespace ::cls::cmpomap; - const bufferlist zero = u64_buffer(0); // compare against 0 for missing keys - return cmp_set_vals(op, Mode::U64, Op::GT, {{key, u64_buffer(value)}}, zero); -} - -int remove(librados::ObjectWriteOperation& op, - const std::string& key, - ceph::real_time timestamp) -{ - // remove the omap key if value >= existing - const uint64_t value = timestamp.time_since_epoch().count(); - using namespace ::cls::cmpomap; - return cmp_rm_keys(op, Mode::U64, Op::GTE, {{key, u64_buffer(value)}}); -} - -class RGWErrorRepoWriteCR : public RGWSimpleCoroutine { - RGWSI_RADOS::Obj obj; - std::string key; - ceph::real_time timestamp; - - boost::intrusive_ptr cn; - public: - RGWErrorRepoWriteCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, - const std::string& key, ceph::real_time timestamp) - : RGWSimpleCoroutine(rados->ctx()), - obj(rados->obj(raw_obj)), - key(key), timestamp(timestamp) - {} - - int send_request(const DoutPrefixProvider *dpp) override { - librados::ObjectWriteOperation op; - int r = write(op, key, timestamp); - if (r < 0) { - return r; - } - r = obj.open(dpp); - if (r < 0) { - return r; - } - - cn = stack->create_completion_notifier(); - return obj.aio_operate(cn->completion(), &op); - } - - int request_complete() override { - return cn->completion()->get_return_value(); - } -}; - -RGWCoroutine* write_cr(RGWSI_RADOS* rados, - const rgw_raw_obj& obj, - const std::string& key, - ceph::real_time timestamp) -{ - 
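  // A minimal usage sketch (ioctx/oid/bs/gen are hypothetical, not part of
  // this file): the cmp_set_vals() call composed by write() above turns the
  // update into a compare-and-swap, so the stored timestamp only moves
  // forward; a missing key compares as 0 and therefore always accepts the
  // first write.
  //
  //   librados::ObjectWriteOperation op;
  //   int r = rgw::error_repo::write(op, rgw::error_repo::encode_key(bs, gen),
  //                                  ceph::real_clock::now());
  //   if (r >= 0) {
  //     r = ioctx.operate(oid, &op);  // synchronous equivalent of the CR below
  //   }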
return new RGWErrorRepoWriteCR(rados, obj, key, timestamp); -} - - -class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine { - RGWSI_RADOS::Obj obj; - std::string key; - ceph::real_time timestamp; - - boost::intrusive_ptr cn; - public: - RGWErrorRepoRemoveCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, - const std::string& key, ceph::real_time timestamp) - : RGWSimpleCoroutine(rados->ctx()), - obj(rados->obj(raw_obj)), - key(key), timestamp(timestamp) - {} - - int send_request(const DoutPrefixProvider *dpp) override { - librados::ObjectWriteOperation op; - int r = remove(op, key, timestamp); - if (r < 0) { - return r; - } - r = obj.open(dpp); - if (r < 0) { - return r; - } - - cn = stack->create_completion_notifier(); - return obj.aio_operate(cn->completion(), &op); - } - - int request_complete() override { - return cn->completion()->get_return_value(); - } -}; - -RGWCoroutine* remove_cr(RGWSI_RADOS* rados, - const rgw_raw_obj& obj, - const std::string& key, - ceph::real_time timestamp) -{ - return new RGWErrorRepoRemoveCR(rados, obj, key, timestamp); -} - -} // namespace rgw::error_repo diff --git a/src/rgw/store/rados/rgw_sync_error_repo.h b/src/rgw/store/rados/rgw_sync_error_repo.h deleted file mode 100644 index 60525d281f0..00000000000 --- a/src/rgw/store/rados/rgw_sync_error_repo.h +++ /dev/null @@ -1,59 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2020 Red Hat, Inc - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - */ - -#pragma once - -#include -#include "include/rados/librados_fwd.hpp" -#include "include/buffer_fwd.h" -#include "common/ceph_time.h" - -class RGWSI_RADOS; -class RGWCoroutine; -struct rgw_raw_obj; -struct rgw_bucket_shard; - -namespace rgw::error_repo { - -// binary-encode a bucket/shard/gen and return it as a string -std::string encode_key(const rgw_bucket_shard& bs, - std::optional gen); - -// try to decode a key. 
returns -EINVAL if not in binary format -int decode_key(std::string encoded, - rgw_bucket_shard& bs, - std::optional& gen); - -// decode a timestamp as a uint64_t for CMPXATTR_MODE_U64 -ceph::real_time decode_value(const ceph::bufferlist& bl); - -// write an omap key iff the given timestamp is newer -int write(librados::ObjectWriteOperation& op, - const std::string& key, - ceph::real_time timestamp); -RGWCoroutine* write_cr(RGWSI_RADOS* rados, - const rgw_raw_obj& obj, - const std::string& key, - ceph::real_time timestamp); - -// remove an omap key iff there isn't a newer timestamp -int remove(librados::ObjectWriteOperation& op, - const std::string& key, - ceph::real_time timestamp); -RGWCoroutine* remove_cr(RGWSI_RADOS* rados, - const rgw_raw_obj& obj, - const std::string& key, - ceph::real_time timestamp); - -} // namespace rgw::error_repo diff --git a/src/rgw/store/rados/rgw_sync_module.cc b/src/rgw/store/rados/rgw_sync_module.cc deleted file mode 100644 index 5a1e70be34e..00000000000 --- a/src/rgw/store/rados/rgw_sync_module.cc +++ /dev/null @@ -1,87 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_common.h" -#include "rgw_coroutine.h" -#include "rgw_cr_rados.h" -#include "rgw_sync_module.h" -#include "rgw_data_sync.h" -#include "rgw_bucket.h" - -#include "rgw_sync_module_log.h" -#include "rgw_sync_module_es.h" -#include "rgw_sync_module_aws.h" - -#include - -#define dout_subsys ceph_subsys_rgw - -RGWMetadataHandler *RGWSyncModuleInstance::alloc_bucket_meta_handler() -{ - return RGWBucketMetaHandlerAllocator::alloc(); -} - -RGWBucketInstanceMetadataHandlerBase* RGWSyncModuleInstance::alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver) -{ - return RGWBucketInstanceMetaHandlerAllocator::alloc(driver); -} - -RGWStatRemoteObjCBCR::RGWStatRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - src_bucket(_src_bucket), key(_key) { -} - -RGWCallStatRemoteObjCR::RGWCallStatRemoteObjCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - src_bucket(_src_bucket), key(_key) { -} - -int RGWCallStatRemoteObjCR::operate(const DoutPrefixProvider *dpp) { - reenter(this) { - yield { - call(new RGWStatRemoteObjCR(sync_env->async_rados, sync_env->driver, - sc->source_zone, - src_bucket, key, &mtime, &size, &etag, &attrs, &headers)); - } - if (retcode < 0) { - ldpp_dout(dpp, 10) << "RGWStatRemoteObjCR() returned " << retcode << dendl; - return set_cr_error(retcode); - } - ldpp_dout(dpp, 20) << "stat of remote obj: z=" << sc->source_zone - << " b=" << src_bucket << " k=" << key - << " size=" << size << " mtime=" << mtime << dendl; - yield { - RGWStatRemoteObjCBCR *cb = allocate_callback(); - if (cb) { - cb->set_result(mtime, size, etag, std::move(attrs), std::move(headers)); - call(cb); - } - } - if (retcode < 0) { - ldpp_dout(dpp, 10) << "RGWStatRemoteObjCR() callback returned " << retcode << dendl; - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; -} - -void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager) -{ - RGWSyncModuleRef default_module(std::make_shared()); - modules_manager->register_module("rgw", default_module, true); - - RGWSyncModuleRef archive_module(std::make_shared()); - modules_manager->register_module("archive", archive_module); - - RGWSyncModuleRef log_module(std::make_shared()); - 
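  // The module names registered in this function are what a zone's tier_type
  // selects. An illustrative radosgw-admin invocation, following the
  // multisite cloud sync documentation (zone name, endpoint, and credentials
  // are placeholders):
  //
  //   radosgw-admin zone modify --rgw-zone=cloud-archive \
  //       --tier-type=cloud \
  //       --tier-config=connection.endpoint=http://dest:8000,connection.access_key=KEY,connection.secret=SECRET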
modules_manager->register_module("log", log_module); - - RGWSyncModuleRef es_module(std::make_shared()); - modules_manager->register_module("elasticsearch", es_module); - - RGWSyncModuleRef aws_module(std::make_shared()); - modules_manager->register_module("cloud", aws_module); -} diff --git a/src/rgw/store/rados/rgw_sync_module.h b/src/rgw/store/rados/rgw_sync_module.h deleted file mode 100644 index 6d974c39a27..00000000000 --- a/src/rgw/store/rados/rgw_sync_module.h +++ /dev/null @@ -1,202 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_MODULE_H -#define CEPH_RGW_SYNC_MODULE_H - -#include "rgw_common.h" -#include "rgw_coroutine.h" - -class RGWBucketInfo; -class RGWRemoteDataLog; -struct RGWDataSyncCtx; -struct RGWDataSyncEnv; -struct rgw_bucket_entry_owner; -struct rgw_obj_key; -struct rgw_bucket_sync_pipe; - - -class RGWDataSyncModule { -public: - RGWDataSyncModule() {} - virtual ~RGWDataSyncModule() {} - - virtual void init(RGWDataSyncCtx *sync_env, uint64_t instance_id) {} - - virtual RGWCoroutine *init_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) { - return nullptr; - } - - virtual RGWCoroutine *start_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) { - return nullptr; - } - virtual RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) = 0; - virtual RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& bucket_info, rgw_obj_key& key, real_time& mtime, - bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0; - virtual RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& bucket_info, rgw_obj_key& key, real_time& mtime, - rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0; -}; - -class RGWRESTMgr; -class RGWMetadataHandler; -class RGWBucketInstanceMetadataHandlerBase; - -class RGWSyncModuleInstance { -public: - RGWSyncModuleInstance() {} - virtual ~RGWSyncModuleInstance() {} - virtual RGWDataSyncModule *get_data_handler() = 0; - virtual RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) { - return orig; - } - virtual bool supports_user_writes() { - return false; - } - virtual RGWMetadataHandler *alloc_bucket_meta_handler(); - virtual RGWBucketInstanceMetadataHandlerBase *alloc_bucket_instance_meta_handler(rgw::sal::Driver* driver); - - // indication whether the sync module start with full sync (default behavior) - // incremental sync would follow anyway - virtual bool should_full_sync() const { - return true; - } -}; - -typedef std::shared_ptr RGWSyncModuleInstanceRef; - -class JSONFormattable; - -class RGWSyncModule { - -public: - RGWSyncModule() {} - virtual ~RGWSyncModule() {} - - virtual bool supports_writes() { - return false; - } - virtual bool supports_data_export() = 0; - virtual int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) = 0; -}; - -typedef std::shared_ptr RGWSyncModuleRef; - - -class RGWSyncModulesManager { - ceph::mutex lock = ceph::make_mutex("RGWSyncModulesManager"); - - std::map modules; -public: - RGWSyncModulesManager() = default; - - void register_module(const std::string& name, RGWSyncModuleRef& module, bool is_default = false) { - std::lock_guard l{lock}; - 
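    // Note: when is_default is set, the module is additionally registered
    // under the empty name (below), so a lookup with an unset tier_type
    // resolves to the default module. Illustrative sketch (`mgr` is a
    // hypothetical RGWSyncModulesManager*):
    //
    //   RGWSyncModuleRef m;
    //   bool found = mgr->get_module("", &m);  // true; m is the default ("rgw") module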
modules[name] = module; - if (is_default) { - modules[std::string()] = module; - } - } - - bool get_module(const std::string& name, RGWSyncModuleRef *module) { - std::lock_guard l{lock}; - auto iter = modules.find(name); - if (iter == modules.end()) { - return false; - } - if (module != nullptr) { - *module = iter->second; - } - return true; - } - - - bool supports_data_export(const std::string& name) { - RGWSyncModuleRef module; - if (!get_module(name, &module)) { - return false; - } - - return module->supports_data_export(); - } - - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const std::string& name, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { - RGWSyncModuleRef module; - if (!get_module(name, &module)) { - return -ENOENT; - } - - return module.get()->create_instance(dpp, cct, config, instance); - } - - std::vector get_registered_module_names() const { - std::vector names; - for (auto& i: modules) { - if (!i.first.empty()) { - names.push_back(i.first); - } - } - return names; - } -}; - -class RGWStatRemoteObjCBCR : public RGWCoroutine { -protected: - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - rgw_bucket src_bucket; - rgw_obj_key key; - - ceph::real_time mtime; - uint64_t size = 0; - std::string etag; - std::map attrs; - std::map headers; -public: - RGWStatRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key); - ~RGWStatRemoteObjCBCR() override {} - - void set_result(ceph::real_time& _mtime, - uint64_t _size, - const std::string& _etag, - std::map&& _attrs, - std::map&& _headers) { - mtime = _mtime; - size = _size; - etag = _etag; - attrs = std::move(_attrs); - headers = std::move(_headers); - } -}; - -class RGWCallStatRemoteObjCR : public RGWCoroutine { - ceph::real_time mtime; - uint64_t size{0}; - std::string etag; - std::map attrs; - std::map headers; - -protected: - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - - rgw_bucket src_bucket; - rgw_obj_key key; - -public: - RGWCallStatRemoteObjCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key); - - ~RGWCallStatRemoteObjCR() override {} - - int operate(const DoutPrefixProvider *dpp) override; - - virtual RGWStatRemoteObjCBCR *allocate_callback() { - return nullptr; - } -}; - -void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager); - -#endif diff --git a/src/rgw/store/rados/rgw_sync_module_aws.cc b/src/rgw/store/rados/rgw_sync_module_aws.cc deleted file mode 100644 index 6827f7f3a1a..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_aws.cc +++ /dev/null @@ -1,1836 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" - -#include "rgw_common.h" -#include "rgw_coroutine.h" -#include "rgw_sync_module.h" -#include "rgw_data_sync.h" -#include "rgw_sync_module_aws.h" -#include "rgw_cr_rados.h" -#include "rgw_rest_conn.h" -#include "rgw_cr_rest.h" -#include "rgw_acl.h" -#include "rgw_zone.h" - -#include "services/svc_zone.h" - -#include - -#define dout_subsys ceph_subsys_rgw - - -#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) - -using namespace std; - -static string default_target_path = "rgw-${zonegroup}-${sid}/${bucket}"; - -static string get_key_oid(const rgw_obj_key& key) -{ - string oid = key.name; - if (!key.instance.empty() && - !key.have_null_instance()) { - oid += string(":") + key.instance; - } - return oid; -} - -static string obj_to_aws_path(rgw::sal::Object* obj) -{ - string path = 
obj->get_bucket()->get_name() + "/" + get_key_oid(obj->get_key());
-
-
-  return path;
-}
-
-/*
-
- json configuration definition:
-
- {
-   "connection": {
-     "access_key": <access key>,
-     "secret": <secret>,
-     "endpoint": <endpoint>,
-     "host_style": <path | virtual>,
-   },
-   "acls": [ { "type": <id | email | uri>,
-               "source_id": <source_id>,
-               "dest_id": <dest_id> } ... ],   # optional, acl mappings, no mappings if does not exist
-   "target_path": <target_path>,               # override default
-
-
- # anything below here is for non trivial configuration
- # can be used in conjunction with the above
-
-   "default": {
-     "connection": {
-       "access_key": <access key>,
-       "secret": <secret>,
-       "endpoint": <endpoint>,
-       "host_style": <path | virtual>,
-     },
-     "acls": [   # list of source uids and how they map into destination uids in the dest objects' acls
-       {
-         "type": <id | email | uri>,   # optional, default is id
-         "source_id": <source_id>,
-         "dest_id": <dest_id>
-       } ... ]
-     "target_path": "rgwx-${sid}/${bucket}"   # how a bucket name is mapped to destination path,
-                                              # final object name will be target_path + "/" + obj
-   },
-   "connections": [
-     {
-       "id": <id>,
-       "access_key": <access key>,
-       "secret": <secret>,
-       "endpoint": <endpoint>,
-     } ... ],
-   "acl_profiles": [
-     {
-       "id": <id>,   # acl mappings
-       "acls": [ {
-         "type": <id | email | uri>,
-         "source_id": <source_id>,
-         "dest_id": <dest_id>
-       } ... ]
-     }
-   ],
-   "profiles": [
-     {
-       "source_bucket": <source_bucket>,   # can specify either specific bucket name (foo), or prefix (foo*)
-       "target_path": <target_path>,       # (override default)
-       "connection_id": <connection_id>,   # optional, if empty references default connection
-       "acls_id": <acls_id>,               # optional, if empty references default mappings
-     } ... ],
- }
-
-target path optional variables:
-
-(evaluated at init)
-sid: sync instance id, randomly generated by sync process on first sync initialization
-zonegroup: zonegroup name
-zonegroup_id: zonegroup id
-zone: zone name
-zone_id: zone id
-
-(evaluated when syncing)
-bucket: bucket name
-owner: bucket owner
-
-*/
-
-struct ACLMapping {
-  ACLGranteeTypeEnum type{ACL_TYPE_CANON_USER};
-  string source_id;
-  string dest_id;
-
-  ACLMapping() = default;
-
-  ACLMapping(ACLGranteeTypeEnum t,
-             const string& s,
-             const string& d) : type(t),
-                                source_id(s),
-                                dest_id(d) {}
-
-  void init(const JSONFormattable& config) {
-    const string& t = config["type"];
-
-    if (t == "email") {
-      type = ACL_TYPE_EMAIL_USER;
-    } else if (t == "uri") {
-      type = ACL_TYPE_GROUP;
-    } else {
-      type = ACL_TYPE_CANON_USER;
-    }
-
-    source_id = config["source_id"];
-    dest_id = config["dest_id"];
-  }
-
-  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
-    Formatter::ObjectSection os(jf, "acl_mapping");
-    string s;
-    switch (type) {
-      case ACL_TYPE_EMAIL_USER:
-        s = "email";
-        break;
-      case ACL_TYPE_GROUP:
-        s = "uri";
-        break;
-      default:
-        s = "id";
-        break;
-    }
-    encode_json("type", s, &jf);
-    encode_json("source_id", source_id, &jf);
-    encode_json("dest_id", dest_id, &jf);
-  }
-};
-
-struct ACLMappings {
-  map<string, ACLMapping> acl_mappings;
-
-  void init(const JSONFormattable& config) {
-    for (auto& c : config.array()) {
-      ACLMapping m;
-      m.init(c);
-
-      acl_mappings.emplace(std::make_pair(m.source_id, m));
-    }
-  }
-  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
-    Formatter::ArraySection os(jf, "acls");
-
-    for (auto& i : acl_mappings) {
-      i.second.dump_conf(cct, jf);
-    }
-  }
-};
-
-struct AWSSyncConfig_ACLProfiles {
-  map<string, std::shared_ptr<ACLMappings> > acl_profiles;
-
-  void init(const JSONFormattable& config) {
-    for (auto& c : config.array()) {
-      const string& profile_id = c["id"];
-
-      std::shared_ptr<ACLMappings> ap{new ACLMappings};
-      ap->init(c["acls"]);
-
-      acl_profiles[profile_id] = ap;
-    }
-  }
-  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
-    Formatter::ArraySection section(jf, "acl_profiles");
-
-    for (auto& p : acl_profiles)
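    // For illustration, a small concrete configuration matching the schema
    // documented above (endpoint and credentials are placeholders):
    //
    //   {
    //     "connection": {
    //       "access_key": "DESTACCESSKEY",
    //       "secret": "DESTSECRET",
    //       "endpoint": "http://dest-s3:8000",
    //       "host_style": "path"
    //     },
    //     "acls": [ { "type": "id", "source_id": "local-user", "dest_id": "remote-user" } ],
    //     "target_path": "rgw-${zonegroup}-${sid}/${bucket}"
    //   }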
{ - Formatter::ObjectSection section(jf, "profile"); - encode_json("id", p.first, &jf); - p.second->dump_conf(cct, jf); - } - } - - bool find(const string& profile_id, ACLMappings *result) const { - auto iter = acl_profiles.find(profile_id); - if (iter == acl_profiles.end()) { - return false; - } - *result = *iter->second; - return true; - } -}; - -struct AWSSyncConfig_Connection { - string connection_id; - string endpoint; - RGWAccessKey key; - std::optional region; - HostStyle host_style{PathStyle}; - - bool has_endpoint{false}; - bool has_key{false}; - bool has_host_style{false}; - - void init(const JSONFormattable& config) { - has_endpoint = config.exists("endpoint"); - has_key = config.exists("access_key") || config.exists("secret"); - has_host_style = config.exists("host_style"); - - connection_id = config["id"]; - endpoint = config["endpoint"]; - - key = RGWAccessKey(config["access_key"], config["secret"]); - - if (config.exists("region")) { - region = config["region"]; - } else { - region.reset(); - } - - string host_style_str = config["host_style"]; - if (host_style_str != "virtual") { - host_style = PathStyle; - } else { - host_style = VirtualStyle; - } - } - void dump_conf(CephContext *cct, JSONFormatter& jf) const { - Formatter::ObjectSection section(jf, "connection"); - encode_json("id", connection_id, &jf); - encode_json("endpoint", endpoint, &jf); - string s = (host_style == PathStyle ? "path" : "virtual"); - encode_json("region", region, &jf); - encode_json("host_style", s, &jf); - - { - Formatter::ObjectSection os(jf, "key"); - encode_json("access_key", key.id, &jf); - string secret = (key.key.empty() ? "" : "******"); - encode_json("secret", secret, &jf); - } - } -}; - -static int conf_to_uint64(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, const string& key, uint64_t *pval) -{ - string sval; - if (config.find(key, &sval)) { - string err; - uint64_t val = strict_strtoll(sval.c_str(), 10, &err); - if (!err.empty()) { - ldpp_dout(dpp, 0) << "ERROR: could not parse configurable value for cloud sync module: " << key << ": " << sval << dendl; - return -EINVAL; - } - *pval = val; - } - return 0; -} - -struct AWSSyncConfig_S3 { - uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE}; - uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE}; - - int init(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) { - int r = conf_to_uint64(dpp, cct, config, "multipart_sync_threshold", &multipart_sync_threshold); - if (r < 0) { - return r; - } - - r = conf_to_uint64(dpp, cct, config, "multipart_min_part_size", &multipart_min_part_size); - if (r < 0) { - return r; - } -#define MULTIPART_MIN_POSSIBLE_PART_SIZE (5 * 1024 * 1024) - if (multipart_min_part_size < MULTIPART_MIN_POSSIBLE_PART_SIZE) { - multipart_min_part_size = MULTIPART_MIN_POSSIBLE_PART_SIZE; - } - return 0; - } - - void dump_conf(CephContext *cct, JSONFormatter& jf) const { - Formatter::ObjectSection section(jf, "s3"); - encode_json("multipart_sync_threshold", multipart_sync_threshold, &jf); - encode_json("multipart_min_part_size", multipart_min_part_size, &jf); - } -}; - -struct AWSSyncConfig_Profile { - string source_bucket; - bool prefix{false}; - string target_path; - string connection_id; - string acls_id; - - std::shared_ptr conn_conf; - std::shared_ptr acls; - - std::shared_ptr conn; - - void init(const JSONFormattable& config) { - source_bucket = config["source_bucket"]; - - prefix = (!source_bucket.empty() && 
source_bucket[source_bucket.size() - 1] == '*'); - - if (prefix) { - source_bucket = source_bucket.substr(0, source_bucket.size() - 1); - } - - target_path = config["target_path"]; - connection_id = config["connection_id"]; - acls_id = config["acls_id"]; - - if (config.exists("connection")) { - conn_conf = make_shared(); - conn_conf->init(config["connection"]); - } - - if (config.exists("acls")) { - acls = make_shared(); - acls->init(config["acls"]); - } - } - - void dump_conf(CephContext *cct, JSONFormatter& jf, const char *section = "config") const { - Formatter::ObjectSection config(jf, section); - string sb{source_bucket}; - if (prefix) { - sb.append("*"); - } - encode_json("source_bucket", sb, &jf); - encode_json("target_path", target_path, &jf); - encode_json("connection_id", connection_id, &jf); - encode_json("acls_id", acls_id, &jf); - if (conn_conf.get()) { - conn_conf->dump_conf(cct, jf); - } - if (acls.get()) { - acls->dump_conf(cct, jf); - } - } -}; - -static void find_and_replace(const string& src, const string& find, const string& replace, string *dest) -{ - string s = src; - - size_t pos = s.find(find); - while (pos != string::npos) { - size_t next_ofs = pos + find.size(); - s = s.substr(0, pos) + replace + s.substr(next_ofs); - pos = s.find(find, next_ofs); - } - - *dest = s; -} - -static void apply_meta_param(const string& src, const string& param, const string& val, string *dest) -{ - string s = string("${") + param + "}"; - find_and_replace(src, s, val, dest); -} - - -struct AWSSyncConfig { - AWSSyncConfig_Profile default_profile; - std::shared_ptr root_profile; - - map > connections; - AWSSyncConfig_ACLProfiles acl_profiles; - - map > explicit_profiles; - - AWSSyncConfig_S3 s3; - - int init_profile(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& profile_conf, AWSSyncConfig_Profile& profile, - bool connection_must_exist) { - if (!profile.connection_id.empty()) { - if (profile.conn_conf) { - ldpp_dout(dpp, 0) << "ERROR: ambiguous profile connection configuration, connection_id=" << profile.connection_id << dendl; - return -EINVAL; - } - if (connections.find(profile.connection_id) == connections.end()) { - ldpp_dout(dpp, 0) << "ERROR: profile configuration reference non-existent connection_id=" << profile.connection_id << dendl; - return -EINVAL; - } - profile.conn_conf = connections[profile.connection_id]; - } else if (!profile.conn_conf) { - profile.connection_id = default_profile.connection_id; - auto i = connections.find(profile.connection_id); - if (i != connections.end()) { - profile.conn_conf = i->second; - } - } - - if (connection_must_exist && !profile.conn_conf) { - ldpp_dout(dpp, 0) << "ERROR: remote connection undefined for sync profile" << dendl; - return -EINVAL; - } - - if (profile.conn_conf && default_profile.conn_conf) { - if (!profile.conn_conf->has_endpoint) { - profile.conn_conf->endpoint = default_profile.conn_conf->endpoint; - } - if (!profile.conn_conf->has_host_style) { - profile.conn_conf->host_style = default_profile.conn_conf->host_style; - } - if (!profile.conn_conf->has_key) { - profile.conn_conf->key = default_profile.conn_conf->key; - } - } - - ACLMappings acl_mappings; - - if (!profile.acls_id.empty()) { - if (!acl_profiles.find(profile.acls_id, &acl_mappings)) { - ldpp_dout(dpp, 0) << "ERROR: profile configuration reference non-existent acls id=" << profile.acls_id << dendl; - return -EINVAL; - } - profile.acls = acl_profiles.acl_profiles[profile.acls_id]; - } else if (!profile.acls) { - if (default_profile.acls) { - 
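      // Resolution order illustrated: an explicit per-profile setting wins,
      // then the "default" profile's setting, then the compiled-in
      // default_target_path ("rgw-${zonegroup}-${sid}/${bucket}"). So a
      // profile carrying only "source_bucket" ends up with the default
      // connection, the default acl mappings, and the default target path.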
profile.acls = default_profile.acls; - profile.acls_id = default_profile.acls_id; - } - } - - if (profile.target_path.empty()) { - profile.target_path = default_profile.target_path; - } - if (profile.target_path.empty()) { - profile.target_path = default_target_path; - } - - return 0; - } - - int init_target(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& profile_conf, std::shared_ptr *ptarget) { - std::shared_ptr profile; - profile.reset(new AWSSyncConfig_Profile); - profile->init(profile_conf); - - int ret = init_profile(dpp, cct, profile_conf, *profile, true); - if (ret < 0) { - return ret; - } - - auto& sb = profile->source_bucket; - - if (explicit_profiles.find(sb) != explicit_profiles.end()) { - ldpp_dout(dpp, 0) << "WARNING: duplicate target configuration in sync module" << dendl; - } - - explicit_profiles[sb] = profile; - if (ptarget) { - *ptarget = profile; - } - return 0; - } - - bool do_find_profile(const rgw_bucket bucket, std::shared_ptr *result) { - const string& name = bucket.name; - auto iter = explicit_profiles.upper_bound(name); - if (iter == explicit_profiles.begin()) { - return false; - } - - --iter; - if (iter->first.size() > name.size()) { - return false; - } - if (name.compare(0, iter->first.size(), iter->first) != 0) { - return false; - } - - std::shared_ptr& target = iter->second; - - if (!target->prefix && - name.size() != iter->first.size()) { - return false; - } - - *result = target; - return true; - } - - void find_profile(const rgw_bucket bucket, std::shared_ptr *result) { - if (!do_find_profile(bucket, result)) { - *result = root_profile; - } - } - - AWSSyncConfig() {} - - int init(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) { - auto& default_conf = config["default"]; - - if (config.exists("default")) { - default_profile.init(default_conf); - init_profile(dpp, cct, default_conf, default_profile, false); - } - - for (auto& conn : config["connections"].array()) { - auto new_conn = conn; - - std::shared_ptr c{new AWSSyncConfig_Connection}; - c->init(new_conn); - - connections[new_conn["id"]] = c; - } - - acl_profiles.init(config["acl_profiles"]); - - int r = s3.init(dpp, cct, config["s3"]); - if (r < 0) { - return r; - } - - auto new_root_conf = config; - - r = init_target(dpp, cct, new_root_conf, &root_profile); /* the root profile config */ - if (r < 0) { - return r; - } - - for (auto target_conf : config["profiles"].array()) { - int r = init_target(dpp, cct, target_conf, nullptr); - if (r < 0) { - return r; - } - } - - JSONFormatter jf(true); - dump_conf(cct, jf); - stringstream ss; - jf.flush(ss); - - ldpp_dout(dpp, 5) << "sync module config (parsed representation):\n" << ss.str() << dendl; - - return 0; - } - - void expand_target(RGWDataSyncCtx *sc, const string& sid, const string& path, string *dest) { - apply_meta_param(path, "sid", sid, dest); - - const RGWZoneGroup& zg = sc->env->svc->zone->get_zonegroup(); - apply_meta_param(path, "zonegroup", zg.get_name(), dest); - apply_meta_param(path, "zonegroup_id", zg.get_id(), dest); - - const RGWZone& zone = sc->env->svc->zone->get_zone(); - apply_meta_param(path, "zone", zone.name, dest); - apply_meta_param(path, "zone_id", zone.id, dest); - } - - void update_config(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, const string& sid) { - expand_target(sc, sid, root_profile->target_path, &root_profile->target_path); - ldpp_dout(dpp, 20) << "updated target: (root) -> " << root_profile->target_path << dendl; - for (auto& t : explicit_profiles) { - 
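      // Worked example (values hypothetical): with sid="17b2f8a0" and
      // zonegroup "us", "rgw-${zonegroup}-${sid}/${bucket}" expands here to
      // "rgw-us-17b2f8a0/${bucket}"; "${bucket}" and "${owner}" are only
      // substituted later, per object, in get_path().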
expand_target(sc, sid, t.second->target_path, &t.second->target_path); - ldpp_dout(dpp, 20) << "updated target: " << t.first << " -> " << t.second->target_path << dendl; - } - } - - void dump_conf(CephContext *cct, JSONFormatter& jf) const { - Formatter::ObjectSection config(jf, "config"); - root_profile->dump_conf(cct, jf); - jf.open_array_section("connections"); - for (auto c : connections) { - c.second->dump_conf(cct, jf); - } - jf.close_section(); - - acl_profiles.dump_conf(cct, jf); - - { // targets - Formatter::ArraySection as(jf, "profiles"); - for (auto& t : explicit_profiles) { - Formatter::ObjectSection target_section(jf, "profile"); - encode_json("name", t.first, &jf); - t.second->dump_conf(cct, jf); - } - } - } - - string get_path(std::shared_ptr& profile, - const RGWBucketInfo& bucket_info, - const rgw_obj_key& obj) { - string bucket_str; - string owner; - if (!bucket_info.owner.tenant.empty()) { - bucket_str = owner = bucket_info.owner.tenant + "-"; - owner += bucket_info.owner.id; - } - bucket_str += bucket_info.bucket.name; - - const string& path = profile->target_path; - - string new_path; - apply_meta_param(path, "bucket", bucket_str, &new_path); - apply_meta_param(new_path, "owner", owner, &new_path); - - new_path += string("/") + get_key_oid(obj); - - return new_path; - } - - void get_target(std::shared_ptr& profile, - const RGWBucketInfo& bucket_info, - const rgw_obj_key& obj, - string *bucket_name, - string *obj_name) { - string path = get_path(profile, bucket_info, obj); - size_t pos = path.find('/'); - - *bucket_name = path.substr(0, pos); - *obj_name = path.substr(pos + 1); - } - - void init_conns(RGWDataSyncCtx *sc, const string& id) { - auto sync_env = sc->env; - - update_config(sync_env->dpp, sc, id); - - auto& root_conf = root_profile->conn_conf; - - root_profile->conn.reset(new S3RESTConn(sc->cct, - id, - { root_conf->endpoint }, - root_conf->key, - sync_env->svc->zone->get_zonegroup().get_id(), - root_conf->region, - root_conf->host_style)); - - for (auto i : explicit_profiles) { - auto& c = i.second; - - c->conn.reset(new S3RESTConn(sc->cct, - id, - { c->conn_conf->endpoint }, - c->conn_conf->key, - sync_env->svc->zone->get_zonegroup().get_id(), - c->conn_conf->region, - c->conn_conf->host_style)); - } - } -}; - - -struct AWSSyncInstanceEnv { - AWSSyncConfig conf; - string id; - - explicit AWSSyncInstanceEnv(AWSSyncConfig& _conf) : conf(_conf) {} - - void init(RGWDataSyncCtx *sc, uint64_t instance_id) { - char buf[32]; - snprintf(buf, sizeof(buf), "%llx", (unsigned long long)instance_id); - id = buf; - - conf.init_conns(sc, id); - } - - void get_profile(const rgw_bucket& bucket, std::shared_ptr *ptarget) { - conf.find_profile(bucket, ptarget); - ceph_assert(ptarget); - } -}; - -static int do_decode_rest_obj(const DoutPrefixProvider *dpp, CephContext *cct, map& attrs, map& headers, rgw_rest_obj *info) -{ - for (auto header : headers) { - const string& val = header.second; - if (header.first == "RGWX_OBJECT_SIZE") { - info->content_len = atoi(val.c_str()); - } else { - info->attrs[header.first] = val; - } - } - - info->acls.set_ctx(cct); - auto aiter = attrs.find(RGW_ATTR_ACL); - if (aiter != attrs.end()) { - bufferlist& bl = aiter->second; - auto bliter = bl.cbegin(); - try { - info->acls.decode(bliter); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode policy off attrs" << dendl; - return -EIO; - } - } else { - ldpp_dout(dpp, 0) << "WARNING: acl attrs not provided" << dendl; - } - - return 0; -} - -class RGWRESTStreamGetCRF : 
public RGWStreamReadHTTPResourceCRF -{ - RGWDataSyncCtx *sc; - RGWRESTConn *conn; - rgw::sal::Object* src_obj; - RGWRESTConn::get_obj_params req_params; - - rgw_sync_aws_src_obj_properties src_properties; -public: - RGWRESTStreamGetCRF(CephContext *_cct, - RGWCoroutinesEnv *_env, - RGWCoroutine *_caller, - RGWDataSyncCtx *_sc, - RGWRESTConn *_conn, - rgw::sal::Object* _src_obj, - const rgw_sync_aws_src_obj_properties& _src_properties) : RGWStreamReadHTTPResourceCRF(_cct, _env, _caller, - _sc->env->http_manager, _src_obj->get_key()), - sc(_sc), conn(_conn), src_obj(_src_obj), - src_properties(_src_properties) { - } - - int init(const DoutPrefixProvider *dpp) override { - /* init input connection */ - - - req_params.get_op = true; - req_params.prepend_metadata = true; - - req_params.unmod_ptr = &src_properties.mtime; - req_params.etag = src_properties.etag; - req_params.mod_zone_id = src_properties.zone_short_id; - req_params.mod_pg_ver = src_properties.pg_ver; - - if (range.is_set) { - req_params.range_is_set = true; - req_params.range_start = range.ofs; - req_params.range_end = range.ofs + range.size - 1; - } - - RGWRESTStreamRWRequest *in_req; - int ret = conn->get_obj(dpp, src_obj, req_params, false /* send */, &in_req); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): conn->get_obj() returned ret=" << ret << dendl; - return ret; - } - - set_req(in_req); - - return RGWStreamReadHTTPResourceCRF::init(dpp); - } - - int decode_rest_obj(const DoutPrefixProvider *dpp, map& headers, bufferlist& extra_data) override { - map src_attrs; - - ldpp_dout(dpp, 20) << __func__ << ":" << " headers=" << headers << " extra_data.length()=" << extra_data.length() << dendl; - - if (extra_data.length() > 0) { - JSONParser jp; - if (!jp.parse(extra_data.c_str(), extra_data.length())) { - ldpp_dout(dpp, 0) << "ERROR: failed to parse response extra data. 
len=" << extra_data.length() << " data=" << extra_data.c_str() << dendl; - return -EIO; - } - - JSONDecoder::decode_json("attrs", src_attrs, &jp); - } - return do_decode_rest_obj(dpp, sc->cct, src_attrs, headers, &rest_obj); - } - - bool need_extra_data() override { - return true; - } -}; - -static std::set keep_headers = { "CONTENT_TYPE", - "CONTENT_ENCODING", - "CONTENT_DISPOSITION", - "CONTENT_LANGUAGE" }; - -class RGWAWSStreamPutCRF : public RGWStreamWriteHTTPResourceCRF -{ - RGWDataSyncCtx *sc; - rgw_sync_aws_src_obj_properties src_properties; - std::shared_ptr target; - rgw::sal::Object* dest_obj; - string etag; -public: - RGWAWSStreamPutCRF(CephContext *_cct, - RGWCoroutinesEnv *_env, - RGWCoroutine *_caller, - RGWDataSyncCtx *_sc, - const rgw_sync_aws_src_obj_properties& _src_properties, - std::shared_ptr& _target, - rgw::sal::Object* _dest_obj) : RGWStreamWriteHTTPResourceCRF(_cct, _env, _caller, _sc->env->http_manager), - sc(_sc), src_properties(_src_properties), target(_target), dest_obj(_dest_obj) { - } - - int init() override { - /* init output connection */ - RGWRESTStreamS3PutObj *out_req{nullptr}; - - if (multipart.is_multipart) { - char buf[32]; - snprintf(buf, sizeof(buf), "%d", multipart.part_num); - rgw_http_param_pair params[] = { { "uploadId", multipart.upload_id.c_str() }, - { "partNumber", buf }, - { nullptr, nullptr } }; - target->conn->put_obj_send_init(dest_obj, params, &out_req); - } else { - target->conn->put_obj_send_init(dest_obj, nullptr, &out_req); - } - - set_req(out_req); - - return RGWStreamWriteHTTPResourceCRF::init(); - } - - static bool keep_attr(const string& h) { - return (keep_headers.find(h) != keep_headers.end() || - boost::algorithm::starts_with(h, "X_AMZ_")); - } - - static void init_send_attrs(const DoutPrefixProvider *dpp, - CephContext *cct, - const rgw_rest_obj& rest_obj, - const rgw_sync_aws_src_obj_properties& src_properties, - const AWSSyncConfig_Profile *target, - map *attrs) { - auto& new_attrs = *attrs; - - new_attrs.clear(); - - for (auto& hi : rest_obj.attrs) { - if (keep_attr(hi.first)) { - new_attrs.insert(hi); - } - } - - auto acl = rest_obj.acls.get_acl(); - - map > access_map; - - if (target->acls) { - for (auto& grant : acl.get_grant_map()) { - auto& orig_grantee = grant.first; - auto& perm = grant.second; - - string grantee; - - const auto& am = target->acls->acl_mappings; - - auto iter = am.find(orig_grantee); - if (iter == am.end()) { - ldpp_dout(dpp, 20) << "acl_mappings: Could not find " << orig_grantee << " .. 
ignoring" << dendl; - continue; - } - - grantee = iter->second.dest_id; - - string type; - - switch (iter->second.type) { - case ACL_TYPE_CANON_USER: - type = "id"; - break; - case ACL_TYPE_EMAIL_USER: - type = "emailAddress"; - break; - case ACL_TYPE_GROUP: - type = "uri"; - break; - default: - continue; - } - - string tv = type + "=" + grantee; - - int flags = perm.get_permission().get_permissions(); - if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) { - access_map[flags].push_back(tv); - continue; - } - - for (int i = 1; i <= RGW_PERM_WRITE_ACP; i <<= 1) { - if (flags & i) { - access_map[i].push_back(tv); - } - } - } - } - - for (auto aiter : access_map) { - int grant_type = aiter.first; - - string header_str("x-amz-grant-"); - - switch (grant_type) { - case RGW_PERM_READ: - header_str.append("read"); - break; - case RGW_PERM_WRITE: - header_str.append("write"); - break; - case RGW_PERM_READ_ACP: - header_str.append("read-acp"); - break; - case RGW_PERM_WRITE_ACP: - header_str.append("write-acp"); - break; - case RGW_PERM_FULL_CONTROL: - header_str.append("full-control"); - break; - } - - string s; - - for (auto viter : aiter.second) { - if (!s.empty()) { - s.append(", "); - } - s.append(viter); - } - - ldpp_dout(dpp, 20) << "acl_mappings: set acl: " << header_str << "=" << s << dendl; - - new_attrs[header_str] = s; - } - - char buf[32]; - snprintf(buf, sizeof(buf), "%llu", (long long)src_properties.versioned_epoch); - new_attrs["x-amz-meta-rgwx-versioned-epoch"] = buf; - - utime_t ut(src_properties.mtime); - snprintf(buf, sizeof(buf), "%lld.%09lld", - (long long)ut.sec(), - (long long)ut.nsec()); - - new_attrs["x-amz-meta-rgwx-source-mtime"] = buf; - new_attrs["x-amz-meta-rgwx-source-etag"] = src_properties.etag; - new_attrs["x-amz-meta-rgwx-source-key"] = rest_obj.key.name; - if (!rest_obj.key.instance.empty()) { - new_attrs["x-amz-meta-rgwx-source-version-id"] = rest_obj.key.instance; - } - } - - void send_ready(const DoutPrefixProvider *dpp, const rgw_rest_obj& rest_obj) override { - RGWRESTStreamS3PutObj *r = static_cast(req); - - map new_attrs; - if (!multipart.is_multipart) { - init_send_attrs(dpp, sc->cct, rest_obj, src_properties, target.get(), &new_attrs); - } - - r->set_send_length(rest_obj.content_len); - - RGWAccessControlPolicy policy; - - r->send_ready(dpp, target->conn->get_key(), new_attrs, policy); - } - - void handle_headers(const map& headers) { - for (auto h : headers) { - if (h.first == "ETAG") { - etag = h.second; - } - } - } - - bool get_etag(string *petag) { - if (etag.empty()) { - return false; - } - *petag = etag; - return true; - } -}; - - -class RGWAWSStreamObjToCloudPlainCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *source_conn; - std::shared_ptr target; - rgw::sal::Object* src_obj; - rgw::sal::Object* dest_obj; - - rgw_sync_aws_src_obj_properties src_properties; - - std::shared_ptr in_crf; - std::shared_ptr out_crf; - -public: - RGWAWSStreamObjToCloudPlainCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_source_conn, - rgw::sal::Object* _src_obj, - const rgw_sync_aws_src_obj_properties& _src_properties, - std::shared_ptr _target, - rgw::sal::Object* _dest_obj) : RGWCoroutine(_sc->cct), - sc(_sc), - source_conn(_source_conn), - target(_target), - src_obj(_src_obj), - dest_obj(_dest_obj), - src_properties(_src_properties) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - /* init input */ - in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sc, - source_conn, src_obj, - src_properties)); - - /* init 
output */ - out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sc, - src_properties, target, dest_obj)); - - yield call(new RGWStreamSpliceCR(cct, sc->env->http_manager, in_crf, out_crf)); - if (retcode < 0) { - return set_cr_error(retcode); - } - - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSStreamObjToCloudMultipartPartCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *source_conn; - std::shared_ptr target; - rgw::sal::Object* src_obj; - rgw::sal::Object* dest_obj; - - rgw_sync_aws_src_obj_properties src_properties; - - string upload_id; - - rgw_sync_aws_multipart_part_info part_info; - - std::shared_ptr in_crf; - std::shared_ptr out_crf; - - string *petag; - -public: - RGWAWSStreamObjToCloudMultipartPartCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_source_conn, - rgw::sal::Object* _src_obj, - std::shared_ptr& _target, - rgw::sal::Object* _dest_obj, - const rgw_sync_aws_src_obj_properties& _src_properties, - const string& _upload_id, - const rgw_sync_aws_multipart_part_info& _part_info, - string *_petag) : RGWCoroutine(_sc->cct), - sc(_sc), - source_conn(_source_conn), - target(_target), - src_obj(_src_obj), - dest_obj(_dest_obj), - src_properties(_src_properties), - upload_id(_upload_id), - part_info(_part_info), - petag(_petag) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - /* init input */ - in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sc, - source_conn, src_obj, - src_properties)); - - in_crf->set_range(part_info.ofs, part_info.size); - - /* init output */ - out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sc, - src_properties, target, dest_obj)); - - out_crf->set_multipart(upload_id, part_info.part_num, part_info.size); - - yield call(new RGWStreamSpliceCR(cct, sc->env->http_manager, in_crf, out_crf)); - if (retcode < 0) { - return set_cr_error(retcode); - } - - if (!(static_cast(out_crf.get()))->get_etag(petag)) { - ldpp_dout(dpp, 0) << "ERROR: failed to get etag from PUT request" << dendl; - return set_cr_error(-EIO); - } - - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSAbortMultipartCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *dest_conn; - rgw::sal::Object* dest_obj; - - string upload_id; - -public: - RGWAWSAbortMultipartCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_dest_conn, - rgw::sal::Object* _dest_obj, - const string& _upload_id) : RGWCoroutine(_sc->cct), - sc(_sc), - dest_conn(_dest_conn), - dest_obj(_dest_obj), - upload_id(_upload_id) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - - yield { - rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; - bufferlist bl; - call(new RGWDeleteRESTResourceCR(sc->cct, dest_conn, sc->env->http_manager, - obj_to_aws_path(dest_obj), params)); - } - - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload for dest object=" << dest_obj << " (retcode=" << retcode << ")" << dendl; - return set_cr_error(retcode); - } - - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSInitMultipartCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *dest_conn; - rgw::sal::Object* dest_obj; - - uint64_t obj_size; - map attrs; - - bufferlist out_bl; - - string *upload_id; - - struct InitMultipartResult { - string bucket; - string key; - string upload_id; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Bucket", bucket, obj); - RGWXMLDecoder::decode_xml("Key", key, obj); - 
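      // Parses a standard S3 InitiateMultipartUploadResult body, e.g.
      // (values are placeholders):
      //
      //   <InitiateMultipartUploadResult>
      //     <Bucket>rgw-us-17b2f8a0</Bucket>
      //     <Key>mybucket/myobject</Key>
      //     <UploadId>2~sample-upload-id</UploadId>
      //   </InitiateMultipartUploadResult>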
RGWXMLDecoder::decode_xml("UploadId", upload_id, obj); - } - } result; - -public: - RGWAWSInitMultipartCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_dest_conn, - rgw::sal::Object* _dest_obj, - uint64_t _obj_size, - const map& _attrs, - string *_upload_id) : RGWCoroutine(_sc->cct), - sc(_sc), - dest_conn(_dest_conn), - dest_obj(_dest_obj), - obj_size(_obj_size), - attrs(_attrs), - upload_id(_upload_id) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - - yield { - rgw_http_param_pair params[] = { { "uploads", nullptr }, {nullptr, nullptr} }; - bufferlist bl; - call(new RGWPostRawRESTResourceCR (sc->cct, dest_conn, sc->env->http_manager, - obj_to_aws_path(dest_obj), params, &attrs, bl, &out_bl)); - } - - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl; - return set_cr_error(retcode); - } - { - /* - * If one of the following fails we cannot abort upload, as we cannot - * extract the upload id. If one of these fail it's very likely that that's - * the least of our problem. - */ - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; - return set_cr_error(-EIO); - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; - return set_cr_error(-EIO); - } - - try { - RGWXMLDecoder::decode_xml("InitiateMultipartUploadResult", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return set_cr_error(-EIO); - } - } - - ldpp_dout(dpp, 20) << "init multipart result: bucket=" << result.bucket << " key=" << result.key << " upload_id=" << result.upload_id << dendl; - - *upload_id = result.upload_id; - - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSCompleteMultipartCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *dest_conn; - rgw::sal::Object* dest_obj; - - bufferlist out_bl; - - string upload_id; - - struct CompleteMultipartReq { - map parts; - - explicit CompleteMultipartReq(const map& _parts) : parts(_parts) {} - - void dump_xml(Formatter *f) const { - for (auto p : parts) { - f->open_object_section("Part"); - encode_xml("PartNumber", p.first, f); - encode_xml("ETag", p.second.etag, f); - f->close_section(); - }; - } - } req_enc; - - struct CompleteMultipartResult { - string location; - string bucket; - string key; - string etag; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Location", bucket, obj); - RGWXMLDecoder::decode_xml("Bucket", bucket, obj); - RGWXMLDecoder::decode_xml("Key", key, obj); - RGWXMLDecoder::decode_xml("ETag", etag, obj); - } - } result; - -public: - RGWAWSCompleteMultipartCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_dest_conn, - rgw::sal::Object* _dest_obj, - string _upload_id, - const map& _parts) : RGWCoroutine(_sc->cct), - sc(_sc), - dest_conn(_dest_conn), - dest_obj(_dest_obj), - upload_id(_upload_id), - req_enc(_parts) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - - yield { - rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} }; - stringstream ss; - XMLFormatter formatter; - - encode_xml("CompleteMultipartUpload", req_enc, &formatter); - - formatter.flush(ss); - - bufferlist bl; - 
bl.append(ss.str()); - - call(new RGWPostRawRESTResourceCR (sc->cct, dest_conn, sc->env->http_manager, - obj_to_aws_path(dest_obj), params, nullptr, bl, &out_bl)); - } - - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl; - return set_cr_error(retcode); - } - { - /* - * If one of the following fails we cannot abort upload, as we cannot - * extract the upload id. If one of these fail it's very likely that that's - * the least of our problem. - */ - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; - return set_cr_error(-EIO); - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; - return set_cr_error(-EIO); - } - - try { - RGWXMLDecoder::decode_xml("CompleteMultipartUploadResult", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return set_cr_error(-EIO); - } - } - - ldpp_dout(dpp, 20) << "complete multipart result: location=" << result.location << " bucket=" << result.bucket << " key=" << result.key << " etag=" << result.etag << dendl; - - return set_cr_done(); - } - - return 0; - } -}; - - -class RGWAWSStreamAbortMultipartUploadCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWRESTConn *dest_conn; - rgw::sal::Object* dest_obj; - const rgw_raw_obj status_obj; - - string upload_id; - -public: - - RGWAWSStreamAbortMultipartUploadCR(RGWDataSyncCtx *_sc, - RGWRESTConn *_dest_conn, - rgw::sal::Object* _dest_obj, - const rgw_raw_obj& _status_obj, - const string& _upload_id) : RGWCoroutine(_sc->cct), sc(_sc), - dest_conn(_dest_conn), - dest_obj(_dest_obj), - status_obj(_status_obj), - upload_id(_upload_id) {} - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield call(new RGWAWSAbortMultipartCR(sc, dest_conn, dest_obj, upload_id)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload dest obj=" << dest_obj << " upload_id=" << upload_id << " retcode=" << retcode << dendl; - /* ignore error, best effort */ - } - yield call(new RGWRadosRemoveCR(sc->env->driver, status_obj)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " retcode=" << retcode << dendl; - /* ignore error, best effort */ - } - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSStreamObjToCloudMultipartCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - AWSSyncConfig& conf; - RGWRESTConn *source_conn; - std::shared_ptr target; - rgw::sal::Object* src_obj; - rgw::sal::Object* dest_obj; - - uint64_t obj_size; - string src_etag; - rgw_sync_aws_src_obj_properties src_properties; - rgw_rest_obj rest_obj; - - rgw_sync_aws_multipart_upload_info status; - - map new_attrs; - - rgw_sync_aws_multipart_part_info *pcur_part_info{nullptr}; - - int ret_err{0}; - - rgw_raw_obj status_obj; - -public: - RGWAWSStreamObjToCloudMultipartCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, - AWSSyncConfig& _conf, - RGWRESTConn *_source_conn, - rgw::sal::Object* _src_obj, - std::shared_ptr& _target, - rgw::sal::Object* _dest_obj, - uint64_t _obj_size, - const rgw_sync_aws_src_obj_properties& _src_properties, - const rgw_rest_obj& 
_rest_obj) : RGWCoroutine(_sc->cct), - sc(_sc), - sync_env(_sc->env), - conf(_conf), - source_conn(_source_conn), - target(_target), - src_obj(_src_obj), - dest_obj(_dest_obj), - obj_size(_obj_size), - src_properties(_src_properties), - rest_obj(_rest_obj), - status_obj(sync_env->svc->zone->get_zone_params().log_pool, - RGWBucketPipeSyncStatusManager::obj_status_oid(_sync_pipe, sc->source_zone, src_obj)) { - } - - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - yield call(new RGWSimpleRadosReadCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, - status_obj, &status, false)); - - if (retcode < 0 && retcode != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: failed to read sync status of object " << src_obj << " retcode=" << retcode << dendl; - return retcode; - } - - if (retcode >= 0) { - /* check here that mtime and size did not change */ - - if (status.src_properties.mtime != src_properties.mtime || status.obj_size != obj_size || - status.src_properties.etag != src_properties.etag) { - yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); - retcode = -ENOENT; - } - } - - if (retcode == -ENOENT) { - RGWAWSStreamPutCRF::init_send_attrs(dpp, sc->cct, rest_obj, src_properties, target.get(), &new_attrs); - - yield call(new RGWAWSInitMultipartCR(sc, target->conn.get(), dest_obj, status.obj_size, std::move(new_attrs), &status.upload_id)); - if (retcode < 0) { - return set_cr_error(retcode); - } - - status.obj_size = obj_size; - status.src_properties = src_properties; -#define MULTIPART_MAX_PARTS 10000 - uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS; - status.part_size = std::max(conf.s3.multipart_min_part_size, min_part_size); - status.num_parts = (obj_size + status.part_size - 1) / status.part_size; - status.cur_part = 1; - } - - for (; (uint32_t)status.cur_part <= status.num_parts; ++status.cur_part) { - yield { - rgw_sync_aws_multipart_part_info& cur_part_info = status.parts[status.cur_part]; - cur_part_info.part_num = status.cur_part; - cur_part_info.ofs = status.cur_ofs; - cur_part_info.size = std::min((uint64_t)status.part_size, status.obj_size - status.cur_ofs); - - pcur_part_info = &cur_part_info; - - status.cur_ofs += status.part_size; - - call(new RGWAWSStreamObjToCloudMultipartPartCR(sc, - source_conn, src_obj, - target, - dest_obj, - status.src_properties, - status.upload_id, - cur_part_info, - &cur_part_info.etag)); - } - - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to sync obj=" << src_obj << ", sync via multipart upload, upload_id=" << status.upload_id << " part number " << status.cur_part << " (error: " << cpp_strerror(-retcode) << ")" << dendl; - ret_err = retcode; - yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); - return set_cr_error(ret_err); - } - - yield call(new RGWSimpleRadosWriteCR(dpp, sync_env->async_rados, sync_env->svc->sysobj, status_obj, status)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to store multipart upload state, retcode=" << retcode << dendl; - /* continue with upload anyway */ - } - ldpp_dout(dpp, 20) << "sync of object=" << src_obj << " via multipart upload, finished sending part #" << status.cur_part << " etag=" << pcur_part_info->etag << dendl; - } - - yield call(new RGWAWSCompleteMultipartCR(sc, target->conn.get(), dest_obj, status.upload_id, status.parts)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to complete multipart upload of obj=" << 
src_obj << " (error: " << cpp_strerror(-retcode) << ")" << dendl; - ret_err = retcode; - yield call(new RGWAWSStreamAbortMultipartUploadCR(sc, target->conn.get(), dest_obj, status_obj, status.upload_id)); - return set_cr_error(ret_err); - } - - /* remove status obj */ - yield call(new RGWRadosRemoveCR(sync_env->driver, status_obj)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to abort multipart upload obj=" << src_obj << " upload_id=" << status.upload_id << " part number " << status.cur_part << " (" << cpp_strerror(-retcode) << ")" << dendl; - /* ignore error, best effort */ - } - return set_cr_done(); - } - - return 0; - } -}; -template -int decode_attr(map& attrs, const char *attr_name, T *result, T def_val) -{ - map::iterator iter = attrs.find(attr_name); - if (iter == attrs.end()) { - *result = def_val; - return 0; - } - bufferlist& bl = iter->second; - if (bl.length() == 0) { - *result = def_val; - return 0; - } - auto bliter = bl.cbegin(); - try { - decode(*result, bliter); - } catch (buffer::error& err) { - return -EIO; - } - return 0; -} - -// maybe use Fetch Remote Obj instead? -class RGWAWSHandleRemoteObjCBCR: public RGWStatRemoteObjCBCR { - rgw_bucket_sync_pipe sync_pipe; - AWSSyncInstanceEnv& instance; - - uint64_t versioned_epoch{0}; - - RGWRESTConn *source_conn{nullptr}; - std::shared_ptr target; - bufferlist res; - unordered_map bucket_created; - string target_bucket_name; - string target_obj_name; - rgw_rest_obj rest_obj; - int ret{0}; - - uint32_t src_zone_short_id{0}; - uint64_t src_pg_ver{0}; - - bufferlist out_bl; - - struct CreateBucketResult { - string code; - - void decode_xml(XMLObj *obj) { - RGWXMLDecoder::decode_xml("Code", code, obj); - } - } result; - - rgw_bucket target_bucket; - std::unique_ptr bucket; - std::unique_ptr src_obj; - std::unique_ptr dest_bucket; - std::unique_ptr dest_obj; - - -public: - RGWAWSHandleRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, - rgw_obj_key& _key, - AWSSyncInstanceEnv& _instance, - uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sc, _sync_pipe.info.source_bs.bucket, _key), - sync_pipe(_sync_pipe), - instance(_instance), versioned_epoch(_versioned_epoch) - {} - - ~RGWAWSHandleRemoteObjCBCR(){ - } - - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - ret = decode_attr(attrs, RGW_ATTR_PG_VER, &src_pg_ver, (uint64_t)0); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode pg ver attr, ignoring" << dendl; - } else { - ret = decode_attr(attrs, RGW_ATTR_SOURCE_ZONE, &src_zone_short_id, (uint32_t)0); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode source zone short_id attr, ignoring" << dendl; - src_pg_ver = 0; /* all or nothing */ - } - } - ldpp_dout(dpp, 4) << "AWS: download begin: z=" << sc->source_zone - << " b=" << src_bucket << " k=" << key << " size=" << size - << " mtime=" << mtime << " etag=" << etag - << " zone_short_id=" << src_zone_short_id << " pg_ver=" << src_pg_ver - << dendl; - - source_conn = sync_env->svc->zone->get_zone_conn(sc->source_zone); - if (!source_conn) { - ldpp_dout(dpp, 0) << "ERROR: cannot find http connection to zone " << sc->source_zone << dendl; - return set_cr_error(-EINVAL); - } - - instance.get_profile(sync_pipe.info.source_bs.bucket, &target); - instance.conf.get_target(target, sync_pipe.dest_bucket_info, key, &target_bucket_name, &target_obj_name); - - if (bucket_created.find(target_bucket_name) == bucket_created.end()){ - yield { - ldpp_dout(dpp, 0) << "AWS: creating bucket " << target_bucket_name << 
dendl; - bufferlist bl; - call(new RGWPutRawRESTResourceCR (sc->cct, target->conn.get(), - sync_env->http_manager, - target_bucket_name, nullptr, bl, &out_bl)); - } - if (retcode < 0 ) { - RGWXMLDecoder::XMLParser parser; - if (!parser.init()) { - ldpp_dout(dpp, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl; - return set_cr_error(retcode); - } - - if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: failed to parse xml: " << str << dendl; - return set_cr_error(retcode); - } - - try { - RGWXMLDecoder::decode_xml("Error", result, &parser, true); - } catch (RGWXMLDecoder::err& err) { - string str(out_bl.c_str(), out_bl.length()); - ldpp_dout(dpp, 5) << "ERROR: unexpected xml: " << str << dendl; - return set_cr_error(retcode); - } - - if (result.code != "BucketAlreadyOwnedByYou") { - return set_cr_error(retcode); - } - } - - bucket_created[target_bucket_name] = true; - } - - yield { - bucket.reset(new rgw::sal::RadosBucket(sync_env->driver, src_bucket)); - src_obj.reset(new rgw::sal::RadosObject(sync_env->driver, key, bucket.get())); - - /* init output */ - target_bucket.name = target_bucket_name; /* this is only possible because we only use bucket name for - uri resolution */ - dest_bucket.reset(new rgw::sal::RadosBucket(sync_env->driver, target_bucket)); - dest_obj.reset(new rgw::sal::RadosObject(sync_env->driver, rgw_obj_key(target_obj_name), dest_bucket.get())); - - rgw_sync_aws_src_obj_properties src_properties; - src_properties.mtime = mtime; - src_properties.etag = etag; - src_properties.zone_short_id = src_zone_short_id; - src_properties.pg_ver = src_pg_ver; - src_properties.versioned_epoch = versioned_epoch; - - if (size < instance.conf.s3.multipart_sync_threshold) { - call(new RGWAWSStreamObjToCloudPlainCR(sc, source_conn, src_obj.get(), - src_properties, - target, - dest_obj.get())); - } else { - rgw_rest_obj rest_obj; - rest_obj.init(key); - if (do_decode_rest_obj(dpp, sc->cct, attrs, headers, &rest_obj)) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode rest obj out of headers=" << headers << ", attrs=" << attrs << dendl; - return set_cr_error(-EINVAL); - } - call(new RGWAWSStreamObjToCloudMultipartCR(sc, sync_pipe, instance.conf, source_conn, src_obj.get(), - target, dest_obj.get(), size, src_properties, rest_obj)); - } - } - if (retcode < 0) { - return set_cr_error(retcode); - } - - return set_cr_done(); - } - - return 0; - } -}; - -class RGWAWSHandleRemoteObjCR : public RGWCallStatRemoteObjCR { - rgw_bucket_sync_pipe sync_pipe; - AWSSyncInstanceEnv& instance; - uint64_t versioned_epoch; -public: - RGWAWSHandleRemoteObjCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, - AWSSyncInstanceEnv& _instance, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sc, _sync_pipe.info.source_bs.bucket, _key), - sync_pipe(_sync_pipe), - instance(_instance), versioned_epoch(_versioned_epoch) { - } - - ~RGWAWSHandleRemoteObjCR() {} - - RGWStatRemoteObjCBCR *allocate_callback() override { - return new RGWAWSHandleRemoteObjCBCR(sc, sync_pipe, key, instance, versioned_epoch); - } -}; - -class RGWAWSRemoveRemoteObjCBCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - std::shared_ptr target; - rgw_bucket_sync_pipe sync_pipe; - rgw_obj_key key; - ceph::real_time mtime; - AWSSyncInstanceEnv& instance; - int ret{0}; -public: - RGWAWSRemoveRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, const 
-                            ceph::real_time& _mtime,
-                            AWSSyncInstanceEnv& _instance) : RGWCoroutine(_sc->cct), sc(_sc),
-                                                             sync_pipe(_sync_pipe), key(_key),
-                                                             mtime(_mtime), instance(_instance) {}
-  int operate(const DoutPrefixProvider *dpp) override {
-    reenter(this) {
-      ldpp_dout(dpp, 0) << ": remove remote obj: z=" << sc->source_zone
-                        << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << dendl;
-      yield {
-        instance.get_profile(sync_pipe.info.source_bs.bucket, &target);
-        string path = instance.conf.get_path(target, sync_pipe.dest_bucket_info, key);
-
-        call(new RGWDeleteRESTResourceCR(sc->cct, target->conn.get(),
-                                         sc->env->http_manager,
-                                         path, nullptr /* params */));
-      }
-      if (retcode < 0) {
-        return set_cr_error(retcode);
-      }
-      return set_cr_done();
-    }
-    return 0;
-  }
-
-};
-
-
-class RGWAWSDataSyncModule: public RGWDataSyncModule {
-  CephContext *cct;
-  AWSSyncInstanceEnv instance;
-public:
-  RGWAWSDataSyncModule(CephContext *_cct, AWSSyncConfig& _conf) :
-    cct(_cct),
-    instance(_conf) {
-  }
-
-  void init(RGWDataSyncCtx *sc, uint64_t instance_id) override {
-    instance.init(sc, instance_id);
-  }
-
-  ~RGWAWSDataSyncModule() {}
-
-  RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key,
-                            std::optional<uint64_t> versioned_epoch,
-                            rgw_zone_set *zones_trace) override {
-    ldout(sc->cct, 0) << instance.id << ": sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
-    return new RGWAWSHandleRemoteObjCR(sc, sync_pipe, key, instance, versioned_epoch.value_or(0));
-  }
-  RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch,
-                              rgw_zone_set *zones_trace) override {
-    ldout(sc->cct, 0) <<"rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
-    return new RGWAWSRemoveRemoteObjCBCR(sc, sync_pipe, key, mtime, instance);
-  }
-  RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime,
-                                     rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch,
-                                     rgw_zone_set *zones_trace) override {
-    ldout(sc->cct, 0) <<"AWS Not implemented: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime
-                      << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
-    return NULL;
-  }
-};
-
-class RGWAWSSyncModuleInstance : public RGWSyncModuleInstance {
-  RGWAWSDataSyncModule data_handler;
-public:
-  RGWAWSSyncModuleInstance(CephContext *cct, AWSSyncConfig& _conf) : data_handler(cct, _conf) {}
-  RGWDataSyncModule *get_data_handler() override {
-    return &data_handler;
-  }
-};
-
-int RGWAWSSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance){
-  AWSSyncConfig conf;
-
-  int r = conf.init(dpp, cct, config);
-  if (r < 0) {
-    return r;
-  }
-
-  instance->reset(new RGWAWSSyncModuleInstance(cct, conf));
-  return 0;
-}
diff --git a/src/rgw/store/rados/rgw_sync_module_aws.h b/src/rgw/store/rados/rgw_sync_module_aws.h
deleted file mode 100644
index 48f0145fdf9..00000000000
--- a/src/rgw/store/rados/rgw_sync_module_aws.h
+++ /dev/null
@@ -1,111 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-#ifndef RGW_SYNC_MODULE_AWS_H
-#define RGW_SYNC_MODULE_AWS_H
-
-#include "rgw_sync_module.h"
-
-struct rgw_sync_aws_multipart_part_info {
-  int part_num{0};
-  uint64_t ofs{0};
-  uint64_t size{0};
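The multipart coroutine above derives its part size from S3's 10,000-part ceiling: `obj_size / MULTIPART_MAX_PARTS` gives the smallest part size that still fits, which is then clamped up to the configured `conf.s3.multipart_min_part_size`, and the part count follows by ceiling division. A minimal sketch of that sizing rule, with a hypothetical `plan_parts()` helper that is not part of the module:

```cpp
// Sketch of the sizing rule in RGWAWSStreamObjToCloudMultipartCR::operate().
// plan_parts() is illustrative; the constant mirrors the coroutine's macro.
#include <algorithm>
#include <cstdint>

static constexpr uint64_t MULTIPART_MAX_PARTS = 10000;

struct part_plan {
  uint64_t part_size;
  uint64_t num_parts;
};

// conf_min_part_size corresponds to conf.s3.multipart_min_part_size.
inline part_plan plan_parts(uint64_t obj_size, uint64_t conf_min_part_size) {
  // Smallest part size that keeps the object within 10,000 parts.
  uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS;
  uint64_t part_size = std::max(conf_min_part_size, min_part_size);
  // Ceiling division; the final part may be shorter than part_size.
  uint64_t num_parts = (obj_size + part_size - 1) / part_size;
  return {part_size, num_parts};
}
```

For example, a 1 GiB object with a 5 MiB minimum part size yields 205 parts, the last one short.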
std::string etag; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(part_num, bl); - encode(ofs, bl); - encode(size, bl); - encode(etag, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(part_num, bl); - decode(ofs, bl); - decode(size, bl); - decode(etag, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_part_info) - -struct rgw_sync_aws_src_obj_properties { - ceph::real_time mtime; - std::string etag; - uint32_t zone_short_id{0}; - uint64_t pg_ver{0}; - uint64_t versioned_epoch{0}; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(mtime, bl); - encode(etag, bl); - encode(zone_short_id, bl); - encode(pg_ver, bl); - encode(versioned_epoch, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(mtime, bl); - decode(etag, bl); - decode(zone_short_id, bl); - decode(pg_ver, bl); - decode(versioned_epoch, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_sync_aws_src_obj_properties) - -struct rgw_sync_aws_multipart_upload_info { - std::string upload_id; - uint64_t obj_size; - rgw_sync_aws_src_obj_properties src_properties; - uint32_t part_size{0}; - uint32_t num_parts{0}; - - int cur_part{0}; - uint64_t cur_ofs{0}; - - std::map parts; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(upload_id, bl); - encode(obj_size, bl); - encode(src_properties, bl); - encode(part_size, bl); - encode(num_parts, bl); - encode(cur_part, bl); - encode(cur_ofs, bl); - encode(parts, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(upload_id, bl); - decode(obj_size, bl); - decode(src_properties, bl); - decode(part_size, bl); - decode(num_parts, bl); - decode(cur_part, bl); - decode(cur_ofs, bl); - decode(parts, bl); - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_upload_info) - -class RGWAWSSyncModule : public RGWSyncModule { - public: - RGWAWSSyncModule() {} - bool supports_data_export() override { return false;} - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; -}; - -#endif /* RGW_SYNC_MODULE_AWS_H */ diff --git a/src/rgw/store/rados/rgw_sync_module_es.cc b/src/rgw/store/rados/rgw_sync_module_es.cc deleted file mode 100644 index 3c294bbbc19..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_es.cc +++ /dev/null @@ -1,962 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_b64.h" -#include "rgw_common.h" -#include "rgw_coroutine.h" -#include "rgw_sync_module.h" -#include "rgw_data_sync.h" -#include "rgw_sync_module_es.h" -#include "rgw_sync_module_es_rest.h" -#include "rgw_rest_conn.h" -#include "rgw_cr_rest.h" -#include "rgw_op.h" -#include "rgw_es_query.h" -#include "rgw_zone.h" - -#include "services/svc_zone.h" - -#include "include/str_list.h" - -#include - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -/* - * allowlist utility. Config string is a list of entries, where an entry is either an item, - * a prefix, or a suffix. An item would be the name of the entity that we'd look up, - * a prefix would be a string ending with an asterisk, a suffix would be a string starting - * with an asterisk. 
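The grammar described in this comment is matched by `ItemList::exists()` below: exact entries are a set lookup, a prefix entry is checked by probing the greatest stored prefix that sorts at or before the candidate, and suffix entries are scanned linearly. A standalone sketch of that rule; the `allow()` helper and its parameter layout are illustrative only:

```cpp
// Mirrors the lookup logic of ItemList::exists(); not the class itself.
#include <set>
#include <string>

inline bool allow(const std::string& name,
                  const std::set<std::string>& entries,   // exact names
                  const std::set<std::string>& prefixes,  // "foo*" stored as "foo"
                  const std::set<std::string>& suffixes)  // "*bar" stored as "bar"
{
  if (entries.count(name)) {
    return true;
  }
  // Probe the greatest stored prefix sorting at or before name.
  auto i = prefixes.upper_bound(name);
  if (i != prefixes.begin()) {
    --i;
    if (name.compare(0, i->size(), *i) == 0) {
      return true;
    }
  }
  for (const auto& s : suffixes) {
    if (name.size() >= s.size() &&
        name.compare(name.size() - s.size(), s.size(), s) == 0) {
      return true;
    }
  }
  return false;
}
```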
For example: - * - * bucket1, bucket2, foo*, *bar - */ -class ItemList { - bool approve_all{false}; - - set entries; - set prefixes; - set suffixes; - - void parse(const string& str) { - list l; - - get_str_list(str, ",", l); - - for (auto& entry : l) { - entry = rgw_trim_whitespace(entry); - if (entry.empty()) { - continue; - } - - if (entry == "*") { - approve_all = true; - return; - } - - if (entry[0] == '*') { - suffixes.insert(entry.substr(1)); - continue; - } - - if (entry.back() == '*') { - prefixes.insert(entry.substr(0, entry.size() - 1)); - continue; - } - - entries.insert(entry); - } - } - -public: - ItemList() {} - void init(const string& str, bool def_val) { - if (str.empty()) { - approve_all = def_val; - } else { - parse(str); - } - } - - bool exists(const string& entry) { - if (approve_all) { - return true; - } - - if (entries.find(entry) != entries.end()) { - return true; - } - - auto i = prefixes.upper_bound(entry); - if (i != prefixes.begin()) { - --i; - if (boost::algorithm::starts_with(entry, *i)) { - return true; - } - } - - for (i = suffixes.begin(); i != suffixes.end(); ++i) { - if (boost::algorithm::ends_with(entry, *i)) { - return true; - } - } - - return false; - } -}; - -#define ES_NUM_SHARDS_MIN 5 - -#define ES_NUM_SHARDS_DEFAULT 16 -#define ES_NUM_REPLICAS_DEFAULT 1 - -using ESVersion = std::pair; -static constexpr ESVersion ES_V5{5,0}; -static constexpr ESVersion ES_V7{7,0}; - -struct ESInfo { - std::string name; - std::string cluster_name; - std::string cluster_uuid; - ESVersion version; - - void decode_json(JSONObj *obj); - - std::string get_version_str(){ - return std::to_string(version.first) + "." + std::to_string(version.second); - } -}; - -// simple wrapper structure to wrap the es version nested type -struct es_version_decoder { - ESVersion version; - - int parse_version(const std::string& s) { - int major, minor; - int ret = sscanf(s.c_str(), "%d.%d", &major, &minor); - if (ret < 0) { - return ret; - } - version = std::make_pair(major,minor); - return 0; - } - - void decode_json(JSONObj *obj) { - std::string s; - JSONDecoder::decode_json("number",s,obj); - if (parse_version(s) < 0) - throw JSONDecoder::err("Failed to parse ElasticVersion"); - } -}; - - -void ESInfo::decode_json(JSONObj *obj) -{ - JSONDecoder::decode_json("name", name, obj); - JSONDecoder::decode_json("cluster_name", cluster_name, obj); - JSONDecoder::decode_json("cluster_uuid", cluster_uuid, obj); - es_version_decoder esv; - JSONDecoder::decode_json("version", esv, obj); - version = std::move(esv.version); -} - -struct ElasticConfig { - uint64_t sync_instance{0}; - string id; - string index_path; - std::unique_ptr conn; - bool explicit_custom_meta{true}; - string override_index_path; - ItemList index_buckets; - ItemList allow_owners; - uint32_t num_shards{0}; - uint32_t num_replicas{0}; - std::map default_headers = {{ "Content-Type", "application/json" }}; - ESInfo es_info; - - void init(CephContext *cct, const JSONFormattable& config) { - string elastic_endpoint = config["endpoint"]; - id = string("elastic:") + elastic_endpoint; - conn.reset(new RGWRESTConn(cct, (rgw::sal::Driver*)nullptr, id, { elastic_endpoint }, nullopt /* region */ )); - explicit_custom_meta = config["explicit_custom_meta"](true); - index_buckets.init(config["index_buckets_list"], true); /* approve all buckets by default */ - allow_owners.init(config["approved_owners_list"], true); /* approve all bucket owners by default */ - override_index_path = config["override_index_path"]; - num_shards = 
config["num_shards"](ES_NUM_SHARDS_DEFAULT); - if (num_shards < ES_NUM_SHARDS_MIN) { - num_shards = ES_NUM_SHARDS_MIN; - } - num_replicas = config["num_replicas"](ES_NUM_REPLICAS_DEFAULT); - if (string user = config["username"], pw = config["password"]; - !user.empty() && !pw.empty()) { - auto auth_string = user + ":" + pw; - default_headers.emplace("AUTHORIZATION", "Basic " + rgw::to_base64(auth_string)); - } - - } - - void init_instance(const RGWRealm& realm, uint64_t instance_id) { - sync_instance = instance_id; - - if (!override_index_path.empty()) { - index_path = override_index_path; - return; - } - - char buf[32]; - snprintf(buf, sizeof(buf), "-%08x", (uint32_t)(sync_instance & 0xFFFFFFFF)); - - index_path = "/rgw-" + realm.get_name() + buf; - } - - string get_index_path() { - return index_path; - } - - map& get_request_headers() { - return default_headers; - } - - string get_obj_path(const RGWBucketInfo& bucket_info, const rgw_obj_key& key) { - if (es_info.version >= ES_V7) { - return index_path+ "/_doc/" + url_encode(bucket_info.bucket.bucket_id + ":" + key.name + ":" + (key.instance.empty() ? "null" : key.instance)); -; - } else { - return index_path + "/object/" + url_encode(bucket_info.bucket.bucket_id + ":" + key.name + ":" + (key.instance.empty() ? "null" : key.instance)); - } - } - - bool should_handle_operation(RGWBucketInfo& bucket_info) { - return index_buckets.exists(bucket_info.bucket.name) && - allow_owners.exists(bucket_info.owner.to_str()); - } -}; - -using ElasticConfigRef = std::shared_ptr; - -static const char *es_type_to_str(const ESType& t) { - switch (t) { - case ESType::String: return "string"; - case ESType::Text: return "text"; - case ESType::Keyword: return "keyword"; - case ESType::Long: return "long"; - case ESType::Integer: return "integer"; - case ESType::Short: return "short"; - case ESType::Byte: return "byte"; - case ESType::Double: return "double"; - case ESType::Float: return "float"; - case ESType::Half_Float: return "half_float"; - case ESType::Scaled_Float: return "scaled_float"; - case ESType::Date: return "date"; - case ESType::Boolean: return "boolean"; - case ESType::Integer_Range: return "integer_range"; - case ESType::Float_Range: return "float_range"; - case ESType::Double_Range: return "date_range"; - case ESType::Date_Range: return "date_range"; - case ESType::Geo_Point: return "geo_point"; - case ESType::Ip: return "ip"; - default: - return ""; - } -} - -struct es_type_v2 { - ESType estype; - const char *format{nullptr}; - std::optional analyzed; - - es_type_v2(ESType et) : estype(et) {} - - void dump(Formatter *f) const { - const char *type_str = es_type_to_str(estype); - encode_json("type", type_str, f); - if (format) { - encode_json("format", format, f); - } - - auto is_analyzed = analyzed; - - if (estype == ESType::String && - !is_analyzed) { - is_analyzed = false; - } - - if (is_analyzed) { - encode_json("index", (is_analyzed.value() ? "analyzed" : "not_analyzed"), f); - } - } -}; - -struct es_type_v5 { - ESType estype; - const char *format{nullptr}; - std::optional analyzed; - std::optional index; - - es_type_v5(ESType et) : estype(et) {} - - void dump(Formatter *f) const { - ESType new_estype; - if (estype != ESType::String) { - new_estype = estype; - } else { - bool is_analyzed = analyzed.value_or(false); - new_estype = (is_analyzed ? ESType::Text : ESType::Keyword); - /* index = true; ... 
Not setting index=true, because that's the default, - * and dumping a boolean value *might* be a problem when backporting this - * because value might get quoted - */ - } - - const char *type_str = es_type_to_str(new_estype); - encode_json("type", type_str, f); - if (format) { - encode_json("format", format, f); - } - if (index) { - encode_json("index", index.value(), f); - } - } -}; - -template -struct es_type : public T { - es_type(T t) : T(t) {} - es_type& set_format(const char *f) { - T::format = f; - return *this; - } - - es_type& set_analyzed(bool a) { - T::analyzed = a; - return *this; - } -}; - -template -struct es_index_mappings { - ESVersion es_version; - ESType string_type {ESType::String}; - - es_index_mappings(ESVersion esv):es_version(esv) { - } - - es_type est(ESType t) const { - return es_type(t); - } - - void dump_custom(const char *section, ESType type, const char *format, Formatter *f) const { - f->open_object_section(section); - ::encode_json("type", "nested", f); - f->open_object_section("properties"); - encode_json("name", est(string_type), f); - encode_json("value", est(type).set_format(format), f); - f->close_section(); // entry - f->close_section(); // custom-string - } - - void dump(Formatter *f) const { - if (es_version <= ES_V7) - f->open_object_section("object"); - f->open_object_section("properties"); - encode_json("bucket", est(string_type), f); - encode_json("name", est(string_type), f); - encode_json("instance", est(string_type), f); - encode_json("versioned_epoch", est(ESType::Long), f); - f->open_object_section("meta"); - f->open_object_section("properties"); - encode_json("cache_control", est(string_type), f); - encode_json("content_disposition", est(string_type), f); - encode_json("content_encoding", est(string_type), f); - encode_json("content_language", est(string_type), f); - encode_json("content_type", est(string_type), f); - encode_json("storage_class", est(string_type), f); - encode_json("etag", est(string_type), f); - encode_json("expires", est(string_type), f); - encode_json("mtime", est(ESType::Date) - .set_format("strict_date_optional_time||epoch_millis"), f); - encode_json("size", est(ESType::Long), f); - dump_custom("custom-string", string_type, nullptr, f); - dump_custom("custom-int", ESType::Long, nullptr, f); - dump_custom("custom-date", ESType::Date, "strict_date_optional_time||epoch_millis", f); - f->close_section(); // properties - f->close_section(); // meta - f->close_section(); // properties - - if (es_version <= ES_V7) - f->close_section(); // object - } -}; - -struct es_index_settings { - uint32_t num_replicas; - uint32_t num_shards; - - es_index_settings(uint32_t _replicas, uint32_t _shards) : num_replicas(_replicas), num_shards(_shards) {} - - void dump(Formatter *f) const { - encode_json("number_of_replicas", num_replicas, f); - encode_json("number_of_shards", num_shards, f); - } -}; - -struct es_index_config_base { - virtual ~es_index_config_base() {} - virtual void dump(Formatter *f) const = 0; -}; - -template -struct es_index_config : public es_index_config_base { - es_index_settings settings; - es_index_mappings mappings; - - es_index_config(es_index_settings& _s, ESVersion esv) : settings(_s), mappings(esv) { - } - - void dump(Formatter *f) const { - encode_json("settings", settings, f); - encode_json("mappings", mappings, f); - } -}; - -static bool is_sys_attr(const std::string& attr_name){ - static constexpr std::initializer_list rgw_sys_attrs = - {RGW_ATTR_PG_VER, - RGW_ATTR_SOURCE_ZONE, - RGW_ATTR_ID_TAG, - 
RGW_ATTR_TEMPURL_KEY1, - RGW_ATTR_TEMPURL_KEY2, - RGW_ATTR_UNIX1, - RGW_ATTR_UNIX_KEY1 - }; - - return std::find(rgw_sys_attrs.begin(), rgw_sys_attrs.end(), attr_name) != rgw_sys_attrs.end(); -} - -static size_t attr_len(const bufferlist& val) -{ - size_t len = val.length(); - if (len && val[len - 1] == '\0') { - --len; - } - - return len; -} - -struct es_obj_metadata { - const DoutPrefixProvider *dpp; - CephContext *cct; - ElasticConfigRef es_conf; - RGWBucketInfo bucket_info; - rgw_obj_key key; - ceph::real_time mtime; - uint64_t size; - map attrs; - uint64_t versioned_epoch; - - es_obj_metadata(CephContext *_cct, ElasticConfigRef _es_conf, const RGWBucketInfo& _bucket_info, - const rgw_obj_key& _key, ceph::real_time& _mtime, uint64_t _size, - map& _attrs, uint64_t _versioned_epoch) : cct(_cct), es_conf(_es_conf), bucket_info(_bucket_info), key(_key), - mtime(_mtime), size(_size), attrs(std::move(_attrs)), versioned_epoch(_versioned_epoch) {} - - void dump(Formatter *f) const { - map out_attrs; - map custom_meta; - RGWAccessControlPolicy policy; - set permissions; - RGWObjTags obj_tags; - - for (auto i : attrs) { - const string& attr_name = i.first; - bufferlist& val = i.second; - - if (!boost::algorithm::starts_with(attr_name, RGW_ATTR_PREFIX)) { - continue; - } - - if (boost::algorithm::starts_with(attr_name, RGW_ATTR_META_PREFIX)) { - custom_meta.emplace(attr_name.substr(sizeof(RGW_ATTR_META_PREFIX) - 1), - string(val.c_str(), attr_len(val))); - continue; - } - - if (boost::algorithm::starts_with(attr_name, RGW_ATTR_CRYPT_PREFIX)) { - continue; - } - - if (boost::algorithm::starts_with(attr_name, RGW_ATTR_OLH_PREFIX)) { - // skip versioned object olh info - continue; - } - - if (attr_name == RGW_ATTR_ACL) { - try { - auto i = val.cbegin(); - decode(policy, i); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode acl for " << bucket_info.bucket << "/" << key << dendl; - continue; - } - - const RGWAccessControlList& acl = policy.get_acl(); - - permissions.insert(policy.get_owner().get_id().to_str()); - for (auto acliter : acl.get_grant_map()) { - const ACLGrant& grant = acliter.second; - if (grant.get_type().get_type() == ACL_TYPE_CANON_USER && - ((uint32_t)grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) { - rgw_user user; - if (grant.get_id(user)) { - permissions.insert(user.to_str()); - } - } - } - } else if (attr_name == RGW_ATTR_TAGS) { - try { - auto tags_bl = val.cbegin(); - decode(obj_tags, tags_bl); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode obj tags for " - << bucket_info.bucket << "/" << key << dendl; - continue; - } - } else if (attr_name == RGW_ATTR_COMPRESSION) { - RGWCompressionInfo cs_info; - try { - auto vals_bl = val.cbegin(); - decode(cs_info, vals_bl); - } catch (buffer::error& err) { - ldpp_dout(dpp, 0) << "ERROR: failed to decode compression attr for " - << bucket_info.bucket << "/" << key << dendl; - continue; - } - out_attrs.emplace("compression",std::move(cs_info.compression_type)); - } else { - if (!is_sys_attr(attr_name)) { - out_attrs.emplace(attr_name.substr(sizeof(RGW_ATTR_PREFIX) - 1), - std::string(val.c_str(), attr_len(val))); - } - } - } - ::encode_json("bucket", bucket_info.bucket.name, f); - ::encode_json("name", key.name, f); - string instance = key.instance; - if (instance.empty()) - instance = "null"; - ::encode_json("instance", instance, f); - ::encode_json("versioned_epoch", versioned_epoch, f); - ::encode_json("owner", policy.get_owner(), f); - 
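The attribute loop above recovers user-visible metadata by stripping the RGW xattr prefix and trimming the trailing NUL that `attr_len()` measures. A self-contained sketch of that extraction step; the prefix literal is an assumption about what `RGW_ATTR_META_PREFIX` expands to:

```cpp
#include <map>
#include <string>

// Assumed expansion of RGW_ATTR_META_PREFIX.
static const std::string kAttrMetaPrefix = "user.rgw.x-amz-meta-";

// Trim a single trailing NUL, as attr_len() does for xattr values.
inline size_t trimmed_len(const std::string& v) {
  size_t len = v.size();
  if (len && v[len - 1] == '\0') {
    --len;
  }
  return len;
}

// Collect "x-amz-meta-*" xattrs into a custom-metadata map, keyed
// without the prefix, the way the loop above fills custom_meta.
inline std::map<std::string, std::string>
extract_custom_meta(const std::map<std::string, std::string>& attrs) {
  std::map<std::string, std::string> out;
  for (const auto& [name, val] : attrs) {
    if (name.rfind(kAttrMetaPrefix, 0) == 0) {  // starts_with
      out.emplace(name.substr(kAttrMetaPrefix.size()),
                  val.substr(0, trimmed_len(val)));
    }
  }
  return out;
}
```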
::encode_json("permissions", permissions, f); - f->open_object_section("meta"); - ::encode_json("size", size, f); - - string mtime_str; - rgw_to_iso8601(mtime, &mtime_str); - ::encode_json("mtime", mtime_str, f); - for (auto i : out_attrs) { - ::encode_json(i.first.c_str(), i.second, f); - } - map custom_str; - map custom_int; - map custom_date; - - for (auto i : custom_meta) { - auto config = bucket_info.mdsearch_config.find(i.first); - if (config == bucket_info.mdsearch_config.end()) { - if (!es_conf->explicit_custom_meta) { - /* default custom meta is of type string */ - custom_str[i.first] = i.second; - } else { - ldpp_dout(dpp, 20) << "custom meta entry key=" << i.first << " not found in bucket mdsearch config: " << bucket_info.mdsearch_config << dendl; - } - continue; - } - switch (config->second) { - case ESEntityTypeMap::ES_ENTITY_DATE: - custom_date[i.first] = i.second; - break; - case ESEntityTypeMap::ES_ENTITY_INT: - custom_int[i.first] = i.second; - break; - default: - custom_str[i.first] = i.second; - } - } - - if (!custom_str.empty()) { - f->open_array_section("custom-string"); - for (auto i : custom_str) { - f->open_object_section("entity"); - ::encode_json("name", i.first.c_str(), f); - ::encode_json("value", i.second, f); - f->close_section(); - } - f->close_section(); - } - if (!custom_int.empty()) { - f->open_array_section("custom-int"); - for (auto i : custom_int) { - f->open_object_section("entity"); - ::encode_json("name", i.first.c_str(), f); - ::encode_json("value", i.second, f); - f->close_section(); - } - f->close_section(); - } - if (!custom_date.empty()) { - f->open_array_section("custom-date"); - for (auto i : custom_date) { - /* - * try to exlicitly parse date field, otherwise elasticsearch could reject the whole doc, - * which will end up with failed sync - */ - real_time t; - int r = parse_time(i.second.c_str(), &t); - if (r < 0) { - ldpp_dout(dpp, 20) << __func__ << "(): failed to parse time (" << i.second << "), skipping encoding of custom date attribute" << dendl; - continue; - } - - string time_str; - rgw_to_iso8601(t, &time_str); - - f->open_object_section("entity"); - ::encode_json("name", i.first.c_str(), f); - ::encode_json("value", time_str.c_str(), f); - f->close_section(); - } - f->close_section(); - } - f->close_section(); // meta - const auto& m = obj_tags.get_tags(); - if (m.size() > 0){ - f->open_array_section("tagging"); - for (const auto &it : m) { - f->open_object_section("tag"); - ::encode_json("key", it.first, f); - ::encode_json("value",it.second, f); - f->close_section(); - } - f->close_section(); // tagging - } - } -}; - -class RGWElasticGetESInfoCBCR : public RGWCoroutine { -public: - RGWElasticGetESInfoCBCR(RGWDataSyncCtx *_sc, - ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - conf(_conf) {} - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - ldpp_dout(dpp, 5) << conf->id << ": get elasticsearch info for zone: " << sc->source_zone << dendl; - yield call(new RGWReadRESTResourceCR (sync_env->cct, - conf->conn.get(), - sync_env->http_manager, - "/", nullptr /*params*/, - &(conf->default_headers), - &(conf->es_info))); - if (retcode < 0) { - ldpp_dout(dpp, 5) << conf->id << ": get elasticsearch failed: " << retcode << dendl; - return set_cr_error(retcode); - } - - ldpp_dout(dpp, 5) << conf->id << ": got elastic version=" << conf->es_info.get_version_str() << dendl; - return set_cr_done(); - } - return 0; - } -private: - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - 
ElasticConfigRef conf; -}; - -class RGWElasticPutIndexCBCR : public RGWCoroutine { -public: - RGWElasticPutIndexCBCR(RGWDataSyncCtx *_sc, - ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - conf(_conf) {} - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - ldpp_dout(dpp, 5) << conf->id << ": put elasticsearch index for zone: " << sc->source_zone << dendl; - - yield { - string path = conf->get_index_path(); - es_index_settings settings(conf->num_replicas, conf->num_shards); - std::unique_ptr index_conf; - - if (conf->es_info.version >= ES_V5) { - ldpp_dout(dpp, 0) << "elasticsearch: index mapping: version >= 5" << dendl; - index_conf.reset(new es_index_config(settings, conf->es_info.version)); - } else { - ldpp_dout(dpp, 0) << "elasticsearch: index mapping: version < 5" << dendl; - index_conf.reset(new es_index_config(settings, conf->es_info.version)); - } - call(new RGWPutRESTResourceCR (sc->cct, - conf->conn.get(), - sync_env->http_manager, - path, nullptr /*params*/, - &(conf->default_headers), - *index_conf, nullptr, &err_response)); - } - if (retcode < 0) { - - if (err_response.error.type != "index_already_exists_exception" && - err_response.error.type != "resource_already_exists_exception") { - ldpp_dout(dpp, 0) << "elasticsearch: failed to initialize index: response.type=" << err_response.error.type << " response.reason=" << err_response.error.reason << dendl; - return set_cr_error(retcode); - } - - ldpp_dout(dpp, 0) << "elasticsearch: index already exists, assuming external initialization" << dendl; - } - return set_cr_done(); - } - return 0; - } - -private: - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - ElasticConfigRef conf; - - struct _err_response { - struct err_reason { - vector root_cause; - string type; - string reason; - string index; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("root_cause", root_cause, obj); - JSONDecoder::decode_json("type", type, obj); - JSONDecoder::decode_json("reason", reason, obj); - JSONDecoder::decode_json("index", index, obj); - } - } error; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("error", error, obj); - } - } err_response; -}; - -class RGWElasticInitConfigCBCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - ElasticConfigRef conf; - -public: - RGWElasticInitConfigCBCR(RGWDataSyncCtx *_sc, - ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), - sc(_sc), sync_env(_sc->env), - conf(_conf) {} - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - - yield call(new RGWElasticGetESInfoCBCR(sc, conf)); - - if (retcode < 0) { - return set_cr_error(retcode); - } - - yield call(new RGWElasticPutIndexCBCR(sc, conf)); - if (retcode < 0) { - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; - } - -}; - -class RGWElasticHandleRemoteObjCBCR : public RGWStatRemoteObjCBCR { - rgw_bucket_sync_pipe sync_pipe; - ElasticConfigRef conf; - uint64_t versioned_epoch; -public: - RGWElasticHandleRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, - ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sc, _sync_pipe.info.source_bs.bucket, _key), - sync_pipe(_sync_pipe), conf(_conf), - versioned_epoch(_versioned_epoch) {} - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - ldpp_dout(dpp, 10) << ": stat of remote obj: z=" << sc->source_zone - << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key - << " size=" << size 
<< " mtime=" << mtime << dendl; - - yield { - string path = conf->get_obj_path(sync_pipe.dest_bucket_info, key); - es_obj_metadata doc(sync_env->cct, conf, sync_pipe.dest_bucket_info, key, mtime, size, attrs, versioned_epoch); - - call(new RGWPutRESTResourceCR(sync_env->cct, conf->conn.get(), - sync_env->http_manager, - path, nullptr /* params */, - &(conf->default_headers), - doc, nullptr /* result */)); - - } - if (retcode < 0) { - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; - } -}; - -class RGWElasticHandleRemoteObjCR : public RGWCallStatRemoteObjCR { - rgw_bucket_sync_pipe sync_pipe; - ElasticConfigRef conf; - uint64_t versioned_epoch; -public: - RGWElasticHandleRemoteObjCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, - ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sc, _sync_pipe.info.source_bs.bucket, _key), - sync_pipe(_sync_pipe), - conf(_conf), versioned_epoch(_versioned_epoch) { - } - - ~RGWElasticHandleRemoteObjCR() override {} - - RGWStatRemoteObjCBCR *allocate_callback() override { - return new RGWElasticHandleRemoteObjCBCR(sc, sync_pipe, key, conf, versioned_epoch); - } -}; - -class RGWElasticRemoveRemoteObjCBCR : public RGWCoroutine { - RGWDataSyncCtx *sc; - RGWDataSyncEnv *sync_env; - rgw_bucket_sync_pipe sync_pipe; - rgw_obj_key key; - ceph::real_time mtime; - ElasticConfigRef conf; -public: - RGWElasticRemoveRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket_sync_pipe& _sync_pipe, rgw_obj_key& _key, const ceph::real_time& _mtime, - ElasticConfigRef _conf) : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), - sync_pipe(_sync_pipe), key(_key), - mtime(_mtime), conf(_conf) {} - int operate(const DoutPrefixProvider *dpp) override { - reenter(this) { - ldpp_dout(dpp, 10) << ": remove remote obj: z=" << sc->source_zone - << " b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << dendl; - yield { - string path = conf->get_obj_path(sync_pipe.dest_bucket_info, key); - - call(new RGWDeleteRESTResourceCR(sync_env->cct, conf->conn.get(), - sync_env->http_manager, - path, nullptr /* params */)); - } - if (retcode < 0) { - return set_cr_error(retcode); - } - return set_cr_done(); - } - return 0; - } - -}; - -class RGWElasticDataSyncModule : public RGWDataSyncModule { - ElasticConfigRef conf; -public: - RGWElasticDataSyncModule(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) : conf(std::make_shared()) { - conf->init(cct, config); - } - ~RGWElasticDataSyncModule() override {} - - void init(RGWDataSyncCtx *sc, uint64_t instance_id) override { - conf->init_instance(sc->env->svc->zone->get_realm(), instance_id); - } - - RGWCoroutine *init_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) override { - ldpp_dout(dpp, 5) << conf->id << ": init" << dendl; - return new RGWElasticInitConfigCBCR(sc, conf); - } - - RGWCoroutine *start_sync(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc) override { - ldpp_dout(dpp, 5) << conf->id << ": start_sync" << dendl; - // try to get elastic search version - return new RGWElasticGetESInfoCBCR(sc, conf); - } - - RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override { - ldpp_dout(dpp, 10) << conf->id << ": sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl; - if 
(!conf->should_handle_operation(sync_pipe.dest_bucket_info)) { - ldpp_dout(dpp, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl; - return nullptr; - } - return new RGWElasticHandleRemoteObjCR(sc, sync_pipe, key, conf, versioned_epoch.value_or(0)); - } - RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { - /* versioned and versioned epoch params are useless in the elasticsearch backend case */ - ldpp_dout(dpp, 10) << conf->id << ": rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; - if (!conf->should_handle_operation(sync_pipe.dest_bucket_info)) { - ldpp_dout(dpp, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl; - return nullptr; - } - return new RGWElasticRemoveRemoteObjCBCR(sc, sync_pipe, key, mtime, conf); - } - RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, - rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { - ldpp_dout(dpp, 10) << conf->id << ": create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime - << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; - ldpp_dout(dpp, 10) << conf->id << ": skipping operation (not handled)" << dendl; - return NULL; - } - RGWRESTConn *get_rest_conn() { - return conf->conn.get(); - } - - string get_index_path() { - return conf->get_index_path(); - } - - map& get_request_headers() { - return conf->get_request_headers(); - } -}; - -RGWElasticSyncModuleInstance::RGWElasticSyncModuleInstance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config) -{ - data_handler = std::unique_ptr(new RGWElasticDataSyncModule(dpp, cct, config)); -} - -RGWDataSyncModule *RGWElasticSyncModuleInstance::get_data_handler() -{ - return data_handler.get(); -} - -RGWRESTConn *RGWElasticSyncModuleInstance::get_rest_conn() -{ - return data_handler->get_rest_conn(); -} - -string RGWElasticSyncModuleInstance::get_index_path() { - return data_handler->get_index_path(); -} - -map& RGWElasticSyncModuleInstance::get_request_headers() { - return data_handler->get_request_headers(); -} - -RGWRESTMgr *RGWElasticSyncModuleInstance::get_rest_filter(int dialect, RGWRESTMgr *orig) { - if (dialect != RGW_REST_S3) { - return orig; - } - delete orig; - return new RGWRESTMgr_MDSearch_S3(); -} - -int RGWElasticSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { - string endpoint = config["endpoint"]; - instance->reset(new RGWElasticSyncModuleInstance(dpp, cct, config)); - return 0; -} - diff --git a/src/rgw/store/rados/rgw_sync_module_es.h b/src/rgw/store/rados/rgw_sync_module_es.h deleted file mode 100644 index 6c0c422c39c..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_es.h +++ /dev/null @@ -1,62 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_MODULE_ES_H -#define CEPH_RGW_SYNC_MODULE_ES_H - -#include "rgw_sync_module.h" - -enum class ESType { - /* string datatypes */ - String, /* Deprecated Since 5.X+ */ - 
Text, - Keyword, - - /* Numeric Types */ - Long, Integer, Short, Byte, Double, Float, Half_Float, Scaled_Float, - - /* Date Type */ - Date, - - /* Boolean */ - Boolean, - - /* Binary; Must Be Base64 Encoded */ - Binary, - - /* Range Types */ - Integer_Range, Float_Range, Long_Range, Double_Range, Date_Range, - - /* A Few Specialized Types */ - Geo_Point, - Ip -}; - - -class RGWElasticSyncModule : public RGWSyncModule { -public: - RGWElasticSyncModule() {} - bool supports_data_export() override { - return false; - } - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; -}; - -class RGWElasticDataSyncModule; -class RGWRESTConn; - -class RGWElasticSyncModuleInstance : public RGWSyncModuleInstance { - std::unique_ptr data_handler; -public: - RGWElasticSyncModuleInstance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config); - RGWDataSyncModule *get_data_handler() override; - RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) override; - RGWRESTConn *get_rest_conn(); - std::string get_index_path(); - std::map& get_request_headers(); - bool supports_user_writes() override { - return true; - } -}; - -#endif diff --git a/src/rgw/store/rados/rgw_sync_module_es_rest.cc b/src/rgw/store/rados/rgw_sync_module_es_rest.cc deleted file mode 100644 index db9d48adb36..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_es_rest.cc +++ /dev/null @@ -1,428 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_sync_module_es.h" -#include "rgw_sync_module_es_rest.h" -#include "rgw_es_query.h" -#include "rgw_op.h" -#include "rgw_rest.h" -#include "rgw_rest_s3.h" -#include "rgw_sal_rados.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -struct es_index_obj_response { - string bucket; - rgw_obj_key key; - uint64_t versioned_epoch{0}; - ACLOwner owner; - set read_permissions; - - struct { - uint64_t size{0}; - ceph::real_time mtime; - string etag; - string content_type; - string storage_class; - map custom_str; - map custom_int; - map custom_date; - - template - struct _custom_entry { - string name; - T value; - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("name", name, obj); - JSONDecoder::decode_json("value", value, obj); - } - }; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("size", size, obj); - string mtime_str; - JSONDecoder::decode_json("mtime", mtime_str, obj); - parse_time(mtime_str.c_str(), &mtime); - JSONDecoder::decode_json("etag", etag, obj); - JSONDecoder::decode_json("content_type", content_type, obj); - JSONDecoder::decode_json("storage_class", storage_class, obj); - list<_custom_entry > str_entries; - JSONDecoder::decode_json("custom-string", str_entries, obj); - for (auto& e : str_entries) { - custom_str[e.name] = e.value; - } - list<_custom_entry > int_entries; - JSONDecoder::decode_json("custom-int", int_entries, obj); - for (auto& e : int_entries) { - custom_int[e.name] = e.value; - } - list<_custom_entry > date_entries; - JSONDecoder::decode_json("custom-date", date_entries, obj); - for (auto& e : date_entries) { - custom_date[e.name] = e.value; - } - } - } meta; - - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("bucket", bucket, obj); - JSONDecoder::decode_json("name", key.name, obj); - JSONDecoder::decode_json("instance", key.instance, obj); - JSONDecoder::decode_json("versioned_epoch", 
versioned_epoch, obj); - JSONDecoder::decode_json("permissions", read_permissions, obj); - JSONDecoder::decode_json("owner", owner, obj); - JSONDecoder::decode_json("meta", meta, obj); - } -}; - -struct es_search_response { - uint32_t took; - bool timed_out; - struct { - uint32_t total; - uint32_t successful; - uint32_t failed; - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("total", total, obj); - JSONDecoder::decode_json("successful", successful, obj); - JSONDecoder::decode_json("failed", failed, obj); - } - } shards; - struct obj_hit { - string index; - string type; - string id; - // double score - es_index_obj_response source; - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("_index", index, obj); - JSONDecoder::decode_json("_type", type, obj); - JSONDecoder::decode_json("_id", id, obj); - JSONDecoder::decode_json("_source", source, obj); - } - }; - struct { - uint32_t total; - // double max_score; - list hits; - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("total", total, obj); - // JSONDecoder::decode_json("max_score", max_score, obj); - JSONDecoder::decode_json("hits", hits, obj); - } - } hits; - void decode_json(JSONObj *obj) { - JSONDecoder::decode_json("took", took, obj); - JSONDecoder::decode_json("timed_out", timed_out, obj); - JSONDecoder::decode_json("_shards", shards, obj); - JSONDecoder::decode_json("hits", hits, obj); - } -}; - -class RGWMetadataSearchOp : public RGWOp { - RGWSyncModuleInstanceRef sync_module_ref; - RGWElasticSyncModuleInstance *es_module; -protected: - string expression; - string custom_prefix; -#define MAX_KEYS_DEFAULT 100 - uint64_t max_keys{MAX_KEYS_DEFAULT}; - string marker_str; - uint64_t marker{0}; - string next_marker; - bool is_truncated{false}; - string err; - - es_search_response response; - -public: - RGWMetadataSearchOp(const RGWSyncModuleInstanceRef& sync_module) : sync_module_ref(sync_module) { - es_module = static_cast(sync_module_ref.get()); - } - - int verify_permission(optional_yield) override { - return 0; - } - virtual int get_params() = 0; - void pre_exec() override; - void execute(optional_yield y) override; - - const char* name() const override { return "metadata_search"; } - virtual RGWOpType get_type() override { return RGW_OP_METADATA_SEARCH; } - virtual uint32_t op_mask() override { return RGW_OP_TYPE_READ; } -}; - -void RGWMetadataSearchOp::pre_exec() -{ - rgw_bucket_object_pre_exec(s); -} - -void RGWMetadataSearchOp::execute(optional_yield y) -{ - op_ret = get_params(); - if (op_ret < 0) - return; - - list > conds; - - if (!s->user->get_info().system) { - conds.push_back(make_pair("permissions", s->user->get_id().to_str())); - } - - if (!s->bucket_name.empty()) { - conds.push_back(make_pair("bucket", s->bucket_name)); - } - - ESQueryCompiler es_query(expression, &conds, custom_prefix); - - static map aliases = { - { "bucket", "bucket" }, /* forces lowercase */ - { "name", "name" }, - { "key", "name" }, - { "instance", "instance" }, - { "etag", "meta.etag" }, - { "size", "meta.size" }, - { "mtime", "meta.mtime" }, - { "lastmodified", "meta.mtime" }, - { "last_modified", "meta.mtime" }, - { "contenttype", "meta.content_type" }, - { "content_type", "meta.content_type" }, - { "storageclass", "meta.storage_class" }, - { "storage_class", "meta.storage_class" }, - }; - es_query.set_field_aliases(&aliases); - - static map generic_map = { {"bucket", ESEntityTypeMap::ES_ENTITY_STR}, - {"name", ESEntityTypeMap::ES_ENTITY_STR}, - {"instance", ESEntityTypeMap::ES_ENTITY_STR}, - {"permissions", 
ESEntityTypeMap::ES_ENTITY_STR}, - {"meta.etag", ESEntityTypeMap::ES_ENTITY_STR}, - {"meta.content_type", ESEntityTypeMap::ES_ENTITY_STR}, - {"meta.mtime", ESEntityTypeMap::ES_ENTITY_DATE}, - {"meta.size", ESEntityTypeMap::ES_ENTITY_INT}, - {"meta.storage_class", ESEntityTypeMap::ES_ENTITY_STR} }; - ESEntityTypeMap gm(generic_map); - es_query.set_generic_type_map(&gm); - - static set restricted_fields = { {"permissions"} }; - es_query.set_restricted_fields(&restricted_fields); - - map custom_map; - for (auto& i : s->bucket->get_info().mdsearch_config) { - custom_map[i.first] = (ESEntityTypeMap::EntityType)i.second; - } - - ESEntityTypeMap em(custom_map); - es_query.set_custom_type_map(&em); - - bool valid = es_query.compile(&err); - if (!valid) { - ldpp_dout(this, 10) << "invalid query, failed generating request json" << dendl; - op_ret = -EINVAL; - return; - } - - JSONFormatter f; - encode_json("root", es_query, &f); - - RGWRESTConn *conn = es_module->get_rest_conn(); - - bufferlist in; - bufferlist out; - - stringstream ss; - - f.flush(ss); - in.append(ss.str()); - - string resource = es_module->get_index_path() + "/_search"; - param_vec_t params; - static constexpr int BUFSIZE = 32; - char buf[BUFSIZE]; - snprintf(buf, sizeof(buf), "%lld", (long long)max_keys); - params.push_back(param_pair_t("size", buf)); - if (marker > 0) { - params.push_back(param_pair_t("from", marker_str.c_str())); - } - ldpp_dout(this, 20) << "sending request to elasticsearch, payload=" << string(in.c_str(), in.length()) << dendl; - auto& extra_headers = es_module->get_request_headers(); - op_ret = conn->get_resource(s, resource, ¶ms, &extra_headers, - out, &in, nullptr, y); - if (op_ret < 0) { - ldpp_dout(this, 0) << "ERROR: failed to fetch resource (r=" << resource << ", ret=" << op_ret << ")" << dendl; - return; - } - - ldpp_dout(this, 20) << "response: " << string(out.c_str(), out.length()) << dendl; - - JSONParser jparser; - if (!jparser.parse(out.c_str(), out.length())) { - ldpp_dout(this, 0) << "ERROR: failed to parse elasticsearch response" << dendl; - op_ret = -EINVAL; - return; - } - - try { - decode_json_obj(response, &jparser); - } catch (const JSONDecoder::err& e) { - ldpp_dout(this, 0) << "ERROR: failed to decode JSON input: " << e.what() << dendl; - op_ret = -EINVAL; - return; - } - -} - -class RGWMetadataSearch_ObjStore_S3 : public RGWMetadataSearchOp { -public: - explicit RGWMetadataSearch_ObjStore_S3(const RGWSyncModuleInstanceRef& _sync_module) : RGWMetadataSearchOp(_sync_module) { - custom_prefix = "x-amz-meta-"; - } - - int get_params() override { - expression = s->info.args.get("query"); - bool exists; - string max_keys_str = s->info.args.get("max-keys", &exists); -#define MAX_KEYS_MAX 10000 - if (exists) { - string err; - max_keys = strict_strtoll(max_keys_str.c_str(), 10, &err); - if (!err.empty()) { - return -EINVAL; - } - if (max_keys > MAX_KEYS_MAX) { - max_keys = MAX_KEYS_MAX; - } - } - marker_str = s->info.args.get("marker", &exists); - if (exists) { - string err; - marker = strict_strtoll(marker_str.c_str(), 10, &err); - if (!err.empty()) { - return -EINVAL; - } - } - uint64_t nm = marker + max_keys; - static constexpr int BUFSIZE = 32; - char buf[BUFSIZE]; - snprintf(buf, sizeof(buf), "%lld", (long long)nm); - next_marker = buf; - return 0; - } - void send_response() override { - if (op_ret) { - s->err.message = err; - set_req_state_err(s, op_ret); - } - dump_errno(s); - end_header(s, this, "application/xml"); - - if (op_ret < 0) { - return; - } - - is_truncated = 
(response.hits.hits.size() >= max_keys); - - s->formatter->open_object_section("SearchMetadataResponse"); - s->formatter->dump_string("Marker", marker_str); - s->formatter->dump_string("IsTruncated", (is_truncated ? "true" : "false")); - if (is_truncated) { - s->formatter->dump_string("NextMarker", next_marker); - } - if (s->format == RGWFormat::JSON) { - s->formatter->open_array_section("Objects"); - } - for (auto& i : response.hits.hits) { - s->formatter->open_object_section("Contents"); - es_index_obj_response& e = i.source; - s->formatter->dump_string("Bucket", e.bucket); - s->formatter->dump_string("Key", e.key.name); - string instance = (!e.key.instance.empty() ? e.key.instance : "null"); - s->formatter->dump_string("Instance", instance.c_str()); - s->formatter->dump_int("VersionedEpoch", e.versioned_epoch); - dump_time(s, "LastModified", e.meta.mtime); - s->formatter->dump_int("Size", e.meta.size); - s->formatter->dump_format("ETag", "\"%s\"", e.meta.etag.c_str()); - s->formatter->dump_string("ContentType", e.meta.content_type.c_str()); - s->formatter->dump_string("StorageClass", e.meta.storage_class.c_str()); - dump_owner(s, e.owner.get_id(), e.owner.get_display_name()); - s->formatter->open_array_section("CustomMetadata"); - for (auto& m : e.meta.custom_str) { - s->formatter->open_object_section("Entry"); - s->formatter->dump_string("Name", m.first.c_str()); - s->formatter->dump_string("Value", m.second); - s->formatter->close_section(); - } - for (auto& m : e.meta.custom_int) { - s->formatter->open_object_section("Entry"); - s->formatter->dump_string("Name", m.first.c_str()); - s->formatter->dump_int("Value", m.second); - s->formatter->close_section(); - } - for (auto& m : e.meta.custom_date) { - s->formatter->open_object_section("Entry"); - s->formatter->dump_string("Name", m.first.c_str()); - s->formatter->dump_string("Value", m.second); - s->formatter->close_section(); - } - s->formatter->close_section(); - rgw_flush_formatter(s, s->formatter); - s->formatter->close_section(); - }; - if (s->format == RGWFormat::JSON) { - s->formatter->close_section(); - } - s->formatter->close_section(); - rgw_flush_formatter_and_reset(s, s->formatter); - } -}; - -class RGWHandler_REST_MDSearch_S3 : public RGWHandler_REST_S3 { -protected: - RGWOp *op_get() override { - if (s->info.args.exists("query")) { - return new RGWMetadataSearch_ObjStore_S3(driver->get_sync_module()); - } - if (!s->init_state.url_bucket.empty() && - s->info.args.exists("mdsearch")) { - return new RGWGetBucketMetaSearch_ObjStore_S3; - } - return nullptr; - } - RGWOp *op_head() override { - return nullptr; - } - RGWOp *op_post() override { - return nullptr; - } -public: - explicit RGWHandler_REST_MDSearch_S3(const rgw::auth::StrategyRegistry& auth_registry) : RGWHandler_REST_S3(auth_registry) {} - virtual ~RGWHandler_REST_MDSearch_S3() {} -}; - - -RGWHandler_REST* RGWRESTMgr_MDSearch_S3::get_handler(rgw::sal::Driver* driver, - req_state* const s, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string& frontend_prefix) -{ - int ret = - RGWHandler_REST_S3::init_from_header(driver, s, - RGWFormat::XML, true); - if (ret < 0) { - return nullptr; - } - - if (!s->object->empty()) { - return nullptr; - } - - RGWHandler_REST *handler = new RGWHandler_REST_MDSearch_S3(auth_registry); - - ldpp_dout(s, 20) << __func__ << " handler=" << typeid(*handler).name() - << dendl; - return handler; -} - diff --git a/src/rgw/store/rados/rgw_sync_module_es_rest.h b/src/rgw/store/rados/rgw_sync_module_es_rest.h deleted file mode 
100644 index b18271a69cd..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_es_rest.h +++ /dev/null @@ -1,18 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include "rgw_rest.h" - -class RGWElasticSyncModuleInstance; - -class RGWRESTMgr_MDSearch_S3 : public RGWRESTMgr { -public: - explicit RGWRESTMgr_MDSearch_S3() {} - - RGWHandler_REST *get_handler(rgw::sal::Driver* driver, - req_state* s, - const rgw::auth::StrategyRegistry& auth_registry, - const std::string& frontend_prefix) override; -}; diff --git a/src/rgw/store/rados/rgw_sync_module_log.cc b/src/rgw/store/rados/rgw_sync_module_log.cc deleted file mode 100644 index a21604cc228..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_log.cc +++ /dev/null @@ -1,76 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_common.h" -#include "rgw_coroutine.h" -#include "rgw_cr_rados.h" -#include "rgw_sync_module.h" -#include "rgw_data_sync.h" -#include "rgw_sync_module_log.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -class RGWLogStatRemoteObjCBCR : public RGWStatRemoteObjCBCR { -public: - RGWLogStatRemoteObjCBCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWStatRemoteObjCBCR(_sc, _src_bucket, _key) {} - int operate(const DoutPrefixProvider *dpp) override { - ldpp_dout(dpp, 0) << "SYNC_LOG: stat of remote obj: z=" << sc->source_zone - << " b=" << src_bucket << " k=" << key << " size=" << size << " mtime=" << mtime - << " attrs=" << attrs << dendl; - return set_cr_done(); - } - -}; - -class RGWLogStatRemoteObjCR : public RGWCallStatRemoteObjCR { -public: - RGWLogStatRemoteObjCR(RGWDataSyncCtx *_sc, - rgw_bucket& _src_bucket, rgw_obj_key& _key) : RGWCallStatRemoteObjCR(_sc, _src_bucket, _key) { - } - - ~RGWLogStatRemoteObjCR() override {} - - RGWStatRemoteObjCBCR *allocate_callback() override { - return new RGWLogStatRemoteObjCBCR(sc, src_bucket, key); - } -}; - -class RGWLogDataSyncModule : public RGWDataSyncModule { - string prefix; -public: - explicit RGWLogDataSyncModule(const string& _prefix) : prefix(_prefix) {} - - RGWCoroutine *sync_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, std::optional versioned_epoch, rgw_zone_set *zones_trace) override { - ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: sync_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl; - return new RGWLogStatRemoteObjCR(sc, sync_pipe.info.source_bs.bucket, key); - } - RGWCoroutine *remove_object(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { - ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: rm_object: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; - return NULL; - } - RGWCoroutine *create_delete_marker(const DoutPrefixProvider *dpp, RGWDataSyncCtx *sc, rgw_bucket_sync_pipe& sync_pipe, rgw_obj_key& key, real_time& mtime, - rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { - ldpp_dout(dpp, 0) << prefix << ": SYNC_LOG: create_delete_marker: b=" << sync_pipe.info.source_bs.bucket << " k=" << key << " mtime=" << 
mtime - << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl; - return NULL; - } -}; - -class RGWLogSyncModuleInstance : public RGWSyncModuleInstance { - RGWLogDataSyncModule data_handler; -public: - explicit RGWLogSyncModuleInstance(const string& prefix) : data_handler(prefix) {} - RGWDataSyncModule *get_data_handler() override { - return &data_handler; - } -}; - -int RGWLogSyncModule::create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { - string prefix = config["prefix"]; - instance->reset(new RGWLogSyncModuleInstance(prefix)); - return 0; -} - diff --git a/src/rgw/store/rados/rgw_sync_module_log.h b/src/rgw/store/rados/rgw_sync_module_log.h deleted file mode 100644 index ecf3bb78911..00000000000 --- a/src/rgw/store/rados/rgw_sync_module_log.h +++ /dev/null @@ -1,18 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_MODULE_LOG_H -#define CEPH_RGW_SYNC_MODULE_LOG_H - -#include "rgw_sync_module.h" - -class RGWLogSyncModule : public RGWSyncModule { -public: - RGWLogSyncModule() {} - bool supports_data_export() override { - return false; - } - int create_instance(const DoutPrefixProvider *dpp, CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; -}; - -#endif diff --git a/src/rgw/store/rados/rgw_sync_trace.cc b/src/rgw/store/rados/rgw_sync_trace.cc deleted file mode 100644 index b346835938d..00000000000 --- a/src/rgw/store/rados/rgw_sync_trace.cc +++ /dev/null @@ -1,290 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_TRACE_H -#define CEPH_RGW_SYNC_TRACE_H - -#include <regex> - -#include "common/debug.h" -#include "common/ceph_json.h" - -#include "rgw_sync_trace.h" -#include "rgw_rados.h" -#include "rgw_worker.h" - -#define dout_context g_ceph_context - -static constexpr auto dout_subsys = ceph_subsys_rgw; - -using namespace std; - - -RGWSyncTraceNode::RGWSyncTraceNode(CephContext *_cct, uint64_t _handle, - const RGWSyncTraceNodeRef& _parent, - const string& _type, const string& _id) : cct(_cct), - parent(_parent), - type(_type), - id(_id), - handle(_handle), - history(cct->_conf->rgw_sync_trace_per_node_log_size) -{ - if (parent.get()) { - prefix = parent->get_prefix(); - } - - if (!type.empty()) { - prefix += type; - if (!id.empty()) { - prefix += "[" + id + "]"; - } - prefix += ":"; - } -} - -void RGWSyncTraceNode::log(int level, const string& s) -{ - status = s; - history.push_back(status); - /* dump output on either rgw_sync, or rgw -- but only once */ - if (cct->_conf->subsys.should_gather(ceph_subsys_rgw_sync, level)) { - lsubdout(cct, rgw_sync, - ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl; - } else { - lsubdout(cct, rgw, - ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl; - } -} - - -class RGWSyncTraceServiceMapThread : public RGWRadosThread { - RGWRados *store; - RGWSyncTraceManager *manager; - - uint64_t interval_msec() override { - return cct->_conf->rgw_sync_trace_servicemap_update_interval * 1000; - } -public: - RGWSyncTraceServiceMapThread(RGWRados *_store, RGWSyncTraceManager *_manager) - : RGWRadosThread(_store, "sync-trace"), store(_store), manager(_manager) {} - - int process(const DoutPrefixProvider *dpp) override; -}; - -int RGWSyncTraceServiceMapThread::process(const DoutPrefixProvider *dpp) -{ - map<string, string> status; 
- status["current_sync"] = manager->get_active_names(); - int ret = store->update_service_map(dpp, std::move(status)); - if (ret < 0) { - ldout(store->ctx(), 0) << "ERROR: update_service_map() returned ret=" << ret << dendl; - } - return 0; -} - -RGWSyncTraceNodeRef RGWSyncTraceManager::add_node(const RGWSyncTraceNodeRef& parent, - const std::string& type, - const std::string& id) -{ - shunique_lock wl(lock, ceph::acquire_unique); - auto handle = alloc_handle(); - RGWSyncTraceNodeRef& ref = nodes[handle]; - ref.reset(new RGWSyncTraceNode(cct, handle, parent, type, id)); - // return a separate shared_ptr that calls finish() on the node instead of - // deleting it. the lambda capture holds a reference to the original 'ref' - auto deleter = [ref, this] (RGWSyncTraceNode *node) { finish_node(node); }; - return {ref.get(), deleter}; -} - -bool RGWSyncTraceNode::match(const string& search_term, bool search_history) -{ - try { - std::regex expr(search_term); - std::smatch m; - - if (regex_search(prefix, m, expr)) { - return true; - } - if (regex_search(status, m,expr)) { - return true; - } - if (!search_history) { - return false; - } - - for (auto h : history) { - if (regex_search(h, m, expr)) { - return true; - } - } - } catch (const std::regex_error& e) { - ldout(cct, 5) << "NOTICE: sync trace: bad expression: bad regex search term" << dendl; - } - - return false; -} - -void RGWSyncTraceManager::init(RGWRados *store) -{ - service_map_thread = new RGWSyncTraceServiceMapThread(store, this); - service_map_thread->start(); -} - -RGWSyncTraceManager::~RGWSyncTraceManager() -{ - cct->get_admin_socket()->unregister_commands(this); - service_map_thread->stop(); - delete service_map_thread; - - nodes.clear(); -} - -int RGWSyncTraceManager::hook_to_admin_command() -{ - AdminSocket *admin_socket = cct->get_admin_socket(); - - admin_commands = { { "sync trace show name=search,type=CephString,req=false", "sync trace show [filter_str]: show current multisite tracing information" }, - { "sync trace history name=search,type=CephString,req=false", "sync trace history [filter_str]: show history of multisite tracing information" }, - { "sync trace active name=search,type=CephString,req=false", "show active multisite sync entities information" }, - { "sync trace active_short name=search,type=CephString,req=false", "show active multisite sync entities entries" } }; - for (auto cmd : admin_commands) { - int r = admin_socket->register_command(cmd[0], this, - cmd[1]); - if (r < 0) { - lderr(cct) << "ERROR: fail to register admin socket command (r=" << r << ")" << dendl; - return r; - } - } - return 0; -} - -static void dump_node(RGWSyncTraceNode *entry, bool show_history, Formatter *f) -{ - f->open_object_section("entry"); - ::encode_json("status", entry->to_str(), f); - if (show_history) { - f->open_array_section("history"); - for (auto h : entry->get_history()) { - ::encode_json("entry", h, f); - } - f->close_section(); - } - f->close_section(); -} - -string RGWSyncTraceManager::get_active_names() -{ - shunique_lock rl(lock, ceph::acquire_shared); - - stringstream ss; - JSONFormatter f; - - f.open_array_section("result"); - for (auto n : nodes) { - auto& entry = n.second; - - if (!entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { - continue; - } - const string& name = entry->get_resource_name(); - if (!name.empty()) { - ::encode_json("entry", name, &f); - } - f.flush(ss); - } - f.close_section(); - f.flush(ss); - - return ss.str(); -} - -int RGWSyncTraceManager::call(std::string_view command, const cmdmap_t& cmdmap, - const 
bufferlist&, - Formatter *f, - std::ostream& ss, - bufferlist& out) { - - bool show_history = (command == "sync trace history"); - bool show_short = (command == "sync trace active_short"); - bool show_active = (command == "sync trace active") || show_short; - - string search; - - auto si = cmdmap.find("search"); - if (si != cmdmap.end()) { - search = boost::get<std::string>(si->second); - } - - shunique_lock rl(lock, ceph::acquire_shared); - - f->open_object_section("result"); - f->open_array_section("running"); - for (auto n : nodes) { - auto& entry = n.second; - - if (!search.empty() && !entry->match(search, show_history)) { - continue; - } - if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { - continue; - } - if (show_short) { - const string& name = entry->get_resource_name(); - if (!name.empty()) { - ::encode_json("entry", name, f); - } - } else { - dump_node(entry.get(), show_history, f); - } - f->flush(out); - } - f->close_section(); - - f->open_array_section("complete"); - for (auto& entry : complete_nodes) { - if (!search.empty() && !entry->match(search, show_history)) { - continue; - } - if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) { - continue; - } - dump_node(entry.get(), show_history, f); - f->flush(out); - } - f->close_section(); - - f->close_section(); - - return 0; -} - -void RGWSyncTraceManager::finish_node(RGWSyncTraceNode *node) -{ - RGWSyncTraceNodeRef old_node; - - { - shunique_lock wl(lock, ceph::acquire_unique); - if (!node) { - return; - } - auto iter = nodes.find(node->handle); - if (iter == nodes.end()) { - /* not found, already finished */ - return; - } - - if (complete_nodes.full()) { - /* take a reference to the entry that is going to be evicted, - * can't let it get evicted under lock held, otherwise - * it's a deadlock as it will call finish_node() - */ - old_node = complete_nodes.front(); - } - - complete_nodes.push_back(iter->second); - nodes.erase(iter); - } -}; - -#endif - diff --git a/src/rgw/store/rados/rgw_sync_trace.h b/src/rgw/store/rados/rgw_sync_trace.h deleted file mode 100644 index 9617dac70db..00000000000 --- a/src/rgw/store/rados/rgw_sync_trace.h +++ /dev/null @@ -1,145 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_SYNC_LOG_H -#define CEPH_RGW_SYNC_LOG_H - -#include <atomic> - -#include "common/ceph_mutex.h" -#include "common/shunique_lock.h" -#include "common/admin_socket.h" - -#include <set> -#include <ostream> -#include <string> -#include <shared_mutex> -#include <boost/circular_buffer.hpp> - -#define SSTR(o) ({ \ - std::stringstream ss; \ - ss << o; \ - ss.str(); \ -}) - -#define RGW_SNS_FLAG_ACTIVE 1 -#define RGW_SNS_FLAG_ERROR 2 - -class RGWRados; -class RGWSyncTraceManager; -class RGWSyncTraceNode; -class RGWSyncTraceServiceMapThread; - -using RGWSyncTraceNodeRef = std::shared_ptr<RGWSyncTraceNode>; - -class RGWSyncTraceNode final { - friend class RGWSyncTraceManager; - - CephContext *cct; - RGWSyncTraceNodeRef parent; - - uint16_t state{0}; - std::string status; - - ceph::mutex lock = ceph::make_mutex("RGWSyncTraceNode::lock"); - - std::string type; - std::string id; - - std::string prefix; - - std::string resource_name; - - uint64_t handle; - - boost::circular_buffer<std::string> history; - - // private constructor, create with RGWSyncTraceManager::add_node() - RGWSyncTraceNode(CephContext *_cct, uint64_t _handle, - const RGWSyncTraceNodeRef& _parent, - const std::string& _type, const std::string& _id); - - public: - void set_resource_name(const std::string& s) { - resource_name = s; - } - - const std::string& get_resource_name() { - return 
resource_name; - } - - void set_flag(uint16_t s) { - state |= s; - } - void unset_flag(uint16_t s) { - state &= ~s; - } - bool test_flags(uint16_t f) { - return (state & f) == f; - } - void log(int level, const std::string& s); - - std::string to_str() { - return prefix + " " + status; - } - - const std::string& get_prefix() { - return prefix; - } - - std::ostream& operator<<(std::ostream& os) { - os << to_str(); - return os; - } - - boost::circular_buffer<std::string>& get_history() { - return history; - } - - bool match(const std::string& search_term, bool search_history); -}; - -class RGWSyncTraceManager : public AdminSocketHook { - friend class RGWSyncTraceNode; - - mutable std::shared_timed_mutex lock; - using shunique_lock = ceph::shunique_lock<std::shared_timed_mutex>; - - CephContext *cct; - RGWSyncTraceServiceMapThread *service_map_thread{nullptr}; - - std::map<uint64_t, RGWSyncTraceNodeRef> nodes; - boost::circular_buffer<RGWSyncTraceNodeRef> complete_nodes; - - std::atomic<uint64_t> count = { 0 }; - - std::list<std::array<std::string, 2> > admin_commands; - - uint64_t alloc_handle() { - return ++count; - } - void finish_node(RGWSyncTraceNode *node); - -public: - RGWSyncTraceManager(CephContext *_cct, int max_lru) : cct(_cct), complete_nodes(max_lru) {} - ~RGWSyncTraceManager(); - - void init(RGWRados *store); - - const RGWSyncTraceNodeRef root_node; - - RGWSyncTraceNodeRef add_node(const RGWSyncTraceNodeRef& parent, - const std::string& type, - const std::string& id = ""); - - int hook_to_admin_command(); - int call(std::string_view command, const cmdmap_t& cmdmap, - const bufferlist&, - Formatter *f, - std::ostream& ss, - bufferlist& out) override; - std::string get_active_names(); -}; - - -#endif diff --git a/src/rgw/store/rados/rgw_tools.cc b/src/rgw/store/rados/rgw_tools.cc deleted file mode 100644 index 5a8aefaac3d..00000000000 --- a/src/rgw/store/rados/rgw_tools.cc +++ /dev/null @@ -1,414 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" -#include "librados/librados_asio.h" - -#include "include/stringify.h" - -#include "rgw_tools.h" -#include "rgw_acl_s3.h" -#include "rgw_aio_throttle.h" -#include "rgw_compression.h" - -#define dout_subsys ceph_subsys_rgw - -#define READ_CHUNK_LEN (512 * 1024) - -using namespace std; - -int rgw_init_ioctx(const DoutPrefixProvider *dpp, - librados::Rados *rados, const rgw_pool& pool, - librados::IoCtx& ioctx, bool create, - bool mostly_omap) -{ - int r = rados->ioctx_create(pool.name.c_str(), ioctx); - if (r == -ENOENT && create) { - r = rados->pool_create(pool.name.c_str()); - if (r == -ERANGE) { - ldpp_dout(dpp, 0) - << __func__ - << " ERROR: librados::Rados::pool_create returned " << cpp_strerror(-r) - << " (this can be due to a pool or placement group misconfiguration, e.g." 
- << " pg_num < pgp_num or mon_max_pg_per_osd exceeded)" - << dendl; - } - if (r < 0 && r != -EEXIST) { - return r; - } - - r = rados->ioctx_create(pool.name.c_str(), ioctx); - if (r < 0) { - return r; - } - - r = ioctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false); - if (r < 0 && r != -EOPNOTSUPP) { - return r; - } - - if (mostly_omap) { - // set pg_autoscale_bias - bufferlist inbl; - float bias = g_conf().get_val<double>("rgw_rados_pool_autoscale_bias"); - int r = rados->mon_command( - "{\"prefix\": \"osd pool set\", \"pool\": \"" + - pool.name + "\", \"var\": \"pg_autoscale_bias\", \"val\": \"" + - stringify(bias) + "\"}", - inbl, NULL, NULL); - if (r < 0) { - ldpp_dout(dpp, 10) << __func__ << " warning: failed to set pg_autoscale_bias on " - << pool.name << dendl; - } - // set recovery_priority - int p = g_conf().get_val<uint64_t>("rgw_rados_pool_recovery_priority"); - r = rados->mon_command( - "{\"prefix\": \"osd pool set\", \"pool\": \"" + - pool.name + "\", \"var\": \"recovery_priority\": \"" + - stringify(p) + "\"}", - inbl, NULL, NULL); - if (r < 0) { - ldpp_dout(dpp, 10) << __func__ << " warning: failed to set recovery_priority on " - << pool.name << dendl; - } - } - } else if (r < 0) { - return r; - } - if (!pool.ns.empty()) { - ioctx.set_namespace(pool.ns); - } - return 0; -} - -map<string, bufferlist>* no_change_attrs() { - static map<string, bufferlist> no_change; - return &no_change; -} - -int rgw_put_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, - const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive, - RGWObjVersionTracker *objv_tracker, real_time set_mtime, optional_yield y, map<string, bufferlist> *pattrs) -{ - map<string, bufferlist> no_attrs; - if (!pattrs) { - pattrs = &no_attrs; - } - - rgw_raw_obj obj(pool, oid); - - auto sysobj = svc_sysobj->get_obj(obj); - int ret; - - if (pattrs != no_change_attrs()) { - ret = sysobj.wop() - .set_objv_tracker(objv_tracker) - .set_exclusive(exclusive) - .set_mtime(set_mtime) - .set_attrs(*pattrs) - .write(dpp, data, y); - } else { - ret = sysobj.wop() - .set_objv_tracker(objv_tracker) - .set_exclusive(exclusive) - .set_mtime(set_mtime) - .write_data(dpp, data, y); - } - - return ret; -} - -int rgw_stat_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, - const rgw_pool& pool, const std::string& key, - RGWObjVersionTracker *objv_tracker, - real_time *pmtime, optional_yield y, - std::map<std::string, bufferlist> *pattrs) -{ - rgw_raw_obj obj(pool, key); - auto sysobj = svc_sysobj->get_obj(obj); - return sysobj.rop() - .set_attrs(pattrs) - .set_last_mod(pmtime) - .stat(y, dpp); -} - - -int rgw_get_system_obj(RGWSI_SysObj* svc_sysobj, const rgw_pool& pool, const string& key, bufferlist& bl, - RGWObjVersionTracker *objv_tracker, real_time *pmtime, optional_yield y, - const DoutPrefixProvider *dpp, map<string, bufferlist> *pattrs, - rgw_cache_entry_info *cache_info, - boost::optional<obj_version> refresh_version, bool raw_attrs) -{ - const rgw_raw_obj obj(pool, key); - auto sysobj = svc_sysobj->get_obj(obj); - auto rop = sysobj.rop(); - return rop.set_attrs(pattrs) - .set_last_mod(pmtime) - .set_objv_tracker(objv_tracker) - .set_raw_attrs(raw_attrs) - .set_cache_info(cache_info) - .set_refresh_version(refresh_version) - .read(dpp, &bl, y); -} - -int rgw_delete_system_obj(const DoutPrefixProvider *dpp, - RGWSI_SysObj *sysobj_svc, const rgw_pool& pool, const string& oid, - RGWObjVersionTracker *objv_tracker, optional_yield y) -{ - auto sysobj = sysobj_svc->get_obj(rgw_raw_obj{pool, oid}); - rgw_raw_obj obj(pool, oid); - return sysobj.wop() - .set_objv_tracker(objv_tracker) - .remove(dpp, y); -} - -int 
rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - librados::ObjectReadOperation *op, bufferlist* pbl, - optional_yield y, int flags) -{ - // given a yield_context, call async_operate() to yield the coroutine instead - // of blocking - if (y) { - auto& context = y.get_io_context(); - auto& yield = y.get_yield_context(); - boost::system::error_code ec; - auto bl = librados::async_operate( - context, ioctx, oid, op, flags, yield[ec]); - if (pbl) { - *pbl = std::move(bl); - } - return -ec.value(); - } - // work on asio threads should be asynchronous, so warn when they block - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; - } - return ioctx.operate(oid, op, nullptr, flags); -} - -int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - librados::ObjectWriteOperation *op, optional_yield y, - int flags) -{ - if (y) { - auto& context = y.get_io_context(); - auto& yield = y.get_yield_context(); - boost::system::error_code ec; - librados::async_operate(context, ioctx, oid, op, flags, yield[ec]); - return -ec.value(); - } - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; - } - return ioctx.operate(oid, op, flags); -} - -int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, - optional_yield y) -{ - if (y) { - auto& context = y.get_io_context(); - auto& yield = y.get_yield_context(); - boost::system::error_code ec; - auto reply = librados::async_notify(context, ioctx, oid, - bl, timeout_ms, yield[ec]); - if (pbl) { - *pbl = std::move(reply); - } - return -ec.value(); - } - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; - } - return ioctx.notify2(oid, bl, timeout_ms, pbl); -} - -void rgw_filter_attrset(map<string, bufferlist>& unfiltered_attrset, const string& check_prefix, - map<string, bufferlist> *attrset) -{ - attrset->clear(); - map<string, bufferlist>::iterator iter; - for (iter = unfiltered_attrset.lower_bound(check_prefix); - iter != unfiltered_attrset.end(); ++iter) { - if (!boost::algorithm::starts_with(iter->first, check_prefix)) - break; - (*attrset)[iter->first] = iter->second; - } -} - -RGWDataAccess::RGWDataAccess(rgw::sal::Driver* _driver) : driver(_driver) -{ -} - - -int RGWDataAccess::Bucket::finish_init() -{ - auto iter = attrs.find(RGW_ATTR_ACL); - if (iter == attrs.end()) { - return 0; - } - - bufferlist::const_iterator bliter = iter->second.begin(); - try { - policy.decode(bliter); - } catch (buffer::error& err) { - return -EIO; - } - - return 0; -} - -int RGWDataAccess::Bucket::init(const DoutPrefixProvider *dpp, optional_yield y) -{ - std::unique_ptr<rgw::sal::Bucket> bucket; - int ret = sd->driver->get_bucket(dpp, nullptr, tenant, name, &bucket, y); - if (ret < 0) { - return ret; - } - - bucket_info = bucket->get_info(); - mtime = bucket->get_modification_time(); - attrs = bucket->get_attrs(); - - return finish_init(); -} - -int RGWDataAccess::Bucket::init(const RGWBucketInfo& _bucket_info, - const map<string, bufferlist>& _attrs) -{ - bucket_info = _bucket_info; - attrs = _attrs; - - return finish_init(); -} - -int RGWDataAccess::Bucket::get_object(const rgw_obj_key& key, - ObjectRef *obj) { - obj->reset(new Object(sd, shared_from_this(), key)); - return 0; -} - -int RGWDataAccess::Object::put(bufferlist& data, - map<string, bufferlist>& attrs, - const DoutPrefixProvider *dpp, - optional_yield y) -{ - rgw::sal::Driver* driver = sd->driver; - CephContext *cct = 
driver->ctx(); - - string tag; - append_rand_alpha(cct, tag, tag, 32); - - RGWBucketInfo& bucket_info = bucket->bucket_info; - - rgw::BlockingAioThrottle aio(driver->ctx()->_conf->rgw_put_obj_min_window_size); - - std::unique_ptr<rgw::sal::Bucket> b; - driver->get_bucket(NULL, bucket_info, &b); - std::unique_ptr<rgw::sal::Object> obj = b->get_object(key); - - auto& owner = bucket->policy.get_owner(); - - string req_id = driver->zone_unique_id(driver->get_new_req_id()); - - std::unique_ptr<rgw::sal::Writer> processor; - processor = driver->get_atomic_writer(dpp, y, std::move(obj), - owner.get_id(), - nullptr, olh_epoch, req_id); - - int ret = processor->prepare(y); - if (ret < 0) - return ret; - - rgw::sal::DataProcessor *filter = processor.get(); - - CompressorRef plugin; - boost::optional<RGWPutObj_Compress> compressor; - - const auto& compression_type = driver->get_compression_type(bucket_info.placement_rule); - if (compression_type != "none") { - plugin = Compressor::create(driver->ctx(), compression_type); - if (!plugin) { - ldpp_dout(dpp, 1) << "Cannot load plugin for compression type " - << compression_type << dendl; - } else { - compressor.emplace(driver->ctx(), plugin, filter); - filter = &*compressor; - } - } - - off_t ofs = 0; - auto obj_size = data.length(); - - RGWMD5Etag etag_calc; - - do { - size_t read_len = std::min(data.length(), (unsigned int)cct->_conf->rgw_max_chunk_size); - - bufferlist bl; - - data.splice(0, read_len, &bl); - etag_calc.update(bl); - - ret = filter->process(std::move(bl), ofs); - if (ret < 0) - return ret; - - ofs += read_len; - } while (data.length() > 0); - - ret = filter->process({}, ofs); - if (ret < 0) { - return ret; - } - bool has_etag_attr = false; - auto iter = attrs.find(RGW_ATTR_ETAG); - if (iter != attrs.end()) { - bufferlist& bl = iter->second; - etag = bl.to_str(); - has_etag_attr = true; - } - - if (!aclbl) { - RGWAccessControlPolicy_S3 policy(cct); - - policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */ - - policy.encode(aclbl.emplace()); - } - - if (etag.empty()) { - etag_calc.finish(&etag); - } - - if (!has_etag_attr) { - bufferlist etagbl; - etagbl.append(etag); - attrs[RGW_ATTR_ETAG] = etagbl; - } - attrs[RGW_ATTR_ACL] = *aclbl; - - string *puser_data = nullptr; - if (user_data) { - puser_data = &(*user_data); - } - - return processor->complete(obj_size, etag, - &mtime, mtime, - attrs, delete_at, - nullptr, nullptr, - puser_data, - nullptr, nullptr, y); -} - -void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy) -{ - policy.encode(aclbl.emplace()); -} - -void rgw_complete_aio_completion(librados::AioCompletion* c, int r) { - auto pc = c->pc; - librados::CB_AioCompleteAndSafe cb(pc); - cb(r); -} diff --git a/src/rgw/store/rados/rgw_tools.h b/src/rgw/store/rados/rgw_tools.h deleted file mode 100644 index 6aeb9b89100..00000000000 --- a/src/rgw/store/rados/rgw_tools.h +++ /dev/null @@ -1,277 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_TOOLS_H -#define CEPH_RGW_TOOLS_H - -#include <string> - -#include "include/types.h" -#include "include/ceph_hash.h" - -#include "common/ceph_time.h" - -#include "rgw_common.h" -#include "rgw_sal_fwd.h" - -class RGWSI_SysObj; - -class RGWRados; -struct RGWObjVersionTracker; -class optional_yield; - -struct obj_version; - - -int rgw_init_ioctx(const DoutPrefixProvider *dpp, - librados::Rados *rados, const rgw_pool& pool, - librados::IoCtx& ioctx, - bool create = false, - bool mostly_omap = false); - -#define 
RGW_NO_SHARD -1 - -#define RGW_SHARDS_PRIME_0 7877 -#define RGW_SHARDS_PRIME_1 65521 - -extern const std::string MP_META_SUFFIX; - -inline int rgw_shards_max() -{ - return RGW_SHARDS_PRIME_1; -} - -// only called by rgw_shard_id and rgw_bucket_shard_index -static inline int rgw_shards_mod(unsigned hval, int max_shards) -{ - if (max_shards <= RGW_SHARDS_PRIME_0) { - return hval % RGW_SHARDS_PRIME_0 % max_shards; - } - return hval % RGW_SHARDS_PRIME_1 % max_shards; -} - -// used for logging and tagging -inline int rgw_shard_id(const std::string& key, int max_shards) -{ - return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), - max_shards); -} - -void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& key, std::string& name, int *shard_id); -void rgw_shard_name(const std::string& prefix, unsigned max_shards, const std::string& section, const std::string& key, std::string& name); -void rgw_shard_name(const std::string& prefix, unsigned shard_id, std::string& name); - -int rgw_put_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, - const rgw_pool& pool, const std::string& oid, - bufferlist& data, bool exclusive, - RGWObjVersionTracker *objv_tracker, - real_time set_mtime, optional_yield y, - std::map<std::string, bufferlist> *pattrs = nullptr); -int rgw_get_system_obj(RGWSI_SysObj* svc_sysobj, const rgw_pool& pool, - const std::string& key, bufferlist& bl, - RGWObjVersionTracker *objv_tracker, real_time *pmtime, - optional_yield y, const DoutPrefixProvider *dpp, - std::map<std::string, bufferlist> *pattrs = nullptr, - rgw_cache_entry_info *cache_info = nullptr, - boost::optional<obj_version> refresh_version = boost::none, - bool raw_attrs=false); -int rgw_delete_system_obj(const DoutPrefixProvider *dpp, - RGWSI_SysObj *sysobj_svc, const rgw_pool& pool, const std::string& oid, - RGWObjVersionTracker *objv_tracker, optional_yield y); -int rgw_stat_system_obj(const DoutPrefixProvider *dpp, RGWSI_SysObj* svc_sysobj, - const rgw_pool& pool, const std::string& key, - RGWObjVersionTracker *objv_tracker, - real_time *pmtime, optional_yield y, - std::map<std::string, bufferlist> *pattrs = nullptr); - -const char *rgw_find_mime_by_ext(std::string& ext); - -void rgw_filter_attrset(std::map<std::string, bufferlist>& unfiltered_attrset, const std::string& check_prefix, - std::map<std::string, bufferlist> *attrset); - -/// indicates whether the current thread is in boost::asio::io_context::run(), -/// used to log warnings if synchronous librados calls are made -extern thread_local bool is_asio_thread; - -/// perform the rados operation, using the yield context when given -int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - librados::ObjectReadOperation *op, bufferlist* pbl, - optional_yield y, int flags = 0); -int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - librados::ObjectWriteOperation *op, optional_yield y, - int flags = 0); -int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, - bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, - optional_yield y); - -int rgw_tools_init(const DoutPrefixProvider *dpp, CephContext *cct); -void rgw_tools_cleanup(); - -template <class H, size_t S> -class RGWEtag -{ - H hash; - -public: - RGWEtag() { - if constexpr (std::is_same_v<H, MD5>) { - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - } - } - - void update(const char *buf, size_t len) { - hash.Update((const unsigned char *)buf, len); - } - - void update(bufferlist& bl) { - if (bl.length() > 0) { 
- update(bl.c_str(), bl.length()); - } - } - - void update(const std::string& s) { - if (!s.empty()) { - update(s.c_str(), s.size()); - } - } - void finish(std::string *etag) { - char etag_buf[S]; - char etag_buf_str[S * 2 + 16]; - - hash.Final((unsigned char *)etag_buf); - buf_to_hex((const unsigned char *)etag_buf, S, - etag_buf_str); - - *etag = etag_buf_str; - } -}; - -using RGWMD5Etag = RGWEtag<MD5, CEPH_CRYPTO_MD5_DIGESTSIZE>; - -class RGWDataAccess -{ - rgw::sal::Driver* driver; - -public: - RGWDataAccess(rgw::sal::Driver* _driver); - - class Object; - class Bucket; - - using BucketRef = std::shared_ptr<Bucket>; - using ObjectRef = std::shared_ptr<Object>; - - class Bucket : public std::enable_shared_from_this<Bucket> { - friend class RGWDataAccess; - friend class Object; - - RGWDataAccess *sd{nullptr}; - RGWBucketInfo bucket_info; - std::string tenant; - std::string name; - std::string bucket_id; - ceph::real_time mtime; - std::map<std::string, bufferlist> attrs; - - RGWAccessControlPolicy policy; - int finish_init(); - - Bucket(RGWDataAccess *_sd, - const std::string& _tenant, - const std::string& _name, - const std::string& _bucket_id) : sd(_sd), - tenant(_tenant), - name(_name), - bucket_id(_bucket_id) {} - Bucket(RGWDataAccess *_sd) : sd(_sd) {} - int init(const DoutPrefixProvider *dpp, optional_yield y); - int init(const RGWBucketInfo& _bucket_info, const std::map<std::string, bufferlist>& _attrs); - public: - int get_object(const rgw_obj_key& key, - ObjectRef *obj); - - }; - - - class Object { - RGWDataAccess *sd{nullptr}; - BucketRef bucket; - rgw_obj_key key; - - ceph::real_time mtime; - std::string etag; - uint64_t olh_epoch{0}; - ceph::real_time delete_at; - std::optional<std::string> user_data; - - std::optional<bufferlist> aclbl; - - Object(RGWDataAccess *_sd, - BucketRef&& _bucket, - const rgw_obj_key& _key) : sd(_sd), - bucket(_bucket), - key(_key) {} - public: - int put(bufferlist& data, std::map<std::string, bufferlist>& attrs, const DoutPrefixProvider *dpp, optional_yield y); /* might modify attrs */ - - void set_mtime(const ceph::real_time& _mtime) { - mtime = _mtime; - } - - void set_etag(const std::string& _etag) { - etag = _etag; - } - - void set_olh_epoch(uint64_t epoch) { - olh_epoch = epoch; - } - - void set_delete_at(ceph::real_time _delete_at) { - delete_at = _delete_at; - } - - void set_user_data(const std::string& _user_data) { - user_data = _user_data; - } - - void set_policy(const RGWAccessControlPolicy& policy); - - friend class Bucket; - }; - - int get_bucket(const DoutPrefixProvider *dpp, - const std::string& tenant, - const std::string name, - const std::string bucket_id, - BucketRef *bucket, - optional_yield y) { - bucket->reset(new Bucket(this, tenant, name, bucket_id)); - return (*bucket)->init(dpp, y); - } - - int get_bucket(const RGWBucketInfo& bucket_info, - const std::map<std::string, bufferlist>& attrs, - BucketRef *bucket) { - bucket->reset(new Bucket(this)); - return (*bucket)->init(bucket_info, attrs); - } - friend class Bucket; - friend class Object; -}; - -using RGWDataAccessRef = std::shared_ptr<RGWDataAccess>; - -/// Complete an AioCompletion. To return error values or otherwise -/// satisfy the caller. Useful for making complicated asynchronous -/// calls and error handling. -void rgw_complete_aio_completion(librados::AioCompletion* c, int r); - -/// This returns a static, non-NULL pointer, recognized only by -/// rgw_put_system_obj(). When supplied instead of the attributes, the -/// attributes will be unmodified. -/// -// (Currently providing nullptr will wipe all attributes.) 
- -std::map<std::string, bufferlist>* no_change_attrs(); -#endif diff --git a/src/rgw/store/rados/rgw_trim_bilog.cc b/src/rgw/store/rados/rgw_trim_bilog.cc deleted file mode 100644 index 6ddda5d6b17..00000000000 --- a/src/rgw/store/rados/rgw_trim_bilog.cc +++ /dev/null @@ -1,1445 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2017 Red Hat, Inc - * - * Author: Casey Bodley <cbodley@redhat.com> - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - */ - -#include -#include -#include - -#include "include/scope_guard.h" -#include "common/bounded_key_counter.h" -#include "common/errno.h" -#include "rgw_trim_bilog.h" -#include "rgw_cr_rados.h" -#include "rgw_cr_rest.h" -#include "rgw_cr_tools.h" -#include "rgw_data_sync.h" -#include "rgw_metadata.h" -#include "rgw_sal.h" -#include "rgw_zone.h" -#include "rgw_sync.h" -#include "rgw_bucket.h" - -#include "services/svc_zone.h" -#include "services/svc_meta.h" -#include "services/svc_bilog_rados.h" - -#include <boost/asio/yield.hpp> -#include "include/ceph_assert.h" - -#define dout_subsys ceph_subsys_rgw - -#undef dout_prefix -#define dout_prefix (*_dout << "trim: ") - -using namespace std; - -using rgw::BucketTrimConfig; -using BucketChangeCounter = BoundedKeyCounter<std::string, int>; - -const std::string rgw::BucketTrimStatus::oid = "bilog.trim"; -using rgw::BucketTrimStatus; - - -// watch/notify api for gateways to coordinate about which buckets to trim -enum TrimNotifyType { - NotifyTrimCounters = 0, - NotifyTrimComplete, -}; -WRITE_RAW_ENCODER(TrimNotifyType); - -struct TrimNotifyHandler { - virtual ~TrimNotifyHandler() = default; - - virtual void handle(bufferlist::const_iterator& input, bufferlist& output) = 0; -}; - -/// api to share the bucket trim counters between gateways in the same zone. 
-/// each gateway will process different datalog shards, so the gateway that runs -/// the trim process needs to accumulate their counters -struct TrimCounters { - /// counter for a single bucket - struct BucketCounter { - std::string bucket; //< bucket instance metadata key - int count{0}; - - BucketCounter() = default; - BucketCounter(const std::string& bucket, int count) - : bucket(bucket), count(count) {} - - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& p); - }; - using Vector = std::vector<BucketCounter>; - - /// request bucket trim counters from peer gateways - struct Request { - uint16_t max_buckets; //< maximum number of bucket counters to return - - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& p); - }; - - /// return the current bucket trim counters - struct Response { - Vector bucket_counters; - - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& p); - }; - - /// server interface to query the hottest buckets - struct Server { - virtual ~Server() = default; - - virtual void get_bucket_counters(int count, Vector& counters) = 0; - virtual void reset_bucket_counters() = 0; - }; - - /// notify handler - class Handler : public TrimNotifyHandler { - Server *const server; - public: - explicit Handler(Server *server) : server(server) {} - - void handle(bufferlist::const_iterator& input, bufferlist& output) override; - }; -}; -std::ostream& operator<<(std::ostream& out, const TrimCounters::BucketCounter& rhs) -{ - return out << rhs.bucket << ":" << rhs.count; -} - -void TrimCounters::BucketCounter::encode(bufferlist& bl) const -{ - using ceph::encode; - // no versioning to save space - encode(bucket, bl); - encode(count, bl); -} -void TrimCounters::BucketCounter::decode(bufferlist::const_iterator& p) -{ - using ceph::decode; - decode(bucket, p); - decode(count, p); -} -WRITE_CLASS_ENCODER(TrimCounters::BucketCounter); - -void TrimCounters::Request::encode(bufferlist& bl) const -{ - ENCODE_START(1, 1, bl); - encode(max_buckets, bl); - ENCODE_FINISH(bl); -} -void TrimCounters::Request::decode(bufferlist::const_iterator& p) -{ - DECODE_START(1, p); - decode(max_buckets, p); - DECODE_FINISH(p); -} -WRITE_CLASS_ENCODER(TrimCounters::Request); - -void TrimCounters::Response::encode(bufferlist& bl) const -{ - ENCODE_START(1, 1, bl); - encode(bucket_counters, bl); - ENCODE_FINISH(bl); -} -void TrimCounters::Response::decode(bufferlist::const_iterator& p) -{ - DECODE_START(1, p); - decode(bucket_counters, p); - DECODE_FINISH(p); -} -WRITE_CLASS_ENCODER(TrimCounters::Response); - -void TrimCounters::Handler::handle(bufferlist::const_iterator& input, - bufferlist& output) -{ - Request request; - decode(request, input); - auto count = std::min<uint16_t>(request.max_buckets, 128); - - Response response; - server->get_bucket_counters(count, response.bucket_counters); - encode(response, output); -} - -/// api to notify peer gateways that trim has completed and their bucket change -/// counters can be reset -struct TrimComplete { - struct Request { - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& p); - }; - struct Response { - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& p); - }; - - /// server interface to reset bucket counters - using Server = TrimCounters::Server; - - /// notify handler - class Handler : public TrimNotifyHandler { - Server *const server; - public: - explicit Handler(Server *server) : server(server) {} - - void handle(bufferlist::const_iterator& input, 
bufferlist& output) override; - }; -}; - -void TrimComplete::Request::encode(bufferlist& bl) const -{ - ENCODE_START(1, 1, bl); - ENCODE_FINISH(bl); -} -void TrimComplete::Request::decode(bufferlist::const_iterator& p) -{ - DECODE_START(1, p); - DECODE_FINISH(p); -} -WRITE_CLASS_ENCODER(TrimComplete::Request); - -void TrimComplete::Response::encode(bufferlist& bl) const -{ - ENCODE_START(1, 1, bl); - ENCODE_FINISH(bl); -} -void TrimComplete::Response::decode(bufferlist::const_iterator& p) -{ - DECODE_START(1, p); - DECODE_FINISH(p); -} -WRITE_CLASS_ENCODER(TrimComplete::Response); - -void TrimComplete::Handler::handle(bufferlist::const_iterator& input, - bufferlist& output) -{ - Request request; - decode(request, input); - - server->reset_bucket_counters(); - - Response response; - encode(response, output); -} - - -/// rados watcher for bucket trim notifications -class BucketTrimWatcher : public librados::WatchCtx2 { - rgw::sal::RadosStore* const store; - const rgw_raw_obj& obj; - rgw_rados_ref ref; - uint64_t handle{0}; - - using HandlerPtr = std::unique_ptr<TrimNotifyHandler>; - boost::container::flat_map<TrimNotifyType, HandlerPtr> handlers; - - public: - BucketTrimWatcher(rgw::sal::RadosStore* store, const rgw_raw_obj& obj, - TrimCounters::Server *counters) - : store(store), obj(obj) { - handlers.emplace(NotifyTrimCounters, new TrimCounters::Handler(counters)); - handlers.emplace(NotifyTrimComplete, new TrimComplete::Handler(counters)); - } - - ~BucketTrimWatcher() { - stop(); - } - - int start(const DoutPrefixProvider *dpp) { - int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); - if (r < 0) { - return r; - } - - // register a watch on the realm's control object - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); - if (r == -ENOENT) { - constexpr bool exclusive = true; - r = ref.pool.ioctx().create(ref.obj.oid, exclusive); - if (r == -EEXIST || r == 0) { - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); - } - } - if (r < 0) { - ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj - << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); - return r; - } - - ldpp_dout(dpp, 10) << "Watching " << ref.obj.oid << dendl; - return 0; - } - - int restart() { - int r = ref.pool.ioctx().unwatch2(handle); - if (r < 0) { - lderr(store->ctx()) << "Failed to unwatch on " << ref.obj - << " with " << cpp_strerror(-r) << dendl; - } - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); - if (r < 0) { - lderr(store->ctx()) << "Failed to restart watch on " << ref.obj - << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); - } - return r; - } - - void stop() { - if (handle) { - ref.pool.ioctx().unwatch2(handle); - ref.pool.ioctx().close(); - } - } - - /// respond to bucket trim notifications - void handle_notify(uint64_t notify_id, uint64_t cookie, - uint64_t notifier_id, bufferlist& bl) override { - if (cookie != handle) { - return; - } - bufferlist reply; - try { - auto p = bl.cbegin(); - TrimNotifyType type; - decode(type, p); - - auto handler = handlers.find(type); - if (handler != handlers.end()) { - handler->second->handle(p, reply); - } else { - lderr(store->ctx()) << "no handler for notify type " << type << dendl; - } - } catch (const buffer::error& e) { - lderr(store->ctx()) << "Failed to decode notification: " << e.what() << dendl; - } - ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply); - } - - /// reestablish the watch if it gets disconnected - void handle_error(uint64_t cookie, int err) override { - if (cookie != handle) { - return; - } - if (err == -ENOTCONN) { - 
ldout(store->ctx(), 4) << "Disconnected watch on " << ref.obj << dendl; - restart(); - } - } -}; - - -/// Interface to communicate with the trim manager about completed operations -struct BucketTrimObserver { - virtual ~BucketTrimObserver() = default; - - virtual void on_bucket_trimmed(std::string&& bucket_instance) = 0; - virtual bool trimmed_recently(const std::string_view& bucket_instance) = 0; -}; - -/// trim each bilog shard to the given marker, while limiting the number of -/// concurrent requests -class BucketTrimShardCollectCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* const store; - const RGWBucketInfo& bucket_info; - rgw::bucket_index_layout_generation generation; - const std::vector<std::string>& markers; //< shard markers to trim - size_t i{0}; //< index of current shard marker - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to trim bilog shard: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - BucketTrimShardCollectCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info, - const rgw::bucket_index_layout_generation& generation, - const std::vector<std::string>& markers) - : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS), - dpp(dpp), store(store), bucket_info(bucket_info), - generation(generation), markers(markers) - {} - bool spawn_next() override; -}; - -bool BucketTrimShardCollectCR::spawn_next() -{ - while (i < markers.size()) { - const auto& marker = markers[i]; - const auto shard_id = i++; - - // skip empty markers - if (!marker.empty()) { - ldpp_dout(dpp, 10) << "trimming bilog shard " << shard_id - << " of " << bucket_info.bucket << " at marker " << marker << dendl; - spawn(new RGWRadosBILogTrimCR(dpp, store, bucket_info, shard_id, - generation, std::string{}, marker), - false); - return true; - } - } - return false; -} - -/// Delete a BI generation, limiting the number of requests in flight. 
-class BucketCleanIndexCollectCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* const store; - const RGWBucketInfo& bucket_info; - rgw::bucket_index_layout_generation index; - uint32_t shard = 0; - const uint32_t num_shards = rgw::num_shards(index); - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "clean index: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - BucketCleanIndexCollectCR(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - const RGWBucketInfo& bucket_info, - rgw::bucket_index_layout_generation index) - : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS), - dpp(dpp), store(store), bucket_info(bucket_info), - index(index) - {} - bool spawn_next() override { - if (shard < num_shards) { - RGWRados::BucketShard bs(store->getRados()); - bs.init(dpp, bucket_info, index, shard); - spawn(new RGWRadosRemoveOidCR(store, std::move(bs.bucket_obj), nullptr), - false); - ++shard; - return true; - } else { - return false; - } - } -}; - - -/// trim the bilog of all of the given bucket instance's shards -class BucketTrimInstanceCR : public RGWCoroutine { - static constexpr auto MAX_RETRIES = 25u; - rgw::sal::RadosStore* const store; - RGWHTTPManager *const http; - BucketTrimObserver *const observer; - std::string bucket_instance; - rgw_bucket_get_sync_policy_params get_policy_params; - std::shared_ptr<rgw_bucket_get_sync_policy_result> source_policy; - rgw_bucket bucket; - const std::string& zone_id; //< my zone id - RGWBucketInfo _bucket_info; - const RGWBucketInfo *pbucket_info; //< pointer to bucket instance info to locate bucket indices - int child_ret = 0; - const DoutPrefixProvider *dpp; -public: - struct StatusShards { - uint64_t generation = 0; - std::vector<rgw_bucket_shard_sync_info> shards; - }; -private: - std::vector<StatusShards> peer_status; //< sync status for each peer - std::vector<std::string> min_markers; //< min marker per shard - - /// The log generation to trim - rgw::bucket_log_layout_generation totrim; - - /// Generation to be cleaned/New bucket info (if any) - std::optional<std::pair<RGWBucketInfo, rgw::bucket_log_layout_generation>> clean_info; - /// Maximum number of times to attempt to put bucket info - unsigned retries = 0; - - int take_min_generation() { - // Initialize the min_generation to the bucket's current - // generation, used in case we have no peers. - auto min_generation = pbucket_info->layout.logs.back().gen; - - // Determine the minimum generation - if (auto m = std::min_element(peer_status.begin(), - peer_status.end(), - [](const StatusShards& l, - const StatusShards& r) { - return l.generation < r.generation; - }); m != peer_status.end()) { - min_generation = m->generation; - } - - auto& logs = pbucket_info->layout.logs; - auto log = std::find_if(logs.begin(), logs.end(), - rgw::matches_gen(min_generation)); - if (log == logs.end()) { - ldpp_dout(dpp, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << "ERROR: No log layout for min_generation=" - << min_generation << dendl; - return -ENOENT; - } - - totrim = *log; - return 0; - } - - /// If there is a generation below the minimum, prepare to clean it up. - int maybe_remove_generation() { - if (clean_info) - return 0; - - - if (pbucket_info->layout.logs.front().gen < totrim.gen) { - clean_info = {*pbucket_info, {}}; - auto log = clean_info->first.layout.logs.cbegin(); - clean_info->second = *log; - - if (clean_info->first.layout.logs.size() == 1) { - ldpp_dout(dpp, -1) - << "Critical error! Attempt to remove only log generation! 
" - << "log.gen=" << log->gen << ", totrim.gen=" << totrim.gen - << dendl; - return -EIO; - } - clean_info->first.layout.logs.erase(log); - } - return 0; - } - - public: - BucketTrimInstanceCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, - BucketTrimObserver *observer, - const std::string& bucket_instance, - const DoutPrefixProvider *dpp) - : RGWCoroutine(store->ctx()), store(store), - http(http), observer(observer), - bucket_instance(bucket_instance), - zone_id(store->svc()->zone->get_zone().id), - dpp(dpp) { - rgw_bucket_parse_bucket_key(cct, bucket_instance, &bucket, nullptr); - source_policy = make_shared<rgw_bucket_get_sync_policy_result>(); - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -namespace { -/// populate the status with the minimum stable marker of each shard -int take_min_status( - CephContext *cct, - const uint64_t min_generation, - std::vector<BucketTrimInstanceCR::StatusShards>::const_iterator first, - std::vector<BucketTrimInstanceCR::StatusShards>::const_iterator last, - std::vector<std::string> *status) { - for (auto peer = first; peer != last; ++peer) { - // Peers on later generations don't get a say in the matter - if (peer->generation > min_generation) { - continue; - } - if (peer->shards.size() != status->size()) { - // all peers must agree on the number of shards - return -EINVAL; - } - - auto m = status->begin(); - for (auto& shard : peer->shards) { - auto& marker = *m++; - // always take the first marker, or any later marker that's smaller - if (peer == first || marker > shard.inc_marker.position) { - marker = std::move(shard.inc_marker.position); - } - } - } - return 0; -} -} - -template<> -inline int parse_decode_json( - BucketTrimInstanceCR::StatusShards& s, bufferlist& bl) -{ - JSONParser p; - if (!p.parse(bl.c_str(), bl.length())) { - return -EINVAL; - } - - try { - bilog_status_v2 v; - decode_json_obj(v, &p); - s.generation = v.sync_status.incremental_gen; - s.shards = std::move(v.inc_status); - } catch (JSONDecoder::err& e) { - try { - // Fall back if we're talking to an old node that can't give v2 - // output. 
- s.generation = 0; - decode_json_obj(s.shards, &p); - } catch (JSONDecoder::err& e) { - return -EINVAL; - } - } - return 0; -} - -int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - ldpp_dout(dpp, 4) << "starting trim on bucket=" << bucket_instance << dendl; - - get_policy_params.zone = zone_id; - get_policy_params.bucket = bucket; - yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->rados->get_async_processor(), - store, - get_policy_params, - source_policy, - dpp)); - if (retcode < 0) { - if (retcode != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: failed to fetch policy handler for bucket=" << bucket << dendl; - } - - return set_cr_error(retcode); - } - - if (auto& opt_bucket_info = source_policy->policy_handler->get_bucket_info(); - opt_bucket_info) { - pbucket_info = &(*opt_bucket_info); - } else { - /* this shouldn't really happen */ - return set_cr_error(-ENOENT); - } - - if (pbucket_info->layout.logs.empty()) { - return set_cr_done(); // no bilogs to trim - } - - // query peers for sync status - set_status("fetching sync status from relevant peers"); - yield { - const auto& all_dests = source_policy->policy_handler->get_all_dests(); - - vector<rgw_zone_id> zids; - rgw_zone_id last_zid; - for (auto& diter : all_dests) { - const auto& zid = diter.first; - if (zid == last_zid) { - continue; - } - last_zid = zid; - zids.push_back(zid); - } - - peer_status.resize(zids.size()); - - auto& zone_conn_map = store->svc()->zone->get_zone_conn_map(); - - auto p = peer_status.begin(); - for (auto& zid : zids) { - // query data sync status from each sync peer - rgw_http_param_pair params[] = { - { "type", "bucket-index" }, - { "status", nullptr }, - { "options", "merge" }, - { "bucket", bucket_instance.c_str() }, /* equal to source-bucket when `options==merge` and source-bucket - param is not provided */ - { "source-zone", zone_id.c_str() }, - { "version", "2" }, - { nullptr, nullptr } - }; - - auto ziter = zone_conn_map.find(zid); - if (ziter == zone_conn_map.end()) { - ldpp_dout(dpp, 0) << "WARNING: no connection to zone " << zid << ", can't trim bucket: " << bucket << dendl; - return set_cr_error(-ECANCELED); - } - - using StatusCR = RGWReadRESTResourceCR<StatusShards>; - spawn(new StatusCR(cct, ziter->second, http, "/admin/log/", params, &*p), - false); - ++p; - } - } - // wait for a response from each peer. 
all must respond to attempt trim - while (num_spawned()) { - yield wait_for_child(); - collect(&child_ret, nullptr); - if (child_ret < 0) { - drain_all(); - return set_cr_error(child_ret); - } - } - - // Determine the minimum generation - retcode = take_min_generation(); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to find minimum generation" << dendl; - return set_cr_error(retcode); - } - retcode = maybe_remove_generation(); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "error removing old generation from log: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - - if (clean_info) { - if (clean_info->second.layout.type != rgw::BucketLogType::InIndex) { - ldpp_dout(dpp, 0) << "Unable to convert log of unknown type " - << clean_info->second.layout.type - << " to rgw::bucket_index_layout_generation " << dendl; - return set_cr_error(-EINVAL); - } - - yield call(new BucketCleanIndexCollectCR(dpp, store, clean_info->first, - clean_info->second.layout.in_index)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "failed to remove previous generation: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - while (clean_info && retries < MAX_RETRIES) { - yield call(new RGWPutBucketInstanceInfoCR( - store->svc()->rados->get_async_processor(), - store, clean_info->first, false, {}, - no_change_attrs(), dpp)); - - // Raced, try again. - if (retcode == -ECANCELED) { - yield call(new RGWGetBucketInstanceInfoCR( - store->svc()->rados->get_async_processor(), - store, clean_info->first.bucket, - &(clean_info->first), nullptr, dpp)); - if (retcode < 0) { - ldpp_dout(dpp, 0) << "failed to get bucket info: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - if (clean_info->first.layout.logs.front().gen == - clean_info->second.gen) { - clean_info->first.layout.logs.erase( - clean_info->first.layout.logs.begin()); - ++retries; - continue; - } - // Raced, but someone else did what we needed to. - retcode = 0; - } - - if (retcode < 0) { - ldpp_dout(dpp, 0) << "failed to put bucket info: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - clean_info = std::nullopt; - } - } else { - if (totrim.layout.type != rgw::BucketLogType::InIndex) { - ldpp_dout(dpp, 0) << "Unable to convert log of unknown type " - << totrim.layout.type - << " to rgw::bucket_index_layout_generation " << dendl; - return set_cr_error(-EINVAL); - } - // To avoid hammering the OSD too hard, either trim old - // generations OR trim the current one. 
- - // determine the minimum marker for each shard - - // initialize each shard with the maximum marker, which is only used when - // there are no peers syncing from us - min_markers.assign(std::max(1u, rgw::num_shards(totrim.layout.in_index)), - RGWSyncLogTrimCR::max_marker); - - - retcode = take_min_status(cct, totrim.gen, peer_status.cbegin(), - peer_status.cend(), &min_markers); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to correlate bucket sync status from peers" << dendl; - return set_cr_error(retcode); - } - - // trim shards with a ShardCollectCR - ldpp_dout(dpp, 10) << "trimming bilogs for bucket=" << pbucket_info->bucket - << " markers=" << min_markers << ", shards=" << min_markers.size() << dendl; - set_status("trimming bilog shards"); - yield call(new BucketTrimShardCollectCR(dpp, store, *pbucket_info, totrim.layout.in_index, - min_markers)); - // ENODATA just means there were no keys to trim - if (retcode == -ENODATA) { - retcode = 0; - } - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to trim bilog shards: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - } - - observer->on_bucket_trimmed(std::move(bucket_instance)); - return set_cr_done(); - } - return 0; -} - -/// trim each bucket instance while limiting the number of concurrent operations - -class BucketTrimInstanceCollectCR : public RGWShardCollectCR { - rgw::sal::RadosStore* const store; - RGWHTTPManager *const http; - BucketTrimObserver *const observer; - std::vector<std::string>::const_iterator bucket; - std::vector<std::string>::const_iterator end; - const DoutPrefixProvider *dpp; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to trim bucket instance: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - BucketTrimInstanceCollectCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, - BucketTrimObserver *observer, - const std::vector<std::string>& buckets, - int max_concurrent, - const DoutPrefixProvider *dpp) - : RGWShardCollectCR(store->ctx(), max_concurrent), - store(store), http(http), observer(observer), - bucket(buckets.begin()), end(buckets.end()), - dpp(dpp) - {} - bool spawn_next() override; -}; - -bool BucketTrimInstanceCollectCR::spawn_next() -{ - if (bucket == end) { - return false; - } - spawn(new BucketTrimInstanceCR(store, http, observer, *bucket, dpp), false); - ++bucket; - return true; -} - -/// correlate the replies from each peer gateway into the given counter -int accumulate_peer_counters(bufferlist& bl, BucketChangeCounter& counter) -{ - counter.clear(); - - try { - // decode notify responses - auto p = bl.cbegin(); - std::map<std::pair<uint64_t, uint64_t>, bufferlist> replies; - std::set<std::pair<uint64_t, uint64_t>> timeouts; - decode(replies, p); - decode(timeouts, p); - - for (auto& peer : replies) { - auto q = peer.second.cbegin(); - TrimCounters::Response response; - decode(response, q); - for (const auto& b : response.bucket_counters) { - counter.insert(b.bucket, b.count); - } - } - } catch (const buffer::error& e) { - return -EIO; - } - return 0; -} - -/// metadata callback has the signature bool(string&& key, string&& marker) -using MetadataListCallback = std::function<bool(std::string&&, std::string&&)>; - -/// lists metadata keys, passing each to a callback until it returns false. 
-/// on reaching the end, it will restart at the beginning and list up to the -/// initial marker -class AsyncMetadataList : public RGWAsyncRadosRequest { - CephContext *const cct; - RGWMetadataManager *const mgr; - const std::string section; - const std::string start_marker; - MetadataListCallback callback; - - int _send_request(const DoutPrefixProvider *dpp) override; - public: - AsyncMetadataList(CephContext *cct, RGWCoroutine *caller, - RGWAioCompletionNotifier *cn, RGWMetadataManager *mgr, - const std::string& section, const std::string& start_marker, - const MetadataListCallback& callback) - : RGWAsyncRadosRequest(caller, cn), cct(cct), mgr(mgr), - section(section), start_marker(start_marker), callback(callback) - {} -}; - -int AsyncMetadataList::_send_request(const DoutPrefixProvider *dpp) -{ - void* handle = nullptr; - std::list<std::string> keys; - bool truncated{false}; - std::string marker; - - // start a listing at the given marker - int r = mgr->list_keys_init(dpp, section, start_marker, &handle); - if (r == -EINVAL) { - // restart with empty marker below - } else if (r < 0) { - ldpp_dout(dpp, 10) << "failed to init metadata listing: " - << cpp_strerror(r) << dendl; - return r; - } else { - ldpp_dout(dpp, 20) << "starting metadata listing at " << start_marker << dendl; - - // release the handle when scope exits - auto g = make_scope_guard([=, this] { mgr->list_keys_complete(handle); }); - - do { - // get the next key and marker - r = mgr->list_keys_next(dpp, handle, 1, keys, &truncated); - if (r < 0) { - ldpp_dout(dpp, 10) << "failed to list metadata: " - << cpp_strerror(r) << dendl; - return r; - } - marker = mgr->get_marker(handle); - - if (!keys.empty()) { - ceph_assert(keys.size() == 1); - auto& key = keys.front(); - if (!callback(std::move(key), std::move(marker))) { - return 0; - } - } - } while (truncated); - - if (start_marker.empty()) { - // already listed all keys - return 0; - } - } - - // restart the listing from the beginning (empty marker) - handle = nullptr; - - r = mgr->list_keys_init(dpp, section, "", &handle); - if (r < 0) { - ldpp_dout(dpp, 10) << "failed to restart metadata listing: " - << cpp_strerror(r) << dendl; - return r; - } - ldpp_dout(dpp, 20) << "restarting metadata listing" << dendl; - - // release the handle when scope exits - auto g = make_scope_guard([=, this] { mgr->list_keys_complete(handle); }); - do { - // get the next key and marker - r = mgr->list_keys_next(dpp, handle, 1, keys, &truncated); - if (r < 0) { - ldpp_dout(dpp, 10) << "failed to list metadata: " - << cpp_strerror(r) << dendl; - return r; - } - marker = mgr->get_marker(handle); - - if (!keys.empty()) { - ceph_assert(keys.size() == 1); - auto& key = keys.front(); - // stop at original marker - if (marker > start_marker) { - return 0; - } - if (!callback(std::move(key), std::move(marker))) { - return 0; - } - } - } while (truncated); - - return 0; -} - -/// coroutine wrapper for AsyncMetadataList -class MetadataListCR : public RGWSimpleCoroutine { - RGWAsyncRadosProcessor *const async_rados; - RGWMetadataManager *const mgr; - const std::string& section; - const std::string& start_marker; - MetadataListCallback callback; - RGWAsyncRadosRequest *req{nullptr}; - public: - MetadataListCR(CephContext *cct, RGWAsyncRadosProcessor *async_rados, - RGWMetadataManager *mgr, const std::string& section, - const std::string& start_marker, - const MetadataListCallback& callback) - : RGWSimpleCoroutine(cct), async_rados(async_rados), mgr(mgr), - section(section), start_marker(start_marker), 
callback(callback) - {} - ~MetadataListCR() override { - request_cleanup(); - } - - int send_request(const DoutPrefixProvider *dpp) override { - req = new AsyncMetadataList(cct, this, stack->create_completion_notifier(), - mgr, section, start_marker, callback); - async_rados->queue(req); - return 0; - } - int request_complete() override { - return req->get_ret_status(); - } - void request_cleanup() override { - if (req) { - req->finish(); - req = nullptr; - } - } -}; - -class BucketTrimCR : public RGWCoroutine { - rgw::sal::RadosStore* const store; - RGWHTTPManager *const http; - const BucketTrimConfig& config; - BucketTrimObserver *const observer; - const rgw_raw_obj& obj; - ceph::mono_time start_time; - bufferlist notify_replies; - BucketChangeCounter counter; - std::vector buckets; //< buckets selected for trim - BucketTrimStatus status; - RGWObjVersionTracker objv; //< version tracker for trim status object - std::string last_cold_marker; //< position for next trim marker - const DoutPrefixProvider *dpp; - - static const std::string section; //< metadata section for bucket instances - public: - BucketTrimCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, - const BucketTrimConfig& config, BucketTrimObserver *observer, - const rgw_raw_obj& obj, const DoutPrefixProvider *dpp) - : RGWCoroutine(store->ctx()), store(store), http(http), config(config), - observer(observer), obj(obj), counter(config.counter_size), dpp(dpp) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -const std::string BucketTrimCR::section{"bucket.instance"}; - -int BucketTrimCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - start_time = ceph::mono_clock::now(); - - if (config.buckets_per_interval) { - // query watch/notify for hot buckets - ldpp_dout(dpp, 10) << "fetching active bucket counters" << dendl; - set_status("fetching active bucket counters"); - yield { - // request the top bucket counters from each peer gateway - const TrimNotifyType type = NotifyTrimCounters; - TrimCounters::Request request{32}; - bufferlist bl; - encode(type, bl); - encode(request, bl); - call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms, - ¬ify_replies)); - } - if (retcode < 0) { - ldpp_dout(dpp, 10) << "failed to fetch peer bucket counters" << dendl; - return set_cr_error(retcode); - } - - // select the hottest buckets for trim - retcode = accumulate_peer_counters(notify_replies, counter); - if (retcode < 0) { - ldout(cct, 4) << "failed to correlate peer bucket counters" << dendl; - return set_cr_error(retcode); - } - buckets.reserve(config.buckets_per_interval); - - const int max_count = config.buckets_per_interval - - config.min_cold_buckets_per_interval; - counter.get_highest(max_count, - [this] (const std::string& bucket, int count) { - buckets.push_back(bucket); - }); - } - - if (buckets.size() < config.buckets_per_interval) { - // read BucketTrimStatus for marker position - set_status("reading trim status"); - using ReadStatus = RGWSimpleRadosReadCR; - yield call(new ReadStatus(dpp, store->svc()->rados->get_async_processor(), store->svc()->sysobj, obj, - &status, true, &objv)); - if (retcode < 0) { - ldpp_dout(dpp, 10) << "failed to read bilog trim status: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - if (status.marker == "MAX") { - status.marker.clear(); // restart at the beginning - } - ldpp_dout(dpp, 10) << "listing cold buckets from marker=" - << status.marker << dendl; - - set_status("listing cold buckets for trim"); - yield { - // capture a 
reference so 'this' remains valid in the callback - auto ref = boost::intrusive_ptr{this}; - // list cold buckets to consider for trim - auto cb = [this, ref] (std::string&& bucket, std::string&& marker) { - // filter out keys that we trimmed recently - if (observer->trimmed_recently(bucket)) { - return true; - } - // filter out active buckets that we've already selected - auto i = std::find(buckets.begin(), buckets.end(), bucket); - if (i != buckets.end()) { - return true; - } - buckets.emplace_back(std::move(bucket)); - // remember the last cold bucket spawned to update the status marker - last_cold_marker = std::move(marker); - // return true if there's room for more - return buckets.size() < config.buckets_per_interval; - }; - - call(new MetadataListCR(cct, store->svc()->rados->get_async_processor(), - store->ctl()->meta.mgr, - section, status.marker, cb)); - } - if (retcode < 0) { - ldout(cct, 4) << "failed to list bucket instance metadata: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - } - - // trim bucket instances with limited concurrency - set_status("trimming buckets"); - ldpp_dout(dpp, 4) << "collected " << buckets.size() << " buckets for trim" << dendl; - yield call(new BucketTrimInstanceCollectCR(store, http, observer, buckets, - config.concurrent_buckets, dpp)); - // ignore errors from individual buckets - - // write updated trim status - if (!last_cold_marker.empty() && status.marker != last_cold_marker) { - set_status("writing updated trim status"); - status.marker = std::move(last_cold_marker); - ldpp_dout(dpp, 20) << "writing bucket trim marker=" << status.marker << dendl; - using WriteStatus = RGWSimpleRadosWriteCR; - yield call(new WriteStatus(dpp, store->svc()->rados->get_async_processor(), store->svc()->sysobj, obj, - status, &objv)); - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to write updated trim status: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - } - - // notify peers that trim completed - set_status("trim completed"); - yield { - const TrimNotifyType type = NotifyTrimComplete; - TrimComplete::Request request; - bufferlist bl; - encode(type, bl); - encode(request, bl); - call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms, - nullptr)); - } - if (retcode < 0) { - ldout(cct, 10) << "failed to notify peers of trim completion" << dendl; - return set_cr_error(retcode); - } - - ldpp_dout(dpp, 4) << "bucket index log processing completed in " - << ceph::mono_clock::now() - start_time << dendl; - return set_cr_done(); - } - return 0; -} - -class BucketTrimPollCR : public RGWCoroutine { - rgw::sal::RadosStore* const store; - RGWHTTPManager *const http; - const BucketTrimConfig& config; - BucketTrimObserver *const observer; - const rgw_raw_obj& obj; - const std::string name{"trim"}; //< lock name - const std::string cookie; - const DoutPrefixProvider *dpp; - - public: - BucketTrimPollCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, - const BucketTrimConfig& config, - BucketTrimObserver *observer, const rgw_raw_obj& obj, - const DoutPrefixProvider *dpp) - : RGWCoroutine(store->ctx()), store(store), http(http), - config(config), observer(observer), obj(obj), - cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), - dpp(dpp) {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - for (;;) { - set_status("sleeping"); - wait(utime_t{static_cast(config.trim_interval_sec), 0}); - - // prevent 
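`BucketTrimCR` builds its candidate list in two tiers: the hottest buckets from the peer counters first, then "cold" buckets from the metadata listing to fill the remaining quota, skipping anything recently trimmed or already selected. A condensed sketch of that selection (illustrative names, not the actual coroutine state):

```cpp
#include <algorithm>
#include <string>
#include <vector>

std::vector<std::string> select_candidates(
    std::vector<std::string> hot,            // pre-sorted by change count
    const std::vector<std::string>& cold,    // global bucket instance listing
    const std::vector<std::string>& recent,  // trimmed recently, skip these
    size_t per_interval)
{
  if (hot.size() > per_interval) hot.resize(per_interval);
  for (const auto& b : cold) {
    if (hot.size() >= per_interval) break;   // quota reached
    const bool trimmed =
        std::find(recent.begin(), recent.end(), b) != recent.end();
    const bool dup = std::find(hot.begin(), hot.end(), b) != hot.end();
    if (!trimmed && !dup) hot.push_back(b);
  }
  return hot;
}
```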
others from trimming for our entire wait interval - set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, - obj, name, cookie, - config.trim_interval_sec)); - if (retcode < 0) { - ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl; - continue; - } - - set_status("trimming"); - yield call(new BucketTrimCR(store, http, config, observer, obj, dpp)); - if (retcode < 0) { - // on errors, unlock so other gateways can try - set_status("unlocking"); - yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, - obj, name, cookie)); - } - } - } - return 0; -} - -/// tracks a bounded list of events with timestamps. old events can be expired, -/// and recent events can be searched by key. expiration depends on events being -/// inserted in temporal order -template -class RecentEventList { - public: - using clock_type = Clock; - using time_point = typename clock_type::time_point; - - RecentEventList(size_t max_size, const ceph::timespan& max_duration) - : events(max_size), max_duration(max_duration) - {} - - /// insert an event at the given point in time. this time must be at least as - /// recent as the last inserted event - void insert(T&& value, const time_point& now) { - // ceph_assert(events.empty() || now >= events.back().time) - events.push_back(Event{std::move(value), now}); - } - - /// performs a linear search for an event matching the given key, whose type - /// U can be any that provides operator==(U, T) - template - bool lookup(const U& key) const { - for (const auto& event : events) { - if (key == event.value) { - return true; - } - } - return false; - } - - /// remove events that are no longer recent compared to the given point in time - void expire_old(const time_point& now) { - const auto expired_before = now - max_duration; - while (!events.empty() && events.front().time < expired_before) { - events.pop_front(); - } - } - - private: - struct Event { - T value; - time_point time; - }; - boost::circular_buffer events; - const ceph::timespan max_duration; -}; - -namespace rgw { - -// read bucket trim configuration from ceph context -void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config) -{ - const auto& conf = cct->_conf; - - config.trim_interval_sec = - conf.get_val("rgw_sync_log_trim_interval"); - config.counter_size = 512; - config.buckets_per_interval = - conf.get_val("rgw_sync_log_trim_max_buckets"); - config.min_cold_buckets_per_interval = - conf.get_val("rgw_sync_log_trim_min_cold_buckets"); - config.concurrent_buckets = - conf.get_val("rgw_sync_log_trim_concurrent_buckets"); - config.notify_timeout_ms = 10000; - config.recent_size = 128; - config.recent_duration = std::chrono::hours(2); -} - -class BucketTrimManager::Impl : public TrimCounters::Server, - public BucketTrimObserver { - public: - rgw::sal::RadosStore* const store; - const BucketTrimConfig config; - - const rgw_raw_obj status_obj; - - /// count frequency of bucket instance entries in the data changes log - BucketChangeCounter counter; - - using RecentlyTrimmedBucketList = RecentEventList; - using clock_type = RecentlyTrimmedBucketList::clock_type; - /// track recently trimmed buckets to focus trim activity elsewhere - RecentlyTrimmedBucketList trimmed; - - /// serve the bucket trim watch/notify api - BucketTrimWatcher watcher; - - /// protect data shared between data sync, trim, and watch/notify threads - std::mutex mutex; - - Impl(rgw::sal::RadosStore* store, const BucketTrimConfig& 
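The `RecentEventList` template above bounds memory with a circular buffer and expires entries by timestamp, relying on temporal insertion order. A rough boost-free equivalent with `std::deque` and `std::chrono`, sketched under the same assumptions:

```cpp
#include <chrono>
#include <deque>
#include <string>

class RecentList {
  using clock = std::chrono::steady_clock;
  struct Event { std::string value; clock::time_point time; };
  std::deque<Event> events;
  const size_t max_size;
  const clock::duration max_age;
 public:
  RecentList(size_t max_size, clock::duration max_age)
    : max_size(max_size), max_age(max_age) {}

  // insert must be called with non-decreasing timestamps
  void insert(std::string v, clock::time_point now) {
    if (events.size() == max_size) events.pop_front();  // bounded, like circular_buffer
    events.push_back({std::move(v), now});
  }
  // linear search is fine by design: the list stays small
  bool lookup(const std::string& key) const {
    for (const auto& e : events)
      if (e.value == key) return true;
    return false;
  }
  // drop events older than max_age; stops at the first recent one
  void expire_old(clock::time_point now) {
    while (!events.empty() && events.front().time < now - max_age)
      events.pop_front();
  }
};
```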
config) - : store(store), config(config), - status_obj(store->svc()->zone->get_zone_params().log_pool, BucketTrimStatus::oid), - counter(config.counter_size), - trimmed(config.recent_size, config.recent_duration), - watcher(store, status_obj, this) - {} - - /// TrimCounters::Server interface for watch/notify api - void get_bucket_counters(int count, TrimCounters::Vector& buckets) { - buckets.reserve(count); - std::lock_guard lock(mutex); - counter.get_highest(count, [&buckets] (const std::string& key, int count) { - buckets.emplace_back(key, count); - }); - ldout(store->ctx(), 20) << "get_bucket_counters: " << buckets << dendl; - } - - void reset_bucket_counters() override { - ldout(store->ctx(), 20) << "bucket trim completed" << dendl; - std::lock_guard lock(mutex); - counter.clear(); - trimmed.expire_old(clock_type::now()); - } - - /// BucketTrimObserver interface to remember successfully-trimmed buckets - void on_bucket_trimmed(std::string&& bucket_instance) override { - ldout(store->ctx(), 20) << "trimmed bucket instance " << bucket_instance << dendl; - std::lock_guard lock(mutex); - trimmed.insert(std::move(bucket_instance), clock_type::now()); - } - - bool trimmed_recently(const std::string_view& bucket_instance) override { - std::lock_guard lock(mutex); - return trimmed.lookup(bucket_instance); - } -}; - -BucketTrimManager::BucketTrimManager(rgw::sal::RadosStore* store, - const BucketTrimConfig& config) - : impl(new Impl(store, config)) -{ -} -BucketTrimManager::~BucketTrimManager() = default; - -int BucketTrimManager::init() -{ - return impl->watcher.start(this); -} - -void BucketTrimManager::on_bucket_changed(const std::string_view& bucket) -{ - std::lock_guard lock(impl->mutex); - // filter recently trimmed bucket instances out of bucket change counter - if (impl->trimmed.lookup(bucket)) { - return; - } - impl->counter.insert(std::string(bucket)); -} - -RGWCoroutine* BucketTrimManager::create_bucket_trim_cr(RGWHTTPManager *http) -{ - return new BucketTrimPollCR(impl->store, http, impl->config, - impl.get(), impl->status_obj, this); -} - -RGWCoroutine* BucketTrimManager::create_admin_bucket_trim_cr(RGWHTTPManager *http) -{ - // return the trim coroutine without any polling - return new BucketTrimCR(impl->store, http, impl->config, - impl.get(), impl->status_obj, this); -} - -CephContext* BucketTrimManager::get_cct() const -{ - return impl->store->ctx(); -} - -unsigned BucketTrimManager::get_subsys() const -{ - return dout_subsys; -} - -std::ostream& BucketTrimManager::gen_prefix(std::ostream& out) const -{ - return out << "rgw bucket trim manager: "; -} - -} // namespace rgw - -int bilog_trim(const DoutPrefixProvider* p, rgw::sal::RadosStore* store, - RGWBucketInfo& bucket_info, uint64_t gen, int shard_id, - std::string_view start_marker, std::string_view end_marker) -{ - auto& logs = bucket_info.layout.logs; - auto log = std::find_if(logs.begin(), logs.end(), rgw::matches_gen(gen)); - if (log == logs.end()) { - ldpp_dout(p, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << "ERROR: no log layout with gen=" << gen << dendl; - return -ENOENT; - } - - auto log_layout = *log; - - auto r = store->svc()->bilog_rados->log_trim(p, bucket_info, log_layout, shard_id, start_marker, end_marker); - if (r < 0) { - ldpp_dout(p, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__ - << "ERROR: bilog_rados->log_trim returned r=" << r << dendl; - } - return r; -} diff --git a/src/rgw/store/rados/rgw_trim_bilog.h b/src/rgw/store/rados/rgw_trim_bilog.h deleted file mode 100644 index 5b9c4cdd7ec..00000000000 
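`BucketTrimManager::on_bucket_changed()` deliberately filters recently trimmed buckets out of the change counter so a fresh trim does not immediately re-heat them. A hedged sketch of that guard, with a plain mutex standing in for the state shared across the data sync, trim, and watch/notify threads (stand-in containers, not `BucketChangeCounter`/`RecentEventList`):

```cpp
#include <map>
#include <mutex>
#include <set>
#include <string>

struct ChangeCounterSketch {
  std::mutex mutex;
  std::set<std::string> recently_trimmed;  // stand-in for RecentEventList
  std::map<std::string, int> counter;      // stand-in for BucketChangeCounter

  void on_bucket_changed(const std::string& bucket) {
    std::lock_guard<std::mutex> lock(mutex);
    if (recently_trimmed.count(bucket)) {
      return;  // don't count a bucket we just trimmed
    }
    ++counter[bucket];
  }
};
```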
--- a/src/rgw/store/rados/rgw_trim_bilog.h +++ /dev/null @@ -1,124 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2017 Red Hat, Inc - * - * Author: Casey Bodley - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - */ - -#ifndef RGW_SYNC_LOG_TRIM_H -#define RGW_SYNC_LOG_TRIM_H - -#include -#include - -#include "include/common_fwd.h" -#include "include/encoding.h" -#include "common/ceph_time.h" -#include "common/dout.h" -#include "rgw_common.h" - -class RGWCoroutine; -class RGWHTTPManager; - -namespace rgw { - -namespace sal { - class RadosStore; -} - -/// Interface to inform the trim process about which buckets are most active -struct BucketChangeObserver { - virtual ~BucketChangeObserver() = default; - - virtual void on_bucket_changed(const std::string_view& bucket_instance) = 0; -}; - -/// Configuration for BucketTrimManager -struct BucketTrimConfig { - /// time interval in seconds between bucket trim attempts - uint32_t trim_interval_sec{0}; - /// maximum number of buckets to track with BucketChangeObserver - size_t counter_size{0}; - /// maximum number of buckets to process each trim interval - uint32_t buckets_per_interval{0}; - /// minimum number of buckets to choose from the global bucket instance list - uint32_t min_cold_buckets_per_interval{0}; - /// maximum number of buckets to process in parallel - uint32_t concurrent_buckets{0}; - /// timeout in ms for bucket trim notify replies - uint64_t notify_timeout_ms{0}; - /// maximum number of recently trimmed buckets to remember (should be small - /// enough for a linear search) - size_t recent_size{0}; - /// maximum duration to consider a trim as 'recent' (should be some multiple - /// of the trim interval, at least) - ceph::timespan recent_duration{0}; -}; - -/// fill out the BucketTrimConfig from the ceph context -void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config); - -/// Determines the buckets on which to focus trim activity, using two sources of -/// input: the frequency of entries read from the data changes log, and a global -/// listing of the bucket.instance metadata. 
This allows us to trim active -/// buckets quickly, while also ensuring that all buckets will eventually trim -class BucketTrimManager : public BucketChangeObserver, public DoutPrefixProvider { - class Impl; - std::unique_ptr impl; - public: - BucketTrimManager(sal::RadosStore *store, const BucketTrimConfig& config); - ~BucketTrimManager(); - - int init(); - - /// increment a counter for the given bucket instance - void on_bucket_changed(const std::string_view& bucket_instance) override; - - /// create a coroutine to run the bucket trim process every trim interval - RGWCoroutine* create_bucket_trim_cr(RGWHTTPManager *http); - - /// create a coroutine to trim buckets directly via radosgw-admin - RGWCoroutine* create_admin_bucket_trim_cr(RGWHTTPManager *http); - - CephContext *get_cct() const override; - unsigned get_subsys() const; - std::ostream& gen_prefix(std::ostream& out) const; -}; - -/// provides persistent storage for the trim manager's current position in the -/// list of bucket instance metadata -struct BucketTrimStatus { - std::string marker; //< metadata key of current bucket instance - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(marker, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::const_iterator& p) { - DECODE_START(1, p); - decode(marker, p); - DECODE_FINISH(p); - } - - static const std::string oid; -}; - -} // namespace rgw - -WRITE_CLASS_ENCODER(rgw::BucketTrimStatus); - -int bilog_trim(const DoutPrefixProvider* p, rgw::sal::RadosStore* store, - RGWBucketInfo& bucket_info, uint64_t gen, int shard_id, - std::string_view start_marker, std::string_view end_marker); - -#endif // RGW_SYNC_LOG_TRIM_H diff --git a/src/rgw/store/rados/rgw_trim_datalog.cc b/src/rgw/store/rados/rgw_trim_datalog.cc deleted file mode 100644 index 72a160039cf..00000000000 --- a/src/rgw/store/rados/rgw_trim_datalog.cc +++ /dev/null @@ -1,252 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include -#include - -#include "common/errno.h" - -#include "rgw_trim_datalog.h" -#include "rgw_cr_rados.h" -#include "rgw_cr_rest.h" -#include "rgw_datalog.h" -#include "rgw_data_sync.h" -#include "rgw_zone.h" -#include "rgw_bucket.h" - -#include "services/svc_zone.h" - -#include - -#define dout_subsys ceph_subsys_rgw - -#undef dout_prefix -#define dout_prefix (*_dout << "data trim: ") - -namespace { - -class DatalogTrimImplCR : public RGWSimpleCoroutine { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - boost::intrusive_ptr cn; - int shard; - std::string marker; - std::string* last_trim_marker; - - public: - DatalogTrimImplCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, int shard, - const std::string& marker, std::string* last_trim_marker) - : RGWSimpleCoroutine(store->ctx()), dpp(dpp), store(store), shard(shard), - marker(marker), last_trim_marker(last_trim_marker) { - set_description() << "Datalog trim shard=" << shard - << " marker=" << marker; - } - - int send_request(const DoutPrefixProvider *dpp) override { - set_status() << "sending request"; - cn = stack->create_completion_notifier(); - return store->svc()->datalog_rados->trim_entries(dpp, shard, marker, - cn->completion()); - } - int request_complete() override { - int r = cn->completion()->get_return_value(); - ldpp_dout(dpp, 20) << __PRETTY_FUNCTION__ << "(): trim of shard=" << shard - << " marker=" << marker << " returned r=" << r << dendl; - - set_status() << "request complete; ret=" << r; - if (r != -ENODATA) { - 
return r; - } - // nothing left to trim, update last_trim_marker - if (*last_trim_marker < marker && - marker != store->svc()->datalog_rados->max_marker()) { - *last_trim_marker = marker; - } - return 0; - } -}; - -/// return the marker that it's safe to trim up to -const std::string& get_stable_marker(const rgw_data_sync_marker& m) -{ - return m.state == m.FullSync ? m.next_step_marker : m.marker; -} - -/// populate the container starting with 'dest' with the minimum stable marker -/// of each shard for all of the peers in [first, last) -template -void take_min_markers(IterIn first, IterIn last, IterOut dest) -{ - if (first == last) { - return; - } - for (auto p = first; p != last; ++p) { - auto m = dest; - for (auto &shard : p->sync_markers) { - const auto& stable = get_stable_marker(shard.second); - if (*m > stable) { - *m = stable; - } - ++m; - } - } -} - -} // anonymous namespace - -class DataLogTrimCR : public RGWCoroutine { - using TrimCR = DatalogTrimImplCR; - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWHTTPManager *http; - const int num_shards; - const std::string& zone_id; //< my zone id - std::vector peer_status; //< sync status for each peer - std::vector min_shard_markers; //< min marker per shard - std::vector& last_trim; //< last trimmed marker per shard - int ret{0}; - - public: - DataLogTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, - int num_shards, std::vector& last_trim) - : RGWCoroutine(store->ctx()), dpp(dpp), store(store), http(http), - num_shards(num_shards), - zone_id(store->svc()->zone->get_zone().id), - peer_status(store->svc()->zone->get_zone_data_notify_to_map().size()), - min_shard_markers(num_shards, - std::string(store->svc()->datalog_rados->max_marker())), - last_trim(last_trim) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int DataLogTrimCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - ldpp_dout(dpp, 10) << "fetching sync status for zone " << zone_id << dendl; - set_status("fetching sync status"); - yield { - // query data sync status from each sync peer - rgw_http_param_pair params[] = { - { "type", "data" }, - { "status", nullptr }, - { "source-zone", zone_id.c_str() }, - { nullptr, nullptr } - }; - - auto p = peer_status.begin(); - for (auto& c : store->svc()->zone->get_zone_data_notify_to_map()) { - ldpp_dout(dpp, 20) << "query sync status from " << c.first << dendl; - using StatusCR = RGWReadRESTResourceCR; - spawn(new StatusCR(cct, c.second, http, "/admin/log/", params, &*p), - false); - ++p; - } - } - - // must get a successful reply from all peers to consider trimming - ret = 0; - while (ret == 0 && num_spawned() > 0) { - yield wait_for_child(); - collect_next(&ret); - } - drain_all(); - - if (ret < 0) { - ldpp_dout(dpp, 4) << "failed to fetch sync status from all peers" << dendl; - return set_cr_error(ret); - } - - ldpp_dout(dpp, 10) << "trimming log shards" << dendl; - set_status("trimming log shards"); - yield { - // determine the minimum marker for each shard - take_min_markers(peer_status.begin(), peer_status.end(), - min_shard_markers.begin()); - - for (int i = 0; i < num_shards; i++) { - const auto& m = min_shard_markers[i]; - if (m <= last_trim[i]) { - continue; - } - ldpp_dout(dpp, 10) << "trimming log shard " << i - << " at marker=" << m - << " last_trim=" << last_trim[i] << dendl; - spawn(new TrimCR(dpp, store, i, m, &last_trim[i]), - true); - } - } - return set_cr_done(); - } - return 0; -} - -RGWCoroutine* 
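`take_min_markers()` above reduces every peer's per-shard sync position to the element-wise minimum, since a datalog shard may only be trimmed up to the slowest peer. A self-contained sketch over plain strings (assumed layout: one marker vector per peer, pre-sized output initialized to the maximum marker):

```cpp
#include <string>
#include <vector>

void take_min_markers(const std::vector<std::vector<std::string>>& peers,
                      std::vector<std::string>& min_markers)
{
  for (const auto& peer : peers) {
    for (size_t shard = 0;
         shard < min_markers.size() && shard < peer.size(); ++shard) {
      if (min_markers[shard] > peer[shard]) {
        min_markers[shard] = peer[shard];  // the lowest stable marker wins
      }
    }
  }
}
```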
create_admin_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards, - std::vector& markers) -{ - return new DataLogTrimCR(dpp, store, http, num_shards, markers); -} - -class DataLogTrimPollCR : public RGWCoroutine { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* store; - RGWHTTPManager *http; - const int num_shards; - const utime_t interval; //< polling interval - const std::string lock_oid; //< use first data log shard for lock - const std::string lock_cookie; - std::vector last_trim; //< last trimmed marker per shard - - public: - DataLogTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, - int num_shards, utime_t interval) - : RGWCoroutine(store->ctx()), dpp(dpp), store(store), http(http), - num_shards(num_shards), interval(interval), - lock_oid(store->svc()->datalog_rados->get_oid(0, 0)), - lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)), - last_trim(num_shards) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int DataLogTrimPollCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - for (;;) { - set_status("sleeping"); - wait(interval); - - // request a 'data_trim' lock that covers the entire wait interval to - // prevent other gateways from attempting to trim for the duration - set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, - rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, lock_oid), - "data_trim", lock_cookie, - interval.sec())); - if (retcode < 0) { - // if the lock is already held, go back to sleep and try again later - ldpp_dout(dpp, 4) << "failed to lock " << lock_oid << ", trying again in " - << interval.sec() << "s" << dendl; - continue; - } - - set_status("trimming"); - yield call(new DataLogTrimCR(dpp, store, http, num_shards, last_trim)); - - // note that the lock is not released. 
this is intentional, as it avoids - // duplicating this work in other gateways - } - } - return 0; -} - -RGWCoroutine* create_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards, utime_t interval) -{ - return new DataLogTrimPollCR(dpp, store, http, num_shards, interval); -} diff --git a/src/rgw/store/rados/rgw_trim_datalog.h b/src/rgw/store/rados/rgw_trim_datalog.h deleted file mode 100644 index 9f5bf7252fe..00000000000 --- a/src/rgw/store/rados/rgw_trim_datalog.h +++ /dev/null @@ -1,28 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include -#include - -#include "common/dout.h" - -class RGWCoroutine; -class RGWRados; -class RGWHTTPManager; -class utime_t; -namespace rgw { namespace sal { - class RadosStore; -} } - -// DataLogTrimCR factory function -extern RGWCoroutine* create_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards, utime_t interval); - -// factory function for datalog trim via radosgw-admin -RGWCoroutine* create_admin_data_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards, - std::vector& markers); diff --git a/src/rgw/store/rados/rgw_trim_mdlog.cc b/src/rgw/store/rados/rgw_trim_mdlog.cc deleted file mode 100644 index d8e19594aea..00000000000 --- a/src/rgw/store/rados/rgw_trim_mdlog.cc +++ /dev/null @@ -1,795 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" - -#include "rgw_trim_mdlog.h" -#include "rgw_sync.h" -#include "rgw_cr_rados.h" -#include "rgw_cr_rest.h" -#include "rgw_zone.h" -#include "services/svc_zone.h" -#include "services/svc_meta.h" -#include "services/svc_mdlog.h" -#include "services/svc_cls.h" - -#include - -#define dout_subsys ceph_subsys_rgw - -#undef dout_prefix -#define dout_prefix (*_dout << "meta trim: ") - -/// purge all log shards for the given mdlog -class PurgeLogShardsCR : public RGWShardCollectCR { - rgw::sal::RadosStore* const store; - const RGWMetadataLog* mdlog; - const int num_shards; - rgw_raw_obj obj; - int i{0}; - - static constexpr int max_concurrent = 16; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to remove mdlog shard: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - PurgeLogShardsCR(rgw::sal::RadosStore* store, const RGWMetadataLog* mdlog, - const rgw_pool& pool, int num_shards) - : RGWShardCollectCR(store->ctx(), max_concurrent), - store(store), mdlog(mdlog), num_shards(num_shards), obj(pool, "") - {} - - bool spawn_next() override { - if (i == num_shards) { - return false; - } - mdlog->get_shard_oid(i++, obj.oid); - spawn(new RGWRadosRemoveCR(store, obj), false); - return true; - } -}; - -using Cursor = RGWPeriodHistory::Cursor; - -/// purge mdlogs from the oldest up to (but not including) the given realm_epoch -class PurgePeriodLogsCR : public RGWCoroutine { - struct Svc { - RGWSI_Zone *zone; - RGWSI_MDLog *mdlog; - } svc; - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* const store; - RGWMetadataManager *const metadata; - RGWObjVersionTracker objv; - Cursor cursor; - epoch_t realm_epoch; - epoch_t *last_trim_epoch; //< update last trim on success - - public: - PurgePeriodLogsCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* 
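The polling coroutines in these files share a lease pattern: take a lock whose TTL equals the poll interval and deliberately keep it after a successful trim, so other gateways fail to acquire it and skip the redundant pass; only on error is it released early. A pseudo-shaped sketch of that loop, where `Lease` is a hypothetical stand-in for `RGWSimpleRadosLockCR`/`RGWSimpleRadosUnlockCR`:

```cpp
#include <chrono>
#include <thread>

// hypothetical lease interface; the real code uses coroutine calls
struct Lease {
  virtual bool try_acquire(std::chrono::seconds ttl) = 0;
  virtual void release() = 0;
  virtual ~Lease() = default;
};

void trim_poll_loop(Lease& lease, std::chrono::seconds interval,
                    bool (*trim_once)())
{
  for (;;) {
    std::this_thread::sleep_for(interval);       // "sleeping"
    if (!lease.try_acquire(interval)) continue;  // held elsewhere; retry later
    if (!trim_once()) lease.release();           // on error, let others try sooner
    // on success the lease is kept; it simply expires after `interval`
  }
}
```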
store, epoch_t realm_epoch, epoch_t *last_trim) - : RGWCoroutine(store->ctx()), dpp(dpp), store(store), metadata(store->ctl()->meta.mgr), - realm_epoch(realm_epoch), last_trim_epoch(last_trim) { - svc.zone = store->svc()->zone; - svc.mdlog = store->svc()->mdlog; - } - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int PurgePeriodLogsCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // read our current oldest log period - yield call(svc.mdlog->read_oldest_log_period_cr(dpp, &cursor, &objv)); - if (retcode < 0) { - return set_cr_error(retcode); - } - ceph_assert(cursor); - ldpp_dout(dpp, 20) << "oldest log realm_epoch=" << cursor.get_epoch() - << " period=" << cursor.get_period().get_id() << dendl; - - // trim -up to- the given realm_epoch - while (cursor.get_epoch() < realm_epoch) { - ldpp_dout(dpp, 4) << "purging log shards for realm_epoch=" << cursor.get_epoch() - << " period=" << cursor.get_period().get_id() << dendl; - yield { - const auto mdlog = svc.mdlog->get_log(cursor.get_period().get_id()); - const auto& pool = svc.zone->get_zone_params().log_pool; - auto num_shards = cct->_conf->rgw_md_log_max_shards; - call(new PurgeLogShardsCR(store, mdlog, pool, num_shards)); - } - if (retcode < 0) { - ldpp_dout(dpp, 1) << "failed to remove log shards: " - << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - ldpp_dout(dpp, 10) << "removed log shards for realm_epoch=" << cursor.get_epoch() - << " period=" << cursor.get_period().get_id() << dendl; - - // update our mdlog history - yield call(svc.mdlog->trim_log_period_cr(dpp, cursor, &objv)); - if (retcode == -ENOENT) { - // must have raced to update mdlog history. return success and allow the - // winner to continue purging - ldpp_dout(dpp, 10) << "already removed log shards for realm_epoch=" << cursor.get_epoch() - << " period=" << cursor.get_period().get_id() << dendl; - return set_cr_done(); - } else if (retcode < 0) { - ldpp_dout(dpp, 1) << "failed to remove log shards for realm_epoch=" - << cursor.get_epoch() << " period=" << cursor.get_period().get_id() - << " with: " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - - if (*last_trim_epoch < cursor.get_epoch()) { - *last_trim_epoch = cursor.get_epoch(); - } - - ceph_assert(cursor.has_next()); // get_current() should always come after - cursor.next(); - } - return set_cr_done(); - } - return 0; -} - -namespace { - -using connection_map = std::map>; - -/// construct a RGWRESTConn for each zone in the realm -template -connection_map make_peer_connections(rgw::sal::RadosStore* store, - const Zonegroups& zonegroups) -{ - connection_map connections; - for (auto& g : zonegroups) { - for (auto& z : g.second.zones) { - std::unique_ptr conn{ - new RGWRESTConn(store->ctx(), store, z.first.id, z.second.endpoints, g.second.api_name)}; - connections.emplace(z.first.id, std::move(conn)); - } - } - return connections; -} - -/// return the marker that it's safe to trim up to -const std::string& get_stable_marker(const rgw_meta_sync_marker& m) -{ - return m.state == m.FullSync ? 
m.next_step_marker : m.marker; -} - -/// comparison operator for take_min_status() -bool operator<(const rgw_meta_sync_marker& lhs, const rgw_meta_sync_marker& rhs) -{ - // sort by stable marker - return get_stable_marker(lhs) < get_stable_marker(rhs); -} - -/// populate the status with the minimum stable marker of each shard for any -/// peer whose realm_epoch matches the minimum realm_epoch in the input -template -int take_min_status(CephContext *cct, Iter first, Iter last, - rgw_meta_sync_status *status) -{ - if (first == last) { - return -EINVAL; - } - const size_t num_shards = cct->_conf->rgw_md_log_max_shards; - - status->sync_info.realm_epoch = std::numeric_limits::max(); - for (auto p = first; p != last; ++p) { - // validate peer's shard count - if (p->sync_markers.size() != num_shards) { - ldout(cct, 1) << "take_min_status got peer status with " - << p->sync_markers.size() << " shards, expected " - << num_shards << dendl; - return -EINVAL; - } - if (p->sync_info.realm_epoch < status->sync_info.realm_epoch) { - // earlier epoch, take its entire status - *status = std::move(*p); - } else if (p->sync_info.realm_epoch == status->sync_info.realm_epoch) { - // same epoch, take any earlier markers - auto m = status->sync_markers.begin(); - for (auto& shard : p->sync_markers) { - if (shard.second < m->second) { - m->second = std::move(shard.second); - } - ++m; - } - } - } - return 0; -} - -struct TrimEnv { - const DoutPrefixProvider *dpp; - rgw::sal::RadosStore* const store; - RGWHTTPManager *const http; - int num_shards; - const rgw_zone_id& zone; - Cursor current; //< cursor to current period - epoch_t last_trim_epoch{0}; //< epoch of last mdlog that was purged - - TrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) - : dpp(dpp), store(store), http(http), num_shards(num_shards), - zone(store->svc()->zone->zone_id()), - current(store->svc()->mdlog->get_period_history()->get_current()) - {} -}; - -struct MasterTrimEnv : public TrimEnv { - connection_map connections; //< peer connections - std::vector peer_status; //< sync status for each peer - /// last trim marker for each shard, only applies to current period's mdlog - std::vector last_trim_markers; - - MasterTrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) - : TrimEnv(dpp, store, http, num_shards), - last_trim_markers(num_shards) - { - auto& period = current.get_period(); - connections = make_peer_connections(store, period.get_map().zonegroups); - connections.erase(zone.id); - peer_status.resize(connections.size()); - } -}; - -struct PeerTrimEnv : public TrimEnv { - /// last trim timestamp for each shard, only applies to current period's mdlog - std::vector last_trim_timestamps; - - PeerTrimEnv(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) - : TrimEnv(dpp, store, http, num_shards), - last_trim_timestamps(num_shards) - {} - - void set_num_shards(int num_shards) { - this->num_shards = num_shards; - last_trim_timestamps.resize(num_shards); - } -}; - -} // anonymous namespace - - -/// spawn a trim cr for each shard that needs it, while limiting the number -/// of concurrent shards -class MetaMasterTrimShardCollectCR : public RGWShardCollectCR { - private: - static constexpr int MAX_CONCURRENT_SHARDS = 16; - - MasterTrimEnv& env; - RGWMetadataLog *mdlog; - int shard_id{0}; - std::string oid; - const rgw_meta_sync_status& sync_status; - - int handle_result(int r) override { - if (r 
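The mdlog variant of `take_min_status()` keys its comparison on `realm_epoch` first: the peer on the strictly oldest epoch wins outright, and only peers on that same epoch contribute element-wise minimum markers. A sketch with stand-in types (not `rgw_meta_sync_status`):

```cpp
#include <algorithm>
#include <limits>
#include <string>
#include <vector>

struct PeerStatus {
  unsigned realm_epoch = 0;
  std::vector<std::string> markers;  // one stable marker per shard
};

bool take_min_status(const std::vector<PeerStatus>& peers, PeerStatus* out)
{
  if (peers.empty()) return false;
  out->realm_epoch = std::numeric_limits<unsigned>::max();
  for (const auto& p : peers) {
    if (p.realm_epoch < out->realm_epoch) {
      *out = p;  // strictly older epoch: take that peer's status whole
    } else if (p.realm_epoch == out->realm_epoch) {
      // same epoch: keep the earlier marker per shard
      for (size_t i = 0; i < out->markers.size() && i < p.markers.size(); ++i) {
        out->markers[i] = std::min(out->markers[i], p.markers[i]);
      }
    }
  }
  return true;
}
```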
== -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog, - const rgw_meta_sync_status& sync_status) - : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), - env(env), mdlog(mdlog), sync_status(sync_status) - {} - - bool spawn_next() override; -}; - -bool MetaMasterTrimShardCollectCR::spawn_next() -{ - while (shard_id < env.num_shards) { - auto m = sync_status.sync_markers.find(shard_id); - if (m == sync_status.sync_markers.end()) { - shard_id++; - continue; - } - auto& stable = get_stable_marker(m->second); - auto& last_trim = env.last_trim_markers[shard_id]; - - if (stable <= last_trim) { - // already trimmed - ldpp_dout(env.dpp, 20) << "skipping log shard " << shard_id - << " at marker=" << stable - << " last_trim=" << last_trim - << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; - shard_id++; - continue; - } - - mdlog->get_shard_oid(shard_id, oid); - - ldpp_dout(env.dpp, 10) << "trimming log shard " << shard_id - << " at marker=" << stable - << " last_trim=" << last_trim - << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl; - spawn(new RGWSyncLogTrimCR(env.dpp, env.store, oid, stable, &last_trim), false); - shard_id++; - return true; - } - return false; -} - -/// spawn rest requests to read each peer's sync status -class MetaMasterStatusCollectCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - - MasterTrimEnv& env; - connection_map::iterator c; - std::vector::iterator s; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to fetch metadata sync status: " - << cpp_strerror(r) << dendl; - } - return r; - } - public: - explicit MetaMasterStatusCollectCR(MasterTrimEnv& env) - : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), - env(env), c(env.connections.begin()), s(env.peer_status.begin()) - {} - - bool spawn_next() override { - if (c == env.connections.end()) { - return false; - } - static rgw_http_param_pair params[] = { - { "type", "metadata" }, - { "status", nullptr }, - { nullptr, nullptr } - }; - - ldout(cct, 20) << "query sync status from " << c->first << dendl; - auto conn = c->second.get(); - using StatusCR = RGWReadRESTResourceCR; - spawn(new StatusCR(cct, conn, env.http, "/admin/log/", params, &*s), - false); - ++c; - ++s; - return true; - } -}; - -class MetaMasterTrimCR : public RGWCoroutine { - MasterTrimEnv& env; - rgw_meta_sync_status min_status; //< minimum sync status of all peers - int ret{0}; - - public: - explicit MetaMasterTrimCR(MasterTrimEnv& env) - : RGWCoroutine(env.store->ctx()), env(env) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int MetaMasterTrimCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // TODO: detect this and fail before we spawn the trim thread? 
- if (env.connections.empty()) { - ldpp_dout(dpp, 4) << "no peers, exiting" << dendl; - return set_cr_done(); - } - - ldpp_dout(dpp, 10) << "fetching sync status for zone " << env.zone << dendl; - // query mdlog sync status from peers - yield call(new MetaMasterStatusCollectCR(env)); - - // must get a successful reply from all peers to consider trimming - if (ret < 0) { - ldpp_dout(dpp, 4) << "failed to fetch sync status from all peers" << dendl; - return set_cr_error(ret); - } - - // determine the minimum epoch and markers - ret = take_min_status(env.store->ctx(), env.peer_status.begin(), - env.peer_status.end(), &min_status); - if (ret < 0) { - ldpp_dout(dpp, 4) << "failed to calculate min sync status from peers" << dendl; - return set_cr_error(ret); - } - yield { - auto store = env.store; - auto epoch = min_status.sync_info.realm_epoch; - ldpp_dout(dpp, 4) << "realm epoch min=" << epoch - << " current=" << env.current.get_epoch()<< dendl; - if (epoch > env.last_trim_epoch + 1) { - // delete any prior mdlog periods - spawn(new PurgePeriodLogsCR(dpp, store, epoch, &env.last_trim_epoch), true); - } else { - ldpp_dout(dpp, 10) << "mdlogs already purged up to realm_epoch " - << env.last_trim_epoch << dendl; - } - - // if realm_epoch == current, trim mdlog based on markers - if (epoch == env.current.get_epoch()) { - auto mdlog = store->svc()->mdlog->get_log(env.current.get_period().get_id()); - spawn(new MetaMasterTrimShardCollectCR(env, mdlog, min_status), true); - } - } - // ignore any errors during purge/trim because we want to hold the lock open - return set_cr_done(); - } - return 0; -} - - -/// read the first entry of the master's mdlog shard and trim to that position -class MetaPeerTrimShardCR : public RGWCoroutine { - RGWMetaSyncEnv& env; - RGWMetadataLog *mdlog; - const std::string& period_id; - const int shard_id; - RGWMetadataLogInfo info; - ceph::real_time stable; //< safe timestamp to trim, according to master - ceph::real_time *last_trim; //< last trimmed timestamp, updated on trim - rgw_mdlog_shard_data result; //< result from master's mdlog listing - - public: - MetaPeerTrimShardCR(RGWMetaSyncEnv& env, RGWMetadataLog *mdlog, - const std::string& period_id, int shard_id, - ceph::real_time *last_trim) - : RGWCoroutine(env.store->ctx()), env(env), mdlog(mdlog), - period_id(period_id), shard_id(shard_id), last_trim(last_trim) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int MetaPeerTrimShardCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - // query master's first mdlog entry for this shard - yield call(create_list_remote_mdlog_shard_cr(&env, period_id, shard_id, - "", 1, &result)); - if (retcode < 0) { - ldpp_dout(dpp, 5) << "failed to read first entry from master's mdlog shard " - << shard_id << " for period " << period_id - << ": " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - if (result.entries.empty()) { - // if there are no mdlog entries, we don't have a timestamp to compare. we - // can't just trim everything, because there could be racing updates since - // this empty reply. 
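`MetaMasterTrimCR` splits the work by realm epoch: any mdlog period strictly older than the peers' minimum epoch is purged whole, and only when that minimum matches the current period are the per-shard markers used. Both branches can apply in the same pass, which a condensed sketch makes explicit (hypothetical names):

```cpp
struct TrimPlan {
  bool purge_old_periods;    // delete prior mdlog periods entirely
  bool trim_current_shards;  // marker-based trim of the current mdlog
};

TrimPlan master_trim_plan(unsigned min_epoch, unsigned current_epoch,
                          unsigned last_trim_epoch)
{
  return TrimPlan{
    min_epoch > last_trim_epoch + 1,  // peers have all moved past older periods
    min_epoch == current_epoch        // peers are on the current period
  };
}
```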
query the mdlog shard info to read its max timestamp, - // then retry the listing to make sure it's still empty before trimming to - // that - ldpp_dout(dpp, 10) << "empty master mdlog shard " << shard_id - << ", reading last timestamp from shard info" << dendl; - // read the mdlog shard info for the last timestamp - yield call(create_read_remote_mdlog_shard_info_cr(&env, period_id, shard_id, &info)); - if (retcode < 0) { - ldpp_dout(dpp, 5) << "failed to read info from master's mdlog shard " - << shard_id << " for period " << period_id - << ": " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - if (ceph::real_clock::is_zero(info.last_update)) { - return set_cr_done(); // nothing to trim - } - ldpp_dout(dpp, 10) << "got mdlog shard info with last update=" - << info.last_update << dendl; - // re-read the master's first mdlog entry to make sure it hasn't changed - yield call(create_list_remote_mdlog_shard_cr(&env, period_id, shard_id, - "", 1, &result)); - if (retcode < 0) { - ldpp_dout(dpp, 5) << "failed to read first entry from master's mdlog shard " - << shard_id << " for period " << period_id - << ": " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - // if the mdlog is still empty, trim to max marker - if (result.entries.empty()) { - stable = info.last_update; - } else { - stable = result.entries.front().timestamp; - - // can only trim -up to- master's first timestamp, so subtract a second. - // (this is why we use timestamps instead of markers for the peers) - stable -= std::chrono::seconds(1); - } - } else { - stable = result.entries.front().timestamp; - stable -= std::chrono::seconds(1); - } - - if (stable <= *last_trim) { - ldpp_dout(dpp, 10) << "skipping log shard " << shard_id - << " at timestamp=" << stable - << " last_trim=" << *last_trim << dendl; - return set_cr_done(); - } - - ldpp_dout(dpp, 10) << "trimming log shard " << shard_id - << " at timestamp=" << stable - << " last_trim=" << *last_trim << dendl; - yield { - std::string oid; - mdlog->get_shard_oid(shard_id, oid); - call(new RGWRadosTimelogTrimCR(dpp, env.store, oid, real_time{}, stable, "", "")); - } - if (retcode < 0 && retcode != -ENODATA) { - ldpp_dout(dpp, 1) << "failed to trim mdlog shard " << shard_id - << ": " << cpp_strerror(retcode) << dendl; - return set_cr_error(retcode); - } - *last_trim = stable; - return set_cr_done(); - } - return 0; -} - -class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { - static constexpr int MAX_CONCURRENT_SHARDS = 16; - - PeerTrimEnv& env; - RGWMetadataLog *mdlog; - const std::string& period_id; - RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR - int shard_id{0}; - - int handle_result(int r) override { - if (r == -ENOENT) { // ENOENT is not a fatal error - return 0; - } - if (r < 0) { - ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; - } - return r; - } - public: - MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog) - : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), - env(env), mdlog(mdlog), period_id(env.current.get_period().get_id()) - { - meta_env.init(env.dpp, cct, env.store, env.store->svc()->zone->get_master_conn(), - env.store->svc()->rados->get_async_processor(), env.http, nullptr, - env.store->getRados()->get_sync_tracer()); - } - - bool spawn_next() override; -}; - -bool MetaPeerTrimShardCollectCR::spawn_next() -{ - if (shard_id >= env.num_shards) { - return false; - } - auto& last_trim = env.last_trim_timestamps[shard_id]; - spawn(new 
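`MetaPeerTrimShardCR` trims by timestamp rather than marker, and backs off one second from the master's first surviving entry so the trim stays strictly behind it; an empty shard falls back to the shard info's `last_update` once the re-listing confirms it is still empty. A sketch of computing that stable point, where an empty `optional` models "no entry available" (assumed signature, not the coroutine's actual interface):

```cpp
#include <chrono>
#include <optional>

using time_point = std::chrono::system_clock::time_point;

std::optional<time_point> stable_trim_point(
    std::optional<time_point> first_entry,   // master's oldest mdlog entry
    std::optional<time_point> last_update)   // shard info, used when empty
{
  if (first_entry) {
    // trim strictly before the master's first entry
    return *first_entry - std::chrono::seconds(1);
  }
  return last_update;  // verified-empty shard: trim to its last update time
}
```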
MetaPeerTrimShardCR(meta_env, mdlog, period_id, shard_id, &last_trim), - false); - shard_id++; - return true; -} - -class MetaPeerTrimCR : public RGWCoroutine { - PeerTrimEnv& env; - rgw_mdlog_info mdlog_info; //< master's mdlog info - - public: - explicit MetaPeerTrimCR(PeerTrimEnv& env) : RGWCoroutine(env.store->ctx()), env(env) {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int MetaPeerTrimCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - ldpp_dout(dpp, 10) << "fetching master mdlog info" << dendl; - yield { - // query mdlog_info from master for oldest_log_period - rgw_http_param_pair params[] = { - { "type", "metadata" }, - { nullptr, nullptr } - }; - - using LogInfoCR = RGWReadRESTResourceCR; - call(new LogInfoCR(cct, env.store->svc()->zone->get_master_conn(), env.http, - "/admin/log/", params, &mdlog_info)); - } - if (retcode < 0) { - ldpp_dout(dpp, 4) << "failed to read mdlog info from master" << dendl; - return set_cr_error(retcode); - } - // use master's shard count instead - env.set_num_shards(mdlog_info.num_shards); - - if (mdlog_info.realm_epoch > env.last_trim_epoch + 1) { - // delete any prior mdlog periods - yield call(new PurgePeriodLogsCR(dpp, env.store, mdlog_info.realm_epoch, - &env.last_trim_epoch)); - } else { - ldpp_dout(dpp, 10) << "mdlogs already purged through realm_epoch " - << env.last_trim_epoch << dendl; - } - - // if realm_epoch == current, trim mdlog based on master's markers - if (mdlog_info.realm_epoch == env.current.get_epoch()) { - yield { - auto mdlog = env.store->svc()->mdlog->get_log(env.current.get_period().get_id()); - call(new MetaPeerTrimShardCollectCR(env, mdlog)); - // ignore any errors during purge/trim because we want to hold the lock open - } - } - return set_cr_done(); - } - return 0; -} - -class MetaTrimPollCR : public RGWCoroutine { - rgw::sal::RadosStore* const store; - const utime_t interval; //< polling interval - const rgw_raw_obj obj; - const std::string name{"meta_trim"}; //< lock name - const std::string cookie; - - protected: - /// allocate the coroutine to run within the lease - virtual RGWCoroutine* alloc_cr() = 0; - - public: - MetaTrimPollCR(rgw::sal::RadosStore* store, utime_t interval) - : RGWCoroutine(store->ctx()), store(store), interval(interval), - obj(store->svc()->zone->get_zone_params().log_pool, RGWMetadataLogHistory::oid), - cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)) - {} - - int operate(const DoutPrefixProvider *dpp) override; -}; - -int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp) -{ - reenter(this) { - for (;;) { - set_status("sleeping"); - wait(interval); - - // prevent others from trimming for our entire wait interval - set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, - obj, name, cookie, interval.sec())); - if (retcode < 0) { - ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl; - continue; - } - - set_status("trimming"); - yield call(alloc_cr()); - - if (retcode < 0) { - // on errors, unlock so other gateways can try - set_status("unlocking"); - yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, - obj, name, cookie)); - } - } - } - return 0; -} - -class MetaMasterTrimPollCR : public MetaTrimPollCR { - MasterTrimEnv env; //< trim state to share between calls - RGWCoroutine* alloc_cr() override { - return new MetaMasterTrimCR(env); - } - public: - MetaMasterTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, 
RGWHTTPManager *http, - int num_shards, utime_t interval) - : MetaTrimPollCR(store, interval), - env(dpp, store, http, num_shards) - {} -}; - -class MetaPeerTrimPollCR : public MetaTrimPollCR { - PeerTrimEnv env; //< trim state to share between calls - RGWCoroutine* alloc_cr() override { - return new MetaPeerTrimCR(env); - } - public: - MetaPeerTrimPollCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, - int num_shards, utime_t interval) - : MetaTrimPollCR(store, interval), - env(dpp, store, http, num_shards) - {} -}; - -namespace { -bool sanity_check_endpoints(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store) { - bool retval = true; - auto current = store->svc()->mdlog->get_period_history()->get_current(); - const auto& period = current.get_period(); - for (const auto& [_, zonegroup] : period.get_map().zonegroups) { - if (zonegroup.endpoints.empty()) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " WARNING: Cluster is misconfigured! " - << " Zonegroup " << zonegroup.get_name() - << " (" << zonegroup.get_id() << ") in Realm " - << period.get_realm_name() << " ( " << period.get_realm() << ") " - << " has no endpoints!" << dendl; - } - for (const auto& [_, zone] : zonegroup.zones) { - if (zone.endpoints.empty()) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " ERROR: Cluster is misconfigured! " - << " Zone " << zone.name << " (" << zone.id << ") in Zonegroup " - << zonegroup.get_name() << " ( " << zonegroup.get_id() - << ") in Realm " << period.get_realm_name() - << " ( " << period.get_realm() << ") " - << " has no endpoints! Trimming is impossible." << dendl; - retval = false; - } - } - } - return retval; -} -} - -RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, - int num_shards, utime_t interval) -{ - if (!sanity_check_endpoints(dpp, store)) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " ERROR: Cluster is misconfigured! Refusing to trim." << dendl; - return nullptr; - } - if (store->svc()->zone->is_meta_master()) { - return new MetaMasterTrimPollCR(dpp, store, http, num_shards, interval); - } - return new MetaPeerTrimPollCR(dpp, store, http, num_shards, interval); -} - - -struct MetaMasterAdminTrimCR : private MasterTrimEnv, public MetaMasterTrimCR { - MetaMasterAdminTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) - : MasterTrimEnv(dpp, store, http, num_shards), - MetaMasterTrimCR(*static_cast<MasterTrimEnv*>(this)) - {} -}; - -struct MetaPeerAdminTrimCR : private PeerTrimEnv, public MetaPeerTrimCR { - MetaPeerAdminTrimCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, RGWHTTPManager *http, int num_shards) - : PeerTrimEnv(dpp, store, http, num_shards), - MetaPeerTrimCR(*static_cast<PeerTrimEnv*>(this)) - {} -}; - -RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards) -{ - if (!sanity_check_endpoints(dpp, store)) { - ldpp_dout(dpp, -1) - << __PRETTY_FUNCTION__ << ":" << __LINE__ - << " ERROR: Cluster is misconfigured! Refusing to trim."
<< dendl; - return nullptr; - } - if (store->svc()->zone->is_meta_master()) { - return new MetaMasterAdminTrimCR(dpp, store, http, num_shards); - } - return new MetaPeerAdminTrimCR(dpp, store, http, num_shards); -} diff --git a/src/rgw/store/rados/rgw_trim_mdlog.h b/src/rgw/store/rados/rgw_trim_mdlog.h deleted file mode 100644 index 1dba8612bd3..00000000000 --- a/src/rgw/store/rados/rgw_trim_mdlog.h +++ /dev/null @@ -1,25 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -class RGWCoroutine; -class DoutPrefixProvider; -class RGWRados; -class RGWHTTPManager; -class utime_t; -namespace rgw { namespace sal { - class RadosStore; -} } - -// MetaLogTrimCR factory function -RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards, utime_t interval); - -// factory function for mdlog trim via radosgw-admin -RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, - rgw::sal::RadosStore* store, - RGWHTTPManager *http, - int num_shards); diff --git a/src/rgw/store/rados/rgw_user.cc b/src/rgw/store/rados/rgw_user.cc deleted file mode 100644 index 7c36a52e31d..00000000000 --- a/src/rgw/store/rados/rgw_user.cc +++ /dev/null @@ -1,2768 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "common/errno.h" - -#include "rgw_user.h" - -#include "rgw_bucket.h" - -#include "services/svc_user.h" -#include "services/svc_meta.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -extern void op_type_to_str(uint32_t mask, char *buf, int len); - -static string key_type_to_str(int key_type) { - switch (key_type) { - case KEY_TYPE_SWIFT: - return "swift"; - break; - - default: - return "s3"; - break; - } -} - -static bool char_is_unreserved_url(char c) -{ - if (isalnum(c)) - return true; - - switch (c) { - case '-': - case '.': - case '_': - case '~': - return true; - default: - return false; - } -} - -static bool validate_access_key(string& key) -{ - const char *p = key.c_str(); - while (*p) { - if (!char_is_unreserved_url(*p)) - return false; - p++; - } - return true; -} - -static void set_err_msg(std::string *sink, std::string msg) -{ - if (sink && !msg.empty()) - *sink = msg; -} - -/* - * Dump either the full user info or a subset to a formatter. - * - * NOTE: It is the caller's responsibility to ensure that the - * formatter is flushed at the correct time. - */ - -static void dump_subusers_info(Formatter *f, RGWUserInfo &info) -{ - map::iterator uiter; - - f->open_array_section("subusers"); - for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) { - RGWSubUser& u = uiter->second; - f->open_object_section("user"); - string s; - info.user_id.to_str(s); - f->dump_format("id", "%s:%s", s.c_str(), u.name.c_str()); - char buf[256]; - rgw_perm_to_str(u.perm_mask, buf, sizeof(buf)); - f->dump_string("permissions", buf); - f->close_section(); - } - f->close_section(); -} - -static void dump_access_keys_info(Formatter *f, RGWUserInfo &info) -{ - map::iterator kiter; - f->open_array_section("keys"); - for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) { - RGWAccessKey& k = kiter->second; - const char *sep = (k.subuser.empty() ? "" : ":"); - const char *subuser = (k.subuser.empty() ? 
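`validate_access_key()` above only admits the RFC 3986 "unreserved" characters (ALPHA / DIGIT / "-" / "." / "_" / "~"). The same check expressed with `<cctype>` over a `std::string`, mirroring the original's behavior of passing an empty key (callers such as `check_op()` reject empty keys separately):

```cpp
#include <cctype>
#include <string>

bool is_unreserved(unsigned char c)
{
  return std::isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
}

bool validate_access_key(const std::string& key)
{
  for (unsigned char c : key) {
    if (!is_unreserved(c)) return false;  // reserved or non-URL-safe character
  }
  return true;  // empty keys pass here, as in the original
}
```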
"" : k.subuser.c_str()); - f->open_object_section("key"); - string s; - info.user_id.to_str(s); - f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser); - f->dump_string("access_key", k.id); - f->dump_string("secret_key", k.key); - f->close_section(); - } - f->close_section(); -} - -static void dump_swift_keys_info(Formatter *f, RGWUserInfo &info) -{ - map::iterator kiter; - f->open_array_section("swift_keys"); - for (kiter = info.swift_keys.begin(); kiter != info.swift_keys.end(); ++kiter) { - RGWAccessKey& k = kiter->second; - const char *sep = (k.subuser.empty() ? "" : ":"); - const char *subuser = (k.subuser.empty() ? "" : k.subuser.c_str()); - f->open_object_section("key"); - string s; - info.user_id.to_str(s); - f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser); - f->dump_string("secret_key", k.key); - f->close_section(); - } - f->close_section(); -} - -static void dump_user_info(Formatter *f, RGWUserInfo &info, - RGWStorageStats *stats = NULL) -{ - f->open_object_section("user_info"); - encode_json("tenant", info.user_id.tenant, f); - encode_json("user_id", info.user_id.id, f); - encode_json("display_name", info.display_name, f); - encode_json("email", info.user_email, f); - encode_json("suspended", (int)info.suspended, f); - encode_json("max_buckets", (int)info.max_buckets, f); - - dump_subusers_info(f, info); - dump_access_keys_info(f, info); - dump_swift_keys_info(f, info); - - encode_json("caps", info.caps, f); - - char buf[256]; - op_type_to_str(info.op_mask, buf, sizeof(buf)); - encode_json("op_mask", (const char *)buf, f); - encode_json("system", (bool)info.system, f); - encode_json("admin", (bool)info.admin, f); - encode_json("default_placement", info.default_placement.name, f); - encode_json("default_storage_class", info.default_placement.storage_class, f); - encode_json("placement_tags", info.placement_tags, f); - encode_json("bucket_quota", info.quota.bucket_quota, f); - encode_json("user_quota", info.quota.user_quota, f); - encode_json("temp_url_keys", info.temp_url_keys, f); - - string user_source_type; - switch ((RGWIdentityType)info.type) { - case TYPE_RGW: - user_source_type = "rgw"; - break; - case TYPE_KEYSTONE: - user_source_type = "keystone"; - break; - case TYPE_LDAP: - user_source_type = "ldap"; - break; - case TYPE_NONE: - user_source_type = "none"; - break; - default: - user_source_type = "none"; - break; - } - encode_json("type", user_source_type, f); - encode_json("mfa_ids", info.mfa_ids, f); - if (stats) { - encode_json("stats", *stats, f); - } - f->close_section(); -} - -static int user_add_helper(RGWUserAdminOpState& op_state, std::string *err_msg) -{ - int ret = 0; - const rgw_user& uid = op_state.get_user_id(); - std::string user_email = op_state.get_user_email(); - std::string display_name = op_state.get_display_name(); - - // fail if the user exists already - if (op_state.has_existing_user()) { - if (op_state.found_by_email) { - set_err_msg(err_msg, "email: " + user_email + - " is the email address of an existing user"); - ret = -ERR_EMAIL_EXIST; - } else if (op_state.found_by_key) { - set_err_msg(err_msg, "duplicate key provided"); - ret = -ERR_KEY_EXIST; - } else { - set_err_msg(err_msg, "user: " + uid.to_str() + " exists"); - ret = -EEXIST; - } - return ret; - } - - // fail if the user_info has already been populated - if (op_state.is_populated()) { - set_err_msg(err_msg, "cannot overwrite already populated user"); - return -EEXIST; - } - - // fail if the display name was not included - if (display_name.empty()) { - 
set_err_msg(err_msg, "no display name specified"); - return -EINVAL; - } - - return ret; -} - -RGWAccessKeyPool::RGWAccessKeyPool(RGWUser* usr) -{ - if (!usr) { - return; - } - - user = usr; - - driver = user->get_driver(); -} - -int RGWAccessKeyPool::init(RGWUserAdminOpState& op_state) -{ - if (!op_state.is_initialized()) { - keys_allowed = false; - return -EINVAL; - } - - const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { - keys_allowed = false; - return -EINVAL; - } - - swift_keys = op_state.get_swift_keys(); - access_keys = op_state.get_access_keys(); - - keys_allowed = true; - - return 0; -} - -RGWUserAdminOpState::RGWUserAdminOpState(rgw::sal::Driver* driver) -{ - user = driver->get_user(rgw_user(RGW_USER_ANON_ID)); -} - -void RGWUserAdminOpState::set_user_id(const rgw_user& id) -{ - if (id.empty()) - return; - - user->get_info().user_id = id; -} - -void RGWUserAdminOpState::set_subuser(std::string& _subuser) -{ - if (_subuser.empty()) - return; - - size_t pos = _subuser.find(":"); - if (pos != string::npos) { - rgw_user tmp_id; - tmp_id.from_str(_subuser.substr(0, pos)); - if (tmp_id.tenant.empty()) { - user->get_info().user_id.id = tmp_id.id; - } else { - user->get_info().user_id = tmp_id; - } - subuser = _subuser.substr(pos+1); - } else { - subuser = _subuser; - } - - subuser_specified = true; -} - -void RGWUserAdminOpState::set_user_info(RGWUserInfo& user_info) -{ - user->get_info() = user_info; -} - -void RGWUserAdminOpState::set_user_version_tracker(RGWObjVersionTracker& objv_tracker) -{ - user->get_version_tracker() = objv_tracker; -} - -const rgw_user& RGWUserAdminOpState::get_user_id() -{ - return user->get_id(); -} - -RGWUserInfo& RGWUserAdminOpState::get_user_info() -{ - return user->get_info(); -} - -map* RGWUserAdminOpState::get_swift_keys() -{ - return &user->get_info().swift_keys; -} - -map* RGWUserAdminOpState::get_access_keys() -{ - return &user->get_info().access_keys; -} - -map* RGWUserAdminOpState::get_subusers() -{ - return &user->get_info().subusers; -} - -RGWUserCaps *RGWUserAdminOpState::get_caps_obj() -{ - return &user->get_info().caps; -} - -std::string RGWUserAdminOpState::build_default_swift_kid() -{ - if (user->get_id().empty() || subuser.empty()) - return ""; - - std::string kid; - user->get_id().to_str(kid); - kid.append(":"); - kid.append(subuser); - - return kid; -} - -std::string RGWUserAdminOpState::generate_subuser() { - if (user->get_id().empty()) - return ""; - - std::string generated_subuser; - user->get_id().to_str(generated_subuser); - std::string rand_suffix; - - int sub_buf_size = RAND_SUBUSER_LEN + 1; - char sub_buf[RAND_SUBUSER_LEN + 1]; - - gen_rand_alphanumeric_upper(g_ceph_context, sub_buf, sub_buf_size); - - rand_suffix = sub_buf; - if (rand_suffix.empty()) - return ""; - - generated_subuser.append(rand_suffix); - subuser = generated_subuser; - - return generated_subuser; -} - -/* - * Do a fairly exhaustive search for an existing key matching the parameters - * given. Also handles the case where no key type was specified and updates - * the operation state if needed. 
- */ - -bool RGWAccessKeyPool::check_existing_key(RGWUserAdminOpState& op_state) -{ - bool existing_key = false; - - int key_type = op_state.get_key_type(); - std::string kid = op_state.get_access_key(); - std::map::iterator kiter; - std::string swift_kid = op_state.build_default_swift_kid(); - - RGWUserInfo dup_info; - - if (kid.empty() && swift_kid.empty()) - return false; - - switch (key_type) { - case KEY_TYPE_SWIFT: - kiter = swift_keys->find(swift_kid); - - existing_key = (kiter != swift_keys->end()); - if (existing_key) - op_state.set_access_key(swift_kid); - - break; - case KEY_TYPE_S3: - kiter = access_keys->find(kid); - existing_key = (kiter != access_keys->end()); - - break; - default: - kiter = access_keys->find(kid); - - existing_key = (kiter != access_keys->end()); - if (existing_key) { - op_state.set_key_type(KEY_TYPE_S3); - break; - } - - kiter = swift_keys->find(kid); - - existing_key = (kiter != swift_keys->end()); - if (existing_key) { - op_state.set_key_type(KEY_TYPE_SWIFT); - break; - } - - // handle the case where the access key was not provided in user:key format - if (swift_kid.empty()) - return false; - - kiter = swift_keys->find(swift_kid); - - existing_key = (kiter != swift_keys->end()); - if (existing_key) { - op_state.set_access_key(swift_kid); - op_state.set_key_type(KEY_TYPE_SWIFT); - } - } - - op_state.set_existing_key(existing_key); - - return existing_key; -} - -int RGWAccessKeyPool::check_op(RGWUserAdminOpState& op_state, - std::string *err_msg) -{ - RGWUserInfo dup_info; - - if (!op_state.is_populated()) { - set_err_msg(err_msg, "user info was not populated"); - return -EINVAL; - } - - if (!keys_allowed) { - set_err_msg(err_msg, "keys not allowed for this user"); - return -EACCES; - } - - int32_t key_type = op_state.get_key_type(); - - // if a key type wasn't specified - if (key_type < 0) { - if (op_state.has_subuser()) { - key_type = KEY_TYPE_SWIFT; - } else { - key_type = KEY_TYPE_S3; - } - } - - op_state.set_key_type(key_type); - - /* see if the access key was specified */ - if (key_type == KEY_TYPE_S3 && !op_state.will_gen_access() && - op_state.get_access_key().empty()) { - set_err_msg(err_msg, "empty access key"); - return -ERR_INVALID_ACCESS_KEY; - } - - // don't check for secret key because we may be doing a removal - - if (check_existing_key(op_state)) { - op_state.set_access_key_exist(); - } - return 0; -} - -// Generate a new random key -int RGWAccessKeyPool::generate_key(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, - optional_yield y, std::string *err_msg) -{ - std::string id; - std::string key; - - std::pair key_pair; - RGWAccessKey new_key; - std::unique_ptr duplicate_check; - - int key_type = op_state.get_key_type(); - bool gen_access = op_state.will_gen_access(); - bool gen_secret = op_state.will_gen_secret(); - - if (!keys_allowed) { - set_err_msg(err_msg, "access keys not allowed for this user"); - return -EACCES; - } - - if (op_state.has_existing_key()) { - set_err_msg(err_msg, "cannot create existing key"); - return -ERR_KEY_EXIST; - } - - if (!gen_access) { - id = op_state.get_access_key(); - } - - if (!id.empty()) { - switch (key_type) { - case KEY_TYPE_SWIFT: - if (driver->get_user_by_swift(dpp, id, y, &duplicate_check) >= 0) { - set_err_msg(err_msg, "existing swift key in RGW system:" + id); - return -ERR_KEY_EXIST; - } - break; - case KEY_TYPE_S3: - if (driver->get_user_by_access_key(dpp, id, y, &duplicate_check) >= 0) { - set_err_msg(err_msg, "existing S3 key in RGW system:" + id); - return -ERR_KEY_EXIST; - } - 
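// A note on the flow above: callers do not normally invoke generate_key()
// directly. The admin entry points fill in an RGWUserAdminOpState and call
// RGWAccessKeyPool::add(), which runs check_op() and then execute_add(),
// dispatching here. A minimal sketch, assuming an initialized driver, dpp
// and optional_yield from the calling context:
//
//   RGWUser user;
//   RGWUserAdminOpState op_state(driver);
//   op_state.set_user_id(uid);
//   op_state.set_key_type(KEY_TYPE_S3);
//   op_state.set_gen_access();   // generate both halves of the key pair
//   op_state.set_gen_secret();
//   std::string err_msg;
//   int ret = user.init(dpp, driver, op_state, y);
//   if (ret >= 0)
//     ret = user.keys.add(dpp, op_state, y, &err_msg);
//
// The do/while loop below keeps regenerating the access key until it is both
// URL-safe (validate_access_key) and absent from the access-key index.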
} - } - - //key's subuser - if (op_state.has_subuser()) { - //create user and subuser at the same time, user's s3 key should not be set this - if (!op_state.key_type_setbycontext || (key_type == KEY_TYPE_SWIFT)) { - new_key.subuser = op_state.get_subuser(); - } - } - - //Secret key - if (!gen_secret) { - if (op_state.get_secret_key().empty()) { - set_err_msg(err_msg, "empty secret key"); - return -ERR_INVALID_SECRET_KEY; - } - - key = op_state.get_secret_key(); - } else { - char secret_key_buf[SECRET_KEY_LEN + 1]; - gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, sizeof(secret_key_buf)); - key = secret_key_buf; - } - - // Generate the access key - if (key_type == KEY_TYPE_S3 && gen_access) { - char public_id_buf[PUBLIC_ID_LEN + 1]; - - do { - int id_buf_size = sizeof(public_id_buf); - gen_rand_alphanumeric_upper(g_ceph_context, public_id_buf, id_buf_size); - id = public_id_buf; - if (!validate_access_key(id)) - continue; - - } while (!driver->get_user_by_access_key(dpp, id, y, &duplicate_check)); - } - - if (key_type == KEY_TYPE_SWIFT) { - id = op_state.build_default_swift_kid(); - if (id.empty()) { - set_err_msg(err_msg, "empty swift access key"); - return -ERR_INVALID_ACCESS_KEY; - } - - // check that the access key doesn't exist - if (driver->get_user_by_swift(dpp, id, y, &duplicate_check) >= 0) { - set_err_msg(err_msg, "cannot create existing swift key"); - return -ERR_KEY_EXIST; - } - } - - // finally create the new key - new_key.id = id; - new_key.key = key; - - key_pair.first = id; - key_pair.second = new_key; - - if (key_type == KEY_TYPE_S3) { - access_keys->insert(key_pair); - } else if (key_type == KEY_TYPE_SWIFT) { - swift_keys->insert(key_pair); - } - - return 0; -} - -// modify an existing key -int RGWAccessKeyPool::modify_key(RGWUserAdminOpState& op_state, std::string *err_msg) -{ - std::string id; - std::string key = op_state.get_secret_key(); - int key_type = op_state.get_key_type(); - - RGWAccessKey modify_key; - - pair key_pair; - map::iterator kiter; - - switch (key_type) { - case KEY_TYPE_S3: - id = op_state.get_access_key(); - if (id.empty()) { - set_err_msg(err_msg, "no access key specified"); - return -ERR_INVALID_ACCESS_KEY; - } - break; - case KEY_TYPE_SWIFT: - id = op_state.build_default_swift_kid(); - if (id.empty()) { - set_err_msg(err_msg, "no subuser specified"); - return -EINVAL; - } - break; - default: - set_err_msg(err_msg, "invalid key type"); - return -ERR_INVALID_KEY_TYPE; - } - - if (!op_state.has_existing_key()) { - set_err_msg(err_msg, "key does not exist"); - return -ERR_INVALID_ACCESS_KEY; - } - - key_pair.first = id; - - if (key_type == KEY_TYPE_SWIFT) { - modify_key.id = id; - modify_key.subuser = op_state.get_subuser(); - } else if (key_type == KEY_TYPE_S3) { - kiter = access_keys->find(id); - if (kiter != access_keys->end()) { - modify_key = kiter->second; - } - } - - if (op_state.will_gen_secret()) { - char secret_key_buf[SECRET_KEY_LEN + 1]; - int key_buf_size = sizeof(secret_key_buf); - gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, key_buf_size); - key = secret_key_buf; - } - - if (key.empty()) { - set_err_msg(err_msg, "empty secret key"); - return -ERR_INVALID_SECRET_KEY; - } - - // update the access key with the new secret key - modify_key.key = key; - - key_pair.second = modify_key; - - - if (key_type == KEY_TYPE_S3) { - (*access_keys)[id] = modify_key; - } else if (key_type == KEY_TYPE_SWIFT) { - (*swift_keys)[id] = modify_key; - } - - return 0; -} - -int RGWAccessKeyPool::execute_add(const DoutPrefixProvider 
*dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, bool defer_user_update, - optional_yield y) -{ - int ret = 0; - - std::string subprocess_msg; - int key_op = GENERATE_KEY; - - // set the op - if (op_state.has_existing_key()) - key_op = MODIFY_KEY; - - switch (key_op) { - case GENERATE_KEY: - ret = generate_key(dpp, op_state, y, &subprocess_msg); - break; - case MODIFY_KEY: - ret = modify_key(op_state, &subprocess_msg); - break; - } - - if (ret < 0) { - set_err_msg(err_msg, subprocess_msg); - return ret; - } - - // store the updated info - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWAccessKeyPool::add(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return add(dpp, op_state, err_msg, false, y); -} - -int RGWAccessKeyPool::add(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_user_update, optional_yield y) -{ - int ret; - std::string subprocess_msg; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); - return ret; - } - - ret = execute_add(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to add access key, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWAccessKeyPool::execute_remove(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, - bool defer_user_update, - optional_yield y) -{ - int ret = 0; - - int key_type = op_state.get_key_type(); - std::string id = op_state.get_access_key(); - map::iterator kiter; - map *keys_map; - - if (!op_state.has_existing_key()) { - set_err_msg(err_msg, "unable to find access key, with key type: " + - key_type_to_str(key_type)); - return -ERR_INVALID_ACCESS_KEY; - } - - if (key_type == KEY_TYPE_S3) { - keys_map = access_keys; - } else if (key_type == KEY_TYPE_SWIFT) { - keys_map = swift_keys; - } else { - keys_map = NULL; - set_err_msg(err_msg, "invalid access key"); - return -ERR_INVALID_ACCESS_KEY; - } - - kiter = keys_map->find(id); - if (kiter == keys_map->end()) { - set_err_msg(err_msg, "key not found"); - return -ERR_INVALID_ACCESS_KEY; - } - - keys_map->erase(kiter); - - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWAccessKeyPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return remove(dpp, op_state, err_msg, false, y); -} - -int RGWAccessKeyPool::remove(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, bool defer_user_update, - optional_yield y) -{ - int ret; - - std::string subprocess_msg; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); - return ret; - } - - ret = execute_remove(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to remove access key, " + subprocess_msg); - return ret; - } - - return 0; -} - -// remove all keys associated with a subuser -int RGWAccessKeyPool::remove_subuser_keys(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, - bool defer_user_update, - optional_yield y) -{ - int ret = 0; - - if (!op_state.is_populated()) { - set_err_msg(err_msg, "user info was not 
populated"); - return -EINVAL; - } - - if (!op_state.has_subuser()) { - set_err_msg(err_msg, "no subuser specified"); - return -EINVAL; - } - - std::string swift_kid = op_state.build_default_swift_kid(); - if (swift_kid.empty()) { - set_err_msg(err_msg, "empty swift access key"); - return -EINVAL; - } - - map::iterator kiter; - map *keys_map; - - // a subuser can have at most one swift key - keys_map = swift_keys; - kiter = keys_map->find(swift_kid); - if (kiter != keys_map->end()) { - keys_map->erase(kiter); - } - - // a subuser may have multiple s3 key pairs - std::string subuser_str = op_state.get_subuser(); - keys_map = access_keys; - RGWUserInfo user_info = op_state.get_user_info(); - auto user_kiter = user_info.access_keys.begin(); - for (; user_kiter != user_info.access_keys.end(); ++user_kiter) { - if (user_kiter->second.subuser == subuser_str) { - kiter = keys_map->find(user_kiter->first); - if (kiter != keys_map->end()) { - keys_map->erase(kiter); - } - } - } - - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -RGWSubUserPool::RGWSubUserPool(RGWUser *usr) -{ - if (!usr) { - return; - } - - user = usr; - - subusers_allowed = true; - driver = user->get_driver(); -} - -int RGWSubUserPool::init(RGWUserAdminOpState& op_state) -{ - if (!op_state.is_initialized()) { - subusers_allowed = false; - return -EINVAL; - } - - const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { - subusers_allowed = false; - return -EACCES; - } - - subuser_map = op_state.get_subusers(); - if (subuser_map == NULL) { - subusers_allowed = false; - return -EINVAL; - } - - subusers_allowed = true; - - return 0; -} - -bool RGWSubUserPool::exists(std::string subuser) -{ - if (subuser.empty()) - return false; - - if (!subuser_map) - return false; - - if (subuser_map->count(subuser)) - return true; - - return false; -} - -int RGWSubUserPool::check_op(RGWUserAdminOpState& op_state, - std::string *err_msg) -{ - bool existing = false; - std::string subuser = op_state.get_subuser(); - - if (!op_state.is_populated()) { - set_err_msg(err_msg, "user info was not populated"); - return -EINVAL; - } - - if (!subusers_allowed) { - set_err_msg(err_msg, "subusers not allowed for this user"); - return -EACCES; - } - - if (subuser.empty() && !op_state.will_gen_subuser()) { - set_err_msg(err_msg, "empty subuser name"); - return -EINVAL; - } - - if (op_state.get_subuser_perm() == RGW_PERM_INVALID) { - set_err_msg(err_msg, "invalid subuser access"); - return -EINVAL; - } - - //set key type when it not set or set by context - if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) { - op_state.set_key_type(KEY_TYPE_SWIFT); - op_state.key_type_setbycontext = true; - } - - // check if the subuser exists - if (!subuser.empty()) - existing = exists(subuser); - - op_state.set_existing_subuser(existing); - - return 0; -} - -int RGWSubUserPool::execute_add(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, bool defer_user_update, - optional_yield y) -{ - int ret = 0; - std::string subprocess_msg; - - RGWSubUser subuser; - std::pair subuser_pair; - std::string subuser_str = op_state.get_subuser(); - - subuser_pair.first = subuser_str; - - // assumes key should be created - if (op_state.has_key_op()) { - ret = user->keys.add(dpp, op_state, &subprocess_msg, true, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create subuser key, " + subprocess_msg); - return ret; - } - } - - // 
create the subuser - subuser.name = subuser_str; - - if (op_state.has_subuser_perm()) - subuser.perm_mask = op_state.get_subuser_perm(); - - // insert the subuser into user info - subuser_pair.second = subuser; - subuser_map->insert(subuser_pair); - - // attempt to save the subuser - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWSubUserPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return add(dpp, op_state, err_msg, false, y); -} - -int RGWSubUserPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update, optional_yield y) -{ - std::string subprocess_msg; - int ret; - int32_t key_type = op_state.get_key_type(); - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); - return ret; - } - - if (op_state.get_access_key_exist()) { - set_err_msg(err_msg, "cannot create existing key"); - return -ERR_KEY_EXIST; - } - - if (key_type == KEY_TYPE_S3 && op_state.get_access_key().empty()) { - op_state.set_gen_access(); - } - - if (op_state.get_secret_key().empty()) { - op_state.set_gen_secret(); - } - - ret = execute_add(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create subuser, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWSubUserPool::execute_remove(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, - std::string *err_msg, bool defer_user_update, - optional_yield y) -{ - int ret = 0; - std::string subprocess_msg; - - std::string subuser_str = op_state.get_subuser(); - - map::iterator siter; - siter = subuser_map->find(subuser_str); - if (siter == subuser_map->end()){ - set_err_msg(err_msg, "subuser not found: " + subuser_str); - return -ERR_NO_SUCH_SUBUSER; - } - if (!op_state.has_existing_subuser()) { - set_err_msg(err_msg, "subuser not found: " + subuser_str); - return -ERR_NO_SUCH_SUBUSER; - } - - // always purge all associate keys - user->keys.remove_subuser_keys(dpp, op_state, &subprocess_msg, true, y); - - // remove the subuser from the user info - subuser_map->erase(siter); - - // attempt to save the subuser - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWSubUserPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return remove(dpp, op_state, err_msg, false, y); -} - -int RGWSubUserPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_user_update, optional_yield y) -{ - std::string subprocess_msg; - int ret; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); - return ret; - } - - ret = execute_remove(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to remove subuser, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWSubUserPool::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update, optional_yield y) -{ - int ret = 0; - std::string subprocess_msg; - std::map::iterator siter; - std::pair subuser_pair; - - std::string subuser_str = op_state.get_subuser(); - RGWSubUser subuser; - - if 
(!op_state.has_existing_subuser()) { - set_err_msg(err_msg, "subuser does not exist"); - return -ERR_NO_SUCH_SUBUSER; - } - - subuser_pair.first = subuser_str; - - siter = subuser_map->find(subuser_str); - subuser = siter->second; - - if (op_state.has_key_op()) { - ret = user->keys.add(dpp, op_state, &subprocess_msg, true, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create subuser keys, " + subprocess_msg); - return ret; - } - } - - if (op_state.has_subuser_perm()) - subuser.perm_mask = op_state.get_subuser_perm(); - - subuser_pair.second = subuser; - - subuser_map->erase(siter); - subuser_map->insert(subuser_pair); - - // attempt to save the subuser - if (!defer_user_update) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWSubUserPool::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) -{ - return RGWSubUserPool::modify(dpp, op_state, y, err_msg, false); -} - -int RGWSubUserPool::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg, bool defer_user_update) -{ - std::string subprocess_msg; - int ret; - - RGWSubUser subuser; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse request, " + subprocess_msg); - return ret; - } - - ret = execute_modify(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to modify subuser, " + subprocess_msg); - return ret; - } - - return 0; -} - -RGWUserCapPool::RGWUserCapPool(RGWUser *usr) -{ - if (!usr) { - return; - } - user = usr; - caps_allowed = true; -} - -int RGWUserCapPool::init(RGWUserAdminOpState& op_state) -{ - if (!op_state.is_initialized()) { - caps_allowed = false; - return -EINVAL; - } - - const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { - caps_allowed = false; - return -EACCES; - } - - caps = op_state.get_caps_obj(); - if (!caps) { - caps_allowed = false; - return -ERR_INVALID_CAP; - } - - caps_allowed = true; - - return 0; -} - -int RGWUserCapPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return add(dpp, op_state, err_msg, false, y); -} - -int RGWUserCapPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_save, optional_yield y) -{ - int ret = 0; - std::string caps_str = op_state.get_caps(); - - if (!op_state.is_populated()) { - set_err_msg(err_msg, "user info was not populated"); - return -EINVAL; - } - - if (!caps_allowed) { - set_err_msg(err_msg, "caps not allowed for this user"); - return -EACCES; - } - - if (caps_str.empty()) { - set_err_msg(err_msg, "empty user caps"); - return -ERR_INVALID_CAP; - } - - int r = caps->add_from_string(caps_str); - if (r < 0) { - set_err_msg(err_msg, "unable to add caps: " + caps_str); - return r; - } - - if (!defer_save) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -int RGWUserCapPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg) -{ - return remove(dpp, op_state, err_msg, false, y); -} - -int RGWUserCapPool::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_save, optional_yield y) -{ - int ret = 0; - - std::string caps_str = op_state.get_caps(); - - if (!op_state.is_populated()) { - 
set_err_msg(err_msg, "user info was not populated"); - return -EINVAL; - } - - if (!caps_allowed) { - set_err_msg(err_msg, "caps not allowed for this user"); - return -EACCES; - } - - if (caps_str.empty()) { - set_err_msg(err_msg, "empty user caps"); - return -ERR_INVALID_CAP; - } - - int r = caps->remove_from_string(caps_str); - if (r < 0) { - set_err_msg(err_msg, "unable to remove caps: " + caps_str); - return r; - } - - if (!defer_save) - ret = user->update(dpp, op_state, err_msg, y); - - if (ret < 0) - return ret; - - return 0; -} - -RGWUser::RGWUser() : caps(this), keys(this), subusers(this) -{ - init_default(); -} - -int RGWUser::init(const DoutPrefixProvider *dpp, rgw::sal::Driver* _driver, - RGWUserAdminOpState& op_state, optional_yield y) -{ - init_default(); - int ret = init_storage(_driver); - if (ret < 0) - return ret; - - ret = init(dpp, op_state, y); - if (ret < 0) - return ret; - - return 0; -} - -void RGWUser::init_default() -{ - // use anonymous user info as a placeholder - rgw_get_anon_user(old_info); - user_id = RGW_USER_ANON_ID; - - clear_populated(); -} - -int RGWUser::init_storage(rgw::sal::Driver* _driver) -{ - if (!_driver) { - return -EINVAL; - } - - driver = _driver; - - clear_populated(); - - /* API wrappers */ - keys = RGWAccessKeyPool(this); - caps = RGWUserCapPool(this); - subusers = RGWSubUserPool(this); - - return 0; -} - -int RGWUser::init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y) -{ - bool found = false; - std::string swift_user; - user_id = op_state.get_user_id(); - std::string user_email = op_state.get_user_email(); - std::string access_key = op_state.get_access_key(); - std::string subuser = op_state.get_subuser(); - - int key_type = op_state.get_key_type(); - if (key_type == KEY_TYPE_SWIFT) { - swift_user = op_state.get_access_key(); - access_key.clear(); - } - - std::unique_ptr user; - - clear_populated(); - - if (user_id.empty() && !subuser.empty()) { - size_t pos = subuser.find(':'); - if (pos != string::npos) { - user_id = subuser.substr(0, pos); - op_state.set_user_id(user_id); - } - } - - if (!user_id.empty() && (user_id.compare(RGW_USER_ANON_ID) != 0)) { - user = driver->get_user(user_id); - found = (user->load_user(dpp, y) >= 0); - op_state.found_by_uid = found; - } - if (driver->ctx()->_conf.get_val("rgw_user_unique_email")) { - if (!user_email.empty() && !found) { - found = (driver->get_user_by_email(dpp, user_email, y, &user) >= 0); - op_state.found_by_email = found; - } - } - if (!swift_user.empty() && !found) { - found = (driver->get_user_by_swift(dpp, swift_user, y, &user) >= 0); - op_state.found_by_key = found; - } - if (!access_key.empty() && !found) { - found = (driver->get_user_by_access_key(dpp, access_key, y, &user) >= 0); - op_state.found_by_key = found; - } - - op_state.set_existing_user(found); - if (found) { - op_state.set_user_info(user->get_info()); - op_state.set_populated(); - op_state.objv = user->get_version_tracker(); - op_state.set_user_version_tracker(user->get_version_tracker()); - - old_info = user->get_info(); - set_populated(); - } - - if (user_id.empty()) { - user_id = user->get_id(); - } - op_state.set_initialized(); - - // this may have been called by a helper object - int ret = init_members(op_state); - if (ret < 0) - return ret; - - return 0; -} - -int RGWUser::init_members(RGWUserAdminOpState& op_state) -{ - int ret = 0; - - ret = keys.init(op_state); - if (ret < 0) - return ret; - - ret = subusers.init(op_state); - if (ret < 0) - return ret; - - ret = caps.init(op_state); 
- if (ret < 0) - return ret; - - return 0; -} - -int RGWUser::update(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - optional_yield y) -{ - int ret; - std::string subprocess_msg; - rgw::sal::User* user = op_state.get_user(); - - if (!driver) { - set_err_msg(err_msg, "couldn't initialize storage"); - return -EINVAL; - } - - RGWUserInfo *pold_info = (is_populated() ? &old_info : nullptr); - - ret = user->store_user(dpp, y, false, pold_info); - op_state.objv = user->get_version_tracker(); - op_state.set_user_version_tracker(user->get_version_tracker()); - - if (ret < 0) { - set_err_msg(err_msg, "unable to store user info"); - return ret; - } - - old_info = user->get_info(); - set_populated(); - - return 0; -} - -int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg) -{ - int ret = 0; - const rgw_user& uid = op_state.get_user_id(); - - if (uid.compare(RGW_USER_ANON_ID) == 0) { - set_err_msg(err_msg, "unable to perform operations on the anonymous user"); - return -EINVAL; - } - - if (is_populated() && user_id.compare(uid) != 0) { - set_err_msg(err_msg, "user id mismatch, operation id: " + uid.to_str() - + " does not match: " + user_id.to_str()); - - return -EINVAL; - } - - ret = rgw_validate_tenant_name(uid.tenant); - if (ret) { - set_err_msg(err_msg, - "invalid tenant only alphanumeric and _ characters are allowed"); - return ret; - } - - //set key type when it not set or set by context - if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) { - op_state.set_key_type(KEY_TYPE_S3); - op_state.key_type_setbycontext = true; - } - - return 0; -} - -// update swift_keys with new user id -static void rename_swift_keys(const rgw_user& user, - std::map& keys) -{ - std::string user_id; - user.to_str(user_id); - - auto modify_keys = std::move(keys); - for ([[maybe_unused]] auto& [k, key] : modify_keys) { - std::string id = user_id + ":" + key.subuser; - key.id = id; - keys[id] = std::move(key); - } -} - -int RGWUser::execute_rename(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) -{ - int ret; - bool populated = op_state.is_populated(); - - if (!op_state.has_existing_user() && !populated) { - set_err_msg(err_msg, "user not found"); - return -ENOENT; - } - - if (!populated) { - ret = init(dpp, op_state, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to retrieve user info"); - return ret; - } - } - - std::unique_ptr old_user = driver->get_user(op_state.get_user_info().user_id); - std::unique_ptr new_user = driver->get_user(op_state.get_new_uid()); - if (old_user->get_tenant() != new_user->get_tenant()) { - set_err_msg(err_msg, "users have to be under the same tenant namespace " - + old_user->get_tenant() + " != " + new_user->get_tenant()); - return -EINVAL; - } - - // create a stub user and write only the uid index and buckets object - std::unique_ptr user; - user = driver->get_user(new_user->get_id()); - - const bool exclusive = !op_state.get_overwrite_new_user(); // overwrite if requested - - ret = user->store_user(dpp, y, exclusive); - if (ret == -EEXIST) { - set_err_msg(err_msg, "user name given by --new-uid already exists"); - return ret; - } - if (ret < 0) { - set_err_msg(err_msg, "unable to store new user info"); - return ret; - } - - RGWAccessControlPolicy policy_instance; - policy_instance.create_default(new_user->get_id(), old_user->get_display_name()); - - //unlink and link buckets to new user - string marker; - CephContext *cct = driver->ctx(); - size_t max_buckets 
= cct->_conf->rgw_list_buckets_max_chunk; - rgw::sal::BucketList buckets; - - do { - ret = old_user->list_buckets(dpp, marker, "", max_buckets, false, buckets, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to list user buckets"); - return ret; - } - - auto& m = buckets.get_buckets(); - - for (auto it = m.begin(); it != m.end(); ++it) { - auto& bucket = it->second; - marker = it->first; - - ret = bucket->load_bucket(dpp, y); - if (ret < 0) { - set_err_msg(err_msg, "failed to fetch bucket info for bucket=" + bucket->get_name()); - return ret; - } - - ret = bucket->set_acl(dpp, policy_instance, y); - if (ret < 0) { - set_err_msg(err_msg, "failed to set acl on bucket " + bucket->get_name()); - return ret; - } - - ret = bucket->chown(dpp, new_user.get(), old_user.get(), y); - if (ret < 0) { - set_err_msg(err_msg, "failed to run bucket chown" + cpp_strerror(-ret)); - return ret; - } - } - - } while (buckets.is_truncated()); - - // update the 'stub user' with all of the other fields and rewrite all of the - // associated index objects - RGWUserInfo& user_info = op_state.get_user_info(); - user_info.user_id = new_user->get_id(); - op_state.objv = user->get_version_tracker(); - op_state.set_user_version_tracker(user->get_version_tracker()); - - rename_swift_keys(new_user->get_id(), user_info.swift_keys); - - return update(dpp, op_state, err_msg, y); -} - -int RGWUser::execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - optional_yield y) -{ - const rgw_user& uid = op_state.get_user_id(); - std::string user_email = op_state.get_user_email(); - std::string display_name = op_state.get_display_name(); - - // set the user info - RGWUserInfo user_info; - user_id = uid; - user_info.user_id = user_id; - user_info.display_name = display_name; - user_info.type = TYPE_RGW; - - if (!user_email.empty()) - user_info.user_email = user_email; - - CephContext *cct = driver->ctx(); - if (op_state.max_buckets_specified) { - user_info.max_buckets = op_state.get_max_buckets(); - } else { - user_info.max_buckets = - cct->_conf.get_val("rgw_user_max_buckets"); - } - - user_info.suspended = op_state.get_suspension_status(); - user_info.admin = op_state.admin; - user_info.system = op_state.system; - - if (op_state.op_mask_specified) - user_info.op_mask = op_state.get_op_mask(); - - if (op_state.has_bucket_quota()) { - user_info.quota.bucket_quota = op_state.get_bucket_quota(); - } else { - rgw_apply_default_bucket_quota(user_info.quota.bucket_quota, cct->_conf); - } - - if (op_state.temp_url_key_specified) { - map::iterator iter; - for (iter = op_state.temp_url_keys.begin(); - iter != op_state.temp_url_keys.end(); ++iter) { - user_info.temp_url_keys[iter->first] = iter->second; - } - } - - if (op_state.has_user_quota()) { - user_info.quota.user_quota = op_state.get_user_quota(); - } else { - rgw_apply_default_user_quota(user_info.quota.user_quota, cct->_conf); - } - - if (op_state.default_placement_specified) { - user_info.default_placement = op_state.default_placement; - } - - if (op_state.placement_tags_specified) { - user_info.placement_tags = op_state.placement_tags; - } - - // update the request - op_state.set_user_info(user_info); - op_state.set_populated(); - - // update the helper objects - int ret = init_members(op_state); - if (ret < 0) { - set_err_msg(err_msg, "unable to initialize user"); - return ret; - } - - // see if we need to add an access key - std::string subprocess_msg; - bool defer_user_update = true; - if (op_state.has_key_op()) { - ret = 
keys.add(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create access key, " + subprocess_msg); - return ret; - } - } - - // see if we need to add some caps - if (op_state.has_caps_op()) { - ret = caps.add(dpp, op_state, &subprocess_msg, defer_user_update, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to add user capabilities, " + subprocess_msg); - return ret; - } - } - - ret = update(dpp, op_state, err_msg, y); - if (ret < 0) - return ret; - - return 0; -} - -int RGWUser::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) -{ - std::string subprocess_msg; - int ret = user_add_helper(op_state, &subprocess_msg); - if (ret != 0) { - set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); - return ret; - } - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); - return ret; - } - - ret = execute_add(dpp, op_state, &subprocess_msg, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create user, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWUser::rename(RGWUserAdminOpState& op_state, optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg) -{ - std::string subprocess_msg; - int ret; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); - return ret; - } - - ret = execute_rename(dpp, op_state, &subprocess_msg, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to rename user, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWUser::execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) -{ - int ret; - - bool purge_data = op_state.will_purge_data(); - rgw::sal::User* user = op_state.get_user(); - - if (!op_state.has_existing_user()) { - set_err_msg(err_msg, "user does not exist"); - return -ENOENT; - } - - rgw::sal::BucketList buckets; - string marker; - CephContext *cct = driver->ctx(); - size_t max_buckets = cct->_conf->rgw_list_buckets_max_chunk; - do { - ret = user->list_buckets(dpp, marker, string(), max_buckets, false, buckets, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to read user bucket info"); - return ret; - } - - auto& m = buckets.get_buckets(); - if (!m.empty() && !purge_data) { - set_err_msg(err_msg, "must specify purge data to remove user with buckets"); - return -EEXIST; // change to code that maps to 409: conflict - } - - for (auto it = m.begin(); it != m.end(); ++it) { - ret = it->second->remove_bucket(dpp, true, false, nullptr, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to delete user data"); - return ret; - } - - marker = it->first; - } - - } while (buckets.is_truncated()); - - ret = user->remove_user(dpp, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to remove user from RADOS"); - return ret; - } - - op_state.clear_populated(); - clear_populated(); - - return 0; -} - -int RGWUser::remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) -{ - std::string subprocess_msg; - int ret; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); - return ret; - } - - ret = execute_remove(dpp, op_state, &subprocess_msg, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to remove user, " + subprocess_msg); - return ret; - } 
- - return 0; -} - -int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y) -{ - bool populated = op_state.is_populated(); - int ret = 0; - std::string subprocess_msg; - std::string op_email = op_state.get_user_email(); - std::string display_name = op_state.get_display_name(); - - RGWUserInfo user_info; - std::unique_ptr duplicate_check; - - // ensure that the user info has been populated or is populate-able - if (!op_state.has_existing_user() && !populated) { - set_err_msg(err_msg, "user not found"); - return -ENOENT; - } - - // if the user hasn't already been populated...attempt to - if (!populated) { - ret = init(dpp, op_state, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to retrieve user info"); - return ret; - } - } - - // ensure that we can modify the user's attributes - if (user_id.compare(RGW_USER_ANON_ID) == 0) { - set_err_msg(err_msg, "unable to modify anonymous user's info"); - return -EACCES; - } - - user_info = old_info; - - std::string old_email = old_info.user_email; - if (!op_email.empty()) { - // make sure we are not adding a duplicate email - if (old_email != op_email) { - ret = driver->get_user_by_email(dpp, op_email, y, &duplicate_check); - if (ret >= 0 && duplicate_check->get_id().compare(user_id) != 0) { - set_err_msg(err_msg, "cannot add duplicate email"); - return -ERR_EMAIL_EXIST; - } - } - user_info.user_email = op_email; - } else if (op_email.empty() && op_state.user_email_specified) { - ldpp_dout(dpp, 10) << "removing email index: " << user_info.user_email << dendl; - /* will be physically removed later when calling update() */ - user_info.user_email.clear(); - } - - // update the remaining user info - if (!display_name.empty()) - user_info.display_name = display_name; - - if (op_state.max_buckets_specified) - user_info.max_buckets = op_state.get_max_buckets(); - - if (op_state.admin_specified) - user_info.admin = op_state.admin; - - if (op_state.system_specified) - user_info.system = op_state.system; - - if (op_state.temp_url_key_specified) { - map::iterator iter; - for (iter = op_state.temp_url_keys.begin(); - iter != op_state.temp_url_keys.end(); ++iter) { - user_info.temp_url_keys[iter->first] = iter->second; - } - } - - if (op_state.op_mask_specified) - user_info.op_mask = op_state.get_op_mask(); - - if (op_state.has_bucket_quota()) - user_info.quota.bucket_quota = op_state.get_bucket_quota(); - - if (op_state.has_user_quota()) - user_info.quota.user_quota = op_state.get_user_quota(); - - if (op_state.has_suspension_op()) { - __u8 suspended = op_state.get_suspension_status(); - user_info.suspended = suspended; - - rgw::sal::BucketList buckets; - - if (user_id.empty()) { - set_err_msg(err_msg, "empty user id passed...aborting"); - return -EINVAL; - } - - string marker; - CephContext *cct = driver->ctx(); - size_t max_buckets = cct->_conf->rgw_list_buckets_max_chunk; - std::unique_ptr user = driver->get_user(user_id); - do { - ret = user->list_buckets(dpp, marker, string(), max_buckets, false, buckets, y); - if (ret < 0) { - set_err_msg(err_msg, "could not get buckets for uid: " + user_id.to_str()); - return ret; - } - - auto& m = buckets.get_buckets(); - - vector bucket_names; - for (auto iter = m.begin(); iter != m.end(); ++iter) { - auto& bucket = iter->second; - bucket_names.push_back(bucket->get_key()); - - marker = iter->first; - } - - ret = driver->set_buckets_enabled(dpp, bucket_names, !suspended); - if (ret < 0) { - set_err_msg(err_msg, "failed to modify bucket"); - 
return ret; - } - - } while (buckets.is_truncated()); - } - - if (op_state.mfa_ids_specified) { - user_info.mfa_ids = op_state.mfa_ids; - } - - if (op_state.default_placement_specified) { - user_info.default_placement = op_state.default_placement; - } - - if (op_state.placement_tags_specified) { - user_info.placement_tags = op_state.placement_tags; - } - - op_state.set_user_info(user_info); - - // if we're supposed to modify keys, do so - if (op_state.has_key_op()) { - ret = keys.add(dpp, op_state, &subprocess_msg, true, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to create or modify keys, " + subprocess_msg); - return ret; - } - } - - ret = update(dpp, op_state, err_msg, y); - if (ret < 0) - return ret; - - return 0; -} - -int RGWUser::modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg) -{ - std::string subprocess_msg; - int ret; - - ret = check_op(op_state, &subprocess_msg); - if (ret < 0) { - set_err_msg(err_msg, "unable to parse parameters, " + subprocess_msg); - return ret; - } - - ret = execute_modify(dpp, op_state, &subprocess_msg, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to modify user, " + subprocess_msg); - return ret; - } - - return 0; -} - -int RGWUser::info(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, - optional_yield y, std::string *err_msg) -{ - int ret = init(dpp, op_state, y); - if (ret < 0) { - set_err_msg(err_msg, "unable to fetch user info"); - return ret; - } - - fetched_info = op_state.get_user_info(); - - return 0; -} - -int RGWUser::info(RGWUserInfo& fetched_info, std::string *err_msg) -{ - if (!is_populated()) { - set_err_msg(err_msg, "no user info saved"); - return -EINVAL; - } - - fetched_info = old_info; - - return 0; -} - -int RGWUser::list(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher) -{ - Formatter *formatter = flusher.get_formatter(); - void *handle = nullptr; - std::string metadata_key = "user"; - if (op_state.max_entries > 1000) { - op_state.max_entries = 1000; - } - - int ret = driver->meta_list_keys_init(dpp, metadata_key, op_state.marker, &handle); - if (ret < 0) { - return ret; - } - - bool truncated = false; - uint64_t count = 0; - uint64_t left = 0; - flusher.start(0); - - // open the result object section - formatter->open_object_section("result"); - - // open the user id list array section - formatter->open_array_section("keys"); - do { - std::list keys; - left = op_state.max_entries - count; - ret = driver->meta_list_keys_next(dpp, handle, left, keys, &truncated); - if (ret < 0 && ret != -ENOENT) { - return ret; - } if (ret != -ENOENT) { - for (std::list::iterator iter = keys.begin(); iter != keys.end(); ++iter) { - formatter->dump_string("key", *iter); - ++count; - } - } - } while (truncated && left > 0); - // close user id list section - formatter->close_section(); - - formatter->dump_bool("truncated", truncated); - formatter->dump_int("count", count); - if (truncated) { - formatter->dump_string("marker", driver->meta_get_marker(handle)); - } - - // close result object section - formatter->close_section(); - - driver->meta_list_keys_complete(handle); - - flusher.flush(); - return 0; -} - -int RGWUserAdminOp_User::list(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher) -{ - RGWUser user; - - int ret = user.init_storage(driver); - if (ret < 0) - return ret; - - ret = user.list(dpp, op_state, flusher); - if (ret < 0) 
- return ret; - - return 0; -} - -int RGWUserAdminOp_User::info(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - std::unique_ptr ruser; - - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - ruser = driver->get_user(info.user_id); - - if (op_state.sync_stats) { - ret = rgw_user_sync_all_stats(dpp, driver, ruser.get(), y); - if (ret < 0) { - return ret; - } - } - - RGWStorageStats stats; - RGWStorageStats *arg_stats = NULL; - if (op_state.fetch_stats) { - int ret = ruser->read_stats(dpp, y, &stats); - if (ret < 0 && ret != -ENOENT) { - return ret; - } - - arg_stats = &stats; - } - - if (formatter) { - flusher.start(0); - - dump_user_info(formatter, info, arg_stats); - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_User::create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.add(dpp, op_state, y, NULL); - if (ret < 0) { - if (ret == -EEXIST) - ret = -ERR_USER_EXIST; - return ret; - } - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - dump_user_info(formatter, info); - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_User::modify(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - Formatter *formatter = flusher.get_formatter(); - - ret = user.modify(dpp, op_state, y, NULL); - if (ret < 0) { - if (ret == -ENOENT) - ret = -ERR_NO_SUCH_USER; - return ret; - } - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - dump_user_info(formatter, info); - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_User::remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - - ret = user.remove(dpp, op_state, y, NULL); - - if (ret == -ENOENT) - ret = -ERR_NO_SUCH_USER; - return ret; -} - -int RGWUserAdminOp_Subuser::create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.subusers.add(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - dump_subusers_info(formatter, info); - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_Subuser::modify(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, 
RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.subusers.modify(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - dump_subusers_info(formatter, info); - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_Subuser::remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - ret = user.subusers.remove(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - return 0; -} - -int RGWUserAdminOp_Key::create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.keys.add(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - int key_type = op_state.get_key_type(); - - if (key_type == KEY_TYPE_SWIFT) - dump_swift_keys_info(formatter, info); - - else if (key_type == KEY_TYPE_S3) - dump_access_keys_info(formatter, info); - - flusher.flush(); - } - - return 0; -} - -int RGWUserAdminOp_Key::remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, - optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - - ret = user.keys.remove(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - return 0; -} - -int RGWUserAdminOp_Caps::add(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.caps.add(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - ret = user.info(info, NULL); - if (ret < 0) - return ret; - - if (formatter) { - flusher.start(0); - - info.caps.dump(formatter); - flusher.flush(); - } - - return 0; -} - - -int RGWUserAdminOp_Caps::remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, - RGWFormatterFlusher& flusher, optional_yield y) -{ - RGWUserInfo info; - RGWUser user; - int ret = user.init(dpp, driver, op_state, y); - if (ret < 0) - return ret; - - if (!op_state.has_existing_user()) - return -ERR_NO_SUCH_USER; - - Formatter *formatter = flusher.get_formatter(); - - ret = user.caps.remove(dpp, op_state, y, NULL); - if (ret < 0) - return ret; - - ret = user.info(info, NULL); - if (ret 
< 0) - return ret; - - if (formatter) { - flusher.start(0); - - info.caps.dump(formatter); - flusher.flush(); - } - - return 0; -} - -class RGWUserMetadataHandler : public RGWMetadataHandler_GenericMetaBE { -public: - struct Svc { - RGWSI_User *user{nullptr}; - } svc; - - RGWUserMetadataHandler(RGWSI_User *user_svc) { - base_init(user_svc->ctx(), user_svc->get_be_handler()); - svc.user = user_svc; - } - - ~RGWUserMetadataHandler() {} - - string get_type() override { return "user"; } - - int do_get(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWMetadataObject **obj, optional_yield y, const DoutPrefixProvider *dpp) override { - RGWUserCompleteInfo uci; - RGWObjVersionTracker objv_tracker; - real_time mtime; - - rgw_user user = RGWSI_User::user_from_meta_key(entry); - - int ret = svc.user->read_user_info(op->ctx(), user, &uci.info, &objv_tracker, - &mtime, nullptr, &uci.attrs, - y, dpp); - if (ret < 0) { - return ret; - } - - RGWUserMetadataObject *mdo = new RGWUserMetadataObject(uci, objv_tracker.read_version, mtime); - *obj = mdo; - - return 0; - } - - RGWMetadataObject *get_meta_obj(JSONObj *jo, const obj_version& objv, const ceph::real_time& mtime) override { - RGWUserCompleteInfo uci; - - try { - decode_json_obj(uci, jo); - } catch (JSONDecoder::err& e) { - return nullptr; - } - - return new RGWUserMetadataObject(uci, objv, mtime); - } - - int do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, - RGWMetadataObject *obj, - RGWObjVersionTracker& objv_tracker, - optional_yield y, const DoutPrefixProvider *dpp, - RGWMDLogSyncType type, bool from_remote_zone) override; - - int do_remove(RGWSI_MetaBackend_Handler::Op *op, string& entry, RGWObjVersionTracker& objv_tracker, - optional_yield y, const DoutPrefixProvider *dpp) override { - RGWUserInfo info; - - rgw_user user = RGWSI_User::user_from_meta_key(entry); - - int ret = svc.user->read_user_info(op->ctx(), user, &info, nullptr, - nullptr, nullptr, nullptr, - y, dpp); - if (ret < 0) { - return ret; - } - - return svc.user->remove_user_info(op->ctx(), info, &objv_tracker, - y, dpp); - } -}; - -class RGWMetadataHandlerPut_User : public RGWMetadataHandlerPut_SObj -{ - RGWUserMetadataHandler *uhandler; - RGWUserMetadataObject *uobj; -public: - RGWMetadataHandlerPut_User(RGWUserMetadataHandler *_handler, - RGWSI_MetaBackend_Handler::Op *op, string& entry, - RGWMetadataObject *obj, RGWObjVersionTracker& objv_tracker, - optional_yield y, - RGWMDLogSyncType type, bool from_remote_zone) : RGWMetadataHandlerPut_SObj(_handler, op, entry, obj, objv_tracker, y, type, from_remote_zone), - uhandler(_handler) { - uobj = static_cast(obj); - } - - int put_checked(const DoutPrefixProvider *dpp) override; -}; - -int RGWUserMetadataHandler::do_put(RGWSI_MetaBackend_Handler::Op *op, string& entry, - RGWMetadataObject *obj, - RGWObjVersionTracker& objv_tracker, - optional_yield y, const DoutPrefixProvider *dpp, - RGWMDLogSyncType type, bool from_remote_zone) -{ - RGWMetadataHandlerPut_User put_op(this, op, entry, obj, objv_tracker, y, type, from_remote_zone); - return do_put_operate(&put_op, dpp); -} - -int RGWMetadataHandlerPut_User::put_checked(const DoutPrefixProvider *dpp) -{ - RGWUserMetadataObject *orig_obj = static_cast(old_obj); - RGWUserCompleteInfo& uci = uobj->get_uci(); - - map *pattrs{nullptr}; - if (uci.has_attrs) { - pattrs = &uci.attrs; - } - - RGWUserInfo *pold_info = (orig_obj ? 
&orig_obj->get_uci().info : nullptr); - - auto mtime = obj->get_mtime(); - - int ret = uhandler->svc.user->store_user_info(op->ctx(), uci.info, pold_info, - &objv_tracker, mtime, - false, pattrs, y, dpp); - if (ret < 0) { - return ret; - } - - return STATUS_APPLIED; -} - - -RGWUserCtl::RGWUserCtl(RGWSI_Zone *zone_svc, - RGWSI_User *user_svc, - RGWUserMetadataHandler *_umhandler) : umhandler(_umhandler) { - svc.zone = zone_svc; - svc.user = user_svc; - be_handler = umhandler->get_be_handler(); -} - -template -class optional_default -{ - const std::optional& opt; - std::optional def; - const T *p; -public: - optional_default(const std::optional& _o) : opt(_o) { - if (opt) { - p = &(*opt); - } else { - def = T(); - p = &(*def); - } - } - - const T *operator->() { - return p; - } - - const T& operator*() { - return *p; - } -}; - -int RGWUserCtl::get_info_by_uid(const DoutPrefixProvider *dpp, - const rgw_user& uid, - RGWUserInfo *info, - optional_yield y, - const GetParams& params) - -{ - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->read_user_info(op->ctx(), - uid, - info, - params.objv_tracker, - params.mtime, - params.cache_info, - params.attrs, - y, - dpp); - }); -} - -int RGWUserCtl::get_info_by_email(const DoutPrefixProvider *dpp, - const string& email, - RGWUserInfo *info, - optional_yield y, - const GetParams& params) -{ - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->get_user_info_by_email(op->ctx(), email, - info, - params.objv_tracker, - params.mtime, - y, - dpp); - }); -} - -int RGWUserCtl::get_info_by_swift(const DoutPrefixProvider *dpp, - const string& swift_name, - RGWUserInfo *info, - optional_yield y, - const GetParams& params) -{ - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->get_user_info_by_swift(op->ctx(), swift_name, - info, - params.objv_tracker, - params.mtime, - y, - dpp); - }); -} - -int RGWUserCtl::get_info_by_access_key(const DoutPrefixProvider *dpp, - const string& access_key, - RGWUserInfo *info, - optional_yield y, - const GetParams& params) -{ - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->get_user_info_by_access_key(op->ctx(), access_key, - info, - params.objv_tracker, - params.mtime, - y, - dpp); - }); -} - -int RGWUserCtl::get_attrs_by_uid(const DoutPrefixProvider *dpp, - const rgw_user& user_id, - map *pattrs, - optional_yield y, - RGWObjVersionTracker *objv_tracker) -{ - RGWUserInfo user_info; - - return get_info_by_uid(dpp, user_id, &user_info, y, RGWUserCtl::GetParams() - .set_attrs(pattrs) - .set_objv_tracker(objv_tracker)); -} - -int RGWUserCtl::store_info(const DoutPrefixProvider *dpp, - const RGWUserInfo& info, optional_yield y, - const PutParams& params) -{ - string key = RGWSI_User::get_meta_key(info.user_id); - - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->store_user_info(op->ctx(), info, - params.old_info, - params.objv_tracker, - params.mtime, - params.exclusive, - params.attrs, - y, - dpp); - }); -} - -int RGWUserCtl::remove_info(const DoutPrefixProvider *dpp, - const RGWUserInfo& info, optional_yield y, - const RemoveParams& params) - -{ - string key = RGWSI_User::get_meta_key(info.user_id); - - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->remove_user_info(op->ctx(), info, - params.objv_tracker, - y, dpp); - }); -} - -int RGWUserCtl::list_buckets(const DoutPrefixProvider *dpp, - const rgw_user& user, - const string& 
marker, - const string& end_marker, - uint64_t max, - bool need_stats, - RGWUserBuckets *buckets, - bool *is_truncated, - optional_yield y, - uint64_t default_max) -{ - if (!max) { - max = default_max; - } - - int ret = svc.user->list_buckets(dpp, user, marker, end_marker, - max, buckets, is_truncated, y); - if (ret < 0) { - return ret; - } - if (need_stats) { - map& m = buckets->get_buckets(); - ret = ctl.bucket->read_buckets_stats(m, y, dpp); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: could not get stats for buckets" << dendl; - return ret; - } - } - return 0; -} - -int RGWUserCtl::read_stats(const DoutPrefixProvider *dpp, - const rgw_user& user, RGWStorageStats *stats, - optional_yield y, - ceph::real_time *last_stats_sync, - ceph::real_time *last_stats_update) -{ - return be_handler->call([&](RGWSI_MetaBackend_Handler::Op *op) { - return svc.user->read_stats(dpp, op->ctx(), user, stats, - last_stats_sync, last_stats_update, y); - }); -} - -RGWMetadataHandler *RGWUserMetaHandlerAllocator::alloc(RGWSI_User *user_svc) { - return new RGWUserMetadataHandler(user_svc); -} - -void rgw_user::dump(Formatter *f) const -{ - ::encode_json("user", *this, f); -} - diff --git a/src/rgw/store/rados/rgw_user.h b/src/rgw/store/rados/rgw_user.h deleted file mode 100644 index 110124cdbc7..00000000000 --- a/src/rgw/store/rados/rgw_user.h +++ /dev/null @@ -1,887 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_USER_H -#define CEPH_RGW_USER_H - -#include -#include -#include "include/ceph_assert.h" - -#include "include/types.h" -#include "rgw_common.h" -#include "rgw_tools.h" - -#include "rgw_string.h" - -#include "common/Formatter.h" -#include "rgw_formats.h" -#include "rgw_metadata.h" -#include "rgw_sal_fwd.h" - -#define RGW_USER_ANON_ID "anonymous" - -#define SECRET_KEY_LEN 40 -#define PUBLIC_ID_LEN 20 -#define RAND_SUBUSER_LEN 5 - -#define XMLNS_AWS_S3 "http://s3.amazonaws.com/doc/2006-03-01/" - -class RGWUserCtl; -class RGWBucketCtl; -class RGWUserBuckets; - -class RGWGetUserStats_CB; - -/** - * A string wrapper that includes encode/decode functions - * for easily accessing a UID in all forms - */ -struct RGWUID -{ - rgw_user user_id; - void encode(bufferlist& bl) const { - std::string s; - user_id.to_str(s); - using ceph::encode; - encode(s, bl); - } - void decode(bufferlist::const_iterator& bl) { - std::string s; - using ceph::decode; - decode(s, bl); - user_id.from_str(s); - } -}; -WRITE_CLASS_ENCODER(RGWUID) - -/** Entry for bucket metadata collection */ -struct bucket_meta_entry { - size_t size; - size_t size_rounded; - ceph::real_time creation_time; - uint64_t count; -}; - -extern int rgw_user_sync_all_stats(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::User* user, optional_yield y); -extern int rgw_user_get_all_buckets_stats(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, rgw::sal::User* user, - std::map& buckets_usage_map, optional_yield y); - -/** - * Get the anonymous (ie, unauthenticated) user info. 
- */ -extern void rgw_get_anon_user(RGWUserInfo& info); - -extern void rgw_perm_to_str(uint32_t mask, char *buf, int len); -extern uint32_t rgw_str_to_perm(const char *str); - -extern int rgw_validate_tenant_name(const std::string& t); - -enum ObjectKeyType { - KEY_TYPE_SWIFT, - KEY_TYPE_S3, - KEY_TYPE_UNDEFINED -}; - -enum RGWKeyPoolOp { - GENERATE_KEY, - MODIFY_KEY -}; - -enum RGWUserId { - RGW_USER_ID, - RGW_SWIFT_USERNAME, - RGW_USER_EMAIL, - RGW_ACCESS_KEY, -}; - -/* - * An RGWUser class along with supporting classes created - * to support the creation of an RESTful administrative API - */ -struct RGWUserAdminOpState { - // user attributes - std::unique_ptr user; - std::string user_email; - std::string display_name; - rgw_user new_user_id; - bool overwrite_new_user = false; - int32_t max_buckets{RGW_DEFAULT_MAX_BUCKETS}; - __u8 suspended{0}; - __u8 admin{0}; - __u8 system{0}; - __u8 exclusive{0}; - __u8 fetch_stats{0}; - __u8 sync_stats{0}; - std::string caps; - RGWObjVersionTracker objv; - uint32_t op_mask{0}; - std::map temp_url_keys; - - // subuser attributes - std::string subuser; - uint32_t perm_mask{RGW_PERM_NONE}; - - // key_attributes - std::string id; // access key - std::string key; // secret key - int32_t key_type{-1}; - bool access_key_exist = false; - - std::set mfa_ids; - - // operation attributes - bool existing_user{false}; - bool existing_key{false}; - bool existing_subuser{false}; - bool existing_email{false}; - bool subuser_specified{false}; - bool gen_secret{false}; - bool gen_access{false}; - bool gen_subuser{false}; - bool id_specified{false}; - bool key_specified{false}; - bool type_specified{false}; - bool key_type_setbycontext{false}; // key type set by user or subuser context - bool purge_data{false}; - bool purge_keys{false}; - bool display_name_specified{false}; - bool user_email_specified{false}; - bool max_buckets_specified{false}; - bool perm_specified{false}; - bool op_mask_specified{false}; - bool caps_specified{false}; - bool suspension_op{false}; - bool admin_specified{false}; - bool system_specified{false}; - bool key_op{false}; - bool temp_url_key_specified{false}; - bool found_by_uid{false}; - bool found_by_email{false}; - bool found_by_key{false}; - bool mfa_ids_specified{false}; - - // req parameters - bool populated{false}; - bool initialized{false}; - bool key_params_checked{false}; - bool subuser_params_checked{false}; - bool user_params_checked{false}; - - bool bucket_quota_specified{false}; - bool user_quota_specified{false}; - bool bucket_ratelimit_specified{false}; - bool user_ratelimit_specified{false}; - - RGWQuota quota; - RGWRateLimitInfo user_ratelimit; - RGWRateLimitInfo bucket_ratelimit; - - // req parameters for listing user - std::string marker{""}; - uint32_t max_entries{1000}; - rgw_placement_rule default_placement; // user default placement - bool default_placement_specified{false}; - - std::list placement_tags; // user default placement_tags - bool placement_tags_specified{false}; - - void set_access_key(const std::string& access_key) { - if (access_key.empty()) - return; - - id = access_key; - id_specified = true; - gen_access = false; - key_op = true; - } - - void set_secret_key(const std::string& secret_key) { - if (secret_key.empty()) - return; - - key = secret_key; - key_specified = true; - gen_secret = false; - key_op = true; - } - - void set_user_id(const rgw_user& id); - - void set_new_user_id(const rgw_user& id) { - if (id.empty()) - return; - - new_user_id = id; - } - void set_overwrite_new_user(bool b) { - 
overwrite_new_user = b; - } - - void set_user_email(std::string& email) { - /* always lowercase email address */ - boost::algorithm::to_lower(email); - user_email = email; - user_email_specified = true; - } - - void set_display_name(const std::string& name) { - if (name.empty()) - return; - - display_name = name; - display_name_specified = true; - } - - void set_subuser(std::string& _subuser); - - void set_caps(const std::string& _caps) { - if (_caps.empty()) - return; - - caps = _caps; - caps_specified = true; - } - - void set_perm(uint32_t perm) { - perm_mask = perm; - perm_specified = true; - } - - void set_op_mask(uint32_t mask) { - op_mask = mask; - op_mask_specified = true; - } - - void set_temp_url_key(const std::string& key, int index) { - temp_url_keys[index] = key; - temp_url_key_specified = true; - } - - void set_key_type(int32_t type) { - key_type = type; - type_specified = true; - } - - void set_access_key_exist() { - access_key_exist = true; - } - - void set_suspension(__u8 is_suspended) { - suspended = is_suspended; - suspension_op = true; - } - - void set_admin(__u8 is_admin) { - admin = is_admin; - admin_specified = true; - } - - void set_system(__u8 is_system) { - system = is_system; - system_specified = true; - } - - void set_exclusive(__u8 is_exclusive) { - exclusive = is_exclusive; - } - - void set_fetch_stats(__u8 is_fetch_stats) { - fetch_stats = is_fetch_stats; - } - - void set_sync_stats(__u8 is_sync_stats) { - sync_stats = is_sync_stats; - } - - void set_user_info(RGWUserInfo& user_info); - - void set_user_version_tracker(RGWObjVersionTracker& objv_tracker); - - void set_max_buckets(int32_t mb) { - max_buckets = mb; - max_buckets_specified = true; - } - - void set_gen_access() { - gen_access = true; - key_op = true; - } - - void set_gen_secret() { - gen_secret = true; - key_op = true; - } - - void set_generate_key() { - if (id.empty()) - gen_access = true; - if (key.empty()) - gen_secret = true; - key_op = true; - } - - void clear_generate_key() { - gen_access = false; - gen_secret = false; - } - - void set_purge_keys() { - purge_keys = true; - key_op = true; - } - - void set_bucket_quota(RGWQuotaInfo& quotas) { - quota.bucket_quota = quotas; - bucket_quota_specified = true; - } - - void set_user_quota(RGWQuotaInfo& quotas) { - quota.user_quota = quotas; - user_quota_specified = true; - } - - void set_bucket_ratelimit(RGWRateLimitInfo& ratelimit) { - bucket_ratelimit = ratelimit; - bucket_ratelimit_specified = true; - } - - void set_user_ratelimit(RGWRateLimitInfo& ratelimit) { - user_ratelimit = ratelimit; - user_ratelimit_specified = true; - } - - void set_mfa_ids(const std::set& ids) { - mfa_ids = ids; - mfa_ids_specified = true; - } - - void set_default_placement(const rgw_placement_rule& _placement) { - default_placement = _placement; - default_placement_specified = true; - } - - void set_placement_tags(const std::list& _tags) { - placement_tags = _tags; - placement_tags_specified = true; - } - - bool is_populated() { return populated; } - bool is_initialized() { return initialized; } - bool has_existing_user() { return existing_user; } - bool has_existing_key() { return existing_key; } - bool has_existing_subuser() { return existing_subuser; } - bool has_existing_email() { return existing_email; } - bool has_subuser() { return subuser_specified; } - bool has_key_op() { return key_op; } - bool has_caps_op() { return caps_specified; } - bool has_suspension_op() { return suspension_op; } - bool has_subuser_perm() { return perm_specified; } - bool has_op_mask() 
{ return op_mask_specified; } - bool will_gen_access() { return gen_access; } - bool will_gen_secret() { return gen_secret; } - bool will_gen_subuser() { return gen_subuser; } - bool will_purge_keys() { return purge_keys; } - bool will_purge_data() { return purge_data; } - bool will_generate_subuser() { return gen_subuser; } - bool has_bucket_quota() { return bucket_quota_specified; } - bool has_user_quota() { return user_quota_specified; } - void set_populated() { populated = true; } - void clear_populated() { populated = false; } - void set_initialized() { initialized = true; } - void set_existing_user(bool flag) { existing_user = flag; } - void set_existing_key(bool flag) { existing_key = flag; } - void set_existing_subuser(bool flag) { existing_subuser = flag; } - void set_existing_email(bool flag) { existing_email = flag; } - void set_purge_data(bool flag) { purge_data = flag; } - void set_generate_subuser(bool flag) { gen_subuser = flag; } - __u8 get_suspension_status() { return suspended; } - int32_t get_key_type() {return key_type; } - bool get_access_key_exist() {return access_key_exist; } - uint32_t get_subuser_perm() { return perm_mask; } - int32_t get_max_buckets() { return max_buckets; } - uint32_t get_op_mask() { return op_mask; } - RGWQuotaInfo& get_bucket_quota() { return quota.bucket_quota; } - RGWQuotaInfo& get_user_quota() { return quota.user_quota; } - std::set& get_mfa_ids() { return mfa_ids; } - - rgw::sal::User* get_user() { return user.get(); } - const rgw_user& get_user_id(); - std::string get_subuser() { return subuser; } - std::string get_access_key() { return id; } - std::string get_secret_key() { return key; } - std::string get_caps() { return caps; } - std::string get_user_email() { return user_email; } - std::string get_display_name() { return display_name; } - rgw_user& get_new_uid() { return new_user_id; } - bool get_overwrite_new_user() const { return overwrite_new_user; } - std::map& get_temp_url_keys() { return temp_url_keys; } - - RGWUserInfo& get_user_info(); - - std::map* get_swift_keys(); - std::map* get_access_keys(); - std::map* get_subusers(); - - RGWUserCaps* get_caps_obj(); - - std::string build_default_swift_kid(); - - std::string generate_subuser(); - - RGWUserAdminOpState(rgw::sal::Driver* driver); -}; - -class RGWUser; - -class RGWAccessKeyPool -{ - RGWUser *user{nullptr}; - - std::map key_type_map; - rgw_user user_id; - rgw::sal::Driver* driver{nullptr}; - - std::map *swift_keys{nullptr}; - std::map *access_keys{nullptr}; - - // we don't want to allow keys for the anonymous user or a null user - bool keys_allowed{false}; - -private: - int create_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL); - int generate_key(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg = NULL); - int modify_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL); - - int check_key_owner(RGWUserAdminOpState& op_state); - bool check_existing_key(RGWUserAdminOpState& op_state); - int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL); - - /* API Contract Fulfilment */ - int execute_add(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_save, optional_yield y); - int execute_remove(const DoutPrefixProvider *dpp, - RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_save, optional_yield y); - int remove_subuser_keys(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - bool 
defer_save, optional_yield y); - - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, - optional_yield y); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - bool defer_save, optional_yield y); -public: - explicit RGWAccessKeyPool(RGWUser* usr); - - int init(RGWUserAdminOpState& op_state); - - /* API Contracted Methods */ - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg = NULL); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg = NULL); - - friend class RGWUser; - friend class RGWSubUserPool; -}; - -class RGWSubUserPool -{ - RGWUser *user{nullptr}; - - rgw_user user_id; - rgw::sal::Driver* driver{nullptr}; - bool subusers_allowed{false}; - - std::map *subuser_map{nullptr}; - -private: - int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL); - - /* API Contract Fulfillment */ - int execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); - int execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); - int execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); - - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, - optional_yield y); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, optional_yield y); - int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg, bool defer_save); -public: - explicit RGWSubUserPool(RGWUser *user); - - bool exists(std::string subuser); - int init(RGWUserAdminOpState& op_state); - - /* API contracted methods */ - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg = NULL); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - - friend class RGWUser; -}; - -class RGWUserCapPool -{ - RGWUserCaps *caps{nullptr}; - bool caps_allowed{false}; - RGWUser *user{nullptr}; - -private: - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, - optional_yield y); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save, - optional_yield y); - -public: - explicit RGWUserCapPool(RGWUser *user); - - int init(RGWUserAdminOpState& op_state); - - /* API contracted methods */ - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, - std::string *err_msg = NULL); - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - - friend class RGWUser; -}; - -class RGWUser -{ - -private: - RGWUserInfo old_info; - rgw::sal::Driver* driver{nullptr}; - - rgw_user user_id; - bool info_stored{false}; - - void set_populated() { info_stored = true; } - void clear_populated() { info_stored = false; } - bool is_populated() { return info_stored; } - - int check_op(RGWUserAdminOpState& req, 
std::string *err_msg); - int update(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); - - void clear_members(); - void init_default(); - - /* API Contract Fulfillment */ - int execute_add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, - optional_yield y); - int execute_remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, - std::string *err_msg, optional_yield y); - int execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); - int execute_rename(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, std::string *err_msg, optional_yield y); - -public: - RGWUser(); - - int init(const DoutPrefixProvider *dpp, rgw::sal::Driver* storage, RGWUserAdminOpState& op_state, - optional_yield y); - - int init_storage(rgw::sal::Driver* storage); - int init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y); - int init_members(RGWUserAdminOpState& op_state); - - rgw::sal::Driver* get_driver() { return driver; } - - /* API Contracted Members */ - RGWUserCapPool caps; - RGWAccessKeyPool keys; - RGWSubUserPool subusers; - - /* API Contracted Methods */ - int add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - - int remove(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - - int rename(RGWUserAdminOpState& op_state, optional_yield y, const DoutPrefixProvider *dpp, std::string *err_msg = NULL); - - /* remove an already populated RGWUser */ - int remove(std::string *err_msg = NULL); - - int modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - - /* retrieve info from an existing user in the RGW system */ - int info(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, optional_yield y, - std::string *err_msg = NULL); - - /* info from an already populated RGWUser */ - int info (RGWUserInfo& fetched_info, std::string *err_msg = NULL); - - /* list the existing users */ - int list(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher); - - friend class RGWAccessKeyPool; - friend class RGWSubUserPool; - friend class RGWUserCapPool; -}; - -/* Wrappers for admin API functionality */ - -class RGWUserAdminOp_User -{ -public: - static int list(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher); - - static int info(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int modify(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y); - - static int remove(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, optional_yield y); -}; - -class RGWUserAdminOp_Subuser -{ -public: - static int create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int modify(const DoutPrefixProvider *dpp, - 
rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); -}; - -class RGWUserAdminOp_Key -{ -public: - static int create(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); -}; - -class RGWUserAdminOp_Caps -{ -public: - static int add(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); - - static int remove(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher, - optional_yield y); -}; - -struct RGWUserCompleteInfo { - RGWUserInfo info; - std::map attrs; - bool has_attrs{false}; - - void dump(Formatter * const f) const { - info.dump(f); - encode_json("attrs", attrs, f); - } - - void decode_json(JSONObj *obj) { - decode_json_obj(info, obj); - has_attrs = JSONDecoder::decode_json("attrs", attrs, obj); - } -}; - -class RGWUserMetadataObject : public RGWMetadataObject { - RGWUserCompleteInfo uci; -public: - RGWUserMetadataObject() {} - RGWUserMetadataObject(const RGWUserCompleteInfo& _uci, const obj_version& v, real_time m) - : uci(_uci) { - objv = v; - mtime = m; - } - - void dump(Formatter *f) const override { - uci.dump(f); - } - - RGWUserCompleteInfo& get_uci() { - return uci; - } -}; - -class RGWUserMetadataHandler; - -class RGWUserCtl -{ - struct Svc { - RGWSI_Zone *zone{nullptr}; - RGWSI_User *user{nullptr}; - } svc; - - struct Ctl { - RGWBucketCtl *bucket{nullptr}; - } ctl; - - RGWUserMetadataHandler *umhandler; - RGWSI_MetaBackend_Handler *be_handler{nullptr}; - -public: - RGWUserCtl(RGWSI_Zone *zone_svc, - RGWSI_User *user_svc, - RGWUserMetadataHandler *_umhandler); - - void init(RGWBucketCtl *bucket_ctl) { - ctl.bucket = bucket_ctl; - } - - RGWBucketCtl *get_bucket_ctl() { - return ctl.bucket; - } - - struct GetParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time *mtime{nullptr}; - rgw_cache_entry_info *cache_info{nullptr}; - std::map *attrs{nullptr}; - - GetParams() {} - - GetParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - GetParams& set_mtime(ceph::real_time *_mtime) { - mtime = _mtime; - return *this; - } - - GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { - cache_info = _cache_info; - return *this; - } - - GetParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return *this; - } - }; - - struct PutParams { - RGWUserInfo *old_info{nullptr}; - RGWObjVersionTracker *objv_tracker{nullptr}; - ceph::real_time mtime; - bool exclusive{false}; - std::map *attrs{nullptr}; - - PutParams() {} - - PutParams& set_old_info(RGWUserInfo *_info) { - old_info = _info; - return *this; - } - - PutParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - - PutParams& set_mtime(const ceph::real_time& _mtime) { - mtime = _mtime; - return *this; - } - - PutParams& set_exclusive(bool _exclusive) { - exclusive = _exclusive; - return *this; - } - - PutParams& set_attrs(std::map *_attrs) { - attrs = _attrs; - return 
*this; - } - }; - - struct RemoveParams { - RGWObjVersionTracker *objv_tracker{nullptr}; - - RemoveParams() {} - - RemoveParams& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { - objv_tracker = _objv_tracker; - return *this; - } - }; - - int get_info_by_uid(const DoutPrefixProvider *dpp, - const rgw_user& uid, RGWUserInfo *info, - optional_yield y, const GetParams& params = {}); - int get_info_by_email(const DoutPrefixProvider *dpp, - const std::string& email, RGWUserInfo *info, - optional_yield y, const GetParams& params = {}); - int get_info_by_swift(const DoutPrefixProvider *dpp, - const std::string& swift_name, RGWUserInfo *info, - optional_yield y, const GetParams& params = {}); - int get_info_by_access_key(const DoutPrefixProvider *dpp, - const std::string& access_key, RGWUserInfo *info, - optional_yield y, const GetParams& params = {}); - - int get_attrs_by_uid(const DoutPrefixProvider *dpp, - const rgw_user& user_id, - std::map *attrs, - optional_yield y, - RGWObjVersionTracker *objv_tracker = nullptr); - - int store_info(const DoutPrefixProvider *dpp, - const RGWUserInfo& info, optional_yield y, - const PutParams& params = {}); - int remove_info(const DoutPrefixProvider *dpp, - const RGWUserInfo& info, optional_yield y, - const RemoveParams& params = {}); - - int list_buckets(const DoutPrefixProvider *dpp, - const rgw_user& user, - const std::string& marker, - const std::string& end_marker, - uint64_t max, - bool need_stats, - RGWUserBuckets *buckets, - bool *is_truncated, - optional_yield y, - uint64_t default_max = 1000); - - int read_stats(const DoutPrefixProvider *dpp, - const rgw_user& user, RGWStorageStats *stats, - optional_yield y, - ceph::real_time *last_stats_sync = nullptr, /* last time a full stats sync completed */ - ceph::real_time *last_stats_update = nullptr); /* last time a stats update was done */ -}; - -class RGWUserMetaHandlerAllocator { -public: - static RGWMetadataHandler *alloc(RGWSI_User *user_svc); -}; - - -#endif diff --git a/src/rgw/store/rados/rgw_zone.cc b/src/rgw/store/rados/rgw_zone.cc deleted file mode 100644 index d9e750541d7..00000000000 --- a/src/rgw/store/rados/rgw_zone.cc +++ /dev/null @@ -1,1287 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "rgw_zone.h" -#include "rgw_realm_watcher.h" -#include "rgw_sal_config.h" -#include "rgw_sync.h" - -#include "services/svc_zone.h" - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_rgw - -using namespace std; -using namespace rgw_zone_defaults; - -RGWMetaSyncStatusManager::~RGWMetaSyncStatusManager(){} - -#define FIRST_EPOCH 1 - -struct RGWAccessKey; - -/// Generate a random uuid for realm/period/zonegroup/zone ids -static std::string gen_random_uuid() -{ - uuid_d uuid; - uuid.generate_random(); - return uuid.to_string(); -} - -void RGWDefaultZoneGroupInfo::dump(Formatter *f) const { - encode_json("default_zonegroup", default_zonegroup, f); -} - -void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj) { - - JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj); - /* backward compatability with region */ - if (default_zonegroup.empty()) { - JSONDecoder::decode_json("default_region", default_zonegroup, obj); - } -} - -int RGWZoneGroup::create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format) -{ - name = default_zonegroup_name; - api_name = default_zonegroup_name; - is_master = true; - - RGWZoneGroupPlacementTarget placement_target; - placement_target.name = 
"default-placement"; - placement_targets[placement_target.name] = placement_target; - default_placement.name = "default-placement"; - - RGWZoneParams zone_params(default_zone_name); - - int r = zone_params.init(dpp, cct, sysobj_svc, y, false); - if (r < 0) { - ldpp_dout(dpp, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl; - return r; - } - - r = zone_params.create_default(dpp, y); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl; - return r; - } else if (r == -EEXIST) { - ldpp_dout(dpp, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl; - zone_params.clear_id(); - r = zone_params.init(dpp, cct, sysobj_svc, y); - if (r < 0) { - ldpp_dout(dpp, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl; - return r; - } - ldpp_dout(dpp, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id() - << dendl; - } - - RGWZone& default_zone = zones[zone_params.get_id()]; - default_zone.name = zone_params.get_name(); - default_zone.id = zone_params.get_id(); - master_zone = default_zone.id; - - // enable all supported features - enabled_features.insert(rgw::zone_features::supported.begin(), - rgw::zone_features::supported.end()); - default_zone.supported_features = enabled_features; - - r = create(dpp, y); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl; - return r; - } - - if (r == -EEXIST) { - ldpp_dout(dpp, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl; - id.clear(); - r = init(dpp, cct, sysobj_svc, y); - if (r < 0) { - return r; - } - } - - if (old_format) { - name = id; - } - - post_process_params(dpp, y); - - return 0; -} - -int RGWZoneGroup::equals(const string& other_zonegroup) const -{ - if (is_master && other_zonegroup.empty()) - return true; - - return (id == other_zonegroup); -} - -int RGWZoneGroup::add_zone(const DoutPrefixProvider *dpp, - const RGWZoneParams& zone_params, bool *is_master, bool *read_only, - const list& endpoints, const string *ptier_type, - bool *psync_from_all, list& sync_from, list& sync_from_rm, - string *predirect_zone, std::optional bucket_index_max_shards, - RGWSyncModulesManager *sync_mgr, - const rgw::zone_features::set& enable_features, - const rgw::zone_features::set& disable_features, - optional_yield y) -{ - auto& zone_id = zone_params.get_id(); - auto& zone_name = zone_params.get_name(); - - // check for duplicate zone name on insert - if (!zones.count(zone_id)) { - for (const auto& zone : zones) { - if (zone.second.name == zone_name) { - ldpp_dout(dpp, 0) << "ERROR: found existing zone name " << zone_name - << " (" << zone.first << ") in zonegroup " << get_name() << dendl; - return -EEXIST; - } - } - } - - if (is_master) { - if (*is_master) { - if (!master_zone.empty() && master_zone != zone_id) { - ldpp_dout(dpp, 0) << "NOTICE: overriding master zone: " << master_zone << dendl; - } - master_zone = zone_id; - } else if (master_zone == zone_id) { - master_zone.clear(); - } - } - - RGWZone& zone = zones[zone_id]; - zone.name = zone_name; - zone.id = zone_id; - if (!endpoints.empty()) { - zone.endpoints = endpoints; - } - if (read_only) { - zone.read_only = *read_only; - } - if (ptier_type) { - zone.tier_type = *ptier_type; - if (!sync_mgr->get_module(*ptier_type, nullptr)) { - 
ldpp_dout(dpp, 0) << "ERROR: could not found sync module: " << *ptier_type - << ", valid sync modules: " - << sync_mgr->get_registered_module_names() - << dendl; - return -ENOENT; - } - } - - if (psync_from_all) { - zone.sync_from_all = *psync_from_all; - } - - if (predirect_zone) { - zone.redirect_zone = *predirect_zone; - } - - if (bucket_index_max_shards) { - zone.bucket_index_max_shards = *bucket_index_max_shards; - } - - for (auto add : sync_from) { - zone.sync_from.insert(add); - } - - for (auto rm : sync_from_rm) { - zone.sync_from.erase(rm); - } - - zone.supported_features.insert(enable_features.begin(), - enable_features.end()); - - for (const auto& feature : disable_features) { - if (enabled_features.contains(feature)) { - lderr(cct) << "ERROR: Cannot disable zone feature \"" << feature - << "\" until it's been disabled in zonegroup " << name << dendl; - return -EINVAL; - } - auto i = zone.supported_features.find(feature); - if (i == zone.supported_features.end()) { - ldout(cct, 1) << "WARNING: zone feature \"" << feature - << "\" was not enabled in zone " << zone.name << dendl; - continue; - } - zone.supported_features.erase(i); - } - - post_process_params(dpp, y); - - return update(dpp,y); -} - - -int RGWZoneGroup::rename_zone(const DoutPrefixProvider *dpp, - const RGWZoneParams& zone_params, - optional_yield y) -{ - RGWZone& zone = zones[zone_params.get_id()]; - zone.name = zone_params.get_name(); - - return update(dpp, y); -} - -void RGWZoneGroup::post_process_params(const DoutPrefixProvider *dpp, optional_yield y) -{ - bool log_data = zones.size() > 1; - - if (master_zone.empty()) { - auto iter = zones.begin(); - if (iter != zones.end()) { - master_zone = iter->first; - } - } - - for (auto& item : zones) { - RGWZone& zone = item.second; - zone.log_data = log_data; - - RGWZoneParams zone_params(zone.id, zone.name); - int ret = zone_params.init(dpp, cct, sysobj_svc, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl; - continue; - } - - for (auto& pitem : zone_params.placement_pools) { - const string& placement_name = pitem.first; - if (placement_targets.find(placement_name) == placement_targets.end()) { - RGWZoneGroupPlacementTarget placement_target; - placement_target.name = placement_name; - placement_targets[placement_name] = placement_target; - } - } - } - - if (default_placement.empty() && !placement_targets.empty()) { - default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD); - } -} - -int RGWZoneGroup::remove_zone(const DoutPrefixProvider *dpp, const std::string& zone_id, optional_yield y) -{ - auto iter = zones.find(zone_id); - if (iter == zones.end()) { - ldpp_dout(dpp, 0) << "zone id " << zone_id << " is not a part of zonegroup " - << name << dendl; - return -ENOENT; - } - - zones.erase(iter); - - post_process_params(dpp, y); - - return update(dpp, y); -} - -void RGWDefaultSystemMetaObjInfo::dump(Formatter *f) const { - encode_json("default_id", default_id, f); -} - -void RGWDefaultSystemMetaObjInfo::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("default_id", default_id, obj); -} - -int RGWSystemMetaObj::rename(const DoutPrefixProvider *dpp, const string& new_name, optional_yield y) -{ - string new_id; - int ret = read_id(dpp, new_name, new_id, y); - if (!ret) { - return -EEXIST; - } - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - string 
old_name = name; - name = new_name; - ret = update(dpp, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - ret = store_name(dpp, true, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - /* delete old name */ - rgw_pool pool(get_pool(cct)); - string oid = get_names_oid_prefix() + old_name; - rgw_raw_obj old_name_obj(pool, oid); - auto sysobj = sysobj_svc->get_obj(old_name_obj); - ret = sysobj.wop().remove(dpp, y); - if (ret < 0) { - ldpp_dout(dpp, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl; - return ret; - } - - return ret; -} - -int RGWSystemMetaObj::read(const DoutPrefixProvider *dpp, optional_yield y) -{ - int ret = read_id(dpp, name, id, y); - if (ret < 0) { - return ret; - } - - return read_info(dpp, id, y); -} - -int RGWZoneParams::create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format) -{ - name = default_zone_name; - - int r = create(dpp, y); - if (r < 0) { - return r; - } - - if (old_format) { - name = id; - } - - return r; -} - -const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const -{ - static const std::string NONE{"none"}; - auto p = placement_pools.find(placement_rule.name); - if (p == placement_pools.end()) { - return NONE; - } - const auto& type = p->second.get_compression_type(placement_rule.get_storage_class()); - return !type.empty() ? type : NONE; -} - -// run an MD5 hash on the zone_id and return the first 32 bits -static uint32_t gen_short_zone_id(const std::string zone_id) -{ - unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE]; - MD5 hash; - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size()); - hash.Final(md5); - - uint32_t short_id; - memcpy((char *)&short_id, md5, sizeof(short_id)); - return std::max(short_id, 1u); -} - -int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct) -{ - if (zonegroup.is_master_zonegroup() && (!master_zonegroup.empty() && zonegroup.get_id() != master_zonegroup)) { - ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl; - ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() << dendl; - return -EINVAL; - } - map<string,RGWZoneGroup>::iterator iter = zonegroups.find(zonegroup.get_id()); - if (iter != zonegroups.end()) { - RGWZoneGroup& old_zonegroup = iter->second; - if (!old_zonegroup.api_name.empty()) { - zonegroups_by_api.erase(old_zonegroup.api_name); - } - } - zonegroups[zonegroup.get_id()] = zonegroup; - - if (!zonegroup.api_name.empty()) { - zonegroups_by_api[zonegroup.api_name] = zonegroup; - } - - if (zonegroup.is_master_zonegroup()) { - master_zonegroup = zonegroup.get_id(); - } else if (master_zonegroup == zonegroup.get_id()) { - master_zonegroup = ""; - } - - for (auto& i : zonegroup.zones) { - auto& zone = i.second; - if (short_zone_ids.find(zone.id) != short_zone_ids.end()) { - continue; - } - // calculate the zone's short id - uint32_t short_id = gen_short_zone_id(zone.id); - - // search for an existing zone with the same short id - for (auto& s : short_zone_ids) { - if (s.second == short_id) { - ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id - << ") generates the same short_zone_id " << short_id - << " as existing zone id " << s.first << dendl; -
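
A note for readers tracing the collision check above: gen_short_zone_id() keeps only the first 32 bits of an MD5 digest of the zone's uuid, and RGWPeriodMap::update() rejects a new zone whose short id collides with one already registered. Below is a minimal standalone sketch of that derivation, using OpenSSL's one-shot MD5() for self-containment (an assumption; the deleted code uses Ceph's own MD5 wrapper with the FIPS override shown above):

```cpp
// Sketch only: mirrors gen_short_zone_id() above, but with OpenSSL's
// one-shot MD5() instead of Ceph's MD5 wrapper (assumed available).
#include <openssl/md5.h>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>

static uint32_t short_zone_id(const std::string& zone_id)
{
  unsigned char md5[MD5_DIGEST_LENGTH];
  MD5(reinterpret_cast<const unsigned char*>(zone_id.data()),
      zone_id.size(), md5);

  uint32_t short_id;
  std::memcpy(&short_id, md5, sizeof(short_id)); // first 32 bits of the digest
  return std::max(short_id, 1u); // never return 0
}
```

Clamping to a minimum of 1 matters because get_zone_short_id() below returns 0 to mean "no short id known for this zone".
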
return -EEXIST; - } - } - - short_zone_ids[zone.id] = short_id; - } - - return 0; -} - -uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const -{ - auto i = short_zone_ids.find(zone_id); - if (i == short_zone_ids.end()) { - return 0; - } - return i->second; -} - -bool RGWPeriodMap::find_zone_by_name(const string& zone_name, - RGWZoneGroup *zonegroup, - RGWZone *zone) const -{ - for (auto& iter : zonegroups) { - auto& zg = iter.second; - for (auto& ziter : zg.zones) { - auto& z = ziter.second; - - if (z.name == zone_name) { - *zonegroup = zg; - *zone = z; - return true; - } - } - } - - return false; -} - -namespace rgw { - -int read_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view realm_id, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr* writer) -{ - if (!realm_id.empty()) { - return cfgstore->read_realm_by_id(dpp, y, realm_id, info, writer); - } - if (!realm_name.empty()) { - return cfgstore->read_realm_by_name(dpp, y, realm_name, info, writer); - } - return cfgstore->read_default_realm(dpp, y, info, writer); -} - -int create_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWRealm& info, - std::unique_ptr* writer_out) -{ - if (info.name.empty()) { - ldpp_dout(dpp, -1) << __func__ << " requires a realm name" << dendl; - return -EINVAL; - } - if (info.id.empty()) { - info.id = gen_random_uuid(); - } - - // if the realm already has a current_period, just make sure it exists - std::optional period; - if (!info.current_period.empty()) { - period.emplace(); - int r = cfgstore->read_period(dpp, y, info.current_period, - std::nullopt, *period); - if (r < 0) { - ldpp_dout(dpp, -1) << __func__ << " failed to read realm's current_period=" - << info.current_period << " with " << cpp_strerror(r) << dendl; - return r; - } - } - - // create the realm - std::unique_ptr writer; - int r = cfgstore->create_realm(dpp, y, exclusive, info, &writer); - if (r < 0) { - return r; - } - - if (!period) { - // initialize and exclusive-create the initial period - period.emplace(); - period->id = gen_random_uuid(); - period->period_map.id = period->id; - period->epoch = FIRST_EPOCH; - period->realm_id = info.id; - period->realm_name = info.name; - - r = cfgstore->create_period(dpp, y, true, *period); - if (r < 0) { - ldpp_dout(dpp, -1) << __func__ << " failed to create the initial period id=" - << period->id << " for realm " << info.name - << " with " << cpp_strerror(r) << dendl; - return r; - } - } - - // update the realm's current_period - r = realm_set_current_period(dpp, y, cfgstore, *writer, info, *period); - if (r < 0) { - return r; - } - - // try to set as default. 
may race with another create, so pass exclusive=true - // so we don't override an existing default - r = set_default_realm(dpp, y, cfgstore, info, true); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, 0) << "WARNING: failed to set realm as default: " - << cpp_strerror(r) << dendl; - } - - if (writer_out) { - *writer_out = std::move(writer); - } - return 0; -} - -int set_default_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWRealm& info, - bool exclusive) -{ - return cfgstore->write_default_realm_id(dpp, y, exclusive, info.id); -} - -int realm_set_current_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - sal::RealmWriter& writer, RGWRealm& realm, - const RGWPeriod& period) -{ - // update realm epoch to match the period's - if (realm.epoch > period.realm_epoch) { - ldpp_dout(dpp, -1) << __func__ << " with old realm epoch " - << period.realm_epoch << ", current epoch=" << realm.epoch << dendl; - return -EINVAL; - } - if (realm.epoch == period.realm_epoch && realm.current_period != period.id) { - ldpp_dout(dpp, -1) << __func__ << " with same realm epoch " - << period.realm_epoch << ", but different period id " - << period.id << " != " << realm.current_period << dendl; - return -EINVAL; - } - - realm.epoch = period.realm_epoch; - realm.current_period = period.id; - - // update the realm object - int r = writer.write(dpp, y, realm); - if (r < 0) { - ldpp_dout(dpp, -1) << __func__ << " failed to overwrite realm " - << realm.name << " with " << cpp_strerror(r) << dendl; - return r; - } - - // reflect the zonegroup and period config - (void) reflect_period(dpp, y, cfgstore, period); - return 0; -} - -int reflect_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWPeriod& info) -{ - // overwrite the local period config and zonegroup objects - constexpr bool exclusive = false; - - int r = cfgstore->write_period_config(dpp, y, exclusive, info.realm_id, - info.period_config); - if (r < 0) { - ldpp_dout(dpp, -1) << __func__ << " failed to store period config for realm id=" - << info.realm_id << " with " << cpp_strerror(r) << dendl; - return r; - } - - for (auto& [zonegroup_id, zonegroup] : info.period_map.zonegroups) { - r = cfgstore->create_zonegroup(dpp, y, exclusive, zonegroup, nullptr); - if (r < 0) { - ldpp_dout(dpp, -1) << __func__ << " failed to store zonegroup id=" - << zonegroup_id << " with " << cpp_strerror(r) << dendl; - return r; - } - if (zonegroup.is_master) { - // set master as default if no default exists - constexpr bool exclusive = true; - r = set_default_zonegroup(dpp, y, cfgstore, zonegroup, exclusive); - if (r == 0) { - ldpp_dout(dpp, 1) << "Set the period's master zonegroup " - << zonegroup.name << " as the default" << dendl; - } - } - } - return 0; -} - -std::string get_staging_period_id(std::string_view realm_id) -{ - return string_cat_reserve(realm_id, ":staging"); -} - -void fork_period(const DoutPrefixProvider* dpp, RGWPeriod& info) -{ - ldpp_dout(dpp, 20) << __func__ << " realm id=" << info.realm_id - << " period id=" << info.id << dendl; - - info.predecessor_uuid = std::move(info.id); - info.id = get_staging_period_id(info.realm_id); - info.period_map.reset(); - info.realm_epoch++; -} - -int update_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, RGWPeriod& info) -{ - // clear zone short ids of removed zones. 
period_map.update() will add the - // remaining zones back - info.period_map.short_zone_ids.clear(); - - // list all zonegroups in the realm - rgw::sal::ListResult listing; - std::array zonegroup_names; // list in pages of 1000 - do { - int ret = cfgstore->list_zonegroup_names(dpp, y, listing.next, - zonegroup_names, listing); - if (ret < 0) { - std::cerr << "failed to list zonegroups: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - for (const auto& name : listing.entries) { - RGWZoneGroup zg; - ret = cfgstore->read_zonegroup_by_name(dpp, y, name, zg, nullptr); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to read zonegroup " - << name << ": " << cpp_strerror(-ret) << dendl; - continue; - } - - if (zg.realm_id != info.realm_id) { - ldpp_dout(dpp, 20) << "skipping zonegroup " << zg.get_name() - << " with realm id " << zg.realm_id - << ", not on our realm " << info.realm_id << dendl; - continue; - } - - if (zg.master_zone.empty()) { - ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl; - return -EINVAL; - } - - if (zg.zones.find(zg.master_zone) == zg.zones.end()) { - ldpp_dout(dpp, 0) << "ERROR: zonegroup " << zg.get_name() - << " has a non existent master zone "<< dendl; - return -EINVAL; - } - - if (zg.is_master_zonegroup()) { - info.master_zonegroup = zg.get_id(); - info.master_zone = zg.master_zone; - } - - ret = info.period_map.update(zg, dpp->get_cct()); - if (ret < 0) { - return ret; - } - } // foreach name in listing.entries - } while (!listing.next.empty()); - - // read the realm's current period config - int ret = cfgstore->read_period_config(dpp, y, info.realm_id, - info.period_config); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: failed to read period config: " - << cpp_strerror(ret) << dendl; - return ret; - } - - return 0; -} - -int commit_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, sal::Driver* driver, - RGWRealm& realm, sal::RealmWriter& realm_writer, - const RGWPeriod& current_period, - RGWPeriod& info, std::ostream& error_stream, - bool force_if_stale) -{ - auto zone_svc = static_cast(driver)->svc()->zone; // XXX - - ldpp_dout(dpp, 20) << __func__ << " realm " << realm.id - << " period " << current_period.id << dendl; - // gateway must be in the master zone to commit - if (info.master_zone != zone_svc->get_zone_params().id) { - error_stream << "Cannot commit period on zone " - << zone_svc->get_zone_params().id << ", it must be sent to " - "the period's master zone " << info.master_zone << '.' << std::endl; - return -EINVAL; - } - // period predecessor must match current period - if (info.predecessor_uuid != current_period.id) { - error_stream << "Period predecessor " << info.predecessor_uuid - << " does not match current period " << current_period.id - << ". Use 'period pull' to get the latest period from the master, " - "reapply your changes, and try again." << std::endl; - return -EINVAL; - } - // realm epoch must be 1 greater than current period - if (info.realm_epoch != current_period.realm_epoch + 1) { - error_stream << "Period's realm epoch " << info.realm_epoch - << " does not come directly after current realm epoch " - << current_period.realm_epoch << ". Use 'realm pull' to get the " - "latest realm and period from the master zone, reapply your changes, " - "and try again." << std::endl; - return -EINVAL; - } - // did the master zone change? 
- if (info.master_zone != current_period.master_zone) { - // store the current metadata sync status in the period - int r = info.update_sync_status(dpp, driver, current_period, - error_stream, force_if_stale); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update metadata sync status: " - << cpp_strerror(-r) << dendl; - return r; - } - // create an object with a new period id - info.period_map.id = info.id = gen_random_uuid(); - info.epoch = FIRST_EPOCH; - - constexpr bool exclusive = true; - r = cfgstore->create_period(dpp, y, exclusive, info); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl; - return r; - } - // set as current period - r = realm_set_current_period(dpp, y, cfgstore, realm_writer, realm, info); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update realm's current period: " - << cpp_strerror(-r) << dendl; - return r; - } - ldpp_dout(dpp, 4) << "Promoted to master zone and committed new period " - << info.id << dendl; - (void) cfgstore->realm_notify_new_period(dpp, y, info); - return 0; - } - // period must be based on current epoch - if (info.epoch != current_period.epoch) { - error_stream << "Period epoch " << info.epoch << " does not match " - "predecessor epoch " << current_period.epoch << ". Use " - "'period pull' to get the latest epoch from the master zone, " - "reapply your changes, and try again." << std::endl; - return -EINVAL; - } - // set period as next epoch - info.id = current_period.id; - info.epoch = current_period.epoch + 1; - info.predecessor_uuid = current_period.predecessor_uuid; - info.realm_epoch = current_period.realm_epoch; - // write the period - constexpr bool exclusive = true; - int r = cfgstore->create_period(dpp, y, exclusive, info); - if (r == -EEXIST) { - // already have this epoch (or a more recent one) - return 0; - } - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to store period: " << cpp_strerror(r) << dendl; - return r; - } - r = reflect_period(dpp, y, cfgstore, info); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to update local objects: " << cpp_strerror(r) << dendl; - return r; - } - ldpp_dout(dpp, 4) << "Committed new epoch " << info.epoch - << " for period " << info.id << dendl; - (void) cfgstore->realm_notify_new_period(dpp, y, info); - return 0; -} - - -int read_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view zonegroup_id, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr* writer) -{ - if (!zonegroup_id.empty()) { - return cfgstore->read_zonegroup_by_id(dpp, y, zonegroup_id, info, writer); - } - if (!zonegroup_name.empty()) { - return cfgstore->read_zonegroup_by_name(dpp, y, zonegroup_name, info, writer); - } - - std::string realm_id; - int r = cfgstore->read_default_realm_id(dpp, y, realm_id); - if (r == -ENOENT) { - return cfgstore->read_zonegroup_by_name(dpp, y, default_zonegroup_name, - info, writer); - } - if (r < 0) { - return r; - } - return cfgstore->read_default_zonegroup(dpp, y, realm_id, info, writer); -} - -int create_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWZoneGroup& info) -{ - if (info.name.empty()) { - ldpp_dout(dpp, -1) << __func__ << " requires a zonegroup name" << dendl; - return -EINVAL; - } - if (info.id.empty()) { - info.id = gen_random_uuid(); - } - - // insert the default placement target if it doesn't exist - constexpr std::string_view default_placement_name = "default-placement"; - - 
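
The same default-selection idiom recurs for realms above and for zonegroups and zones below: create the object first, then offer it as the cluster default with exclusive=true so an already-set default is never overwritten, treating -EEXIST as a benign lost race. A compressed sketch of that control flow follows; the in-memory ConfigStore here is a hypothetical stand-in, not the real SAL interface:

```cpp
// Sketch of the "create, then best-effort exclusive default" idiom used
// for realms, zonegroups and zones. ConfigStore is a hypothetical stand-in.
#include <cerrno>
#include <optional>
#include <set>
#include <string>

struct ConfigStore {
  std::set<std::string> objects;
  std::optional<std::string> default_id;

  int create(const std::string& id) {
    return objects.insert(id).second ? 0 : -EEXIST;
  }
  int write_default_id(const std::string& id, bool exclusive) {
    if (exclusive && default_id) return -EEXIST; // never clobber a default
    default_id = id;
    return 0;
  }
};

int create_and_maybe_default(ConfigStore& store, const std::string& id)
{
  int r = store.create(id);
  if (r < 0) return r;  // creation failure is fatal
  r = store.write_default_id(id, /*exclusive=*/true);
  if (r < 0 && r != -EEXIST) {
    // the code above only logs a WARNING here; creation still succeeds
  }
  return 0;
}
```
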
RGWZoneGroupPlacementTarget placement_target; - placement_target.name = default_placement_name; - - info.placement_targets.emplace(default_placement_name, placement_target); - if (info.default_placement.name.empty()) { - info.default_placement.name = default_placement_name; - } - - int r = cfgstore->create_zonegroup(dpp, y, exclusive, info, nullptr); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to create zonegroup with " - << cpp_strerror(r) << dendl; - return r; - } - - // try to set as default. may race with another create, so pass exclusive=true - // so we don't override an existing default - r = set_default_zonegroup(dpp, y, cfgstore, info, true); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, 0) << "WARNING: failed to set zonegroup as default: " - << cpp_strerror(r) << dendl; - } - - return 0; -} - -int set_default_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneGroup& info, - bool exclusive) -{ - return cfgstore->write_default_zonegroup_id( - dpp, y, exclusive, info.realm_id, info.id); -} - -int remove_zone_from_group(const DoutPrefixProvider* dpp, - RGWZoneGroup& zonegroup, - const rgw_zone_id& zone_id) -{ - auto z = zonegroup.zones.find(zone_id); - if (z == zonegroup.zones.end()) { - return -ENOENT; - } - zonegroup.zones.erase(z); - - if (zonegroup.master_zone == zone_id) { - // choose a new master zone - auto m = zonegroup.zones.begin(); - if (m != zonegroup.zones.end()) { - zonegroup.master_zone = m->first; - ldpp_dout(dpp, 0) << "NOTICE: promoted " << m->second.name - << " as new master_zone of zonegroup " << zonegroup.name << dendl; - } else { - zonegroup.master_zone.clear(); - ldpp_dout(dpp, 0) << "NOTICE: cleared master_zone of zonegroup " - << zonegroup.name << dendl; - } - } - - const bool log_data = zonegroup.zones.size() > 1; - for (auto& [id, zone] : zonegroup.zones) { - zone.log_data = log_data; - } - - return 0; -} - -// try to remove the given zone id from every zonegroup in the cluster -static int remove_zone_from_groups(const DoutPrefixProvider* dpp, - optional_yield y, - sal::ConfigStore* cfgstore, - const rgw_zone_id& zone_id) -{ - std::array zonegroup_names; - sal::ListResult listing; - do { - int r = cfgstore->list_zonegroup_names(dpp, y, listing.next, - zonegroup_names, listing); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to list zonegroups with " - << cpp_strerror(r) << dendl; - return r; - } - - for (const auto& name : listing.entries) { - RGWZoneGroup zonegroup; - std::unique_ptr writer; - r = cfgstore->read_zonegroup_by_name(dpp, y, name, zonegroup, &writer); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to load zonegroup " << name - << " with " << cpp_strerror(r) << dendl; - continue; - } - - r = remove_zone_from_group(dpp, zonegroup, zone_id); - if (r < 0) { - continue; - } - - // write the updated zonegroup - r = writer->write(dpp, y, zonegroup); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: failed to write zonegroup " << name - << " with " << cpp_strerror(r) << dendl; - continue; - } - ldpp_dout(dpp, 0) << "Removed zone from zonegroup " << name << dendl; - } - } while (!listing.next.empty()); - - return 0; -} - - -int read_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view zone_id, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer) -{ - if (!zone_id.empty()) { - return cfgstore->read_zone_by_id(dpp, y, zone_id, info, writer); - } - if (!zone_name.empty()) { - return cfgstore->read_zone_by_name(dpp, y, 
zone_name, info, writer); - } - - std::string realm_id; - int r = cfgstore->read_default_realm_id(dpp, y, realm_id); - if (r == -ENOENT) { - return cfgstore->read_zone_by_name(dpp, y, default_zone_name, info, writer); - } - if (r < 0) { - return r; - } - return cfgstore->read_default_zone(dpp, y, realm_id, info, writer); -} - -extern int get_zones_pool_set(const DoutPrefixProvider *dpp, optional_yield y, - rgw::sal::ConfigStore* cfgstore, - std::string_view my_zone_id, - std::set& pools); - -int create_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWZoneParams& info, std::unique_ptr* writer) -{ - if (info.name.empty()) { - ldpp_dout(dpp, -1) << __func__ << " requires a zone name" << dendl; - return -EINVAL; - } - if (info.id.empty()) { - info.id = gen_random_uuid(); - } - - // add default placement with empty pool name - rgw_pool pool; - auto& placement = info.placement_pools["default-placement"]; - placement.storage_classes.set_storage_class( - RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); - - // build a set of all pool names used by other zones - std::set pools; - int r = get_zones_pool_set(dpp, y, cfgstore, info.id, pools); - if (r < 0) { - return r; - } - - // initialize pool names with the zone name prefix - r = init_zone_pool_names(dpp, y, pools, info); - if (r < 0) { - return r; - } - - r = cfgstore->create_zone(dpp, y, exclusive, info, nullptr); - if (r < 0) { - ldpp_dout(dpp, 0) << "failed to create zone with " - << cpp_strerror(r) << dendl; - return r; - } - - // try to set as default. may race with another create, so pass exclusive=true - // so we don't override an existing default - r = set_default_zone(dpp, y, cfgstore, info, true); - if (r < 0 && r != -EEXIST) { - ldpp_dout(dpp, 0) << "WARNING: failed to set zone as default: " - << cpp_strerror(r) << dendl; - } - - return 0; - -} - -int set_default_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneParams& info, - bool exclusive) -{ - return cfgstore->write_default_zone_id( - dpp, y, exclusive, info.realm_id, info.id); -} - -int delete_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneParams& info, - sal::ZoneWriter& writer) -{ - // remove this zone from any zonegroups that contain it - int r = remove_zone_from_groups(dpp, y, cfgstore, info.id); - if (r < 0) { - return r; - } - - return writer.remove(dpp, y); -} - -} // namespace rgw - -static inline int conf_to_uint64(const JSONFormattable& config, const string& key, uint64_t *pval) -{ - string sval; - if (config.find(key, &sval)) { - string err; - uint64_t val = strict_strtoll(sval.c_str(), 10, &err); - if (!err.empty()) { - return -EINVAL; - } - *pval = val; - } - return 0; -} - -int RGWZoneGroupPlacementTier::update_params(const JSONFormattable& config) -{ - int r = -1; - - if (config.exists("retain_head_object")) { - string s = config["retain_head_object"]; - if (s == "true") { - retain_head_object = true; - } else { - retain_head_object = false; - } - } - - if (tier_type == "cloud-s3") { - r = t.s3.update_params(config); - } - - return r; -} - -int RGWZoneGroupPlacementTier::clear_params(const JSONFormattable& config) -{ - if (config.exists("retain_head_object")) { - retain_head_object = false; - } - - if (tier_type == "cloud-s3") { - t.s3.clear_params(config); - } - - return 0; -} - -int RGWZoneGroupPlacementTierS3::update_params(const JSONFormattable& config) -{ - int r = -1; - - if (config.exists("endpoint")) { - 
endpoint = config["endpoint"]; - } - if (config.exists("target_path")) { - target_path = config["target_path"]; - } - if (config.exists("region")) { - region = config["region"]; - } - if (config.exists("host_style")) { - string s; - s = config["host_style"]; - if (s != "virtual") { - host_style = PathStyle; - } else { - host_style = VirtualStyle; - } - } - if (config.exists("target_storage_class")) { - target_storage_class = config["target_storage_class"]; - } - if (config.exists("access_key")) { - key.id = config["access_key"]; - } - if (config.exists("secret")) { - key.key = config["secret"]; - } - if (config.exists("multipart_sync_threshold")) { - r = conf_to_uint64(config, "multipart_sync_threshold", &multipart_sync_threshold); - if (r < 0) { - multipart_sync_threshold = DEFAULT_MULTIPART_SYNC_PART_SIZE; - } - } - - if (config.exists("multipart_min_part_size")) { - r = conf_to_uint64(config, "multipart_min_part_size", &multipart_min_part_size); - if (r < 0) { - multipart_min_part_size = DEFAULT_MULTIPART_SYNC_PART_SIZE; - } - } - - if (config.exists("acls")) { - const JSONFormattable& cc = config["acls"]; - if (cc.is_array()) { - for (auto& c : cc.array()) { - RGWTierACLMapping m; - m.init(c); - if (!m.source_id.empty()) { - acl_mappings[m.source_id] = m; - } - } - } else { - RGWTierACLMapping m; - m.init(cc); - if (!m.source_id.empty()) { - acl_mappings[m.source_id] = m; - } - } - } - return 0; -} - -int RGWZoneGroupPlacementTierS3::clear_params(const JSONFormattable& config) -{ - if (config.exists("endpoint")) { - endpoint.clear(); - } - if (config.exists("target_path")) { - target_path.clear(); - } - if (config.exists("region")) { - region.clear(); - } - if (config.exists("host_style")) { - /* default */ - host_style = PathStyle; - } - if (config.exists("target_storage_class")) { - target_storage_class.clear(); - } - if (config.exists("access_key")) { - key.id.clear(); - } - if (config.exists("secret")) { - key.key.clear(); - } - if (config.exists("multipart_sync_threshold")) { - multipart_sync_threshold = DEFAULT_MULTIPART_SYNC_PART_SIZE; - } - if (config.exists("multipart_min_part_size")) { - multipart_min_part_size = DEFAULT_MULTIPART_SYNC_PART_SIZE; - } - if (config.exists("acls")) { - const JSONFormattable& cc = config["acls"]; - if (cc.is_array()) { - for (auto& c : cc.array()) { - RGWTierACLMapping m; - m.init(c); - acl_mappings.erase(m.source_id); - } - } else { - RGWTierACLMapping m; - m.init(cc); - acl_mappings.erase(m.source_id); - } - } - return 0; -} - -void rgw_meta_sync_info::generate_test_instances(list& o) -{ - auto info = new rgw_meta_sync_info; - info->state = rgw_meta_sync_info::StateBuildingFullSyncMaps; - info->period = "periodid"; - info->realm_epoch = 5; - o.push_back(info); - o.push_back(new rgw_meta_sync_info); -} - -void rgw_meta_sync_marker::generate_test_instances(list& o) -{ - auto marker = new rgw_meta_sync_marker; - marker->state = rgw_meta_sync_marker::IncrementalSync; - marker->marker = "01234"; - marker->realm_epoch = 5; - o.push_back(marker); - o.push_back(new rgw_meta_sync_marker); -} - -void rgw_meta_sync_status::generate_test_instances(list& o) -{ - o.push_back(new rgw_meta_sync_status); -} - -void RGWZoneParams::generate_test_instances(list &o) -{ - o.push_back(new RGWZoneParams); - o.push_back(new RGWZoneParams); -} - -void RGWPeriodLatestEpochInfo::generate_test_instances(list &o) -{ - RGWPeriodLatestEpochInfo *z = new RGWPeriodLatestEpochInfo; - o.push_back(z); - o.push_back(new RGWPeriodLatestEpochInfo); -} - -void 
RGWZoneGroup::generate_test_instances(list& o) -{ - RGWZoneGroup *r = new RGWZoneGroup; - o.push_back(r); - o.push_back(new RGWZoneGroup); -} - -void RGWPeriodLatestEpochInfo::dump(Formatter *f) const { - encode_json("latest_epoch", epoch, f); -} - -void RGWPeriodLatestEpochInfo::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("latest_epoch", epoch, obj); -} - -void RGWNameToId::dump(Formatter *f) const { - encode_json("obj_id", obj_id, f); -} - -void RGWNameToId::decode_json(JSONObj *obj) { - JSONDecoder::decode_json("obj_id", obj_id, obj); -} - diff --git a/src/rgw/store/rados/rgw_zone.h b/src/rgw/store/rados/rgw_zone.h deleted file mode 100644 index e1792a40cce..00000000000 --- a/src/rgw/store/rados/rgw_zone.h +++ /dev/null @@ -1,1525 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#ifndef CEPH_RGW_ZONE_H -#define CEPH_RGW_ZONE_H - -#include -#include "rgw_common.h" -#include "rgw_sal_fwd.h" -#include "rgw_sync_policy.h" -#include "rgw_zone_features.h" - -namespace rgw_zone_defaults { - -extern std::string zone_names_oid_prefix; -extern std::string region_info_oid_prefix; -extern std::string realm_names_oid_prefix; -extern std::string zone_group_info_oid_prefix; -extern std::string realm_info_oid_prefix; -extern std::string default_region_info_oid; -extern std::string default_zone_group_info_oid; -extern std::string region_map_oid; -extern std::string default_realm_info_oid; -extern std::string default_zonegroup_name; -extern std::string default_zone_name; -extern std::string zonegroup_names_oid_prefix; -extern std::string RGW_DEFAULT_ZONE_ROOT_POOL; -extern std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL; -extern std::string RGW_DEFAULT_REALM_ROOT_POOL; -extern std::string RGW_DEFAULT_PERIOD_ROOT_POOL; -extern std::string avail_pools; -extern std::string default_storage_pool_suffix; - -} - -class JSONObj; -class RGWSyncModulesManager; - - -struct RGWNameToId { - std::string obj_id; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(obj_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(obj_id, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWNameToId) - -struct RGWDefaultSystemMetaObjInfo { - std::string default_id; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(default_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(default_id, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo) - -class RGWSI_SysObj; -class RGWSI_Zone; - -class RGWSystemMetaObj { -public: - std::string id; - std::string name; - - CephContext *cct{nullptr}; - RGWSI_SysObj *sysobj_svc{nullptr}; - RGWSI_Zone *zone_svc{nullptr}; - - int store_name(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); - int store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); - int read_info(const DoutPrefixProvider *dpp, const std::string& obj_id, optional_yield y, bool old_format = false); - int read_id(const DoutPrefixProvider *dpp, const std::string& obj_name, std::string& obj_id, optional_yield y); - int read_default(const DoutPrefixProvider *dpp, - RGWDefaultSystemMetaObjInfo& default_info, - const std::string& oid, - optional_yield y); - /* read and use default id */ 
- int use_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); - -public: - RGWSystemMetaObj() {} - RGWSystemMetaObj(const std::string& _name): name(_name) {} - RGWSystemMetaObj(const std::string& _id, const std::string& _name) : id(_id), name(_name) {} - RGWSystemMetaObj(CephContext *_cct, RGWSI_SysObj *_sysobj_svc) { - reinit_instance(_cct, _sysobj_svc); - } - RGWSystemMetaObj(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): name(_name) { - reinit_instance(_cct, _sysobj_svc); - } - - const std::string& get_name() const { return name; } - const std::string& get_id() const { return id; } - - void set_name(const std::string& _name) { name = _name;} - void set_id(const std::string& _id) { id = _id;} - void clear_id() { id.clear(); } - - virtual ~RGWSystemMetaObj() {} - - virtual void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(id, bl); - encode(name, bl); - ENCODE_FINISH(bl); - } - - virtual void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(id, bl); - decode(name, bl); - DECODE_FINISH(bl); - } - - void reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc); - int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, - optional_yield y, - bool setup_obj = true, bool old_format = false); - virtual int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, - bool old_format = false); - virtual int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false); - int delete_default(); - virtual int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true); - int delete_obj(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); - int rename(const DoutPrefixProvider *dpp, const std::string& new_name, optional_yield y); - int update(const DoutPrefixProvider *dpp, optional_yield y) { return store_info(dpp, false, y);} - int update_name(const DoutPrefixProvider *dpp, optional_yield y) { return store_name(dpp, false, y);} - int read(const DoutPrefixProvider *dpp, optional_yield y); - int write(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); - - virtual rgw_pool get_pool(CephContext *cct) const = 0; - virtual const std::string get_default_oid(bool old_format = false) const = 0; - virtual const std::string& get_names_oid_prefix() const = 0; - virtual const std::string& get_info_oid_prefix(bool old_format = false) const = 0; - virtual std::string get_predefined_id(CephContext *cct) const = 0; - virtual const std::string& get_predefined_name(CephContext *cct) const = 0; - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWSystemMetaObj) - -struct RGWZoneStorageClass { - boost::optional data_pool; - boost::optional compression_type; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(data_pool, bl); - encode(compression_type, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(data_pool, bl); - decode(compression_type, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWZoneStorageClass) - - -class RGWZoneStorageClasses { - std::map m; - - /* in memory only */ - RGWZoneStorageClass *standard_class; - -public: - RGWZoneStorageClasses() { - standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; - } - RGWZoneStorageClasses(const RGWZoneStorageClasses& rhs) { - m = 
rhs.m; - standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; - } - RGWZoneStorageClasses& operator=(const RGWZoneStorageClasses& rhs) { - m = rhs.m; - standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; - return *this; - } - - const RGWZoneStorageClass& get_standard() const { - return *standard_class; - } - - bool find(const std::string& sc, const RGWZoneStorageClass **pstorage_class) const { - auto iter = m.find(sc); - if (iter == m.end()) { - return false; - } - *pstorage_class = &iter->second; - return true; - } - - bool exists(const std::string& sc) const { - if (sc.empty()) { - return true; - } - auto iter = m.find(sc); - return (iter != m.end()); - } - - const std::map<std::string, RGWZoneStorageClass>& get_all() const { - return m; - } - - std::map<std::string, RGWZoneStorageClass>& get_all() { - return m; - } - - void set_storage_class(const std::string& sc, const rgw_pool *data_pool, const std::string *compression_type) { - const std::string *psc = &sc; - if (sc.empty()) { - psc = &RGW_STORAGE_CLASS_STANDARD; - } - RGWZoneStorageClass& storage_class = m[*psc]; - if (data_pool) { - storage_class.data_pool = *data_pool; - } - if (compression_type) { - storage_class.compression_type = *compression_type; - } - } - - void remove_storage_class(const std::string& sc) { - if (!sc.empty()) { - m.erase(sc); - } - } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(m, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(m, bl); - standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWZoneStorageClasses) - -struct RGWZonePlacementInfo { - rgw_pool index_pool; - rgw_pool data_extra_pool; /* if not set we should use data_pool */ - RGWZoneStorageClasses storage_classes; - rgw::BucketIndexType index_type; - - RGWZonePlacementInfo() : index_type(rgw::BucketIndexType::Normal) {} - - void encode(bufferlist& bl) const { - ENCODE_START(7, 1, bl); - encode(index_pool.to_str(), bl); - rgw_pool standard_data_pool = get_data_pool(RGW_STORAGE_CLASS_STANDARD); - encode(standard_data_pool.to_str(), bl); - encode(data_extra_pool.to_str(), bl); - encode((uint32_t)index_type, bl); - std::string standard_compression_type = get_compression_type(RGW_STORAGE_CLASS_STANDARD); - encode(standard_compression_type, bl); - encode(storage_classes, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(7, bl); - std::string index_pool_str; - std::string data_pool_str; - decode(index_pool_str, bl); - index_pool = rgw_pool(index_pool_str); - decode(data_pool_str, bl); - rgw_pool standard_data_pool(data_pool_str); - if (struct_v >= 4) { - std::string data_extra_pool_str; - decode(data_extra_pool_str, bl); - data_extra_pool = rgw_pool(data_extra_pool_str); - } - if (struct_v >= 5) { - uint32_t it; - decode(it, bl); - index_type = (rgw::BucketIndexType)it; - } - std::string standard_compression_type; - if (struct_v >= 6) { - decode(standard_compression_type, bl); - } - if (struct_v >= 7) { - decode(storage_classes, bl); - } else { - storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &standard_data_pool, - (!standard_compression_type.empty() ?
&standard_compression_type : nullptr)); - } - DECODE_FINISH(bl); - } - const rgw_pool& get_data_extra_pool() const { - static rgw_pool no_pool; - if (data_extra_pool.empty()) { - return storage_classes.get_standard().data_pool.get_value_or(no_pool); - } - return data_extra_pool; - } - const rgw_pool& get_data_pool(const std::string& sc) const { - const RGWZoneStorageClass *storage_class; - static rgw_pool no_pool; - - if (!storage_classes.find(sc, &storage_class)) { - return storage_classes.get_standard().data_pool.get_value_or(no_pool); - } - - return storage_class->data_pool.get_value_or(no_pool); - } - const rgw_pool& get_standard_data_pool() const { - return get_data_pool(RGW_STORAGE_CLASS_STANDARD); - } - - const std::string& get_compression_type(const std::string& sc) const { - const RGWZoneStorageClass *storage_class; - static std::string no_compression; - - if (!storage_classes.find(sc, &storage_class)) { - return no_compression; - } - return storage_class->compression_type.get_value_or(no_compression); - } - - bool storage_class_exists(const std::string& sc) const { - return storage_classes.exists(sc); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - -}; -WRITE_CLASS_ENCODER(RGWZonePlacementInfo) - -struct RGWZoneParams : RGWSystemMetaObj { - rgw_pool domain_root; - rgw_pool control_pool; - rgw_pool gc_pool; - rgw_pool lc_pool; - rgw_pool log_pool; - rgw_pool intent_log_pool; - rgw_pool usage_log_pool; - rgw_pool user_keys_pool; - rgw_pool user_email_pool; - rgw_pool user_swift_pool; - rgw_pool user_uid_pool; - rgw_pool roles_pool; - rgw_pool reshard_pool; - rgw_pool otp_pool; - rgw_pool oidc_pool; - rgw_pool notif_pool; - - RGWAccessKey system_key; - - std::map placement_pools; - - std::string realm_id; - - JSONFormattable tier_config; - - RGWZoneParams() : RGWSystemMetaObj() {} - explicit RGWZoneParams(const std::string& name) : RGWSystemMetaObj(name){} - RGWZoneParams(const rgw_zone_id& id, const std::string& name) : RGWSystemMetaObj(id.id, name) {} - RGWZoneParams(const rgw_zone_id& id, const std::string& name, const std::string& _realm_id) - : RGWSystemMetaObj(id.id, name), realm_id(_realm_id) {} - virtual ~RGWZoneParams(); - - rgw_pool get_pool(CephContext *cct) const override; - const std::string get_default_oid(bool old_format = false) const override; - const std::string& get_names_oid_prefix() const override; - const std::string& get_info_oid_prefix(bool old_format = false) const override; - std::string get_predefined_id(CephContext *cct) const override; - const std::string& get_predefined_name(CephContext *cct) const override; - - int init(const DoutPrefixProvider *dpp, - CephContext *_cct, RGWSI_SysObj *_sysobj_svc, optional_yield y, - bool setup_obj = true, bool old_format = false); - using RGWSystemMetaObj::init; - int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, bool old_format = false) override; - int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false) override; - int create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); - int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true) override; - int fix_pool_names(const DoutPrefixProvider *dpp, optional_yield y); - - const std::string& get_compression_type(const rgw_placement_rule& placement_rule) const; - - void encode(bufferlist& bl) const override { - ENCODE_START(14, 1, bl); - encode(domain_root, bl); - encode(control_pool, bl); - encode(gc_pool, bl); 
- encode(log_pool, bl); - encode(intent_log_pool, bl); - encode(usage_log_pool, bl); - encode(user_keys_pool, bl); - encode(user_email_pool, bl); - encode(user_swift_pool, bl); - encode(user_uid_pool, bl); - RGWSystemMetaObj::encode(bl); - encode(system_key, bl); - encode(placement_pools, bl); - rgw_pool unused_metadata_heap; - encode(unused_metadata_heap, bl); - encode(realm_id, bl); - encode(lc_pool, bl); - std::map old_tier_config; - encode(old_tier_config, bl); - encode(roles_pool, bl); - encode(reshard_pool, bl); - encode(otp_pool, bl); - encode(tier_config, bl); - encode(oidc_pool, bl); - encode(notif_pool, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) override { - DECODE_START(14, bl); - decode(domain_root, bl); - decode(control_pool, bl); - decode(gc_pool, bl); - decode(log_pool, bl); - decode(intent_log_pool, bl); - decode(usage_log_pool, bl); - decode(user_keys_pool, bl); - decode(user_email_pool, bl); - decode(user_swift_pool, bl); - decode(user_uid_pool, bl); - if (struct_v >= 6) { - RGWSystemMetaObj::decode(bl); - } else if (struct_v >= 2) { - decode(name, bl); - id = name; - } - if (struct_v >= 3) - decode(system_key, bl); - if (struct_v >= 4) - decode(placement_pools, bl); - if (struct_v >= 5) { - rgw_pool unused_metadata_heap; - decode(unused_metadata_heap, bl); - } - if (struct_v >= 6) { - decode(realm_id, bl); - } - if (struct_v >= 7) { - decode(lc_pool, bl); - } else { - lc_pool = log_pool.name + ":lc"; - } - std::map old_tier_config; - if (struct_v >= 8) { - decode(old_tier_config, bl); - } - if (struct_v >= 9) { - decode(roles_pool, bl); - } else { - roles_pool = name + ".rgw.meta:roles"; - } - if (struct_v >= 10) { - decode(reshard_pool, bl); - } else { - reshard_pool = log_pool.name + ":reshard"; - } - if (struct_v >= 11) { - ::decode(otp_pool, bl); - } else { - otp_pool = name + ".rgw.otp"; - } - if (struct_v >= 12) { - ::decode(tier_config, bl); - } else { - for (auto& kv : old_tier_config) { - tier_config.set(kv.first, kv.second); - } - } - if (struct_v >= 13) { - ::decode(oidc_pool, bl); - } else { - oidc_pool = name + ".rgw.meta:oidc"; - } - if (struct_v >= 14) { - decode(notif_pool, bl); - } else { - notif_pool = log_pool.name + ":notif"; - } - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list& o); - - bool get_placement(const std::string& placement_id, RGWZonePlacementInfo *placement) const { - auto iter = placement_pools.find(placement_id); - if (iter == placement_pools.end()) { - return false; - } - *placement = iter->second; - return true; - } - - /* - * return data pool of the head object - */ - bool get_head_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) const { - const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement; - if (!explicit_placement.data_pool.empty()) { - if (!obj.in_extra_data) { - *pool = explicit_placement.data_pool; - } else { - *pool = explicit_placement.get_data_extra_pool(); - } - return true; - } - if (placement_rule.empty()) { - return false; - } - auto iter = placement_pools.find(placement_rule.name); - if (iter == placement_pools.end()) { - return false; - } - if (!obj.in_extra_data) { - *pool = iter->second.get_data_pool(placement_rule.storage_class); - } else { - *pool = iter->second.get_data_extra_pool(); - } - return true; - } - - bool valid_placement(const rgw_placement_rule& rule) const { - auto iter = placement_pools.find(rule.name); - if 
(iter == placement_pools.end()) { - return false; - } - return iter->second.storage_class_exists(rule.storage_class); - } -}; -WRITE_CLASS_ENCODER(RGWZoneParams) - - -struct RGWZone { - std::string id; - std::string name; - std::list<std::string> endpoints; - bool log_meta; - bool log_data; - bool read_only; - std::string tier_type; - - std::string redirect_zone; -/** - * Represents the number of shards for the bucket index object; a value of zero - * indicates there is no sharding. By default (no sharding), the name of the object - * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}', - * where sharding_id is a zero-based value. Setting too large a value - * (e.g. a thousand) is not recommended, as it increases the cost of bucket listing. - */ - uint32_t bucket_index_max_shards; - - // pre-shard buckets on creation to enable some write-parallelism by default, - // delay the need to reshard as the bucket grows, and (in multisite) get some - // bucket index sharding where dynamic resharding is not supported - static constexpr uint32_t default_bucket_index_max_shards = 11; - - bool sync_from_all; - std::set<std::string> sync_from; /* list of zones to sync from */ - - rgw::zone_features::set supported_features; - - RGWZone() - : log_meta(false), log_data(false), read_only(false), - bucket_index_max_shards(default_bucket_index_max_shards), - sync_from_all(true) {} - - void encode(bufferlist& bl) const { - ENCODE_START(8, 1, bl); - encode(name, bl); - encode(endpoints, bl); - encode(log_meta, bl); - encode(log_data, bl); - encode(bucket_index_max_shards, bl); - encode(id, bl); - encode(read_only, bl); - encode(tier_type, bl); - encode(sync_from_all, bl); - encode(sync_from, bl); - encode(redirect_zone, bl); - encode(supported_features, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(8, bl); - decode(name, bl); - if (struct_v < 4) { - id = name; - } - decode(endpoints, bl); - if (struct_v >= 2) { - decode(log_meta, bl); - decode(log_data, bl); - } - if (struct_v >= 3) { - decode(bucket_index_max_shards, bl); - } - if (struct_v >= 4) { - decode(id, bl); - decode(read_only, bl); - } - if (struct_v >= 5) { - decode(tier_type, bl); - } - if (struct_v >= 6) { - decode(sync_from_all, bl); - decode(sync_from, bl); - } - if (struct_v >= 7) { - decode(redirect_zone, bl); - } - if (struct_v >= 8) { - decode(supported_features, bl); - } - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list<RGWZone*>& o); - - bool is_read_only() const { return read_only; } - - bool syncs_from(const std::string& zone_name) const { - return (sync_from_all || sync_from.find(zone_name) != sync_from.end()); - } - - bool supports(std::string_view feature) const { - return supported_features.contains(feature); - } -}; -WRITE_CLASS_ENCODER(RGWZone) - -struct RGWDefaultZoneGroupInfo { - std::string default_zonegroup; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(default_zonegroup, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(default_zonegroup, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - // TODO: implement ceph-dencoder -}; -WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo) - -struct RGWTierACLMapping { - ACLGranteeTypeEnum type{ACL_TYPE_CANON_USER}; - std::string source_id; - std::string dest_id; - - RGWTierACLMapping() = default; - - RGWTierACLMapping(ACLGranteeTypeEnum t, -
const std::string& s, - const std::string& d) : type(t), - source_id(s), - dest_id(d) {} - - void init(const JSONFormattable& config) { - const std::string& t = config["type"]; - - if (t == "email") { - type = ACL_TYPE_EMAIL_USER; - } else if (t == "uri") { - type = ACL_TYPE_GROUP; - } else { - type = ACL_TYPE_CANON_USER; - } - - source_id = config["source_id"]; - dest_id = config["dest_id"]; - } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode((uint32_t)type, bl); - encode(source_id, bl); - encode(dest_id, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - uint32_t it; - decode(it, bl); - type = (ACLGranteeTypeEnum)it; - decode(source_id, bl); - decode(dest_id, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWTierACLMapping) - -struct RGWZoneGroupPlacementTierS3 { -#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) - std::string endpoint; - RGWAccessKey key; - std::string region; - HostStyle host_style{PathStyle}; - std::string target_storage_class; - - /* Should below be bucket/zone specific?? */ - std::string target_path; - std::map acl_mappings; - - uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE}; - uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE}; - - int update_params(const JSONFormattable& config); - int clear_params(const JSONFormattable& config); - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(endpoint, bl); - encode(key, bl); - encode(region, bl); - encode((uint32_t)host_style, bl); - encode(target_storage_class, bl); - encode(target_path, bl); - encode(acl_mappings, bl); - encode(multipart_sync_threshold, bl); - encode(multipart_min_part_size, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(endpoint, bl); - decode(key, bl); - decode(region, bl); - - uint32_t it; - decode(it, bl); - host_style = (HostStyle)it; - - decode(target_storage_class, bl); - decode(target_path, bl); - decode(acl_mappings, bl); - decode(multipart_sync_threshold, bl); - decode(multipart_min_part_size, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTierS3) - -struct RGWZoneGroupPlacementTier { - std::string tier_type; - std::string storage_class; - bool retain_head_object = false; - - struct _tier { - RGWZoneGroupPlacementTierS3 s3; - } t; - - int update_params(const JSONFormattable& config); - int clear_params(const JSONFormattable& config); - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(tier_type, bl); - encode(storage_class, bl); - encode(retain_head_object, bl); - if (tier_type == "cloud-s3") { - encode(t.s3, bl); - } - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(tier_type, bl); - decode(storage_class, bl); - decode(retain_head_object, bl); - if (tier_type == "cloud-s3") { - decode(t.s3, bl); - } - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTier) - -struct RGWZoneGroupPlacementTarget { - std::string name; - std::set tags; - std::set storage_classes; - std::map tier_targets; - - bool user_permitted(const std::list& user_tags) const { - if (tags.empty()) { - return true; - } - for (auto& rule : user_tags) { - if (tags.find(rule) != tags.end()) 
{ - return true; - } - } - return false; - } - - void encode(bufferlist& bl) const { - ENCODE_START(3, 1, bl); - encode(name, bl); - encode(tags, bl); - encode(storage_classes, bl); - encode(tier_targets, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(3, bl); - decode(name, bl); - decode(tags, bl); - if (struct_v >= 2) { - decode(storage_classes, bl); - } - if (storage_classes.empty()) { - storage_classes.insert(RGW_STORAGE_CLASS_STANDARD); - } - if (struct_v >= 3) { - decode(tier_targets, bl); - } - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); -}; -WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget) - -struct RGWZoneGroup : public RGWSystemMetaObj { - std::string api_name; - std::list<std::string> endpoints; - bool is_master = false; - - rgw_zone_id master_zone; - std::map<rgw_zone_id, RGWZone> zones; - - std::map<std::string, RGWZoneGroupPlacementTarget> placement_targets; - rgw_placement_rule default_placement; - - std::list<std::string> hostnames; - std::list<std::string> hostnames_s3website; - // TODO: Maybe convert hostnames to a map<std::string, std::list<std::string>> for - // endpoint_type->hostnames -/* -20:05 < _robbat21irssi> maybe I do something like: if (hostname_map.empty()) { populate all map keys from hostnames; }; -20:05 < _robbat21irssi> but that's a later compatibility migration planning bit -20:06 < yehudasa> more like if (!hostnames.empty()) { -20:06 < yehudasa> for (std::list<std::string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) { -20:06 < yehudasa> hostname_map["s3"].append(iter->second); -20:07 < yehudasa> hostname_map["s3website"].append(iter->second); -20:07 < yehudasa> s/append/push_back/g -20:08 < _robbat21irssi> inner loop over APIs -20:08 < yehudasa> yeah, probably -20:08 < _robbat21irssi> s3, s3website, swift, swift_auth, swift_website -*/ - std::map<std::string, std::list<std::string> > api_hostname_map; - std::map<std::string, std::list<std::string> > api_endpoints_map; - - std::string realm_id; - - rgw_sync_policy_info sync_policy; - rgw::zone_features::set enabled_features; - - RGWZoneGroup(): is_master(false){} - RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {} - explicit RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {} - RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWSI_SysObj* sysobj_svc, - const std::string& _realm_id, const std::list<std::string>& _endpoints) - : RGWSystemMetaObj(_name, cct , sysobj_svc), endpoints(_endpoints), is_master(_is_master), - realm_id(_realm_id) {} - virtual ~RGWZoneGroup(); - - bool is_master_zonegroup() const { return is_master;} - void update_master(const DoutPrefixProvider *dpp, bool _is_master, optional_yield y) { - is_master = _is_master; - post_process_params(dpp, y); - } - void post_process_params(const DoutPrefixProvider *dpp, optional_yield y); - - void encode(bufferlist& bl) const override { - ENCODE_START(6, 1, bl); - encode(name, bl); - encode(api_name, bl); - encode(is_master, bl); - encode(endpoints, bl); - encode(master_zone, bl); - encode(zones, bl); - encode(placement_targets, bl); - encode(default_placement, bl); - encode(hostnames, bl); - encode(hostnames_s3website, bl); - RGWSystemMetaObj::encode(bl); - encode(realm_id, bl); - encode(sync_policy, bl); - encode(enabled_features, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) override { - DECODE_START(6, bl); - decode(name, bl); - decode(api_name, bl); - decode(is_master, bl); - decode(endpoints, bl); - decode(master_zone, bl); - decode(zones, bl); - decode(placement_targets, bl); - decode(default_placement, bl); - if (struct_v >= 2) { -
decode(hostnames, bl); - } - if (struct_v >= 3) { - decode(hostnames_s3website, bl); - } - if (struct_v >= 4) { - RGWSystemMetaObj::decode(bl); - decode(realm_id, bl); - } else { - id = name; - } - if (struct_v >= 5) { - decode(sync_policy, bl); - } - if (struct_v >= 6) { - decode(enabled_features, bl); - } - DECODE_FINISH(bl); - } - - int read_default_id(const DoutPrefixProvider *dpp, std::string& default_id, optional_yield y, bool old_format = false) override; - int set_as_default(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = false) override; - int create_default(const DoutPrefixProvider *dpp, optional_yield y, bool old_format = false); - int equals(const std::string& other_zonegroup) const; - int add_zone(const DoutPrefixProvider *dpp, - const RGWZoneParams& zone_params, bool *is_master, bool *read_only, - const std::list& endpoints, const std::string *ptier_type, - bool *psync_from_all, std::list& sync_from, - std::list& sync_from_rm, std::string *predirect_zone, - std::optional bucket_index_max_shards, RGWSyncModulesManager *sync_mgr, - const rgw::zone_features::set& enable_features, - const rgw::zone_features::set& disable_features, - optional_yield y); - int remove_zone(const DoutPrefixProvider *dpp, const std::string& zone_id, optional_yield y); - int rename_zone(const DoutPrefixProvider *dpp, const RGWZoneParams& zone_params, optional_yield y); - rgw_pool get_pool(CephContext *cct) const override; - const std::string get_default_oid(bool old_region_format = false) const override; - const std::string& get_info_oid_prefix(bool old_region_format = false) const override; - const std::string& get_names_oid_prefix() const override; - std::string get_predefined_id(CephContext *cct) const override; - const std::string& get_predefined_name(CephContext *cct) const override; - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list& o); - - bool supports(std::string_view feature) const { - return enabled_features.contains(feature); - } -}; -WRITE_CLASS_ENCODER(RGWZoneGroup) - -struct RGWPeriodMap -{ - std::string id; - std::map zonegroups; - std::map zonegroups_by_api; - std::map short_zone_ids; - - std::string master_zonegroup; - - void encode(bufferlist& bl) const; - void decode(bufferlist::const_iterator& bl); - - int update(const RGWZoneGroup& zonegroup, CephContext *cct); - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - - void reset() { - zonegroups.clear(); - zonegroups_by_api.clear(); - master_zonegroup.clear(); - } - - uint32_t get_zone_short_id(const std::string& zone_id) const; - - bool find_zone_by_id(const rgw_zone_id& zone_id, - RGWZoneGroup *zonegroup, - RGWZone *zone) const; - bool find_zone_by_name(const std::string& zone_id, - RGWZoneGroup *zonegroup, - RGWZone *zone) const; -}; -WRITE_CLASS_ENCODER(RGWPeriodMap) - -struct RGWPeriodConfig -{ - RGWQuota quota; - RGWRateLimitInfo user_ratelimit; - RGWRateLimitInfo bucket_ratelimit; - // rate limit unauthenticated user - RGWRateLimitInfo anon_ratelimit; - - void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); - encode(quota.bucket_quota, bl); - encode(quota.user_quota, bl); - encode(bucket_ratelimit, bl); - encode(user_ratelimit, bl); - encode(anon_ratelimit, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); - decode(quota.bucket_quota, bl); - decode(quota.user_quota, bl); - if (struct_v >= 2) { - decode(bucket_ratelimit, bl); - decode(user_ratelimit, bl); - 
decode(anon_ratelimit, bl); - } - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - - // the period config must be stored in a local object outside of the period, - // so that it can be used in a default configuration where no realm/period - // exists - int read(const DoutPrefixProvider *dpp, RGWSI_SysObj *sysobj_svc, const std::string& realm_id, optional_yield y); - int write(const DoutPrefixProvider *dpp, RGWSI_SysObj *sysobj_svc, const std::string& realm_id, optional_yield y); - - static std::string get_oid(const std::string& realm_id); - static rgw_pool get_pool(CephContext *cct); -}; -WRITE_CLASS_ENCODER(RGWPeriodConfig) - -class RGWRealm; -class RGWPeriod; - -class RGWRealm : public RGWSystemMetaObj -{ -public: - std::string current_period; - epoch_t epoch{0}; //< realm epoch, incremented for each new period - - int create_control(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); - int delete_control(const DoutPrefixProvider *dpp, optional_yield y); -public: - RGWRealm() {} - RGWRealm(const std::string& _id, const std::string& _name = "") : RGWSystemMetaObj(_id, _name) {} - RGWRealm(CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_cct, _sysobj_svc) {} - RGWRealm(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_name, _cct, _sysobj_svc){} - virtual ~RGWRealm() override; - - void encode(bufferlist& bl) const override { - ENCODE_START(1, 1, bl); - RGWSystemMetaObj::encode(bl); - encode(current_period, bl); - encode(epoch, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) override { - DECODE_START(1, bl); - RGWSystemMetaObj::decode(bl); - decode(current_period, bl); - decode(epoch, bl); - DECODE_FINISH(bl); - } - - int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true) override; - int delete_obj(const DoutPrefixProvider *dpp, optional_yield y); - rgw_pool get_pool(CephContext *cct) const override; - const std::string get_default_oid(bool old_format = false) const override; - const std::string& get_names_oid_prefix() const override; - const std::string& get_info_oid_prefix(bool old_format = false) const override; - std::string get_predefined_id(CephContext *cct) const override; - const std::string& get_predefined_name(CephContext *cct) const override; - - using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list& o); - - const std::string& get_current_period() const { - return current_period; - } - int set_current_period(const DoutPrefixProvider *dpp, RGWPeriod& period, optional_yield y); - void clear_current_period_and_epoch() { - current_period.clear(); - epoch = 0; - } - epoch_t get_epoch() const { return epoch; } - - std::string get_control_oid() const; - /// send a notify on the realm control object - int notify_zone(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y); - /// notify the zone of a new period - int notify_new_period(const DoutPrefixProvider *dpp, const RGWPeriod& period, optional_yield y); - - int find_zone(const DoutPrefixProvider *dpp, - const rgw_zone_id& zid, - RGWPeriod *pperiod, - RGWZoneGroup *pzonegroup, - bool *pfound, - optional_yield y) const; -}; -WRITE_CLASS_ENCODER(RGWRealm) - -struct RGWPeriodLatestEpochInfo { - epoch_t epoch = 0; - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(epoch, bl); - ENCODE_FINISH(bl); - } - - 
void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(epoch, bl); - DECODE_FINISH(bl); - } - - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list<RGWPeriodLatestEpochInfo*>& o); -}; -WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo) - - -/* - * The RGWPeriod object contains the entire configuration of a - * RGWRealm, including its RGWZoneGroups and RGWZones. Consistency of - * this configuration is maintained across all zones by passing around - * the RGWPeriod object in its JSON representation. - * - * If a new configuration changes which zone is the metadata master - * zone (i.e., master zone of the master zonegroup), then a new - * RGWPeriod::id (a uuid) is generated, its RGWPeriod::realm_epoch is - * incremented, and the RGWRealm object is updated to reflect that new - * current_period id and epoch. If the configuration changes BUT which - * zone is the metadata master does NOT change, then only the - * RGWPeriod::epoch is incremented (and the RGWPeriod::id remains the - * same). - * - * When a new RGWPeriod is created with a new RGWPeriod::id (uuid), it - * is linked back to its predecessor RGWPeriod through the - * RGWPeriod::predecessor_uuid field, thus creating a "linked - * list"-like structure of RGWPeriods back to the cluster's creation. - */ -class RGWPeriod -{ -public: - std::string id; //< a uuid - epoch_t epoch{0}; - std::string predecessor_uuid; - std::vector<std::string> sync_status; - RGWPeriodMap period_map; - RGWPeriodConfig period_config; - std::string master_zonegroup; - rgw_zone_id master_zone; - - std::string realm_id; - std::string realm_name; - epoch_t realm_epoch{1}; //< realm epoch when period was made current - - CephContext *cct{nullptr}; - RGWSI_SysObj *sysobj_svc{nullptr}; - - int read_info(const DoutPrefixProvider *dpp, optional_yield y); - int read_latest_epoch(const DoutPrefixProvider *dpp, - RGWPeriodLatestEpochInfo& epoch_info, - optional_yield y, - RGWObjVersionTracker *objv = nullptr); - int use_latest_epoch(const DoutPrefixProvider *dpp, optional_yield y); - int use_current_period(); - - const std::string get_period_oid() const; - const std::string get_period_oid_prefix() const; - - // gather the metadata sync status for each shard; only for use on master zone - int update_sync_status(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const RGWPeriod &current_period, - std::ostream& error_stream, bool force_if_stale); - -public: - RGWPeriod() {} - - explicit RGWPeriod(const std::string& period_id, epoch_t _epoch = 0) - : id(period_id), epoch(_epoch) {} - - const std::string& get_id() const { return id; } - epoch_t get_epoch() const { return epoch; } - epoch_t get_realm_epoch() const { return realm_epoch; } - const std::string& get_predecessor() const { return predecessor_uuid; } - const rgw_zone_id& get_master_zone() const { return master_zone; } - const std::string& get_master_zonegroup() const { return master_zonegroup; } - const std::string& get_realm() const { return realm_id; } - const std::string& get_realm_name() const { return realm_name; } - const RGWPeriodMap& get_map() const { return period_map; } - RGWPeriodConfig& get_config() { return period_config; } - const RGWPeriodConfig& get_config() const { return period_config; } - const std::vector<std::string>& get_sync_status() const { return sync_status; } - rgw_pool get_pool(CephContext *cct) const; - const std::string& get_latest_epoch_oid() const; - const std::string& get_info_oid_prefix() const; - - void set_user_quota(RGWQuotaInfo& user_quota) { -
period_config.quota.user_quota = user_quota; - } - - void set_bucket_quota(RGWQuotaInfo& bucket_quota) { - period_config.quota.bucket_quota = bucket_quota; - } - - void set_id(const std::string& _id) { - this->id = _id; - period_map.id = _id; - } - void set_epoch(epoch_t epoch) { this->epoch = epoch; } - void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; } - - void set_predecessor(const std::string& predecessor) - { - predecessor_uuid = predecessor; - } - - void set_realm_id(const std::string& _realm_id) { - realm_id = _realm_id; - } - - int reflect(const DoutPrefixProvider *dpp, optional_yield y); - - int get_zonegroup(RGWZoneGroup& zonegroup, - const std::string& zonegroup_id) const; - - bool is_single_zonegroup() const - { - return (period_map.zonegroups.size() <= 1); - } - - /* - returns true if there are several zone groups with at least one zone - */ - bool is_multi_zonegroups_with_zones() const - { - int count = 0; - for (const auto& zg: period_map.zonegroups) { - if (zg.second.zones.size() > 0) { - if (count++ > 0) { - return true; - } - } - } - return false; - } - - bool find_zone(const DoutPrefixProvider *dpp, - const rgw_zone_id& zid, - RGWZoneGroup *pzonegroup, - optional_yield y) const; - - int get_latest_epoch(const DoutPrefixProvider *dpp, epoch_t& epoch, optional_yield y); - int set_latest_epoch(const DoutPrefixProvider *dpp, optional_yield y, - epoch_t epoch, bool exclusive = false, - RGWObjVersionTracker *objv = nullptr); - // update latest_epoch if the given epoch is higher, else return -EEXIST - int update_latest_epoch(const DoutPrefixProvider *dpp, epoch_t epoch, optional_yield y); - - int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const std::string &period_realm_id, optional_yield y, - const std::string &period_realm_name = "", bool setup_obj = true); - int init(const DoutPrefixProvider *dpp, CephContext *_cct, RGWSI_SysObj *_sysobj_svc, optional_yield y, bool setup_obj = true); - - int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true); - int delete_obj(const DoutPrefixProvider *dpp, optional_yield y); - int store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y); - int add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y); - - void fork(); - int update(const DoutPrefixProvider *dpp, optional_yield y); - - // commit a staging period; only for use on master zone - int commit(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - RGWRealm& realm, const RGWPeriod &current_period, - std::ostream& error_stream, optional_yield y, - bool force_if_stale = false); - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - encode(id, bl); - encode(epoch, bl); - encode(realm_epoch, bl); - encode(predecessor_uuid, bl); - encode(sync_status, bl); - encode(period_map, bl); - encode(master_zone, bl); - encode(master_zonegroup, bl); - encode(period_config, bl); - encode(realm_id, bl); - encode(realm_name, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); - decode(id, bl); - decode(epoch, bl); - decode(realm_epoch, bl); - decode(predecessor_uuid, bl); - decode(sync_status, bl); - decode(period_map, bl); - decode(master_zone, bl); - decode(master_zonegroup, bl); - decode(period_config, bl); - decode(realm_id, bl); - decode(realm_name, bl); - DECODE_FINISH(bl); - } - void dump(Formatter *f) const; - void decode_json(JSONObj *obj); - static void generate_test_instances(std::list<RGWPeriod*>& o); - - static
std::string get_staging_id(const std::string& realm_id) { - return realm_id + ":staging"; - } -}; -WRITE_CLASS_ENCODER(RGWPeriod) - -namespace rgw { - -/// Look up a realm by its id. If no id is given, look it up by name. -/// If no name is given, fall back to the cluster's default realm. -int read_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view realm_id, - std::string_view realm_name, - RGWRealm& info, - std::unique_ptr<sal::RealmWriter>* writer = nullptr); - -/// Create a realm and its initial period. If the info.id is empty, a -/// random uuid will be generated. -int create_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWRealm& info, - std::unique_ptr<sal::RealmWriter>* writer = nullptr); - -/// Set the given realm as the cluster's default realm. -int set_default_realm(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWRealm& info, - bool exclusive = false); - -/// Update the current_period of an existing realm. -int realm_set_current_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - sal::RealmWriter& writer, RGWRealm& realm, - const RGWPeriod& period); - -/// Overwrite the local zonegroup and period config objects with the new -/// configuration contained in the given period. -int reflect_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWPeriod& info); - -/// Return the staging period id for the given realm. -std::string get_staging_period_id(std::string_view realm_id); - -/// Convert the given period into a separate staging period, where -/// radosgw-admin can make changes to it without affecting the running -/// configuration. -void fork_period(const DoutPrefixProvider* dpp, RGWPeriod& info); - -/// Read all zonegroups in the period's realm and add them to the period. -int update_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, RGWPeriod& info); - -/// Validate the given 'staging' period and try to commit it as the -/// realm's new current period. -int commit_period(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, sal::Driver* driver, - RGWRealm& realm, sal::RealmWriter& realm_writer, - const RGWPeriod& current_period, - RGWPeriod& info, std::ostream& error_stream, - bool force_if_stale); - - -/// Look up a zonegroup by its id. If no id is given, look it up by name. -/// If no name is given, fall back to the cluster's default zonegroup. -int read_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view zonegroup_id, - std::string_view zonegroup_name, - RGWZoneGroup& info, - std::unique_ptr<sal::ZoneGroupWriter>* writer = nullptr); - -/// Initialize and create the given zonegroup. If the given info.id is empty, -/// a random uuid will be generated. May fail with -EEXIST. -int create_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWZoneGroup& info); - -/// Set the given zonegroup as its realm's default zonegroup. -int set_default_zonegroup(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneGroup& info, - bool exclusive = false); - -/// Add a zone to the zonegroup, or update an existing zone entry.
-int add_zone_to_group(const DoutPrefixProvider* dpp, - RGWZoneGroup& zonegroup, - const RGWZoneParams& zone_params, - const bool *pis_master, const bool *pread_only, - const std::list& endpoints, - const std::string *ptier_type, - const bool *psync_from_all, - const std::list& sync_from, - const std::list& sync_from_rm, - const std::string *predirect_zone, - std::optional bucket_index_max_shards, - const rgw::zone_features::set& enable_features, - const rgw::zone_features::set& disable_features); - -/// Remove a zone by id from its zonegroup, promoting a new master zone if -/// necessary. -int remove_zone_from_group(const DoutPrefixProvider* dpp, - RGWZoneGroup& info, - const rgw_zone_id& zone_id); - - -/// Look up a zone by its id. If no id is given, look it up by name. If no name -/// is given, fall back to the realm's default zone. -int read_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, - std::string_view zone_id, - std::string_view zone_name, - RGWZoneParams& info, - std::unique_ptr* writer = nullptr); - -/// Initialize and create a new zone. If the given info.id is empty, a random -/// uuid will be generated. Pool names are initialized with the zone name as a -/// prefix. If any pool names conflict with existing zones, a random suffix is -/// added. -int create_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, bool exclusive, - RGWZoneParams& info, - std::unique_ptr* writer = nullptr); - -/// Initialize the zone's pool names using the zone name as a prefix. If a pool -/// name conflicts with an existing zone's pool, add a unique suffix. -int init_zone_pool_names(const DoutPrefixProvider *dpp, optional_yield y, - const std::set& pools, RGWZoneParams& info); - -/// Set the given zone as its realm's default zone. -int set_default_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneParams& info, - bool exclusive = false); - -/// Delete an existing zone and remove it from any zonegroups that contain it. 
-int delete_zone(const DoutPrefixProvider* dpp, optional_yield y, - sal::ConfigStore* cfgstore, const RGWZoneParams& info, - sal::ZoneWriter& writer); - -} // namespace rgw - -#endif diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 0a38a469860..8909788a4f8 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -320,7 +320,7 @@ add_executable(ceph_test_librgw_file_nfsns target_include_directories(ceph_test_librgw_file_nfsns PUBLIC "${LUA_INCLUDE_DIR}" SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw" - SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/store/rados") + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados") target_link_libraries(ceph_test_librgw_file_nfsns rgw librados @@ -352,7 +352,7 @@ add_executable(ceph_test_librgw_file_marker target_include_directories(ceph_test_librgw_file_marker PUBLIC "${LUA_INCLUDE_DIR}" SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw" - SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/store/rados") + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados") target_link_libraries(ceph_test_librgw_file_marker rgw librados @@ -370,7 +370,7 @@ add_executable(ceph_test_librgw_file_xattr target_include_directories(ceph_test_librgw_file_xattr PUBLIC "${LUA_INCLUDE_DIR}" SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw" - SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/store/rados") + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados") target_link_libraries(ceph_test_librgw_file_xattr rgw librados @@ -399,7 +399,7 @@ add_executable(test_rgw_ldap ) target_include_directories(test_rgw_ldap SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw" - SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/store/rados") + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados") target_link_libraries(test_rgw_ldap librados ceph-common
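For reference, the relocated rgw_zone.h declares a small configuration API in namespace rgw, and a minimal usage sketch may help readers navigating the move. The sketch below is illustrative only and not part of this commit: it assumes an already-initialized `rgw::sal::ConfigStore*`, `DoutPrefixProvider*`, and `optional_yield` (the names `cfgstore`, `dpp`, and `y` are placeholders), and it calls only functions declared in the moved header.

```cpp
// Illustrative sketch (not from this commit): bootstrap a zonegroup and a
// zone using the rgw:: helpers declared in rgw_zone.h, which this commit
// moves from store/rados to driver/rados without changing its contents.
#include "rgw_zone.h"        // moved header (driver/rados)
#include "rgw_sal_config.h"  // rgw::sal::ConfigStore (include path assumed)

static int bootstrap_defaults(const DoutPrefixProvider* dpp, optional_yield y,
                              rgw::sal::ConfigStore* cfgstore)
{
  RGWZoneGroup zonegroup;
  zonegroup.name = "default";  // an empty info.id is replaced by a random uuid
  // exclusive=true: fail with -EEXIST instead of overwriting an existing entry
  int r = rgw::create_zonegroup(dpp, y, cfgstore, true, zonegroup);
  if (r < 0 && r != -EEXIST) {
    return r;
  }

  RGWZoneParams zone;
  zone.name = "default";  // pool names are initialized with this prefix
  r = rgw::create_zone(dpp, y, cfgstore, true, zone);
  if (r < 0 && r != -EEXIST) {
    return r;
  }

  // both create_* helpers already try to set their object as the default
  // (passing exclusive=true, so racing creates keep the first default); an
  // explicit non-exclusive call makes the intent unambiguous here
  return rgw::set_default_zone(dpp, y, cfgstore, zone);
}
```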
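Similarly, the tier-configuration parsing shown earlier in this diff (RGWZoneGroupPlacementTier::update_params and its cloud-s3 variant) consumes a JSONFormattable of key/value settings; in radosgw-admin these typically arrive as --tier-config key=value pairs. Below is a hedged sketch of driving that parser directly, with placeholder values throughout, assuming JSONFormattable's set() accessor from common/ceph_json.h.

```cpp
// Illustrative sketch (not from this commit): feed tier parameters to the
// update_params() code shown in this diff. The keys match exactly what
// update_params() checks; the endpoint and credentials are placeholders.
#include "rgw_zone.h"
#include "common/ceph_json.h"  // JSONFormattable (include path assumed)

int configure_cloud_tier(RGWZoneGroupPlacementTier& tier)
{
  tier.tier_type = "cloud-s3";  // only this tier type parses the S3 fields

  JSONFormattable cfg;
  cfg.set("endpoint", "http://cloud-gw.example:8000");  // placeholder endpoint
  cfg.set("access_key", "ACCESS_PLACEHOLDER");
  cfg.set("secret", "SECRET_PLACEHOLDER");
  cfg.set("retain_head_object", "true");            // compared as the string "true"
  cfg.set("multipart_sync_threshold", "33554432");  // parsed via strict_strtoll;
                                                    // falls back to the 32M default on error

  // note: update_params() forwards to t.s3.update_params() only for
  // "cloud-s3"; for other tier types it returns -1 even though
  // retain_head_object was still applied
  return tier.update_params(cfg);
}
```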