From 7f1df9157064b330bf1faf061d13afbdb4d9b11f Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson" <aemerson@redhat.com>
Date: Fri, 14 May 2021 18:59:48 -0400
Subject: [PATCH] rgw: Trim old generations in BucketTrimInstanceCR

Only one generation per call: to avoid hammering the OSDs, either
remove a single old, fully-synced generation (deleting its index
objects and erasing it from the bucket's log layout) or trim the bilog
shards of the generation currently being synced, never both.

Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
---
 src/rgw/rgw_reshard.cc       |   1 -
 src/rgw/rgw_trim_bilog.cc    | 184 +++++++++++++++++++++++++++++------
 src/rgw/services/svc_rados.h |   6 +-
 3 files changed, 161 insertions(+), 30 deletions(-)

diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc
index e4aacb44b6ea5..672b4b9c8e2c1 100644
--- a/src/rgw/rgw_reshard.cc
+++ b/src/rgw/rgw_reshard.cc
@@ -11,7 +11,6 @@
 #include "rgw_sal_rados.h"
 #include "cls/rgw/cls_rgw_client.h"
 #include "cls/lock/cls_lock_client.h"
-#include "services/svc_bilog_rados.h"
 #include "common/errno.h"
 #include "common/ceph_json.h"
 
diff --git a/src/rgw/rgw_trim_bilog.cc b/src/rgw/rgw_trim_bilog.cc
index 71529b8aaa712..d75e655aee35c 100644
--- a/src/rgw/rgw_trim_bilog.cc
+++ b/src/rgw/rgw_trim_bilog.cc
@@ -377,7 +377,7 @@ class BucketTrimShardCollectCR : public RGWShardCollectCR {
  public:
   BucketTrimShardCollectCR(const DoutPrefixProvider *dpp,
                            rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info,
-                           rgw::bucket_index_layout_generation generation,
+                           const rgw::bucket_index_layout_generation& generation,
                            const std::vector<std::string>& markers)
     : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS),
       dpp(dpp), store(store), bucket_info(bucket_info),
@@ -405,8 +405,52 @@ bool BucketTrimShardCollectCR::spawn_next()
   return false;
 }
 
+/// Delete a BI generation, limiting the number of requests in flight.
+class BucketCleanIndexCollectCR : public RGWShardCollectCR {
+  static constexpr int MAX_CONCURRENT_SHARDS = 16;
+  const DoutPrefixProvider *dpp;
+  rgw::sal::RadosStore* const store;
+  const RGWBucketInfo& bucket_info;
+  rgw::bucket_index_layout_generation index;
+  uint32_t shard = 0;
+  const uint32_t num_shards = rgw::num_shards(index);
+
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "clean index: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
+ public:
+  BucketCleanIndexCollectCR(const DoutPrefixProvider *dpp,
+                            rgw::sal::RadosStore* store,
+                            const RGWBucketInfo& bucket_info,
+                            rgw::bucket_index_layout_generation index)
+    : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS),
+      dpp(dpp), store(store), bucket_info(bucket_info),
+      index(index)
+  {}
+  bool spawn_next() override {
+    if (shard < num_shards) {
+      RGWRados::BucketShard bs(store->getRados());
+      bs.init(dpp, bucket_info, index, shard);
+      spawn(new RGWRadosRemoveOidCR(store, std::move(bs.bucket_obj), nullptr),
+            false);
+      ++shard;
+      return true;
+    } else {
+      return false;
+    }
+  }
+};
+
+
 /// trim the bilog of all of the given bucket instance's shards
 class BucketTrimInstanceCR : public RGWCoroutine {
+  static constexpr auto MAX_RETRIES = 25u;
   rgw::sal::RadosStore* const store;
   RGWHTTPManager *const http;
   BucketTrimObserver *const observer;
@@ -419,7 +463,6 @@ class BucketTrimInstanceCR : public RGWCoroutine {
   const RGWBucketInfo *pbucket_info; //< pointer to bucket instance info to locate bucket indices
   int child_ret = 0;
   const DoutPrefixProvider *dpp;
-
 public:
   struct StatusShards {
     uint64_t generation = 0;
@@ -432,6 +475,12 @@ private:
   /// The layout of the generation to trim
   rgw::bucket_index_layout_generation totrim;
 
+  /// Generation to be cleaned/New bucket info (if any)
+  std::optional<std::pair<RGWBucketInfo,
+                          rgw::bucket_index_layout_generation>> clean_info;
+  /// Maximum number of times to attempt to put bucket info
+  unsigned retries = 0;
+
   int take_min_generation() {
     // Initialize the min_generation to the bucket's current
     // generation, used in case we have no peers.
@@ -469,6 +518,29 @@ private:
     return 0;
   }
 
+  /// If there is a generation below the minimum, prepare to clean it up.
+  int maybe_remove_generation() {
+    if (clean_info)
+      return 0;
+
+
+    if (pbucket_info->layout.logs.front().gen < totrim.gen) {
+      clean_info = {*pbucket_info, {}};
+      auto log = clean_info->first.layout.logs.cbegin();
+      if (log->layout.type == rgw::BucketLogType::InIndex) {
+        clean_info->second = log->layout.in_index;
+      } else {
+        ldpp_dout(dpp, 0) << "Unable to convert log of unknown type "
+                          << log->layout.type
+                          << " to rgw::bucket_index_layout_generation " << dendl;
+        return -EINVAL;
+      }
+
+      clean_info->first.layout.logs.erase(log);
+    }
+    return 0;
+  }
+
 public:
   BucketTrimInstanceCR(rgw::sal::RadosStore* store, RGWHTTPManager *http,
                        BucketTrimObserver *observer,
@@ -548,7 +620,6 @@ inline int parse_decode_json(
   return 0;
 }
 
-
 int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp)
 {
   reenter(this) {
@@ -633,39 +704,96 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp)
       }
     }
 
-    // initialize each shard with the maximum marker, which is only used when
-    // there are no peers syncing from us
-    min_markers.assign(std::max(1u, pbucket_info->layout.current_index.layout.normal.num_shards),
-                       RGWSyncLogTrimCR::max_marker);
-
     // Determine the minimum generation
     retcode = take_min_generation();
     if (retcode < 0) {
       ldpp_dout(dpp, 4) << "failed to find minimum generation" << dendl;
       return set_cr_error(retcode);
     }
-    // determine the minimum marker for each shard
-    retcode = take_min_status(cct, totrim.gen, peer_status.cbegin(),
-                              peer_status.cend(), &min_markers);
+    retcode = maybe_remove_generation();
     if (retcode < 0) {
-      ldpp_dout(dpp, 4) << "failed to correlate bucket sync status from peers" << dendl;
+      ldpp_dout(dpp, 4) << "error removing old generation from log: "
+                        << cpp_strerror(retcode) << dendl;
       return set_cr_error(retcode);
     }
 
-    // trim shards with a ShardCollectCR
-    ldpp_dout(dpp, 10) << "trimming bilogs for bucket=" << pbucket_info->bucket
-                       << " markers=" << min_markers << ", shards=" << min_markers.size() << dendl;
-    set_status("trimming bilog shards");
-    yield call(new BucketTrimShardCollectCR(dpp, store, *pbucket_info, totrim,
-                                            min_markers));
-    // ENODATA just means there were no keys to trim
-    if (retcode == -ENODATA) {
-      retcode = 0;
-    }
-    if (retcode < 0) {
-      ldpp_dout(dpp, 4) << "failed to trim bilog shards: "
-                        << cpp_strerror(retcode) << dendl;
-      return set_cr_error(retcode);
+    if (clean_info) {
+      yield call(new BucketCleanIndexCollectCR(dpp, store, clean_info->first,
+                                               clean_info->second));
+      if (retcode < 0) {
+        ldpp_dout(dpp, 0) << "failed to remove previous generation: "
+                          << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+      while (clean_info && retries < MAX_RETRIES) {
+        yield call(new RGWPutBucketInstanceInfoCR(
+                     store->svc()->rados->get_async_processor(),
+                     store, clean_info->first, false, {},
+                     nullptr, dpp));
+
+        // Raced, try again.
+        if (retcode == -ECANCELED) {
+          yield call(new RGWGetBucketInstanceInfoCR(
+                       store->svc()->rados->get_async_processor(),
+                       store, clean_info->first.bucket,
+                       &(clean_info->first), nullptr, dpp));
+          if (retcode < 0) {
+            ldpp_dout(dpp, 0) << "failed to get bucket info: "
+                              << cpp_strerror(retcode) << dendl;
+            return set_cr_error(retcode);
+          }
+          if (clean_info->first.layout.logs.front().gen ==
+              clean_info->second.gen) {
+            clean_info->first.layout.logs.erase(
+              clean_info->first.layout.logs.begin());
+            ++retries;
+            continue;
+          }
+          // Raced, but someone else did what we needed to.
+          retcode = 0;
+        }
+
+        if (retcode < 0) {
+          ldpp_dout(dpp, 0) << "failed to put bucket info: "
+                            << cpp_strerror(retcode) << dendl;
+          return set_cr_error(retcode);
+        }
+        clean_info = std::nullopt;
+      }
+    } else {
+      // To avoid hammering the OSD too hard, either trim old
+      // generations OR trim the current one.
+
+      // determine the minimum marker for each shard
+
+      // initialize each shard with the maximum marker, which is only used when
+      // there are no peers syncing from us
+      min_markers.assign(std::max(1u, rgw::num_shards(totrim)),
+                         RGWSyncLogTrimCR::max_marker);
+
+
+      retcode = take_min_status(cct, totrim.gen, peer_status.cbegin(),
+                                peer_status.cend(), &min_markers);
+      if (retcode < 0) {
+        ldpp_dout(dpp, 4) << "failed to correlate bucket sync status from peers" << dendl;
+        return set_cr_error(retcode);
+      }
+
+      // trim shards with a ShardCollectCR
+      ldpp_dout(dpp, 10) << "trimming bilogs for bucket=" << pbucket_info->bucket
+                         << " markers=" << min_markers << ", shards=" << min_markers.size() << dendl;
+      set_status("trimming bilog shards");
+      yield call(new BucketTrimShardCollectCR(dpp, store, *pbucket_info, totrim,
+                                              min_markers));
+      // ENODATA just means there were no keys to trim
+      if (retcode == -ENODATA) {
+        retcode = 0;
+      }
+      if (retcode < 0) {
+        ldpp_dout(dpp, 4) << "failed to trim bilog shards: "
+                          << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
     }
 
     observer->on_bucket_trimmed(std::move(bucket_instance));
@@ -675,6 +803,7 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp)
 }
 
 /// trim each bucket instance while limiting the number of concurrent operations
+
 class BucketTrimInstanceCollectCR : public RGWShardCollectCR {
   rgw::sal::RadosStore* const store;
   RGWHTTPManager *const http;
@@ -1302,8 +1431,7 @@ int bilog_trim(const DoutPrefixProvider* p, rgw::sal::RadosStore* store,
 
   auto log_layout = *log;
 
-  auto r = static_cast<rgw::sal::RadosStore*>(store)->svc()->bilog_rados
-    ->log_trim(p, bucket_info, log_layout, shard_id, start_marker, end_marker);
+  auto r = store->svc()->bilog_rados->log_trim(p, bucket_info, log_layout, shard_id, start_marker, end_marker);
   if (r < 0) {
     ldpp_dout(p, 5) << __PRETTY_FUNCTION__ << ":" << __LINE__
                     << "ERROR: bilog_rados->log_trim returned r=" << r << dendl;
diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h
index 2792d748bc5b6..9c8961c140fc1 100644
--- a/src/rgw/services/svc_rados.h
+++ b/src/rgw/services/svc_rados.h
@@ -108,10 +108,14 @@ public:
     return pool;
   }
 
-  librados::IoCtx& ioctx() {
+  librados::IoCtx& ioctx() & {
     return state.ioctx;
   }
 
+  librados::IoCtx&& ioctx() && {
+    return std::move(state.ioctx);
+  }
+
   struct List {
     Pool *pool{nullptr};
 
-- 
2.39.5
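
A note on the svc_rados.h hunk: it lvalue-qualifies the existing ioctx()
accessor and adds an rvalue-qualified overload that moves the IoCtx out,
presumably so a temporary shard handle (like the std::move(bs.bucket_obj)
passed to RGWRadosRemoveOidCR above) can surrender its IoCtx instead of
copying it. A minimal sketch of how such ref-qualified overloads dispatch;
every name below is an invented stand-in, not the RGW types themselves:

  #include <iostream>
  #include <string>
  #include <utility>

  struct Handle {                // stand-in for the RGW object handle
    std::string state;           // stand-in for librados::IoCtx

    std::string& get() & {       // lvalue objects hand out a reference
      return state;
    }
    std::string&& get() && {     // rvalue objects let callers move it out
      return std::move(state);
    }
  };

  int main() {
    Handle h{"borrowed"};
    std::string& ref = h.get();                 // & overload: no transfer
    std::string owned = Handle{"moved"}.get();  // && overload: state moved out
    std::cout << ref << " / " << owned << '\n';
  }

Without the qualifiers, calling get() on a temporary would hand back a
reference into an object about to be destroyed; the && overload turns that
case into an explicit transfer of ownership.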
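
The while loop in BucketTrimInstanceCR::operate() is an optimistic
read-modify-write: RGWPutBucketInstanceInfoCR comes back with -ECANCELED
when the stored bucket instance info changed underneath us (the patch's own
"Raced, try again" comment), so the coroutine re-reads the info, re-checks
whether the doomed generation is still at the front of the layout log, and
retries, bounded by MAX_RETRIES. A self-contained sketch of that control
flow against a toy in-memory store; all names here are invented:

  #include <cerrno>
  #include <cstdio>
  #include <deque>

  struct Record { std::deque<int> logs; };  // stand-in for the layout log list

  static Record stored{{0, 1, 2}};          // the "RADOS" copy of the record
  static bool race_once = true;             // make the first put() lose a race

  int get_record(Record& out) { out = stored; return 0; }

  int put_record(const Record& in) {
    if (race_once) { race_once = false; return -ECANCELED; }
    stored = in;
    return 0;
  }

  constexpr unsigned MAX_RETRIES = 25;

  int remove_generation(Record rec, int doomed_gen) {
    rec.logs.pop_front();                   // drop the old generation locally
    for (unsigned retries = 0; retries < MAX_RETRIES; ++retries) {
      int r = put_record(rec);
      if (r == -ECANCELED) {                // lost a race: refresh and re-check
        if ((r = get_record(rec)) < 0)
          return r;
        if (!rec.logs.empty() && rec.logs.front() == doomed_gen) {
          rec.logs.pop_front();             // still our job; retry the put
          continue;
        }
        return 0;                           // another writer already removed it
      }
      return r;                             // success, or a hard error
    }
    return -ECANCELED;                      // retries exhausted
  }

  int main() {
    std::printf("remove_generation -> %d\n", remove_generation(stored, 0));
  }

If the re-read shows the front generation already gone, another writer
finished the removal first and the loop can treat the race as success, which
is what the retcode = 0 branch in the patch does.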