From aa1aca4ebe72128a4d3965b8b1ffbeec354e8184 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 6 Oct 2020 17:59:24 -0400 Subject: [PATCH] rgw: generalize error handling in RGWShardCollectCR RGWShardCollectCR was hard-coded to ignore ENOENT errors and print a 'failed to fetch log status' error message. this moves that logic into a handle_result() virtual function. it also exposes the member variables 'status' and 'max_concurrent' as protected, so they can be consulted or modified by overrides of handle_result() and spawn_next() Signed-off-by: Casey Bodley --- src/rgw/rgw_data_sync.cc | 50 +++++++++++++++++++++++++++++++++++++++ src/rgw/rgw_sync.cc | 38 +++++++++++++++++++++++++---- src/rgw/rgw_sync.h | 22 ++++++++++------- src/rgw/rgw_trim_bilog.cc | 20 ++++++++++++++++ src/rgw/rgw_trim_mdlog.cc | 38 +++++++++++++++++++++++++++++ 5 files changed, 154 insertions(+), 14 deletions(-) diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc index be14bb2f0be..750c4ae7a3a 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -81,6 +81,16 @@ class RGWReadDataSyncStatusMarkersCR : public RGWShardCollectCR { map& markers; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read data sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWReadDataSyncStatusMarkersCR(RGWDataSyncCtx *sc, int num_shards, map& markers) @@ -117,6 +127,16 @@ class RGWReadDataSyncRecoveringShardsCR : public RGWShardCollectCR { string marker; std::vector& omapkeys; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list recovering data sync: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWReadDataSyncRecoveringShardsCR(RGWDataSyncCtx *sc, uint64_t _max_entries, int _num_shards, std::vector& omapkeys) @@ -352,6 +372,16 @@ class RGWReadRemoteDataLogInfoCR : public RGWShardCollectCR { int shard_id; #define READ_DATALOG_MAX_CONCURRENT 10 + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch remote datalog info: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWReadRemoteDataLogInfoCR(RGWDataSyncCtx *_sc, int _num_shards, @@ -442,6 +472,16 @@ class RGWListRemoteDataLogCR : public RGWShardCollectCR { map::iterator iter; #define READ_DATALOG_MAX_CONCURRENT 10 + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list remote datalog: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWListRemoteDataLogCR(RGWDataSyncCtx *_sc, map& _shards, @@ -4962,6 +5002,16 @@ class RGWCollectBucketSyncStatusCR : public RGWShardCollectCR { using Vector = std::vector; Vector::iterator i, end; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read bucket shard sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWCollectBucketSyncStatusCR(rgw::sal::RadosStore* store, RGWDataSyncCtx *sc, const RGWBucketInfo& source_bucket_info, diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc index 1015930b04d..a2b388d1eae 100644 --- a/src/rgw/rgw_sync.cc +++ b/src/rgw/rgw_sync.cc @@ -163,13 +163,13 @@ int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) { while (spawn_next()) { current_running++; - while (current_running >= max_concurrent) { + if (current_running >= max_concurrent) { int child_ret; yield wait_for_child(); if (collect_next(&child_ret)) { current_running--; - if (child_ret < 0 && child_ret != -ENOENT) { - ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl; + child_ret = handle_result(child_ret); + if (child_ret < 0) { status = child_ret; } } @@ -180,8 +180,8 @@ int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) { yield wait_for_child(); if (collect_next(&child_ret)) { current_running--; - if (child_ret < 0 && child_ret != -ENOENT) { - ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl; + child_ret = handle_result(child_ret); + if (child_ret < 0) { status = child_ret; } } @@ -204,6 +204,15 @@ class RGWReadRemoteMDLogInfoCR : public RGWShardCollectCR { int shard_id; #define READ_MDLOG_MAX_CONCURRENT 10 + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch mdlog status: " << cpp_strerror(r) << dendl; + } + return r; + } public: RGWReadRemoteMDLogInfoCR(RGWMetaSyncEnv *_sync_env, const std::string& period, int _num_shards, @@ -225,6 +234,15 @@ class RGWListRemoteMDLogCR : public RGWShardCollectCR { map::iterator iter; #define READ_MDLOG_MAX_CONCURRENT 10 + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to list remote mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } public: RGWListRemoteMDLogCR(RGWMetaSyncEnv *_sync_env, const std::string& period, map& _shards, @@ -739,6 +757,16 @@ class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR { int shard_id{0}; map& markers; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to read metadata sync markers: " + << cpp_strerror(r) << dendl; + } + return r; + } public: RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards, map& markers) diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h index 41b0119d58a..8c4e511ae3e 100644 --- a/src/rgw/rgw_sync.h +++ b/src/rgw/rgw_sync.h @@ -513,16 +513,20 @@ public: }; class RGWShardCollectCR : public RGWCoroutine { - int cur_shard = 0; - int current_running; + int current_running = 0; + protected: int max_concurrent; - int status; - -public: - RGWShardCollectCR(CephContext *_cct, int _max_concurrent) : RGWCoroutine(_cct), - current_running(0), - max_concurrent(_max_concurrent), - status(0) {} + int status = 0; + + // called with the result of each child. error codes can be ignored by + // returning 0. if handle_result() returns a negative value, it's + // treated as an error and stored in 'status'. the last such error is + // reported to the caller with set_cr_error() + virtual int handle_result(int r) = 0; + public: + RGWShardCollectCR(CephContext *_cct, int _max_concurrent) + : RGWCoroutine(_cct), max_concurrent(_max_concurrent) + {} virtual bool spawn_next() = 0; int operate(const DoutPrefixProvider *dpp) override; diff --git a/src/rgw/rgw_trim_bilog.cc b/src/rgw/rgw_trim_bilog.cc index 05fbe4bbbfb..c95c76b06e0 100644 --- a/src/rgw/rgw_trim_bilog.cc +++ b/src/rgw/rgw_trim_bilog.cc @@ -388,6 +388,16 @@ class BucketTrimShardCollectCR : public RGWShardCollectCR { const RGWBucketInfo& bucket_info; const std::vector& markers; //< shard markers to trim size_t i{0}; //< index of current shard marker + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim bilog shard: " << cpp_strerror(r) << dendl; + } + return r; + } public: BucketTrimShardCollectCR(const DoutPrefixProvider *dpp, rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info, @@ -577,6 +587,16 @@ class BucketTrimInstanceCollectCR : public RGWShardCollectCR { std::vector::const_iterator bucket; std::vector::const_iterator end; const DoutPrefixProvider *dpp; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim bucket instance: " << cpp_strerror(r) << dendl; + } + return r; + } public: BucketTrimInstanceCollectCR(rgw::sal::RadosStore* store, RGWHTTPManager *http, BucketTrimObserver *observer, diff --git a/src/rgw/rgw_trim_mdlog.cc b/src/rgw/rgw_trim_mdlog.cc index 852d0221a75..d8e19594aea 100644 --- a/src/rgw/rgw_trim_mdlog.cc +++ b/src/rgw/rgw_trim_mdlog.cc @@ -30,6 +30,15 @@ class PurgeLogShardsCR : public RGWShardCollectCR { static constexpr int max_concurrent = 16; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to remove mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } public: PurgeLogShardsCR(rgw::sal::RadosStore* store, const RGWMetadataLog* mdlog, const rgw_pool& pool, int num_shards) @@ -264,6 +273,15 @@ class MetaMasterTrimShardCollectCR : public RGWShardCollectCR { std::string oid; const rgw_meta_sync_status& sync_status; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } public: MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog, const rgw_meta_sync_status& sync_status) @@ -315,6 +333,17 @@ class MetaMasterStatusCollectCR : public RGWShardCollectCR { MasterTrimEnv& env; connection_map::iterator c; std::vector::iterator s; + + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to fetch metadata sync status: " + << cpp_strerror(r) << dendl; + } + return r; + } public: explicit MetaMasterStatusCollectCR(MasterTrimEnv& env) : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), @@ -521,6 +550,15 @@ class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR int shard_id{0}; + int handle_result(int r) override { + if (r == -ENOENT) { // ENOENT is not a fatal error + return 0; + } + if (r < 0) { + ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl; + } + return r; + } public: MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog) : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS), -- 2.39.5