rgw: generalize error handling in RGWShardCollectCR

author Casey Bodley <cbodley@redhat.com>

Tue, 6 Oct 2020 21:59:24 +0000 (17:59 -0400)

committer Casey Bodley <cbodley@redhat.com>

Fri, 27 May 2022 19:47:31 +0000 (15:47 -0400)
author Casey Bodley <cbodley@redhat.com>
Tue, 6 Oct 2020 21:59:24 +0000 (17:59 -0400)
committer Casey Bodley <cbodley@redhat.com>
Fri, 27 May 2022 19:47:31 +0000 (15:47 -0400)
diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc

index be14bb2f0bed3959552698d9498e9599b2c842ae..750c4ae7a3a508337b95d64938ea7b78127b3db4 100644 (file)
--- a/src/rgw/rgw_data_sync.cc
+++ b/src/rgw/rgw_data_sync.cc
@@ -81,6 +81,16 @@ class RGWReadDataSyncStatusMarkersCR : public RGWShardCollectCR {
  
    map<uint32_t, rgw_data_sync_marker>& markers;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to read data sync status: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    RGWReadDataSyncStatusMarkersCR(RGWDataSyncCtx *sc, int num_shards,
                                   map<uint32_t, rgw_data_sync_marker>& markers)
@@ -117,6 +127,16 @@ class RGWReadDataSyncRecoveringShardsCR : public RGWShardCollectCR {
    string marker;
    std::vector<RGWRadosGetOmapKeysCR::ResultPtr>& omapkeys;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to list recovering data sync: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    RGWReadDataSyncRecoveringShardsCR(RGWDataSyncCtx *sc, uint64_t _max_entries, int _num_shards,
                                      std::vector<RGWRadosGetOmapKeysCR::ResultPtr>& omapkeys)
@@ -352,6 +372,16 @@ class RGWReadRemoteDataLogInfoCR : public RGWShardCollectCR {
    int shard_id;
  #define READ_DATALOG_MAX_CONCURRENT 10
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to fetch remote datalog info: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
  public:
    RGWReadRemoteDataLogInfoCR(RGWDataSyncCtx *_sc,
                       int _num_shards,
@@ -442,6 +472,16 @@ class RGWListRemoteDataLogCR : public RGWShardCollectCR {
    map<int, string>::iterator iter;
  #define READ_DATALOG_MAX_CONCURRENT 10
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to list remote datalog: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
  public:
    RGWListRemoteDataLogCR(RGWDataSyncCtx *_sc,
                       map<int, string>& _shards,
@@ -4962,6 +5002,16 @@ class RGWCollectBucketSyncStatusCR : public RGWShardCollectCR {
    using Vector = std::vector<rgw_bucket_shard_sync_info>;
    Vector::iterator i, end;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to read bucket shard sync status: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    RGWCollectBucketSyncStatusCR(rgw::sal::RadosStore* store, RGWDataSyncCtx *sc,
                                 const RGWBucketInfo& source_bucket_info,
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc

index 1015930b04de77cd0e99948c5fc3052c866e0391..a2b388d1eae084e1ee57f37577675c6f2376670c 100644 (file)
--- a/src/rgw/rgw_sync.cc
+++ b/src/rgw/rgw_sync.cc
@@ -163,13 +163,13 @@ int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) {
      while (spawn_next()) {
        current_running++;
  
-      while (current_running >= max_concurrent) {
+      if (current_running >= max_concurrent) {
          int child_ret;
          yield wait_for_child();
          if (collect_next(&child_ret)) {
            current_running--;
-          if (child_ret < 0 && child_ret != -ENOENT) {
-            ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl;
+          child_ret = handle_result(child_ret);
+          if (child_ret < 0) {
              status = child_ret;
            }
          }
@@ -180,8 +180,8 @@ int RGWShardCollectCR::operate(const DoutPrefixProvider *dpp) {
        yield wait_for_child();
        if (collect_next(&child_ret)) {
          current_running--;
-        if (child_ret < 0 && child_ret != -ENOENT) {
-          ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl;
+        child_ret = handle_result(child_ret);
+        if (child_ret < 0) {
            status = child_ret;
          }
        }
@@ -204,6 +204,15 @@ class RGWReadRemoteMDLogInfoCR : public RGWShardCollectCR {
    int shard_id;
  #define READ_MDLOG_MAX_CONCURRENT 10
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to fetch mdlog status: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
  public:
    RGWReadRemoteMDLogInfoCR(RGWMetaSyncEnv *_sync_env,
                       const std::string& period, int _num_shards,
@@ -225,6 +234,15 @@ class RGWListRemoteMDLogCR : public RGWShardCollectCR {
    map<int, string>::iterator iter;
  #define READ_MDLOG_MAX_CONCURRENT 10
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to list remote mdlog shard: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
  public:
    RGWListRemoteMDLogCR(RGWMetaSyncEnv *_sync_env,
                       const std::string& period, map<int, string>& _shards,
@@ -739,6 +757,16 @@ class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR {
    int shard_id{0};
    map<uint32_t, rgw_meta_sync_marker>& markers;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to read metadata sync markers: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards,
                               map<uint32_t, rgw_meta_sync_marker>& markers)
diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h

index 41b0119d58a31cfa0fb1e984601867ee176901f1..8c4e511ae3eeb5c450609b235e159ba65cc06a13 100644 (file)
--- a/src/rgw/rgw_sync.h
+++ b/src/rgw/rgw_sync.h
@@ -513,16 +513,20 @@ public:
  };
  
  class RGWShardCollectCR : public RGWCoroutine {
-  int cur_shard = 0;
-  int current_running;
+  int current_running = 0;
+ protected:
    int max_concurrent;
-  int status;
-
-public:
-  RGWShardCollectCR(CephContext *_cct, int _max_concurrent) : RGWCoroutine(_cct),
-                                                             current_running(0),
-                                                             max_concurrent(_max_concurrent),
-                                                             status(0) {}
+  int status = 0;
+
+  // called with the result of each child. error codes can be ignored by
+  // returning 0. if handle_result() returns a negative value, it's
+  // treated as an error and stored in 'status'. the last such error is
+  // reported to the caller with set_cr_error()
+  virtual int handle_result(int r) = 0;
+ public:
+  RGWShardCollectCR(CephContext *_cct, int _max_concurrent)
+    : RGWCoroutine(_cct), max_concurrent(_max_concurrent)
+  {}
  
    virtual bool spawn_next() = 0;
    int operate(const DoutPrefixProvider *dpp) override;
diff --git a/src/rgw/rgw_trim_bilog.cc b/src/rgw/rgw_trim_bilog.cc

index 05fbe4bbbfb60c21007949fcc9ed78ca40f66739..c95c76b06e0d21fa9f3a48e7d3b681a2d34e17e1 100644 (file)
--- a/src/rgw/rgw_trim_bilog.cc
+++ b/src/rgw/rgw_trim_bilog.cc
@@ -388,6 +388,16 @@ class BucketTrimShardCollectCR : public RGWShardCollectCR {
    const RGWBucketInfo& bucket_info;
    const std::vector<std::string>& markers; //< shard markers to trim
    size_t i{0}; //< index of current shard marker
+
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to trim bilog shard: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    BucketTrimShardCollectCR(const DoutPrefixProvider *dpp,
                             rgw::sal::RadosStore* store, const RGWBucketInfo& bucket_info,
@@ -577,6 +587,16 @@ class BucketTrimInstanceCollectCR : public RGWShardCollectCR {
    std::vector<std::string>::const_iterator bucket;
    std::vector<std::string>::const_iterator end;
    const DoutPrefixProvider *dpp;
+
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to trim bucket instance: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    BucketTrimInstanceCollectCR(rgw::sal::RadosStore* store, RGWHTTPManager *http,
                                BucketTrimObserver *observer,
diff --git a/src/rgw/rgw_trim_mdlog.cc b/src/rgw/rgw_trim_mdlog.cc

index 852d0221a75162c083c343c467fb924b5d1dae69..d8e19594aeaa2e3535ab1a1eed699dedc04245ad 100644 (file)
--- a/src/rgw/rgw_trim_mdlog.cc
+++ b/src/rgw/rgw_trim_mdlog.cc
@@ -30,6 +30,15 @@ class PurgeLogShardsCR : public RGWShardCollectCR {
  
    static constexpr int max_concurrent = 16;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to remove mdlog shard: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    PurgeLogShardsCR(rgw::sal::RadosStore* store, const RGWMetadataLog* mdlog,
                     const rgw_pool& pool, int num_shards)
@@ -264,6 +273,15 @@ class MetaMasterTrimShardCollectCR : public RGWShardCollectCR {
    std::string oid;
    const rgw_meta_sync_status& sync_status;
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog,
                                 const rgw_meta_sync_status& sync_status)
@@ -315,6 +333,17 @@ class MetaMasterStatusCollectCR : public RGWShardCollectCR {
    MasterTrimEnv& env;
    connection_map::iterator c;
    std::vector<rgw_meta_sync_status>::iterator s;
+
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to fetch metadata sync status: "
+          << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    explicit MetaMasterStatusCollectCR(MasterTrimEnv& env)
      : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
@@ -521,6 +550,15 @@ class MetaPeerTrimShardCollectCR : public RGWShardCollectCR {
    RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR
    int shard_id{0};
  
+  int handle_result(int r) override {
+    if (r == -ENOENT) { // ENOENT is not a fatal error
+      return 0;
+    }
+    if (r < 0) {
+      ldout(cct, 4) << "failed to trim mdlog shard: " << cpp_strerror(r) << dendl;
+    }
+    return r;
+  }
   public:
    MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog)
      : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
author	Casey Bodley <cbodley@redhat.com>
	Tue, 6 Oct 2020 21:59:24 +0000 (17:59 -0400)
committer	Casey Bodley <cbodley@redhat.com>
	Fri, 27 May 2022 19:47:31 +0000 (15:47 -0400)
src/rgw/rgw_data_sync.cc		patch \| blob \| history
src/rgw/rgw_sync.cc		patch \| blob \| history
src/rgw/rgw_sync.h		patch \| blob \| history
src/rgw/rgw_trim_bilog.cc		patch \| blob \| history
src/rgw/rgw_trim_mdlog.cc		patch \| blob \| history