git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: RGWMetaSyncShardControlCR retries with backoff on all error codes 13546/head
author Casey Bodley <cbodley@redhat.com>
Mon, 20 Feb 2017 21:00:01 +0000 (16:00 -0500)
committer Casey Bodley <cbodley@redhat.com>
Mon, 27 Feb 2017 20:05:18 +0000 (15:05 -0500)
RGWBackoffControlCR only treats EBUSY and EAGAIN as 'temporary' error
codes, with all other errors being fatal when exit_on_error is set.

To RGWMetaSyncShardControlCR, a 'fatal' error means that no further sync
is possible on that shard until the gateway restarts.

This changes RGWMetaSyncShardControlCR to set exit_on_error to false, so
that it will continue to retry with backoff regardless of the error code
it receives.

Fixes: http://tracker.ceph.com/issues/19019
Signed-off-by: Casey Bodley <cbodley@redhat.com>
src/rgw/rgw_sync.cc

index 3e1b6c10e647fedf2fa2cc8938ca171ba29fdf22..65c213fa690bb0dee872c81199fe362434b20eb5 100644 (file)
@@ -1578,6 +1578,7 @@ public:
           ldout(sync_env->cct, 10) << *this << ": failed to fetch more log entries, retcode=" << retcode << dendl;
           yield lease_cr->go_down();
           drain_all();
+          *reset_backoff = false; // back off and try again later
           return retcode;
         }
         *reset_backoff = true; /* if we got to this point, all systems function */
@@ -1587,6 +1588,13 @@ public:
           yield call(new RGWReadMDLogEntriesCR(sync_env, mdlog, shard_id,
                                                &max_marker, INCREMENTAL_MAX_ENTRIES,
                                                &log_entries, &truncated));
+          if (retcode < 0) {
+            ldout(sync_env->cct, 10) << *this << ": failed to list mdlog entries, retcode=" << retcode << dendl;
+            yield lease_cr->go_down();
+            drain_all();
+            *reset_backoff = false; // back off and try again later
+            return retcode;
+          }
           for (log_iter = log_entries.begin(); log_iter != log_entries.end(); ++log_iter) {
             if (!period_marker.empty() && period_marker < log_iter->id) {
               done_with_period = true;
@@ -1657,12 +1665,13 @@ class RGWMetaSyncShardControlCR : public RGWBackoffControlCR
   rgw_meta_sync_marker sync_marker;
   const std::string period_marker;
 
+  static constexpr bool exit_on_error = false; // retry on all errors
 public:
   RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_bucket& _pool,
                             const std::string& period, RGWMetadataLog* mdlog,
                             uint32_t _shard_id, const rgw_meta_sync_marker& _marker,
                             std::string&& period_marker)
-    : RGWBackoffControlCR(_sync_env->cct, true), sync_env(_sync_env),
+    : RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env),
       pool(_pool), period(period), mdlog(mdlog), shard_id(_shard_id),
       sync_marker(_marker), period_marker(std::move(period_marker)) {}