return l.lock_exclusive(&ref.pool.ioctx(), ref.obj.oid);
}
-RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
- RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
- const string& _name, const string& _cookie, uint32_t _duration_secs) : RGWAsyncRadosRequest(caller, cn), store(_store),
- obj(_obj),
- lock_name(_name),
- cookie(_cookie),
- duration_secs(_duration_secs)
-{
-}
+RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(
+ RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore *_store,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ const string& _name,
+ const string& _cookie,
+ uint32_t _duration_secs) : RGWAsyncRadosRequest(caller, cn),
+ store(_store),
+ obj(_obj),
+ lock_name(_name),
+ cookie(_cookie),
+ duration_secs(_duration_secs)
+ {}
int RGWAsyncUnlockSystemObj::_send_request(const DoutPrefixProvider *dpp)
{
return r;
}
-RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store,
- const rgw_raw_obj& _obj,
- const string& _lock_name,
- const string& _cookie,
- uint32_t _duration) : RGWSimpleCoroutine(_store->ctx()),
- async_rados(_async_rados),
- store(_store),
- lock_name(_lock_name),
- cookie(_cookie),
- duration(_duration),
- obj(_obj),
- req(NULL)
+RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(
+ RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore *_store,
+ const rgw_raw_obj& _obj,
+ const string& _lock_name,
+ const string& _cookie,
+ uint32_t _duration_secs) : RGWSimpleCoroutine(_store->ctx()),
+ async_rados(_async_rados),
+ store(_store),
+ lock_name(_lock_name),
+ cookie(_cookie),
+ duration_secs(_duration_secs),
+ obj(_obj),
+ req(nullptr)
{
- set_description() << "rados lock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie << " duration=" << duration;
+ set_description() << "rados lock dest=" << obj << " lock=" <<
+ lock_name << " cookie=" << cookie << " duration_secs=" << duration_secs;
}
int RGWSimpleRadosLockCR::send_request(const DoutPrefixProvider *dpp)
{
set_status() << "sending request";
req = new RGWAsyncLockSystemObj(this, stack->create_completion_notifier(),
- store, NULL, obj, lock_name, cookie, duration);
+                                  store, nullptr, obj, lock_name, cookie,
+                                  duration_secs);
async_rados->queue(req);
return 0;
}
int RGWContinuousLeaseCR::operate(const DoutPrefixProvider *dpp)
{
if (aborted) {
caller->set_sleeping(false);
    return set_cr_done();
  }
  reenter(this) {
    last_renew_try_time = ceph::coarse_mono_clock::now();
while (!going_down) {
current_time = ceph::coarse_mono_clock::now();
- yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval));
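+      // sync fairness: renewals of a lock we already hold always proceed;
+      // a fresh lock is only attempted while this gateway is the highest
+      // bidder for the shard, otherwise report EBUSY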
+ if (bid_mgr) {
+        if (renew || bid_mgr->is_highest_bidder(shard_id)) {
+          yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, lock_duration_secs));
+        } else {
+          retcode = -EBUSY;
+        }
+ } else {
+ yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, lock_duration_secs));
+ }
if (latency) {
- latency->add_latency(ceph::coarse_mono_clock::now() - current_time);
+ latency->add_latency(ceph::coarse_mono_clock::now() - current_time);
}
- current_time = ceph::coarse_mono_clock::now();
if (current_time - last_renew_try_time > interval_tolerance) {
// renewal should happen between 50%-90% of interval
ldout(store->ctx(), 1) << *this << ": WARNING: did not renew lock " << obj << ":" << lock_name << ": within 90% of interval. " <<
ldout(store->ctx(), 20) << *this << ": couldn't lock " << obj << ":" << lock_name << ": retcode=" << retcode << dendl;
return set_state(RGWCoroutine_Error, retcode);
}
+ renew = true;
ldout(store->ctx(), 20) << *this << ": successfully locked " << obj << ":" << lock_name << dendl;
set_locked(true);
- yield wait(utime_t(interval / 2, 0));
+ yield wait(utime_t(lock_duration_secs / 2, 0));
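+      // release_lock() may have been called while we slept, asking us to
+      // voluntarily drop the lock at the end of a work period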
+ if (releasing_lock) {
+ set_locked(false);
+ yield call(new RGWSimpleRadosUnlockCR(async_rados, store, obj, lock_name, cookie));
+ releasing_lock = false;
+ renew = false;
+ }
}
set_locked(false); /* moot at this point anyway */
current_time = ceph::coarse_mono_clock::now();
#include "rgw_sal_rados.h"
#include "common/WorkQueue.h"
#include "common/Throttle.h"
-
+#include "cls/lock/cls_lock_client.h"
+#include "sync_fairness.h"
#include <atomic>
#include "common/ceph_time.h"
+#include <functional>
#include "services/svc_sys_obj.h"
#include "services/svc_bucket.h"
struct rgw_http_param_pair;
class RGWRESTConn;
-
class RGWAsyncRadosRequest : public RefCountedObject {
RGWCoroutine *caller;
RGWAioCompletionNotifier *notifier;
protected:
int _send_request(const DoutPrefixProvider *dpp) override;
public:
- RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
- RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
- const std::string& _name, const std::string& _cookie, uint32_t _duration_secs);
+ RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn,
+ rgw::sal::RadosStore *_store, RGWObjVersionTracker *_objv_tracker,
+ const rgw_raw_obj& _obj, const std::string& _name,
+ const std::string& _cookie, uint32_t _duration_secs);
};
class RGWAsyncUnlockSystemObj : public RGWAsyncRadosRequest {
};
class RGWSimpleRadosLockCR : public RGWSimpleCoroutine {
- RGWAsyncRadosProcessor *async_rados;
- rgw::sal::RadosStore* store;
- std::string lock_name;
- std::string cookie;
- uint32_t duration;
+ protected:
+ RGWAsyncRadosProcessor *async_rados;
+ rgw::sal::RadosStore* store;
+ std::string lock_name;
+ std::string cookie;
+ uint32_t duration_secs;
- rgw_raw_obj obj;
+ rgw_raw_obj obj;
- RGWAsyncLockSystemObj *req;
+ RGWAsyncLockSystemObj *req;
public:
RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store,
const rgw_raw_obj& _obj,
- const std::string& _lock_name,
+ const std::string& _lock_name,
const std::string& _cookie,
- uint32_t _duration);
+ uint32_t _duration_secs);
~RGWSimpleRadosLockCR() override {
request_cleanup();
}
return buf;
}
};
-
class RGWSimpleRadosUnlockCR : public RGWSimpleCoroutine {
RGWAsyncRadosProcessor *async_rados;
rgw::sal::RadosStore* store;
const std::string lock_name;
const std::string cookie{RGWSimpleRadosLockCR::gen_random_cookie(cct)};
- int interval;
- bool going_down{false};
+ int lock_duration_secs;
+  bool going_down{false};
bool locked{false};
const ceph::timespan interval_tolerance;
ceph::coarse_mono_time last_renew_try_time;
ceph::coarse_mono_time current_time;
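+  // set by release_lock(), potentially from a different stack; polled in
+  // operate() after each renewal sleep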
+  std::atomic<bool> releasing_lock{false};
+  bool renew{false}; // member, not a local: a local in operate() would be
+                     // reinitialized on every coroutine reentry
LatencyMonitor* latency;
+ uint32_t shard_id;
+ rgw::sync_fairness::BidManager* bid_mgr;
public:
  RGWContinuousLeaseCR(RGWAsyncRadosProcessor* async_rados,
                       rgw::sal::RadosStore* _store,
                       rgw_raw_obj obj, std::string lock_name,
-                      int interval, RGWCoroutine* caller,
-                      LatencyMonitor* const latency)
+                      int _lock_duration_secs, RGWCoroutine* caller,
+                      LatencyMonitor* const latency,
+                      // optional sync-fairness arguments; the defaults keep
+                      // the old behavior of always competing for the lock
+                      uint32_t _shard_id = 0,
+                      rgw::sync_fairness::BidManager* _bid_mgr = nullptr)
    : RGWCoroutine(_store->ctx()), async_rados(async_rados), store(_store),
      obj(std::move(obj)), lock_name(std::move(lock_name)),
-     interval(interval), interval_tolerance(ceph::make_timespan(9*interval/10)),
-     ts_interval(ceph::make_timespan(interval)), caller(caller), latency(latency)
+     lock_duration_secs(_lock_duration_secs),
+     interval_tolerance(ceph::make_timespan(9 * _lock_duration_secs / 10)),
+     ts_interval(ceph::make_timespan(_lock_duration_secs)),
+     caller(caller), latency(latency), shard_id(_shard_id), bid_mgr(_bid_mgr)
  {}
-
virtual ~RGWContinuousLeaseCR() override;
int operate(const DoutPrefixProvider *dpp) override;
- bool is_locked() const {
+ virtual bool is_locked() const {
if (ceph::coarse_mono_clock::now() - last_renew_try_time > ts_interval) {
return false;
}
locked = status;
}
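+  /* request a voluntary release of the lock, e.g. at the end of a
+   * sync-fairness work period; wakes the coroutine so it can unlock */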
+ void release_lock() {
+ releasing_lock = true;
+ wakeup();
+ }
+
void go_down() {
going_down = true;
wakeup();
}
-
void abort() {
aborted = true;
}
-};
+}; // RGWContinuousLeaseCR
class RGWRadosTimelogAddCR : public RGWSimpleCoroutine {
const DoutPrefixProvider *dpp;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
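+  // sync fairness: work on the shard for a bounded period (the
+  // rgw_sync_work_period option introduced with this change), then release
+  // the lock so other bidders can take the shard over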
+ ceph::coarse_mono_clock::duration work_duration;
+ bool work_period_done = false;
+ static constexpr ceph::coarse_mono_clock::time_point work_period_end_unset =
+ ceph::coarse_mono_clock::time_point::min();
+ ceph::coarse_mono_clock::time_point work_period_end = work_period_end_unset;
+
bool lost_lock = false;
bool *reset_backoff;
int total_entries = 0;
RGWSyncTraceNodeRef tn;
public:
RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool,
const std::string& period, epoch_t realm_epoch,
: RGWCoroutine(_sync_env->cct), sync_env(_sync_env), pool(_pool),
period(period), realm_epoch(realm_epoch), mdlog(mdlog),
shard_id(_shard_id), sync_marker(_marker),
- period_marker(period_marker),
- reset_backoff(_reset_backoff), tn(_tn) {
+ period_marker(period_marker),
+ work_duration(std::chrono::seconds(
+ cct->_conf.get_val<uint64_t>("rgw_sync_work_period"))),
+ reset_backoff(_reset_backoff),
+ tn(_tn)
+ {
*reset_backoff = false;
}
return set_cr_error(r);
}
return 0;
- }
- }
+ } // switch
+ } // while (true)
+
/* unreachable */
return 0;
- }
+ } // RGWMetaSyncShardCR::operate()
void collect_children()
{
rgw::sal::RadosStore* store = sync_env->store;
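+      // pass the shard id and bid manager so the lease coroutine only
+      // competes for the lock while this gateway is the highest bidder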
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
- lock_name, lock_duration, this, nullptr));
+ lock_name, lock_duration, this, nullptr, shard_id, sync_env->bid_manager));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
set_sleeping(true);
yield;
}
+
tn->log(10, "took lease");
/* lock succeeded, a retry now should avoid previous backoff status */
*reset_backoff = true;
total_entries = sync_marker.pos;
/* sync! */
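+      // bound how long this gateway keeps the shard before offering it
+      // to other bidders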
+ work_period_end = ceph::coarse_mono_clock::now() + work_duration;
do {
- if (!lease_cr->is_locked()) {
- tn->log(1, "lease is lost, abort");
- lost_lock = true;
- break;
+ while (!lease_cr->is_locked()) {
+ if (lease_cr->is_done()) {
+ drain_all();
+ tn->log(5, "failed to take lease");
+ return lease_cr->get_ret_status();
}
+        set_sleeping(true);
+        yield;
+      }
+      if (work_period_end == work_period_end_unset) {
+        // the lock was just reacquired after a work-period release; start a
+        // fresh period so it is not released again immediately
+        work_period_end = ceph::coarse_mono_clock::now() + work_duration;
+      }
+
omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, oid),
marker, max_entries, omapkeys));
}
}
collect_children();
+        if (ceph::coarse_mono_clock::now() >= work_period_end) {
+          ldpp_dout(sync_env->dpp, 20) << "metadata sync: work period ended; releasing shard lock" << dendl;
+          yield lease_cr->release_lock();
+          // mark the period as unset; a new one starts once the lock is
+          // reacquired at the top of the loop
+          work_period_end = work_period_end_unset;
+        }
} while (omapkeys->more && can_adjust_marker);
tn->unset_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
* if we reached here, it means that lost_lock is true, otherwise the state
* change in the previous block will prevent us from reaching here
*/
-
yield lease_cr->go_down();
lease_cr.reset();
tn->log(10, "start incremental sync");
can_adjust_marker = true;
/* grab lock */
-      if (!lease_cr) { /* could have had a lease_cr lock from previous state */
+      if (!lease_cr) { /* could still hold a lease from the previous state */
yield {
+ tn->log(20, "creating RGWContinuousLeaseCR");
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
rgw::sal::RadosStore* store = sync_env->store;
lease_cr.reset( new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
- lock_name, lock_duration, this, nullptr));
+ lock_name, lock_duration, this, nullptr, shard_id, sync_env->bid_manager));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
drain_all();
- tn->log(5, "failed to take lease");
+ tn->log(10, "failed to take lease");
return lease_cr->get_ret_status();
}
set_sleeping(true);
yield;
}
}
- tn->log(10, "took lease");
- // if the period has advanced, we can't use the existing marker
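+  // even if lease_cr survived from a previous state, the lock may have been
+  // released at the end of a work period; wait here until it is held again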
+ while (!lease_cr->is_locked()) {
+ if (lease_cr->is_done()) {
+ drain_all();
+ tn->log(10, "failed to take lease");
+ return lease_cr->get_ret_status();
+ }
+ tn->log(20, "do not yet have lock; will try again");
+ set_sleeping(true);
+ yield;
+ }
+  tn->log(10, "took lease");
+  // if the period has advanced, we can't use the existing marker
if (sync_marker.realm_epoch < realm_epoch) {
ldpp_dout(sync_env->dpp, 4) << "clearing marker=" << sync_marker.marker
<< " from old realm_epoch=" << sync_marker.realm_epoch
sync_marker.realm_epoch = realm_epoch;
sync_marker.marker.clear();
}
mdlog_marker = sync_marker.marker;
set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env,
sync_env->shard_obj_name(shard_id),
*/
marker = max_marker = sync_marker.marker;
/* inc sync */
+ work_period_end = ceph::coarse_mono_clock::now() + work_duration;
do {
if (!lease_cr->is_locked()) {
lost_lock = true;
- tn->log(1, "lease is lost, abort");
+        tn->log(10, "lease no longer held (expired or released for fairness); exiting shard");
break;
}
#define INCREMENTAL_MAX_ENTRIES 100
tn->log(10, SSTR(*this << ": done with period"));
break;
}
+      if (ceph::coarse_mono_clock::now() >= work_period_end) {
+        ldpp_dout(sync_env->dpp, 20) << "metadata sync: work period ended; releasing shard lock" << dendl;
+        yield lease_cr->release_lock();
+      }
if (mdlog_marker == max_marker && can_adjust_marker) {
tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
yield wait(utime_t(cct->_conf->rgw_meta_sync_poll_interval, 0));
}
yield lease_cr->go_down();
-
drain_all();
if (lost_lock) {
return set_cr_done();
}
- /* TODO */
return 0;
}
};
: RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env),
pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog),
shard_id(_shard_id), sync_marker(_marker),
- period_marker(std::move(period_marker)) {
+ period_marker(std::move(period_marker))
+ {
tn = sync_env->sync_tracer->add_node(_tn_parent, "shard",
std::to_string(shard_id));
- }
+  }
RGWCoroutine *alloc_cr() override {
return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog,
- shard_id, sync_marker, period_marker, backoff_ptr(), tn);
+ shard_id, sync_marker, period_marker,
+ backoff_ptr(), tn);
}
RGWCoroutine *alloc_finisher_cr() override {
public:
RGWMetaSyncCR(RGWMetaSyncEnv *_sync_env, const RGWPeriodHistory::Cursor &cursor,
- const rgw_meta_sync_status& _sync_status, RGWSyncTraceNodeRef& _tn)
+ const rgw_meta_sync_status& _sync_status,
+ RGWSyncTraceNodeRef& _tn)
: RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
pool(sync_env->store->svc()->zone->get_zone_params().log_pool),
cursor(cursor), sync_status(_sync_status), tn(_tn) {}
continue;
}
}
-
using ShardCR = RGWMetaSyncShardControlCR;
auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch,
mdlog, shard_id, marker,
- std::move(period_marker), tn);
+ std::move(period_marker), tn);
auto stack = spawn(cr, false);
shard_crs[shard_id] = RefPair{cr, stack};
}
ldpp_dout(dpp, -1) << "ERROR: can't sync, mismatch between num shards, master num_shards=" << mdlog_info.num_shards << " local num_shards=" << num_shards << dendl;
return -EINVAL;
}
-
  // construct and start the bid manager for sync fairness
const auto& control_pool = store->svc()->zone->get_zone_params().control_pool;
auto control_obj = rgw_raw_obj{control_pool, meta_sync_bids_oid};
store, control_obj, num_shards);
r = bid_manager->start();
if (r < 0) {
+    tn->log(0, SSTR("ERROR: failed to start the sync-fairness bid manager: r=" << r));
return r;
}
sync_env.bid_manager = bid_manager.get();
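+  // broadcast this gateway's shard bids to its peers and collect theirs, so
+  // that is_highest_bidder() reflects the current set of active gateways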
+  auto notify_bids_cr = sync_env.bid_manager->notify_cr();
+  r = run(dpp, notify_bids_cr);
+  if (r < 0) {
+    tn->log(0, SSTR("ERROR: failed to announce sync-fairness bids: r=" << r));
+    return r;
+  }
RGWPeriodHistory::Cursor cursor;
do {
if (r < 0) {
return r;
}
- meta_sync_cr = new RGWMetaSyncCR(&sync_env, cursor, sync_status, tn);
- r = run(dpp, meta_sync_cr);
- if (r < 0) {
- tn->log(0, "ERROR: failed to fetch all metadata keys");
- return r;
+ {
+ meta_sync_cr = new RGWMetaSyncCR(&sync_env, cursor, sync_status, tn);
+ r = run(dpp, meta_sync_cr);
+ if (r < 0) {
+ tn->log(0, "ERROR: failed to fetch all metadata keys");
+ return r;
+ }
}
break;
default:
encode_json("error_code", error_code, f);
encode_json("message", message, f);
}
-