git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: add lease/readable tracking members, helpers
author Sage Weil <sage@redhat.com>
Fri, 19 Jul 2019 21:33:52 +0000 (16:33 -0500)
committer Sage Weil <sage@redhat.com>
Thu, 26 Sep 2019 17:21:53 +0000 (12:21 -0500)
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PeeringState.cc
src/osd/PeeringState.h

index df942f3f9b4ed488d4ac2e094f9db2655cfb3054..876910f4421a28768fe347fa2ddf6a027fb67800 100644 (file)
@@ -411,6 +411,7 @@ void PeeringState::advance_map(
   if (pool.info.last_change == osdmap_ref->get_epoch()) {
     pl->on_pool_change();
   }
+  readable_interval = pool.get_readable_interval();
   last_require_osd_release = osdmap->require_osd_release;
 }
 
@@ -741,6 +742,14 @@ void PeeringState::on_new_interval()
 
   init_hb_stamps();
 
+  acting_readable_until_ub.clear();
+  if (is_primary()) {
+    acting_readable_until_ub.resize(acting.size(), ceph::signedspan::zero());
+
+    // start lease here, so that we get acks during peering
+    renew_lease(pl->get_mnow());
+  }
+
   pl->on_new_interval();
 }
 
@@ -1071,6 +1080,23 @@ bool PeeringState::set_force_backfill(bool b)
   return did;
 }
 
+void PeeringState::recalc_readable_until()  // set readable_until and readable_until_ub to the smallest upper bound known for the acting set
+{
+  assert(is_primary());  // only valid on the primary
+  ceph::signedspan min = readable_until_ub_sent;  // start from the bound last recorded by renew_lease()
+  for (unsigned i = 0; i < acting.size(); ++i) {
+    if (acting[i] == pg_whoami.osd || acting[i] == CRUSH_ITEM_NONE) {  // skip our own entry and CRUSH_ITEM_NONE entries
+      continue;
+    }
+    if (acting_readable_until_ub[i] < min) {  // indexed by position in acting, not by osd id
+      min = acting_readable_until_ub[i];
+    }
+  }
+  readable_until = min;  // both members receive the same minimum
+  readable_until_ub = min;
+  dout(20) << __func__ << " readable_until[_ub] " << readable_until << dendl;
+}
+
 bool PeeringState::adjust_need_up_thru(const OSDMapRef osdmap)
 {
   epoch_t up_thru = osdmap->get_up_thru(pg_whoami.osd);
index 9a7b8514922d96fc0cf1af1ac69565068567943b..9a40997c9f682d29dac9b46553d4074e6195ef92 100644 (file)
@@ -1343,6 +1343,23 @@ public:
 
   vector<HeartbeatStampsRef> hb_stamps;
 
+  ceph::signedspan readable_interval = ceph::signedspan::zero();  ///< refreshed from pool.get_readable_interval() in advance_map(); added to 'now' by renew_lease()
+
+  /// how long we can service reads in this interval
+  ceph::signedspan readable_until = ceph::signedspan::zero();
+
+  /// upper bound on any acting OSDs' readable_until in this interval
+  ceph::signedspan readable_until_ub = ceph::signedspan::zero();
+
+  /// [replica] upper bound we got from the primary (primary's clock)
+  ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
+
+  /// [primary] last upper bound shared by primary to replicas (renew_lease() sets it to now + readable_interval)
+  ceph::signedspan readable_until_ub_sent = ceph::signedspan::zero();
+
+  /// [primary] readable ub acked by acting set members (cleared in on_new_interval(); on the primary, resized to acting.size() and zero-filled)
+  vector<ceph::signedspan> acting_readable_until_ub;
+
   bool send_notify = false; ///< True if a notify needs to be sent to the primary
 
   bool dirty_info = false;          ///< small info structu on disk out of date
@@ -1916,6 +1933,26 @@ public:
     write_if_dirty(t);
   }
 
+  /// Get current interval's readable_until (the value assigned by recalc_readable_until())
+  ceph::signedspan get_readable_until() const {
+    return readable_until;
+  }
+
+  void renew_lease(ceph::signedspan now) {  // record a new upper bound of now + readable_interval in readable_until_ub_sent
+    bool was_min = (readable_until_ub == readable_until);  // sampled before readable_until_ub_sent changes
+    readable_until_ub_sent = now + readable_interval;
+    if (was_min) {  // NOTE(review): recalculation is skipped when the two bounds differed; rationale is not visible here -- confirm
+      recalc_readable_until();
+    }
+  }
+
+  pg_lease_t get_lease() {  // build a pg_lease_t from readable_until, readable_until_ub_sent and readable_interval (in that order)
+    return pg_lease_t(readable_until, readable_until_ub_sent, readable_interval);
+  }
+
+  /// [primary] recalc readable_until[_ub] for the current interval (the definition asserts is_primary())
+  void recalc_readable_until();
+
   //============================ const helpers ================================
   const char *get_current_state() const {
     return state_history.get_current_state();