From 0863bdeb40b0a8c1a1ee5789494649d0ce64cedc Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 19 Jul 2019 16:33:52 -0500
Subject: [PATCH] osd: add lease/readable tracking members, helpers

Signed-off-by: Sage Weil
---
 src/osd/PeeringState.cc | 30 ++++++++++++++++++++++++++++++
 src/osd/PeeringState.h  | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc
index df942f3f9b4..876910f4421 100644
--- a/src/osd/PeeringState.cc
+++ b/src/osd/PeeringState.cc
@@ -411,6 +411,7 @@ void PeeringState::advance_map(
   if (pool.info.last_change == osdmap_ref->get_epoch()) {
     pl->on_pool_change();
   }
+  readable_interval = pool.get_readable_interval();
   last_require_osd_release = osdmap->require_osd_release;
 }
 
@@ -741,6 +742,15 @@ void PeeringState::on_new_interval()
 
   init_hb_stamps();
 
+  // forget acks from the previous interval; only a primary tracks them
+  acting_readable_until_ub.clear();
+  if (is_primary()) {
+    acting_readable_until_ub.resize(acting.size(), ceph::signedspan::zero());
+
+    // start lease here, so that we get acks during peering
+    renew_lease(pl->get_mnow());
+  }
+
   pl->on_new_interval();
 }
 
@@ -1071,6 +1081,26 @@ bool PeeringState::set_force_backfill(bool b)
   return did;
 }
 
+// [primary] Set readable_until and readable_until_ub to the smallest of
+// readable_until_ub_sent and the acting_readable_until_ub entries of the
+// other acting OSDs; our own slot and CRUSH_ITEM_NONE entries are skipped.
+void PeeringState::recalc_readable_until()
+{
+  assert(is_primary());
+  ceph::signedspan min = readable_until_ub_sent;
+  for (unsigned i = 0; i < acting.size(); ++i) {
+    if (acting[i] == pg_whoami.osd || acting[i] == CRUSH_ITEM_NONE) {
+      continue;
+    }
+    if (acting_readable_until_ub[i] < min) {
+      min = acting_readable_until_ub[i];
+    }
+  }
+  readable_until = min;
+  readable_until_ub = min;
+  dout(20) << __func__ << " readable_until[_ub] " << readable_until << dendl;
+}
+
 bool PeeringState::adjust_need_up_thru(const OSDMapRef osdmap)
 {
   epoch_t up_thru = osdmap->get_up_thru(pg_whoami.osd);
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index 9a7b8514922..9a40997c9f6 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -1343,6 +1343,24 @@ public:
 
   vector hb_stamps;
 
+  /// how far past "now" a renewed lease extends (set from the pool)
+  ceph::signedspan readable_interval = ceph::signedspan::zero();
+
+  /// how long we can service reads in this interval
+  ceph::signedspan readable_until = ceph::signedspan::zero();
+
+  /// upper bound on any acting OSDs' readable_until in this interval
+  ceph::signedspan readable_until_ub = ceph::signedspan::zero();
+
+  /// [replica] upper bound we got from the primary (primary's clock)
+  ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
+
+  /// [primary] last upper bound shared by primary to replicas
+  ceph::signedspan readable_until_ub_sent = ceph::signedspan::zero();
+
+  /// [primary] readable ub acked by acting set members
+  vector<ceph::signedspan> acting_readable_until_ub;
+
   bool send_notify = false; ///< True if a notify needs to be sent to the primary
 
   bool dirty_info = false; ///< small info structu on disk out of date
@@ -1916,6 +1934,31 @@ public:
     write_if_dirty(t);
   }
 
+  /// Get current interval's readable_until
+  ceph::signedspan get_readable_until() const {
+    return readable_until;
+  }
+
+  /// [primary] Set readable_until_ub_sent to now + readable_interval.
+  /// If readable_until was equal to readable_until_ub beforehand, both are
+  /// recomputed against the new bound.
+  void renew_lease(ceph::signedspan now) {
+    bool was_min = (readable_until_ub == readable_until);
+    readable_until_ub_sent = now + readable_interval;
+    if (was_min) {
+      recalc_readable_until();
+    }
+  }
+
+  /// Build a pg_lease_t from readable_until, readable_until_ub_sent and
+  /// readable_interval.
+  pg_lease_t get_lease() const {
+    return pg_lease_t(readable_until, readable_until_ub_sent, readable_interval);
+  }
+
+  /// [primary] recalc readable_until[_ub] for the current interval
+  void recalc_readable_until();
+
   //============================ const helpers ================================
   const char *get_current_state() const {
     return state_history.get_current_state();
-- 
2.39.5