From 6945cc52fd68ebdd22f35abff81a6f91aa0922d6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 19 Jul 2019 16:52:17 -0500 Subject: [PATCH] osd/PeeringState: refresh prior_readable_until_ub in pg_history_t on share Before we share pg_history_t, refresh the prior_readable_until_ub to be a simple duration from *now*, so that it is completely clock-independent. The receiver can interpret it based on the receive time for the message, which loses a bit of precision but is safe since this is an upper bound. Signed-off-by: Sage Weil --- src/osd/PeeringState.cc | 26 ++++++++++++++++++++++++++ src/osd/PeeringState.h | 8 ++++++++ src/osd/osd_types.h | 19 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index e62aaa7139b..39a50ed2254 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -224,6 +224,8 @@ void PeeringState::check_recovery_sources(const OSDMapRef& osdmap) void PeeringState::update_history(const pg_history_t& new_history) { + auto mnow = pl->get_mnow(); + info.history.refresh_prior_readable_until_ub(mnow, prior_readable_until_ub); if (info.history.merge(new_history)) { psdout(20) << __func__ << " advanced history from " << new_history << dendl; dirty_info = true; @@ -232,6 +234,13 @@ void PeeringState::update_history(const pg_history_t& new_history) past_intervals.clear(); dirty_big_info = true; } + prior_readable_until_ub = info.history.get_prior_readable_until_ub(mnow); + if (prior_readable_until_ub != ceph::signedspan::zero()) { + dout(20) << __func__ + << " prior_readable_until_ub " << prior_readable_until_ub + << " (mnow " << mnow << " + " + << info.history.prior_readable_until_ub << ")" << dendl; + } } pl->on_info_history_change(); } @@ -2318,6 +2327,8 @@ void PeeringState::activate( info.purged_snaps.swap(purged); // start up replicas + info.history.refresh_prior_readable_until_ub(pl->get_mnow(), + prior_readable_until_ub); ceph_assert(!acting_recovery_backfill.empty()); for (set::iterator i = acting_recovery_backfill.begin(); @@ -2549,6 +2560,9 @@ void PeeringState::share_pg_info() { psdout(10) << "share_pg_info" << dendl; + info.history.refresh_prior_readable_until_ub(pl->get_mnow(), + prior_readable_until_ub); + // share new pg_info_t with replicas ceph_assert(!acting_recovery_backfill.empty()); for (set::iterator i = acting_recovery_backfill.begin(); @@ -2728,6 +2742,7 @@ void PeeringState::fulfill_query(const MQuery& query, PeeringCtxWrapper &rctx) { if (query.query.type == pg_query_t::INFO) { pair notify_info; + // note this refreshes our prior_readable_until_ub value update_history(query.query.history); fulfill_info(query.from, query.query, notify_info); rctx.send_notify( @@ -4272,6 +4287,9 @@ boost::statechart::result PeeringState::Reset::react(const ActMap&) { DECLARE_LOCALS; if (ps->should_send_notify() && ps->get_primary().osd >= 0) { + ps->info.history.refresh_prior_readable_until_ub( + pl->get_mnow(), + ps->prior_readable_until_ub); context< PeeringMachine >().send_notify( ps->get_primary().osd, pg_notify_t( @@ -5882,6 +5900,8 @@ boost::statechart::result PeeringState::ReplicaActive::react(const ActMap&) { DECLARE_LOCALS; if (ps->should_send_notify() && ps->get_primary().osd >= 0) { + ps->info.history.refresh_prior_readable_until_ub( + pl->get_mnow(), ps->prior_readable_until_ub); context< PeeringMachine >().send_notify( ps->get_primary().osd, pg_notify_t( @@ -6002,6 +6022,8 @@ boost::statechart::result PeeringState::Stray::react(const ActMap&) { DECLARE_LOCALS; if (ps->should_send_notify() && ps->get_primary().osd >= 0) { + ps->info.history.refresh_prior_readable_until_ub( + pl->get_mnow(), ps->prior_readable_until_ub); context< PeeringMachine >().send_notify( ps->get_primary().osd, pg_notify_t( @@ -6861,5 +6883,9 @@ ostream &operator<<(ostream &out, const PeeringState &ps) { out << " " << pg_state_string(ps.get_state()); if (ps.should_send_notify()) out << " NOTIFY"; + + if (ps.prior_readable_until_ub != ceph::signedspan::zero()) { + out << " pruub " << ps.prior_readable_until_ub; + } return out; } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 9c67ec52377..6bd4ead0c68 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1358,6 +1358,8 @@ public: /// upper bound on any acting OSDs' readable_until in this interval ceph::signedspan readable_until_ub = ceph::signedspan::zero(); + /// upper bound from prior interval(s) + ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero(); /// [replica] upper bound we got from the primary (primary's clock) ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero(); @@ -1946,6 +1948,11 @@ public: return readable_until; } + /// Get prior intervals' readable_until upper bound + ceph::signedspan get_prior_readable_until_ub() const { + return prior_readable_until_ub; + } + void renew_lease(ceph::signedspan now) { bool was_min = (readable_until_ub == readable_until); readable_until_ub_sent = now + readable_interval; @@ -1953,6 +1960,7 @@ public: recalc_readable_until(); } } + void send_lease(); void schedule_renew_lease(); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 90a9a746bfe..7b114c53b7e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2760,6 +2760,25 @@ struct pg_history_t { void decode(ceph::buffer::list::const_iterator& p); void dump(ceph::Formatter *f) const; static void generate_test_instances(std::list& o); + + ceph::signedspan refresh_prior_readable_until_ub( + ceph::signedspan now, ///< now, relative to osd startup_time + ceph::signedspan ub) { ///< ub, relative to osd startup_time + if (now >= ub) { + // prior interval(s) are unreadable; we can zero the upper bound + prior_readable_until_ub = ceph::signedspan::zero(); + return ceph::signedspan::zero(); + } else { + prior_readable_until_ub = ub - now; + return ub; + } + } + ceph::signedspan get_prior_readable_until_ub(ceph::signedspan now) { + if (prior_readable_until_ub == ceph::signedspan::zero()) { + return ceph::signedspan::zero(); + } + return now + prior_readable_until_ub; + } }; WRITE_CLASS_ENCODER(pg_history_t) -- 2.39.5