git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PeeringState: refresh prior_readable_until_ub in pg_history_t on share
author Sage Weil <sage@redhat.com>
Fri, 19 Jul 2019 21:52:17 +0000 (16:52 -0500)
committer Sage Weil <sage@redhat.com>
Thu, 26 Sep 2019 17:21:53 +0000 (12:21 -0500)
Before we share pg_history_t, refresh the prior_readable_until_ub to be
a simple duration from *now*, so that it is completely clock-independent.
The receiver can interpret it based on the receive time for the message,
which loses a bit of precision but is safe since this is an upper bound.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/osd_types.h

index e62aaa7139b6edad02ef1b45c4941569f0a00235..39a50ed2254e9bad22e61b58891355237d3f330a 100644 (file)
@@ -224,6 +224,8 @@ void PeeringState::check_recovery_sources(const OSDMapRef& osdmap)
 
 void PeeringState::update_history(const pg_history_t& new_history)
 {
+  auto mnow = pl->get_mnow();
+  info.history.refresh_prior_readable_until_ub(mnow, prior_readable_until_ub);
   if (info.history.merge(new_history)) {
     psdout(20) << __func__ << " advanced history from " << new_history << dendl;
     dirty_info = true;
@@ -232,6 +234,13 @@ void PeeringState::update_history(const pg_history_t& new_history)
       past_intervals.clear();
       dirty_big_info = true;
     }
+    prior_readable_until_ub = info.history.get_prior_readable_until_ub(mnow);
+    if (prior_readable_until_ub != ceph::signedspan::zero()) {
+      dout(20) << __func__
+              << " prior_readable_until_ub " << prior_readable_until_ub
+              << " (mnow " << mnow << " + "
+              << info.history.prior_readable_until_ub << ")" << dendl;
+    }
   }
   pl->on_info_history_change();
 }
@@ -2318,6 +2327,8 @@ void PeeringState::activate(
     info.purged_snaps.swap(purged);
 
     // start up replicas
+    info.history.refresh_prior_readable_until_ub(pl->get_mnow(),
+                                                prior_readable_until_ub);
 
     ceph_assert(!acting_recovery_backfill.empty());
     for (set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
@@ -2549,6 +2560,9 @@ void PeeringState::share_pg_info()
 {
   psdout(10) << "share_pg_info" << dendl;
 
+  info.history.refresh_prior_readable_until_ub(pl->get_mnow(),
+                                              prior_readable_until_ub);
+
   // share new pg_info_t with replicas
   ceph_assert(!acting_recovery_backfill.empty());
   for (set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
@@ -2728,6 +2742,7 @@ void PeeringState::fulfill_query(const MQuery& query, PeeringCtxWrapper &rctx)
 {
   if (query.query.type == pg_query_t::INFO) {
     pair<pg_shard_t, pg_info_t> notify_info;
+    // note this refreshes our prior_readable_until_ub value
     update_history(query.query.history);
     fulfill_info(query.from, query.query, notify_info);
     rctx.send_notify(
@@ -4272,6 +4287,9 @@ boost::statechart::result PeeringState::Reset::react(const ActMap&)
 {
   DECLARE_LOCALS;
   if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
+    ps->info.history.refresh_prior_readable_until_ub(
+      pl->get_mnow(),
+      ps->prior_readable_until_ub);
     context< PeeringMachine >().send_notify(
       ps->get_primary().osd,
       pg_notify_t(
@@ -5882,6 +5900,8 @@ boost::statechart::result PeeringState::ReplicaActive::react(const ActMap&)
 {
   DECLARE_LOCALS;
   if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
+    ps->info.history.refresh_prior_readable_until_ub(
+      pl->get_mnow(), ps->prior_readable_until_ub);
     context< PeeringMachine >().send_notify(
       ps->get_primary().osd,
       pg_notify_t(
@@ -6002,6 +6022,8 @@ boost::statechart::result PeeringState::Stray::react(const ActMap&)
 {
   DECLARE_LOCALS;
   if (ps->should_send_notify() && ps->get_primary().osd >= 0) {
+    ps->info.history.refresh_prior_readable_until_ub(
+      pl->get_mnow(), ps->prior_readable_until_ub);
     context< PeeringMachine >().send_notify(
       ps->get_primary().osd,
       pg_notify_t(
@@ -6861,5 +6883,9 @@ ostream &operator<<(ostream &out, const PeeringState &ps) {
   out << " " << pg_state_string(ps.get_state());
   if (ps.should_send_notify())
     out << " NOTIFY";
+
+  if (ps.prior_readable_until_ub != ceph::signedspan::zero()) {
+    out << " pruub " << ps.prior_readable_until_ub;
+  }
   return out;
 }
index 9c67ec52377d872ebdd418e4b1c29d906c9a4a0f..6bd4ead0c684ffa470b695fbae8356a14cb2d6b4 100644 (file)
@@ -1358,6 +1358,8 @@ public:
 
   /// upper bound on any acting OSDs' readable_until in this interval
   ceph::signedspan readable_until_ub = ceph::signedspan::zero();
+  /// upper bound from prior interval(s)
+  ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero();
 
   /// [replica] upper bound we got from the primary (primary's clock)
   ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
@@ -1946,6 +1948,11 @@ public:
     return readable_until;
   }
 
+  /// Get prior intervals' readable_until upper bound
+  ceph::signedspan get_prior_readable_until_ub() const {
+    return prior_readable_until_ub;
+  }
+
   void renew_lease(ceph::signedspan now) {
     bool was_min = (readable_until_ub == readable_until);
     readable_until_ub_sent = now + readable_interval;
@@ -1953,6 +1960,7 @@ public:
       recalc_readable_until();
     }
   }
+
   void send_lease();
   void schedule_renew_lease();
 
index 90a9a746bfe188a71fcfdaca5237d7814fd8ade2..7b114c53b7e26c6e706764c65f6c2727e669ea48 100644 (file)
@@ -2760,6 +2760,25 @@ struct pg_history_t {
   void decode(ceph::buffer::list::const_iterator& p);
   void dump(ceph::Formatter *f) const;
   static void generate_test_instances(std::list<pg_history_t*>& o);
+
+  ceph::signedspan refresh_prior_readable_until_ub(
+    ceph::signedspan now,  ///< now, relative to osd startup_time
+    ceph::signedspan ub) { ///< ub, relative to osd startup_time
+    if (now >= ub) {
+      // prior interval(s) are unreadable; we can zero the upper bound
+      prior_readable_until_ub = ceph::signedspan::zero();
+      return ceph::signedspan::zero();
+    } else {
+      prior_readable_until_ub = ub - now;
+      return ub;
+    }
+  }
+  ceph::signedspan get_prior_readable_until_ub(ceph::signedspan now) {
+    if (prior_readable_until_ub == ceph::signedspan::zero()) {
+      return ceph::signedspan::zero();
+    }
+    return now + prior_readable_until_ub;
+  }
 };
 WRITE_CLASS_ENCODER(pg_history_t)