git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PeeringState: do not start renewing leases until PG is activated 33129/head
author Sage Weil <sage@redhat.com>
Fri, 7 Feb 2020 16:33:26 +0000 (10:33 -0600)
committer Sage Weil <sage@redhat.com>
Thu, 13 Feb 2020 19:24:46 +0000 (13:24 -0600)
The activate() work renews the lease so that we can send lease info out
to the peers immediately.  However, these messages may get delayed.  Since
we immediately start scheduling renewals, it's possible for the renewal
to go out before the PG is active, crashing the replicas.

Fix by not scheduling renewals until the PG is really active.

Also, renew aggressively at that point in time, since it may have been a
while since we first started the activation.

Fixes: https://tracker.ceph.com/issues/44041
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PeeringState.cc

index aa9e047c9337125748a6788e4c3c41183259e8bb..3c84ebf350a21fa9373e86e4d5ff4dce5871928d 100644 (file)
@@ -2390,7 +2390,7 @@ void PeeringState::activate(
 
     if (HAVE_FEATURE(upacting_features, SERVER_OCTOPUS)) {
       renew_lease(pl->get_mnow());
-      schedule_renew_lease();
+      // do not schedule until we are actually activated
     }
 
     // adjust purged_snaps: PG may have been inactive while snaps were pruned
@@ -5880,6 +5880,16 @@ void PeeringState::Active::all_activated_and_committed()
   ceph_assert(!ps->acting_recovery_backfill.empty());
   ceph_assert(ps->blocked_by.empty());
 
+  if (HAVE_FEATURE(ps->upacting_features, SERVER_OCTOPUS)) {
+    // this is overkill when the activation is quick, but when it is slow it
+    // is important, because the lease was renewed by the activate itself but we
+    // don't know how long ago that was, and simply scheduling now may leave
+    // a gap in lease coverage.  keep it simple and aggressively renew.
+    ps->renew_lease(pl->get_mnow());
+    ps->send_lease();
+    ps->schedule_renew_lease();
+  }
+
   // Degraded?
   ps->update_calc_stats();
   if (ps->info.stats.stats.sum.num_objects_degraded) {