git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: release backfill reservations if a backfill peer rejects
author Samuel Just <sam.just@inktank.com>
Mon, 29 Sep 2014 22:01:25 +0000 (15:01 -0700)
committer Samuel Just <sam.just@inktank.com>
Thu, 30 Oct 2014 20:47:51 +0000 (13:47 -0700)
Also, the too-full backfill peer now waits for the rejection from the
primary before it does its state transition.

Fixes: #9626
Backport: giant, firefly, dumpling
Signed-off-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit 624aaf2a4ea9950153a89ff921e2adce683a6f51)

src/osd/PG.cc
src/osd/ReplicatedPG.cc

index b68086dcdf28c970d36653fb1d874fc25495a6be..10d33a5078ca61869b7d9c3edeb6757e27d2fdc1 100644 (file)
@@ -5782,8 +5782,29 @@ PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
   pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
   pg->state_set(PG_STATE_BACKFILL_TOOFULL);
 
+  for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
+       it != pg->backfill_targets.end();
+       ++it) {
+    assert(*it != pg->pg_whoami);
+    ConnectionRef con = pg->osd->get_con_osd_cluster(
+      it->osd, pg->get_osdmap()->get_epoch());
+    if (con) {
+      if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
+        pg->osd->send_message_osd_cluster(
+          new MBackfillReserve(
+           MBackfillReserve::REJECT,
+           spg_t(pg->info.pgid.pgid, it->shard),
+           pg->get_osdmap()->get_epoch()),
+         con.get());
+      }
+    }
+  }
+
   pg->osd->recovery_wq.dequeue(pg);
 
+  pg->waiting_on_backfill.clear();
+  pg->finish_recovery_op(hobject_t::get_max());
+
   pg->schedule_backfill_full_retry();
   return transit<NotBackfilling>();
 }
@@ -6069,7 +6090,7 @@ PG::RecoveryState::RepRecovering::react(const BackfillTooFull &)
 {
   PG *pg = context< RecoveryMachine >().pg;
   pg->reject_reservation();
-  return transit<RepNotRecovering>();
+  return discard_event();
 }
 
 void PG::RecoveryState::RepRecovering::exit()
index f1e1e99dcff54e69480ee0948ad3bcc273958391..d8a6ce68ce50f4411d6916bd04537263e011e345 100644 (file)
@@ -2073,12 +2073,16 @@ void ReplicatedPG::do_scan(
       }
       peer_backfill_info[from] = bi;
 
-      assert(waiting_on_backfill.find(from) != waiting_on_backfill.end());
-      waiting_on_backfill.erase(from);
+      if (waiting_on_backfill.find(from) != waiting_on_backfill.end()) {
+       waiting_on_backfill.erase(from);
 
-      if (waiting_on_backfill.empty()) {
-        assert(peer_backfill_info.size() == backfill_targets.size());
-        finish_recovery_op(hobject_t::get_max());
+       if (waiting_on_backfill.empty()) {
+         assert(peer_backfill_info.size() == backfill_targets.size());
+         finish_recovery_op(hobject_t::get_max());
+       }
+      } else {
+       // we canceled backfill for a while due to a too full, and this
+       // is an extra response from a non-too-full peer
       }
     }
     break;