From: Samuel Just Date: Mon, 29 Sep 2014 22:01:25 +0000 (-0700) Subject: PG: release backfill reservations if a backfill peer rejects X-Git-Tag: v0.80.8~30^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8401e7ffa1768770f451143b3c110d1deae1bd40;p=ceph.git PG: release backfill reservations if a backfill peer rejects Also, the full peer will wait until the rejection from the primary to do a state transition. Fixes: #9626 Backport: giant, firefly, dumpling Signed-off-by: Samuel Just (cherry picked from commit 624aaf2a4ea9950153a89ff921e2adce683a6f51) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index b68086dcdf28..10d33a5078ca 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5782,8 +5782,29 @@ PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &) pg->osd->local_reserver.cancel_reservation(pg->info.pgid); pg->state_set(PG_STATE_BACKFILL_TOOFULL); + for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin(); + it != pg->backfill_targets.end(); + ++it) { + assert(*it != pg->pg_whoami); + ConnectionRef con = pg->osd->get_con_osd_cluster( + it->osd, pg->get_osdmap()->get_epoch()); + if (con) { + if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) { + pg->osd->send_message_osd_cluster( + new MBackfillReserve( + MBackfillReserve::REJECT, + spg_t(pg->info.pgid.pgid, it->shard), + pg->get_osdmap()->get_epoch()), + con.get()); + } + } + } + pg->osd->recovery_wq.dequeue(pg); + pg->waiting_on_backfill.clear(); + pg->finish_recovery_op(hobject_t::get_max()); + pg->schedule_backfill_full_retry(); return transit<NotBackfilling>(); } @@ -6069,7 +6090,7 @@ PG::RecoveryState::RepRecovering::react(const BackfillTooFull &) { PG *pg = context< RecoveryMachine >().pg; pg->reject_reservation(); - return transit<RepNotRecovering>(); + return discard_event(); } void PG::RecoveryState::RepRecovering::exit() diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index f1e1e99dcff5..d8a6ce68ce50 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc 
@@ -2073,12 +2073,16 @@ void ReplicatedPG::do_scan( } peer_backfill_info[from] = bi; - assert(waiting_on_backfill.find(from) != waiting_on_backfill.end()); - waiting_on_backfill.erase(from); + if (waiting_on_backfill.find(from) != waiting_on_backfill.end()) { + waiting_on_backfill.erase(from); - if (waiting_on_backfill.empty()) { - assert(peer_backfill_info.size() == backfill_targets.size()); - finish_recovery_op(hobject_t::get_max()); + if (waiting_on_backfill.empty()) { + assert(peer_backfill_info.size() == backfill_targets.size()); + finish_recovery_op(hobject_t::get_max()); + } + } else { + // we canceled backfill for a while due to a too full, and this + // is an extra response from a non-too-full peer } } break;