From: Samuel Just Date: Mon, 28 Oct 2013 23:09:59 +0000 (-0700) Subject: ReplicatedPG: recover_backfill: don't prematurely adjust last_backfill X-Git-Tag: v0.72-rc1~7^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4139e75d63b0503dbb7fea8036044eda5e8b7cf1;p=ceph.git ReplicatedPG: recover_backfill: don't prematurely adjust last_backfill We can't adjust last_backfill to object x until x has been fully backfilled. pending_backfill_updates contains all those backfills which have been started, but which have not yet been reflected in pinfo.last_backfill. backfills_in_flight contains those backfills which have not yet completed. Thus, we can adjust last_backfill to the largest entry in pending_backfill_updates that is ordered before every entry in backfills_in_flight. Signed-off-by: Samuel Just --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 082fd55b8455..6cd5e2801e98 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -7950,8 +7950,13 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle) * All objects < MIN(peer_backfill_info.begin, backfill_info.begin) in PG are * backfilled. No deleted objects in this interval remain on backfill_target. * - * peer_info[backfill_target].last_backfill = MIN(peer_backfill_info.begin, - * backfill_info.begin, backfills_in_flight) + * All objects <= peer_info[backfill_target].last_backfill have been backfilled + * to backfill_target + * + * There *MAY* be objects between last_backfill_started and + * MIN(peer_backfill_info.begin, backfill_info.begin) in the event that client + * io created objects since the last scan. For this reason, we call + * update_range() again before continuing backfill. */ int ReplicatedPG::recover_backfill( int max, @@ -8135,18 +8140,24 @@ int ReplicatedPG::recover_backfill( dout(20) << *i << " is still in flight" << dendl; } - hobject_t bound = backfills_in_flight.size() ? + hobject_t next_backfill_to_complete = backfills_in_flight.size() ? 
*(backfills_in_flight.begin()) : backfill_pos; - if (bound > pinfo.last_backfill) { - pinfo.last_backfill = bound; - for (map<hobject_t, pg_stat_t>::iterator i = pending_backfill_updates.begin(); - i != pending_backfill_updates.end() && i->first < bound; - pending_backfill_updates.erase(i++)) { - pinfo.stats.add(i->second); - } + hobject_t new_last_backfill = pinfo.last_backfill; + for (map<hobject_t, pg_stat_t>::iterator i = pending_backfill_updates.begin(); + i != pending_backfill_updates.end() && + i->first < next_backfill_to_complete; + pending_backfill_updates.erase(i++)) { + pinfo.stats.add(i->second); + assert(i->first > new_last_backfill); + new_last_backfill = i->first; + } + assert(!pending_backfill_updates.empty() || + new_last_backfill == last_backfill_started); + if (new_last_backfill > pinfo.last_backfill) { + pinfo.last_backfill = new_last_backfill; epoch_t e = get_osdmap()->get_epoch(); MOSDPGBackfill *m = NULL; - if (bound.is_max()) { + if (pinfo.last_backfill.is_max()) { m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH, e, e, info.pgid); // Use default priority here, must match sub_op priority /* pinfo.stats might be wrong if we did log-based recovery on the @@ -8158,7 +8169,7 @@ int ReplicatedPG::recover_backfill( m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_PROGRESS, e, e, info.pgid); // Use default priority here, must match sub_op priority } - m->last_backfill = bound; + m->last_backfill = pinfo.last_backfill; m->stats = pinfo.stats; osd->send_message_osd_cluster(backfill_target, m, get_osdmap()->get_epoch()); }