From c0d92b6744d793f85c1ce2f7c5c67b9e9c18a135 Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Mon, 13 Jan 2014 11:38:48 -0800
Subject: [PATCH] osd: Implement multiple backfill target handling

Fixes: #5858

Signed-off-by: David Zafman
---
 src/osd/ReplicatedPG.cc | 416 ++++++++++++++++++++++++----------------
 src/osd/ReplicatedPG.h  |   7 +-
 2 files changed, 256 insertions(+), 167 deletions(-)

diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index f5a1640102d..422f41d1ea7 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1884,14 +1884,14 @@ void ReplicatedPG::do_scan(
 	  bi.objects[first] = i->second;
 	}
       }
-      peer_backfill_info.insert(pair<int, BackfillInterval>(from, bi));
+      peer_backfill_info[from] = bi;

       assert(waiting_on_backfill.find(from) != waiting_on_backfill.end());
       waiting_on_backfill.erase(from);

       if (waiting_on_backfill.empty()) {
         assert(peer_backfill_info.size() == backfill_targets.size());
-        finish_recovery_op(bi.begin);
+        finish_recovery_op(hobject_t::get_max());
       }
     }
     break;
@@ -4627,7 +4627,8 @@ void ReplicatedPG::finish_ctx(OpContext *ctx)
     info.stats.stats.add(ctx->delta_stats, ctx->obs->oi.category);

     for (unsigned i = 0; i < backfill_targets.size() ; ++i) {
-      pg_info_t& pinfo = peer_info[i];
+      int bt = backfill_targets[i];
+      pg_info_t& pinfo = peer_info[bt];
       if (soid <= pinfo.last_backfill)
         pinfo.stats.stats.add(ctx->delta_stats, ctx->obs->oi.category);
       else if (soid <= last_backfill_started)
@@ -5662,13 +5663,15 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now)
     }

     // ship resulting transaction, log entries, and pg_stats
-    if (is_backfill_targets(peer) && soid > last_backfill_started &&
+    if (is_backfill_targets(peer) &&
+        soid > MAX(last_backfill_started, pinfo.last_backfill) &&
         // only skip normal (not temp pool=-1) objects
         soid.pool == (int64_t)info.pgid.pool()) {
       dout(10) << "issue_repop shipping empty opt to osd." << peer
-               <<", object beyond last_backfill_started"
-               << last_backfill_started << ", last_backfill is "
-               << pinfo.last_backfill << dendl;
+               <<", object " << soid
+               << " beyond MAX(last_backfill_started "
+               << last_backfill_started << ", pinfo.last_backfill "
+               << pinfo.last_backfill << ")" << dendl;
       ObjectStore::Transaction t;
       ::encode(t, wr->get_data());
     } else {
@@ -8133,15 +8136,16 @@ void ReplicatedPG::on_shutdown()
 void ReplicatedPG::on_activate()
 {
   if (!backfill_targets.empty()) {
-    dout(10) << "on activate: backfill_targets " << backfill_targets << dendl;
     last_backfill_started = earliest_backfill();
-#if 0
-    last_backfill_started = peer_info[backfill_targets[0]].last_backfill;
-#endif
-    dout(10) << "From " << last_backfill_started << " instead of " << peer_info[backfill_targets[0]].last_backfill << dendl;
+    new_backfill = true;
     assert(!last_backfill_started.is_max());
-    //dout(10) << " backfill targets " << backfill_targets
-    //         << " from " << last_backfill_started << dendl;
+    dout(5) << "on activate: bft=" << backfill_targets
+            << " from " << last_backfill_started << dendl;
+    for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+      dout(5) << "target osd."
+              << backfill_targets[i]
+              << " from " << peer_info[backfill_targets[i]].last_backfill
+              << dendl;
+    }
   }

   hit_set_setup();
@@ -8372,7 +8376,7 @@ bool ReplicatedPG::start_recovery_ops(
       state_test(PG_STATE_BACKFILL) &&
       !backfill_targets.empty() && started < max &&
       missing.num_missing() == 0 &&
-      waiting_on_backfill.find(backfill_targets[0]) == waiting_on_backfill.end()) {
+      waiting_on_backfill.empty()) {
     if (get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL)) {
       dout(10) << "deferring backfill due to NOBACKFILL" << dendl;
       deferred_backfill = true;
@@ -8753,6 +8757,38 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle)
   return started;
 }

+hobject_t ReplicatedPG::earliest_peer_backfill() const
+{
+  hobject_t e = hobject_t::get_max();
+  for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+    int peer = backfill_targets[i];
+    map<int, BackfillInterval>::const_iterator iter =
+      peer_backfill_info.find(peer);
+    assert(iter != peer_backfill_info.end());
+    if (iter->second.begin < e)
+      e = iter->second.begin;
+  }
+  return e;
+}
+
+bool ReplicatedPG::all_peer_done() const
+{
+  // Primary hasn't got any more objects
+  assert(backfill_info.empty());
+
+  for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+    int bt = backfill_targets[i];
+    map<int, BackfillInterval>::const_iterator piter =
+      peer_backfill_info.find(bt);
+    assert(piter != peer_backfill_info.end());
+    const BackfillInterval& pbi = piter->second;
+    // See if peer has more to process
+    if (!pbi.extends_to_end() || !pbi.empty())
+      return false;
+  }
+  return true;
+}
+
 /**
  * recover_backfill
  *
@@ -8761,21 +8797,23 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle)
  * backfilled: fully pushed to replica or present in replica's missing set (both
  * our copy and theirs).
  *
- * All objects on backfill_targets in [MIN,peer_backfill_info.begin) are either
+ * All objects on a backfill_target in
+ * [MIN,peer_backfill_info[backfill_target].begin) are either
  * not present or backfilled (all removed objects have been removed).
  * There may be PG objects in this interval yet to be backfilled.
  *
- * All objects in PG in [MIN,backfill_info.begin) have been backfilled to
- * backfill_targets.  There may be objects on backfill_target yet to be deleted.
+ * All objects in PG in [MIN,backfill_info.begin) have been backfilled to all
+ * backfill_targets.  There may be objects on backfill_target(s) yet to be deleted.
 *
- * All objects < MIN(peer_backfill_info.begin, backfill_info.begin) in PG are
- * backfilled.  No deleted objects in this interval remain on backfill_target.
+ * For a backfill target, all objects < MIN(peer_backfill_info[target].begin,
+ * backfill_info.begin) in PG are backfilled.  No deleted objects in this
+ * interval remain on the backfill target.
 *
- * All objects <= peer_info[backfill_target].last_backfill have been backfilled
- * to backfill_target
+ * For a backfill target, all objects <= peer_info[target].last_backfill
+ * have been backfilled to target
 *
 * There *MAY* be objects between last_backfill_started and
- * MIN(peer_backfill_info.begin, backfill_info.begin) in the event that client
+ * MIN(peer_backfill_info[*].begin, backfill_info.begin) in the event that client
 * io created objects since the last scan.  For this reason, we call
 * update_range() again before continuing backfill.
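+ *
+ * Illustrative example (hypothetical values): with backfill_targets = {1, 2},
+ * peer_backfill_info[1].begin = A and peer_backfill_info[2].begin = B, A < B,
+ * earliest_peer_backfill() returns A.  Every object < A is already backfilled
+ * on both targets, while an object in [A, B) may still need to be pushed to
+ * (or removed from) osd.1 even though osd.2 has already passed it.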
 */
@@ -8783,45 +8821,31 @@ int ReplicatedPG::recover_backfill(
   int max,
   ThreadPool::TPHandle &handle, bool *work_started)
 {
-  dout(10) << "recover_backfill (" << max << ") " << backfill_targets
-           << " last_backfill_started " << last_backfill_started << dendl;
+  dout(10) << "recover_backfill (" << max << ")"
+           << " bft=" << backfill_targets
+           << " last_backfill_started " << last_backfill_started << dendl;
   assert(!backfill_targets.empty());

-  for (unsigned i = 0 ; i < backfill_targets.size(); ++i) {
-    int peer = backfill_targets[i];
-    pg_info_t& pinfo = peer_info[peer];
-    bool created = false;
-    map<int, BackfillInterval>::iterator it = peer_backfill_info.find(peer);
-
-    if (it == peer_backfill_info.end()) {
-      created = true;
-      BackfillInterval bi;
-      peer_backfill_info.insert(pair<int, BackfillInterval>(peer, bi));
-      it = peer_backfill_info.find(peer);
+  // Initialize from prior backfill state
+  if (new_backfill) {
+    // on_activate() was called prior to getting here
+    assert(last_backfill_started == earliest_backfill());
+    new_backfill = false;
+    for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+      int bt = backfill_targets[i];
+      peer_backfill_info[bt].reset(peer_info[bt].last_backfill);
     }
-
-    BackfillInterval& pbi = it->second;
-
-#if 0
-    // Initialize from prior backfill state
-    if (pbi.begin < pinfo.last_backfill)
-      pbi.reset(pinfo.last_backfill);
-#endif
-
-    dout(10) << " peer osd." << peer
-             << " info " << pinfo
-             << (created ? " created" : "") << " interval " << pbi.begin << "-" << pbi.end
-             << " " << pbi.objects.size() << " objects" << dendl;
+    backfill_info.reset(last_backfill_started);
   }
-  //XXX: Look through backfill_targets
-  pg_info_t& pinfo = peer_info[backfill_targets[0]];
-  BackfillInterval& pbi = peer_backfill_info.find(backfill_targets[0])->second;
-
-  // Initialize from prior backfill state
-  if (pbi.begin < pinfo.last_backfill) {
-    pbi.reset(pinfo.last_backfill);
-    backfill_info.reset(pinfo.last_backfill);
+  for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+    int bt = backfill_targets[i];
+    dout(10) << "peer osd."
+             << bt
+             << " info " << peer_info[bt]
+             << " interval " << peer_backfill_info[bt].begin
+             << "-" << peer_backfill_info[bt].end
+             << " " << peer_backfill_info[bt].objects.size() << " objects"
+             << dendl;
   }

   // update our local interval to cope with recent changes
@@ -8829,17 +8853,20 @@ int ReplicatedPG::recover_backfill(
   update_range(&backfill_info, handle);

   int ops = 0;
-  map<hobject_t, boost::tuple<eversion_t, eversion_t, ObjectContextRef> > to_push;
-  map<hobject_t, eversion_t> to_remove;
+  vector<boost::tuple<hobject_t, eversion_t, ObjectContextRef, vector<int> > > to_push;
+  vector<boost::tuple<hobject_t, eversion_t, int> > to_remove;
   set<hobject_t> add_to_stat;

-  pbi.trim_to(last_backfill_started);
+  for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+    int bt = backfill_targets[i];
+    peer_backfill_info[bt].trim_to(last_backfill_started);
+  }
   backfill_info.trim_to(last_backfill_started);

-  hobject_t backfill_pos = MIN(backfill_info.begin, pbi.begin);
+  hobject_t backfill_pos = MIN(backfill_info.begin, earliest_peer_backfill());

   while (ops < max) {
-    if (backfill_info.begin <= pbi.begin &&
+    if (backfill_info.begin <= earliest_peer_backfill() &&
        !backfill_info.extends_to_end() && backfill_info.empty()) {
       hobject_t next = backfill_info.end;
       backfill_info.clear();
@@ -8848,56 +8875,130 @@ int ReplicatedPG::recover_backfill(
       update_range(&backfill_info, handle);
       backfill_info.trim();
     }
-    backfill_pos = MIN(backfill_info.begin, pbi.begin);
+    backfill_pos = MIN(backfill_info.begin, earliest_peer_backfill());

-    dout(20) << " my backfill " << backfill_info.begin << "-" << backfill_info.end
-             << " " << backfill_info.objects << dendl;
-    dout(20) << " peer backfill " << pbi.begin << "-" << pbi.end << " " << pbi.objects << dendl;
+    dout(20) << " my backfill interval " << backfill_info.begin << "-" << backfill_info.end
+             << " " << backfill_info.objects.size() << " objects"
+             << " " << backfill_info.objects
+             << dendl;

-    if (pbi.begin <= backfill_info.begin &&
-        !pbi.extends_to_end() && pbi.empty()) {
-      dout(10) << " scanning peer osd." << backfill_targets[0] << " from " << pbi.end << dendl;
-      epoch_t e = get_osdmap()->get_epoch();
-      MOSDPGScan *m = new MOSDPGScan(MOSDPGScan::OP_SCAN_GET_DIGEST, e, e, info.pgid,
+    bool sent_scan = false;
+    for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+      int bt = backfill_targets[i];
+      BackfillInterval& pbi = peer_backfill_info[bt];
+
+      dout(20) << " peer osd." << bt << " backfill " << pbi.begin << "-"
+               << pbi.end << " " << pbi.objects << dendl;
+      if (pbi.begin <= backfill_info.begin &&
+          !pbi.extends_to_end() && pbi.empty()) {
+        dout(10) << " scanning peer osd."
<< bt << " from " << pbi.end << dendl; + epoch_t e = get_osdmap()->get_epoch(); + MOSDPGScan *m = new MOSDPGScan(MOSDPGScan::OP_SCAN_GET_DIGEST, e, e, info.pgid, pbi.end, hobject_t()); - osd->send_message_osd_cluster(backfill_targets[0], m, get_osdmap()->get_epoch()); - assert(waiting_on_backfill.find(backfill_targets[0]) == waiting_on_backfill.end()); - waiting_on_backfill.insert(backfill_targets[0]); - start_recovery_op(pbi.end); - ops++; - break; + osd->send_message_osd_cluster(bt, m, get_osdmap()->get_epoch()); + assert(waiting_on_backfill.find(bt) == waiting_on_backfill.end()); + waiting_on_backfill.insert(bt); + sent_scan = true; + } + } + + // Count simultaneous scans as a single op and let those complete + if (sent_scan) { + ops++; + start_recovery_op(hobject_t::get_max()); // XXX: was pbi.end + break; } - if (backfill_info.empty() && pbi.empty()) { - dout(10) << " reached end for both local and peer" << dendl; + if (backfill_info.empty() && all_peer_done()) { + dout(10) << " reached end for both local and all peers" << dendl; break; } - if (pbi.begin < backfill_info.begin) { - dout(20) << " removing peer " << pbi.begin << dendl; - to_remove[pbi.begin] = pbi.objects.begin()->second; - // Object was degraded, but won't be recovered - if (waiting_for_degraded_object.count(pbi.begin)) { - requeue_ops( - waiting_for_degraded_object[pbi.begin]); - waiting_for_degraded_object.erase(pbi.begin); + // Get object within set of peers to operate on and + // the set of targets for which that object applies. + hobject_t check = earliest_peer_backfill(); + + if (check < backfill_info.begin) { + + vector check_targets; + for (unsigned i = 0; i < backfill_targets.size(); ++i) { + int bt = backfill_targets[i]; + BackfillInterval& pbi = peer_backfill_info[bt]; + if (pbi.begin == check) + check_targets.push_back(bt); + } + assert(!check_targets.empty()); + + dout(20) << " BACKFILL removing " << check + << " from peers " << check_targets << dendl; + for (unsigned i = 0; i < check_targets.size(); ++i) { + int bt = check_targets[i]; + BackfillInterval& pbi = peer_backfill_info[bt]; + assert(pbi.begin == check); + + to_remove.push_back(boost::make_tuple(check, pbi.objects.begin()->second, bt)); + pbi.pop_front(); } - last_backfill_started = pbi.begin; - pbi.pop_front(); + last_backfill_started = check; // Don't increment ops here because deletions // are cheap and not replied to unlike real recovery_ops, // and we can't increment ops without requeueing ourself // for recovery. - } else if (pbi.begin == backfill_info.begin) { + } else { eversion_t& obj_v = backfill_info.objects.begin()->second; - if (pbi.objects.begin()->second != obj_v) { + + vector need_ver_targs, missing_targs, keep_ver_targs, skip_targs; + for (unsigned i = 0; i < backfill_targets.size(); ++i) { + int bt = backfill_targets[i]; + BackfillInterval& pbi = peer_backfill_info[bt]; + // Find all check peers that have the wrong version + if (check == backfill_info.begin && check == pbi.begin) { + if (pbi.objects.begin()->second != obj_v) { + need_ver_targs.push_back(bt); + } else { + keep_ver_targs.push_back(bt); + } + } else { + pg_info_t& pinfo = peer_info[bt]; + + // Only include peers that we've caught up to their backfill line + // otherwise, they only appear to be missing this object + // because their pbi.begin > backfill_info.begin. 
+          if (backfill_info.begin > pinfo.last_backfill)
+            missing_targs.push_back(bt);
+          else
+            skip_targs.push_back(bt);
+        }
+      }
+
+      if (!keep_ver_targs.empty()) {
+        // These peers have version obj_v
+        dout(20) << " BACKFILL keeping " << check
+                 << " with ver " << obj_v
+                 << " on peers " << keep_ver_targs << dendl;
+        //assert(!waiting_for_degraded_object.count(check));
+      }
+      if (!need_ver_targs.empty() || !missing_targs.empty()) {
        ObjectContextRef obc = get_object_context(backfill_info.begin, false);
        assert(obc);
        if (obc->get_backfill_read()) {
-          dout(20) << " replacing peer " << pbi.begin << " with local "
-                   << obj_v << dendl;
-          to_push[pbi.begin] = boost::make_tuple(
-            obj_v, pbi.objects.begin()->second, obc);
+          if (!need_ver_targs.empty()) {
+            dout(20) << " BACKFILL replacing " << check
+                     << " with ver " << obj_v
+                     << " to peers " << need_ver_targs << dendl;
+          }
+          if (!missing_targs.empty()) {
+            dout(20) << " BACKFILL pushing " << backfill_info.begin
+                     << " with ver " << obj_v
+                     << " to peers " << missing_targs << dendl;
+          }
+          vector<int> all_push = need_ver_targs;
+          all_push.insert(all_push.end(), missing_targs.begin(), missing_targs.end());
+
+          to_push.push_back(
+            boost::tuple<hobject_t, eversion_t, ObjectContextRef, vector<int> >
+              (backfill_info.begin, obj_v, obc, all_push));
+          // Count all simultaneous pushes of the same object as a single op
          ops++;
        } else {
          *work_started = true;
@@ -8905,44 +9006,26 @@ int ReplicatedPG::recover_backfill(
                   << "; could not get rw_manager lock" << dendl;
          break;
        }
-    } else {
-      dout(20) << " keeping peer " << pbi.begin << " "
-               << pbi.objects.begin()->second << dendl;
-      // Object was degraded, but won't be recovered
-      if (waiting_for_degraded_object.count(pbi.begin)) {
-        requeue_ops(waiting_for_degraded_object[pbi.begin]);
-        waiting_for_degraded_object.erase(pbi.begin);
-      }
      }
-    last_backfill_started = pbi.begin;
-    add_to_stat.insert(pbi.begin);
+      dout(20) << "need_ver_targs=" << need_ver_targs
+               << " keep_ver_targs=" << keep_ver_targs << dendl;
+      dout(20) << "backfill_targets=" << backfill_targets
+               << " missing_targs=" << missing_targs
+               << " skip_targs=" << skip_targs << dendl;
+
+      last_backfill_started = backfill_info.begin;
+      add_to_stat.insert(backfill_info.begin); // XXX: Only one for all pushes?
      backfill_info.pop_front();
-    pbi.pop_front();
-  } else {
-    ObjectContextRef obc = get_object_context(backfill_info.begin, false);
-    assert(obc);
-    if (obc->get_backfill_read()) {
-      dout(20) << " pushing local " << backfill_info.begin << " "
-               << backfill_info.objects.begin()->second
-               << " to peer osd."
-               << backfill_targets[0] << dendl;
-      to_push[backfill_info.begin] =
-        boost::make_tuple(
-          backfill_info.objects.begin()->second,
-          eversion_t(),
-          obc);
-      add_to_stat.insert(backfill_info.begin);
-      last_backfill_started = backfill_info.begin;
-      backfill_info.pop_front();
-      ops++;
-    } else {
-      *work_started = true;
-      dout(20) << "backfill blocking on " << backfill_info.begin
-               << "; could not get rw_manager lock" << dendl;
-      break;
+      vector<int> check_targets = need_ver_targs;
+      check_targets.insert(check_targets.end(), keep_ver_targs.begin(), keep_ver_targs.end());
+      for (unsigned i = 0; i < check_targets.size(); ++i) {
+        int bt = check_targets[i];
+        BackfillInterval& pbi = peer_backfill_info[bt];
+        pbi.pop_front();
      }
    }
  }
-  backfill_pos = MIN(backfill_info.begin, pbi.begin);
+  backfill_pos = MIN(backfill_info.begin, earliest_peer_backfill());

  for (set<hobject_t>::iterator i = add_to_stat.begin();
       i != add_to_stat.end();
       ++i) {
@@ -8952,32 +9035,24 @@ int ReplicatedPG::recover_backfill(
    add_object_context_to_pg_stat(obc, &stat);
    pending_backfill_updates[*i] = stat;
  }
-  for (map<hobject_t, eversion_t>::iterator i = to_remove.begin();
-       i != to_remove.end();
-       ++i) {
+  for (unsigned i = 0; i < to_remove.size(); ++i) {
    handle.reset_tp_timeout();

    // ordered before any subsequent updates
-    send_remove_op(i->first, i->second, backfill_targets[0]);
+    send_remove_op(to_remove[i].get<0>(), to_remove[i].get<1>(), to_remove[i].get<2>());

-    pending_backfill_updates[i->first]; // add empty stat!
+    pending_backfill_updates[to_remove[i].get<0>()]; // add empty stat!
  }

  PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op();
-  for (map<hobject_t, boost::tuple<eversion_t, eversion_t, ObjectContextRef> >::iterator i =
-         to_push.begin();
-       i != to_push.end();
-       ++i) {
+  for (unsigned i = 0; i < to_push.size(); ++i) {
    handle.reset_tp_timeout();
-    prep_backfill_object_push(
-      i->first, i->second.get<0>(), i->second.get<1>(), i->second.get<2>(),
-      backfill_targets[0], h);
+    prep_backfill_object_push(to_push[i].get<0>(), to_push[i].get<1>(),
+            to_push[i].get<2>(), to_push[i].get<3>(), h);
  }
  pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);

-  dout(5) << "backfill_pos is " << backfill_pos << " and pinfo.last_backfill is "
-          << pinfo.last_backfill << dendl;
+  dout(5) << "backfill_pos is " << backfill_pos << dendl;
  for (set<hobject_t>::iterator i = backfills_in_flight.begin();
       i != backfills_in_flight.end();
       ++i) {
@@ -8986,15 +9061,23 @@ int ReplicatedPG::recover_backfill(
  hobject_t next_backfill_to_complete = backfills_in_flight.size() ?
    *(backfills_in_flight.begin()) : backfill_pos;
-  hobject_t new_last_backfill = pinfo.last_backfill;
+  hobject_t new_last_backfill = earliest_backfill();
+  dout(10) << "starting new_last_backfill at " << new_last_backfill << dendl;
  for (map<hobject_t, pg_stat_t>::iterator i = pending_backfill_updates.begin();
       i != pending_backfill_updates.end() &&
         i->first < next_backfill_to_complete;
       pending_backfill_updates.erase(i++)) {
-    pinfo.stats.add(i->second);
    assert(i->first > new_last_backfill);
+    for (unsigned j = 0; j < backfill_targets.size(); ++j) {
+      int bt = backfill_targets[j];
+      pg_info_t& pinfo = peer_info[bt];
+      //Add stats to all peers that were missing object
+      if (i->first > pinfo.last_backfill)
+        pinfo.stats.add(i->second);
+    }
    new_last_backfill = i->first;
  }
+  dout(10) << "possible new_last_backfill at " << new_last_backfill << dendl;

  /* If last_backfill is snapdir, we know that head necessarily cannot exist,
   * therefore it's safe to bump the snap up to NOSNAP.
   * This is necessary
@@ -9014,9 +9097,14 @@ int ReplicatedPG::recover_backfill(
    new_last_backfill = backfill_pos;
    last_backfill_started = backfill_pos;
  }
-  for (unsigned i = 0 ; i < backfill_targets.size(); ++i) {
-    int peer = backfill_targets[i];
-    pg_info_t& pinfo = peer_info[peer];
+  dout(10) << "final new_last_backfill at " << new_last_backfill << dendl;
+
+  // If new_last_backfill == MAX, then we will send OP_BACKFILL_FINISH to
+  // all the backfill targets.  Otherwise, we will move last_backfill up on
+  // those targets that need it and send OP_BACKFILL_PROGRESS to them.
+  for (unsigned i = 0; i < backfill_targets.size(); ++i) {
+    int bt = backfill_targets[i];
+    pg_info_t& pinfo = peer_info[bt];
    if (new_last_backfill > pinfo.last_backfill) {
      pinfo.last_backfill = new_last_backfill;
@@ -9029,22 +9117,18 @@ int ReplicatedPG::recover_backfill(
       * backfilled portion in addition to continuing backfill.
       */
      pinfo.stats = info.stats;
-      //start_recovery_op(hobject_t::get_max());
+      start_recovery_op(hobject_t::get_max());
    } else {
      m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_PROGRESS, e, e, info.pgid);
      // Use default priority here, must match sub_op priority
    }
    m->last_backfill = pinfo.last_backfill;
    m->stats = pinfo.stats;
-    osd->send_message_osd_cluster(peer, m, get_osdmap()->get_epoch());
+    osd->send_message_osd_cluster(bt, m, get_osdmap()->get_epoch());
+    dout(10) << " peer osd." << bt
+             << " num_objects now " << pinfo.stats.stats.sum.num_objects
+             << " / " << info.stats.stats.sum.num_objects << dendl;
  }
-    dout(10) << " peer osd." << peer << " num_objects now " << pinfo.stats.stats.sum.num_objects
-             << " / " << info.stats.stats.sum.num_objects << dendl;
-  }
-
-  // XXX: This use to happen before a single send_message_osd_cluster() call
-  if (new_last_backfill.is_max()) {
-    start_recovery_op(hobject_t::get_max());
-  }

  if (ops)
@@ -9053,18 +9137,20 @@
 }

 void ReplicatedPG::prep_backfill_object_push(
-  hobject_t oid, eversion_t v, eversion_t have,
+  hobject_t oid, eversion_t v,
   ObjectContextRef obc,
-  int peer,
+  vector<int> peers,
   PGBackend::RecoveryHandle *h)
 {
-  dout(10) << "push_backfill_object " << oid << " v " << v << " to osd."
-           << peer << dendl;
-  assert(is_backfill_targets(peer));
+  dout(10) << "push_backfill_object " << oid << " v " << v << " to peers " << peers << dendl;
+  assert(!peers.empty());

   backfills_in_flight.insert(oid);
-  map<int, pg_missing_t>::iterator bpm = peer_missing.find(peer);
-  assert(bpm != peer_missing.end());
-  bpm->second.add(oid, eversion_t(), eversion_t());
+  for (unsigned int i = 0 ; i < peers.size(); ++i) {
+    map<int, pg_missing_t>::iterator bpm = peer_missing.find(peers[i]);
+    assert(bpm != peer_missing.end());
+    bpm->second.add(oid, eversion_t(), eversion_t());
+  }

   assert(!recovering.count(oid));
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 8ada9ca9a55..a43ebd5c71e 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -751,6 +751,7 @@ protected:

   /// last backfill operation started
   hobject_t last_backfill_started;
+  bool new_backfill;

   int prep_object_replica_pushes(const hobject_t& soid, eversion_t v,
                                  PGBackend::RecoveryHandle *h);
@@ -816,6 +817,8 @@ protected:

   int recover_primary(int max, ThreadPool::TPHandle &handle);
   int recover_replicas(int max, ThreadPool::TPHandle &handle);
+  hobject_t earliest_peer_backfill() const;
+  bool all_peer_done() const;
   /**
    * @param work_started will be set to true if recover_backfill got anywhere
    * @returns the number of operations started
@@ -843,8 +846,8 @@ protected:
     );

   void prep_backfill_object_push(
-    hobject_t oid, eversion_t v, eversion_t have, ObjectContextRef obc,
-    int peer,
+    hobject_t oid, eversion_t v, ObjectContextRef obc,
+    vector<int> peer,
     PGBackend::RecoveryHandle *h);

   void send_remove_op(const hobject_t& oid, eversion_t v, int peer);
-- 
2.47.3
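The loop in recover_backfill that splits backfill_targets into keep_ver_targs, need_ver_targs, missing_targs, and skip_targs is the heart of the multi-target change. The following standalone sketch (not part of the patch) mimics that classification using simplified stand-in types -- std::string in place of hobject_t and int in place of eversion_t -- purely to illustrate how a single object at backfill_info.begin can require different actions on different targets; all type names and values here are hypothetical.

// classify_targets_sketch.cc -- illustrative only, not Ceph code
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct TargetState {
  std::string begin;          // stand-in for peer_backfill_info[bt].begin
  std::string last_backfill;  // stand-in for peer_info[bt].last_backfill
  int version;                // stand-in for the object version at begin
};

int main() {
  const std::string check = "obj_100";        // earliest_peer_backfill()
  const std::string local_begin = "obj_100";  // backfill_info.begin
  const int local_version = 7;                // backfill_info.objects.begin()->second

  // Hypothetical per-target state for four backfill targets.
  std::map<int, TargetState> targets = {
    {1, {"obj_100", "obj_099", 7}},  // same object, same version    -> keep_ver
    {2, {"obj_100", "obj_099", 5}},  // same object, stale version   -> need_ver
    {3, {"obj_200", "obj_050", 0}},  // behind primary's scan line   -> missing
    {4, {"obj_200", "obj_150", 0}},  // already past this object     -> skip
  };

  std::vector<int> keep_ver, need_ver, missing, skip;
  for (const auto& [bt, st] : targets) {
    if (check == local_begin && check == st.begin) {
      // Target's scan interval starts at the same object the primary is on.
      if (st.version != local_version)
        need_ver.push_back(bt);
      else
        keep_ver.push_back(bt);
    } else if (local_begin > st.last_backfill) {
      // Primary has passed this target's last_backfill: the object is missing there.
      missing.push_back(bt);
    } else {
      // Target has not been scanned this far yet; it will see the object later.
      skip.push_back(bt);
    }
  }

  auto dump = [](const char* name, const std::vector<int>& v) {
    std::cout << name << ":";
    for (int bt : v) std::cout << " osd." << bt;
    std::cout << "\n";
  };
  dump("keep_ver_targs", keep_ver);
  dump("need_ver_targs", need_ver);
  dump("missing_targs", missing);
  dump("skip_targs", skip);
  return 0;
}

Under these assumed inputs the sketch prints osd.1 as keep_ver, osd.2 as need_ver, osd.3 as missing, and osd.4 as skip, mirroring how the patch pushes one object to the union of need_ver_targs and missing_targs while counting it as a single op.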