compat_mode = false;
}
- // For now we only backfill 1 at a time as before
- if (!backfill.empty())
- backfill.resize(1);
+ if (compat_mode && !backfill.empty()) {
+ backfill.resize(1);
+ }
// This might cause a problem if min_size is large
// and we need to backfill more than 1 osd. Older
}
}
- if (pg.get_backfill_target() >= 0)
- out << " bft=" << pg.get_backfill_target();
+ if (!pg.backfill_targets.empty())
+ out << " bft=" << pg.backfill_targets;
if (pg.last_complete_ondisk != pg.info.last_complete)
out << " lcod " << pg.last_complete_ondisk;
PG *pg = context< RecoveryMachine >().pg;
pg->backfill_reserved = true;
pg->osd->queue_for_recovery(pg);
+ pg->state_clear(PG_STATE_BACKFILL_TOOFULL);
pg->state_clear(PG_STATE_BACKFILL_WAIT);
pg->state_set(PG_STATE_BACKFILL);
}
PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_context ctx)
: my_base(ctx),
- NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitRemoteBackfillReserved")
+ NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active/WaitRemoteBackfillReserved"),
+ backfill_osd_it(context< Active >().sorted_backfill_set.begin()) // start at the first entry of Active's sorted_backfill_set
{
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
pg->state_set(PG_STATE_BACKFILL_WAIT);
- ConnectionRef con = pg->osd->get_con_osd_cluster(
- pg->get_backfill_target(), pg->get_osdmap()->get_epoch());
- if (con) {
- if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
- unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH
+ post_event(RemoteBackfillReserved()); // self-posted event: drives the first pass through react(RemoteBackfillReserved), which handles the target at backfill_osd_it
+} // the constructor no longer contacts any OSD itself; that work now lives in react(RemoteBackfillReserved)
+
+boost::statechart::result
+PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteBackfillReserved &evt)
+{ // One reservation step per event: handle the OSD at backfill_osd_it, or post AllBackfillsReserved once sorted_backfill_set is exhausted.
+ PG *pg = context< RecoveryMachine >().pg;
+
+ if (backfill_osd_it != context< Active >().sorted_backfill_set.end()) { // targets remain: process the one the iterator points at
+ // The primary never backfills itself, so its own id must not appear in the set
+ assert(*backfill_osd_it != pg->osd->whoami);
+ ConnectionRef con = pg->osd->get_con_osd_cluster(*backfill_osd_it, pg->get_osdmap()->get_epoch());
+ if (con) { // NOTE(review): when no connection is returned nothing is sent or posted, yet the iterator still advances below -- confirm another path re-drives this state
+ if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) { // peer advertises the reservation feature: send it an explicit REQUEST
+ unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH // degraded PGs request at BACKFILL_HIGH, others at BACKFILL_LOW
: OSDService::BACKFILL_LOW;
- pg->osd->send_message_osd_cluster(
- new MBackfillReserve(
+ pg->osd->send_message_osd_cluster(
+ new MBackfillReserve(
MBackfillReserve::REQUEST,
pg->info.pgid,
pg->get_osdmap()->get_epoch(), priority),
con.get());
- } else {
- post_event(RemoteBackfillReserved());
+ } else { // peer lacks CEPH_FEATURE_BACKFILL_RESERVATION: no request is sent
+ post_event(RemoteBackfillReserved()); // post the event ourselves so the next target is processed
+ }
}
+ ++backfill_osd_it; // advance so the next RemoteBackfillReserved event handles the following target
+ } else { // every entry of sorted_backfill_set has been processed
+ post_event(AllBackfillsReserved()); // this state's reactions list maps AllBackfillsReserved to a transition into Backfilling
}
+ return discard_event(); // consume the event without leaving this state
}
void PG::RecoveryState::WaitRemoteBackfillReserved::exit()
pg->osd->recoverystate_perf->tinc(rs_waitremotebackfillreserved_latency, dur);
}
-boost::statechart::result
-PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteBackfillReserved &evt)
-{
- PG *pg = context< RecoveryMachine >().pg;
- pg->state_clear(PG_STATE_BACKFILL_TOOFULL);
- return transit<Backfilling>();
-}
-
boost::statechart::result
PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteReservationRejected &evt)
{
{
PG *pg = context< RecoveryMachine >().pg;
assert(!pg->pg_log.get_missing().have_missing());
- pg->state_clear(PG_STATE_RECOVERING);
// release remote reservations
for (set<int>::const_iterator i = context< Active >().sorted_acting_set.begin();
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active"),
sorted_acting_set(context< RecoveryMachine >().pg->actingbackfill.begin(),
context< RecoveryMachine >().pg->actingbackfill.end()),
+ sorted_backfill_set(context< RecoveryMachine >().pg->backfill_targets.begin(),
+ context< RecoveryMachine >().pg->backfill_targets.end()),
all_replicas_activated(false)
{
context< RecoveryMachine >().log_enter(state_name);
BackfillInterval backfill_info;
BackfillInterval peer_backfill_info;
- vector<int> backfill_targets;
bool backfill_reserved;
bool backfill_reserving;
friend class OSD;
public:
- // Compatibility with single backfill target code
- int get_backfill_target() const {
- int backfill_target = -1;
- if (backfill_targets.size() > 0)
- backfill_target = backfill_targets[0];
- return backfill_target;
- }
+ vector<int> backfill_targets;
protected:
TrivialEvent(LocalRecoveryReserved)
TrivialEvent(RemoteRecoveryReserved)
TrivialEvent(AllRemotesReserved)
+ TrivialEvent(AllBackfillsReserved)
TrivialEvent(Recovering)
- TrivialEvent(WaitRemoteBackfillReserved)
TrivialEvent(GoClean)
TrivialEvent(AllReplicasActivated)
void exit();
const set<int> sorted_acting_set;
+ const set<int> sorted_backfill_set;
bool all_replicas_activated;
typedef boost::mpl::list <
struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState {
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteBackfillReserved >,
- boost::statechart::custom_reaction< RemoteReservationRejected >
+ boost::statechart::custom_reaction< RemoteReservationRejected >,
+ boost::statechart::transition< AllBackfillsReserved, Backfilling >
> reactions;
+ set<int>::const_iterator backfill_osd_it;
WaitRemoteBackfillReserved(my_context ctx);
void exit();
boost::statechart::result react(const RemoteBackfillReserved& evt);
publish_stats_to_osd();
// done!
peer_missing[peer].got(soid, recovery_info.version);
- if (peer == get_backfill_target() && backfills_in_flight.count(soid)) {
+ if (!backfill_targets.empty() && peer == backfill_targets[0]
+ && backfills_in_flight.count(soid)) {
map<hobject_t, ObjectContextRef>::iterator i = recovering.find(soid);
assert(i != recovering.end());
list<OpRequestRef> requeue_list;
// Object is degraded if after last_backfill AND
// we are backfilling it
- if (peer == get_backfill_target() &&
+ if (!backfill_targets.empty() && peer == backfill_targets[0] &&
peer_info[peer].last_backfill <= soid &&
last_backfill_started >= soid &&
backfills_in_flight.count(soid))
// The last_backfill_started is used as the backfill line since
// that determines the boundary for writes.
pg_info_t *backfill_target_info = NULL;
- int backfill_target = get_backfill_target();
bool before_backfill = false;
- if (backfill_target >= 0) {
- backfill_target_info = &peer_info[backfill_target];
+ if (!backfill_targets.empty()) {
+ backfill_target_info = &peer_info[backfill_targets[0]];
before_backfill = obc->obs.oi.soid <= last_backfill_started;
}
case MOSDPGScan::OP_SCAN_DIGEST:
{
int from = m->get_source().num();
- assert(from == get_backfill_target());
+ // XXX: should check that 'from' is one of the entries in the backfill_targets vector
+ //assert(from == get_backfill_target());
BackfillInterval& bi = peer_backfill_info;
bi.begin = m->begin;
bi.end = m->end;
ctx->obc->ssc->snapset = ctx->new_snapset;
info.stats.stats.add(ctx->delta_stats, ctx->obs->oi.category);
- int backfill_target = get_backfill_target();
- if (backfill_target >= 0) {
- pg_info_t& pinfo = peer_info[backfill_target];
+ if (!backfill_targets.empty()) {
+ pg_info_t& pinfo = peer_info[backfill_targets[0]];
if (soid <= pinfo.last_backfill)
pinfo.stats.stats.add(ctx->delta_stats, ctx->obs->oi.category);
else if (soid <= last_backfill_started)
assert(0 == "broken implementation, do not use");
}
- int backfill_target = get_backfill_target();
// ship resulting transaction, log entries, and pg_stats
- if (peer == backfill_target && soid > last_backfill_started &&
+ if (!backfill_targets.empty() && peer == backfill_targets[0] && soid > last_backfill_started &&
// only skip normal (not temp pool=-1) objects
soid.pool == (int64_t)info.pgid.pool()) {
dout(10) << "issue_repop shipping empty opt to osd." << peer
::encode(repop->ctx->log, wr->logbl);
- if (backfill_target >= 0 && backfill_target == peer)
+ if (!backfill_targets.empty() && backfill_targets[0] == peer)
wr->pg_stats = pinfo.stats; // reflects backfill progress
else
wr->pg_stats = info.stats;
for (unsigned i=1; i<actingbackfill.size(); ++i) {
int peer = actingbackfill[i];
if (!peer_missing[peer].is_missing(oid)) {
- assert(peer == get_backfill_target());
+ assert(!backfill_targets.empty() && peer == backfill_targets[0]);
continue;
}
eversion_t h = peer_missing[peer].missing[oid].have;
// For now only care about a single backfill at a time
void ReplicatedPG::on_activate()
{
- int backfill_target = get_backfill_target();
- if (backfill_target != -1) {
- last_backfill_started = peer_info[backfill_target].last_backfill;
+ if (!backfill_targets.empty()) {
+ last_backfill_started = peer_info[backfill_targets[0]].last_backfill;
assert(!last_backfill_started.is_max());
- dout(10) << " chose backfill target osd." << backfill_target
- << " from " << last_backfill_started << dendl;
+ dout(10) << " chose backfill target osd." << backfill_targets[0]
+ << " from " << last_backfill_started << dendl;
}
hit_set_setup();
work_in_progress = true;
bool deferred_backfill = false;
- int backfill_target = get_backfill_target();
if (recovering.empty() &&
state_test(PG_STATE_BACKFILL) &&
- backfill_target >= 0 && started < max &&
+ !backfill_targets.empty() && started < max &&
missing.num_missing() == 0 &&
!waiting_on_backfill) {
if (get_osdmap()->test_flag(CEPH_OSDMAP_NOBACKFILL)) {
ThreadPool::TPHandle &handle, bool *work_started)
{
dout(10) << "recover_backfill (" << max << ")" << dendl;
- int backfill_target = get_backfill_target();
- assert(backfill_target >= 0);
+ assert(!backfill_targets.empty());
- pg_info_t& pinfo = peer_info[backfill_target];
+ // XXX: only backfill_targets[0] is used below; this should look through all of backfill_targets
+ pg_info_t& pinfo = peer_info[backfill_targets[0]];
BackfillInterval& pbi = peer_backfill_info;
// Initialize from prior backfill state
backfill_info.reset(pinfo.last_backfill);
}
- dout(10) << " peer osd." << backfill_target
+ dout(10) << " peer osd." << backfill_targets[0]
<< " last_backfill_started " << last_backfill_started
<< " info " << pinfo
<< " interval " << pbi.begin << "-" << pbi.end
if (pbi.begin <= backfill_info.begin &&
!pbi.extends_to_end() && pbi.empty()) {
- dout(10) << " scanning peer osd." << backfill_target << " from " << pbi.end << dendl;
+ dout(10) << " scanning peer osd." << backfill_targets[0] << " from " << pbi.end << dendl;
epoch_t e = get_osdmap()->get_epoch();
MOSDPGScan *m = new MOSDPGScan(MOSDPGScan::OP_SCAN_GET_DIGEST, e, e, info.pgid,
pbi.end, hobject_t());
- osd->send_message_osd_cluster(backfill_target, m, get_osdmap()->get_epoch());
+ osd->send_message_osd_cluster(backfill_targets[0], m, get_osdmap()->get_epoch());
waiting_on_backfill = true;
start_recovery_op(pbi.end);
ops++;
if (obc->get_backfill_read()) {
dout(20) << " pushing local " << backfill_info.begin << " "
<< backfill_info.objects.begin()->second
- << " to peer osd." << backfill_target << dendl;
+ << " to peer osd." << backfill_targets[0] << dendl;
to_push[backfill_info.begin] =
boost::make_tuple(
backfill_info.objects.begin()->second,
handle.reset_tp_timeout();
// ordered before any subsequent updates
- send_remove_op(i->first, i->second, backfill_target);
+ send_remove_op(i->first, i->second, backfill_targets[0]);
pending_backfill_updates[i->first]; // add empty stat!
}
handle.reset_tp_timeout();
prep_backfill_object_push(
i->first, i->second.get<0>(), i->second.get<1>(), i->second.get<2>(),
- backfill_target, h);
+ backfill_targets[0], h);
}
pgbackend->run_recovery_op(h, cct->_conf->osd_recovery_op_priority);
}
m->last_backfill = pinfo.last_backfill;
m->stats = pinfo.stats;
- osd->send_message_osd_cluster(backfill_target, m, get_osdmap()->get_epoch());
+ osd->send_message_osd_cluster(backfill_targets[0], m, get_osdmap()->get_epoch());
}
dout(10) << " peer num_objects now " << pinfo.stats.stats.sum.num_objects
PGBackend::RecoveryHandle *h)
{
dout(10) << "push_backfill_object " << oid << " v " << v << " to osd." << peer << dendl;
+ assert(!backfill_targets.empty());
backfills_in_flight.insert(oid);
- map<int, pg_missing_t>::iterator bpm = peer_missing.find(get_backfill_target());
+ map<int, pg_missing_t>::iterator bpm = peer_missing.find(backfill_targets[0]);
assert(bpm != peer_missing.end());
bpm->second.add(oid, eversion_t(), eversion_t());