eversion_t log_tail; // oldest log entry.
- interval_set<uint64_t> incomplete; // incomplete hash ranges prior to last_complete
+ hobject_t last_backfill; // objects >= this and < last_complete may be missing
interval_set<snapid_t> purged_snaps;
}
} history;
- Info() {}
- Info(pg_t p) : pgid(p) { }
+ Info()
+ : last_backfill(hobject_t::get_max()) // max sentinel: is_incomplete() is false for a fresh Info
+ { }
+ Info(pg_t p)
+ : pgid(p),
+ last_backfill(hobject_t::get_max()) // same max sentinel as the default constructor
+ { }
bool is_empty() const { return last_update.version == 0; }
bool dne() const { return history.epoch_created == 0; }
- bool is_incomplete() const { return !incomplete.empty(); }
+ bool is_incomplete() const { return last_backfill != hobject_t::get_max(); } // true whenever last_backfill differs from the max sentinel
void encode(bufferlist &bl) const {
__u8 v = 25;
::encode(last_update, bl);
::encode(last_complete, bl);
::encode(log_tail, bl);
- ::encode(incomplete, bl);
+ ::encode(last_backfill, bl);
::encode(stats, bl);
history.encode(bl);
::encode(purged_snaps, bl);
::decode(log_backlog, bl);
}
if (v >= 24)
- ::decode(incomplete, bl);
+ ::decode(last_backfill, bl); // NOTE(review): this slot previously held interval_set<uint64_t> under the same v >= 24 gate, and encode still writes v = 25 -- confirm no old-format encodings need to be decoded
::decode(stats, bl);
history.decode(bl);
if (v >= 22)
};
BackfillInterval backfill_info;
- map<int,BackfillInterval> peer_backfill_info;
+ int backfill_target; // osd being backfilled; recover_backfill picks one when this is < 0
+ BackfillInterval peer_backfill_info; // scan interval for backfill_target only (digest replies assert from == backfill_target)
epoch_t last_peering_reset;
if (pgi.last_complete != pgi.last_update)
out << " lc " << pgi.last_complete;
out << " (" << pgi.log_tail << "," << pgi.last_update << "]";
- if (!pgi.incomplete.empty())
- out << " incomp " << std::hex << pgi.incomplete << std::dec;
+ if (pgi.is_incomplete())
+ out << " lb " << pgi.last_backfill;
}
//out << " c " << pgi.epoch_created;
out << " n=" << pgi.stats.stats.sum.num_objects;
case MOSDPGScan::OP_SCAN_DIGEST:
{
int from = m->get_source().num();
- BackfillInterval& bi = peer_backfill_info[from];
+ assert(from == backfill_target);
+ BackfillInterval& bi = peer_backfill_info;
bi.begin = m->begin;
bi.end = m->end;
bufferlist::iterator p = m->get_data().begin();
dout(10) << "do_backfill " << *m << dendl;
switch (m->op) {
- case MOSDPGBackfill::OP_BACKFILL_PROGRESS:
+ case MOSDPGBackfill::OP_BACKFILL_FINISH:
{
- assert(get_role() < 0);
-
- info.incomplete = m->incomplete;
-
- ObjectStore::Transaction *t = new ObjectStore::Transaction;
- write_info(*t);
- int tr = osd->store->queue_transaction(&osr, t);
- assert(tr == 0);
+ MOSDPGBackfill *reply = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH_ACK,
+ get_osdmap()->get_epoch(), m->query_epoch,
+ info.pgid);
+ osd->cluster_messenger->send_message(reply, m->get_connection()); // NOTE(review): ack is now sent before the info/log transaction is queued (it used to follow queue_transaction) -- confirm this ordering is intended
}
- break;
+ // fall-thru: FINISH applies the same last_complete/last_backfill update and log reset as PROGRESS
- case MOSDPGBackfill::OP_BACKFILL_FINISH:
+ case MOSDPGBackfill::OP_BACKFILL_PROGRESS:
{
assert(get_role() < 0);
- info.last_complete = info.last_update;
- info.incomplete.clear();
-
- ObjectStore::Transaction *t = new ObjectStore::Transaction;
+
+ info.last_update = m->last_complete; // senders in recover_backfill fill m->last_complete from their own info.last_update
+ info.last_complete = m->last_complete;
+ info.last_backfill = m->last_backfill;
+
log.clear();
log.head = info.last_update;
log.tail = info.last_update;
+
+ info.log_tail = log.tail; // log was just cleared, so head == tail == info.last_update
+
+ ObjectStore::Transaction *t = new ObjectStore::Transaction;
write_log(*t);
write_info(*t);
int tr = osd->store->queue_transaction(&osr, t);
assert(tr == 0);
-
- MOSDPGBackfill *reply = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH_ACK,
- get_osdmap()->get_epoch(), m->query_epoch, info.pgid);
- osd->cluster_messenger->send_message(reply, m->get_connection());
}
break;
dout(10) << "recover_backfill (" << max << ")" << dendl;
assert(!backfill.empty());
- // initially just backfill one peer at a time. FIXME.
- int peer = *backfill.begin();
- Info& pinfo = peer_info[peer];
- BackfillInterval& pbi = peer_backfill_info[peer];
-
- dout(10) << " peer osd." << peer << " " << pinfo
- << " interval " << pbi.begin << "-" << pbi.end << " " << pbi.objects.size() << " objects" << dendl;
+ // backfill one peer at a time.
+ if (backfill_target < 0) { // no target chosen yet -- NOTE(review): initialization/reset of backfill_target is not visible here; confirm it starts < 0 and is reset when a peer finishes
+ backfill_target = *backfill.begin();
+ dout(10) << " chose backfill target osd." << backfill_target << dendl;
+ }
+ Info& pinfo = peer_info[backfill_target];
+ BackfillInterval& pbi = peer_backfill_info;
+ dout(10) << " peer osd." << backfill_target << " " << pinfo
+ << " interval " << pbi.begin << "-" << pbi.end
+ << " " << pbi.objects.size() << " objects" << dendl;
+
// does the pg exist yet on the peer?
if (pinfo.dne()) {
// ok, we know they have no objects.
// fill in pinfo
pinfo.last_update = info.last_update;
pinfo.log_tail = info.last_update;
- pinfo.incomplete.insert(0, 0x100000000ull);
+ pinfo.last_backfill = hobject_t(); // peer has no objects yet -- presumably a default hobject_t sorts before every real object; TODO confirm
pinfo.history = info.history;
- dout(10) << " peer osd." << peer << " pg dne; setting info to " << pinfo << dendl;
+ dout(10) << " peer osd." << backfill_target << " pg dne; setting info to " << pinfo << dendl;
// create pg on remote
MOSDPGInfo *mp = new MOSDPGInfo(get_osdmap()->get_epoch());
mp->pg_info.push_back(pinfo);
- osd->cluster_messenger->send_message(mp, get_osdmap()->get_cluster_inst(peer));
+ osd->cluster_messenger->send_message(mp, get_osdmap()->get_cluster_inst(backfill_target));
}
int ops = 0;
epoch_t e = get_osdmap()->get_epoch();
MOSDPGScan *m = new MOSDPGScan(MOSDPGScan::OP_SCAN_GET_DIGEST, e, e, info.pgid,
pbi.end, hobject_t());
- osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(peer));
+ osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(backfill_target));
start_recovery_op(pbi.end);
ops++;
break;
if (pbi.begin != hobject_t::get_max()) {
pbi.begin = hobject_t::get_max();
- pinfo.incomplete.clear();
+ pinfo.last_backfill = hobject_t::get_max(); // max sentinel: peer info no longer reports is_incomplete()
epoch_t e = get_osdmap()->get_epoch();
MOSDPGBackfill *m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH, e, e, info.pgid);
- osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(peer));
+ m->last_complete = info.last_update; // do_backfill copies this into info.last_update and info.last_complete
+ m->last_backfill = hobject_t::get_max();
+ osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(backfill_target));
start_recovery_op(hobject_t::get_max());
ops++;
}
assert(pf < backfill_info.end);
dout(20) << " removing peer " << pf << " <= local end " << backfill_info.end << dendl;
- send_remove_op(pf, pv, peer);
+ send_remove_op(pf, pv, backfill_target);
pbi.pop_front();
continue;
}
if (pbi.empty()) {
assert(pbi.at_end());
dout(20) << " pushing local " << my_first << " " << backfill_info.objects.begin()->second
- << " to peer osd." << peer << dendl;
- push_backfill_object(my_first, mv, peer);
+ << " to peer osd." << backfill_target << dendl;
+ push_backfill_object(my_first, mv, backfill_target);
backfill_info.pop_front();
pbi.begin = my_first;
++ops;
if (peer_first < my_first) {
dout(20) << " removing peer " << peer_first << " <= local " << my_first << dendl;
- send_remove_op(peer_first, pv, peer);
+ send_remove_op(peer_first, pv, backfill_target);
pbi.pop_front();
} else if (peer_first == my_first) {
if (pv == mv) {
dout(20) << " keeping peer " << peer_first << " " << pv << dendl;
} else {
dout(20) << " replacing peer " << peer_first << " with local " << mv << dendl;
- push_backfill_object(my_first, mv, peer);
+ push_backfill_object(my_first, mv, backfill_target);
++ops;
}
pbi.pop_front();
} else {
// peer_first > my_first
dout(20) << " pushing local " << my_first << " " << mv
- << " to peer osd." << peer << dendl;
- push_backfill_object(my_first, mv, peer);
+ << " to peer osd." << backfill_target << dendl;
+ push_backfill_object(my_first, mv, backfill_target);
backfill_info.pop_front();
++ops;
}
}
- if (!pinfo.incomplete.empty()) {
- hobject_t b;
- b.set_filestore_key(pinfo.incomplete.range_start());
- dout(20) << " b " << b << " pbi.begin " << pbi.begin << " " << pinfo << dendl;
- if (b < pbi.begin) {
- pinfo.incomplete.erase(b.get_filestore_key(), pbi.begin.get_filestore_key() - b.get_filestore_key());
- dout(10) << " peer osd." << peer << " info.incomplete now "
- << std::hex << pinfo.incomplete << std::dec << dendl;
-
- epoch_t e = get_osdmap()->get_epoch();
- MOSDPGBackfill *m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_PROGRESS, e, e, info.pgid);
- m->incomplete = pinfo.incomplete;
- osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(peer));
- }
+ if (pinfo.last_backfill < pbi.begin) { // peer interval start has moved past the last value recorded in pinfo
+ pinfo.last_backfill = pbi.begin;
+
+ dout(10) << " peer osd." << backfill_target << " info.last_backfill now " << pinfo.last_backfill << dendl;
+
+ epoch_t e = get_osdmap()->get_epoch();
+ MOSDPGBackfill *m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_PROGRESS, e, e, info.pgid);
+ m->last_backfill = pinfo.last_backfill;
+ m->last_complete = info.last_update; // do_backfill copies this into info.last_update and info.last_complete
+ osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(backfill_target));
}
return ops;
}