From b99e135848ca5666308344cf5ecc9c7f95f30137 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 5 Dec 2011 17:25:21 -0800 Subject: [PATCH] osd: make backfill (basically) work again Still need to handle concurrent updates, log recovery vs backfill, etc. Signed-off-by: Sage Weil --- src/messages/MOSDPGBackfill.h | 4 --- src/osd/PG.cc | 63 ++++++++++++++++++++++++++++++----- src/osd/PG.h | 5 +++ src/osd/ReplicatedPG.cc | 10 ++---- 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/messages/MOSDPGBackfill.h b/src/messages/MOSDPGBackfill.h index 917e1ae8db9c8..1a3af6c38441f 100644 --- a/src/messages/MOSDPGBackfill.h +++ b/src/messages/MOSDPGBackfill.h @@ -38,7 +38,6 @@ public: epoch_t map_epoch, query_epoch; pg_t pgid; hobject_t last_backfill; - eversion_t last_complete; virtual void decode_payload(CephContext *cct) { bufferlist::iterator p = payload.begin(); @@ -47,7 +46,6 @@ public: ::decode(query_epoch, p); ::decode(pgid, p); ::decode(last_backfill, p); - ::decode(last_complete, p); } virtual void encode_payload(CephContext *cct) { @@ -56,7 +54,6 @@ public: ::encode(query_epoch, payload); ::encode(pgid, payload); ::encode(last_backfill, payload); - ::encode(last_complete, payload); } MOSDPGBackfill() : Message(MSG_OSD_PG_BACKFILL) {} @@ -75,7 +72,6 @@ public: out << "pg_backfill(" << get_op_name(op) << " " << pgid << " e " << map_epoch << "/" << query_epoch - << " lc " << last_complete << " lb " << last_backfill << ")"; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index c8d8c005861ac..62a2f6931049d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -180,6 +180,14 @@ void PG::proc_replica_log(ObjectStore::Transaction& t, lu = pp->version; break; } + + if (oe.soid > oinfo.last_backfill) { + // past backfill line, don't care + dout(10) << " had " << oe << " beyond last_backfill : skipping" << dendl; + ++pp; + continue; + } + if (ne.version > oe.version) { dout(10) << " had " << oe << " new " << ne << " : new will supercede" << dendl; } else { @@ -467,6 +475,15 @@ bool PG::search_for_missing(const Info &oinfo, const Missing *omissing, << dendl; continue; } + if (p->first >= oinfo.last_backfill) { + // FIXME: this is _probably_ true, although it could conceivably + // be in the undefined region! Hmm! + dout(10) << "search_for_missing " << soid << " " << need + << " also missing on osd." << fromosd + << " (past last_backfill " << oinfo.last_backfill << ")" + << dendl; + continue; + } if (oinfo.last_complete < need) { if (!omissing) { // We know that the peer lacks some objects at the revision we need. @@ -617,6 +634,11 @@ bool PG::is_all_uptodate() const dout(10) << __func__ << ": osd." << peer << " has " << pm->second.num_missing() << " missing" << dendl; uptodate = false; } + map::const_iterator pi = peer_info.find(peer); + if (pi->second.last_backfill != hobject_t::get_max()) { + dout(10) << __func__ << ": osd." << peer << " has last_backfill " << pi->second.last_backfill << dendl; + uptodate = false; + } } if (uptodate) @@ -1186,6 +1208,27 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, dout(10) << "activate peer osd." << peer << " " << pi << dendl; + if (log.tail > pi.last_update) { + // reset, backfill + pi.last_update = info.last_update; + pi.last_complete = info.last_complete; + pi.last_backfill = hobject_t(); + pi.history = info.history; + + peer_missing[peer].clear(); + + dout(10) << "activate peer osd." << peer << " must restart backfill, sending info " << pi << dendl; + if (activator_map) { + if (activator_map->count(peer) == 0) + (*activator_map)[peer] = new MOSDPGInfo(get_osdmap()->get_epoch()); + (*activator_map)[peer]->pg_info.push_back(pi); + } else { + m = new MOSDPGLog(get_osdmap()->get_epoch(), pi); + osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(peer)); + } + continue; + } + if (pi.last_update == info.last_update) { // empty log if (!pi.is_empty() && activator_map) { @@ -1197,10 +1240,9 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, dout(10) << "activate peer osd." << peer << " is up to date, but sending pg_log anyway" << dendl; m = new MOSDPGLog(get_osdmap()->get_epoch(), info); } - } - else { - m = new MOSDPGLog(get_osdmap()->get_epoch(), info); + } else { assert(log.tail <= pi.last_update); + m = new MOSDPGLog(get_osdmap()->get_epoch(), info); // send new stuff to append to replicas log assert(info.last_update > pi.last_update); m->log.copy_after(log, pi.last_update); @@ -1213,9 +1255,9 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, eversion_t plu = pi.last_update; for (list::iterator p = m->log.log.begin(); p != m->log.log.end(); - p++) - if (p->version > plu) - pm.add_next_event(*p); + p++) + if (p->soid < pi.last_backfill) + pm.add_next_event(*p); } if (m) { @@ -1232,8 +1274,7 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, pi.last_complete = pi.last_update; dout(10) << "activate peer osd." << peer << " " << pi << " uptodate" << dendl; } else { - dout(10) << "activate peer osd." << peer << " " << pi - << " missing " << pm << dendl; + dout(10) << "activate peer osd." << peer << " " << pi << " missing " << pm << dendl; } } @@ -4222,6 +4263,12 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx) if (pi.is_empty()) continue; // no pg data, nothing divergent + if (pi.last_update < pg->log.tail) { + dout(10) << " osd." << *i << " is not contiguous, will restart backfill" << dendl; + pg->peer_missing[*i]; + continue; + } + if (pi.last_update == pi.last_complete && // peer has no missing pi.last_update == pg->info.last_update) { // peer is up to date // replica has no missing and identical log as us. no need to diff --git a/src/osd/PG.h b/src/osd/PG.h index 9e252966752b5..7487095e6d85b 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -718,6 +718,11 @@ public: void got(const hobject_t& oid, eversion_t v); void got(const std::map::iterator &m); + void clear() { + missing.clear(); + rmissing.clear(); + } + void encode(bufferlist &bl) const { __u8 struct_v = 1; ::encode(struct_v, bl); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 51ca7b7072329..1175067db52ce 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -880,7 +880,7 @@ void ReplicatedPG::do_backfill(MOSDPGBackfill *m) switch (m->op) { case MOSDPGBackfill::OP_BACKFILL_FINISH: { - assert(get_role() < 0); + assert(is_replica()); assert(g_conf->osd_kill_backfill_at != 1); MOSDPGBackfill *reply = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH_ACK, @@ -892,11 +892,9 @@ void ReplicatedPG::do_backfill(MOSDPGBackfill *m) case MOSDPGBackfill::OP_BACKFILL_PROGRESS: { - assert(get_role() < 0); + assert(is_replica()); assert(g_conf->osd_kill_backfill_at != 2); - info.last_update = m->last_complete; - info.last_complete = m->last_complete; info.last_backfill = m->last_backfill; log.clear(); @@ -4105,7 +4103,7 @@ void ReplicatedPG::sub_op_push_reply(MOSDSubOpReply *reply) pi->data_subset_pushing, pi->clone_subsets); } else { // done! - if (pi->version > peer_info[peer].log_tail) + if (peer_missing[peer].is_missing(soid)) // so that we ignore backfill; imprecise! peer_missing[peer].got(soid, pi->version); pushing[soid].erase(peer); @@ -5336,7 +5334,6 @@ int ReplicatedPG::recover_backfill(int max) epoch_t e = get_osdmap()->get_epoch(); MOSDPGBackfill *m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_FINISH, e, e, info.pgid); - m->last_complete = info.last_update; m->last_backfill = hobject_t::get_max(); osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(backfill_target)); start_recovery_op(hobject_t::get_max()); @@ -5407,7 +5404,6 @@ int ReplicatedPG::recover_backfill(int max) epoch_t e = get_osdmap()->get_epoch(); MOSDPGBackfill *m = new MOSDPGBackfill(MOSDPGBackfill::OP_BACKFILL_PROGRESS, e, e, info.pgid); m->last_backfill = pinfo.last_backfill; - m->last_complete = info.last_update; osd->cluster_messenger->send_message(m, get_osdmap()->get_cluster_inst(backfill_target)); } return ops; -- 2.39.5