From b48646a23ee420e3f90644a0065724a189409b4d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 12 Dec 2008 20:01:14 -0800 Subject: [PATCH] osd: for remaining peers, pull either log or backlog, but not both. Pull as far back as peer's last_epoch_started (if they have that much). This ensures we will pull any divergent entries, if there are any, so that we can update our peer_missing map accordingly. --- src/osd/OSD.cc | 18 ++++------------ src/osd/PG.cc | 56 ++++++++++++++++++++++++++++++++++++-------------- src/osd/PG.h | 21 ++++++++----------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index bfe66a532a2e1..0ed8a6c9c1029 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3001,26 +3001,16 @@ void OSD::handle_pg_query(MOSDPGQuery *m) // primary -> other, when building master log if (it->second.type == PG::Query::LOG) { - dout(10) << *pg << " sending info+missing+log since " << it->second.floor + dout(10) << *pg << " sending info+missing+log since " << it->second.since << dendl; - /* - if (!m->log.copy_after_unless_divergent(pg->log, it->second.split, it->second.floor)) { - dout(10) << *pg << " divergent, sending backlog" << dendl; - it->second.type = PG::Query::BACKLOG; - } - */ - m->log.copy_after(pg->log, it->second.floor); + m->log.copy_after(pg->log, it->second.since); } if (it->second.type == PG::Query::BACKLOG) { dout(10) << *pg << " sending info+missing+backlog" << dendl; - if (pg->log.backlog) { - m->log = pg->log; - } else { + if (!pg->log.backlog) pg->generate_backlog(); - m->log = pg->log; - pg->drop_backlog(); - } + m->log = pg->log; } else if (it->second.type == PG::Query::FULLLOG) { dout(10) << *pg << " sending info+missing+full log" << dendl; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index ec475787ceebc..af970b24447dd 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1023,25 +1023,30 @@ void PG::peer(ObjectStore::Transaction& t, // gather log(+missing) from that person! 
if (newest_update_osd != osd->whoami) { - if (peer_info[newest_update_osd].log_bottom <= log.top) { + Info& pi = peer_info[newest_update_osd]; + if (pi.log_bottom <= log.top) { if (peer_log_requested.count(newest_update_osd)) { dout(10) << " newest update on osd" << newest_update_osd << " v " << newest_update << ", already queried log" << dendl; } else { - // we'd like it back to oldest_update, but will settle for log_bottom - eversion_t since = MAX(peer_info[newest_update_osd].log_bottom, - oldest_update); + // we'd _like_ it back to oldest_update, but take what we can get. dout(10) << " newest update on osd" << newest_update_osd << " v " << newest_update - << ", querying since " << since + << ", querying since oldest_update " << oldest_update << dendl; - query_map[newest_update_osd][info.pgid] = Query(Query::LOG, since, info.history); - //Query(Query::LOG, log.top, since, info.history); + query_map[newest_update_osd][info.pgid] = Query(Query::LOG, oldest_update, info.history); peer_log_requested.insert(newest_update_osd); } } else { + dout(10) << " newest update on osd" << newest_update_osd + << ", whose log.bottom " << pi.log_bottom + << " > my log.top " << log.top + << ", i will need a backlog" << dendl; + // it's possible another peer could fill in the missing bit, but + // pretty unlikely. someday it may be worth the complexity to + // try. until then, just get the full backlog. if (peer_summary_requested.count(newest_update_osd)) { dout(10) << " newest update on osd" << newest_update_osd << " v " << newest_update @@ -1052,9 +1057,6 @@ void PG::peer(ObjectStore::Transaction& t, << " v " << newest_update << ", querying entire summary/backlog" << dendl; - assert((peer_info[newest_update_osd].last_complete >= - peer_info[newest_update_osd].log_bottom) || - peer_info[newest_update_osd].log_backlog); // or else we're in trouble. 
query_map[newest_update_osd][info.pgid] = Query(Query::BACKLOG, info.history); peer_summary_requested.insert(newest_update_osd); } @@ -1087,6 +1089,14 @@ void PG::peer(ObjectStore::Transaction& t, /* we also detect divergent replicas here by pulling the full log from everyone. + + for example: + 0: 1: 2: + 2'6 2'6 2'6 + 2'7 2'7 2'7 + 3'8 | 2'8 2'8 + 3'9 | 2'9 + */ // gather missing from peers @@ -1100,11 +1110,27 @@ void PG::peer(ObjectStore::Transaction& t, } if (peer_log_requested.count(peer) || peer_summary_requested.count(peer)) continue; - - dout(10) << " pulling log+missing from osd" << peer - << dendl; - query_map[peer][info.pgid] = Query(Query::FULLLOG, info.history); - peer_log_requested.insert(peer); + + Info& pi = peer_info[peer]; + assert(pi.last_update <= log.top); + + if (pi.last_update < log.bottom) { + // we need the full backlog in order to build this node's missing map. + dout(10) << " osd" << peer << " last_update " << pi.last_update + << " < log.bottom " << log.bottom + << ", pulling missing+backlog" << dendl; + query_map[peer][info.pgid] = Query(Query::BACKLOG, info.history); + peer_summary_requested.insert(peer); + } else { + // we need just enough log to get any divergent items so that we + // can appropriately adjust the missing map. that can be as far back + // as the peer's last_epoch_started. 
+ eversion_t from(pi.history.last_epoch_started, 0); + dout(10) << " osd" << peer << " last_update " << pi.last_update + << ", pulling missing+log from it's last_epoch_started " << from << dendl; + query_map[peer][info.pgid] = Query(Query::LOG, from, info.history); + peer_log_requested.insert(peer); + } } if (!have_all_missing) return; diff --git a/src/osd/PG.h b/src/osd/PG.h index 5cf8963cf1ffb..810f444371bce 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -146,33 +146,28 @@ public: struct Query { const static int INFO = 0; const static int LOG = 1; - const static int BACKLOG = 2; - const static int FULLLOG = 3; - const static int LOGFROM = 4; + const static int BACKLOG = 3; + const static int MISSING = 4; + const static int FULLLOG = 5; __s32 type; - //eversion_t split; - eversion_t floor; + eversion_t since; Info::History history; Query() : type(-1) {} Query(int t, Info::History& h) : type(t), history(h) { assert(t != LOG); } - Query(int t, eversion_t f, Info::History& h) : - type(t), - //split(s), - floor(f), history(h) { assert(t == LOG); } + Query(int t, eversion_t s, Info::History& h) : + type(t), since(s), history(h) { assert(t == LOG); } void encode(bufferlist &bl) const { ::encode(type, bl); - //::encode(split, bl); - ::encode(floor, bl); + ::encode(since, bl); history.encode(bl); } void decode(bufferlist::iterator &bl) { ::decode(type, bl); - //::decode(split, bl); - ::decode(floor, bl); + ::decode(since, bl); history.decode(bl); } }; -- 2.39.5