From 796ff1d178fdb108415a629fd29d6b7ddc160f99 Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Sun, 14 Nov 2010 13:54:55 -0800 Subject: [PATCH] Fix bugs in search_for_missing, _process_pg_info PG::search_for_missing: fix a bug with the handling of MSG_OSD_PG_INFO messages. Formerly, when processing these messages, we erroneously assumed that there was nothing missing on the peer at all even in cases where there were missing objects. PG::merge_log: drop unused Missing parameter. _process_pg_info: Don't assume that just because we requested a Log message at some point, that that is the message we're prcessing. Correctly handle cases where we didn't get the peer's Missing set or Log. Signed-off-by: Colin McCabe --- src/osd/OSD.cc | 66 +++++++++++++++++++++++++++++--------------------- src/osd/OSD.h | 2 +- src/osd/PG.cc | 38 +++++++++++++++++------------ src/osd/PG.h | 8 +++--- 4 files changed, 66 insertions(+), 48 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 79ba9902ef99a..9ac2e7fb57239 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3777,7 +3777,7 @@ void OSD::handle_pg_notify(MOSDPGNotify *m) void OSD::_process_pg_info(epoch_t epoch, int from, PG::Info &info, PG::Log &log, - PG::Missing &missing, + PG::Missing *missing, map* info_map, int& created) { @@ -3821,7 +3821,18 @@ void OSD::_process_pg_info(epoch_t epoch, int from, } assert(pg); - dout(10) << *pg << " got " << info << " " << log << " " << missing << dendl; + dout(10) << *pg << ": " << __func__ << " info: " << info << ", "; + if (log.empty()) + *_dout << "(log omitted)"; + else + *_dout << "log: " << log; + *_dout << ", "; + if (!missing) + *_dout << "(missing omitted)"; + else + *_dout << "missing: " << *missing; + *_dout << dendl; + unreg_last_pg_scrub(pg->info.pgid, pg->info.history.last_scrub_stamp); pg->info.history.merge(info.history); reg_last_pg_scrub(pg->info.pgid, pg->info.history.last_scrub_stamp); @@ -3829,40 +3840,40 @@ void OSD::_process_pg_info(epoch_t epoch, int from, // dump log dout(15) << *pg << " my log = "; pg->log.print(*_dout); - *_dout << dendl; - dout(15) << *pg << " osd" << from << " log = "; - log.print(*_dout); + *_dout << std::endl; + if (!log.empty()) { + *_dout << *pg << " osd" << from << " log = "; + log.print(*_dout); + } *_dout << dendl; if (pg->is_primary()) { // i am PRIMARY - if (pg->peer_log_requested.count(from) || - pg->peer_summary_requested.count(from)) - { - if (!pg->is_active()) { - pg->proc_replica_log(*t, info, log, missing, from); - - // peer - map< int, map > query_map; - pg->do_peer(*t, fin->contexts, query_map, info_map); - pg->update_stats(); - do_queries(query_map); + if (pg->is_active()) { + // PG is ACTIVE + if (pg->missing.have_missing()) { + dout(10) << *pg << " searching osd" << from << " log for missing items." << dendl; + pg->search_for_missing(info, missing, from); } else { - if (pg->missing.have_missing()) { - dout(10) << *pg << " searching osd" << from << " log for missing items." << dendl; - pg->search_for_missing(log, missing, from); - } - else { - dout(10) << *pg << " ignoring osd" << from << " log, pg is already active" << dendl; - } + dout(10) << *pg << " ignoring osd" << from << " log, pg is already active" << dendl; } } + else if ((!log.empty()) && missing) { + // PG is INACTIVE + pg->proc_replica_log(*t, info, log, *missing, from); + + // peer + map< int, map > query_map; + pg->do_peer(*t, fin->contexts, query_map, info_map); + pg->update_stats(); + do_queries(query_map); + } } else { if (!pg->info.dne()) { // i am REPLICA if (!pg->is_active()) { - pg->merge_log(*t, info, log, missing, from); + pg->merge_log(*t, info, log, from); pg->activate(*t, fin->contexts, info_map); } else { // just update our stats @@ -3915,7 +3926,7 @@ void OSD::handle_pg_log(MOSDPGLog *m) if (!require_same_or_newer_map(m, m->get_epoch())) return; _process_pg_info(m->get_epoch(), from, - m->info, m->log, m->missing, 0, + m->info, m->log, &m->missing, 0, created); if (created) update_heartbeat_peers(); @@ -3934,14 +3945,13 @@ void OSD::handle_pg_info(MOSDPGInfo *m) if (!require_same_or_newer_map(m, m->get_epoch())) return; PG::Log empty_log; - PG::Missing empty_missing; map info_map; int created = 0; for (vector::iterator p = m->pg_info.begin(); p != m->pg_info.end(); ++p) - _process_pg_info(m->get_epoch(), from, *p, empty_log, empty_missing, &info_map, created); + _process_pg_info(m->get_epoch(), from, *p, empty_log, NULL, &info_map, created); do_infos(info_map); if (created) @@ -4008,7 +4018,7 @@ void OSD::handle_pg_missing(MOSDPGMissing *m) PG::Log empty_log; int created = 0; _process_pg_info(m->get_epoch(), from, m->info, - empty_log, m->missing, NULL, created); + empty_log, &m->missing, NULL, created); if (created) update_heartbeat_peers(); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 67477fdd4ee74..02d0b5f29732d 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -642,7 +642,7 @@ protected: void _process_pg_info(epoch_t epoch, int from, PG::Info &info, PG::Log &log, - PG::Missing &missing, + PG::Missing *missing, map* info_map, int& created); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index bed9a4baf7d1d..c8c7849dda1bf 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -179,7 +179,7 @@ void PG::proc_replica_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, M // make any adjustments to their missing map; we are taking their // log to be authoritative (i.e., their entries are by definitely // non-divergent). - merge_log(t, oinfo, olog, omissing, from); + merge_log(t, oinfo, olog, from); } else if (is_acting(from)) { // replica. have master log. @@ -262,7 +262,7 @@ void PG::proc_replica_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, M peer_info[from] = oinfo; dout(10) << " peer osd" << from << " now " << oinfo << dendl; - search_for_missing(olog, omissing, from); + search_for_missing(oinfo, &omissing, from); peer_missing[from].swap(omissing); } @@ -323,7 +323,7 @@ bool PG::merge_old_entry(ObjectStore::Transaction& t, Log::Entry& oe) } void PG::merge_log(ObjectStore::Transaction& t, - Info &oinfo, Log &olog, Missing &omissing, int fromosd) + Info &oinfo, Log &olog, int fromosd) { dout(10) << "merge_log " << olog << " from osd" << fromosd << " into " << log << dendl; @@ -510,13 +510,14 @@ void PG::merge_log(ObjectStore::Transaction& t, } /* - * process replica's missing map to determine if they have - * any objects that i need + * Process information from a replica to determine if it could have any + * objects that i need. * * TODO: if the missing set becomes very large, this could get expensive. * Instead, we probably want to just iterate over our unfound set. */ -void PG::search_for_missing(Log &olog, Missing &omissing, int fromosd) +void PG::search_for_missing(const Info &oinfo, const Missing *omissing, + int fromosd) { // found items? for (map::iterator p = missing.missing.begin(); @@ -525,16 +526,21 @@ void PG::search_for_missing(Log &olog, Missing &omissing, int fromosd) const sobject_t &soid(p->first); eversion_t need = p->second.need; eversion_t have = p->second.have; - if (omissing.is_missing(soid)) { - dout(10) << "search_for_missing " << soid << " " << need - << " also missing on osd" << fromosd << dendl; - continue; - } - if (need > olog.head) { - dout(10) << "search_for_missing " << soid << " " << need - << " > olog.head " << olog.head << ", also missing on osd" << fromosd - << dendl; - continue; + if (oinfo.last_complete < need) { + if (!omissing) { + // We know that the peer lacks some objects at the revision we need. + // Without the peer's missing set, we don't know whether it has this + // particular object or not. + dout(10) << __func__ << " " << soid << " " << need + << " might also be missing on osd" << fromosd << dendl; + continue; + } + + if (omissing->is_missing(soid)) { + dout(10) << "search_for_missing " << soid << " " << need + << " also missing on osd" << fromosd << dendl; + continue; + } } dout(10) << "search_for_missing " << soid << " " << need << " is on osd" << fromosd << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 0fce35b5b8278..f5174fe191e34 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -818,10 +818,12 @@ public: virtual void calc_trim_to() = 0; - void proc_replica_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, Missing& omissing, int from); + void proc_replica_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, + Missing& omissing, int from); bool merge_old_entry(ObjectStore::Transaction& t, Log::Entry& oe); - void merge_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, Missing& omissing, int from); - void search_for_missing(Log &olog, Missing &omissing, int fromosd); + void merge_log(ObjectStore::Transaction& t, Info &oinfo, Log &olog, int from); + void search_for_missing(const Info &oinfo, const Missing *omissing, + int fromosd); void check_for_lost_objects(); void forget_lost_objects(); -- 2.39.5