From 19bcd747b452b79c80744620e635f036bfc1b738 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 28 Feb 2011 16:05:08 -0800 Subject: [PATCH] osd: trigger discover_all_missing after replay delay We were calling discover_all_missing only when we went immediately active, not after we were in the replay state (which triggers from a timer event that calls OSD::activate_pg()). Move the call into PG::activate() so that we catch both callers. This requires passing in a query_map from the caller. While we're at it, clean up some other instances where we are defining a new query_map deep within the call tree. Fixes: #847 (I hope) Signed-off-by: Sage Weil --- src/osd/OSD.cc | 29 +++++++++++++++++------------ src/osd/OSD.h | 1 + src/osd/PG.cc | 10 ++++------ src/osd/PG.h | 1 + 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2bf774e8a5507..14e6d1ebe94b4 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4121,9 +4121,7 @@ void OSD::handle_pg_notify(MOSDPGNotify *m) if (pg->is_active() && pg->have_unfound()) { // Make sure we've requested MISSING information from every OSD // we know about. - map< int, map > query_map; pg->discover_all_missing(query_map); - do_queries(query_map); } int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin); @@ -4156,12 +4154,13 @@ void OSD::_process_pg_info(epoch_t epoch, int from, PG::Info &info, PG::Log &log, PG::Missing *missing, + map< int, map >& query_map, map* info_map, int& created) { ObjectStore::Transaction *t = new ObjectStore::Transaction; C_Contexts *fin = new C_Contexts; - + PG *pg = 0; if (!_have_pg(info.pgid)) { vector up, acting; @@ -4235,9 +4234,7 @@ void OSD::_process_pg_info(epoch_t epoch, int from, if (pg->have_unfound()) { // Make sure we've requested MISSING information from every OSD // we know about. 
- map< int, map > query_map; pg->discover_all_missing(query_map); - do_queries(query_map); } else { dout(10) << *pg << " ignoring osd" << from << " log, pg is already active" << dendl; @@ -4248,10 +4245,8 @@ void OSD::_process_pg_info(epoch_t epoch, int from, pg->proc_replica_log(*t, info, log, *missing, from); // peer - map< int, map > query_map; pg->do_peer(*t, fin->contexts, query_map, info_map); pg->update_stats(); - do_queries(query_map); } } else if (!pg->info.dne()) { if (!pg->is_active()) { @@ -4263,7 +4258,7 @@ void OSD::_process_pg_info(epoch_t epoch, int from, assert(pg->log.tail <= pg->info.last_complete || pg->log.backlog); assert(pg->log.head == pg->info.last_update); - pg->activate(*t, fin->contexts, info_map); + pg->activate(*t, fin->contexts, query_map, info_map); } else { // ACTIVE REPLICA assert(pg->is_replica()); @@ -4311,9 +4306,11 @@ void OSD::handle_pg_log(MOSDPGLog *m) int created = 0; if (!require_same_or_newer_map(m, m->get_epoch())) return; + map< int, map > query_map; _process_pg_info(m->get_epoch(), from, - m->info, m->log, &m->missing, 0, + m->info, m->log, &m->missing, query_map, 0, created); + do_queries(query_map); if (created) update_heartbeat_peers(); @@ -4333,12 +4330,14 @@ void OSD::handle_pg_info(MOSDPGInfo *m) PG::Log empty_log; map info_map; int created = 0; + map< int, map > query_map; for (vector::iterator p = m->pg_info.begin(); p != m->pg_info.end(); ++p) - _process_pg_info(m->get_epoch(), from, *p, empty_log, NULL, &info_map, created); + _process_pg_info(m->get_epoch(), from, *p, empty_log, NULL, query_map, &info_map, created); + do_queries(query_map); do_infos(info_map); if (created) update_heartbeat_peers(); @@ -4401,10 +4400,12 @@ void OSD::handle_pg_missing(MOSDPGMissing *m) if (!require_same_or_newer_map(m, m->get_epoch())) return; + map< int, map > query_map; PG::Log empty_log; int created = 0; _process_pg_info(m->get_epoch(), from, m->info, - empty_log, &m->missing, NULL, created); + empty_log, &m->missing, 
query_map, NULL, created); + do_queries(query_map); if (created) update_heartbeat_peers(); @@ -4861,6 +4862,8 @@ void OSD::activate_pg(pg_t pgid, utime_t activate_at) { assert(osd_lock.is_locked()); + map< int, map > query_map; // peer -> PG -> get_summary_since + if (pg_map.count(pgid)) { PG *pg = _lookup_lock_pg(pgid); if (pg->is_crashed() && @@ -4869,13 +4872,15 @@ void OSD::activate_pg(pg_t pgid, utime_t activate_at) pg->replay_until == activate_at) { ObjectStore::Transaction *t = new ObjectStore::Transaction; C_Contexts *fin = new C_Contexts; - pg->activate(*t, fin->contexts); + pg->activate(*t, fin->contexts, query_map); int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin); assert(tr == 0); } pg->unlock(); } + do_queries(query_map); + // wake up _all_ pg waiters; raw pg -> actual pg mapping may have shifted wake_all_pg_waiters(); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index e43101b364506..ab9e3f610d348 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -665,6 +665,7 @@ protected: PG::Info &info, PG::Log &log, PG::Missing *missing, + map< int, map >& query_map, map* info_map, int& created); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 736a7a205b1e9..b5ac5f2423e5f 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1708,9 +1708,7 @@ void PG::do_peer(ObjectStore::Transaction& t, list& tfin, } else if (!is_active()) { // -- ok, activate! 
- activate(t, tfin, activator_map); - if (have_unfound()) - discover_all_missing(query_map); + activate(t, tfin, query_map, activator_map); } else if (is_all_uptodate()) finish_recovery(t, tfin); @@ -1763,6 +1761,7 @@ void PG::build_might_have_unfound() } void PG::activate(ObjectStore::Transaction& t, list& tfin, + map< int, map >& query_map, map *activator_map) { assert(!is_active()); @@ -1854,6 +1853,8 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, if (is_primary()) { dout(10) << "activate - starting recovery" << dendl; osd->queue_for_recovery(this); + if (have_unfound()) + discover_all_missing(query_map); } } @@ -1938,9 +1939,6 @@ void PG::activate(ObjectStore::Transaction& t, list& tfin, } } - // discard unneeded peering state - //peer_log.clear(); // actually, do this carefully, in case peer() is called again. - // all clean? if (is_all_uptodate()) finish_recovery(t, tfin); diff --git a/src/osd/PG.h b/src/osd/PG.h index 7a56ce9f91eea..727ee9dd0880a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -860,6 +860,7 @@ public: map *activator_map=0); void build_might_have_unfound(); void activate(ObjectStore::Transaction& t, list& tfin, + map< int, map >& query_map, map *activator_map=0); bool have_unfound() const { -- 2.39.5