From: Greg Farnum Date: Fri, 25 Apr 2014 00:20:48 +0000 (-0700) Subject: OSD: scan for dropped PGs in consume_map instead of advance_map X-Git-Tag: v0.81~57^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2ec92c7602b8891b66f8ec1da59f957ad07a1bc1;p=ceph.git OSD: scan for dropped PGs in consume_map instead of advance_map We have to wait until after we know that nobody will be adding ops for newly-dead PGs to the list. While we're moving it, switch the locking so we only hold a write lock while deleting the actual lists. Signed-off-by: Greg Farnum --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 6846aff3e0a..a2956b7a6d8 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2019,12 +2019,14 @@ PG *OSD::get_pg_or_queue_for_pg(spg_t pgid, OpRequestRef op) { { RWLock::RLocker l(pg_map_lock); - if (pg_map.count(pgid)) - return pg_map[pgid]; + ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.find(pgid); + if (i != pg_map.end()) + return i->second; } RWLock::WLocker l(pg_map_lock); - if (pg_map.count(pgid)) { - return pg_map[pgid]; + ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.find(pgid); + if (i != pg_map.end()) { + return i->second; } else { waiting_for_pg[pgid].push_back(op); return NULL; @@ -6065,26 +6067,6 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin) p->second.acting.swap(acting); // keep the latest } } - - // scan pgs with waiters - RWLock::WLocker l(pg_map_lock); - map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.begin(); - while (p != waiting_for_pg.end()) { - spg_t pgid = p->first; - - vector<int> acting; - int nrep = osdmap->pg_to_acting_osds(pgid.pgid, acting); - int role = osdmap->calc_pg_role(whoami, acting, nrep); - if (role >= 0) { - ++p; // still me - } else { - dout(10) << " discarding waiting ops for " << pgid << dendl; - while (!p->second.empty()) { - p->second.pop_front(); - } - waiting_for_pg.erase(p++); - } - } } void OSD::consume_map() @@ -6148,6 +6130,44 @@ void OSD::consume_map() (*i)->put(); } + // remove any 
PGs which we no longer host from the waiting_for_pg list + set<spg_t> pgs_to_delete; + { + RWLock::RLocker l(pg_map_lock); + map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.begin(); + while (p != waiting_for_pg.end()) { + spg_t pgid = p->first; + + vector<int> acting; + int nrep = osdmap->pg_to_acting_osds(pgid.pgid, acting); + int role = osdmap->calc_pg_role(whoami, acting, nrep); + + if (role < 0) { + pgs_to_delete.insert(p->first); + /* we can delete list contents under the read lock because + * nobody will be adding to them -- everybody is now using a map + * new enough that they will simply drop ops instead of adding + * them to the list. */ + dout(10) << " discarding waiting ops for " << pgid << dendl; + while (!p->second.empty()) { + p->second.pop_front(); + } + } + ++p; + } + } + { + RWLock::WLocker l(pg_map_lock); + for (set<spg_t>::iterator i = pgs_to_delete.begin(); + i != pgs_to_delete.end(); + ++i) { + map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.find(*i); + assert(p->second.empty()); + waiting_for_pg.erase(p); + } + } + + // scan pg's { RWLock::RLocker l(pg_map_lock); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index a7059fbbfed..b92be0e3608 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1471,7 +1471,7 @@ protected: ); ///< @return false if there was a map gap between from and now void wake_pg_waiters(PG* pg, spg_t pgid) { - // Need write lock on pg_map + // Need write lock on pg_map_lock map<spg_t, list<OpRequestRef> >::iterator i = waiting_for_pg.find(pgid); if (i != waiting_for_pg.end()) { for (list<OpRequestRef>::iterator j = i->second.begin();