OSD: scan for dropped PGs in consume_map instead of advance_map
author Greg Farnum <greg@inktank.com>
Fri, 25 Apr 2014 00:20:48 +0000 (17:20 -0700)
committer Greg Farnum <greg@inktank.com>
Mon, 5 May 2014 22:29:20 +0000 (15:29 -0700)
We have to wait until after we know that nobody will be adding ops for
newly-dead PGs to the waiting_for_pg list. While we're moving the scan,
switch the locking so that we only hold the write lock while erasing the
(already-emptied) lists.

Signed-off-by: Greg Farnum <greg@inktank.com>
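
For illustration only (this is not Ceph code): a minimal C++17 sketch of the
two-phase locking pattern the commit describes, with std::shared_mutex standing
in for Ceph's RWLock and simplified placeholder types (PgId, OpRequest,
should_drop). The scan empties each doomed PG's op list while holding only the
read lock, relying on the invariant that nobody enqueues ops for PGs dropped by
the new map, and the write lock is taken just long enough to erase the emptied
entries.

#include <list>
#include <map>
#include <mutex>
#include <set>
#include <shared_mutex>

struct OpRequest {};                                  // stand-in for OpRequestRef
using PgId    = int;                                  // stand-in for spg_t
using WaitMap = std::map<PgId, std::list<OpRequest>>;

std::shared_mutex pg_map_lock;                        // stand-in for Ceph's RWLock
WaitMap waiting_for_pg;

// Placeholder predicate: "this OSD no longer hosts pgid under the new map".
bool should_drop(PgId pgid) { return pgid % 2 != 0; }

void drop_dead_pg_waiters()
{
  std::set<PgId> pgs_to_delete;
  {
    // Read lock is enough to drain the lists: by the time this runs, every
    // enqueuer already uses the new map and drops ops for these PGs instead
    // of appending to waiting_for_pg.
    std::shared_lock rl(pg_map_lock);
    for (auto& [pgid, ops] : waiting_for_pg) {
      if (should_drop(pgid)) {
        ops.clear();                                  // empty the list under the read lock
        pgs_to_delete.insert(pgid);
      }
    }
  }
  {
    // Write lock is held only while erasing the now-empty map entries.
    std::unique_lock wl(pg_map_lock);
    for (PgId pgid : pgs_to_delete)
      waiting_for_pg.erase(pgid);
  }
}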
src/osd/OSD.cc
src/osd/OSD.h

index 6846aff3e0aa4ac5a465703a7f9e2275b5399cee..a2956b7a6d89faa6c1f3ecf99f3f88ac9fb16ffd 100644 (file)
@@ -2019,12 +2019,14 @@ PG *OSD::get_pg_or_queue_for_pg(spg_t pgid, OpRequestRef op)
 {
   {
     RWLock::RLocker l(pg_map_lock);
-    if (pg_map.count(pgid))
-      return pg_map[pgid];
+    ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.find(pgid);
+    if (i != pg_map.end())
+      return i->second;
   }
   RWLock::WLocker l(pg_map_lock);
-  if (pg_map.count(pgid)) {
-    return pg_map[pgid];
+  ceph::unordered_map<spg_t, PG*>::iterator i = pg_map.find(pgid);
+  if (i != pg_map.end()) {
+    return i->second;
   } else {
     waiting_for_pg[pgid].push_back(op);
     return NULL;
@@ -6065,26 +6067,6 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin)
       p->second.acting.swap(acting);  // keep the latest
     }
   }
-
-  // scan pgs with waiters
-  RWLock::WLocker l(pg_map_lock);
-  map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.begin();
-  while (p != waiting_for_pg.end()) {
-    spg_t pgid = p->first;
-
-    vector<int> acting;
-    int nrep = osdmap->pg_to_acting_osds(pgid.pgid, acting);
-    int role = osdmap->calc_pg_role(whoami, acting, nrep);
-    if (role >= 0) {
-      ++p;  // still me
-    } else {
-      dout(10) << " discarding waiting ops for " << pgid << dendl;
-      while (!p->second.empty()) {
-       p->second.pop_front();
-      }
-      waiting_for_pg.erase(p++);
-    }
-  }
 }
 
 void OSD::consume_map()
@@ -6148,6 +6130,44 @@ void OSD::consume_map()
     (*i)->put();
   }
 
+  // remove any PGs which we no longer host from the waiting_for_pg list
+  set<spg_t> pgs_to_delete;
+  {
+    RWLock::RLocker l(pg_map_lock);
+    map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.begin();
+    while (p != waiting_for_pg.end()) {
+      spg_t pgid = p->first;
+
+      vector<int> acting;
+      int nrep = osdmap->pg_to_acting_osds(pgid.pgid, acting);
+      int role = osdmap->calc_pg_role(whoami, acting, nrep);
+
+      if (role < 0) {
+        pgs_to_delete.insert(p->first);
+        /* we can delete list contents under the read lock because
+         * nobody will be adding to them -- everybody is now using a map
+         * new enough that they will simply drop ops instead of adding
+         * them to the list. */
+        dout(10) << " discarding waiting ops for " << pgid << dendl;
+        while (!p->second.empty()) {
+          p->second.pop_front();
+        }
+      }
+      ++p;
+    }
+  }
+  {
+    RWLock::WLocker l(pg_map_lock);
+    for (set<spg_t>::iterator i = pgs_to_delete.begin();
+        i != pgs_to_delete.end();
+        ++i) {
+      map<spg_t, list<OpRequestRef> >::iterator p = waiting_for_pg.find(*i);
+      assert(p->second.empty());
+      waiting_for_pg.erase(p);
+    }
+  }
+
+
   // scan pg's
   {
     RWLock::RLocker l(pg_map_lock);
index a7059fbbfed5ac59dd301dd5b128cbd56d299c39..b92be0e36089202acae06dc90ec10402fcddbfab 100644 (file)
@@ -1471,7 +1471,7 @@ protected:
     ); ///< @return false if there was a map gap between from and now
 
   void wake_pg_waiters(PG* pg, spg_t pgid) {
-    // Need write lock on pg_map
+    // Need write lock on pg_map_lock
     map<spg_t, list<OpRequestRef> >::iterator i = waiting_for_pg.find(pgid);
     if (i != waiting_for_pg.end()) {
       for (list<OpRequestRef>::iterator j = i->second.begin();