]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: fix recursive map_lock via check_replay_queue()
authorSage Weil <sage@newdream.net>
Mon, 27 Feb 2012 17:56:21 +0000 (09:56 -0800)
committerSage Weil <sage@newdream.net>
Mon, 27 Feb 2012 17:56:21 +0000 (09:56 -0800)
Also drop activate_pg() helper while we're at it, so it's clear that we
are the only user.

recursive lock of OSD::map_lock (33)
 ceph version 0.42-146-g7ad35ce (commit:7ad35ce489cc5f9169eb838e1196fa2ca4d6e985)
2012-02-24 12:30:16.541416 1: (PG::lock(bool)+0x2a) [0xa09348]
2012-02-24 12:30:16.541424 2: (OSD::_lookup_lock_pg(pg_t)+0xbd) [0x84b8df]
2012-02-24 12:30:16.541431 3: (OSD::activate_pg(pg_t, utime_t)+0x9f) [0x87463b]
2012-02-24 12:30:16.541442 4: (OSD::check_replay_queue()+0x12f) [0x87452d]
2012-02-24 12:30:16.541450 5: (OSD::tick()+0x23c) [0x8535ea]
2012-02-24 12:30:16.541456 6: (OSD::C_Tick::finish(int)+0x1f) [0x881671]
2012-02-24 12:30:16.541462 7: (SafeTimer::timer_thread()+0x2d5) [0x8f8211]
2012-02-24 12:30:16.541468 8: (SafeTimerThread::entry()+0x1c) [0x8f923c]
2012-02-24 12:30:16.541475 9: (Thread::_entry_func(void*)+0x23) [0x9c8109]
2012-02-24 12:30:16.541485 10: (()+0x68ba) [0x7f9dbed838ba]
2012-02-24 12:30:16.541491 11: (clone()+0x6d) [0x7f9dbd66f02d]
2012-02-24 12:30:16.541495 common/lockdep.cc: In function 'int lockdep_will_lock(const char*, int)' thread 7f9db9d98700 time 2012-02-24 12:30:16.541504

Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
Reviewed-by: Sam Just <samuel.just@dreamhost.com>
src/osd/OSD.cc
src/osd/OSD.h

index 4637277d830297474c8a8ab590bfa17a364a8a8a..99d41bba5d85503f72df19544057c0cefa859854 100644 (file)
@@ -1166,6 +1166,15 @@ PG *OSD::_lookup_lock_pg(pg_t pgid)
   return pg;
 }
 
+PG *OSD::_lookup_lock_pg_with_map_lock_held(pg_t pgid)
+{
+  assert(osd_lock.is_locked());
+  assert(pg_map.count(pgid));
+  PG *pg = pg_map[pgid];
+  pg->lock_with_map_lock_held();
+  return pg;
+}
+
 PG *OSD::lookup_lock_raw_pg(pg_t pgid)
 {
   Mutex::Locker l(osd_lock);
@@ -5019,8 +5028,13 @@ void OSD::_remove_pg(PG *pg)
 // =========================================================
 // RECOVERY
 
+/*
+ * caller holds osd_lock
+ */
 void OSD::check_replay_queue()
 {
+  assert(osd_lock.is_locked());
+
   utime_t now = ceph_clock_now(g_ceph_context);
   list< pair<pg_t,utime_t> > pgids;
   replay_queue_lock.Lock();
@@ -5031,29 +5045,21 @@ void OSD::check_replay_queue()
   }
   replay_queue_lock.Unlock();
 
-  for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++)
-    activate_pg(p->first, p->second);
-}
-
-/*
- * NOTE: this is called from SafeTimer, so caller holds osd_lock
- */
-void OSD::activate_pg(pg_t pgid, utime_t activate_at)
-{
-  assert(osd_lock.is_locked());
-
-  if (pg_map.count(pgid)) {
-    PG *pg = _lookup_lock_pg(pgid);
-    dout(10) << "activate_pg " << *pg << dendl;
-    if (pg->is_active() &&
-       pg->is_replay() &&
-       pg->get_role() == 0 &&
-       pg->replay_until == activate_at) {
-      pg->replay_queued_ops();
+  for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++) {
+    pg_t pgid = p->first;
+    if (pg_map.count(pgid)) {
+      PG *pg = _lookup_lock_pg_with_map_lock_held(pgid);
+      dout(10) << "check_replay_queue " << *pg << dendl;
+      if (pg->is_active() &&
+         pg->is_replay() &&
+         pg->get_role() == 0 &&
+         pg->replay_until == p->second) {
+       pg->replay_queued_ops();
+      }
+      pg->unlock();
+    } else {
+      dout(10) << "check_replay_queue pgid " << pgid << " (not found)" << dendl;
     }
-    pg->unlock();
-  } else {
-    dout(10) << "activate_pg pgid " << pgid << " (not found)" << dendl;
   }
   
   // wake up _all_ pg waiters; raw pg -> actual pg mapping may have shifted
index f80b725a47709ff9cd17581c1052218ba1fc011e..ad6ff7e3c927dfa3efbcf3e1bd216a1d87cfef5b 100644 (file)
@@ -446,6 +446,7 @@ protected:
 
   bool  _have_pg(pg_t pgid);
   PG   *_lookup_lock_pg(pg_t pgid);
+  PG   *_lookup_lock_pg_with_map_lock_held(pg_t pgid);
   PG   *_open_lock_pg(pg_t pg, bool no_lockdep_check=false);  // create new PG (in memory)
   PG   *_create_lock_pg(pg_t pgid, bool newly_created,
                        int role, vector<int>& up, vector<int>& acting, pg_history_t history,
@@ -760,7 +761,6 @@ protected:
   list< pair<pg_t, utime_t > > replay_queue;
   
   void check_replay_queue();
-  void activate_pg(pg_t pgid, utime_t activate_at);
 
 
   // -- snap trimming --