]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: addressed possible race conditions / deadlocks from unit testing
authorJason Dillaman <dillaman@redhat.com>
Thu, 23 Jul 2015 20:15:12 +0000 (16:15 -0400)
committerJason Dillaman <dillaman@redhat.com>
Fri, 13 Nov 2015 04:27:07 +0000 (23:27 -0500)
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/AioImageRequestWQ.cc
src/librbd/AioImageRequestWQ.h
src/librbd/ImageWatcher.cc
src/librbd/ImageWatcher.h
src/librbd/Journal.cc
src/librbd/Journal.h

index 6b58b0b728b0f6739973cddd0e5a3a2090a601d2..789865348bb36521644460fbdf971c9f83ecea7d 100644 (file)
@@ -275,35 +275,28 @@ void AioImageRequestWQ::queue(AioImageRequest *req) {
   }
 }
 
-void AioImageRequestWQ::handle_releasing_lock() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << __func__ << ": ictx=" << &m_image_ctx << dendl;
-
-  if (!m_blocking_writes) {
-    m_blocking_writes = true;
-    block_writes();
-  }
-}
-
-void AioImageRequestWQ::handle_lock_updated(bool lock_supported,
-                                            bool lock_owner) {
+void AioImageRequestWQ::handle_lock_updated(
+    ImageWatcher::LockUpdateState state) {
   assert(m_image_ctx.owner_lock.is_locked());
 
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << __func__ << ": ictx=" << &m_image_ctx << ", "
-                 << "lock_support=" << lock_supported << ", "
-                 << "owner=" << lock_owner << dendl;
+                 << "state=" << state << dendl;
 
-  if ((!lock_supported || lock_owner) && m_blocking_writes) {
+  if ((state == ImageWatcher::LOCK_UPDATE_STATE_NOT_SUPPORTED ||
+       state == ImageWatcher::LOCK_UPDATE_STATE_LOCKED) && m_blocking_writes) {
     m_blocking_writes = false;
     unblock_writes();
-  } else if (lock_supported && !lock_owner) {
+  } else if (state == ImageWatcher::LOCK_UPDATE_STATE_RELEASING &&
+             !m_blocking_writes) {
+    m_blocking_writes = true;
+    block_writes();
+  } else if (state == ImageWatcher::LOCK_UPDATE_STATE_UNLOCKED) {
+    assert(m_blocking_writes);
     assert(writes_blocked());
-    if (!writes_empty()) {
-      m_image_ctx.image_watcher->request_lock();
-    }
+  } else if (state == ImageWatcher::LOCK_UPDATE_STATE_NOTIFICATION &&
+             !writes_empty()) {
+    m_image_ctx.image_watcher->request_lock();
   }
 }
 
index 75ec889b53133b9812113c925858e9fda88c56ff..d269fdbd0f8e203dcf12bc051ba4ec5a0a1129e1 100644 (file)
@@ -62,11 +62,8 @@ private:
     virtual bool handle_requested_lock() {
       return true;
     }
-    virtual void handle_releasing_lock() {
-      aio_work_queue->handle_releasing_lock();
-    }
-    virtual void handle_lock_updated(bool lock_supported, bool lock_owner) {
-      aio_work_queue->handle_lock_updated(lock_supported, lock_owner);
+    virtual void handle_lock_updated(ImageWatcher::LockUpdateState state) {
+      aio_work_queue->handle_lock_updated(state);
     }
   };
 
@@ -84,8 +81,7 @@ private:
   bool is_lock_required() const;
   void queue(AioImageRequest *req);
 
-  void handle_releasing_lock();
-  void handle_lock_updated(bool lock_supported, bool lock_owner);
+  void handle_lock_updated(ImageWatcher::LockUpdateState state);
 };
 
 } // namespace librbd
index 69825a1df9141f9bed362072a49632cd6b059e28..bedc5a50c6537e80af4c363d32bcd57a2a4b747b 100644 (file)
@@ -139,19 +139,22 @@ int ImageWatcher::refresh() {
 
   int r = 0;
   if (lock_support_changed) {
-    if (is_lock_supported() && !is_lock_owner()) {
+    if (is_lock_supported()) {
       // image opened, exclusive lock dynamically enabled, or now HEAD
-      notify_listeners_releasing_lock();
-    } else if (!is_lock_supported() && is_lock_owner()) {
-      // exclusive lock dynamically disabled or now snapshot
-      m_image_ctx.owner_lock.put_read();
-      {
-        RWLock::WLocker owner_locker(m_image_ctx.owner_lock);
-        r = release_lock();
+      notify_listeners_updated_lock(LOCK_UPDATE_STATE_RELEASING);
+      notify_listeners_updated_lock(LOCK_UPDATE_STATE_UNLOCKED);
+    } else if (!is_lock_supported()) {
+      if (is_lock_owner()) {
+        // exclusive lock dynamically disabled or now snapshot
+        m_image_ctx.owner_lock.put_read();
+        {
+          RWLock::WLocker owner_locker(m_image_ctx.owner_lock);
+          r = release_lock();
+        }
+        m_image_ctx.owner_lock.get_read();
       }
-      m_image_ctx.owner_lock.get_read();
+      notify_listeners_updated_lock(LOCK_UPDATE_STATE_NOT_SUPPORTED);
     }
-    notify_listeners_updated_lock();
   }
   return r;
 }
@@ -397,7 +400,7 @@ int ImageWatcher::release_lock()
 
     // alert listeners that all incoming IO needs to be stopped since the
     // lock is being released
-    notify_listeners_releasing_lock();
+    notify_listeners_updated_lock(LOCK_UPDATE_STATE_RELEASING);
 
     RWLock::WLocker md_locker(m_image_ctx.md_lock);
     r = librbd::_flush(&m_image_ctx);
@@ -416,7 +419,7 @@ int ImageWatcher::release_lock()
   {
     RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
     if (m_lock_owner_state == LOCK_OWNER_STATE_NOT_LOCKED) {
-      notify_listeners_updated_lock();
+      notify_listeners_updated_lock(LOCK_UPDATE_STATE_UNLOCKED);
     }
   }
   m_image_ctx.owner_lock.get_write();
@@ -638,7 +641,11 @@ void ImageWatcher::notify_acquired_lock() {
   ldout(m_image_ctx.cct, 10) << this << " notify acquired lock" << dendl;
 
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-  notify_listeners_updated_lock();
+  if (m_lock_owner_state != LOCK_OWNER_STATE_LOCKED) {
+    return;
+  }
+
+  notify_listeners_updated_lock(LOCK_UPDATE_STATE_LOCKED);
 
   bufferlist bl;
   ::encode(NotifyMessage(AcquiredLockPayload(get_client_id())), bl);
@@ -874,7 +881,7 @@ void ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
     if (cancel_async_requests) {
       schedule_cancel_async_requests();
     }
-    notify_listeners_updated_lock();
+    notify_listeners_updated_lock(LOCK_UPDATE_STATE_NOTIFICATION);
   }
 }
 
@@ -900,7 +907,7 @@ void ImageWatcher::handle_payload(const ReleasedLockPayload &payload,
     if (cancel_async_requests) {
       schedule_cancel_async_requests();
     }
-    notify_listeners_updated_lock();
+    notify_listeners_updated_lock(LOCK_UPDATE_STATE_NOTIFICATION);
   }
 }
 
@@ -1191,7 +1198,7 @@ void ImageWatcher::reregister_watch() {
   }
 
   if (m_lock_owner_state == LOCK_OWNER_STATE_NOT_LOCKED) {
-    notify_listeners_updated_lock();
+    notify_listeners_updated_lock(LOCK_UPDATE_STATE_UNLOCKED);
   }
 }
 
@@ -1210,27 +1217,8 @@ void ImageWatcher::RemoteContext::finish(int r) {
   m_image_watcher.schedule_async_complete(m_async_request_id, r);
 }
 
-void ImageWatcher::notify_listeners_releasing_lock() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  Listeners listeners;
-  {
-    Mutex::Locker listeners_locker(m_listeners_lock);
-    m_listeners_in_use = true;
-    listeners = m_listeners;
-  }
-
-  for (Listeners::iterator it = listeners.begin();
-       it != listeners.end(); ++it) {
-    (*it)->handle_releasing_lock();
-  }
-
-  Mutex::Locker listeners_locker(m_listeners_lock);
-  m_listeners_in_use = false;
-  m_listeners_cond.Signal();
-}
-
-void ImageWatcher::notify_listeners_updated_lock() {
+void ImageWatcher::notify_listeners_updated_lock(
+    LockUpdateState lock_update_state) {
   assert(m_image_ctx.owner_lock.is_locked());
 
   Listeners listeners;
@@ -1240,17 +1228,9 @@ void ImageWatcher::notify_listeners_updated_lock() {
     listeners = m_listeners;
   }
 
-  bool lock_supported;
-  {
-    RWLock::RLocker watch_locker(m_watch_lock);
-    lock_supported = m_lock_supported;
-  }
-
-  assert(lock_supported || m_lock_owner_state == LOCK_OWNER_STATE_NOT_LOCKED);
   for (Listeners::iterator it = listeners.begin();
        it != listeners.end(); ++it) {
-    (*it)->handle_lock_updated(lock_supported,
-                               m_lock_owner_state == LOCK_OWNER_STATE_LOCKED);
+    (*it)->handle_lock_updated(lock_update_state);
   }
 
   Mutex::Locker listeners_locker(m_listeners_lock);
index c2c0ce395a75e9ba6d8d753e32fd258e67c809bb..2b3da278e796ffd2c6faed0a3135b20be4bb2c09 100644 (file)
@@ -26,12 +26,19 @@ template <typename T> class TaskFinisher;
 
 class ImageWatcher {
 public:
+  enum LockUpdateState {
+    LOCK_UPDATE_STATE_NOT_SUPPORTED,
+    LOCK_UPDATE_STATE_LOCKED,
+    LOCK_UPDATE_STATE_RELEASING,
+    LOCK_UPDATE_STATE_UNLOCKED,
+    LOCK_UPDATE_STATE_NOTIFICATION
+  };
+
   struct Listener {
     virtual ~Listener() {}
 
     virtual bool handle_requested_lock() = 0;
-    virtual void handle_releasing_lock() = 0;
-    virtual void handle_lock_updated(bool lock_supported, bool lock_owner) = 0;
+    virtual void handle_lock_updated(LockUpdateState lock_update_state) = 0;
   };
 
   ImageWatcher(ImageCtx& image_ctx);
@@ -297,8 +304,7 @@ private:
 
   void reregister_watch();
 
-  void notify_listeners_releasing_lock();
-  void notify_listeners_updated_lock();
+  void notify_listeners_updated_lock(LockUpdateState lock_update_state);
 };
 
 } // namespace librbd
index 9618357330482652b5b35c0ddb4c73de721dbe78..737a120f5a5bc1b921e80437b5ebd8457f26a25f 100644 (file)
@@ -38,7 +38,8 @@ Journal::Journal(ImageCtx &image_ctx)
   : m_image_ctx(image_ctx), m_journaler(NULL),
     m_lock("Journal::m_lock"), m_state(STATE_UNINITIALIZED),
     m_lock_listener(this), m_replay_handler(this), m_close_pending(false),
-    m_event_tid(0), m_blocking_writes(false), m_journal_replay(NULL) {
+    m_event_lock("Journal::m_event_lock"), m_event_tid(0),
+    m_blocking_writes(false), m_journal_replay(NULL) {
 
   ldout(m_image_ctx.cct, 5) << this << ": ictx=" << &m_image_ctx << dendl;
 
@@ -49,6 +50,7 @@ Journal::Journal(ImageCtx &image_ctx)
 }
 
 Journal::~Journal() {
+  m_image_ctx.op_work_queue->drain();
   assert(m_journaler == NULL);
   assert(m_journal_replay == NULL);
 
@@ -64,11 +66,6 @@ bool Journal::is_journal_supported(ImageCtx &image_ctx) {
           !image_ctx.read_only && image_ctx.snap_id == CEPH_NOSNAP);
 }
 
-bool Journal::is_journal_replaying() const {
-  Mutex::Locker locker(m_lock);
-  return (m_state == STATE_REPLAYING);
-}
-
 int Journal::create(librados::IoCtx &io_ctx, const std::string &image_id) {
   CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
   ldout(cct, 5) << __func__ << ": image=" << image_id << dendl;
@@ -122,6 +119,19 @@ bool Journal::is_journal_ready() const {
   return (m_state == STATE_RECORDING);
 }
 
+bool Journal::is_journal_replaying() const {
+  Mutex::Locker locker(m_lock);
+  return (m_state == STATE_REPLAYING);
+}
+
+bool Journal::wait_for_journal_ready() {
+  Mutex::Locker locker(m_lock);
+  while (m_state != STATE_UNINITIALIZED && m_state != STATE_RECORDING) {
+    wait_for_state_transition();
+  }
+  return (m_state == STATE_RECORDING);
+}
+
 void Journal::open() {
   Mutex::Locker locker(m_lock);
   if (m_journaler != NULL) {
@@ -154,6 +164,9 @@ int Journal::close() {
       m_close_pending = true;
       wait_for_state_transition();
       break;
+    case STATE_STOPPING_RECORDING:
+      wait_for_state_transition();
+      break;
     case STATE_RECORDING:
       r = stop_recording();
       if (r < 0) {
@@ -176,15 +189,19 @@ uint64_t Journal::append_event(AioCompletion *aio_comp,
                                uint64_t offset, size_t length,
                                bool flush_entry) {
   assert(m_image_ctx.owner_lock.is_locked());
-  assert(m_state == STATE_RECORDING);
 
   bufferlist bl;
   ::encode(event_entry, bl);
 
-  ::journal::Future future = m_journaler->append("", bl);
+  ::journal::Future future;
   uint64_t tid;
   {
     Mutex::Locker locker(m_lock);
+    assert(m_state == STATE_RECORDING);
+
+    future = m_journaler->append("", bl);
+
+    Mutex::Locker event_locker(m_event_lock);
     tid = ++m_event_tid;
     assert(tid != 0);
 
@@ -213,7 +230,7 @@ void Journal::commit_event(uint64_t tid, int r) {
   ldout(cct, 20) << this << " " << __func__ << ": tid=" << tid << ", "
                  "r=" << r << dendl;
 
-  Mutex::Locker locker(m_lock);
+  Mutex::Locker event_locker(m_event_lock);
   Events::iterator it = m_events.find(tid);
   if (it == m_events.end()) {
     return;
@@ -231,7 +248,7 @@ void Journal::commit_event_extent(uint64_t tid, uint64_t offset,
                  << "length=" << length << ", "
                  << "r=" << r << dendl;
 
-  Mutex::Locker locker(m_lock);
+  Mutex::Locker event_locker(m_event_lock);
   Events::iterator it = m_events.find(tid);
   if (it == m_events.end()) {
     return;
@@ -263,7 +280,7 @@ void Journal::flush_event(uint64_t tid, Context *on_safe) {
 
   ::journal::Future future;
   {
-    Mutex::Locker locker(m_lock);
+    Mutex::Locker event_locker(m_event_lock);
     future = wait_event(m_lock, tid, on_safe);
   }
 
@@ -277,13 +294,13 @@ void Journal::wait_event(uint64_t tid, Context *on_safe) {
   ldout(cct, 20) << this << " " << __func__ << ": tid=" << tid << ", "
                  << "on_safe=" << on_safe << dendl;
 
-  Mutex::Locker locker(m_lock);
+  Mutex::Locker event_locker(m_event_lock);
   wait_event(m_lock, tid, on_safe);
 }
 
 ::journal::Future Journal::wait_event(Mutex &lock, uint64_t tid,
                                       Context *on_safe) {
-  assert(m_lock.is_locked());
+  assert(m_event_lock.is_locked());
   CephContext *cct = m_image_ctx.cct;
 
   Events::iterator it = m_events.find(tid);
@@ -332,7 +349,7 @@ void Journal::destroy_journaler() {
 }
 
 void Journal::complete_event(Events::iterator it, int r) {
-  assert(m_lock.is_locked());
+  assert(m_event_lock.is_locked());
   assert(m_state == STATE_RECORDING);
 
   CephContext *cct = m_image_ctx.cct;
@@ -454,7 +471,7 @@ void Journal::handle_event_safe(int r, uint64_t tid) {
   AioObjectRequests aio_object_requests;
   Contexts on_safe_contexts;
   {
-    Mutex::Locker locker(m_lock);
+    Mutex::Locker event_locker(m_event_lock);
     Events::iterator it = m_events.find(tid);
     assert(it != m_events.end());
 
@@ -500,39 +517,41 @@ bool Journal::handle_requested_lock() {
   ldout(cct, 20) << this << " " << __func__ << ": " << "state=" << m_state
                  << dendl;
 
-  // prevent peers from taking our lock while we are replaying
+  // prevent peers from taking our lock while we are replaying since that
+  // will stale forward progress
   return (m_state != STATE_INITIALIZING && m_state != STATE_REPLAYING);
 }
 
-void Journal::handle_releasing_lock() {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << dendl;
-
-  Mutex::Locker locker(m_lock);
-  if (m_state == STATE_INITIALIZING || m_state == STATE_REPLAYING) {
-    // wait for replay to successfully interrupt
-    m_close_pending = true;
-    wait_for_state_transition();
-  }
-
-  if (m_state == STATE_UNINITIALIZED || m_state == STATE_RECORDING) {
-    // prevent new write ops but allow pending ops to flush to the journal
-    block_writes();
-  }
-}
-
-void Journal::handle_lock_updated(bool lock_owner) {
+void Journal::handle_lock_updated(ImageWatcher::LockUpdateState state) {
 
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": "
-                 << "owner=" << lock_owner << dendl;
+                 << "state=" << state << dendl;
 
   Mutex::Locker locker(m_lock);
-  if (lock_owner && m_state == STATE_UNINITIALIZED) {
+  if (state == ImageWatcher::LOCK_UPDATE_STATE_LOCKED &&
+      m_state == STATE_UNINITIALIZED) {
     create_journaler();
-  } else if (!lock_owner && m_state != STATE_UNINITIALIZED) {
+  } else if (state == ImageWatcher::LOCK_UPDATE_STATE_RELEASING) {
+    if (m_state == STATE_INITIALIZING || m_state == STATE_REPLAYING) {
+      // wait for replay to successfully interrupt
+      m_close_pending = true;
+      wait_for_state_transition();
+    }
+
+    if (m_state == STATE_UNINITIALIZED || m_state == STATE_RECORDING) {
+      // prevent new write ops but allow pending ops to flush to the journal
+      block_writes();
+    }
+  } else if ((state == ImageWatcher::LOCK_UPDATE_STATE_NOT_SUPPORTED ||
+              state == ImageWatcher::LOCK_UPDATE_STATE_UNLOCKED) &&
+             m_state != STATE_UNINITIALIZED &&
+             m_state != STATE_STOPPING_RECORDING) {
     assert(m_state == STATE_RECORDING);
-    assert(m_events.empty());
+    {
+      Mutex::Locker event_locker(m_event_lock);
+      assert(m_events.empty());
+    }
 
     int r = stop_recording();
     if (r < 0) {
@@ -546,10 +565,11 @@ int Journal::stop_recording() {
   assert(m_lock.is_locked());
   assert(m_journaler != NULL);
 
-  C_SaferCond cond;
-  m_journaler->stop_append(&cond);
+  transition_state(STATE_STOPPING_RECORDING);
 
+  C_SaferCond cond;
   m_lock.Unlock();
+  m_journaler->stop_append(&cond);
   int r = cond.wait();
   m_lock.Lock();
 
index e0e68a8ca98f26e4245cbec3ee5bd778605ae7b4..392960507c942b343bb170a48f4ae683d60414b9 100644 (file)
@@ -48,6 +48,8 @@ public:
   bool is_journal_ready() const;
   bool is_journal_replaying() const;
 
+  bool wait_for_journal_ready();
+
   void open();
   int close();
 
@@ -73,6 +75,7 @@ private:
     STATE_INITIALIZING,
     STATE_REPLAYING,
     STATE_RECORDING,
+    STATE_STOPPING_RECORDING
   };
 
   struct Event {
@@ -105,11 +108,8 @@ private:
     virtual bool handle_requested_lock() {
       return journal->handle_requested_lock();
     }
-    virtual void handle_releasing_lock() {
-      journal->handle_releasing_lock();
-    }
-    virtual void handle_lock_updated(bool lock_supported, bool lock_owner) {
-      journal->handle_lock_updated(lock_owner);
+    virtual void handle_lock_updated(ImageWatcher::LockUpdateState state) {
+      journal->handle_lock_updated(state);
     }
   };
 
@@ -169,6 +169,7 @@ private:
   ReplayHandler m_replay_handler;
   bool m_close_pending;
 
+  Mutex m_event_lock;
   uint64_t m_event_tid;
   Events m_events;
 
@@ -191,8 +192,7 @@ private:
   void handle_event_safe(int r, uint64_t tid);
 
   bool handle_requested_lock();
-  void handle_releasing_lock();
-  void handle_lock_updated(bool lock_owner);
+  void handle_lock_updated(ImageWatcher::LockUpdateState state);
 
   int stop_recording();